X86ISelLowering.cpp revision a72cb0ea0925e19ed5278b400bcedfe024cdd01c
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/StringExtras.h"
using namespace llvm;

X86TargetLowering::X86TargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  X86ScalarSSE = Subtarget->hasSSE2();
  X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;

  // Set up the TargetLowering object.

  // X86 is weird, it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setSetCCResultType(MVT::i8);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setSchedulingPreference(SchedulingForRegPressure);
  setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
  setStackPointerRegisterToSaveRestore(X86StackPtr);

  if (Subtarget->isTargetDarwin()) {
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmp(false);
    setUseUnderscoreLongJmp(false);
  } else if (Subtarget->isTargetMingw()) {
    // MS runtime is weird: it exports _setjmp, but longjmp!
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(false);
  } else {
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(true);
  }

  // Set up the register classes.
  addRegisterClass(MVT::i8, X86::GR8RegisterClass);
  addRegisterClass(MVT::i16, X86::GR16RegisterClass);
  addRegisterClass(MVT::i32, X86::GR32RegisterClass);
  if (Subtarget->is64Bit())
    addRegisterClass(MVT::i64, X86::GR64RegisterClass);

  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Expand);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
  setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
  setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand);
    setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
  } else {
    if (X86ScalarSSE)
      // If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
      setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand);
    else
      setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
  }

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
  setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
  // SSE has no i16 to fp conversion, only i32
  if (X86ScalarSSE)
    setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
  else {
    setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
    setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
  }

  if (!Subtarget->is64Bit()) {
    // Custom lower SINT_TO_FP and FP_TO_SINT from/to i64 in 32-bit mode.
    setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
    setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
  }

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTs, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
  setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);

  if (X86ScalarSSE) {
    setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
  } else {
    setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
    setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
  setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
  setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
    setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
  } else {
    if (X86ScalarSSE && !Subtarget->hasSSE3())
      // Expand FP_TO_UINT into a select.
      // FIXME: We would like to use a Custom expander here eventually to do
      // the optimal thing for SSE vs. the default expansion in the legalizer.
      setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
    else
      // With SSE3 we can use fisttpll to convert to a signed i64.
      setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
  }

  // TODO: when we have SSE, these could be more efficient, by using movd/movq.
  if (!X86ScalarSSE) {
    setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand);
    setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand);
  }

  setOperationAction(ISD::BR_JT , MVT::Other, Expand);
  setOperationAction(ISD::BRCOND , MVT::Other, Custom);
  setOperationAction(ISD::BR_CC , MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC , MVT::Other, Expand);
  setOperationAction(ISD::MEMMOVE , MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
  setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
  setOperationAction(ISD::FREM , MVT::f64 , Expand);

  setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
  setOperationAction(ISD::CTTZ , MVT::i8 , Expand);
  setOperationAction(ISD::CTLZ , MVT::i8 , Expand);
  setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
  setOperationAction(ISD::CTTZ , MVT::i16 , Expand);
  setOperationAction(ISD::CTLZ , MVT::i16 , Expand);
  setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
  setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
  setOperationAction(ISD::CTLZ , MVT::i32 , Expand);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
    setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
    setOperationAction(ISD::CTLZ , MVT::i64 , Expand);
  }

  setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
  setOperationAction(ISD::BSWAP , MVT::i16 , Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT , MVT::i1 , Promote);
  setOperationAction(ISD::SELECT , MVT::i8 , Promote);
  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT , MVT::i16 , Custom);
  setOperationAction(ISD::SELECT , MVT::i32 , Custom);
  setOperationAction(ISD::SELECT , MVT::f32 , Custom);
  setOperationAction(ISD::SELECT , MVT::f64 , Custom);
  setOperationAction(ISD::SETCC , MVT::i8 , Custom);
  setOperationAction(ISD::SETCC , MVT::i16 , Custom);
  setOperationAction(ISD::SETCC , MVT::i32 , Custom);
  setOperationAction(ISD::SETCC , MVT::f32 , Custom);
  setOperationAction(ISD::SETCC , MVT::f64 , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SELECT , MVT::i64 , Custom);
    setOperationAction(ISD::SETCC , MVT::i64 , Custom);
  }
  // X86 ret instruction may pop stack.
  setOperationAction(ISD::RET , MVT::Other, Custom);
  // Darwin ABI issue.
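  // These address-forming nodes are custom lowered so that the appropriate
  // target wrapper / PIC reference for the configured platform can be picked.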
  setOperationAction(ISD::ConstantPool , MVT::i32 , Custom);
  setOperationAction(ISD::JumpTable , MVT::i32 , Custom);
  setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32 , Custom);
  setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::ConstantPool , MVT::i64 , Custom);
    setOperationAction(ISD::JumpTable , MVT::i64 , Custom);
    setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom);
    setOperationAction(ISD::ExternalSymbol, MVT::i64 , Custom);
  }
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
  setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom);
  setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom);
  setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom);
  // X86 wants to expand memset / memcpy itself.
  setOperationAction(ISD::MEMSET , MVT::Other, Custom);
  setOperationAction(ISD::MEMCPY , MVT::Other, Custom);

  // We don't have line number support yet.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin() &&
      !Subtarget->isTargetELF() &&
      !Subtarget->isTargetCygMing())
    setOperationAction(ISD::LABEL, MVT::Other, Expand);

  setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
  setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
  setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
  if (Subtarget->is64Bit()) {
    // FIXME: Verify
    setExceptionPointerRegister(X86::RAX);
    setExceptionSelectorRegister(X86::RDX);
  } else {
    setExceptionPointerRegister(X86::EAX);
    setExceptionSelectorRegister(X86::EDX);
  }

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART , MVT::Other, Custom);
  setOperationAction(ISD::VAARG , MVT::Other, Expand);
  setOperationAction(ISD::VAEND , MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::VACOPY , MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY , MVT::Other, Expand);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
  if (Subtarget->isTargetCygMing())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  else
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);

  if (X86ScalarSSE) {
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f64, Custom);
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f64, Custom);
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    // Use ANDPD and ORPD to simulate FCOPYSIGN.
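    // (AND clears the sign bit of the magnitude, OR then merges in the sign
    // bit taken from the other operand.)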
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // We don't support sin/cos/fmod
    setOperationAction(ISD::FSIN , MVT::f64, Expand);
    setOperationAction(ISD::FCOS , MVT::f64, Expand);
    setOperationAction(ISD::FREM , MVT::f64, Expand);
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FREM , MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(+0.0); // xorps / xorpd
  } else {
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, X86::RFPRegisterClass);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN , MVT::f64 , Expand);
      setOperationAction(ISD::FCOS , MVT::f64 , Expand);
    }

    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    addLegalFPImmediate(+0.0); // FLD0
    addLegalFPImmediate(+1.0); // FLD1
    addLegalFPImmediate(-0.0); // FLD0/FCHS
    addLegalFPImmediate(-1.0); // FLD1/FCHS
  }

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FADD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FNEG, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSUB, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FMUL, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
  }

  if (Subtarget->hasMMX()) {
    addRegisterClass(MVT::v8i8, X86::VR64RegisterClass);
    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);
    addRegisterClass(MVT::v1i64, X86::VR64RegisterClass);

    // FIXME: add MMX packed arithmetic

    setOperationAction(ISD::ADD, MVT::v8i8, Legal);
    setOperationAction(ISD::ADD, MVT::v4i16, Legal);
    setOperationAction(ISD::ADD, MVT::v2i32, Legal);
    setOperationAction(ISD::ADD, MVT::v1i64, Legal);

    setOperationAction(ISD::SUB, MVT::v8i8, Legal);
    setOperationAction(ISD::SUB, MVT::v4i16, Legal);
    setOperationAction(ISD::SUB, MVT::v2i32, Legal);

    setOperationAction(ISD::MULHS, MVT::v4i16, Legal);
    setOperationAction(ISD::MUL, MVT::v4i16, Legal);

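    // Promote the bitwise ops and loads on the narrower MMX types to v1i64 so
    // a single set of 64-bit patterns covers all of them.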
    setOperationAction(ISD::AND, MVT::v8i8, Promote);
    AddPromotedToType (ISD::AND, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v4i16, Promote);
    AddPromotedToType (ISD::AND, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v2i32, Promote);
    AddPromotedToType (ISD::AND, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v1i64, Legal);

    setOperationAction(ISD::OR, MVT::v8i8, Promote);
    AddPromotedToType (ISD::OR, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::OR, MVT::v4i16, Promote);
    AddPromotedToType (ISD::OR, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::OR, MVT::v2i32, Promote);
    AddPromotedToType (ISD::OR, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::OR, MVT::v1i64, Legal);

    setOperationAction(ISD::XOR, MVT::v8i8, Promote);
    AddPromotedToType (ISD::XOR, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v4i16, Promote);
    AddPromotedToType (ISD::XOR, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v2i32, Promote);
    AddPromotedToType (ISD::XOR, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v1i64, Legal);

    setOperationAction(ISD::LOAD, MVT::v8i8, Promote);
    AddPromotedToType (ISD::LOAD, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v4i16, Promote);
    AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v2i32, Promote);
    AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v1i64, Legal);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);

    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom);
  }

  if (Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
    setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
  }

  if (Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);

    setOperationAction(ISD::ADD, MVT::v16i8, Legal);
    setOperationAction(ISD::ADD, MVT::v8i16, Legal);
    setOperationAction(ISD::ADD, MVT::v4i32, Legal);
    setOperationAction(ISD::ADD, MVT::v2i64, Legal);
    setOperationAction(ISD::SUB, MVT::v16i8, Legal);
    setOperationAction(ISD::SUB, MVT::v8i16, Legal);
    setOperationAction(ISD::SUB, MVT::v4i32, Legal);
    setOperationAction(ISD::SUB, MVT::v2i64, Legal);
    setOperationAction(ISD::MUL, MVT::v8i16, Legal);
    setOperationAction(ISD::FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV, MVT::v2f64, Legal);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
    // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    }
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::AND, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::OR, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::XOR, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
    }

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
  setTargetDAGCombine(ISD::SELECT);

  computeRegisterProperties();

  // FIXME: These should be based on subtarget info. Plus, the values should
  // be smaller when we are in optimizing for size mode.
  maxStoresPerMemset = 16; // For %llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores
  maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
  allowUnalignedMemoryAccesses = true; // x86 supports it!
}


//===----------------------------------------------------------------------===//
//               Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "X86GenCallingConv.inc"

/// LowerRET - Lower an ISD::RET node.
SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
  assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args");

  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
  CCInfo.AnalyzeReturn(Op.Val, RetCC_X86);


  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      if (RVLocs[i].isRegLoc())
        DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
  }

  SDOperand Chain = Op.getOperand(0);
  SDOperand Flag;

  // Copy the result values into the output registers.
  if (RVLocs.size() != 1 || !RVLocs[0].isRegLoc() ||
      RVLocs[0].getLocReg() != X86::ST0) {
    for (unsigned i = 0; i != RVLocs.size(); ++i) {
      CCValAssign &VA = RVLocs[i];
      assert(VA.isRegLoc() && "Can only return in registers!");
      Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1),
                               Flag);
      Flag = Chain.getValue(1);
    }
  } else {
    // We need to handle a destination of ST0 specially, because it isn't really
    // a register.
    SDOperand Value = Op.getOperand(1);

    // If this is an FP return with ScalarSSE, we need to move the value from
    // an XMM register onto the fp-stack.
    if (X86ScalarSSE) {
      SDOperand MemLoc;

      // If this is a load into a scalarsse value, don't store the loaded value
      // back to the stack, only to reload it: just replace the scalar-sse load.
      if (ISD::isNON_EXTLoad(Value.Val) &&
          (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) {
        Chain  = Value.getOperand(0);
        MemLoc = Value.getOperand(1);
      } else {
        // Spill the value to memory and reload it into top of stack.
        unsigned Size = MVT::getSizeInBits(RVLocs[0].getValVT())/8;
        MachineFunction &MF = DAG.getMachineFunction();
        int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
        MemLoc = DAG.getFrameIndex(SSFI, getPointerTy());
        Chain = DAG.getStore(Op.getOperand(0), Value, MemLoc, NULL, 0);
      }
      SDVTList Tys = DAG.getVTList(MVT::f64, MVT::Other);
      SDOperand Ops[] = {Chain, MemLoc, DAG.getValueType(RVLocs[0].getValVT())};
      Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
      Chain = Value.getValue(1);
    }

    SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
    SDOperand Ops[] = { Chain, Value };
    Chain = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops, 2);
    Flag = Chain.getValue(1);
  }

  SDOperand BytesToPop = DAG.getConstant(getBytesToPopOnReturn(), MVT::i16);
  if (Flag.Val)
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop, Flag);
  else
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop);
}


/// LowerCallResult - Lower the result values of an ISD::CALL into the
/// appropriate copies out of appropriate physical registers.  This assumes that
/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
/// being lowered.  It returns an SDNode with the same number of values as the
/// ISD::CALL.
SDNode *X86TargetLowering::
LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall,
                unsigned CallingConv, SelectionDAG &DAG) {

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  bool isVarArg = cast<ConstantSDNode>(TheCall->getOperand(2))->getValue() != 0;
  CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
  CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);


  SmallVector<SDOperand, 8> ResultVals;

  // Copy all of the result registers out of their specified physreg.
  if (RVLocs.size() != 1 || RVLocs[0].getLocReg() != X86::ST0) {
    for (unsigned i = 0; i != RVLocs.size(); ++i) {
      Chain = DAG.getCopyFromReg(Chain, RVLocs[i].getLocReg(),
                                 RVLocs[i].getValVT(), InFlag).getValue(1);
      InFlag = Chain.getValue(2);
      ResultVals.push_back(Chain.getValue(0));
    }
  } else {
    // Copies from the FP stack are special, as ST0 isn't a valid register
    // before the fp stackifier runs.

    // Copy ST0 into an RFP register with FP_GET_RESULT.
    SDVTList Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
    SDOperand GROps[] = { Chain, InFlag };
    SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2);
    Chain  = RetVal.getValue(1);
    InFlag = RetVal.getValue(2);

    // If we are using ScalarSSE, store ST(0) to the stack and reload it into
    // an XMM register.
    if (X86ScalarSSE) {
      // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
      // shouldn't be necessary except that RFP cannot be live across
      // multiple blocks. When stackifier is fixed, they can be uncoupled.
      MachineFunction &MF = DAG.getMachineFunction();
      int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
      SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
      SDOperand Ops[] = {
        Chain, RetVal, StackSlot, DAG.getValueType(RVLocs[0].getValVT()), InFlag
      };
      Chain = DAG.getNode(X86ISD::FST, MVT::Other, Ops, 5);
      RetVal = DAG.getLoad(RVLocs[0].getValVT(), Chain, StackSlot, NULL, 0);
      Chain = RetVal.getValue(1);
    }

    if (RVLocs[0].getValVT() == MVT::f32 && !X86ScalarSSE)
      // FIXME: we would really like to remember that this FP_ROUND
      // operation is okay to eliminate if we allow excess FP precision.
      RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
    ResultVals.push_back(RetVal);
  }

  // Merge everything together with a MERGE_VALUES node.
  ResultVals.push_back(Chain);
  return DAG.getNode(ISD::MERGE_VALUES, TheCall->getVTList(),
                     &ResultVals[0], ResultVals.size()).Val;
}


//===----------------------------------------------------------------------===//
//                C & StdCall Calling Convention implementation
//===----------------------------------------------------------------------===//
//  StdCall calling convention seems to be standard for many Windows' API
//  routines and the like. It differs from C calling convention just a little:
//  callee should clean up the stack, not caller. Symbols should be also
//  decorated in some fancy way :) It doesn't support any vector arguments.

/// AddLiveIn - This helper function adds the specified physical register to the
/// MachineFunction as a live in value.  It also creates a corresponding virtual
/// register for it.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
                          const TargetRegisterClass *RC) {
  assert(RC->contains(PReg) && "Not the correct regclass!");
  unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
  MF.addLiveIn(PReg, VReg);
  return VReg;
}

SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG,
                                               bool isStdCall) {
  unsigned NumArgs = Op.Val->getNumValues() - 1;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
                 getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_C);

  SmallVector<SDOperand, 8> ArgValues;
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
    // places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();

    if (VA.isRegLoc()) {
      MVT::ValueType RegVT = VA.getLocVT();
      TargetRegisterClass *RC;
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else {
        assert(MVT::isVector(RegVT));
        RC = X86::VR128RegisterClass;
      }

      unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
      SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);

      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits.  Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));

      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());

      // Create the nodes corresponding to a load from this parameter slot.
      int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
                                      VA.getLocMemOffset());
      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
      ArgValues.push_back(DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0));
    }
  }

  unsigned StackSize = CCInfo.getNextStackOffset();

  ArgValues.push_back(Root);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg)
    VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);

  if (isStdCall && !isVarArg) {
    BytesToPopOnReturn  = StackSize;  // Callee pops everything..
    BytesCallerReserves = 0;
  } else {
    BytesToPopOnReturn  = 0; // Callee pops nothing.

    // If this is an sret function, the return should pop the hidden pointer.
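    // That hidden pointer is the 4-byte struct-return address pushed by the
    // caller, so the callee pops 4 bytes even under the C calling convention.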
    if (NumArgs &&
        (cast<ConstantSDNode>(Op.getOperand(3))->getValue() &
         ISD::ParamFlags::StructReturn))
      BytesToPopOnReturn = 4;

    BytesCallerReserves = StackSize;
  }

  RegSaveFrameIndex = 0xAAAAAAA;  // X86-64 only.
  ReturnAddrIndex = 0;            // No return address slot generated yet.

  MF.getInfo<X86MachineFunctionInfo>()
    ->setBytesToPopOnReturn(BytesToPopOnReturn);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}

SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG,
                                            unsigned CC) {
  SDOperand Chain = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee = Op.getOperand(4);
  unsigned NumOps = (Op.getNumOperands() - 5) / 2;

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_C);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));

  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;

  SDOperand StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());
      if (StackPtr.Val == 0)
        StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
      SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
    }
  }

  // If the first argument is an sret pointer, remember it.
  bool isSRet = NumOps &&
                (cast<ConstantSDNode>(Op.getOperand(6))->getValue() &
                 ISD::ParamFlags::StructReturn);

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // ELF / PIC requires the GOT pointer in the EBX register before making
  // function calls via the PLT.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT()) {
    Chain = DAG.getCopyToReg(Chain, X86::EBX,
                             DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use extra load for direct calls to dllimported functions in
    // non-JIT mode.
    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
                                        getTargetMachine(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add an implicit use GOT pointer in EBX.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT())
    Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  unsigned NumBytesForCalleeToPush = 0;

  if (CC == CallingConv::X86_StdCall) {
    if (isVarArg)
      NumBytesForCalleeToPush = isSRet ? 4 : 0;
    else
      NumBytesForCalleeToPush = NumBytes;
  } else {
    // If this is a call to a struct-return function, the callee
    // pops the hidden struct pointer, so we have to push it back.
    // This is common for Darwin/X86, Linux & Mingw32 targets.
    NumBytesForCalleeToPush = isSRet ? 4 : 0;
  }

  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
}


//===----------------------------------------------------------------------===//
//                   FastCall Calling Convention implementation
//===----------------------------------------------------------------------===//
//
// The X86 'fastcall' calling convention passes up to two integer arguments in
// registers (an appropriate portion of ECX/EDX), passes arguments in C order,
// and requires that the callee pop its arguments off the stack (allowing proper
// tail calls), and has the same return value conventions as C calling convs.
//
// This calling convention always arranges for the callee pop value to be 8n+4
// bytes, which is needed for tail recursion elimination and stack alignment
// reasons.
SDOperand
X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
                 getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_FastCall);

  SmallVector<SDOperand, 8> ArgValues;
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
    // places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();

    if (VA.isRegLoc()) {
      MVT::ValueType RegVT = VA.getLocVT();
      TargetRegisterClass *RC;
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else {
        assert(MVT::isVector(RegVT));
        RC = X86::VR128RegisterClass;
      }

      unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
      SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);

      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits.  Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));

      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());

      // Create the nodes corresponding to a load from this parameter slot.
      int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
                                      VA.getLocMemOffset());
      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
      ArgValues.push_back(DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0));
    }
  }

  ArgValues.push_back(Root);

  unsigned StackSize = CCInfo.getNextStackOffset();

  if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
    // Make sure the instruction takes 8n+4 bytes to make sure the start of the
    // arguments and the arguments after the retaddr has been pushed are aligned.
    if ((StackSize & 7) == 0)
      StackSize += 4;
  }

  VarArgsFrameIndex = 0xAAAAAAA;   // fastcc functions can't have varargs.
  RegSaveFrameIndex = 0xAAAAAAA;   // X86-64 only.
  ReturnAddrIndex = 0;             // No return address slot generated yet.
  BytesToPopOnReturn = StackSize;  // Callee pops all stack arguments.
  BytesCallerReserves = 0;

  MF.getInfo<X86MachineFunctionInfo>()
    ->setBytesToPopOnReturn(BytesToPopOnReturn);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}

SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG,
                                               unsigned CC) {
  SDOperand Chain = Op.getOperand(0);
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  SDOperand Callee = Op.getOperand(4);

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_FastCall);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
    // Make sure the instruction takes 8n+4 bytes to make sure the start of the
    // arguments and the arguments after the retaddr has been pushed are aligned.
    if ((NumBytes & 7) == 0)
      NumBytes += 4;
  }

  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));

  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;

  SDOperand StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());
      if (StackPtr.Val == 0)
        StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
      SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use extra load for direct calls to dllimported functions in
    // non-JIT mode.
    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
                                        getTargetMachine(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  // ELF / PIC requires the GOT pointer in the EBX register before making
  // function calls via the PLT.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT()) {
    Chain = DAG.getCopyToReg(Chain, X86::EBX,
                             DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add an implicit use GOT pointer in EBX.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT())
    Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  // FIXME: Do not generate X86ISD::TAILCALL for now.
  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Returns a flag for retval copy to use.
  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
}


//===----------------------------------------------------------------------===//
//                  X86-64 C Calling Convention implementation
//===----------------------------------------------------------------------===//

SDOperand
X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  static const unsigned GPR64ArgRegs[] = {
    X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8,  X86::R9
  };
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
    X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
  };


  // Assign locations to all of the incoming arguments.
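  // CC_X86_64_C hands out the six GPRs and eight XMM registers listed above
  // before falling back to stack slots.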
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
                 getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_C);

  SmallVector<SDOperand, 8> ArgValues;
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
    // places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();

    if (VA.isRegLoc()) {
      MVT::ValueType RegVT = VA.getLocVT();
      TargetRegisterClass *RC;
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else if (RegVT == MVT::i64)
        RC = X86::GR64RegisterClass;
      else if (RegVT == MVT::f32)
        RC = X86::FR32RegisterClass;
      else if (RegVT == MVT::f64)
        RC = X86::FR64RegisterClass;
      else {
        assert(MVT::isVector(RegVT));
        if (MVT::getSizeInBits(RegVT) == 64) {
          RC = X86::GR64RegisterClass; // MMX values are passed in GPRs.
          RegVT = MVT::i64;
        } else
          RC = X86::VR128RegisterClass;
      }

      unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
      SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);

      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits.  Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));

      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);

      // Handle MMX values passed in GPRs.
      if (RegVT != VA.getLocVT() && RC == X86::GR64RegisterClass &&
          MVT::getSizeInBits(RegVT) == 64)
        ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue);

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());

      // Create the nodes corresponding to a load from this parameter slot.
      int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
                                      VA.getLocMemOffset());
      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
      ArgValues.push_back(DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0));
    }
  }

  unsigned StackSize = CCInfo.getNextStackOffset();

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 6);
    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);

    // For X86-64, if there are vararg parameters that are passed via
    // registers, then we must store them to their spots on the stack so they
    // may be loaded by dereferencing the result of va_next.
    VarArgsGPOffset = NumIntRegs * 8;
    VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16;
    VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
    RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16);

    // Store the integer parameter registers.
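    // Only the registers not already claimed by named arguments need to be
    // spilled into the register save area created above.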
    SmallVector<SDOperand, 8> MemOps;
    SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
    SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                                DAG.getConstant(VarArgsGPOffset, getPointerTy()));
    for (; NumIntRegs != 6; ++NumIntRegs) {
      unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
                                X86::GR64RegisterClass);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                        DAG.getConstant(8, getPointerTy()));
    }

    // Now store the XMM (fp + vector) parameter registers.
    FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                      DAG.getConstant(VarArgsFPOffset, getPointerTy()));
    for (; NumXMMRegs != 8; ++NumXMMRegs) {
      unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
                                X86::VR128RegisterClass);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                        DAG.getConstant(16, getPointerTy()));
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                         &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);

  ReturnAddrIndex = 0;     // No return address slot generated yet.
  BytesToPopOnReturn = 0;  // Callee pops nothing.
  BytesCallerReserves = StackSize;

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}

SDOperand
X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG,
                                        unsigned CC) {
  SDOperand Chain = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee = Op.getOperand(4);

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_C);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();
  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));

  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;

  SDOperand StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());
      if (StackPtr.Val == 0)
        StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
      SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  if (isVarArg) {
    // From AMD64 ABI document:
    // For calls that may call functions that use varargs or stdargs
    // (prototype-less calls or calls to functions containing ellipsis (...) in
    // the declaration) %al is used as hidden argument to specify the number
    // of SSE registers used. The contents of %al do not need to match exactly
    // the number of registers, but must be an upper bound on the number of SSE
    // registers used and is in the range 0 - 8 inclusive.

    // Count the number of XMM registers allocated.
    static const unsigned XMMArgRegs[] = {
      X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
      X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
    };
    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);

    Chain = DAG.getCopyToReg(Chain, X86::AL,
                             DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use extra load for direct calls to dllimported functions in
    // non-JIT mode.
    if (getTargetMachine().getCodeModel() != CodeModel::Large
        && !Subtarget->GVRequiresExtraLoad(G->getGlobal(),
                                           getTargetMachine(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    if (getTargetMachine().getCodeModel() != CodeModel::Large)
      Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
1399 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 1400 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 1401 RegsToPass[i].second.getValueType())); 1402 1403 if (InFlag.Val) 1404 Ops.push_back(InFlag); 1405 1406 // FIXME: Do not generate X86ISD::TAILCALL for now. 1407 Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL, 1408 NodeTys, &Ops[0], Ops.size()); 1409 InFlag = Chain.getValue(1); 1410 1411 // Returns a flag for retval copy to use. 1412 NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 1413 Ops.clear(); 1414 Ops.push_back(Chain); 1415 Ops.push_back(DAG.getConstant(NumBytes, getPointerTy())); 1416 Ops.push_back(DAG.getConstant(0, getPointerTy())); 1417 Ops.push_back(InFlag); 1418 Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size()); 1419 InFlag = Chain.getValue(1); 1420 1421 // Handle result values, copying them out of physregs into vregs that we 1422 // return. 1423 return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo); 1424} 1425 1426 1427//===----------------------------------------------------------------------===// 1428// Other Lowering Hooks 1429//===----------------------------------------------------------------------===// 1430 1431 1432SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { 1433 if (ReturnAddrIndex == 0) { 1434 // Set up a frame object for the return address. 1435 MachineFunction &MF = DAG.getMachineFunction(); 1436 if (Subtarget->is64Bit()) 1437 ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(8, -8); 1438 else 1439 ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4); 1440 } 1441 1442 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy()); 1443} 1444 1445 1446 1447/// translateX86CC - do a one to one translation of a ISD::CondCode to the X86 1448/// specific condition code. It returns a false if it cannot do a direct 1449/// translation. X86CC is the translated CondCode. LHS/RHS are modified as 1450/// needed. 1451static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP, 1452 unsigned &X86CC, SDOperand &LHS, SDOperand &RHS, 1453 SelectionDAG &DAG) { 1454 X86CC = X86::COND_INVALID; 1455 if (!isFP) { 1456 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) { 1457 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) { 1458 // X > -1 -> X == 0, jump !sign. 1459 RHS = DAG.getConstant(0, RHS.getValueType()); 1460 X86CC = X86::COND_NS; 1461 return true; 1462 } else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) { 1463 // X < 0 -> X == 0, jump on sign. 
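  // RHS is already the constant zero here, so unlike the SETGT case above only the condition code needs to change.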
1464 X86CC = X86::COND_S; 1465 return true; 1466 } 1467 } 1468 1469 switch (SetCCOpcode) { 1470 default: break; 1471 case ISD::SETEQ: X86CC = X86::COND_E; break; 1472 case ISD::SETGT: X86CC = X86::COND_G; break; 1473 case ISD::SETGE: X86CC = X86::COND_GE; break; 1474 case ISD::SETLT: X86CC = X86::COND_L; break; 1475 case ISD::SETLE: X86CC = X86::COND_LE; break; 1476 case ISD::SETNE: X86CC = X86::COND_NE; break; 1477 case ISD::SETULT: X86CC = X86::COND_B; break; 1478 case ISD::SETUGT: X86CC = X86::COND_A; break; 1479 case ISD::SETULE: X86CC = X86::COND_BE; break; 1480 case ISD::SETUGE: X86CC = X86::COND_AE; break; 1481 } 1482 } else { 1483 // On a floating point condition, the flags are set as follows: 1484 // ZF PF CF op 1485 // 0 | 0 | 0 | X > Y 1486 // 0 | 0 | 1 | X < Y 1487 // 1 | 0 | 0 | X == Y 1488 // 1 | 1 | 1 | unordered 1489 bool Flip = false; 1490 switch (SetCCOpcode) { 1491 default: break; 1492 case ISD::SETUEQ: 1493 case ISD::SETEQ: X86CC = X86::COND_E; break; 1494 case ISD::SETOLT: Flip = true; // Fallthrough 1495 case ISD::SETOGT: 1496 case ISD::SETGT: X86CC = X86::COND_A; break; 1497 case ISD::SETOLE: Flip = true; // Fallthrough 1498 case ISD::SETOGE: 1499 case ISD::SETGE: X86CC = X86::COND_AE; break; 1500 case ISD::SETUGT: Flip = true; // Fallthrough 1501 case ISD::SETULT: 1502 case ISD::SETLT: X86CC = X86::COND_B; break; 1503 case ISD::SETUGE: Flip = true; // Fallthrough 1504 case ISD::SETULE: 1505 case ISD::SETLE: X86CC = X86::COND_BE; break; 1506 case ISD::SETONE: 1507 case ISD::SETNE: X86CC = X86::COND_NE; break; 1508 case ISD::SETUO: X86CC = X86::COND_P; break; 1509 case ISD::SETO: X86CC = X86::COND_NP; break; 1510 } 1511 if (Flip) 1512 std::swap(LHS, RHS); 1513 } 1514 1515 return X86CC != X86::COND_INVALID; 1516} 1517 1518/// hasFPCMov - is there a floating point cmov for the specific X86 condition 1519/// code. Current x86 isa includes the following FP cmov instructions: 1520/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu. 1521static bool hasFPCMov(unsigned X86CC) { 1522 switch (X86CC) { 1523 default: 1524 return false; 1525 case X86::COND_B: 1526 case X86::COND_BE: 1527 case X86::COND_E: 1528 case X86::COND_P: 1529 case X86::COND_A: 1530 case X86::COND_AE: 1531 case X86::COND_NE: 1532 case X86::COND_NP: 1533 return true; 1534 } 1535} 1536 1537/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return 1538/// true if Op is undef or if its value falls within the specified range [Low, Hi). 1539static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) { 1540 if (Op.getOpcode() == ISD::UNDEF) 1541 return true; 1542 1543 unsigned Val = cast<ConstantSDNode>(Op)->getValue(); 1544 return (Val >= Low && Val < Hi); 1545} 1546 1547/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return 1548/// true if Op is undef or if its value is equal to the specified value. 1549static bool isUndefOrEqual(SDOperand Op, unsigned Val) { 1550 if (Op.getOpcode() == ISD::UNDEF) 1551 return true; 1552 return cast<ConstantSDNode>(Op)->getValue() == Val; 1553} 1554 1555/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand 1556/// specifies a shuffle of elements that is suitable for input to PSHUFD. 1557bool X86::isPSHUFDMask(SDNode *N) { 1558 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1559 1560 if (N->getNumOperands() != 4) 1561 return false; 1562 1563 // Check if the value doesn't reference the second vector.
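  // PSHUFD shuffles a single source register, so every mask index must select from the first operand (values 0-3); an index >= 4 would require data from the second vector.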
1564 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 1565 SDOperand Arg = N->getOperand(i); 1566 if (Arg.getOpcode() == ISD::UNDEF) continue; 1567 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1568 if (cast<ConstantSDNode>(Arg)->getValue() >= 4) 1569 return false; 1570 } 1571 1572 return true; 1573} 1574 1575/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand 1576/// specifies a shuffle of elements that is suitable for input to PSHUFHW. 1577bool X86::isPSHUFHWMask(SDNode *N) { 1578 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1579 1580 if (N->getNumOperands() != 8) 1581 return false; 1582 1583 // Lower quadword copied in order. 1584 for (unsigned i = 0; i != 4; ++i) { 1585 SDOperand Arg = N->getOperand(i); 1586 if (Arg.getOpcode() == ISD::UNDEF) continue; 1587 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1588 if (cast<ConstantSDNode>(Arg)->getValue() != i) 1589 return false; 1590 } 1591 1592 // Upper quadword shuffled. 1593 for (unsigned i = 4; i != 8; ++i) { 1594 SDOperand Arg = N->getOperand(i); 1595 if (Arg.getOpcode() == ISD::UNDEF) continue; 1596 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1597 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1598 if (Val < 4 || Val > 7) 1599 return false; 1600 } 1601 1602 return true; 1603} 1604 1605/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand 1606/// specifies a shuffle of elements that is suitable for input to PSHUFLW. 1607bool X86::isPSHUFLWMask(SDNode *N) { 1608 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1609 1610 if (N->getNumOperands() != 8) 1611 return false; 1612 1613 // Upper quadword copied in order. 1614 for (unsigned i = 4; i != 8; ++i) 1615 if (!isUndefOrEqual(N->getOperand(i), i)) 1616 return false; 1617 1618 // Lower quadword shuffled. 1619 for (unsigned i = 0; i != 4; ++i) 1620 if (!isUndefOrInRange(N->getOperand(i), 0, 4)) 1621 return false; 1622 1623 return true; 1624} 1625 1626/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand 1627/// specifies a shuffle of elements that is suitable for input to SHUFP*. 1628static bool isSHUFPMask(const SDOperand *Elems, unsigned NumElems) { 1629 if (NumElems != 2 && NumElems != 4) return false; 1630 1631 unsigned Half = NumElems / 2; 1632 for (unsigned i = 0; i < Half; ++i) 1633 if (!isUndefOrInRange(Elems[i], 0, NumElems)) 1634 return false; 1635 for (unsigned i = Half; i < NumElems; ++i) 1636 if (!isUndefOrInRange(Elems[i], NumElems, NumElems*2)) 1637 return false; 1638 1639 return true; 1640} 1641 1642bool X86::isSHUFPMask(SDNode *N) { 1643 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1644 return ::isSHUFPMask(N->op_begin(), N->getNumOperands()); 1645} 1646 1647/// isCommutedSHUFP - Returns true if the shuffle mask is exactly 1648/// the reverse of what x86 shuffles want. x86 shuffles requires the lower 1649/// half elements to come from vector 1 (which would equal the dest.) and 1650/// the upper half to come from vector 2. 
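/// e.g. for 4 elements a mask such as <4, 5, 0, 1> is a commuted SHUFP: the low half selects from the second operand and the high half from the first.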
1651static bool isCommutedSHUFP(const SDOperand *Ops, unsigned NumOps) { 1652 if (NumOps != 2 && NumOps != 4) return false; 1653 1654 unsigned Half = NumOps / 2; 1655 for (unsigned i = 0; i < Half; ++i) 1656 if (!isUndefOrInRange(Ops[i], NumOps, NumOps*2)) 1657 return false; 1658 for (unsigned i = Half; i < NumOps; ++i) 1659 if (!isUndefOrInRange(Ops[i], 0, NumOps)) 1660 return false; 1661 return true; 1662} 1663 1664static bool isCommutedSHUFP(SDNode *N) { 1665 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1666 return isCommutedSHUFP(N->op_begin(), N->getNumOperands()); 1667} 1668 1669/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand 1670/// specifies a shuffle of elements that is suitable for input to MOVHLPS. 1671bool X86::isMOVHLPSMask(SDNode *N) { 1672 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1673 1674 if (N->getNumOperands() != 4) 1675 return false; 1676 1677 // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3 1678 return isUndefOrEqual(N->getOperand(0), 6) && 1679 isUndefOrEqual(N->getOperand(1), 7) && 1680 isUndefOrEqual(N->getOperand(2), 2) && 1681 isUndefOrEqual(N->getOperand(3), 3); 1682} 1683 1684/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form 1685/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, 1686/// <2, 3, 2, 3> 1687bool X86::isMOVHLPS_v_undef_Mask(SDNode *N) { 1688 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1689 1690 if (N->getNumOperands() != 4) 1691 return false; 1692 1693 // Expect bit0 == 2, bit1 == 3, bit2 == 2, bit3 == 3 1694 return isUndefOrEqual(N->getOperand(0), 2) && 1695 isUndefOrEqual(N->getOperand(1), 3) && 1696 isUndefOrEqual(N->getOperand(2), 2) && 1697 isUndefOrEqual(N->getOperand(3), 3); 1698} 1699 1700/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand 1701/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. 1702bool X86::isMOVLPMask(SDNode *N) { 1703 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1704 1705 unsigned NumElems = N->getNumOperands(); 1706 if (NumElems != 2 && NumElems != 4) 1707 return false; 1708 1709 for (unsigned i = 0; i < NumElems/2; ++i) 1710 if (!isUndefOrEqual(N->getOperand(i), i + NumElems)) 1711 return false; 1712 1713 for (unsigned i = NumElems/2; i < NumElems; ++i) 1714 if (!isUndefOrEqual(N->getOperand(i), i)) 1715 return false; 1716 1717 return true; 1718} 1719 1720/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand 1721/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D} 1722/// and MOVLHPS. 1723bool X86::isMOVHPMask(SDNode *N) { 1724 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1725 1726 unsigned NumElems = N->getNumOperands(); 1727 if (NumElems != 2 && NumElems != 4) 1728 return false; 1729 1730 for (unsigned i = 0; i < NumElems/2; ++i) 1731 if (!isUndefOrEqual(N->getOperand(i), i)) 1732 return false; 1733 1734 for (unsigned i = 0; i < NumElems/2; ++i) { 1735 SDOperand Arg = N->getOperand(i + NumElems/2); 1736 if (!isUndefOrEqual(Arg, i + NumElems)) 1737 return false; 1738 } 1739 1740 return true; 1741} 1742 1743/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand 1744/// specifies a shuffle of elements that is suitable for input to UNPCKL. 
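/// e.g. the canonical 4-element mask is <0, 4, 1, 5>, interleaving the low halves of the two vectors.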
1745bool static isUNPCKLMask(const SDOperand *Elts, unsigned NumElts, 1746 bool V2IsSplat = false) { 1747 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) 1748 return false; 1749 1750 for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) { 1751 SDOperand BitI = Elts[i]; 1752 SDOperand BitI1 = Elts[i+1]; 1753 if (!isUndefOrEqual(BitI, j)) 1754 return false; 1755 if (V2IsSplat) { 1756 if (isUndefOrEqual(BitI1, NumElts)) 1757 return false; 1758 } else { 1759 if (!isUndefOrEqual(BitI1, j + NumElts)) 1760 return false; 1761 } 1762 } 1763 1764 return true; 1765} 1766 1767bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) { 1768 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1769 return ::isUNPCKLMask(N->op_begin(), N->getNumOperands(), V2IsSplat); 1770} 1771 1772/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand 1773/// specifies a shuffle of elements that is suitable for input to UNPCKH. 1774bool static isUNPCKHMask(const SDOperand *Elts, unsigned NumElts, 1775 bool V2IsSplat = false) { 1776 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) 1777 return false; 1778 1779 for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) { 1780 SDOperand BitI = Elts[i]; 1781 SDOperand BitI1 = Elts[i+1]; 1782 if (!isUndefOrEqual(BitI, j + NumElts/2)) 1783 return false; 1784 if (V2IsSplat) { 1785 if (isUndefOrEqual(BitI1, NumElts)) 1786 return false; 1787 } else { 1788 if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts)) 1789 return false; 1790 } 1791 } 1792 1793 return true; 1794} 1795 1796bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) { 1797 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1798 return ::isUNPCKHMask(N->op_begin(), N->getNumOperands(), V2IsSplat); 1799} 1800 1801/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form 1802/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef, 1803/// <0, 0, 1, 1> 1804bool X86::isUNPCKL_v_undef_Mask(SDNode *N) { 1805 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1806 1807 unsigned NumElems = N->getNumOperands(); 1808 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 1809 return false; 1810 1811 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 1812 SDOperand BitI = N->getOperand(i); 1813 SDOperand BitI1 = N->getOperand(i+1); 1814 1815 if (!isUndefOrEqual(BitI, j)) 1816 return false; 1817 if (!isUndefOrEqual(BitI1, j)) 1818 return false; 1819 } 1820 1821 return true; 1822} 1823 1824/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form 1825/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef, 1826/// <2, 2, 3, 3> 1827bool X86::isUNPCKH_v_undef_Mask(SDNode *N) { 1828 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1829 1830 unsigned NumElems = N->getNumOperands(); 1831 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 1832 return false; 1833 1834 for (unsigned i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) { 1835 SDOperand BitI = N->getOperand(i); 1836 SDOperand BitI1 = N->getOperand(i + 1); 1837 1838 if (!isUndefOrEqual(BitI, j)) 1839 return false; 1840 if (!isUndefOrEqual(BitI1, j)) 1841 return false; 1842 } 1843 1844 return true; 1845} 1846 1847/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand 1848/// specifies a shuffle of elements that is suitable for input to MOVSS, 1849/// MOVSD, and MOVD, i.e. setting the lowest element. 
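/// e.g. the 4-element form is <4, 1, 2, 3>: element 0 is taken from V2 and the remaining elements pass through from V1 in order.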
1850 static bool isMOVLMask(const SDOperand *Elts, unsigned NumElts) { 1851 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) 1852 return false; 1853 1854 if (!isUndefOrEqual(Elts[0], NumElts)) 1855 return false; 1856 1857 for (unsigned i = 1; i < NumElts; ++i) { 1858 if (!isUndefOrEqual(Elts[i], i)) 1859 return false; 1860 } 1861 1862 return true; 1863} 1864 1865bool X86::isMOVLMask(SDNode *N) { 1866 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1867 return ::isMOVLMask(N->op_begin(), N->getNumOperands()); 1868} 1869 1870/// isCommutedMOVL - Returns true if the shuffle mask is exactly the reverse 1871/// of what x86 movss wants. X86 movss requires the lowest element to be the lowest 1872/// element of vector 2 and the other elements to come from vector 1 in order. 1873static bool isCommutedMOVL(const SDOperand *Ops, unsigned NumOps, 1874 bool V2IsSplat = false, 1875 bool V2IsUndef = false) { 1876 if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16) 1877 return false; 1878 1879 if (!isUndefOrEqual(Ops[0], 0)) 1880 return false; 1881 1882 for (unsigned i = 1; i < NumOps; ++i) { 1883 SDOperand Arg = Ops[i]; 1884 if (!(isUndefOrEqual(Arg, i+NumOps) || 1885 (V2IsUndef && isUndefOrInRange(Arg, NumOps, NumOps*2)) || 1886 (V2IsSplat && isUndefOrEqual(Arg, NumOps)))) 1887 return false; 1888 } 1889 1890 return true; 1891} 1892 1893static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false, 1894 bool V2IsUndef = false) { 1895 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1896 return isCommutedMOVL(N->op_begin(), N->getNumOperands(), 1897 V2IsSplat, V2IsUndef); 1898} 1899 1900/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand 1901/// specifies a shuffle of elements that is suitable for input to MOVSHDUP. 1902bool X86::isMOVSHDUPMask(SDNode *N) { 1903 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1904 1905 if (N->getNumOperands() != 4) 1906 return false; 1907 1908 // Expect 1, 1, 3, 3 1909 for (unsigned i = 0; i < 2; ++i) { 1910 SDOperand Arg = N->getOperand(i); 1911 if (Arg.getOpcode() == ISD::UNDEF) continue; 1912 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1913 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1914 if (Val != 1) return false; 1915 } 1916 1917 bool HasHi = false; 1918 for (unsigned i = 2; i < 4; ++i) { 1919 SDOperand Arg = N->getOperand(i); 1920 if (Arg.getOpcode() == ISD::UNDEF) continue; 1921 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1922 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1923 if (Val != 3) return false; 1924 HasHi = true; 1925 } 1926 1927 // Don't use movshdup if it can be done with a shufps. 1928 return HasHi; 1929} 1930 1931/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand 1932/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
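/// e.g. the expected v4f32 mask is <0, 0, 2, 2>, duplicating the even-numbered elements.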
1933bool X86::isMOVSLDUPMask(SDNode *N) { 1934 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1935 1936 if (N->getNumOperands() != 4) 1937 return false; 1938 1939 // Expect 0, 0, 2, 2 1940 for (unsigned i = 0; i < 2; ++i) { 1941 SDOperand Arg = N->getOperand(i); 1942 if (Arg.getOpcode() == ISD::UNDEF) continue; 1943 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1944 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1945 if (Val != 0) return false; 1946 } 1947 1948 bool HasHi = false; 1949 for (unsigned i = 2; i < 4; ++i) { 1950 SDOperand Arg = N->getOperand(i); 1951 if (Arg.getOpcode() == ISD::UNDEF) continue; 1952 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1953 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1954 if (Val != 2) return false; 1955 HasHi = true; 1956 } 1957 1958 // Don't use movshdup if it can be done with a shufps. 1959 return HasHi; 1960} 1961 1962/// isIdentityMask - Return true if the specified VECTOR_SHUFFLE operand 1963/// specifies a identity operation on the LHS or RHS. 1964static bool isIdentityMask(SDNode *N, bool RHS = false) { 1965 unsigned NumElems = N->getNumOperands(); 1966 for (unsigned i = 0; i < NumElems; ++i) 1967 if (!isUndefOrEqual(N->getOperand(i), i + (RHS ? NumElems : 0))) 1968 return false; 1969 return true; 1970} 1971 1972/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies 1973/// a splat of a single element. 1974static bool isSplatMask(SDNode *N) { 1975 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1976 1977 // This is a splat operation if each element of the permute is the same, and 1978 // if the value doesn't reference the second vector. 1979 unsigned NumElems = N->getNumOperands(); 1980 SDOperand ElementBase; 1981 unsigned i = 0; 1982 for (; i != NumElems; ++i) { 1983 SDOperand Elt = N->getOperand(i); 1984 if (isa<ConstantSDNode>(Elt)) { 1985 ElementBase = Elt; 1986 break; 1987 } 1988 } 1989 1990 if (!ElementBase.Val) 1991 return false; 1992 1993 for (; i != NumElems; ++i) { 1994 SDOperand Arg = N->getOperand(i); 1995 if (Arg.getOpcode() == ISD::UNDEF) continue; 1996 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1997 if (Arg != ElementBase) return false; 1998 } 1999 2000 // Make sure it is a splat of the first vector operand. 2001 return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems; 2002} 2003 2004/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies 2005/// a splat of a single element and it's a 2 or 4 element mask. 2006bool X86::isSplatMask(SDNode *N) { 2007 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2008 2009 // We can only splat 64-bit, and 32-bit quantities with a single instruction. 2010 if (N->getNumOperands() != 4 && N->getNumOperands() != 2) 2011 return false; 2012 return ::isSplatMask(N); 2013} 2014 2015/// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand 2016/// specifies a splat of zero element. 2017bool X86::isSplatLoMask(SDNode *N) { 2018 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2019 2020 for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) 2021 if (!isUndefOrEqual(N->getOperand(i), 0)) 2022 return false; 2023 return true; 2024} 2025 2026/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle 2027/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* 2028/// instructions. 2029unsigned X86::getShuffleSHUFImmediate(SDNode *N) { 2030 unsigned NumOperands = N->getNumOperands(); 2031 unsigned Shift = (NumOperands == 4) ? 
2 : 1; 2032 unsigned Mask = 0; 2033 for (unsigned i = 0; i < NumOperands; ++i) { 2034 unsigned Val = 0; 2035 SDOperand Arg = N->getOperand(NumOperands-i-1); 2036 if (Arg.getOpcode() != ISD::UNDEF) 2037 Val = cast<ConstantSDNode>(Arg)->getValue(); 2038 if (Val >= NumOperands) Val -= NumOperands; 2039 Mask |= Val; 2040 if (i != NumOperands - 1) 2041 Mask <<= Shift; 2042 } 2043 2044 return Mask; 2045} 2046 2047/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle 2048/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW 2049/// instructions. 2050unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) { 2051 unsigned Mask = 0; 2052 // 8 nodes, but we only care about the last 4. 2053 for (unsigned i = 7; i >= 4; --i) { 2054 unsigned Val = 0; 2055 SDOperand Arg = N->getOperand(i); 2056 if (Arg.getOpcode() != ISD::UNDEF) 2057 Val = cast<ConstantSDNode>(Arg)->getValue(); 2058 Mask |= (Val - 4); 2059 if (i != 4) 2060 Mask <<= 2; 2061 } 2062 2063 return Mask; 2064} 2065 2066/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle 2067/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW 2068/// instructions. 2069unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) { 2070 unsigned Mask = 0; 2071 // 8 nodes, but we only care about the first 4. 2072 for (int i = 3; i >= 0; --i) { 2073 unsigned Val = 0; 2074 SDOperand Arg = N->getOperand(i); 2075 if (Arg.getOpcode() != ISD::UNDEF) 2076 Val = cast<ConstantSDNode>(Arg)->getValue(); 2077 Mask |= Val; 2078 if (i != 0) 2079 Mask <<= 2; 2080 } 2081 2082 return Mask; 2083} 2084 2085/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand 2086/// specifies a 8 element shuffle that can be broken into a pair of 2087/// PSHUFHW and PSHUFLW. 2088static bool isPSHUFHW_PSHUFLWMask(SDNode *N) { 2089 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2090 2091 if (N->getNumOperands() != 8) 2092 return false; 2093 2094 // Lower quadword shuffled. 2095 for (unsigned i = 0; i != 4; ++i) { 2096 SDOperand Arg = N->getOperand(i); 2097 if (Arg.getOpcode() == ISD::UNDEF) continue; 2098 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2099 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2100 if (Val > 4) 2101 return false; 2102 } 2103 2104 // Upper quadword shuffled. 2105 for (unsigned i = 4; i != 8; ++i) { 2106 SDOperand Arg = N->getOperand(i); 2107 if (Arg.getOpcode() == ISD::UNDEF) continue; 2108 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2109 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2110 if (Val < 4 || Val > 7) 2111 return false; 2112 } 2113 2114 return true; 2115} 2116 2117/// CommuteVectorShuffle - Swap vector_shuffle operandsas well as 2118/// values in ther permute mask. 
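/// e.g. shuffle(V1, V2, <0, 5, 2, 7>) becomes shuffle(V2, V1, <4, 1, 6, 3>); undef mask entries are left undef.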
2119static SDOperand CommuteVectorShuffle(SDOperand Op, SDOperand &V1, 2120 SDOperand &V2, SDOperand &Mask, 2121 SelectionDAG &DAG) { 2122 MVT::ValueType VT = Op.getValueType(); 2123 MVT::ValueType MaskVT = Mask.getValueType(); 2124 MVT::ValueType EltVT = MVT::getVectorElementType(MaskVT); 2125 unsigned NumElems = Mask.getNumOperands(); 2126 SmallVector<SDOperand, 8> MaskVec; 2127 2128 for (unsigned i = 0; i != NumElems; ++i) { 2129 SDOperand Arg = Mask.getOperand(i); 2130 if (Arg.getOpcode() == ISD::UNDEF) { 2131 MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT)); 2132 continue; 2133 } 2134 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2135 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2136 if (Val < NumElems) 2137 MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT)); 2138 else 2139 MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT)); 2140 } 2141 2142 std::swap(V1, V2); 2143 Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2144 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2145} 2146 2147/// ShouldXformToMOVHLPS - Return true if the node should be transformed to 2148/// match movhlps. The lower half elements should come from upper half of 2149/// V1 (and in order), and the upper half elements should come from the upper 2150/// half of V2 (and in order). 2151static bool ShouldXformToMOVHLPS(SDNode *Mask) { 2152 unsigned NumElems = Mask->getNumOperands(); 2153 if (NumElems != 4) 2154 return false; 2155 for (unsigned i = 0, e = 2; i != e; ++i) 2156 if (!isUndefOrEqual(Mask->getOperand(i), i+2)) 2157 return false; 2158 for (unsigned i = 2; i != 4; ++i) 2159 if (!isUndefOrEqual(Mask->getOperand(i), i+4)) 2160 return false; 2161 return true; 2162} 2163 2164/// isScalarLoadToVector - Returns true if the node is a scalar load that 2165/// is promoted to a vector. 2166static inline bool isScalarLoadToVector(SDNode *N) { 2167 if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) { 2168 N = N->getOperand(0).Val; 2169 return ISD::isNON_EXTLoad(N); 2170 } 2171 return false; 2172} 2173 2174/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to 2175/// match movlp{s|d}. The lower half elements should come from lower half of 2176/// V1 (and in order), and the upper half elements should come from the upper 2177/// half of V2 (and in order). And since V1 will become the source of the 2178/// MOVLP, it must be either a vector load or a scalar load to vector. 2179static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, SDNode *Mask) { 2180 if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1)) 2181 return false; 2182 // Is V2 is a vector load, don't do this transformation. We will try to use 2183 // load folding shufps op. 2184 if (ISD::isNON_EXTLoad(V2)) 2185 return false; 2186 2187 unsigned NumElems = Mask->getNumOperands(); 2188 if (NumElems != 2 && NumElems != 4) 2189 return false; 2190 for (unsigned i = 0, e = NumElems/2; i != e; ++i) 2191 if (!isUndefOrEqual(Mask->getOperand(i), i)) 2192 return false; 2193 for (unsigned i = NumElems/2; i != NumElems; ++i) 2194 if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems)) 2195 return false; 2196 return true; 2197} 2198 2199/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are 2200/// all the same. 
2201static bool isSplatVector(SDNode *N) { 2202 if (N->getOpcode() != ISD::BUILD_VECTOR) 2203 return false; 2204 2205 SDOperand SplatValue = N->getOperand(0); 2206 for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) 2207 if (N->getOperand(i) != SplatValue) 2208 return false; 2209 return true; 2210} 2211 2212/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved 2213/// to an undef. 2214static bool isUndefShuffle(SDNode *N) { 2215 if (N->getOpcode() != ISD::VECTOR_SHUFFLE) 2216 return false; 2217 2218 SDOperand V1 = N->getOperand(0); 2219 SDOperand V2 = N->getOperand(1); 2220 SDOperand Mask = N->getOperand(2); 2221 unsigned NumElems = Mask.getNumOperands(); 2222 for (unsigned i = 0; i != NumElems; ++i) { 2223 SDOperand Arg = Mask.getOperand(i); 2224 if (Arg.getOpcode() != ISD::UNDEF) { 2225 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2226 if (Val < NumElems && V1.getOpcode() != ISD::UNDEF) 2227 return false; 2228 else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF) 2229 return false; 2230 } 2231 } 2232 return true; 2233} 2234 2235/// isZeroNode - Returns true if Elt is a constant zero or a floating point 2236/// constant +0.0. 2237static inline bool isZeroNode(SDOperand Elt) { 2238 return ((isa<ConstantSDNode>(Elt) && 2239 cast<ConstantSDNode>(Elt)->getValue() == 0) || 2240 (isa<ConstantFPSDNode>(Elt) && 2241 cast<ConstantFPSDNode>(Elt)->isExactlyValue(0.0))); 2242} 2243 2244/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved 2245/// to an zero vector. 2246static bool isZeroShuffle(SDNode *N) { 2247 if (N->getOpcode() != ISD::VECTOR_SHUFFLE) 2248 return false; 2249 2250 SDOperand V1 = N->getOperand(0); 2251 SDOperand V2 = N->getOperand(1); 2252 SDOperand Mask = N->getOperand(2); 2253 unsigned NumElems = Mask.getNumOperands(); 2254 for (unsigned i = 0; i != NumElems; ++i) { 2255 SDOperand Arg = Mask.getOperand(i); 2256 if (Arg.getOpcode() != ISD::UNDEF) { 2257 unsigned Idx = cast<ConstantSDNode>(Arg)->getValue(); 2258 if (Idx < NumElems) { 2259 unsigned Opc = V1.Val->getOpcode(); 2260 if (Opc == ISD::UNDEF) 2261 continue; 2262 if (Opc != ISD::BUILD_VECTOR || 2263 !isZeroNode(V1.Val->getOperand(Idx))) 2264 return false; 2265 } else if (Idx >= NumElems) { 2266 unsigned Opc = V2.Val->getOpcode(); 2267 if (Opc == ISD::UNDEF) 2268 continue; 2269 if (Opc != ISD::BUILD_VECTOR || 2270 !isZeroNode(V2.Val->getOperand(Idx - NumElems))) 2271 return false; 2272 } 2273 } 2274 } 2275 return true; 2276} 2277 2278/// getZeroVector - Returns a vector of specified type with all zero elements. 2279/// 2280static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) { 2281 assert(MVT::isVector(VT) && "Expected a vector type"); 2282 unsigned NumElems = MVT::getVectorNumElements(VT); 2283 MVT::ValueType EVT = MVT::getVectorElementType(VT); 2284 bool isFP = MVT::isFloatingPoint(EVT); 2285 SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT); 2286 SmallVector<SDOperand, 8> ZeroVec(NumElems, Zero); 2287 return DAG.getNode(ISD::BUILD_VECTOR, VT, &ZeroVec[0], ZeroVec.size()); 2288} 2289 2290/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements 2291/// that point to V2 points to its first element. 
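/// e.g. for a 4-element mask, <0, 5, 2, 7> is rewritten to <0, 4, 2, 4> so every reference into the splatted V2 uses its element 0.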
2292static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) { 2293 assert(Mask.getOpcode() == ISD::BUILD_VECTOR); 2294 2295 bool Changed = false; 2296 SmallVector<SDOperand, 8> MaskVec; 2297 unsigned NumElems = Mask.getNumOperands(); 2298 for (unsigned i = 0; i != NumElems; ++i) { 2299 SDOperand Arg = Mask.getOperand(i); 2300 if (Arg.getOpcode() != ISD::UNDEF) { 2301 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2302 if (Val > NumElems) { 2303 Arg = DAG.getConstant(NumElems, Arg.getValueType()); 2304 Changed = true; 2305 } 2306 } 2307 MaskVec.push_back(Arg); 2308 } 2309 2310 if (Changed) 2311 Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(), 2312 &MaskVec[0], MaskVec.size()); 2313 return Mask; 2314} 2315 2316/// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd 2317/// operation of specified width. 2318static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) { 2319 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2320 MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); 2321 2322 SmallVector<SDOperand, 8> MaskVec; 2323 MaskVec.push_back(DAG.getConstant(NumElems, BaseVT)); 2324 for (unsigned i = 1; i != NumElems; ++i) 2325 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2326 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2327} 2328 2329/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation 2330/// of specified width. 2331static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) { 2332 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2333 MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); 2334 SmallVector<SDOperand, 8> MaskVec; 2335 for (unsigned i = 0, e = NumElems/2; i != e; ++i) { 2336 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2337 MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT)); 2338 } 2339 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2340} 2341 2342/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation 2343/// of specified width. 2344static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) { 2345 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2346 MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); 2347 unsigned Half = NumElems/2; 2348 SmallVector<SDOperand, 8> MaskVec; 2349 for (unsigned i = 0; i != Half; ++i) { 2350 MaskVec.push_back(DAG.getConstant(i + Half, BaseVT)); 2351 MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT)); 2352 } 2353 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2354} 2355 2356/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32. 
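/// The splat is widened by repeatedly unpacking the vector with itself until only 4 elements remain; the result is then bitcast to v4i32 and broadcast with an all-zero shuffle mask.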
2357/// 2358static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) { 2359 SDOperand V1 = Op.getOperand(0); 2360 SDOperand Mask = Op.getOperand(2); 2361 MVT::ValueType VT = Op.getValueType(); 2362 unsigned NumElems = Mask.getNumOperands(); 2363 Mask = getUnpacklMask(NumElems, DAG); 2364 while (NumElems != 4) { 2365 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask); 2366 NumElems >>= 1; 2367 } 2368 V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1); 2369 2370 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 2371 Mask = getZeroVector(MaskVT, DAG); 2372 SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1, 2373 DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask); 2374 return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle); 2375} 2376 2377/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified 2378/// vector of zero or undef vector. 2379static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT, 2380 unsigned NumElems, unsigned Idx, 2381 bool isZero, SelectionDAG &DAG) { 2382 SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT); 2383 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2384 MVT::ValueType EVT = MVT::getVectorElementType(MaskVT); 2385 SDOperand Zero = DAG.getConstant(0, EVT); 2386 SmallVector<SDOperand, 8> MaskVec(NumElems, Zero); 2387 MaskVec[Idx] = DAG.getConstant(NumElems, EVT); 2388 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2389 &MaskVec[0], MaskVec.size()); 2390 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2391} 2392 2393/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8. 2394/// 2395static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros, 2396 unsigned NumNonZero, unsigned NumZero, 2397 SelectionDAG &DAG, TargetLowering &TLI) { 2398 if (NumNonZero > 8) 2399 return SDOperand(); 2400 2401 SDOperand V(0, 0); 2402 bool First = true; 2403 for (unsigned i = 0; i < 16; ++i) { 2404 bool ThisIsNonZero = (NonZeros & (1 << i)) != 0; 2405 if (ThisIsNonZero && First) { 2406 if (NumZero) 2407 V = getZeroVector(MVT::v8i16, DAG); 2408 else 2409 V = DAG.getNode(ISD::UNDEF, MVT::v8i16); 2410 First = false; 2411 } 2412 2413 if ((i & 1) != 0) { 2414 SDOperand ThisElt(0, 0), LastElt(0, 0); 2415 bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0; 2416 if (LastIsNonZero) { 2417 LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1)); 2418 } 2419 if (ThisIsNonZero) { 2420 ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i)); 2421 ThisElt = DAG.getNode(ISD::SHL, MVT::i16, 2422 ThisElt, DAG.getConstant(8, MVT::i8)); 2423 if (LastIsNonZero) 2424 ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt); 2425 } else 2426 ThisElt = LastElt; 2427 2428 if (ThisElt.Val) 2429 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt, 2430 DAG.getConstant(i/2, TLI.getPointerTy())); 2431 } 2432 } 2433 2434 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V); 2435} 2436 2437/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16. 
2438/// 2439static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros, 2440 unsigned NumNonZero, unsigned NumZero, 2441 SelectionDAG &DAG, TargetLowering &TLI) { 2442 if (NumNonZero > 4) 2443 return SDOperand(); 2444 2445 SDOperand V(0, 0); 2446 bool First = true; 2447 for (unsigned i = 0; i < 8; ++i) { 2448 bool isNonZero = (NonZeros & (1 << i)) != 0; 2449 if (isNonZero) { 2450 if (First) { 2451 if (NumZero) 2452 V = getZeroVector(MVT::v8i16, DAG); 2453 else 2454 V = DAG.getNode(ISD::UNDEF, MVT::v8i16); 2455 First = false; 2456 } 2457 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i), 2458 DAG.getConstant(i, TLI.getPointerTy())); 2459 } 2460 } 2461 2462 return V; 2463} 2464 2465SDOperand 2466X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) { 2467 // All zero's are handled with pxor. 2468 if (ISD::isBuildVectorAllZeros(Op.Val)) 2469 return Op; 2470 2471 // All one's are handled with pcmpeqd. 2472 if (ISD::isBuildVectorAllOnes(Op.Val)) 2473 return Op; 2474 2475 MVT::ValueType VT = Op.getValueType(); 2476 MVT::ValueType EVT = MVT::getVectorElementType(VT); 2477 unsigned EVTBits = MVT::getSizeInBits(EVT); 2478 2479 unsigned NumElems = Op.getNumOperands(); 2480 unsigned NumZero = 0; 2481 unsigned NumNonZero = 0; 2482 unsigned NonZeros = 0; 2483 std::set<SDOperand> Values; 2484 for (unsigned i = 0; i < NumElems; ++i) { 2485 SDOperand Elt = Op.getOperand(i); 2486 if (Elt.getOpcode() != ISD::UNDEF) { 2487 Values.insert(Elt); 2488 if (isZeroNode(Elt)) 2489 NumZero++; 2490 else { 2491 NonZeros |= (1 << i); 2492 NumNonZero++; 2493 } 2494 } 2495 } 2496 2497 if (NumNonZero == 0) { 2498 if (NumZero == 0) 2499 // All undef vector. Return an UNDEF. 2500 return DAG.getNode(ISD::UNDEF, VT); 2501 else 2502 // A mix of zero and undef. Return a zero vector. 2503 return getZeroVector(VT, DAG); 2504 } 2505 2506 // Splat is obviously ok. Let legalizer expand it to a shuffle. 2507 if (Values.size() == 1) 2508 return SDOperand(); 2509 2510 // Special case for single non-zero element. 2511 if (NumNonZero == 1) { 2512 unsigned Idx = CountTrailingZeros_32(NonZeros); 2513 SDOperand Item = Op.getOperand(Idx); 2514 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item); 2515 if (Idx == 0) 2516 // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. 2517 return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx, 2518 NumZero > 0, DAG); 2519 2520 if (EVTBits == 32) { 2521 // Turn it into a shuffle of zero and zero-extended scalar to vector. 2522 Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0, 2523 DAG); 2524 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2525 MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT); 2526 SmallVector<SDOperand, 8> MaskVec; 2527 for (unsigned i = 0; i < NumElems; i++) 2528 MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT)); 2529 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2530 &MaskVec[0], MaskVec.size()); 2531 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item, 2532 DAG.getNode(ISD::UNDEF, VT), Mask); 2533 } 2534 } 2535 2536 // Let legalizer expand 2-wide build_vectors. 2537 if (EVTBits == 64) 2538 return SDOperand(); 2539 2540 // If element VT is < 32 bits, convert it to inserts into a zero vector. 
2541 if (EVTBits == 8 && NumElems == 16) { 2542 SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG, 2543 *this); 2544 if (V.Val) return V; 2545 } 2546 2547 if (EVTBits == 16 && NumElems == 8) { 2548 SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG, 2549 *this); 2550 if (V.Val) return V; 2551 } 2552 2553 // If element VT is == 32 bits, turn it into a number of shuffles. 2554 SmallVector<SDOperand, 8> V; 2555 V.resize(NumElems); 2556 if (NumElems == 4 && NumZero > 0) { 2557 for (unsigned i = 0; i < 4; ++i) { 2558 bool isZero = !(NonZeros & (1 << i)); 2559 if (isZero) 2560 V[i] = getZeroVector(VT, DAG); 2561 else 2562 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 2563 } 2564 2565 for (unsigned i = 0; i < 2; ++i) { 2566 switch ((NonZeros & (0x3 << i*2)) >> (i*2)) { 2567 default: break; 2568 case 0: 2569 V[i] = V[i*2]; // Must be a zero vector. 2570 break; 2571 case 1: 2572 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2], 2573 getMOVLMask(NumElems, DAG)); 2574 break; 2575 case 2: 2576 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1], 2577 getMOVLMask(NumElems, DAG)); 2578 break; 2579 case 3: 2580 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1], 2581 getUnpacklMask(NumElems, DAG)); 2582 break; 2583 } 2584 } 2585 2586 // Take advantage of the fact GR32 to VR128 scalar_to_vector (i.e. movd) 2587 // clears the upper bits. 2588 // FIXME: we can do the same for v4f32 case when we know both parts of 2589 // the lower half come from scalar_to_vector (loadf32). We should do 2590 // that in post legalizer dag combiner with target specific hooks. 2591 if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0) 2592 return V[0]; 2593 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2594 MVT::ValueType EVT = MVT::getVectorElementType(MaskVT); 2595 SmallVector<SDOperand, 8> MaskVec; 2596 bool Reverse = (NonZeros & 0x3) == 2; 2597 for (unsigned i = 0; i < 2; ++i) 2598 if (Reverse) 2599 MaskVec.push_back(DAG.getConstant(1-i, EVT)); 2600 else 2601 MaskVec.push_back(DAG.getConstant(i, EVT)); 2602 Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2; 2603 for (unsigned i = 0; i < 2; ++i) 2604 if (Reverse) 2605 MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT)); 2606 else 2607 MaskVec.push_back(DAG.getConstant(i+NumElems, EVT)); 2608 SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2609 &MaskVec[0], MaskVec.size()); 2610 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask); 2611 } 2612 2613 if (Values.size() > 2) { 2614 // Expand into a number of unpckl*. 2615 // e.g. 
for v4f32 2616 // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0> 2617 // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1> 2618 // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> 2619 SDOperand UnpckMask = getUnpacklMask(NumElems, DAG); 2620 for (unsigned i = 0; i < NumElems; ++i) 2621 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 2622 NumElems >>= 1; 2623 while (NumElems != 0) { 2624 for (unsigned i = 0; i < NumElems; ++i) 2625 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems], 2626 UnpckMask); 2627 NumElems >>= 1; 2628 } 2629 return V[0]; 2630 } 2631 2632 return SDOperand(); 2633} 2634 2635SDOperand 2636X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { 2637 SDOperand V1 = Op.getOperand(0); 2638 SDOperand V2 = Op.getOperand(1); 2639 SDOperand PermMask = Op.getOperand(2); 2640 MVT::ValueType VT = Op.getValueType(); 2641 unsigned NumElems = PermMask.getNumOperands(); 2642 bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; 2643 bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; 2644 bool V1IsSplat = false; 2645 bool V2IsSplat = false; 2646 2647 if (isUndefShuffle(Op.Val)) 2648 return DAG.getNode(ISD::UNDEF, VT); 2649 2650 if (isZeroShuffle(Op.Val)) 2651 return getZeroVector(VT, DAG); 2652 2653 if (isIdentityMask(PermMask.Val)) 2654 return V1; 2655 else if (isIdentityMask(PermMask.Val, true)) 2656 return V2; 2657 2658 if (isSplatMask(PermMask.Val)) { 2659 if (NumElems <= 4) return Op; 2660 // Promote it to a v4i32 splat. 2661 return PromoteSplat(Op, DAG); 2662 } 2663 2664 if (X86::isMOVLMask(PermMask.Val)) 2665 return (V1IsUndef) ? V2 : Op; 2666 2667 if (X86::isMOVSHDUPMask(PermMask.Val) || 2668 X86::isMOVSLDUPMask(PermMask.Val) || 2669 X86::isMOVHLPSMask(PermMask.Val) || 2670 X86::isMOVHPMask(PermMask.Val) || 2671 X86::isMOVLPMask(PermMask.Val)) 2672 return Op; 2673 2674 if (ShouldXformToMOVHLPS(PermMask.Val) || 2675 ShouldXformToMOVLP(V1.Val, V2.Val, PermMask.Val)) 2676 return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); 2677 2678 bool Commuted = false; 2679 V1IsSplat = isSplatVector(V1.Val); 2680 V2IsSplat = isSplatVector(V2.Val); 2681 if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) { 2682 Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); 2683 std::swap(V1IsSplat, V2IsSplat); 2684 std::swap(V1IsUndef, V2IsUndef); 2685 Commuted = true; 2686 } 2687 2688 if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) { 2689 if (V2IsUndef) return V1; 2690 Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); 2691 if (V2IsSplat) { 2692 // V2 is a splat, so the mask may be malformed. That is, it may point 2693 // to any V2 element. The instruction selectior won't like this. Get 2694 // a corrected mask and commute to form a proper MOVS{S|D}. 2695 SDOperand NewMask = getMOVLMask(NumElems, DAG); 2696 if (NewMask.Val != PermMask.Val) 2697 Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 2698 } 2699 return Op; 2700 } 2701 2702 if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) || 2703 X86::isUNPCKH_v_undef_Mask(PermMask.Val) || 2704 X86::isUNPCKLMask(PermMask.Val) || 2705 X86::isUNPCKHMask(PermMask.Val)) 2706 return Op; 2707 2708 if (V2IsSplat) { 2709 // Normalize mask so all entries that point to V2 points to its first 2710 // element then try to match unpck{h|l} again. If match, return a 2711 // new vector_shuffle with the corrected mask. 
2712 SDOperand NewMask = NormalizeMask(PermMask, DAG); 2713 if (NewMask.Val != PermMask.Val) { 2714 if (X86::isUNPCKLMask(PermMask.Val, true)) { 2715 SDOperand NewMask = getUnpacklMask(NumElems, DAG); 2716 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 2717 } else if (X86::isUNPCKHMask(PermMask.Val, true)) { 2718 SDOperand NewMask = getUnpackhMask(NumElems, DAG); 2719 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 2720 } 2721 } 2722 } 2723 2724 // Normalize the node to match x86 shuffle ops if needed 2725 if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.Val)) 2726 Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); 2727 2728 if (Commuted) { 2729 // Commute is back and try unpck* again. 2730 Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); 2731 if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) || 2732 X86::isUNPCKH_v_undef_Mask(PermMask.Val) || 2733 X86::isUNPCKLMask(PermMask.Val) || 2734 X86::isUNPCKHMask(PermMask.Val)) 2735 return Op; 2736 } 2737 2738 // If VT is integer, try PSHUF* first, then SHUFP*. 2739 if (MVT::isInteger(VT)) { 2740 if (X86::isPSHUFDMask(PermMask.Val) || 2741 X86::isPSHUFHWMask(PermMask.Val) || 2742 X86::isPSHUFLWMask(PermMask.Val)) { 2743 if (V2.getOpcode() != ISD::UNDEF) 2744 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, 2745 DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); 2746 return Op; 2747 } 2748 2749 if (X86::isSHUFPMask(PermMask.Val) && 2750 MVT::getSizeInBits(VT) != 64) // Don't do this for MMX. 2751 return Op; 2752 2753 // Handle v8i16 shuffle high / low shuffle node pair. 2754 if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) { 2755 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2756 MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); 2757 SmallVector<SDOperand, 8> MaskVec; 2758 for (unsigned i = 0; i != 4; ++i) 2759 MaskVec.push_back(PermMask.getOperand(i)); 2760 for (unsigned i = 4; i != 8; ++i) 2761 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2762 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2763 &MaskVec[0], MaskVec.size()); 2764 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2765 MaskVec.clear(); 2766 for (unsigned i = 0; i != 4; ++i) 2767 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2768 for (unsigned i = 4; i != 8; ++i) 2769 MaskVec.push_back(PermMask.getOperand(i)); 2770 Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0],MaskVec.size()); 2771 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2772 } 2773 } else { 2774 // Floating point cases in the other order. 2775 if (X86::isSHUFPMask(PermMask.Val)) 2776 return Op; 2777 if (X86::isPSHUFDMask(PermMask.Val) || 2778 X86::isPSHUFHWMask(PermMask.Val) || 2779 X86::isPSHUFLWMask(PermMask.Val)) { 2780 if (V2.getOpcode() != ISD::UNDEF) 2781 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, 2782 DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); 2783 return Op; 2784 } 2785 } 2786 2787 if (NumElems == 4 && 2788 // Don't do this for MMX. 2789 MVT::getSizeInBits(VT) != 64) { 2790 MVT::ValueType MaskVT = PermMask.getValueType(); 2791 MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT); 2792 SmallVector<std::pair<int, int>, 8> Locs; 2793 Locs.reserve(NumElems); 2794 SmallVector<SDOperand, 8> Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 2795 SmallVector<SDOperand, 8> Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 2796 unsigned NumHi = 0; 2797 unsigned NumLo = 0; 2798 // If no more than two elements come from either vector. 
This can be 2799 // implemented with two shuffles. First shuffle gather the elements. 2800 // The second shuffle, which takes the first shuffle as both of its 2801 // vector operands, put the elements into the right order. 2802 for (unsigned i = 0; i != NumElems; ++i) { 2803 SDOperand Elt = PermMask.getOperand(i); 2804 if (Elt.getOpcode() == ISD::UNDEF) { 2805 Locs[i] = std::make_pair(-1, -1); 2806 } else { 2807 unsigned Val = cast<ConstantSDNode>(Elt)->getValue(); 2808 if (Val < NumElems) { 2809 Locs[i] = std::make_pair(0, NumLo); 2810 Mask1[NumLo] = Elt; 2811 NumLo++; 2812 } else { 2813 Locs[i] = std::make_pair(1, NumHi); 2814 if (2+NumHi < NumElems) 2815 Mask1[2+NumHi] = Elt; 2816 NumHi++; 2817 } 2818 } 2819 } 2820 if (NumLo <= 2 && NumHi <= 2) { 2821 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 2822 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2823 &Mask1[0], Mask1.size())); 2824 for (unsigned i = 0; i != NumElems; ++i) { 2825 if (Locs[i].first == -1) 2826 continue; 2827 else { 2828 unsigned Idx = (i < NumElems/2) ? 0 : NumElems; 2829 Idx += Locs[i].first * (NumElems/2) + Locs[i].second; 2830 Mask2[i] = DAG.getConstant(Idx, MaskEVT); 2831 } 2832 } 2833 2834 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, 2835 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2836 &Mask2[0], Mask2.size())); 2837 } 2838 2839 // Break it into (shuffle shuffle_hi, shuffle_lo). 2840 Locs.clear(); 2841 SmallVector<SDOperand,8> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 2842 SmallVector<SDOperand,8> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 2843 SmallVector<SDOperand,8> *MaskPtr = &LoMask; 2844 unsigned MaskIdx = 0; 2845 unsigned LoIdx = 0; 2846 unsigned HiIdx = NumElems/2; 2847 for (unsigned i = 0; i != NumElems; ++i) { 2848 if (i == NumElems/2) { 2849 MaskPtr = &HiMask; 2850 MaskIdx = 1; 2851 LoIdx = 0; 2852 HiIdx = NumElems/2; 2853 } 2854 SDOperand Elt = PermMask.getOperand(i); 2855 if (Elt.getOpcode() == ISD::UNDEF) { 2856 Locs[i] = std::make_pair(-1, -1); 2857 } else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) { 2858 Locs[i] = std::make_pair(MaskIdx, LoIdx); 2859 (*MaskPtr)[LoIdx] = Elt; 2860 LoIdx++; 2861 } else { 2862 Locs[i] = std::make_pair(MaskIdx, HiIdx); 2863 (*MaskPtr)[HiIdx] = Elt; 2864 HiIdx++; 2865 } 2866 } 2867 2868 SDOperand LoShuffle = 2869 DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 2870 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2871 &LoMask[0], LoMask.size())); 2872 SDOperand HiShuffle = 2873 DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 2874 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2875 &HiMask[0], HiMask.size())); 2876 SmallVector<SDOperand, 8> MaskOps; 2877 for (unsigned i = 0; i != NumElems; ++i) { 2878 if (Locs[i].first == -1) { 2879 MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT)); 2880 } else { 2881 unsigned Idx = Locs[i].first * NumElems + Locs[i].second; 2882 MaskOps.push_back(DAG.getConstant(Idx, MaskEVT)); 2883 } 2884 } 2885 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle, 2886 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2887 &MaskOps[0], MaskOps.size())); 2888 } 2889 2890 return SDOperand(); 2891} 2892 2893SDOperand 2894X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { 2895 if (!isa<ConstantSDNode>(Op.getOperand(1))) 2896 return SDOperand(); 2897 2898 MVT::ValueType VT = Op.getValueType(); 2899 // TODO: handle v16i8. 2900 if (MVT::getSizeInBits(VT) == 16) { 2901 // Transform it so it match pextrw which produces a 32-bit result. 
2902 MVT::ValueType EVT = (MVT::ValueType)(VT+1); 2903 SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT, 2904 Op.getOperand(0), Op.getOperand(1)); 2905 SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract, 2906 DAG.getValueType(VT)); 2907 return DAG.getNode(ISD::TRUNCATE, VT, Assert); 2908 } else if (MVT::getSizeInBits(VT) == 32) { 2909 SDOperand Vec = Op.getOperand(0); 2910 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 2911 if (Idx == 0) 2912 return Op; 2913 // SHUFPS the element to the lowest double word, then movss. 2914 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 2915 SmallVector<SDOperand, 8> IdxVec; 2916 IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorElementType(MaskVT))); 2917 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); 2918 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); 2919 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); 2920 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2921 &IdxVec[0], IdxVec.size()); 2922 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), 2923 Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask); 2924 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, 2925 DAG.getConstant(0, getPointerTy())); 2926 } else if (MVT::getSizeInBits(VT) == 64) { 2927 SDOperand Vec = Op.getOperand(0); 2928 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 2929 if (Idx == 0) 2930 return Op; 2931 2932 // UNPCKHPD the element to the lowest double word, then movsd. 2933 // Note if the lower 64 bits of the result of the UNPCKHPD is then stored 2934 // to a f64mem, the whole operation is folded into a single MOVHPDmr. 2935 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 2936 SmallVector<SDOperand, 8> IdxVec; 2937 IdxVec.push_back(DAG.getConstant(1, MVT::getVectorElementType(MaskVT))); 2938 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); 2939 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2940 &IdxVec[0], IdxVec.size()); 2941 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), 2942 Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask); 2943 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, 2944 DAG.getConstant(0, getPointerTy())); 2945 } 2946 2947 return SDOperand(); 2948} 2949 2950SDOperand 2951X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { 2952 // Transform it so it match pinsrw which expects a 16-bit value in a GR32 2953 // as its second argument. 2954 MVT::ValueType VT = Op.getValueType(); 2955 MVT::ValueType BaseVT = MVT::getVectorElementType(VT); 2956 SDOperand N0 = Op.getOperand(0); 2957 SDOperand N1 = Op.getOperand(1); 2958 SDOperand N2 = Op.getOperand(2); 2959 if (MVT::getSizeInBits(BaseVT) == 16) { 2960 if (N1.getValueType() != MVT::i32) 2961 N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1); 2962 if (N2.getValueType() != MVT::i32) 2963 N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(),getPointerTy()); 2964 return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2); 2965 } else if (MVT::getSizeInBits(BaseVT) == 32) { 2966 unsigned Idx = cast<ConstantSDNode>(N2)->getValue(); 2967 if (Idx == 0) { 2968 // Use a movss. 
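  // The mask built below is <4, 1, 2, 3>: element 0 comes from the scalar operand (the second shuffle input) and elements 1-3 come from N0, which is exactly the movss pattern.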
2969 N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1); 2970 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 2971 MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); 2972 SmallVector<SDOperand, 8> MaskVec; 2973 MaskVec.push_back(DAG.getConstant(4, BaseVT)); 2974 for (unsigned i = 1; i <= 3; ++i) 2975 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2976 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1, 2977 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2978 &MaskVec[0], MaskVec.size())); 2979 } else { 2980 // Use two pinsrw instructions to insert a 32 bit value. 2981 Idx <<= 1; 2982 if (MVT::isFloatingPoint(N1.getValueType())) { 2983 if (ISD::isNON_EXTLoad(N1.Val)) { 2984 // Just load directly from f32mem to GR32. 2985 LoadSDNode *LD = cast<LoadSDNode>(N1); 2986 N1 = DAG.getLoad(MVT::i32, LD->getChain(), LD->getBasePtr(), 2987 LD->getSrcValue(), LD->getSrcValueOffset()); 2988 } else { 2989 N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1); 2990 N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1); 2991 N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1, 2992 DAG.getConstant(0, getPointerTy())); 2993 } 2994 } 2995 N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0); 2996 N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1, 2997 DAG.getConstant(Idx, getPointerTy())); 2998 N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8)); 2999 N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1, 3000 DAG.getConstant(Idx+1, getPointerTy())); 3001 return DAG.getNode(ISD::BIT_CONVERT, VT, N0); 3002 } 3003 } 3004 3005 return SDOperand(); 3006} 3007 3008SDOperand 3009X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) { 3010 SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0)); 3011 return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt); 3012} 3013 3014// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as 3015// their target countpart wrapped in the X86ISD::Wrapper node. Suppose N is 3016// one of the above mentioned nodes. It has to be wrapped because otherwise 3017// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only 3018// be used to form addressing mode. These wrapped nodes will be selected 3019// into MOV32ri. 3020SDOperand 3021X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) { 3022 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 3023 SDOperand Result = DAG.getTargetConstantPool(CP->getConstVal(), 3024 getPointerTy(), 3025 CP->getAlignment()); 3026 Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result); 3027 // With PIC, the address is actually $g + Offset. 3028 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && 3029 !Subtarget->isPICStyleRIPRel()) { 3030 Result = DAG.getNode(ISD::ADD, getPointerTy(), 3031 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 3032 Result); 3033 } 3034 3035 return Result; 3036} 3037 3038SDOperand 3039X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) { 3040 GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 3041 SDOperand Result = DAG.getTargetGlobalAddress(GV, getPointerTy()); 3042 Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result); 3043 // With PIC, the address is actually $g + Offset. 
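  // $g is the PIC base (X86ISD::GlobalBaseReg); RIP-relative PIC (x86-64) reaches the global directly, so no base register is added in that case.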
3044 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && 3045 !Subtarget->isPICStyleRIPRel()) { 3046 Result = DAG.getNode(ISD::ADD, getPointerTy(), 3047 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 3048 Result); 3049 } 3050 3051 // For Darwin & Mingw32, external and weak symbols are indirect, so we want to 3052 // load the value at address GV, not the value of GV itself. This means that 3053 // the GlobalAddress must be in the base or index register of the address, not 3054 // the GV offset field. Platform check is inside GVRequiresExtraLoad() call 3055 // The same applies for external symbols during PIC codegen 3056 if (Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false)) 3057 Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result, NULL, 0); 3058 3059 return Result; 3060} 3061 3062// Lower ISD::GlobalTLSAddress using the "general dynamic" model 3063static SDOperand 3064LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, 3065 const MVT::ValueType PtrVT) { 3066 SDOperand InFlag; 3067 SDOperand Chain = DAG.getCopyToReg(DAG.getEntryNode(), X86::EBX, 3068 DAG.getNode(X86ISD::GlobalBaseReg, 3069 PtrVT), InFlag); 3070 InFlag = Chain.getValue(1); 3071 3072 // emit leal symbol@TLSGD(,%ebx,1), %eax 3073 SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag); 3074 SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), 3075 GA->getValueType(0), 3076 GA->getOffset()); 3077 SDOperand Ops[] = { Chain, TGA, InFlag }; 3078 SDOperand Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 3); 3079 InFlag = Result.getValue(2); 3080 Chain = Result.getValue(1); 3081 3082 // call ___tls_get_addr. This function receives its argument in 3083 // the register EAX. 3084 Chain = DAG.getCopyToReg(Chain, X86::EAX, Result, InFlag); 3085 InFlag = Chain.getValue(1); 3086 3087 NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 3088 SDOperand Ops1[] = { Chain, 3089 DAG.getTargetExternalSymbol("___tls_get_addr", 3090 PtrVT), 3091 DAG.getRegister(X86::EAX, PtrVT), 3092 DAG.getRegister(X86::EBX, PtrVT), 3093 InFlag }; 3094 Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 5); 3095 InFlag = Chain.getValue(1); 3096 3097 return DAG.getCopyFromReg(Chain, X86::EAX, PtrVT, InFlag); 3098} 3099 3100// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or 3101// "local exec" model. 3102static SDOperand 3103LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, 3104 const MVT::ValueType PtrVT) { 3105 // Get the Thread Pointer 3106 SDOperand ThreadPointer = DAG.getNode(X86ISD::THREAD_POINTER, PtrVT); 3107 // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial 3108 // exec) 3109 SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), 3110 GA->getValueType(0), 3111 GA->getOffset()); 3112 SDOperand Offset = DAG.getNode(X86ISD::Wrapper, PtrVT, TGA); 3113 3114 if (GA->getGlobal()->isDeclaration()) // initial exec TLS model 3115 Offset = DAG.getLoad(PtrVT, DAG.getEntryNode(), Offset, NULL, 0); 3116 3117 // The address of the thread local variable is the add of the thread 3118 // pointer with the offset of the variable. 
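  // A rough sketch of the code this yields on IA-32 ELF, where the thread
  // pointer is conventionally read through the %gs segment:
  //   movl %gs:0, %eax          ; thread pointer
  //   addl x@ntpoff, %eax       ; local exec: offset is a link-time constant
  // For initial exec, the offset is instead loaded from the GOT slot
  // x@indntpoff before the add.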
3119 return DAG.getNode(ISD::ADD, PtrVT, ThreadPointer, Offset);
3120}
3121
3122SDOperand
3123X86TargetLowering::LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG) {
3124 // TODO: implement the "local dynamic" model
3125 // TODO: implement the "initial exec" model for PIC executables
3126 assert(!Subtarget->is64Bit() && Subtarget->isTargetELF() &&
3127 "TLS not implemented for non-ELF and 64-bit targets");
3128 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3129 // If the relocation model is PIC, use the "General Dynamic" TLS Model,
3130 // otherwise use the "Local Exec" TLS Model.
3131 if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
3132 return LowerToTLSGeneralDynamicModel(GA, DAG, getPointerTy());
3133 else
3134 return LowerToTLSExecModel(GA, DAG, getPointerTy());
3135}
3136
3137SDOperand
3138X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) {
3139 const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
3140 SDOperand Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
3141 Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
3142 // With PIC, the address is actually $g + Offset.
3143 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
3144 !Subtarget->isPICStyleRIPRel()) {
3145 Result = DAG.getNode(ISD::ADD, getPointerTy(),
3146 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3147 Result);
3148 }
3149
3150 return Result;
3151}
3152
3153SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
3154 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3155 SDOperand Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
3156 Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
3157 // With PIC, the address is actually $g + Offset.
3158 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
3159 !Subtarget->isPICStyleRIPRel()) {
3160 Result = DAG.getNode(ISD::ADD, getPointerTy(),
3161 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3162 Result);
3163 }
3164
3165 return Result;
3166}
3167
3168SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
3169 assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
3170 "Not an i64 shift!");
3171 bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
3172 SDOperand ShOpLo = Op.getOperand(0);
3173 SDOperand ShOpHi = Op.getOperand(1);
3174 SDOperand ShAmt = Op.getOperand(2);
3175 SDOperand Tmp1 = isSRA ?
3176 DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) :
3177 DAG.getConstant(0, MVT::i32);
3178
3179 SDOperand Tmp2, Tmp3;
3180 if (Op.getOpcode() == ISD::SHL_PARTS) {
3181 Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
3182 Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
3183 } else {
3184 Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
3185 Tmp3 = DAG.getNode(isSRA ?
ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt); 3186 } 3187 3188 const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 3189 SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt, 3190 DAG.getConstant(32, MVT::i8)); 3191 SDOperand COps[]={DAG.getEntryNode(), AndNode, DAG.getConstant(0, MVT::i8)}; 3192 SDOperand InFlag = DAG.getNode(X86ISD::CMP, VTs, 2, COps, 3).getValue(1); 3193 3194 SDOperand Hi, Lo; 3195 SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8); 3196 3197 VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag); 3198 SmallVector<SDOperand, 4> Ops; 3199 if (Op.getOpcode() == ISD::SHL_PARTS) { 3200 Ops.push_back(Tmp2); 3201 Ops.push_back(Tmp3); 3202 Ops.push_back(CC); 3203 Ops.push_back(InFlag); 3204 Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 3205 InFlag = Hi.getValue(1); 3206 3207 Ops.clear(); 3208 Ops.push_back(Tmp3); 3209 Ops.push_back(Tmp1); 3210 Ops.push_back(CC); 3211 Ops.push_back(InFlag); 3212 Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 3213 } else { 3214 Ops.push_back(Tmp2); 3215 Ops.push_back(Tmp3); 3216 Ops.push_back(CC); 3217 Ops.push_back(InFlag); 3218 Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 3219 InFlag = Lo.getValue(1); 3220 3221 Ops.clear(); 3222 Ops.push_back(Tmp3); 3223 Ops.push_back(Tmp1); 3224 Ops.push_back(CC); 3225 Ops.push_back(InFlag); 3226 Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 3227 } 3228 3229 VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32); 3230 Ops.clear(); 3231 Ops.push_back(Lo); 3232 Ops.push_back(Hi); 3233 return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size()); 3234} 3235 3236SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) { 3237 assert(Op.getOperand(0).getValueType() <= MVT::i64 && 3238 Op.getOperand(0).getValueType() >= MVT::i16 && 3239 "Unknown SINT_TO_FP to lower!"); 3240 3241 SDOperand Result; 3242 MVT::ValueType SrcVT = Op.getOperand(0).getValueType(); 3243 unsigned Size = MVT::getSizeInBits(SrcVT)/8; 3244 MachineFunction &MF = DAG.getMachineFunction(); 3245 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 3246 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3247 SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0), 3248 StackSlot, NULL, 0); 3249 3250 // Build the FILD 3251 SDVTList Tys; 3252 if (X86ScalarSSE) 3253 Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag); 3254 else 3255 Tys = DAG.getVTList(MVT::f64, MVT::Other); 3256 SmallVector<SDOperand, 8> Ops; 3257 Ops.push_back(Chain); 3258 Ops.push_back(StackSlot); 3259 Ops.push_back(DAG.getValueType(SrcVT)); 3260 Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG :X86ISD::FILD, 3261 Tys, &Ops[0], Ops.size()); 3262 3263 if (X86ScalarSSE) { 3264 Chain = Result.getValue(1); 3265 SDOperand InFlag = Result.getValue(2); 3266 3267 // FIXME: Currently the FST is flagged to the FILD_FLAG. This 3268 // shouldn't be necessary except that RFP cannot be live across 3269 // multiple blocks. When stackifier is fixed, they can be uncoupled. 
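    // There is no direct move between the x87 register stack and the XMM
    // registers, so the FILD result is spilled to a fresh stack slot below
    // and then reloaded as an SSE value of the requested type.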
3270 MachineFunction &MF = DAG.getMachineFunction(); 3271 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 3272 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3273 Tys = DAG.getVTList(MVT::Other); 3274 SmallVector<SDOperand, 8> Ops; 3275 Ops.push_back(Chain); 3276 Ops.push_back(Result); 3277 Ops.push_back(StackSlot); 3278 Ops.push_back(DAG.getValueType(Op.getValueType())); 3279 Ops.push_back(InFlag); 3280 Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size()); 3281 Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, NULL, 0); 3282 } 3283 3284 return Result; 3285} 3286 3287SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) { 3288 assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 && 3289 "Unknown FP_TO_SINT to lower!"); 3290 // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary 3291 // stack slot. 3292 MachineFunction &MF = DAG.getMachineFunction(); 3293 unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8; 3294 int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 3295 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3296 3297 unsigned Opc; 3298 switch (Op.getValueType()) { 3299 default: assert(0 && "Invalid FP_TO_SINT to lower!"); 3300 case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; 3301 case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; 3302 case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; 3303 } 3304 3305 SDOperand Chain = DAG.getEntryNode(); 3306 SDOperand Value = Op.getOperand(0); 3307 if (X86ScalarSSE) { 3308 assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!"); 3309 Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0); 3310 SDVTList Tys = DAG.getVTList(MVT::f64, MVT::Other); 3311 SDOperand Ops[] = { 3312 Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType()) 3313 }; 3314 Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3); 3315 Chain = Value.getValue(1); 3316 SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 3317 StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3318 } 3319 3320 // Build the FP_TO_INT*_IN_MEM 3321 SDOperand Ops[] = { Chain, Value, StackSlot }; 3322 SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops, 3); 3323 3324 // Load the result. 
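  // The FP_TO_INT*_IN_MEM pseudo built above is expanded later (see
  // InsertAtEndOfBasicBlock below) into an FNSTCW/FLDCW pair that forces
  // round-towards-zero around the FIST store; here we only reload the
  // integer result from the stack slot.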
3325 return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0); 3326} 3327 3328SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) { 3329 MVT::ValueType VT = Op.getValueType(); 3330 const Type *OpNTy = MVT::getTypeForValueType(VT); 3331 std::vector<Constant*> CV; 3332 if (VT == MVT::f64) { 3333 CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63)))); 3334 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3335 } else { 3336 CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31)))); 3337 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3338 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3339 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3340 } 3341 Constant *CS = ConstantStruct::get(CV); 3342 SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); 3343 SDVTList Tys = DAG.getVTList(VT, MVT::Other); 3344 SmallVector<SDOperand, 3> Ops; 3345 Ops.push_back(DAG.getEntryNode()); 3346 Ops.push_back(CPIdx); 3347 Ops.push_back(DAG.getSrcValue(NULL)); 3348 SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size()); 3349 return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask); 3350} 3351 3352SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) { 3353 MVT::ValueType VT = Op.getValueType(); 3354 const Type *OpNTy = MVT::getTypeForValueType(VT); 3355 std::vector<Constant*> CV; 3356 if (VT == MVT::f64) { 3357 CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63))); 3358 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3359 } else { 3360 CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31))); 3361 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3362 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3363 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3364 } 3365 Constant *CS = ConstantStruct::get(CV); 3366 SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); 3367 SDVTList Tys = DAG.getVTList(VT, MVT::Other); 3368 SmallVector<SDOperand, 3> Ops; 3369 Ops.push_back(DAG.getEntryNode()); 3370 Ops.push_back(CPIdx); 3371 Ops.push_back(DAG.getSrcValue(NULL)); 3372 SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size()); 3373 return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask); 3374} 3375 3376SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) { 3377 SDOperand Op0 = Op.getOperand(0); 3378 SDOperand Op1 = Op.getOperand(1); 3379 MVT::ValueType VT = Op.getValueType(); 3380 MVT::ValueType SrcVT = Op1.getValueType(); 3381 const Type *SrcTy = MVT::getTypeForValueType(SrcVT); 3382 3383 // If second operand is smaller, extend it first. 3384 if (MVT::getSizeInBits(SrcVT) < MVT::getSizeInBits(VT)) { 3385 Op1 = DAG.getNode(ISD::FP_EXTEND, VT, Op1); 3386 SrcVT = VT; 3387 } 3388 3389 // First get the sign bit of second operand. 
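  // The whole operation is done bitwise:
  //   copysign(x, y) = (x & ~SignMask) | (y & SignMask)
  // where SignMask has only the sign bit set (bit 63 for f64, bit 31 for f32).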
3390 std::vector<Constant*> CV; 3391 if (SrcVT == MVT::f64) { 3392 CV.push_back(ConstantFP::get(SrcTy, BitsToDouble(1ULL << 63))); 3393 CV.push_back(ConstantFP::get(SrcTy, 0.0)); 3394 } else { 3395 CV.push_back(ConstantFP::get(SrcTy, BitsToFloat(1U << 31))); 3396 CV.push_back(ConstantFP::get(SrcTy, 0.0)); 3397 CV.push_back(ConstantFP::get(SrcTy, 0.0)); 3398 CV.push_back(ConstantFP::get(SrcTy, 0.0)); 3399 } 3400 Constant *CS = ConstantStruct::get(CV); 3401 SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); 3402 SDVTList Tys = DAG.getVTList(SrcVT, MVT::Other); 3403 SmallVector<SDOperand, 3> Ops; 3404 Ops.push_back(DAG.getEntryNode()); 3405 Ops.push_back(CPIdx); 3406 Ops.push_back(DAG.getSrcValue(NULL)); 3407 SDOperand Mask1 = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size()); 3408 SDOperand SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1); 3409 3410 // Shift sign bit right or left if the two operands have different types. 3411 if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) { 3412 // Op0 is MVT::f32, Op1 is MVT::f64. 3413 SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, SignBit); 3414 SignBit = DAG.getNode(X86ISD::FSRL, MVT::v2f64, SignBit, 3415 DAG.getConstant(32, MVT::i32)); 3416 SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, SignBit); 3417 SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f32, SignBit, 3418 DAG.getConstant(0, getPointerTy())); 3419 } 3420 3421 // Clear first operand sign bit. 3422 CV.clear(); 3423 if (VT == MVT::f64) { 3424 CV.push_back(ConstantFP::get(SrcTy, BitsToDouble(~(1ULL << 63)))); 3425 CV.push_back(ConstantFP::get(SrcTy, 0.0)); 3426 } else { 3427 CV.push_back(ConstantFP::get(SrcTy, BitsToFloat(~(1U << 31)))); 3428 CV.push_back(ConstantFP::get(SrcTy, 0.0)); 3429 CV.push_back(ConstantFP::get(SrcTy, 0.0)); 3430 CV.push_back(ConstantFP::get(SrcTy, 0.0)); 3431 } 3432 CS = ConstantStruct::get(CV); 3433 CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); 3434 Tys = DAG.getVTList(VT, MVT::Other); 3435 Ops.clear(); 3436 Ops.push_back(DAG.getEntryNode()); 3437 Ops.push_back(CPIdx); 3438 Ops.push_back(DAG.getSrcValue(NULL)); 3439 SDOperand Mask2 = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size()); 3440 SDOperand Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2); 3441 3442 // Or the value with the sign bit. 
3443 return DAG.getNode(X86ISD::FOR, VT, Val, SignBit); 3444} 3445 3446SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG, 3447 SDOperand Chain) { 3448 assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); 3449 SDOperand Cond; 3450 SDOperand Op0 = Op.getOperand(0); 3451 SDOperand Op1 = Op.getOperand(1); 3452 SDOperand CC = Op.getOperand(2); 3453 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 3454 const MVT::ValueType *VTs1 = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 3455 const MVT::ValueType *VTs2 = DAG.getNodeValueTypes(MVT::i8, MVT::Flag); 3456 bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType()); 3457 unsigned X86CC; 3458 3459 if (translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC, 3460 Op0, Op1, DAG)) { 3461 SDOperand Ops1[] = { Chain, Op0, Op1 }; 3462 Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, Ops1, 3).getValue(1); 3463 SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond }; 3464 return DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2); 3465 } 3466 3467 assert(isFP && "Illegal integer SetCC!"); 3468 3469 SDOperand COps[] = { Chain, Op0, Op1 }; 3470 Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, COps, 3).getValue(1); 3471 3472 switch (SetCCOpcode) { 3473 default: assert(false && "Illegal floating point SetCC!"); 3474 case ISD::SETOEQ: { // !PF & ZF 3475 SDOperand Ops1[] = { DAG.getConstant(X86::COND_NP, MVT::i8), Cond }; 3476 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2); 3477 SDOperand Ops2[] = { DAG.getConstant(X86::COND_E, MVT::i8), 3478 Tmp1.getValue(1) }; 3479 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2); 3480 return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2); 3481 } 3482 case ISD::SETUNE: { // PF | !ZF 3483 SDOperand Ops1[] = { DAG.getConstant(X86::COND_P, MVT::i8), Cond }; 3484 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2); 3485 SDOperand Ops2[] = { DAG.getConstant(X86::COND_NE, MVT::i8), 3486 Tmp1.getValue(1) }; 3487 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2); 3488 return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2); 3489 } 3490 } 3491} 3492 3493SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) { 3494 bool addTest = true; 3495 SDOperand Chain = DAG.getEntryNode(); 3496 SDOperand Cond = Op.getOperand(0); 3497 SDOperand CC; 3498 const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 3499 3500 if (Cond.getOpcode() == ISD::SETCC) 3501 Cond = LowerSETCC(Cond, DAG, Chain); 3502 3503 if (Cond.getOpcode() == X86ISD::SETCC) { 3504 CC = Cond.getOperand(0); 3505 3506 // If condition flag is set by a X86ISD::CMP, then make a copy of it 3507 // (since flag operand cannot be shared). Use it as the condition setting 3508 // operand in place of the X86ISD::SETCC. 3509 // If the X86ISD::SETCC has more than one use, then perhaps it's better 3510 // to use a test instead of duplicating the X86ISD::CMP (for register 3511 // pressure reason)? 
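    // Grab the node that produced the flags; if it is a known flag-producing
    // compare (CMP/COMI/UCOMI) and an FP cmov of this type is actually
    // available, re-emit it below so the CMOV gets its own copy of the flags.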
3512 SDOperand Cmp = Cond.getOperand(1); 3513 unsigned Opc = Cmp.getOpcode(); 3514 bool IllegalFPCMov = !X86ScalarSSE && 3515 MVT::isFloatingPoint(Op.getValueType()) && 3516 !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended()); 3517 if ((Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) && 3518 !IllegalFPCMov) { 3519 SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) }; 3520 Cond = DAG.getNode(Opc, VTs, 2, Ops, 3); 3521 addTest = false; 3522 } 3523 } 3524 3525 if (addTest) { 3526 CC = DAG.getConstant(X86::COND_NE, MVT::i8); 3527 SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) }; 3528 Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3); 3529 } 3530 3531 VTs = DAG.getNodeValueTypes(Op.getValueType(), MVT::Flag); 3532 SmallVector<SDOperand, 4> Ops; 3533 // X86ISD::CMOV means set the result (which is operand 1) to the RHS if 3534 // condition is true. 3535 Ops.push_back(Op.getOperand(2)); 3536 Ops.push_back(Op.getOperand(1)); 3537 Ops.push_back(CC); 3538 Ops.push_back(Cond.getValue(1)); 3539 return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 3540} 3541 3542SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) { 3543 bool addTest = true; 3544 SDOperand Chain = Op.getOperand(0); 3545 SDOperand Cond = Op.getOperand(1); 3546 SDOperand Dest = Op.getOperand(2); 3547 SDOperand CC; 3548 const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 3549 3550 if (Cond.getOpcode() == ISD::SETCC) 3551 Cond = LowerSETCC(Cond, DAG, Chain); 3552 3553 if (Cond.getOpcode() == X86ISD::SETCC) { 3554 CC = Cond.getOperand(0); 3555 3556 // If condition flag is set by a X86ISD::CMP, then make a copy of it 3557 // (since flag operand cannot be shared). Use it as the condition setting 3558 // operand in place of the X86ISD::SETCC. 3559 // If the X86ISD::SETCC has more than one use, then perhaps it's better 3560 // to use a test instead of duplicating the X86ISD::CMP (for register 3561 // pressure reason)? 3562 SDOperand Cmp = Cond.getOperand(1); 3563 unsigned Opc = Cmp.getOpcode(); 3564 if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) { 3565 SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) }; 3566 Cond = DAG.getNode(Opc, VTs, 2, Ops, 3); 3567 addTest = false; 3568 } 3569 } 3570 3571 if (addTest) { 3572 CC = DAG.getConstant(X86::COND_NE, MVT::i8); 3573 SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) }; 3574 Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3); 3575 } 3576 return DAG.getNode(X86ISD::BRCOND, Op.getValueType(), 3577 Cond, Op.getOperand(2), CC, Cond.getValue(1)); 3578} 3579 3580SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) { 3581 unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 3582 3583 if (Subtarget->is64Bit()) 3584 return LowerX86_64CCCCallTo(Op, DAG, CallingConv); 3585 else 3586 switch (CallingConv) { 3587 default: 3588 assert(0 && "Unsupported calling convention"); 3589 case CallingConv::Fast: 3590 // TODO: Implement fastcc 3591 // Falls through 3592 case CallingConv::C: 3593 case CallingConv::X86_StdCall: 3594 return LowerCCCCallTo(Op, DAG, CallingConv); 3595 case CallingConv::X86_FastCall: 3596 return LowerFastCCCallTo(Op, DAG, CallingConv); 3597 } 3598} 3599 3600 3601// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets. 3602// Calls to _alloca is needed to probe the stack when allocating more than 4k 3603// bytes in one go. 
Touching the stack at 4K increments is necessary to ensure 3604// that the guard pages used by the OS virtual memory manager are allocated in 3605// correct sequence. 3606SDOperand X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op, 3607 SelectionDAG &DAG) { 3608 assert(Subtarget->isTargetCygMing() && 3609 "This should be used only on Cygwin/Mingw targets"); 3610 3611 // Get the inputs. 3612 SDOperand Chain = Op.getOperand(0); 3613 SDOperand Size = Op.getOperand(1); 3614 // FIXME: Ensure alignment here 3615 3616 TargetLowering::ArgListTy Args; 3617 TargetLowering::ArgListEntry Entry; 3618 MVT::ValueType IntPtr = getPointerTy(); 3619 MVT::ValueType SPTy = (Subtarget->is64Bit() ? MVT::i64 : MVT::i32); 3620 const Type *IntPtrTy = getTargetData()->getIntPtrType(); 3621 3622 Entry.Node = Size; 3623 Entry.Ty = IntPtrTy; 3624 Entry.isInReg = true; // Should pass in EAX 3625 Args.push_back(Entry); 3626 std::pair<SDOperand, SDOperand> CallResult = 3627 LowerCallTo(Chain, IntPtrTy, false, false, CallingConv::C, false, 3628 DAG.getExternalSymbol("_alloca", IntPtr), Args, DAG); 3629 3630 SDOperand SP = DAG.getCopyFromReg(CallResult.second, X86StackPtr, SPTy); 3631 3632 std::vector<MVT::ValueType> Tys; 3633 Tys.push_back(SPTy); 3634 Tys.push_back(MVT::Other); 3635 SDOperand Ops[2] = { SP, CallResult.second }; 3636 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2); 3637} 3638 3639SDOperand 3640X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) { 3641 MachineFunction &MF = DAG.getMachineFunction(); 3642 const Function* Fn = MF.getFunction(); 3643 if (Fn->hasExternalLinkage() && 3644 Subtarget->isTargetCygMing() && 3645 Fn->getName() == "main") 3646 MF.getInfo<X86MachineFunctionInfo>()->setForceFramePointer(true); 3647 3648 unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 3649 if (Subtarget->is64Bit()) 3650 return LowerX86_64CCCArguments(Op, DAG); 3651 else 3652 switch(CC) { 3653 default: 3654 assert(0 && "Unsupported calling convention"); 3655 case CallingConv::Fast: 3656 // TODO: implement fastcc. 3657 3658 // Falls through 3659 case CallingConv::C: 3660 return LowerCCCArguments(Op, DAG); 3661 case CallingConv::X86_StdCall: 3662 MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(StdCall); 3663 return LowerCCCArguments(Op, DAG, true); 3664 case CallingConv::X86_FastCall: 3665 MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(FastCall); 3666 return LowerFastCCArguments(Op, DAG); 3667 } 3668} 3669 3670SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { 3671 SDOperand InFlag(0, 0); 3672 SDOperand Chain = Op.getOperand(0); 3673 unsigned Align = 3674 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); 3675 if (Align == 0) Align = 1; 3676 3677 ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); 3678 // If not DWORD aligned, call memset if size is less than the threshold. 3679 // It knows how to align to the right boundary first. 3680 if ((Align & 3) != 0 || 3681 (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) { 3682 MVT::ValueType IntPtr = getPointerTy(); 3683 const Type *IntPtrTy = getTargetData()->getIntPtrType(); 3684 TargetLowering::ArgListTy Args; 3685 TargetLowering::ArgListEntry Entry; 3686 Entry.Node = Op.getOperand(1); 3687 Entry.Ty = IntPtrTy; 3688 Args.push_back(Entry); 3689 // Extend the unsigned i8 argument to be an int value for the call. 
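    // The libcall follows the C prototype memset(void *dst, int c, size_t n),
    // so the i8 fill value is widened to a full integer here.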
3690 Entry.Node = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2)); 3691 Entry.Ty = IntPtrTy; 3692 Args.push_back(Entry); 3693 Entry.Node = Op.getOperand(3); 3694 Args.push_back(Entry); 3695 std::pair<SDOperand,SDOperand> CallResult = 3696 LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false, 3697 DAG.getExternalSymbol("memset", IntPtr), Args, DAG); 3698 return CallResult.second; 3699 } 3700 3701 MVT::ValueType AVT; 3702 SDOperand Count; 3703 ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 3704 unsigned BytesLeft = 0; 3705 bool TwoRepStos = false; 3706 if (ValC) { 3707 unsigned ValReg; 3708 uint64_t Val = ValC->getValue() & 255; 3709 3710 // If the value is a constant, then we can potentially use larger sets. 3711 switch (Align & 3) { 3712 case 2: // WORD aligned 3713 AVT = MVT::i16; 3714 ValReg = X86::AX; 3715 Val = (Val << 8) | Val; 3716 break; 3717 case 0: // DWORD aligned 3718 AVT = MVT::i32; 3719 ValReg = X86::EAX; 3720 Val = (Val << 8) | Val; 3721 Val = (Val << 16) | Val; 3722 if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) { // QWORD aligned 3723 AVT = MVT::i64; 3724 ValReg = X86::RAX; 3725 Val = (Val << 32) | Val; 3726 } 3727 break; 3728 default: // Byte aligned 3729 AVT = MVT::i8; 3730 ValReg = X86::AL; 3731 Count = Op.getOperand(3); 3732 break; 3733 } 3734 3735 if (AVT > MVT::i8) { 3736 if (I) { 3737 unsigned UBytes = MVT::getSizeInBits(AVT) / 8; 3738 Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy()); 3739 BytesLeft = I->getValue() % UBytes; 3740 } else { 3741 assert(AVT >= MVT::i32 && 3742 "Do not use rep;stos if not at least DWORD aligned"); 3743 Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(), 3744 Op.getOperand(3), DAG.getConstant(2, MVT::i8)); 3745 TwoRepStos = true; 3746 } 3747 } 3748 3749 Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT), 3750 InFlag); 3751 InFlag = Chain.getValue(1); 3752 } else { 3753 AVT = MVT::i8; 3754 Count = Op.getOperand(3); 3755 Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag); 3756 InFlag = Chain.getValue(1); 3757 } 3758 3759 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, 3760 Count, InFlag); 3761 InFlag = Chain.getValue(1); 3762 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, 3763 Op.getOperand(1), InFlag); 3764 InFlag = Chain.getValue(1); 3765 3766 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 3767 SmallVector<SDOperand, 8> Ops; 3768 Ops.push_back(Chain); 3769 Ops.push_back(DAG.getValueType(AVT)); 3770 Ops.push_back(InFlag); 3771 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); 3772 3773 if (TwoRepStos) { 3774 InFlag = Chain.getValue(1); 3775 Count = Op.getOperand(3); 3776 MVT::ValueType CVT = Count.getValueType(); 3777 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 3778 DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); 3779 Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX, 3780 Left, InFlag); 3781 InFlag = Chain.getValue(1); 3782 Tys = DAG.getVTList(MVT::Other, MVT::Flag); 3783 Ops.clear(); 3784 Ops.push_back(Chain); 3785 Ops.push_back(DAG.getValueType(MVT::i8)); 3786 Ops.push_back(InFlag); 3787 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); 3788 } else if (BytesLeft) { 3789 // Issue stores for the last 1 - 7 bytes. 
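    // The tail is finished with at most one 4-byte, one 2-byte and one 1-byte
    // store, each using the fill byte replicated to the matching width. For
    // example, a DWORD-aligned 13-byte memset does rep;stosl with a count of 3
    // (12 bytes) followed by a single byte store.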
3790 SDOperand Value; 3791 unsigned Val = ValC->getValue() & 255; 3792 unsigned Offset = I->getValue() - BytesLeft; 3793 SDOperand DstAddr = Op.getOperand(1); 3794 MVT::ValueType AddrVT = DstAddr.getValueType(); 3795 if (BytesLeft >= 4) { 3796 Val = (Val << 8) | Val; 3797 Val = (Val << 16) | Val; 3798 Value = DAG.getConstant(Val, MVT::i32); 3799 Chain = DAG.getStore(Chain, Value, 3800 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 3801 DAG.getConstant(Offset, AddrVT)), 3802 NULL, 0); 3803 BytesLeft -= 4; 3804 Offset += 4; 3805 } 3806 if (BytesLeft >= 2) { 3807 Value = DAG.getConstant((Val << 8) | Val, MVT::i16); 3808 Chain = DAG.getStore(Chain, Value, 3809 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 3810 DAG.getConstant(Offset, AddrVT)), 3811 NULL, 0); 3812 BytesLeft -= 2; 3813 Offset += 2; 3814 } 3815 if (BytesLeft == 1) { 3816 Value = DAG.getConstant(Val, MVT::i8); 3817 Chain = DAG.getStore(Chain, Value, 3818 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 3819 DAG.getConstant(Offset, AddrVT)), 3820 NULL, 0); 3821 } 3822 } 3823 3824 return Chain; 3825} 3826 3827SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) { 3828 SDOperand Chain = Op.getOperand(0); 3829 unsigned Align = 3830 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); 3831 if (Align == 0) Align = 1; 3832 3833 ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); 3834 // If not DWORD aligned, call memcpy if size is less than the threshold. 3835 // It knows how to align to the right boundary first. 3836 if ((Align & 3) != 0 || 3837 (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) { 3838 MVT::ValueType IntPtr = getPointerTy(); 3839 TargetLowering::ArgListTy Args; 3840 TargetLowering::ArgListEntry Entry; 3841 Entry.Ty = getTargetData()->getIntPtrType(); 3842 Entry.Node = Op.getOperand(1); Args.push_back(Entry); 3843 Entry.Node = Op.getOperand(2); Args.push_back(Entry); 3844 Entry.Node = Op.getOperand(3); Args.push_back(Entry); 3845 std::pair<SDOperand,SDOperand> CallResult = 3846 LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false, 3847 DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG); 3848 return CallResult.second; 3849 } 3850 3851 MVT::ValueType AVT; 3852 SDOperand Count; 3853 unsigned BytesLeft = 0; 3854 bool TwoRepMovs = false; 3855 switch (Align & 3) { 3856 case 2: // WORD aligned 3857 AVT = MVT::i16; 3858 break; 3859 case 0: // DWORD aligned 3860 AVT = MVT::i32; 3861 if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) // QWORD aligned 3862 AVT = MVT::i64; 3863 break; 3864 default: // Byte aligned 3865 AVT = MVT::i8; 3866 Count = Op.getOperand(3); 3867 break; 3868 } 3869 3870 if (AVT > MVT::i8) { 3871 if (I) { 3872 unsigned UBytes = MVT::getSizeInBits(AVT) / 8; 3873 Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy()); 3874 BytesLeft = I->getValue() % UBytes; 3875 } else { 3876 assert(AVT >= MVT::i32 && 3877 "Do not use rep;movs if not at least DWORD aligned"); 3878 Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(), 3879 Op.getOperand(3), DAG.getConstant(2, MVT::i8)); 3880 TwoRepMovs = true; 3881 } 3882 } 3883 3884 SDOperand InFlag(0, 0); 3885 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, 3886 Count, InFlag); 3887 InFlag = Chain.getValue(1); 3888 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, 3889 Op.getOperand(1), InFlag); 3890 InFlag = Chain.getValue(1); 3891 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? 
X86::RSI : X86::ESI, 3892 Op.getOperand(2), InFlag); 3893 InFlag = Chain.getValue(1); 3894 3895 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 3896 SmallVector<SDOperand, 8> Ops; 3897 Ops.push_back(Chain); 3898 Ops.push_back(DAG.getValueType(AVT)); 3899 Ops.push_back(InFlag); 3900 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()); 3901 3902 if (TwoRepMovs) { 3903 InFlag = Chain.getValue(1); 3904 Count = Op.getOperand(3); 3905 MVT::ValueType CVT = Count.getValueType(); 3906 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 3907 DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); 3908 Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX, 3909 Left, InFlag); 3910 InFlag = Chain.getValue(1); 3911 Tys = DAG.getVTList(MVT::Other, MVT::Flag); 3912 Ops.clear(); 3913 Ops.push_back(Chain); 3914 Ops.push_back(DAG.getValueType(MVT::i8)); 3915 Ops.push_back(InFlag); 3916 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()); 3917 } else if (BytesLeft) { 3918 // Issue loads and stores for the last 1 - 7 bytes. 3919 unsigned Offset = I->getValue() - BytesLeft; 3920 SDOperand DstAddr = Op.getOperand(1); 3921 MVT::ValueType DstVT = DstAddr.getValueType(); 3922 SDOperand SrcAddr = Op.getOperand(2); 3923 MVT::ValueType SrcVT = SrcAddr.getValueType(); 3924 SDOperand Value; 3925 if (BytesLeft >= 4) { 3926 Value = DAG.getLoad(MVT::i32, Chain, 3927 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 3928 DAG.getConstant(Offset, SrcVT)), 3929 NULL, 0); 3930 Chain = Value.getValue(1); 3931 Chain = DAG.getStore(Chain, Value, 3932 DAG.getNode(ISD::ADD, DstVT, DstAddr, 3933 DAG.getConstant(Offset, DstVT)), 3934 NULL, 0); 3935 BytesLeft -= 4; 3936 Offset += 4; 3937 } 3938 if (BytesLeft >= 2) { 3939 Value = DAG.getLoad(MVT::i16, Chain, 3940 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 3941 DAG.getConstant(Offset, SrcVT)), 3942 NULL, 0); 3943 Chain = Value.getValue(1); 3944 Chain = DAG.getStore(Chain, Value, 3945 DAG.getNode(ISD::ADD, DstVT, DstAddr, 3946 DAG.getConstant(Offset, DstVT)), 3947 NULL, 0); 3948 BytesLeft -= 2; 3949 Offset += 2; 3950 } 3951 3952 if (BytesLeft == 1) { 3953 Value = DAG.getLoad(MVT::i8, Chain, 3954 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 3955 DAG.getConstant(Offset, SrcVT)), 3956 NULL, 0); 3957 Chain = Value.getValue(1); 3958 Chain = DAG.getStore(Chain, Value, 3959 DAG.getNode(ISD::ADD, DstVT, DstAddr, 3960 DAG.getConstant(Offset, DstVT)), 3961 NULL, 0); 3962 } 3963 } 3964 3965 return Chain; 3966} 3967 3968SDOperand 3969X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) { 3970 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 3971 SDOperand TheOp = Op.getOperand(0); 3972 SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheOp, 1); 3973 if (Subtarget->is64Bit()) { 3974 SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1)); 3975 SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::RDX, 3976 MVT::i64, Copy1.getValue(2)); 3977 SDOperand Tmp = DAG.getNode(ISD::SHL, MVT::i64, Copy2, 3978 DAG.getConstant(32, MVT::i8)); 3979 SDOperand Ops[] = { 3980 DAG.getNode(ISD::OR, MVT::i64, Copy1, Tmp), Copy2.getValue(1) 3981 }; 3982 3983 Tys = DAG.getVTList(MVT::i64, MVT::Other); 3984 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2); 3985 } 3986 3987 SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1)); 3988 SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::EDX, 3989 MVT::i32, Copy1.getValue(2)); 3990 SDOperand Ops[] = { Copy1, Copy2, Copy2.getValue(1) }; 3991 Tys = DAG.getVTList(MVT::i32, 
MVT::i32, MVT::Other); 3992 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 3); 3993} 3994 3995SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) { 3996 SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2)); 3997 3998 if (!Subtarget->is64Bit()) { 3999 // vastart just stores the address of the VarArgsFrameIndex slot into the 4000 // memory location argument. 4001 SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 4002 return DAG.getStore(Op.getOperand(0), FR,Op.getOperand(1), SV->getValue(), 4003 SV->getOffset()); 4004 } 4005 4006 // __va_list_tag: 4007 // gp_offset (0 - 6 * 8) 4008 // fp_offset (48 - 48 + 8 * 16) 4009 // overflow_arg_area (point to parameters coming in memory). 4010 // reg_save_area 4011 SmallVector<SDOperand, 8> MemOps; 4012 SDOperand FIN = Op.getOperand(1); 4013 // Store gp_offset 4014 SDOperand Store = DAG.getStore(Op.getOperand(0), 4015 DAG.getConstant(VarArgsGPOffset, MVT::i32), 4016 FIN, SV->getValue(), SV->getOffset()); 4017 MemOps.push_back(Store); 4018 4019 // Store fp_offset 4020 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 4021 DAG.getConstant(4, getPointerTy())); 4022 Store = DAG.getStore(Op.getOperand(0), 4023 DAG.getConstant(VarArgsFPOffset, MVT::i32), 4024 FIN, SV->getValue(), SV->getOffset()); 4025 MemOps.push_back(Store); 4026 4027 // Store ptr to overflow_arg_area 4028 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 4029 DAG.getConstant(4, getPointerTy())); 4030 SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 4031 Store = DAG.getStore(Op.getOperand(0), OVFIN, FIN, SV->getValue(), 4032 SV->getOffset()); 4033 MemOps.push_back(Store); 4034 4035 // Store ptr to reg_save_area. 4036 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 4037 DAG.getConstant(8, getPointerTy())); 4038 SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); 4039 Store = DAG.getStore(Op.getOperand(0), RSFIN, FIN, SV->getValue(), 4040 SV->getOffset()); 4041 MemOps.push_back(Store); 4042 return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size()); 4043} 4044 4045SDOperand X86TargetLowering::LowerVACOPY(SDOperand Op, SelectionDAG &DAG) { 4046 // X86-64 va_list is a struct { i32, i32, i8*, i8* }. 4047 SDOperand Chain = Op.getOperand(0); 4048 SDOperand DstPtr = Op.getOperand(1); 4049 SDOperand SrcPtr = Op.getOperand(2); 4050 SrcValueSDNode *DstSV = cast<SrcValueSDNode>(Op.getOperand(3)); 4051 SrcValueSDNode *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4)); 4052 4053 SrcPtr = DAG.getLoad(getPointerTy(), Chain, SrcPtr, 4054 SrcSV->getValue(), SrcSV->getOffset()); 4055 Chain = SrcPtr.getValue(1); 4056 for (unsigned i = 0; i < 3; ++i) { 4057 SDOperand Val = DAG.getLoad(MVT::i64, Chain, SrcPtr, 4058 SrcSV->getValue(), SrcSV->getOffset()); 4059 Chain = Val.getValue(1); 4060 Chain = DAG.getStore(Chain, Val, DstPtr, 4061 DstSV->getValue(), DstSV->getOffset()); 4062 if (i == 2) 4063 break; 4064 SrcPtr = DAG.getNode(ISD::ADD, getPointerTy(), SrcPtr, 4065 DAG.getConstant(8, getPointerTy())); 4066 DstPtr = DAG.getNode(ISD::ADD, getPointerTy(), DstPtr, 4067 DAG.getConstant(8, getPointerTy())); 4068 } 4069 return Chain; 4070} 4071 4072SDOperand 4073X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) { 4074 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue(); 4075 switch (IntNo) { 4076 default: return SDOperand(); // Don't custom lower most intrinsics. 4077 // Comparison intrinsics. 
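  // Each of these is lowered to a COMISS/COMISD or UCOMISS/UCOMISD compare
  // (X86ISD::COMI / X86ISD::UCOMI) followed by an X86ISD::SETCC on the
  // matching condition code; the i8 result is then extended to the i32 value
  // the intrinsic returns.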
4078 case Intrinsic::x86_sse_comieq_ss: 4079 case Intrinsic::x86_sse_comilt_ss: 4080 case Intrinsic::x86_sse_comile_ss: 4081 case Intrinsic::x86_sse_comigt_ss: 4082 case Intrinsic::x86_sse_comige_ss: 4083 case Intrinsic::x86_sse_comineq_ss: 4084 case Intrinsic::x86_sse_ucomieq_ss: 4085 case Intrinsic::x86_sse_ucomilt_ss: 4086 case Intrinsic::x86_sse_ucomile_ss: 4087 case Intrinsic::x86_sse_ucomigt_ss: 4088 case Intrinsic::x86_sse_ucomige_ss: 4089 case Intrinsic::x86_sse_ucomineq_ss: 4090 case Intrinsic::x86_sse2_comieq_sd: 4091 case Intrinsic::x86_sse2_comilt_sd: 4092 case Intrinsic::x86_sse2_comile_sd: 4093 case Intrinsic::x86_sse2_comigt_sd: 4094 case Intrinsic::x86_sse2_comige_sd: 4095 case Intrinsic::x86_sse2_comineq_sd: 4096 case Intrinsic::x86_sse2_ucomieq_sd: 4097 case Intrinsic::x86_sse2_ucomilt_sd: 4098 case Intrinsic::x86_sse2_ucomile_sd: 4099 case Intrinsic::x86_sse2_ucomigt_sd: 4100 case Intrinsic::x86_sse2_ucomige_sd: 4101 case Intrinsic::x86_sse2_ucomineq_sd: { 4102 unsigned Opc = 0; 4103 ISD::CondCode CC = ISD::SETCC_INVALID; 4104 switch (IntNo) { 4105 default: break; 4106 case Intrinsic::x86_sse_comieq_ss: 4107 case Intrinsic::x86_sse2_comieq_sd: 4108 Opc = X86ISD::COMI; 4109 CC = ISD::SETEQ; 4110 break; 4111 case Intrinsic::x86_sse_comilt_ss: 4112 case Intrinsic::x86_sse2_comilt_sd: 4113 Opc = X86ISD::COMI; 4114 CC = ISD::SETLT; 4115 break; 4116 case Intrinsic::x86_sse_comile_ss: 4117 case Intrinsic::x86_sse2_comile_sd: 4118 Opc = X86ISD::COMI; 4119 CC = ISD::SETLE; 4120 break; 4121 case Intrinsic::x86_sse_comigt_ss: 4122 case Intrinsic::x86_sse2_comigt_sd: 4123 Opc = X86ISD::COMI; 4124 CC = ISD::SETGT; 4125 break; 4126 case Intrinsic::x86_sse_comige_ss: 4127 case Intrinsic::x86_sse2_comige_sd: 4128 Opc = X86ISD::COMI; 4129 CC = ISD::SETGE; 4130 break; 4131 case Intrinsic::x86_sse_comineq_ss: 4132 case Intrinsic::x86_sse2_comineq_sd: 4133 Opc = X86ISD::COMI; 4134 CC = ISD::SETNE; 4135 break; 4136 case Intrinsic::x86_sse_ucomieq_ss: 4137 case Intrinsic::x86_sse2_ucomieq_sd: 4138 Opc = X86ISD::UCOMI; 4139 CC = ISD::SETEQ; 4140 break; 4141 case Intrinsic::x86_sse_ucomilt_ss: 4142 case Intrinsic::x86_sse2_ucomilt_sd: 4143 Opc = X86ISD::UCOMI; 4144 CC = ISD::SETLT; 4145 break; 4146 case Intrinsic::x86_sse_ucomile_ss: 4147 case Intrinsic::x86_sse2_ucomile_sd: 4148 Opc = X86ISD::UCOMI; 4149 CC = ISD::SETLE; 4150 break; 4151 case Intrinsic::x86_sse_ucomigt_ss: 4152 case Intrinsic::x86_sse2_ucomigt_sd: 4153 Opc = X86ISD::UCOMI; 4154 CC = ISD::SETGT; 4155 break; 4156 case Intrinsic::x86_sse_ucomige_ss: 4157 case Intrinsic::x86_sse2_ucomige_sd: 4158 Opc = X86ISD::UCOMI; 4159 CC = ISD::SETGE; 4160 break; 4161 case Intrinsic::x86_sse_ucomineq_ss: 4162 case Intrinsic::x86_sse2_ucomineq_sd: 4163 Opc = X86ISD::UCOMI; 4164 CC = ISD::SETNE; 4165 break; 4166 } 4167 4168 unsigned X86CC; 4169 SDOperand LHS = Op.getOperand(1); 4170 SDOperand RHS = Op.getOperand(2); 4171 translateX86CC(CC, true, X86CC, LHS, RHS, DAG); 4172 4173 const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 4174 SDOperand Ops1[] = { DAG.getEntryNode(), LHS, RHS }; 4175 SDOperand Cond = DAG.getNode(Opc, VTs, 2, Ops1, 3); 4176 VTs = DAG.getNodeValueTypes(MVT::i8, MVT::Flag); 4177 SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond }; 4178 SDOperand SetCC = DAG.getNode(X86ISD::SETCC, VTs, 2, Ops2, 2); 4179 return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC); 4180 } 4181 } 4182} 4183 4184SDOperand X86TargetLowering::LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG) { 4185 // Depths > 0 not supported 
yet! 4186 if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0) 4187 return SDOperand(); 4188 4189 // Just load the return address 4190 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG); 4191 return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0); 4192} 4193 4194SDOperand X86TargetLowering::LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG) { 4195 // Depths > 0 not supported yet! 4196 if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0) 4197 return SDOperand(); 4198 4199 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG); 4200 return DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI, 4201 DAG.getConstant(4, getPointerTy())); 4202} 4203 4204/// LowerOperation - Provide custom lowering hooks for some operations. 4205/// 4206SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { 4207 switch (Op.getOpcode()) { 4208 default: assert(0 && "Should not custom lower this!"); 4209 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 4210 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 4211 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 4212 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); 4213 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 4214 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 4215 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 4216 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 4217 case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG); 4218 case ISD::SHL_PARTS: 4219 case ISD::SRA_PARTS: 4220 case ISD::SRL_PARTS: return LowerShift(Op, DAG); 4221 case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); 4222 case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); 4223 case ISD::FABS: return LowerFABS(Op, DAG); 4224 case ISD::FNEG: return LowerFNEG(Op, DAG); 4225 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); 4226 case ISD::SETCC: return LowerSETCC(Op, DAG, DAG.getEntryNode()); 4227 case ISD::SELECT: return LowerSELECT(Op, DAG); 4228 case ISD::BRCOND: return LowerBRCOND(Op, DAG); 4229 case ISD::JumpTable: return LowerJumpTable(Op, DAG); 4230 case ISD::CALL: return LowerCALL(Op, DAG); 4231 case ISD::RET: return LowerRET(Op, DAG); 4232 case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG); 4233 case ISD::MEMSET: return LowerMEMSET(Op, DAG); 4234 case ISD::MEMCPY: return LowerMEMCPY(Op, DAG); 4235 case ISD::READCYCLECOUNTER: return LowerREADCYCLCECOUNTER(Op, DAG); 4236 case ISD::VASTART: return LowerVASTART(Op, DAG); 4237 case ISD::VACOPY: return LowerVACOPY(Op, DAG); 4238 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 4239 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 4240 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 4241 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); 4242 } 4243 return SDOperand(); 4244} 4245 4246const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { 4247 switch (Opcode) { 4248 default: return NULL; 4249 case X86ISD::SHLD: return "X86ISD::SHLD"; 4250 case X86ISD::SHRD: return "X86ISD::SHRD"; 4251 case X86ISD::FAND: return "X86ISD::FAND"; 4252 case X86ISD::FOR: return "X86ISD::FOR"; 4253 case X86ISD::FXOR: return "X86ISD::FXOR"; 4254 case X86ISD::FSRL: return "X86ISD::FSRL"; 4255 case X86ISD::FILD: return "X86ISD::FILD"; 4256 case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG"; 4257 case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM"; 4258 case 
X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM"; 4259 case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM"; 4260 case X86ISD::FLD: return "X86ISD::FLD"; 4261 case X86ISD::FST: return "X86ISD::FST"; 4262 case X86ISD::FP_GET_RESULT: return "X86ISD::FP_GET_RESULT"; 4263 case X86ISD::FP_SET_RESULT: return "X86ISD::FP_SET_RESULT"; 4264 case X86ISD::CALL: return "X86ISD::CALL"; 4265 case X86ISD::TAILCALL: return "X86ISD::TAILCALL"; 4266 case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG"; 4267 case X86ISD::CMP: return "X86ISD::CMP"; 4268 case X86ISD::COMI: return "X86ISD::COMI"; 4269 case X86ISD::UCOMI: return "X86ISD::UCOMI"; 4270 case X86ISD::SETCC: return "X86ISD::SETCC"; 4271 case X86ISD::CMOV: return "X86ISD::CMOV"; 4272 case X86ISD::BRCOND: return "X86ISD::BRCOND"; 4273 case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; 4274 case X86ISD::REP_STOS: return "X86ISD::REP_STOS"; 4275 case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS"; 4276 case X86ISD::LOAD_PACK: return "X86ISD::LOAD_PACK"; 4277 case X86ISD::LOAD_UA: return "X86ISD::LOAD_UA"; 4278 case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg"; 4279 case X86ISD::Wrapper: return "X86ISD::Wrapper"; 4280 case X86ISD::S2VEC: return "X86ISD::S2VEC"; 4281 case X86ISD::PEXTRW: return "X86ISD::PEXTRW"; 4282 case X86ISD::PINSRW: return "X86ISD::PINSRW"; 4283 case X86ISD::FMAX: return "X86ISD::FMAX"; 4284 case X86ISD::FMIN: return "X86ISD::FMIN"; 4285 case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; 4286 case X86ISD::THREAD_POINTER: return "X86ISD::THREAD_POINTER"; 4287 } 4288} 4289 4290// isLegalAddressingMode - Return true if the addressing mode represented 4291// by AM is legal for this target, for a load/store of the specified type. 4292bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM, 4293 const Type *Ty) const { 4294 // X86 supports extremely general addressing modes. 4295 4296 // X86 allows a sign-extended 32-bit immediate field as a displacement. 4297 if (AM.BaseOffs <= -(1LL << 32) || AM.BaseOffs >= (1LL << 32)-1) 4298 return false; 4299 4300 if (AM.BaseGV) { 4301 // X86-64 only supports addr of globals in small code model. 4302 if (Subtarget->is64Bit() && 4303 getTargetMachine().getCodeModel() != CodeModel::Small) 4304 return false; 4305 4306 // We can only fold this if we don't need a load either. 4307 if (Subtarget->GVRequiresExtraLoad(AM.BaseGV, getTargetMachine(), false)) 4308 return false; 4309 } 4310 4311 switch (AM.Scale) { 4312 case 0: 4313 case 1: 4314 case 2: 4315 case 4: 4316 case 8: 4317 // These scales always work. 4318 break; 4319 case 3: 4320 case 5: 4321 case 9: 4322 // These scales are formed with basereg+scalereg. Only accept if there is 4323 // no basereg yet. 4324 if (AM.HasBaseReg) 4325 return false; 4326 break; 4327 default: // Other stuff never works. 4328 return false; 4329 } 4330 4331 return true; 4332} 4333 4334 4335/// isShuffleMaskLegal - Targets can use this to indicate that they only 4336/// support *some* VECTOR_SHUFFLE operations, those with specific masks. 4337/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 4338/// are assumed to be legal. 4339bool 4340X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const { 4341 // Only do shuffles on 128-bit vector types for now. 
4342 if (MVT::getSizeInBits(VT) == 64) return false; 4343 return (Mask.Val->getNumOperands() <= 4 || 4344 isIdentityMask(Mask.Val) || 4345 isIdentityMask(Mask.Val, true) || 4346 isSplatMask(Mask.Val) || 4347 isPSHUFHW_PSHUFLWMask(Mask.Val) || 4348 X86::isUNPCKLMask(Mask.Val) || 4349 X86::isUNPCKHMask(Mask.Val) || 4350 X86::isUNPCKL_v_undef_Mask(Mask.Val) || 4351 X86::isUNPCKH_v_undef_Mask(Mask.Val)); 4352} 4353 4354bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps, 4355 MVT::ValueType EVT, 4356 SelectionDAG &DAG) const { 4357 unsigned NumElts = BVOps.size(); 4358 // Only do shuffles on 128-bit vector types for now. 4359 if (MVT::getSizeInBits(EVT) * NumElts == 64) return false; 4360 if (NumElts == 2) return true; 4361 if (NumElts == 4) { 4362 return (isMOVLMask(&BVOps[0], 4) || 4363 isCommutedMOVL(&BVOps[0], 4, true) || 4364 isSHUFPMask(&BVOps[0], 4) || 4365 isCommutedSHUFP(&BVOps[0], 4)); 4366 } 4367 return false; 4368} 4369 4370//===----------------------------------------------------------------------===// 4371// X86 Scheduler Hooks 4372//===----------------------------------------------------------------------===// 4373 4374MachineBasicBlock * 4375X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI, 4376 MachineBasicBlock *BB) { 4377 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 4378 switch (MI->getOpcode()) { 4379 default: assert(false && "Unexpected instr type to insert"); 4380 case X86::CMOV_FR32: 4381 case X86::CMOV_FR64: 4382 case X86::CMOV_V4F32: 4383 case X86::CMOV_V2F64: 4384 case X86::CMOV_V2I64: { 4385 // To "insert" a SELECT_CC instruction, we actually have to insert the 4386 // diamond control-flow pattern. The incoming instruction knows the 4387 // destination vreg to set, the condition code register to branch on, the 4388 // true/false values to select between, and a branch opcode to use. 4389 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 4390 ilist<MachineBasicBlock>::iterator It = BB; 4391 ++It; 4392 4393 // thisMBB: 4394 // ... 4395 // TrueVal = ... 4396 // cmpTY ccX, r1, r2 4397 // bCC copy1MBB 4398 // fallthrough --> copy0MBB 4399 MachineBasicBlock *thisMBB = BB; 4400 MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB); 4401 MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB); 4402 unsigned Opc = 4403 X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm()); 4404 BuildMI(BB, TII->get(Opc)).addMBB(sinkMBB); 4405 MachineFunction *F = BB->getParent(); 4406 F->getBasicBlockList().insert(It, copy0MBB); 4407 F->getBasicBlockList().insert(It, sinkMBB); 4408 // Update machine-CFG edges by first adding all successors of the current 4409 // block to the new block which will contain the Phi node for the select. 4410 for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), 4411 e = BB->succ_end(); i != e; ++i) 4412 sinkMBB->addSuccessor(*i); 4413 // Next, remove all successors of the current block, and add the true 4414 // and fallthrough blocks as its successors. 4415 while(!BB->succ_empty()) 4416 BB->removeSuccessor(BB->succ_begin()); 4417 BB->addSuccessor(copy0MBB); 4418 BB->addSuccessor(sinkMBB); 4419 4420 // copy0MBB: 4421 // %FalseValue = ... 4422 // # fallthrough to sinkMBB 4423 BB = copy0MBB; 4424 4425 // Update machine-CFG edges 4426 BB->addSuccessor(sinkMBB); 4427 4428 // sinkMBB: 4429 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 4430 // ... 
4431 BB = sinkMBB; 4432 BuildMI(BB, TII->get(X86::PHI), MI->getOperand(0).getReg()) 4433 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) 4434 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 4435 4436 delete MI; // The pseudo instruction is gone now. 4437 return BB; 4438 } 4439 4440 case X86::FP_TO_INT16_IN_MEM: 4441 case X86::FP_TO_INT32_IN_MEM: 4442 case X86::FP_TO_INT64_IN_MEM: { 4443 // Change the floating point control register to use "round towards zero" 4444 // mode when truncating to an integer value. 4445 MachineFunction *F = BB->getParent(); 4446 int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2); 4447 addFrameReference(BuildMI(BB, TII->get(X86::FNSTCW16m)), CWFrameIdx); 4448 4449 // Load the old value of the high byte of the control word... 4450 unsigned OldCW = 4451 F->getSSARegMap()->createVirtualRegister(X86::GR16RegisterClass); 4452 addFrameReference(BuildMI(BB, TII->get(X86::MOV16rm), OldCW), CWFrameIdx); 4453 4454 // Set the high part to be round to zero... 4455 addFrameReference(BuildMI(BB, TII->get(X86::MOV16mi)), CWFrameIdx) 4456 .addImm(0xC7F); 4457 4458 // Reload the modified control word now... 4459 addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx); 4460 4461 // Restore the memory image of control word to original value 4462 addFrameReference(BuildMI(BB, TII->get(X86::MOV16mr)), CWFrameIdx) 4463 .addReg(OldCW); 4464 4465 // Get the X86 opcode to use. 4466 unsigned Opc; 4467 switch (MI->getOpcode()) { 4468 default: assert(0 && "illegal opcode!"); 4469 case X86::FP_TO_INT16_IN_MEM: Opc = X86::FpIST16m; break; 4470 case X86::FP_TO_INT32_IN_MEM: Opc = X86::FpIST32m; break; 4471 case X86::FP_TO_INT64_IN_MEM: Opc = X86::FpIST64m; break; 4472 } 4473 4474 X86AddressMode AM; 4475 MachineOperand &Op = MI->getOperand(0); 4476 if (Op.isRegister()) { 4477 AM.BaseType = X86AddressMode::RegBase; 4478 AM.Base.Reg = Op.getReg(); 4479 } else { 4480 AM.BaseType = X86AddressMode::FrameIndexBase; 4481 AM.Base.FrameIndex = Op.getFrameIndex(); 4482 } 4483 Op = MI->getOperand(1); 4484 if (Op.isImmediate()) 4485 AM.Scale = Op.getImm(); 4486 Op = MI->getOperand(2); 4487 if (Op.isImmediate()) 4488 AM.IndexReg = Op.getImm(); 4489 Op = MI->getOperand(3); 4490 if (Op.isGlobalAddress()) { 4491 AM.GV = Op.getGlobal(); 4492 } else { 4493 AM.Disp = Op.getImm(); 4494 } 4495 addFullAddress(BuildMI(BB, TII->get(Opc)), AM) 4496 .addReg(MI->getOperand(4).getReg()); 4497 4498 // Reload the original control word now. 4499 addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx); 4500 4501 delete MI; // The pseudo instruction is gone now. 4502 return BB; 4503 } 4504 } 4505} 4506 4507//===----------------------------------------------------------------------===// 4508// X86 Optimization Hooks 4509//===----------------------------------------------------------------------===// 4510 4511void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op, 4512 uint64_t Mask, 4513 uint64_t &KnownZero, 4514 uint64_t &KnownOne, 4515 const SelectionDAG &DAG, 4516 unsigned Depth) const { 4517 unsigned Opc = Op.getOpcode(); 4518 assert((Opc >= ISD::BUILTIN_OP_END || 4519 Opc == ISD::INTRINSIC_WO_CHAIN || 4520 Opc == ISD::INTRINSIC_W_CHAIN || 4521 Opc == ISD::INTRINSIC_VOID) && 4522 "Should use MaskedValueIsZero if you don't know whether Op" 4523 " is a target node!"); 4524 4525 KnownZero = KnownOne = 0; // Don't know anything. 
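  // X86ISD::SETCC produces 0 or 1 in its result type, so every bit other than
  // bit 0 is known to be zero; the mask computation below encodes exactly that.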
  switch (Opc) {
  default: break;
  case X86ISD::SETCC:
    KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
    break;
  }
}

/// getShuffleScalarElt - Returns the scalar element that will make up the ith
/// element of the result of the vector shuffle.
static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) {
  MVT::ValueType VT = N->getValueType(0);
  SDOperand PermMask = N->getOperand(2);
  unsigned NumElems = PermMask.getNumOperands();
  SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1);
  i %= NumElems;
  if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    return (i == 0)
      ? V.getOperand(0)
      : DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
  } else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) {
    SDOperand Idx = PermMask.getOperand(i);
    if (Idx.getOpcode() == ISD::UNDEF)
      return DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
    return getShuffleScalarElt(V.Val, cast<ConstantSDNode>(Idx)->getValue(), DAG);
  }
  return SDOperand();
}

/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
/// node is a GlobalAddress + an offset.
static bool isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) {
  unsigned Opc = N->getOpcode();
  if (Opc == X86ISD::Wrapper) {
    if (isa<GlobalAddressSDNode>(N->getOperand(0))) {
      GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
      return true;
    }
  } else if (Opc == ISD::ADD) {
    SDOperand N1 = N->getOperand(0);
    SDOperand N2 = N->getOperand(1);
    if (isGAPlusOffset(N1.Val, GA, Offset)) {
      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
      if (V) {
        Offset += V->getSignExtended();
        return true;
      }
    } else if (isGAPlusOffset(N2.Val, GA, Offset)) {
      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
      if (V) {
        Offset += V->getSignExtended();
        return true;
      }
    }
  }
  return false;
}

/// isConsecutiveLoad - Returns true if N is loading from an address of Base
/// + Dist * Size.
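/// Both loads must hang off the same chain.  Two address forms are handled:
/// frame indices, compared via their frame-object offsets and sizes, and
/// GlobalAddress + offset expressions, compared via isGAPlusOffset.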
static bool isConsecutiveLoad(SDNode *N, SDNode *Base, int Dist, int Size,
                              MachineFrameInfo *MFI) {
  if (N->getOperand(0).Val != Base->getOperand(0).Val)
    return false;

  SDOperand Loc = N->getOperand(1);
  SDOperand BaseLoc = Base->getOperand(1);
  if (Loc.getOpcode() == ISD::FrameIndex) {
    if (BaseLoc.getOpcode() != ISD::FrameIndex)
      return false;
    int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
    int FS = MFI->getObjectSize(FI);
    int BFS = MFI->getObjectSize(BFI);
    if (FS != BFS || FS != Size) return false;
    return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Size);
  } else {
    GlobalValue *GV1 = NULL;
    GlobalValue *GV2 = NULL;
    int64_t Offset1 = 0;
    int64_t Offset2 = 0;
    bool isGA1 = isGAPlusOffset(Loc.Val, GV1, Offset1);
    bool isGA2 = isGAPlusOffset(BaseLoc.Val, GV2, Offset2);
    if (isGA1 && isGA2 && GV1 == GV2)
      return Offset1 == (Offset2 + Dist*Size);
  }

  return false;
}

static bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI,
                              const X86Subtarget *Subtarget) {
  GlobalValue *GV;
  int64_t Offset = 0;
  if (isGAPlusOffset(Base, GV, Offset))
    return (GV->getAlignment() >= 16 && (Offset % 16) == 0);
  else {
    assert(Base->getOpcode() == ISD::FrameIndex && "Unexpected base node!");
    int BFI = cast<FrameIndexSDNode>(Base)->getIndex();
    if (BFI < 0)
      // Fixed objects do not specify alignment, however the offsets are known.
      return ((Subtarget->getStackAlignment() % 16) == 0 &&
              (MFI->getObjectOffset(BFI) % 16) == 0);
    else
      return MFI->getObjectAlignment(BFI) >= 16;
  }
  return false;
}

/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
/// if the load addresses are consecutive, non-overlapping, and in the right
/// order.
static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
                                       const X86Subtarget *Subtarget) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MVT::ValueType VT = N->getValueType(0);
  MVT::ValueType EVT = MVT::getVectorElementType(VT);
  SDOperand PermMask = N->getOperand(2);
  int NumElems = (int)PermMask.getNumOperands();
  SDNode *Base = NULL;
  for (int i = 0; i < NumElems; ++i) {
    SDOperand Idx = PermMask.getOperand(i);
    if (Idx.getOpcode() == ISD::UNDEF) {
      if (!Base) return SDOperand();
    } else {
      SDOperand Arg =
        getShuffleScalarElt(N, cast<ConstantSDNode>(Idx)->getValue(), DAG);
      if (!Arg.Val || !ISD::isNON_EXTLoad(Arg.Val))
        return SDOperand();
      if (!Base)
        Base = Arg.Val;
      else if (!isConsecutiveLoad(Arg.Val, Base,
                                  i, MVT::getSizeInBits(EVT)/8, MFI))
        return SDOperand();
    }
  }

  bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget);
  if (isAlign16) {
    LoadSDNode *LD = cast<LoadSDNode>(Base);
    return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
                       LD->getSrcValueOffset());
  } else {
    // Just use movups, it's shorter.
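    // Emit an X86ISD::LOAD_UA (unaligned v4f32 load) that reuses the chain,
    // pointer, and offset operands of the first scalar load, then bitcast the
    // result back to the shuffle's vector type.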
    SDVTList Tys = DAG.getVTList(MVT::v4f32, MVT::Other);
    SmallVector<SDOperand, 3> Ops;
    Ops.push_back(Base->getOperand(0));
    Ops.push_back(Base->getOperand(1));
    Ops.push_back(Base->getOperand(2));
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       DAG.getNode(X86ISD::LOAD_UA, Tys, &Ops[0], Ops.size()));
  }
}

/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
static SDOperand PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
                                      const X86Subtarget *Subtarget) {
  SDOperand Cond = N->getOperand(0);

  // If we have SSE[12] support, try to form min/max nodes.
  if (Subtarget->hasSSE2() &&
      (N->getValueType(0) == MVT::f32 || N->getValueType(0) == MVT::f64)) {
    if (Cond.getOpcode() == ISD::SETCC) {
      // Get the LHS/RHS of the select.
      SDOperand LHS = N->getOperand(1);
      SDOperand RHS = N->getOperand(2);
      ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();

      unsigned Opcode = 0;
      if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) {
        switch (CC) {
        default: break;
        case ISD::SETOLE:  // (X <= Y) ? X : Y -> min
        case ISD::SETULE:
        case ISD::SETLE:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETOLT:  // (X olt/lt Y) ? X : Y -> min
        case ISD::SETLT:
          Opcode = X86ISD::FMIN;
          break;

        case ISD::SETOGT:  // (X > Y) ? X : Y -> max
        case ISD::SETUGT:
        case ISD::SETGT:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETUGE:  // (X uge/ge Y) ? X : Y -> max
        case ISD::SETGE:
          Opcode = X86ISD::FMAX;
          break;
        }
      } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) {
        switch (CC) {
        default: break;
        case ISD::SETOGT:  // (X > Y) ? Y : X -> min
        case ISD::SETUGT:
        case ISD::SETGT:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETUGE:  // (X uge/ge Y) ? Y : X -> min
        case ISD::SETGE:
          Opcode = X86ISD::FMIN;
          break;

        case ISD::SETOLE:  // (X <= Y) ? Y : X -> max
        case ISD::SETULE:
        case ISD::SETLE:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETOLT:  // (X olt/lt Y) ? Y : X -> max
        case ISD::SETLT:
          Opcode = X86ISD::FMAX;
          break;
        }
      }

      if (Opcode)
        return DAG.getNode(Opcode, N->getValueType(0), LHS, RHS);
    }
  }

  return SDOperand();
}

SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  switch (N->getOpcode()) {
  default: break;
  case ISD::VECTOR_SHUFFLE:
    return PerformShuffleCombine(N, DAG, Subtarget);
  case ISD::SELECT:
    return PerformSELECTCombine(N, DAG, Subtarget);
  }

  return SDOperand();
}

//===----------------------------------------------------------------------===//
//                        X86 Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
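/// The single-letter register constraints handled here ('A', 'r', 'R', 'l',
/// 'q', 'Q', 'x', 'Y') are all classified as C_RegisterClass; anything else
/// is deferred to the TargetLowering default.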
X86TargetLowering::ConstraintType
X86TargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'A':
    case 'r':
    case 'R':
    case 'l':
    case 'q':
    case 'Q':
    case 'x':
    case 'Y':
      return C_RegisterClass;
    default:
      break;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

/// isOperandValidForConstraint - Return the specified operand (possibly
/// modified) if the specified SDOperand is valid for the specified target
/// constraint letter, otherwise return null.
SDOperand X86TargetLowering::
isOperandValidForConstraint(SDOperand Op, char Constraint, SelectionDAG &DAG) {
  switch (Constraint) {
  default: break;
  case 'I':
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getValue() <= 31)
        return DAG.getTargetConstant(C->getValue(), Op.getValueType());
    }
    return SDOperand(0,0);
  case 'N':
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getValue() <= 255)
        return DAG.getTargetConstant(C->getValue(), Op.getValueType());
    }
    return SDOperand(0,0);
  case 'i': {
    // Literal immediates are always ok.
    if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op))
      return DAG.getTargetConstant(CST->getValue(), Op.getValueType());

    // If we are in non-pic codegen mode, we allow the address of a global
    // (with an optional displacement) to be used with 'i'.
    GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
    int64_t Offset = 0;

    // Match either (GA) or (GA+C)
    if (GA) {
      Offset = GA->getOffset();
    } else if (Op.getOpcode() == ISD::ADD) {
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
      GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
      if (C && GA) {
        Offset = GA->getOffset()+C->getValue();
      } else {
        // Try the operands the other way around: (C+GA).
        C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
        GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
        if (C && GA)
          Offset = GA->getOffset()+C->getValue();
        else
          C = 0, GA = 0;
      }
    }

    if (GA) {
      // If addressing this global requires a load (e.g. in PIC mode), we
      // can't match.
      if (Subtarget->GVRequiresExtraLoad(GA->getGlobal(), getTargetMachine(),
                                         false))
        return SDOperand(0, 0);

      Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
                                      Offset);
      return Op;
    }

    // Otherwise, not valid for this mode.
    return SDOperand(0, 0);
  }
  }
  return TargetLowering::isOperandValidForConstraint(Op, Constraint, DAG);
}

std::vector<unsigned> X86TargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                  MVT::ValueType VT) const {
  if (Constraint.size() == 1) {
    // FIXME: not handling fp-stack yet!
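    // Return an explicit register list for constraints that name a strict
    // subset of a register class, e.g. GCC's 'q' (a byte-addressable
    // register) in inline asm such as:
    //   asm("movb %1, %0" : "=q"(dst) : "q"(src));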
    switch (Constraint[0]) {      // GCC X86 Constraint Letters
    default: break;  // Unknown constraint letter
    case 'A':   // EAX/EDX
      if (VT == MVT::i32 || VT == MVT::i64)
        return make_vector<unsigned>(X86::EAX, X86::EDX, 0);
      break;
    case 'q':   // Q_REGS (GENERAL_REGS in 64-bit mode)
    case 'Q':   // Q_REGS
      if (VT == MVT::i32)
        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
      else if (VT == MVT::i16)
        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
      else if (VT == MVT::i8)
        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
      break;
    }
  }

  return std::vector<unsigned>();
}

std::pair<unsigned, const TargetRegisterClass*>
X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT::ValueType VT) const {
  // First, see if this is a constraint that directly corresponds to an LLVM
  // register class.
  if (Constraint.size() == 1) {
    // GCC Constraint Letters
    switch (Constraint[0]) {
    default: break;
    case 'r':   // GENERAL_REGS
    case 'R':   // LEGACY_REGS
    case 'l':   // INDEX_REGS
      if (VT == MVT::i64 && Subtarget->is64Bit())
        return std::make_pair(0U, X86::GR64RegisterClass);
      if (VT == MVT::i32)
        return std::make_pair(0U, X86::GR32RegisterClass);
      else if (VT == MVT::i16)
        return std::make_pair(0U, X86::GR16RegisterClass);
      else if (VT == MVT::i8)
        return std::make_pair(0U, X86::GR8RegisterClass);
      break;
    case 'y':   // MMX_REGS if MMX allowed.
      if (!Subtarget->hasMMX()) break;
      return std::make_pair(0U, X86::VR64RegisterClass);
    case 'Y':   // SSE_REGS if SSE2 allowed
      if (!Subtarget->hasSSE2()) break;
      // FALL THROUGH.
    case 'x':   // SSE_REGS if SSE1 allowed
      if (!Subtarget->hasSSE1()) break;

      switch (VT) {
      default: break;
      // Scalar SSE types.
      case MVT::f32:
      case MVT::i32:
        return std::make_pair(0U, X86::FR32RegisterClass);
      case MVT::f64:
      case MVT::i64:
        return std::make_pair(0U, X86::FR64RegisterClass);
      // Vector types.
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
        return std::make_pair(0U, X86::VR128RegisterClass);
      }
      break;
    }
  }

  // Use the default implementation in TargetLowering to convert the register
  // constraint into a member of a register class.
  std::pair<unsigned, const TargetRegisterClass*> Res;
  Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);

  // Not found as a standard register?
  if (Res.second == 0) {
    // GCC calls "st(0)" just plain "st".
    if (StringsEqualNoCase("{st}", Constraint)) {
      Res.first = X86::ST0;
      Res.second = X86::RSTRegisterClass;
    }

    return Res;
  }

  // Otherwise, check to see if this is a register class of the wrong value
  // type.  For example, we want to map "{ax},i32" -> {eax}, we don't want it
  // to turn into {ax},{dx}.
  if (Res.second->hasType(VT))
    return Res;   // Correct type already, nothing to do.

  // All of the single-register GCC register classes map their values onto
  // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp".  If we
  // really want an 8-bit or 32-bit register, map to the appropriate register
  // class and return the appropriate register.
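  // For example, "{ax}" with VT == MVT::i32 is rewritten below to EAX in
  // GR32, and with VT == MVT::i64 to RAX in GR64; 16-bit registers with no
  // 8-bit counterpart (e.g. "{si}" when an i8 is wanted) are left untouched.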
  if (Res.second != X86::GR16RegisterClass)
    return Res;

  if (VT == MVT::i8) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::AL; break;
    case X86::DX: DestReg = X86::DL; break;
    case X86::CX: DestReg = X86::CL; break;
    case X86::BX: DestReg = X86::BL; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR8RegisterClass;
    }
  } else if (VT == MVT::i32) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::EAX; break;
    case X86::DX: DestReg = X86::EDX; break;
    case X86::CX: DestReg = X86::ECX; break;
    case X86::BX: DestReg = X86::EBX; break;
    case X86::SI: DestReg = X86::ESI; break;
    case X86::DI: DestReg = X86::EDI; break;
    case X86::BP: DestReg = X86::EBP; break;
    case X86::SP: DestReg = X86::ESP; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR32RegisterClass;
    }
  } else if (VT == MVT::i64) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::RAX; break;
    case X86::DX: DestReg = X86::RDX; break;
    case X86::CX: DestReg = X86::RCX; break;
    case X86::BX: DestReg = X86::RBX; break;
    case X86::SI: DestReg = X86::RSI; break;
    case X86::DI: DestReg = X86::RDI; break;
    case X86::BP: DestReg = X86::RBP; break;
    case X86::SP: DestReg = X86::RSP; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR64RegisterClass;
    }
  }

  return Res;
}