//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ParameterAttributes.h"
using namespace llvm;

X86TargetLowering::X86TargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  X86ScalarSSEf64 = Subtarget->hasSSE2();
  X86ScalarSSEf32 = Subtarget->hasSSE1();
  X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;

  bool Fast = false;

  RegInfo = TM.getRegisterInfo();

  // Set up the TargetLowering object.

  // X86 is weird: it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setSetCCResultType(MVT::i8);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setSchedulingPreference(SchedulingForRegPressure);
  setShiftAmountFlavor(Mask);  // shl X, 32 == shl X, 0
  setStackPointerRegisterToSaveRestore(X86StackPtr);

  if (Subtarget->isTargetDarwin()) {
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmp(false);
    setUseUnderscoreLongJmp(false);
  } else if (Subtarget->isTargetMingw()) {
    // MS runtime is weird: it exports _setjmp, but longjmp!
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(false);
  } else {
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(true);
  }

  // Set up the register classes.
  addRegisterClass(MVT::i8, X86::GR8RegisterClass);
  addRegisterClass(MVT::i16, X86::GR16RegisterClass);
  addRegisterClass(MVT::i32, X86::GR32RegisterClass);
  if (Subtarget->is64Bit())
    addRegisterClass(MVT::i64, X86::GR64RegisterClass);

  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);

  // We don't accept any truncstore of integer registers.
  setTruncStoreAction(MVT::i64, MVT::i32, Expand);
  setTruncStoreAction(MVT::i64, MVT::i16, Expand);
  setTruncStoreAction(MVT::i64, MVT::i8, Expand);
  setTruncStoreAction(MVT::i32, MVT::i16, Expand);
  setTruncStoreAction(MVT::i32, MVT::i8, Expand);
  setTruncStoreAction(MVT::i16, MVT::i8, Expand);
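  // Note (illustrative): a truncating store marked Expand is split by the
  // legalizer into an explicit ISD::TRUNCATE of the value followed by a plain
  // store of the narrower type, e.g.
  //   truncstore i8 (i32 %x), %p  ->  %t = trunc %x to i8;  store %t, %p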
  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
  } else {
    if (X86ScalarSSEf64)
      // If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
    else
      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
  }

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
  // SSE has no i16 to fp conversion, only i32.
  if (X86ScalarSSEf32) {
    setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
    // f32 and f64 cases are Legal, f80 case is not.
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  } else {
    setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  }

  // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
  // are Legal, f80 is custom lowered.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTs, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);

  if (X86ScalarSSEf32) {
    setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
    // f32 and f64 cases are Legal, f80 case is not.
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
  } else {
    setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
  } else {
    if (X86ScalarSSEf32 && !Subtarget->hasSSE3())
      // Expand FP_TO_UINT into a select.
      // FIXME: We would like to use a Custom expander here eventually to do
      // the optimal thing for SSE vs. the default expansion in the legalizer.
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
    else
      // With SSE3 we can use fisttpll to convert to a signed i64.
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
  }

  // TODO: when we have SSE, these could be more efficient, by using movd/movq.
  if (!X86ScalarSSEf64) {
    setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
    setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
  }
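  // Note (illustrative): with BIT_CONVERT marked Expand, an f32<->i32 bitcast
  // is legalized through a stack slot -- e.g. with SSE1, movss the value to a
  // spill slot and movl it back -- which is why movd/movq would be faster.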
  // Scalar integer multiply, multiply-high, divide, and remainder are
  // lowered to use operations that produce two results, to match the
  // available instructions. This exposes the two-result form to trivial
  // CSE, which is able to combine x/y and x%y into a single instruction,
  // for example. The single-result multiply instructions are introduced
  // in X86ISelDAGToDAG.cpp, after CSE, for uses where the high part
  // is not needed.
  setOperationAction(ISD::MUL, MVT::i8, Expand);
  setOperationAction(ISD::MULHS, MVT::i8, Expand);
  setOperationAction(ISD::MULHU, MVT::i8, Expand);
  setOperationAction(ISD::SDIV, MVT::i8, Expand);
  setOperationAction(ISD::UDIV, MVT::i8, Expand);
  setOperationAction(ISD::SREM, MVT::i8, Expand);
  setOperationAction(ISD::UREM, MVT::i8, Expand);
  setOperationAction(ISD::MUL, MVT::i16, Expand);
  setOperationAction(ISD::MULHS, MVT::i16, Expand);
  setOperationAction(ISD::MULHU, MVT::i16, Expand);
  setOperationAction(ISD::SDIV, MVT::i16, Expand);
  setOperationAction(ISD::UDIV, MVT::i16, Expand);
  setOperationAction(ISD::SREM, MVT::i16, Expand);
  setOperationAction(ISD::UREM, MVT::i16, Expand);
  setOperationAction(ISD::MUL, MVT::i32, Expand);
  setOperationAction(ISD::MULHS, MVT::i32, Expand);
  setOperationAction(ISD::MULHU, MVT::i32, Expand);
  setOperationAction(ISD::SDIV, MVT::i32, Expand);
  setOperationAction(ISD::UDIV, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::MUL, MVT::i64, Expand);
  setOperationAction(ISD::MULHS, MVT::i64, Expand);
  setOperationAction(ISD::MULHU, MVT::i64, Expand);
  setOperationAction(ISD::SDIV, MVT::i64, Expand);
  setOperationAction(ISD::UDIV, MVT::i64, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  setOperationAction(ISD::FP_ROUND_INREG, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FLT_ROUNDS, MVT::i32, Custom);

  setOperationAction(ISD::CTPOP, MVT::i8, Expand);
  setOperationAction(ISD::CTTZ, MVT::i8, Custom);
  setOperationAction(ISD::CTLZ, MVT::i8, Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Expand);
  setOperationAction(ISD::CTTZ, MVT::i16, Custom);
  setOperationAction(ISD::CTLZ, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i32, Custom);
  setOperationAction(ISD::CTLZ, MVT::i32, Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::CTPOP, MVT::i64, Expand);
    setOperationAction(ISD::CTTZ, MVT::i64, Custom);
    setOperationAction(ISD::CTLZ, MVT::i64, Custom);
  }

  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
  setOperationAction(ISD::BSWAP, MVT::i16, Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT, MVT::i1, Promote);
  setOperationAction(ISD::SELECT, MVT::i8, Promote);
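  // Note (illustrative): the promoted i1/i8 selects are widened to a legal
  // integer type, after which the Custom handlers below lower them (and the
  // wider selects) to X86ISD::CMOV nodes.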
  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT, MVT::i16, Custom);
  setOperationAction(ISD::SELECT, MVT::i32, Custom);
  setOperationAction(ISD::SELECT, MVT::f32, Custom);
  setOperationAction(ISD::SELECT, MVT::f64, Custom);
  setOperationAction(ISD::SELECT, MVT::f80, Custom);
  setOperationAction(ISD::SETCC, MVT::i8, Custom);
  setOperationAction(ISD::SETCC, MVT::i16, Custom);
  setOperationAction(ISD::SETCC, MVT::i32, Custom);
  setOperationAction(ISD::SETCC, MVT::f32, Custom);
  setOperationAction(ISD::SETCC, MVT::f64, Custom);
  setOperationAction(ISD::SETCC, MVT::f80, Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SELECT, MVT::i64, Custom);
    setOperationAction(ISD::SETCC, MVT::i64, Custom);
  }
  // X86 ret instruction may pop stack.
  setOperationAction(ISD::RET, MVT::Other, Custom);
  if (!Subtarget->is64Bit())
    setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);

  // Darwin ABI issue.
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::ExternalSymbol, MVT::i32, Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
    setOperationAction(ISD::JumpTable, MVT::i64, Custom);
    setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
    setOperationAction(ISD::ExternalSymbol, MVT::i64, Custom);
  }
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86).
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  // X86 wants to expand memset / memcpy itself.
  setOperationAction(ISD::MEMSET, MVT::Other, Custom);
  setOperationAction(ISD::MEMCPY, MVT::Other, Custom);
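  // Note (illustrative): the custom MEMSET/MEMCPY lowering chooses between
  // rep;stos / rep;movs string operations and an unrolled sequence of stores,
  // based on the (constant) size and the alignment of the operation.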
  // Use the default ISD::LOCATION expansion.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin() &&
      !Subtarget->isTargetELF() &&
      !Subtarget->isTargetCygMing())
    setOperationAction(ISD::LABEL, MVT::Other, Expand);

  setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
  setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
  setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
  if (Subtarget->is64Bit()) {
    // FIXME: Verify
    setExceptionPointerRegister(X86::RAX);
    setExceptionSelectorRegister(X86::RDX);
  } else {
    setExceptionPointerRegister(X86::EAX);
    setExceptionSelectorRegister(X86::EDX);
  }
  setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);

  setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY, MVT::Other, Expand);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
  if (Subtarget->isTargetCygMing())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  else
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);

  if (X86ScalarSSEf64) {
    // f32 and f64 use SSE.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS, MVT::f64, Custom);
    setOperationAction(ISD::FABS, MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG, MVT::f64, Custom);
    setOperationAction(ISD::FNEG, MVT::f32, Custom);

    // Use ANDPD and ORPD to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // We don't support sin/cos/fmod.
    setOperationAction(ISD::FSIN, MVT::f64, Expand);
    setOperationAction(ISD::FCOS, MVT::f64, Expand);
    setOperationAction(ISD::FREM, MVT::f64, Expand);
    setOperationAction(ISD::FSIN, MVT::f32, Expand);
    setOperationAction(ISD::FCOS, MVT::f32, Expand);
    setOperationAction(ISD::FREM, MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(APFloat(+0.0));  // xorpd
    addLegalFPImmediate(APFloat(+0.0f)); // xorps
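    // Note (illustrative): +0.0 is the one immediate that is free in SSE --
    // xorps/xorpd of a register with itself produces it -- so it is listed as
    // legal; every other FP constant becomes a constant-pool load through the
    // Expand action above.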
    // Floating truncations from f80 and extensions to f80 go through memory.
    // If optimizing, we lie about this though and handle it in
    // InstructionSelectPreprocess so that dagcombine2 can hack on these.
    if (Fast) {
      setConvertAction(MVT::f32, MVT::f80, Expand);
      setConvertAction(MVT::f64, MVT::f80, Expand);
      setConvertAction(MVT::f80, MVT::f32, Expand);
      setConvertAction(MVT::f80, MVT::f64, Expand);
    }
  } else if (X86ScalarSSEf32) {
    // Use SSE for f32, x87 for f64.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::RFP64RegisterClass);

    // Use ANDPS to simulate FABS.
    setOperationAction(ISD::FABS, MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG, MVT::f32, Custom);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);

    // Use ANDPS and ORPS to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // We don't support sin/cos/fmod.
    setOperationAction(ISD::FSIN, MVT::f32, Expand);
    setOperationAction(ISD::FCOS, MVT::f32, Expand);
    setOperationAction(ISD::FREM, MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(APFloat(+0.0f)); // xorps
    addLegalFPImmediate(APFloat(+0.0));  // FLD0
    addLegalFPImmediate(APFloat(+1.0));  // FLD1
    addLegalFPImmediate(APFloat(-0.0));  // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0));  // FLD1/FCHS

    // SSE <-> X87 conversions go through memory. If optimizing, we lie about
    // this though and handle it in InstructionSelectPreprocess so that
    // dagcombine2 can hack on these.
    if (Fast) {
      setConvertAction(MVT::f32, MVT::f64, Expand);
      setConvertAction(MVT::f32, MVT::f80, Expand);
      setConvertAction(MVT::f80, MVT::f32, Expand);
      setConvertAction(MVT::f64, MVT::f32, Expand);
      // And x87->x87 truncations also.
      setConvertAction(MVT::f80, MVT::f64, Expand);
    }

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN, MVT::f64, Expand);
      setOperationAction(ISD::FCOS, MVT::f64, Expand);
    }
  } else {
    // f32 and f64 in x87.
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
    addRegisterClass(MVT::f32, X86::RFP32RegisterClass);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);
    setOperationAction(ISD::UNDEF, MVT::f32, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
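    // Note (illustrative): RFP32/RFP64 (and RFP80 below) are virtual register
    // classes; the FP stackifier pass later maps them onto the physical x87
    // stack registers ST(0)-ST(7).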
    // Floating truncations go through memory. If optimizing, we lie about
    // this though and handle it in InstructionSelectPreprocess so that
    // dagcombine2 can hack on these.
    if (Fast) {
      setConvertAction(MVT::f80, MVT::f32, Expand);
      setConvertAction(MVT::f64, MVT::f32, Expand);
      setConvertAction(MVT::f80, MVT::f64, Expand);
    }

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN, MVT::f64, Expand);
      setOperationAction(ISD::FCOS, MVT::f64, Expand);
    }

    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(APFloat(+0.0));  // FLD0
    addLegalFPImmediate(APFloat(+1.0));  // FLD1
    addLegalFPImmediate(APFloat(-0.0));  // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0));  // FLD1/FCHS
    addLegalFPImmediate(APFloat(+0.0f)); // FLD0
    addLegalFPImmediate(APFloat(+1.0f)); // FLD1
    addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
  }

  // Long double always uses X87.
  addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
  setOperationAction(ISD::UNDEF, MVT::f80, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
  {
    setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
    APFloat TmpFlt(+0.0);
    TmpFlt.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven);
    addLegalFPImmediate(TmpFlt);  // FLD0
    TmpFlt.changeSign();
    addLegalFPImmediate(TmpFlt);  // FLD0/FCHS
    APFloat TmpFlt2(+1.0);
    TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven);
    addLegalFPImmediate(TmpFlt2); // FLD1
    TmpFlt2.changeSign();
    addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
  }

  if (!UnsafeFPMath) {
    setOperationAction(ISD::FSIN, MVT::f80, Expand);
    setOperationAction(ISD::FCOS, MVT::f80, Expand);
  }

  // Always use a library call for pow.
  setOperationAction(ISD::FPOW, MVT::f32, Expand);
  setOperationAction(ISD::FPOW, MVT::f64, Expand);
  setOperationAction(ISD::FPOW, MVT::f80, Expand);
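  // Note (illustrative): Expand on FPOW has no inline fallback, so the
  // legalizer emits a runtime library call (powf, pow, or powl, matching the
  // operand type).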
  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    setOperationAction(ISD::ADD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SUB, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FADD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FNEG, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSUB, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::MUL, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FMUL, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FABS, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSIN, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FCOS, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FPOWI, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSQRT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FCOPYSIGN, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SDIVREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIVREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FPOW, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::CTPOP, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::CTTZ, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::CTLZ, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SHL, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SRA, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SRL, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::ROTL, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::ROTR, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::BSWAP, (MVT::ValueType)VT, Expand);
  }

  if (Subtarget->hasMMX()) {
    addRegisterClass(MVT::v8i8, X86::VR64RegisterClass);
    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);
    addRegisterClass(MVT::v1i64, X86::VR64RegisterClass);

    // FIXME: add MMX packed arithmetic

    setOperationAction(ISD::ADD, MVT::v8i8, Legal);
    setOperationAction(ISD::ADD, MVT::v4i16, Legal);
    setOperationAction(ISD::ADD, MVT::v2i32, Legal);
    setOperationAction(ISD::ADD, MVT::v1i64, Legal);

    setOperationAction(ISD::SUB, MVT::v8i8, Legal);
    setOperationAction(ISD::SUB, MVT::v4i16, Legal);
    setOperationAction(ISD::SUB, MVT::v2i32, Legal);
    setOperationAction(ISD::SUB, MVT::v1i64, Legal);

    setOperationAction(ISD::MULHS, MVT::v4i16, Legal);
    setOperationAction(ISD::MUL, MVT::v4i16, Legal);
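    // Note (illustrative): the Promote/AddPromotedToType pairs below bitcast
    // the narrower MMX element types to v1i64, so a single 64-bit pand/por/
    // pxor (and a single movq load) serves every element width.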
    setOperationAction(ISD::AND, MVT::v8i8, Promote);
    AddPromotedToType (ISD::AND, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v4i16, Promote);
    AddPromotedToType (ISD::AND, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v2i32, Promote);
    AddPromotedToType (ISD::AND, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v1i64, Legal);

    setOperationAction(ISD::OR, MVT::v8i8, Promote);
    AddPromotedToType (ISD::OR, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::OR, MVT::v4i16, Promote);
    AddPromotedToType (ISD::OR, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::OR, MVT::v2i32, Promote);
    AddPromotedToType (ISD::OR, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::OR, MVT::v1i64, Legal);

    setOperationAction(ISD::XOR, MVT::v8i8, Promote);
    AddPromotedToType (ISD::XOR, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v4i16, Promote);
    AddPromotedToType (ISD::XOR, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v2i32, Promote);
    AddPromotedToType (ISD::XOR, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v1i64, Legal);

    setOperationAction(ISD::LOAD, MVT::v8i8, Promote);
    AddPromotedToType (ISD::LOAD, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v4i16, Promote);
    AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v2i32, Promote);
    AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v1i64, Legal);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);

    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom);
  }

  if (Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
    setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
    setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
  }
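  // Note (illustrative): there is no packed negate instruction, so the custom
  // FNEG v4f32 lowering XORs the operand with a <-0.0, -0.0, -0.0, -0.0>
  // sign-bit mask -- the vector analogue of the scalar XORPS trick above.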
  if (Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);

    setOperationAction(ISD::ADD, MVT::v16i8, Legal);
    setOperationAction(ISD::ADD, MVT::v8i16, Legal);
    setOperationAction(ISD::ADD, MVT::v4i32, Legal);
    setOperationAction(ISD::ADD, MVT::v2i64, Legal);
    setOperationAction(ISD::SUB, MVT::v16i8, Legal);
    setOperationAction(ISD::SUB, MVT::v8i16, Legal);
    setOperationAction(ISD::SUB, MVT::v4i32, Legal);
    setOperationAction(ISD::SUB, MVT::v2i64, Legal);
    setOperationAction(ISD::MUL, MVT::v8i16, Legal);
    setOperationAction(ISD::FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEG, MVT::v2f64, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
    // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      // Do not attempt to custom lower non-power-of-2 vectors.
      if (!isPowerOf2_32(MVT::getVectorNumElements(VT)))
        continue;
      setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    }
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
    if (Subtarget->is64Bit())
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::AND, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::OR, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::XOR, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
    }

    setTruncStoreAction(MVT::f64, MVT::f32, Expand);

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
  setTargetDAGCombine(ISD::SELECT);

  computeRegisterProperties();
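  // Note (illustrative): setTargetDAGCombine registers interest in an opcode;
  // whenever the DAG combiner visits a VECTOR_SHUFFLE or SELECT node it calls
  // this target's PerformDAGCombine hook on it.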
  // FIXME: These should be based on subtarget info. Plus, the values should
  // be smaller when we are in optimizing for size mode.
  maxStoresPerMemset = 16;  // For %llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 16;  // For %llvm.memcpy -> sequence of stores
  maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
  allowUnalignedMemoryAccesses = true; // x86 supports it!
}

/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(const Type *Ty, unsigned &MaxAlign) {
  if (MaxAlign == 16)
    return;
  if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (VTy->getBitWidth() == 128)
      MaxAlign = 16;
    else if (VTy->getBitWidth() == 64)
      if (MaxAlign < 8)
        MaxAlign = 8;
  } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    unsigned EltAlign = 0;
    getMaxByValAlign(ATy->getElementType(), EltAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (const StructType *STy = dyn_cast<StructType>(Ty)) {
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      unsigned EltAlign = 0;
      getMaxByValAlign(STy->getElementType(i), EltAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == 16)
        break;
    }
  }
  return;
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. For X86, aggregates
/// that contain SSE vectors are placed at 16-byte boundaries while the rest
/// are at 4-byte boundaries.
unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
  if (Subtarget->is64Bit())
    return getTargetData()->getABITypeAlignment(Ty);
  unsigned Align = 4;
  getMaxByValAlign(Ty, Align);
  return Align;
}

/// getPICJumpTableRelocBase - Returns relocation base for the given PIC
/// jumptable.
SDOperand X86TargetLowering::getPICJumpTableRelocBase(SDOperand Table,
                                                      SelectionDAG &DAG) const {
  if (usesGlobalOffsetTable())
    return DAG.getNode(ISD::GLOBAL_OFFSET_TABLE, getPointerTy());
  if (!Subtarget->isPICStyleRIPRel())
    return DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy());
  return Table;
}

//===----------------------------------------------------------------------===//
//               Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "X86GenCallingConv.inc"

/// GetPossiblePreceedingTailCall - Get the preceding X86ISD::TAILCALL node if
/// it exists, skipping a possible ISD::TokenFactor.
static SDOperand GetPossiblePreceedingTailCall(SDOperand Chain) {
  if (Chain.getOpcode() == X86ISD::TAILCALL) {
    return Chain;
  } else if (Chain.getOpcode() == ISD::TokenFactor) {
    if (Chain.getNumOperands() &&
        Chain.getOperand(0).getOpcode() == X86ISD::TAILCALL)
      return Chain.getOperand(0);
  }
  return Chain;
}

/// LowerRET - Lower an ISD::RET node.
SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
  assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args");

  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
  CCInfo.AnalyzeReturn(Op.Val, RetCC_X86);
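  // Note (illustrative): AnalyzeReturn runs the RetCC_X86 table over each
  // return value and fills RVLocs with one location per value -- e.g. an i32
  // lands in EAX, a scalar FP value in ST0 or XMM0 depending on the subtarget.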
  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      if (RVLocs[i].isRegLoc())
        DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }
  SDOperand Chain = Op.getOperand(0);

  // Handle tail call return.
  Chain = GetPossiblePreceedingTailCall(Chain);
  if (Chain.getOpcode() == X86ISD::TAILCALL) {
    SDOperand TailCall = Chain;
    SDOperand TargetAddress = TailCall.getOperand(1);
    SDOperand StackAdjustment = TailCall.getOperand(2);
    assert(((TargetAddress.getOpcode() == ISD::Register &&
             (cast<RegisterSDNode>(TargetAddress)->getReg() == X86::ECX ||
              cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R9)) ||
            TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
            TargetAddress.getOpcode() == ISD::TargetGlobalAddress) &&
           "Expecting a global address, external symbol, or register");
    assert(StackAdjustment.getOpcode() == ISD::Constant &&
           "Expecting a const value");

    SmallVector<SDOperand,8> Operands;
    Operands.push_back(Chain.getOperand(0));
    Operands.push_back(TargetAddress);
    Operands.push_back(StackAdjustment);
    // Copy registers used by the call. Last operand is a flag so it is not
    // copied.
    for (unsigned i = 3; i < TailCall.getNumOperands()-1; i++) {
      Operands.push_back(Chain.getOperand(i));
    }
    return DAG.getNode(X86ISD::TC_RETURN, MVT::Other, &Operands[0],
                       Operands.size());
  }

  // Regular return.
  SDOperand Flag;

  // Copy the result values into the output registers.
  if (RVLocs.size() != 1 || !RVLocs[0].isRegLoc() ||
      RVLocs[0].getLocReg() != X86::ST0) {
    for (unsigned i = 0; i != RVLocs.size(); ++i) {
      CCValAssign &VA = RVLocs[i];
      assert(VA.isRegLoc() && "Can only return in registers!");
      Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1),
                               Flag);
      Flag = Chain.getValue(1);
    }
  } else {
    // We need to handle a destination of ST0 specially, because it isn't
    // really a register.
    SDOperand Value = Op.getOperand(1);

    // If this is a scalar FP value living in an SSE register, move it from
    // an XMM register onto the fp-stack. Do this with an FP_EXTEND to f80.
    // This will get legalized into a load/store if it can't get optimized
    // away.
    if (isScalarFPTypeInSSEReg(RVLocs[0].getValVT()))
      Value = DAG.getNode(ISD::FP_EXTEND, MVT::f80, Value);

    SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
    SDOperand Ops[] = { Chain, Value };
    Chain = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops, 2);
    Flag = Chain.getValue(1);
  }

  SDOperand BytesToPop = DAG.getConstant(getBytesToPopOnReturn(), MVT::i16);
  if (Flag.Val)
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop, Flag);
  else
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop);
}
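// Note (illustrative): the BytesToPop operand of X86ISD::RET_FLAG is what lets
// instruction selection emit "ret imm" for callee-pop conventions (e.g.
// "ret 4" for an x86-32 sret function) instead of a plain "ret".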
/// LowerCallResult - Lower the result values of an ISD::CALL into the
/// appropriate copies out of appropriate physical registers. This assumes that
/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
/// being lowered. This returns an SDNode with the same number of values as the
/// ISD::CALL.
SDNode *X86TargetLowering::
LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall,
                unsigned CallingConv, SelectionDAG &DAG) {

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  bool isVarArg = cast<ConstantSDNode>(TheCall->getOperand(2))->getValue() != 0;
  CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
  CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);

  SmallVector<SDOperand, 8> ResultVals;

  // Copy all of the result registers out of their specified physreg.
  if (RVLocs.size() != 1 || RVLocs[0].getLocReg() != X86::ST0) {
    for (unsigned i = 0; i != RVLocs.size(); ++i) {
      Chain = DAG.getCopyFromReg(Chain, RVLocs[i].getLocReg(),
                                 RVLocs[i].getValVT(), InFlag).getValue(1);
      InFlag = Chain.getValue(2);
      ResultVals.push_back(Chain.getValue(0));
    }
  } else {
    // Copies from the FP stack are special, as ST0 isn't a valid register
    // before the fp stackifier runs.

    // Copy ST0 into an RFP register with FP_GET_RESULT. If this will end up
    // in an SSE register, copy it out as F80 and do a truncate, otherwise use
    // the specified value type.
    MVT::ValueType GetResultTy = RVLocs[0].getValVT();
    if (isScalarFPTypeInSSEReg(GetResultTy))
      GetResultTy = MVT::f80;
    SDVTList Tys = DAG.getVTList(GetResultTy, MVT::Other, MVT::Flag);

    SDOperand GROps[] = { Chain, InFlag };
    SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2);
    Chain = RetVal.getValue(1);
    InFlag = RetVal.getValue(2);

    // If we want the result in an SSE register, use an FP_TRUNCATE to get it
    // there.
    if (GetResultTy != RVLocs[0].getValVT())
      RetVal = DAG.getNode(ISD::FP_ROUND, RVLocs[0].getValVT(), RetVal,
                           // This truncation won't change the value.
                           DAG.getIntPtrConstant(1));

    ResultVals.push_back(RetVal);
  }

  // Merge everything together with a MERGE_VALUES node.
  ResultVals.push_back(Chain);
  return DAG.getNode(ISD::MERGE_VALUES, TheCall->getVTList(),
                     &ResultVals[0], ResultVals.size()).Val;
}

/// LowerCallResultToTwo64BitRegs - Lower the result values of an x86-64
/// ISD::CALL where the results are known to be in two 64-bit registers,
/// e.g. XMM0 and XMM1. This simply stores the two values back to the
/// fixed stack slot allocated for StructRet.
SDNode *X86TargetLowering::
LowerCallResultToTwo64BitRegs(SDOperand Chain, SDOperand InFlag,
                              SDNode *TheCall, unsigned Reg1, unsigned Reg2,
                              MVT::ValueType VT, SelectionDAG &DAG) {
  SDOperand RetVal1 = DAG.getCopyFromReg(Chain, Reg1, VT, InFlag);
  Chain = RetVal1.getValue(1);
  InFlag = RetVal1.getValue(2);
  SDOperand RetVal2 = DAG.getCopyFromReg(Chain, Reg2, VT, InFlag);
  Chain = RetVal2.getValue(1);
  InFlag = RetVal2.getValue(2);
  SDOperand FIN = TheCall->getOperand(5);
  Chain = DAG.getStore(Chain, RetVal1, FIN, NULL, 0);
  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(8));
  Chain = DAG.getStore(Chain, RetVal2, FIN, NULL, 0);
  return Chain.Val;
}
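// Note: operand 5 of an ISD::CALL node is the first real argument (operands
// are: chain, calling convention, varargs flag, tail-call flag, callee, then
// argument/flags pairs), so getOperand(5) above is the hidden sret pointer --
// the stores write straight into the caller's return slot.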
/// LowerCallResultToTwoX87Regs - Lower the result values of an x86-64
/// ISD::CALL where the results are known to be in ST0 and ST1.
SDNode *X86TargetLowering::
LowerCallResultToTwoX87Regs(SDOperand Chain, SDOperand InFlag,
                            SDNode *TheCall, SelectionDAG &DAG) {
  SmallVector<SDOperand, 8> ResultVals;
  const MVT::ValueType VTs[] = { MVT::f80, MVT::f80, MVT::Other, MVT::Flag };
  SDVTList Tys = DAG.getVTList(VTs, 4);
  SDOperand Ops[] = { Chain, InFlag };
  SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT2, Tys, Ops, 2);
  Chain = RetVal.getValue(2);
  SDOperand FIN = TheCall->getOperand(5);
  Chain = DAG.getStore(Chain, RetVal.getValue(1), FIN, NULL, 0);
  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(16));
  Chain = DAG.getStore(Chain, RetVal, FIN, NULL, 0);
  return Chain.Val;
}

//===----------------------------------------------------------------------===//
//                C & StdCall & Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
//  The StdCall calling convention is standard for many Windows API routines.
//  It differs from the C calling convention just a little: the callee cleans
//  up the stack instead of the caller, and symbols are decorated in some
//  fancy way :) It doesn't support any vector arguments. For info on the
//  Fast Calling Convention (tail call), see the implementation in
//  LowerX86_32FastCCCallTo.

/// AddLiveIn - This helper function adds the specified physical register to
/// the MachineFunction as a live-in value. It also creates a corresponding
/// virtual register for it.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
                          const TargetRegisterClass *RC) {
  assert(RC->contains(PReg) && "Not the correct regclass!");
  unsigned VReg = MF.getRegInfo().createVirtualRegister(RC);
  MF.getRegInfo().addLiveIn(PReg, VReg);
  return VReg;
}

// Determines whether a CALL node uses struct return semantics.
static bool CallIsStructReturn(SDOperand Op) {
  unsigned NumOps = (Op.getNumOperands() - 5) / 2;
  if (!NumOps)
    return false;

  ConstantSDNode *Flags = cast<ConstantSDNode>(Op.getOperand(6));
  return Flags->getValue() & ISD::ParamFlags::StructReturn;
}

// Determines whether a FORMAL_ARGUMENTS node uses struct return semantics.
static bool ArgsAreStructReturn(SDOperand Op) {
  unsigned NumArgs = Op.Val->getNumValues() - 1;
  if (!NumArgs)
    return false;

  ConstantSDNode *Flags = cast<ConstantSDNode>(Op.getOperand(3));
  return Flags->getValue() & ISD::ParamFlags::StructReturn;
}

// Determines whether a CALL or FORMAL_ARGUMENTS node requires the callee to
// pop its own arguments. Callee pop is necessary to support tail calls.
bool X86TargetLowering::IsCalleePop(SDOperand Op) {
  bool IsVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  if (IsVarArg)
    return false;

  switch (cast<ConstantSDNode>(Op.getOperand(1))->getValue()) {
  default:
    return false;
  case CallingConv::X86_StdCall:
    return !Subtarget->is64Bit();
  case CallingConv::X86_FastCall:
    return !Subtarget->is64Bit();
  case CallingConv::Fast:
    return PerformTailCallOpt;
  }
}
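// Note (illustrative): when IsCalleePop returns true, the function returns
// with "ret N" -- e.g. an x86-32 stdcall function taking two i32 parameters
// ends in "ret 8" -- so the caller must not adjust the stack afterwards.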
// Selects the correct CCAssignFn for a CALL or FORMAL_ARGUMENTS node.
CCAssignFn *X86TargetLowering::CCAssignFnForNode(SDOperand Op) const {
  unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();

  if (Subtarget->is64Bit()) {
    if (CC == CallingConv::Fast && PerformTailCallOpt)
      return CC_X86_64_TailCall;
    else
      return CC_X86_64_C;
  }

  if (CC == CallingConv::X86_FastCall)
    return CC_X86_32_FastCall;
  else if (CC == CallingConv::Fast && PerformTailCallOpt)
    return CC_X86_32_TailCall;
  else
    return CC_X86_32_C;
}

// Selects the appropriate decoration to apply to a MachineFunction containing
// a given FORMAL_ARGUMENTS node.
NameDecorationStyle
X86TargetLowering::NameDecorationForFORMAL_ARGUMENTS(SDOperand Op) {
  unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
  if (CC == CallingConv::X86_FastCall)
    return FastCall;
  else if (CC == CallingConv::X86_StdCall)
    return StdCall;
  return None;
}


// IsPossiblyOverwrittenArgumentOfTailCall - Check if the operand could
// possibly be overwritten when lowering the outgoing arguments in a tail
// call. Currently the implementation of this call is very conservative and
// assumes all arguments originating from FORMAL_ARGUMENTS or a CopyFromReg
// with virtual registers would be overwritten by direct lowering.
// Possible improvement:
// Check FORMAL_ARGUMENTS' corresponding MERGE_VALUES for CopyFromReg nodes
// indicating inreg-passed arguments, which also need not be lowered to a
// safe stack slot.
static bool IsPossiblyOverwrittenArgumentOfTailCall(SDOperand Op) {
  RegisterSDNode *OpReg = NULL;
  if (Op.getOpcode() == ISD::FORMAL_ARGUMENTS ||
      (Op.getOpcode() == ISD::CopyFromReg &&
       (OpReg = cast<RegisterSDNode>(Op.getOperand(1))) &&
       OpReg->getReg() >= MRegisterInfo::FirstVirtualRegister))
    return true;
  return false;
}

// CreateCopyOfByValArgument - Make a copy of an aggregate at the address
// specified by "Src" to address "Dst" with size and alignment information
// specified by the specific parameter attribute. The copy will be passed as
// a byval function parameter.
static SDOperand
CreateCopyOfByValArgument(SDOperand Src, SDOperand Dst, SDOperand Chain,
                          unsigned Flags, SelectionDAG &DAG) {
  unsigned Align = 1 <<
    ((Flags & ISD::ParamFlags::ByValAlign) >> ISD::ParamFlags::ByValAlignOffs);
  unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >>
    ISD::ParamFlags::ByValSizeOffs;
  SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
  SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
  SDOperand AlwaysInline = DAG.getConstant(1, MVT::i32);
  return DAG.getMemcpy(Chain, Dst, Src, SizeNode, AlignNode, AlwaysInline);
}

SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op, SelectionDAG &DAG,
                                              const CCValAssign &VA,
                                              MachineFrameInfo *MFI,
                                              SDOperand Root, unsigned i) {
  // Create the nodes corresponding to a load from this parameter slot.
  unsigned Flags = cast<ConstantSDNode>(Op.getOperand(3 + i))->getValue();
  bool isByVal = Flags & ISD::ParamFlags::ByVal;
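  // Note: ISD::ParamFlags packs the byval size and the log2 of the byval
  // alignment into the argument's 32-bit flags word; the mask/shift pairs in
  // CreateCopyOfByValArgument above unpack them (Align = 1 << alignment bits).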
  // FIXME: For now, all byval parameter objects are marked mutable. This
  // can be changed with more analysis.
  int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
                                  VA.getLocMemOffset(), !isByVal);
  SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
  if (isByVal)
    return FIN;
  return DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0);
}

SDOperand
X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();

  const Function* Fn = MF.getFunction();
  if (Fn->hasExternalLinkage() &&
      Subtarget->isTargetCygMing() &&
      Fn->getName() == "main")
    FuncInfo->setForceFramePointer(true);

  // Decorate the function name.
  FuncInfo->setDecorationStyle(NameDecorationForFORMAL_ARGUMENTS(Op));

  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  unsigned CC = MF.getFunction()->getCallingConv();
  bool Is64Bit = Subtarget->is64Bit();

  assert(!(isVarArg && CC == CallingConv::Fast) &&
         "Var args not supported with calling convention fastcc");

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeFormalArguments(Op.Val, CCAssignFnForNode(Op));

  SmallVector<SDOperand, 8> ArgValues;
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
    // places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();

    if (VA.isRegLoc()) {
      MVT::ValueType RegVT = VA.getLocVT();
      TargetRegisterClass *RC;
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else if (Is64Bit && RegVT == MVT::i64)
        RC = X86::GR64RegisterClass;
      else if (Is64Bit && RegVT == MVT::f32)
        RC = X86::FR32RegisterClass;
      else if (Is64Bit && RegVT == MVT::f64)
        RC = X86::FR64RegisterClass;
      else {
        assert(MVT::isVector(RegVT));
        if (Is64Bit && MVT::getSizeInBits(RegVT) == 64) {
          RC = X86::GR64RegisterClass; // MMX values are passed in GPRs.
          RegVT = MVT::i64;
        } else
          RC = X86::VR128RegisterClass;
      }

      unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
      SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);

      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits. Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));

      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);
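      // Note (illustrative): AssertSext/AssertZext emit no machine code; they
      // only record that the producer already extended the value, so later
      // DAG combines can delete redundant extensions.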
      // Handle MMX values passed in GPRs.
      if (Is64Bit && RegVT != VA.getLocVT() && RC == X86::GR64RegisterClass &&
          MVT::getSizeInBits(RegVT) == 64)
        ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue);

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());
      ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
    }
  }

  unsigned StackSize = CCInfo.getNextStackOffset();
  // Align the stack specially for tail calls.
  if (CC == CallingConv::Fast)
    StackSize = GetAlignedArgumentStackSize(StackSize, DAG);

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    if (Is64Bit || CC != CallingConv::X86_FastCall) {
      VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
    }
    if (Is64Bit) {
      static const unsigned GPR64ArgRegs[] = {
        X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
      };
      static const unsigned XMMArgRegs[] = {
        X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
        X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
      };

      unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 6);
      unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);

      // For X86-64, if there are vararg parameters that are passed via
      // registers, then we must store them to their spots on the stack so they
      // may be loaded by dereferencing the result of va_next.
      VarArgsGPOffset = NumIntRegs * 8;
      VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16;
      RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16);

      // Store the integer parameter registers.
      SmallVector<SDOperand, 8> MemOps;
      SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
      SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                                  DAG.getIntPtrConstant(VarArgsGPOffset));
      for (; NumIntRegs != 6; ++NumIntRegs) {
        unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
                                  X86::GR64RegisterClass);
        SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
        SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
        MemOps.push_back(Store);
        FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                          DAG.getIntPtrConstant(8));
      }

      // Now store the XMM (fp + vector) parameter registers.
      FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                        DAG.getIntPtrConstant(VarArgsFPOffset));
      for (; NumXMMRegs != 8; ++NumXMMRegs) {
        unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
                                  X86::VR128RegisterClass);
        SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
        SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
        MemOps.push_back(Store);
        FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                          DAG.getIntPtrConstant(16));
      }
      if (!MemOps.empty())
        Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                           &MemOps[0], MemOps.size());
    }
  }

  // Make sure the instruction takes 8n+4 bytes, to make sure the start of the
  // arguments, and the arguments after the retaddr has been pushed, are
  // aligned.
  if (!Is64Bit && CC == CallingConv::X86_FastCall &&
      !Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows() &&
      (StackSize & 7) == 0)
    StackSize += 4;

  ArgValues.push_back(Root);
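  // Note (illustrative): x86-32 stack argument sizes are multiples of 4, so
  // the check above forces StackSize to 4 (mod 8): e.g. 8 bytes of arguments
  // become 12, and 12 plus the 4-byte return address is 16 -- 8-byte aligned.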
  // Some CCs need callee pop.
  if (IsCalleePop(Op)) {
    BytesToPopOnReturn  = StackSize; // Callee pops everything.
    BytesCallerReserves = 0;
  } else {
    BytesToPopOnReturn  = 0; // Callee pops nothing.
    // If this is an sret function, the return should pop the hidden pointer.
    if (!Is64Bit && ArgsAreStructReturn(Op))
      BytesToPopOnReturn = 4;
    BytesCallerReserves = StackSize;
  }

  if (!Is64Bit) {
    RegSaveFrameIndex = 0xAAAAAAA;   // RegSaveFrameIndex is X86-64 only.
    if (CC == CallingConv::X86_FastCall)
      VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs.
  }

  FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}

SDOperand
X86TargetLowering::LowerMemOpCallTo(SDOperand Op, SelectionDAG &DAG,
                                    const SDOperand &StackPtr,
                                    const CCValAssign &VA,
                                    SDOperand Chain,
                                    SDOperand Arg) {
  SDOperand PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset());
  PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
  SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
  unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();
  if (Flags & ISD::ParamFlags::ByVal) {
    return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
  }
  return DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
}

/// ClassifyX86_64SRetCallReturn - Classify how to implement a x86-64
/// struct return call to the specified function. X86-64 ABI specifies
/// some SRet calls are actually returned in registers. Since current
/// LLVM cannot represent multi-value calls, they are represented as
/// calls where the results are passed in a hidden struct provided by
/// the caller. This function examines the type of the struct to
/// determine the correct way to implement the call.
X86::X86_64SRet
X86TargetLowering::ClassifyX86_64SRetCallReturn(const Function *Fn) {
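  // Note: because of the unconditional return below, the classification logic
  // that follows is currently dead code (see the FIXME).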
  return X86::InMemory;

  const PointerType *PTy = cast<PointerType>(Fn->arg_begin()->getType());
  const Type *RTy = PTy->getElementType();
  unsigned Size = getTargetData()->getABITypeSize(RTy);
  if (Size != 16 && Size != 32)
    return X86::InMemory;

  if (Size == 32) {
    const StructType *STy = dyn_cast<StructType>(RTy);
    if (!STy) return X86::InMemory;
    if (STy->getNumElements() == 2 &&
        STy->getElementType(0) == Type::X86_FP80Ty &&
        STy->getElementType(1) == Type::X86_FP80Ty)
      return X86::InX87;
  }

  bool AllFP = true;
  for (Type::subtype_iterator I = RTy->subtype_begin(), E = RTy->subtype_end();
       I != E; ++I) {
    const Type *STy = I->get();
    if (!STy->isFPOrFPVector()) {
      AllFP = false;
      break;
    }
  }

  if (AllFP)
    return X86::InSSE;
  return X86::InGPR64;
}

void X86TargetLowering::X86_64AnalyzeSRetCallOperands(SDNode *TheCall,
                                                      CCAssignFn *Fn,
                                                      CCState &CCInfo) {
  unsigned NumOps = (TheCall->getNumOperands() - 5) / 2;
  for (unsigned i = 1; i != NumOps; ++i) {
    MVT::ValueType ArgVT = TheCall->getOperand(5+2*i).getValueType();
    SDOperand FlagOp = TheCall->getOperand(5+2*i+1);
    unsigned ArgFlags = cast<ConstantSDNode>(FlagOp)->getValue();
    if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo)) {
      cerr << "Call operand #" << i << " has unhandled type "
           << MVT::getValueTypeString(ArgVT) << "\n";
      abort();
    }
  }
}

SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  SDOperand Chain  = Op.getOperand(0);
  unsigned CC      = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
  bool isVarArg    = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool IsTailCall  = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0
                     && CC == CallingConv::Fast && PerformTailCallOpt;
  SDOperand Callee = Op.getOperand(4);
  bool Is64Bit     = Subtarget->is64Bit();
  bool IsStructRet = CallIsStructReturn(Op);

  assert(!(isVarArg && CC == CallingConv::Fast) &&
         "Var args not supported with calling convention fastcc");

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  CCAssignFn *CCFn = CCAssignFnForNode(Op);

  X86::X86_64SRet SRetMethod = X86::InMemory;
  if (Is64Bit && IsStructRet)
    // FIXME: We can't figure out the type of the sret structure for indirect
    // calls. We need to copy more information from CallSite to the ISD::CALL
    // node.
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
      SRetMethod =
        ClassifyX86_64SRetCallReturn(dyn_cast<Function>(G->getGlobal()));

  // UGLY HACK! For x86-64, some 128-bit aggregates are returned in a pair of
  // registers. Unfortunately, llvm does not support i128 yet so we pretend
  // it's an sret call.
  if (SRetMethod != X86::InMemory)
    X86_64AnalyzeSRetCallOperands(Op.Val, CCFn, CCInfo);
  else
    CCInfo.AnalyzeCallOperands(Op.Val, CCFn);

  // Get a count of how many bytes are to be pushed on the stack.
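  // (getNextStackOffset is the high-water mark the CCState recorded while
  // assigning stack locations to the call operands above.)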
  unsigned NumBytes = CCInfo.getNextStackOffset();
  if (CC == CallingConv::Fast)
    NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);

  // Make sure the stack size is 8n+4 bytes so that the start of the arguments
  // and the arguments after the retaddr has been pushed are aligned.
  if (!Is64Bit && CC == CallingConv::X86_FastCall &&
      !Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows() &&
      (NumBytes & 7) == 0)
    NumBytes += 4;

  int FPDiff = 0;
  if (IsTailCall) {
    // Lower arguments at fp - stackoffset + fpdiff.
    unsigned NumBytesCallerPushed =
      MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn();
    FPDiff = NumBytesCallerPushed - NumBytes;

    // Set the delta of movement of the returnaddr stackslot, but only if the
    // new delta is smaller (more negative) than the previously recorded one.
    if (FPDiff < (MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta()))
      MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff);
  }

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes));

  SDOperand RetAddrFrIdx, NewRetAddrFrIdx;
  if (IsTailCall) {
    // Adjust the return address stack slot.
    if (FPDiff) {
      MVT::ValueType VT = Is64Bit ? MVT::i64 : MVT::i32;
      RetAddrFrIdx = getReturnAddressFrameIndex(DAG);
      // Load the "old" return address.
      RetAddrFrIdx = DAG.getLoad(VT, Chain, RetAddrFrIdx, NULL, 0);
      // Calculate the new stack slot for the return address.
      int SlotSize = Is64Bit ? 8 : 4;
      int NewReturnAddrFI =
        MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize);
      NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
      Chain = SDOperand(RetAddrFrIdx.Val, 1);
    }
  }

  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;

  SDOperand StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads. For tail
  // calls, lower arguments that might otherwise be overwritten to the stack
  // slot where they would go on a normal function call.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      if (!IsTailCall || IsPossiblyOverwrittenArgumentOfTailCall(Arg)) {
        assert(VA.isMemLoc());
        if (StackPtr.Val == 0)
          StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy());

        MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
                                               Arg));
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  if (IsTailCall)
    InFlag = SDOperand(); // ??? Isn't this nuking the preceding loop's output?

  // ELF / PIC requires the GOT pointer in the EBX register before function
  // calls via the PLT.
  // This does not work with tail calls, since EBX is not restored correctly
  // by the tailcaller. TODO: this holds at least for x86; verify for x86-64.
  if (!IsTailCall && !Is64Bit &&
      getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT()) {
    Chain = DAG.getCopyToReg(Chain, X86::EBX,
                             DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  if (Is64Bit && isVarArg) {
    // From the AMD64 ABI document:
    // For calls that may call functions that use varargs or stdargs
    // (prototype-less calls or calls to functions containing ellipsis (...) in
    // the declaration) %al is used as a hidden argument to specify the number
    // of SSE registers used. The contents of %al do not need to match exactly
    // the number of registers, but must be an upper bound on the number of SSE
    // registers used and is in the range 0 - 8 inclusive.

    // Count the number of XMM registers allocated.
    static const unsigned XMMArgRegs[] = {
      X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
      X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
    };
    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);

    Chain = DAG.getCopyToReg(Chain, X86::AL,
                             DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
    InFlag = Chain.getValue(1);
  }

  // For tail calls lower the arguments to the 'real' stack slot.
  if (IsTailCall) {
    SmallVector<SDOperand, 8> MemOpChains2;
    SDOperand FIN;
    int FI = 0;
    for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
      CCValAssign &VA = ArgLocs[i];
      if (!VA.isRegLoc()) {
        assert(VA.isMemLoc());
        SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
        SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
        unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();
        // Create the frame index.
        int32_t Offset = VA.getLocMemOffset()+FPDiff;
        uint32_t OpSize = (MVT::getSizeInBits(VA.getLocVT())+7)/8;
        FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
        FIN = DAG.getFrameIndex(FI, MVT::i32);
        SDOperand Source = Arg;
        if (IsPossiblyOverwrittenArgumentOfTailCall(Arg)) {
          // Copy from the caller's stack slots to the stack slots of the tail
          // called function. This needs to be done because if we lowered the
          // arguments directly to their final stack slots we might end up
          // overwriting one another.
          // Get the source stack slot.
          Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
          if (StackPtr.Val == 0)
            StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy());
          Source = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, Source);
          if ((Flags & ISD::ParamFlags::ByVal)==0)
            Source = DAG.getLoad(VA.getValVT(), Chain, Source, NULL, 0);
        }

        if (Flags & ISD::ParamFlags::ByVal) {
          // Copy relative to framepointer.
          MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN, Chain,
                                                           Flags, DAG));
        } else {
          // Store relative to framepointer.
          MemOpChains2.push_back(DAG.getStore(Chain, Source, FIN, NULL, 0));
        }
      }
    }

    if (!MemOpChains2.empty())
      Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                          &MemOpChains2[0], MemOpChains2.size());

    // Store the return address to the appropriate stack slot.
    if (FPDiff)
      Chain = DAG.getStore(Chain, RetAddrFrIdx, NewRetAddrFrIdx, NULL, 0);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use an extra load for direct calls to dllimported functions in
    // non-JIT mode.
    if ((IsTailCall || !Is64Bit ||
         getTargetMachine().getCodeModel() != CodeModel::Large)
        && !Subtarget->GVRequiresExtraLoad(G->getGlobal(),
                                           getTargetMachine(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    if (IsTailCall || !Is64Bit ||
        getTargetMachine().getCodeModel() != CodeModel::Large)
      Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
  } else if (IsTailCall) {
    assert(Callee.getOpcode() == ISD::LOAD &&
           "Function destination must be loaded into virtual register");
    unsigned Opc = Is64Bit ? X86::R9 : X86::ECX;

    Chain = DAG.getCopyToReg(Chain,
                             DAG.getRegister(Opc, getPointerTy()),
                             Callee, InFlag);
    Callee = DAG.getRegister(Opc, getPointerTy());
    // Add the register as a live out.
    DAG.getMachineFunction().getRegInfo().addLiveOut(Opc);
  }

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;

  if (IsTailCall) {
    Ops.push_back(Chain);
    Ops.push_back(DAG.getIntPtrConstant(NumBytes));
    Ops.push_back(DAG.getIntPtrConstant(0));
    if (InFlag.Val)
      Ops.push_back(InFlag);
    Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
    InFlag = Chain.getValue(1);

    // Returns a chain & a flag for retval copy to use.
    NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
    Ops.clear();
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  if (IsTailCall)
    Ops.push_back(DAG.getConstant(FPDiff, MVT::i32));

  // Add an implicit use of the GOT pointer in EBX.
  if (!IsTailCall && !Is64Bit &&
      getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT())
    Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  if (IsTailCall) {
    assert(InFlag.Val &&
           "Flag must be set. Depend on flag being set in LowerRET");
    Chain = DAG.getNode(X86ISD::TAILCALL,
                        Op.Val->getVTList(), &Ops[0], Ops.size());

    return SDOperand(Chain.Val, Op.ResNo);
  }

  Chain = DAG.getNode(X86ISD::CALL, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node.
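  // (CALLSEQ_END carries two byte counts: the size of the argument area the
  // caller allocated, and the number of bytes the callee itself pops on
  // return, computed below.)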
  unsigned NumBytesForCalleeToPush;
  if (IsCalleePop(Op))
    NumBytesForCalleeToPush = NumBytes;  // Callee pops everything
  else if (!Is64Bit && IsStructRet)
    // If this is a call to a struct-return function, the callee
    // pops the hidden struct pointer, so we have to push it back.
    // This is common for Darwin/X86, Linux & Mingw32 targets.
    NumBytesForCalleeToPush = 4;
  else
    NumBytesForCalleeToPush = 0;  // Callee pops nothing.

  // Returns a flag for retval copy to use.
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getIntPtrConstant(NumBytes),
                             DAG.getIntPtrConstant(NumBytesForCalleeToPush),
                             InFlag);
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  switch (SRetMethod) {
  default:
    return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
  case X86::InGPR64:
    return SDOperand(LowerCallResultToTwo64BitRegs(Chain, InFlag, Op.Val,
                                                   X86::RAX, X86::RDX,
                                                   MVT::i64, DAG), Op.ResNo);
  case X86::InSSE:
    return SDOperand(LowerCallResultToTwo64BitRegs(Chain, InFlag, Op.Val,
                                                   X86::XMM0, X86::XMM1,
                                                   MVT::f64, DAG), Op.ResNo);
  case X86::InX87:
    return SDOperand(LowerCallResultToTwoX87Regs(Chain, InFlag, Op.Val, DAG),
                     Op.ResNo);
  }
}


//===----------------------------------------------------------------------===//
//                Fast Calling Convention (tail call) implementation
//===----------------------------------------------------------------------===//

//  Like the StdCall calling convention, the callee cleans up the stack
//  arguments, except that ECX is reserved for storing the tail called
//  function address. Only 2 registers are free for argument passing (inreg).
//  Tail call optimization is performed provided:
//                * tailcallopt is enabled
//                * caller/callee are fastcc
//                * elf/pic is disabled OR
//                * elf/pic enabled + callee is in module + callee has
//                  visibility protected or hidden
//  To keep the stack aligned according to the platform ABI the function
//  GetAlignedArgumentStackSize ensures that the argument delta is always a
//  multiple of the stack alignment. (Dynamic linkers need this - darwin's
//  dyld for example)
//  If a tail called function callee has more arguments than the caller, the
//  caller needs to make sure that there is room to move the RETADDR to. This
//  is achieved by reserving an area the size of the argument delta right
//  after the original RETADDR, but before the saved framepointer or the
//  spilled registers, e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3,
//  arg4). Stack layout:
//    arg1
//    arg2
//    RETADDR
//    [ new RETADDR
//      move area ]
//    (possible EBP)
//    ESI
//    EDI
//    local1 ..

/// GetAlignedArgumentStackSize - Round the stack size up so that it is, e.g.,
/// 16n + 12 bytes for a 16 byte alignment requirement, leaving the stack
/// aligned once the return address has been pushed.
unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
                                                        SelectionDAG& DAG) {
  if (PerformTailCallOpt) {
    MachineFunction &MF = DAG.getMachineFunction();
    const TargetMachine &TM = MF.getTarget();
    const TargetFrameInfo &TFI = *TM.getFrameInfo();
    unsigned StackAlignment = TFI.getStackAlignment();
    uint64_t AlignMask = StackAlignment - 1;
    int64_t Offset = StackSize;
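    // Worked example, assuming a 16-byte stack alignment and a 4-byte slot
    // (x86-32): AlignMask is 15 and the target size has the form 16n + 12,
    // so that pushing the 4-byte return address leaves the stack 16-byte
    // aligned:
    //   StackSize 20 -> 20 + (12 - 4)        == 28 == 16*1 + 12
    //   StackSize 28 -> unchanged
    //   StackSize 13 -> (13 & ~15) + 16 + 12 == 28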
    unsigned SlotSize = Subtarget->is64Bit() ? 8 : 4;
    if ((Offset & AlignMask) <= (StackAlignment - SlotSize)) {
      // The remainder is no more than StackAlignment - SlotSize (e.g. 12),
      // so just add the difference.
      Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
    } else {
      // Mask out the lower bits and add the stack alignment once plus the
      // remaining StackAlignment - SlotSize (e.g. 12) bytes.
      Offset = ((~AlignMask) & Offset) + StackAlignment +
               (StackAlignment-SlotSize);
    }
    StackSize = Offset;
  }
  return StackSize;
}

/// IsEligibleForTailCallOptimization - Check to see whether the next
/// instruction following the call is a return. A function is eligible if
/// caller/callee calling conventions match, currently only fastcc supports
/// tail calls, and the function CALL is immediately followed by a RET.
bool X86TargetLowering::IsEligibleForTailCallOptimization(SDOperand Call,
                                                          SDOperand Ret,
                                                          SelectionDAG& DAG) const {
  if (!PerformTailCallOpt)
    return false;

  // Check whether the CALL node immediately precedes the RET node and whether
  // the return uses the result of the node or is a void return.
  unsigned NumOps = Ret.getNumOperands();
  if ((NumOps == 1 &&
       (Ret.getOperand(0) == SDOperand(Call.Val,1) ||
        Ret.getOperand(0) == SDOperand(Call.Val,0))) ||
      (NumOps > 1 &&
       Ret.getOperand(0) == SDOperand(Call.Val,Call.Val->getNumValues()-1) &&
       Ret.getOperand(1) == SDOperand(Call.Val,0))) {
    MachineFunction &MF = DAG.getMachineFunction();
    unsigned CallerCC = MF.getFunction()->getCallingConv();
    unsigned CalleeCC = cast<ConstantSDNode>(Call.getOperand(1))->getValue();
    if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
      SDOperand Callee = Call.getOperand(4);
      // On elf/pic %ebx needs to be livein.
      if (getTargetMachine().getRelocationModel() != Reloc::PIC_ ||
          !Subtarget->isPICStyleGOT())
        return true;

      // Can only do local tail calls with PIC.
      if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
        return G->getGlobal()->hasHiddenVisibility()
            || G->getGlobal()->hasProtectedVisibility();
    }
  }

  return false;
}

//===----------------------------------------------------------------------===//
//                           Other Lowering Hooks
//===----------------------------------------------------------------------===//


SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  int ReturnAddrIndex = FuncInfo->getRAIndex();

  if (ReturnAddrIndex == 0) {
    // Set up a frame object for the return address.
    if (Subtarget->is64Bit())
      ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(8, -8);
    else
      ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);

    FuncInfo->setRAIndex(ReturnAddrIndex);
  }

  return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
}



/// translateX86CC - do a one-to-one translation of an ISD::CondCode to the
/// X86-specific condition code. It returns false if it cannot do a direct
/// translation. X86CC is the translated CondCode. LHS/RHS are modified as
/// needed.
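/// For example, an integer (setgt X, -1) has no direct X86 condition code;
/// below it is rewritten as a compare of X against 0 with X86::COND_NS
/// (i.e. branch when the sign flag is clear).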
static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
                           unsigned &X86CC, SDOperand &LHS, SDOperand &RHS,
                           SelectionDAG &DAG) {
  X86CC = X86::COND_INVALID;
  if (!isFP) {
    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
      if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
        // X > -1   -> X == 0, jump !sign.
        RHS = DAG.getConstant(0, RHS.getValueType());
        X86CC = X86::COND_NS;
        return true;
      } else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
        // X < 0   -> X == 0, jump on sign.
        X86CC = X86::COND_S;
        return true;
      } else if (SetCCOpcode == ISD::SETLT && RHSC->getValue() == 1) {
        // X < 1   -> X <= 0
        RHS = DAG.getConstant(0, RHS.getValueType());
        X86CC = X86::COND_LE;
        return true;
      }
    }

    switch (SetCCOpcode) {
    default: break;
    case ISD::SETEQ:  X86CC = X86::COND_E;  break;
    case ISD::SETGT:  X86CC = X86::COND_G;  break;
    case ISD::SETGE:  X86CC = X86::COND_GE; break;
    case ISD::SETLT:  X86CC = X86::COND_L;  break;
    case ISD::SETLE:  X86CC = X86::COND_LE; break;
    case ISD::SETNE:  X86CC = X86::COND_NE; break;
    case ISD::SETULT: X86CC = X86::COND_B;  break;
    case ISD::SETUGT: X86CC = X86::COND_A;  break;
    case ISD::SETULE: X86CC = X86::COND_BE; break;
    case ISD::SETUGE: X86CC = X86::COND_AE; break;
    }
  } else {
    // On a floating point condition, the flags are set as follows:
    //  ZF  PF  CF   op
    //   0 | 0 | 0 | X > Y
    //   0 | 0 | 1 | X < Y
    //   1 | 0 | 0 | X == Y
    //   1 | 1 | 1 | unordered
    bool Flip = false;
    switch (SetCCOpcode) {
    default: break;
    case ISD::SETUEQ:
    case ISD::SETEQ:  X86CC = X86::COND_E;  break;
    case ISD::SETOLT: Flip = true; // Fallthrough
    case ISD::SETOGT:
    case ISD::SETGT:  X86CC = X86::COND_A;  break;
    case ISD::SETOLE: Flip = true; // Fallthrough
    case ISD::SETOGE:
    case ISD::SETGE:  X86CC = X86::COND_AE; break;
    case ISD::SETUGT: Flip = true; // Fallthrough
    case ISD::SETULT:
    case ISD::SETLT:  X86CC = X86::COND_B;  break;
    case ISD::SETUGE: Flip = true; // Fallthrough
    case ISD::SETULE:
    case ISD::SETLE:  X86CC = X86::COND_BE; break;
    case ISD::SETONE:
    case ISD::SETNE:  X86CC = X86::COND_NE; break;
    case ISD::SETUO:  X86CC = X86::COND_P;  break;
    case ISD::SETO:   X86CC = X86::COND_NP; break;
    }
    if (Flip)
      std::swap(LHS, RHS);
  }

  return X86CC != X86::COND_INVALID;
}

/// hasFPCMov - is there a floating point cmov for the specific X86 condition
/// code? The current x86 ISA includes the following FP cmov instructions:
/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
static bool hasFPCMov(unsigned X86CC) {
  switch (X86CC) {
  default:
    return false;
  case X86::COND_B:
  case X86::COND_BE:
  case X86::COND_E:
  case X86::COND_P:
  case X86::COND_A:
  case X86::COND_AE:
  case X86::COND_NE:
  case X86::COND_NP:
    return true;
  }
}

/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if its value falls within the specified range
/// [Low, Hi).
static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) {
  if (Op.getOpcode() == ISD::UNDEF)
    return true;

  unsigned Val = cast<ConstantSDNode>(Op)->getValue();
  return (Val >= Low && Val < Hi);
}

/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if its value equals the specified value.
static bool isUndefOrEqual(SDOperand Op, unsigned Val) {
  if (Op.getOpcode() == ISD::UNDEF)
    return true;
  return cast<ConstantSDNode>(Op)->getValue() == Val;
}

/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFD.
bool X86::isPSHUFDMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 2 && N->getNumOperands() != 4)
    return false;

  // Check that the mask doesn't reference the second vector.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    if (cast<ConstantSDNode>(Arg)->getValue() >= e)
      return false;
  }

  return true;
}

/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFHW.
bool X86::isPSHUFHWMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 8)
    return false;

  // Lower quadword copied in order.
  for (unsigned i = 0; i != 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    if (cast<ConstantSDNode>(Arg)->getValue() != i)
      return false;
  }

  // Upper quadword shuffled.
  for (unsigned i = 4; i != 8; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val < 4 || Val > 7)
      return false;
  }

  return true;
}

/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFLW.
bool X86::isPSHUFLWMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 8)
    return false;

  // Upper quadword copied in order.
  for (unsigned i = 4; i != 8; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;

  // Lower quadword shuffled.
  for (unsigned i = 0; i != 4; ++i)
    if (!isUndefOrInRange(N->getOperand(i), 0, 4))
      return false;

  return true;
}

/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to SHUFP*.
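/// e.g. for a 4-element shuffle, <1, 3, 5, 7> is a valid SHUFPS mask: the
/// low half of the result takes elements 1 and 3 of V1, and the high half
/// takes elements 1 and 3 of V2 (mask indices 4-7 refer to V2).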
static bool isSHUFPMask(const SDOperand *Elems, unsigned NumElems) {
  if (NumElems != 2 && NumElems != 4) return false;

  unsigned Half = NumElems / 2;
  for (unsigned i = 0; i < Half; ++i)
    if (!isUndefOrInRange(Elems[i], 0, NumElems))
      return false;
  for (unsigned i = Half; i < NumElems; ++i)
    if (!isUndefOrInRange(Elems[i], NumElems, NumElems*2))
      return false;

  return true;
}

bool X86::isSHUFPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return ::isSHUFPMask(N->op_begin(), N->getNumOperands());
}

/// isCommutedSHUFP - Returns true if the shuffle mask is exactly
/// the reverse of what x86 shuffles want. x86 shuffles require the lower
/// half elements to come from vector 1 (which would equal the destination)
/// and the upper half to come from vector 2.
static bool isCommutedSHUFP(const SDOperand *Ops, unsigned NumOps) {
  if (NumOps != 2 && NumOps != 4) return false;

  unsigned Half = NumOps / 2;
  for (unsigned i = 0; i < Half; ++i)
    if (!isUndefOrInRange(Ops[i], NumOps, NumOps*2))
      return false;
  for (unsigned i = Half; i < NumOps; ++i)
    if (!isUndefOrInRange(Ops[i], 0, NumOps))
      return false;
  return true;
}

static bool isCommutedSHUFP(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return isCommutedSHUFP(N->op_begin(), N->getNumOperands());
}

/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
bool X86::isMOVHLPSMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3
  return isUndefOrEqual(N->getOperand(0), 6) &&
         isUndefOrEqual(N->getOperand(1), 7) &&
         isUndefOrEqual(N->getOperand(2), 2) &&
         isUndefOrEqual(N->getOperand(3), 3);
}

/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
/// <2, 3, 2, 3>
bool X86::isMOVHLPS_v_undef_Mask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect bit0 == 2, bit1 == 3, bit2 == 2, bit3 == 3
  return isUndefOrEqual(N->getOperand(0), 2) &&
         isUndefOrEqual(N->getOperand(1), 3) &&
         isUndefOrEqual(N->getOperand(2), 2) &&
         isUndefOrEqual(N->getOperand(3), 3);
}

/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
bool X86::isMOVLPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4)
    return false;

  for (unsigned i = 0; i < NumElems/2; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i + NumElems))
      return false;

  for (unsigned i = NumElems/2; i < NumElems; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;

  return true;
}

/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}
/// and MOVLHPS.
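/// e.g. for 4 elements the expected mask is <0, 1, 4, 5>: the low half of
/// the result is V1's low half in order, and the high half is the low half
/// of V2.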
bool X86::isMOVHPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4)
    return false;

  for (unsigned i = 0; i < NumElems/2; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;

  for (unsigned i = 0; i < NumElems/2; ++i) {
    SDOperand Arg = N->getOperand(i + NumElems/2);
    if (!isUndefOrEqual(Arg, i + NumElems))
      return false;
  }

  return true;
}

/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
static bool isUNPCKLMask(const SDOperand *Elts, unsigned NumElts,
                         bool V2IsSplat = false) {
  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
    return false;

  for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
    SDOperand BitI  = Elts[i];
    SDOperand BitI1 = Elts[i+1];
    if (!isUndefOrEqual(BitI, j))
      return false;
    if (V2IsSplat) {
      // A normalized splat of V2 references its first element; reject
      // anything else.
      if (!isUndefOrEqual(BitI1, NumElts))
        return false;
    } else {
      if (!isUndefOrEqual(BitI1, j + NumElts))
        return false;
    }
  }

  return true;
}

bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return ::isUNPCKLMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
}

/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKH.
static bool isUNPCKHMask(const SDOperand *Elts, unsigned NumElts,
                         bool V2IsSplat = false) {
  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
    return false;

  for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
    SDOperand BitI  = Elts[i];
    SDOperand BitI1 = Elts[i+1];
    if (!isUndefOrEqual(BitI, j + NumElts/2))
      return false;
    if (V2IsSplat) {
      // As above: a normalized splat of V2 references its first element.
      if (!isUndefOrEqual(BitI1, NumElts))
        return false;
    } else {
      if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts))
        return false;
    }
  }

  return true;
}

bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return ::isUNPCKHMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
}

/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
/// <0, 0, 1, 1>
bool X86::isUNPCKL_v_undef_Mask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
    return false;

  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
    SDOperand BitI  = N->getOperand(i);
    SDOperand BitI1 = N->getOperand(i+1);

    if (!isUndefOrEqual(BitI, j))
      return false;
    if (!isUndefOrEqual(BitI1, j))
      return false;
  }

  return true;
}

/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
/// <2, 2, 3, 3>
bool X86::isUNPCKH_v_undef_Mask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
    return false;

  for (unsigned i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) {
    SDOperand BitI  = N->getOperand(i);
    SDOperand BitI1 = N->getOperand(i + 1);

    if (!isUndefOrEqual(BitI, j))
      return false;
    if (!isUndefOrEqual(BitI1, j))
      return false;
  }

  return true;
}

/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSS,
/// MOVSD, and MOVD, i.e. setting the lowest element.
static bool isMOVLMask(const SDOperand *Elts, unsigned NumElts) {
  if (NumElts != 2 && NumElts != 4)
    return false;

  if (!isUndefOrEqual(Elts[0], NumElts))
    return false;

  for (unsigned i = 1; i < NumElts; ++i) {
    if (!isUndefOrEqual(Elts[i], i))
      return false;
  }

  return true;
}

bool X86::isMOVLMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return ::isMOVLMask(N->op_begin(), N->getNumOperands());
}

/// isCommutedMOVL - Returns true if the shuffle mask is exactly the reverse
/// of what x86 movss wants: x86 movss requires the lowest element to be the
/// lowest element of vector 2 and the other elements to come from vector 1
/// in order.
static bool isCommutedMOVL(const SDOperand *Ops, unsigned NumOps,
                           bool V2IsSplat = false,
                           bool V2IsUndef = false) {
  if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
    return false;

  if (!isUndefOrEqual(Ops[0], 0))
    return false;

  for (unsigned i = 1; i < NumOps; ++i) {
    SDOperand Arg = Ops[i];
    if (!(isUndefOrEqual(Arg, i+NumOps) ||
          (V2IsUndef && isUndefOrInRange(Arg, NumOps, NumOps*2)) ||
          (V2IsSplat && isUndefOrEqual(Arg, NumOps))))
      return false;
  }

  return true;
}

static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false,
                           bool V2IsUndef = false) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return isCommutedMOVL(N->op_begin(), N->getNumOperands(),
                        V2IsSplat, V2IsUndef);
}

/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
bool X86::isMOVSHDUPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect 1, 1, 3, 3
  for (unsigned i = 0; i < 2; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 1) return false;
  }

  bool HasHi = false;
  for (unsigned i = 2; i < 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 3) return false;
    HasHi = true;
  }

  // Don't use movshdup if it can be done with a shufps.
  return HasHi;
}

/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
bool X86::isMOVSLDUPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect 0, 0, 2, 2
  for (unsigned i = 0; i < 2; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 0) return false;
  }

  bool HasHi = false;
  for (unsigned i = 2; i < 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 2) return false;
    HasHi = true;
  }

  // Don't use movsldup if it can be done with a shufps.
  return HasHi;
}

/// isIdentityMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies an identity operation on the LHS or RHS.
static bool isIdentityMask(SDNode *N, bool RHS = false) {
  unsigned NumElems = N->getNumOperands();
  for (unsigned i = 0; i < NumElems; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i + (RHS ? NumElems : 0)))
      return false;
  return true;
}

/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
/// a splat of a single element.
static bool isSplatMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned NumElems = N->getNumOperands();
  SDOperand ElementBase;
  unsigned i = 0;
  for (; i != NumElems; ++i) {
    SDOperand Elt = N->getOperand(i);
    if (isa<ConstantSDNode>(Elt)) {
      ElementBase = Elt;
      break;
    }
  }

  if (!ElementBase.Val)
    return false;

  for (; i != NumElems; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    if (Arg != ElementBase) return false;
  }

  // Make sure it is a splat of the first vector operand.
  return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems;
}

/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
/// a splat of a single element and it's a 2 or 4 element mask.
bool X86::isSplatMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  // We can only splat 64-bit and 32-bit quantities with a single instruction.
  if (N->getNumOperands() != 4 && N->getNumOperands() != 2)
    return false;
  return ::isSplatMask(N);
}

/// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of element zero.
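/// e.g. <0, 0, 0, 0>; undef entries are accepted in any position.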
bool X86::isSplatLoMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
    if (!isUndefOrEqual(N->getOperand(i), 0))
      return false;
  return true;
}

/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
/// instructions.
unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
  unsigned NumOperands = N->getNumOperands();
  unsigned Shift = (NumOperands == 4) ? 2 : 1;
  unsigned Mask = 0;
  for (unsigned i = 0; i < NumOperands; ++i) {
    unsigned Val = 0;
    SDOperand Arg = N->getOperand(NumOperands-i-1);
    if (Arg.getOpcode() != ISD::UNDEF)
      Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val >= NumOperands) Val -= NumOperands;
    Mask |= Val;
    if (i != NumOperands - 1)
      Mask <<= Shift;
  }

  return Mask;
}

/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
/// instructions.
unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
  unsigned Mask = 0;
  // 8 nodes, but we only care about the last 4.
  for (unsigned i = 7; i >= 4; --i) {
    unsigned Val = 0;
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() != ISD::UNDEF)
      Val = cast<ConstantSDNode>(Arg)->getValue();
    Mask |= (Val - 4);
    if (i != 4)
      Mask <<= 2;
  }

  return Mask;
}

/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
/// instructions.
unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
  unsigned Mask = 0;
  // 8 nodes, but we only care about the first 4.
  for (int i = 3; i >= 0; --i) {
    unsigned Val = 0;
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() != ISD::UNDEF)
      Val = cast<ConstantSDNode>(Arg)->getValue();
    Mask |= Val;
    if (i != 0)
      Mask <<= 2;
  }

  return Mask;
}

/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand
/// specifies an 8 element shuffle that can be broken into a pair of
/// PSHUFHW and PSHUFLW.
static bool isPSHUFHW_PSHUFLWMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 8)
    return false;

  // Lower quadword shuffled.
  for (unsigned i = 0; i != 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val >= 4)
      return false;
  }

  // Upper quadword shuffled.
  for (unsigned i = 4; i != 8; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val < 4 || Val > 7)
      return false;
  }

  return true;
}

/// CommuteVectorShuffle - Swap vector_shuffle operands as well as the
/// values in their permute mask.
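/// e.g. commuting vector_shuffle V1, V2, <0, 1, 4, 5> yields
/// vector_shuffle V2, V1, <4, 5, 0, 1>.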
static SDOperand CommuteVectorShuffle(SDOperand Op, SDOperand &V1,
                                      SDOperand &V2, SDOperand &Mask,
                                      SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  MVT::ValueType MaskVT = Mask.getValueType();
  MVT::ValueType EltVT = MVT::getVectorElementType(MaskVT);
  unsigned NumElems = Mask.getNumOperands();
  SmallVector<SDOperand, 8> MaskVec;

  for (unsigned i = 0; i != NumElems; ++i) {
    SDOperand Arg = Mask.getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) {
      MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
      continue;
    }
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val < NumElems)
      MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
    else
      MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
  }

  std::swap(V1, V2);
  Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], NumElems);
  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
}

/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming
/// the two vector operands have swapped position.
static
SDOperand CommuteVectorShuffleMask(SDOperand Mask, SelectionDAG &DAG) {
  MVT::ValueType MaskVT = Mask.getValueType();
  MVT::ValueType EltVT = MVT::getVectorElementType(MaskVT);
  unsigned NumElems = Mask.getNumOperands();
  SmallVector<SDOperand, 8> MaskVec;
  for (unsigned i = 0; i != NumElems; ++i) {
    SDOperand Arg = Mask.getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) {
      MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
      continue;
    }
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val < NumElems)
      MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
    else
      MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
  }
  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], NumElems);
}


/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
/// match movhlps. The lower half elements should come from upper half of
/// V1 (and in order), and the upper half elements should come from the upper
/// half of V2 (and in order).
static bool ShouldXformToMOVHLPS(SDNode *Mask) {
  unsigned NumElems = Mask->getNumOperands();
  if (NumElems != 4)
    return false;
  for (unsigned i = 0, e = 2; i != e; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i+2))
      return false;
  for (unsigned i = 2; i != 4; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i+4))
      return false;
  return true;
}

/// isScalarLoadToVector - Returns true if the node is a scalar load that
/// is promoted to a vector.
static inline bool isScalarLoadToVector(SDNode *N) {
  if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
    N = N->getOperand(0).Val;
    return ISD::isNON_EXTLoad(N);
  }
  return false;
}

/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
/// match movlp{s|d}. The lower half elements should come from lower half of
/// V1 (and in order), and the upper half elements should come from the upper
/// half of V2 (and in order). And since V1 will become the source of the
/// MOVLP, it must be either a vector load or a scalar load to vector.
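/// e.g. for 4 elements, the mask <0, 1, 6, 7> qualifies: the low half is
/// V1's low half in order and the high half is V2's high half in order.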
static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, SDNode *Mask) {
  if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
    return false;
  // If V2 is a vector load, don't do this transformation; we will try to
  // fold the load into a shufps op instead.
  if (ISD::isNON_EXTLoad(V2))
    return false;

  unsigned NumElems = Mask->getNumOperands();
  if (NumElems != 2 && NumElems != 4)
    return false;
  for (unsigned i = 0, e = NumElems/2; i != e; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i))
      return false;
  for (unsigned i = NumElems/2; i != NumElems; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems))
      return false;
  return true;
}

/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
/// all the same.
static bool isSplatVector(SDNode *N) {
  if (N->getOpcode() != ISD::BUILD_VECTOR)
    return false;

  SDOperand SplatValue = N->getOperand(0);
  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
    if (N->getOperand(i) != SplatValue)
      return false;
  return true;
}

/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
/// to an undef.
static bool isUndefShuffle(SDNode *N) {
  if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
    return false;

  SDOperand V1 = N->getOperand(0);
  SDOperand V2 = N->getOperand(1);
  SDOperand Mask = N->getOperand(2);
  unsigned NumElems = Mask.getNumOperands();
  for (unsigned i = 0; i != NumElems; ++i) {
    SDOperand Arg = Mask.getOperand(i);
    if (Arg.getOpcode() != ISD::UNDEF) {
      unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
      if (Val < NumElems && V1.getOpcode() != ISD::UNDEF)
        return false;
      else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF)
        return false;
    }
  }
  return true;
}

/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0.
static inline bool isZeroNode(SDOperand Elt) {
  return ((isa<ConstantSDNode>(Elt) &&
           cast<ConstantSDNode>(Elt)->getValue() == 0) ||
          (isa<ConstantFPSDNode>(Elt) &&
           cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
}

/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
/// to a zero vector.
static bool isZeroShuffle(SDNode *N) {
  if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
    return false;

  SDOperand V1 = N->getOperand(0);
  SDOperand V2 = N->getOperand(1);
  SDOperand Mask = N->getOperand(2);
  unsigned NumElems = Mask.getNumOperands();
  for (unsigned i = 0; i != NumElems; ++i) {
    SDOperand Arg = Mask.getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF)
      continue;

    unsigned Idx = cast<ConstantSDNode>(Arg)->getValue();
    if (Idx < NumElems) {
      unsigned Opc = V1.Val->getOpcode();
      if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.Val))
        continue;
      if (Opc != ISD::BUILD_VECTOR ||
          !isZeroNode(V1.Val->getOperand(Idx)))
        return false;
    } else if (Idx >= NumElems) {
      unsigned Opc = V2.Val->getOpcode();
      if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.Val))
        continue;
      if (Opc != ISD::BUILD_VECTOR ||
          !isZeroNode(V2.Val->getOperand(Idx - NumElems)))
        return false;
    }
  }
  return true;
}

/// getZeroVector - Returns a vector of specified type with all zero elements.
2676/// 2677static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) { 2678 assert(MVT::isVector(VT) && "Expected a vector type"); 2679 2680 // Always build zero vectors as <4 x i32> or <2 x i32> bitcasted to their dest 2681 // type. This ensures they get CSE'd. 2682 SDOperand Cst = DAG.getTargetConstant(0, MVT::i32); 2683 SDOperand Vec; 2684 if (MVT::getSizeInBits(VT) == 64) // MMX 2685 Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, Cst, Cst); 2686 else // SSE 2687 Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst, Cst, Cst, Cst); 2688 return DAG.getNode(ISD::BIT_CONVERT, VT, Vec); 2689} 2690 2691/// getOnesVector - Returns a vector of specified type with all bits set. 2692/// 2693static SDOperand getOnesVector(MVT::ValueType VT, SelectionDAG &DAG) { 2694 assert(MVT::isVector(VT) && "Expected a vector type"); 2695 2696 // Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their dest 2697 // type. This ensures they get CSE'd. 2698 SDOperand Cst = DAG.getTargetConstant(~0U, MVT::i32); 2699 SDOperand Vec; 2700 if (MVT::getSizeInBits(VT) == 64) // MMX 2701 Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, Cst, Cst); 2702 else // SSE 2703 Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst, Cst, Cst, Cst); 2704 return DAG.getNode(ISD::BIT_CONVERT, VT, Vec); 2705} 2706 2707 2708/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements 2709/// that point to V2 points to its first element. 2710static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) { 2711 assert(Mask.getOpcode() == ISD::BUILD_VECTOR); 2712 2713 bool Changed = false; 2714 SmallVector<SDOperand, 8> MaskVec; 2715 unsigned NumElems = Mask.getNumOperands(); 2716 for (unsigned i = 0; i != NumElems; ++i) { 2717 SDOperand Arg = Mask.getOperand(i); 2718 if (Arg.getOpcode() != ISD::UNDEF) { 2719 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2720 if (Val > NumElems) { 2721 Arg = DAG.getConstant(NumElems, Arg.getValueType()); 2722 Changed = true; 2723 } 2724 } 2725 MaskVec.push_back(Arg); 2726 } 2727 2728 if (Changed) 2729 Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(), 2730 &MaskVec[0], MaskVec.size()); 2731 return Mask; 2732} 2733 2734/// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd 2735/// operation of specified width. 2736static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) { 2737 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2738 MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); 2739 2740 SmallVector<SDOperand, 8> MaskVec; 2741 MaskVec.push_back(DAG.getConstant(NumElems, BaseVT)); 2742 for (unsigned i = 1; i != NumElems; ++i) 2743 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2744 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2745} 2746 2747/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation 2748/// of specified width. 
static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) {
  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
  MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
  SmallVector<SDOperand, 8> MaskVec;
  for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
    MaskVec.push_back(DAG.getConstant(i, BaseVT));
    MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT));
  }
  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
}

/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation
/// of specified width.
static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) {
  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
  MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
  unsigned Half = NumElems/2;
  SmallVector<SDOperand, 8> MaskVec;
  for (unsigned i = 0; i != Half; ++i) {
    MaskVec.push_back(DAG.getConstant(i + Half, BaseVT));
    MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT));
  }
  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
}

/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32.
///
static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) {
  SDOperand V1 = Op.getOperand(0);
  SDOperand Mask = Op.getOperand(2);
  MVT::ValueType VT = Op.getValueType();
  unsigned NumElems = Mask.getNumOperands();
  Mask = getUnpacklMask(NumElems, DAG);
  while (NumElems != 4) {
    V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
    NumElems >>= 1;
  }
  V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);

  Mask = getZeroVector(MVT::v4i32, DAG);
  SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1,
                                  DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask);
  return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
}

/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
/// vector and a zero or undef vector. This produces a shuffle where the low
/// element of V2 is swizzled into the zero/undef vector, landing at element
/// Idx. This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT,
                                             unsigned NumElems, unsigned Idx,
                                             bool isZero, SelectionDAG &DAG) {
  SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT);
  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
  MVT::ValueType EVT = MVT::getVectorElementType(MaskVT);
  SmallVector<SDOperand, 16> MaskVec;
  for (unsigned i = 0; i != NumElems; ++i)
    if (i == Idx)  // If this is the insertion idx, put the low elt of V2 here.
      MaskVec.push_back(DAG.getConstant(NumElems, EVT));
    else
      MaskVec.push_back(DAG.getConstant(i, EVT));
  SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                               &MaskVec[0], MaskVec.size());
  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
}

/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
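/// Adjacent byte elements are paired into 16-bit words (zero-extend, shift,
/// or), inserted into a v8i16, and the result is bitcast back to v16i8.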
2816/// 2817static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros, 2818 unsigned NumNonZero, unsigned NumZero, 2819 SelectionDAG &DAG, TargetLowering &TLI) { 2820 if (NumNonZero > 8) 2821 return SDOperand(); 2822 2823 SDOperand V(0, 0); 2824 bool First = true; 2825 for (unsigned i = 0; i < 16; ++i) { 2826 bool ThisIsNonZero = (NonZeros & (1 << i)) != 0; 2827 if (ThisIsNonZero && First) { 2828 if (NumZero) 2829 V = getZeroVector(MVT::v8i16, DAG); 2830 else 2831 V = DAG.getNode(ISD::UNDEF, MVT::v8i16); 2832 First = false; 2833 } 2834 2835 if ((i & 1) != 0) { 2836 SDOperand ThisElt(0, 0), LastElt(0, 0); 2837 bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0; 2838 if (LastIsNonZero) { 2839 LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1)); 2840 } 2841 if (ThisIsNonZero) { 2842 ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i)); 2843 ThisElt = DAG.getNode(ISD::SHL, MVT::i16, 2844 ThisElt, DAG.getConstant(8, MVT::i8)); 2845 if (LastIsNonZero) 2846 ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt); 2847 } else 2848 ThisElt = LastElt; 2849 2850 if (ThisElt.Val) 2851 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt, 2852 DAG.getIntPtrConstant(i/2)); 2853 } 2854 } 2855 2856 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V); 2857} 2858 2859/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16. 2860/// 2861static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros, 2862 unsigned NumNonZero, unsigned NumZero, 2863 SelectionDAG &DAG, TargetLowering &TLI) { 2864 if (NumNonZero > 4) 2865 return SDOperand(); 2866 2867 SDOperand V(0, 0); 2868 bool First = true; 2869 for (unsigned i = 0; i < 8; ++i) { 2870 bool isNonZero = (NonZeros & (1 << i)) != 0; 2871 if (isNonZero) { 2872 if (First) { 2873 if (NumZero) 2874 V = getZeroVector(MVT::v8i16, DAG); 2875 else 2876 V = DAG.getNode(ISD::UNDEF, MVT::v8i16); 2877 First = false; 2878 } 2879 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i), 2880 DAG.getIntPtrConstant(i)); 2881 } 2882 } 2883 2884 return V; 2885} 2886 2887SDOperand 2888X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) { 2889 // All zero's are handled with pxor, all one's are handled with pcmpeqd. 2890 if (ISD::isBuildVectorAllZeros(Op.Val) || ISD::isBuildVectorAllOnes(Op.Val)) { 2891 // Canonicalize this to either <4 x i32> or <2 x i32> (SSE vs MMX) to 2892 // 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are 2893 // eliminated on x86-32 hosts. 
2894 if (Op.getValueType() == MVT::v4i32 || Op.getValueType() == MVT::v2i32) 2895 return Op; 2896 2897 if (ISD::isBuildVectorAllOnes(Op.Val)) 2898 return getOnesVector(Op.getValueType(), DAG); 2899 return getZeroVector(Op.getValueType(), DAG); 2900 } 2901 2902 MVT::ValueType VT = Op.getValueType(); 2903 MVT::ValueType EVT = MVT::getVectorElementType(VT); 2904 unsigned EVTBits = MVT::getSizeInBits(EVT); 2905 2906 unsigned NumElems = Op.getNumOperands(); 2907 unsigned NumZero = 0; 2908 unsigned NumNonZero = 0; 2909 unsigned NonZeros = 0; 2910 bool HasNonImms = false; 2911 SmallSet<SDOperand, 8> Values; 2912 for (unsigned i = 0; i < NumElems; ++i) { 2913 SDOperand Elt = Op.getOperand(i); 2914 if (Elt.getOpcode() == ISD::UNDEF) 2915 continue; 2916 Values.insert(Elt); 2917 if (Elt.getOpcode() != ISD::Constant && 2918 Elt.getOpcode() != ISD::ConstantFP) 2919 HasNonImms = true; 2920 if (isZeroNode(Elt)) 2921 NumZero++; 2922 else { 2923 NonZeros |= (1 << i); 2924 NumNonZero++; 2925 } 2926 } 2927 2928 if (NumNonZero == 0) { 2929 // All undef vector. Return an UNDEF. All zero vectors were handled above. 2930 return DAG.getNode(ISD::UNDEF, VT); 2931 } 2932 2933 // Splat is obviously ok. Let legalizer expand it to a shuffle. 2934 if (Values.size() == 1) 2935 return SDOperand(); 2936 2937 // Special case for single non-zero element. 2938 if (NumNonZero == 1 && NumElems <= 4) { 2939 unsigned Idx = CountTrailingZeros_32(NonZeros); 2940 SDOperand Item = Op.getOperand(Idx); 2941 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item); 2942 if (Idx == 0) 2943 // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. 2944 return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx, 2945 NumZero > 0, DAG); 2946 else if (!HasNonImms) // Otherwise, it's better to do a constpool load. 2947 return SDOperand(); 2948 2949 if (EVTBits == 32) { 2950 // Turn it into a shuffle of zero and zero-extended scalar to vector. 2951 Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0, 2952 DAG); 2953 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2954 MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT); 2955 SmallVector<SDOperand, 8> MaskVec; 2956 for (unsigned i = 0; i < NumElems; i++) 2957 MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT)); 2958 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2959 &MaskVec[0], MaskVec.size()); 2960 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item, 2961 DAG.getNode(ISD::UNDEF, VT), Mask); 2962 } 2963 } 2964 2965 // A vector full of immediates; various special cases are already 2966 // handled, so this is best done with a single constant-pool load. 2967 if (!HasNonImms) 2968 return SDOperand(); 2969 2970 // Let legalizer expand 2-wide build_vectors. 2971 if (EVTBits == 64) 2972 return SDOperand(); 2973 2974 // If element VT is < 32 bits, convert it to inserts into a zero vector. 2975 if (EVTBits == 8 && NumElems == 16) { 2976 SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG, 2977 *this); 2978 if (V.Val) return V; 2979 } 2980 2981 if (EVTBits == 16 && NumElems == 8) { 2982 SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG, 2983 *this); 2984 if (V.Val) return V; 2985 } 2986 2987 // If element VT is == 32 bits, turn it into a number of shuffles. 
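// For v4 with some zero elements, each element becomes either a zero vector
// or a SCALAR_TO_VECTOR; pairs are then merged with MOVL or unpckl depending
// on which of the two is nonzero (the switch on NonZeros bits below).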
2988 SmallVector<SDOperand, 8> V; 2989 V.resize(NumElems); 2990 if (NumElems == 4 && NumZero > 0) { 2991 for (unsigned i = 0; i < 4; ++i) { 2992 bool isZero = !(NonZeros & (1 << i)); 2993 if (isZero) 2994 V[i] = getZeroVector(VT, DAG); 2995 else 2996 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 2997 } 2998 2999 for (unsigned i = 0; i < 2; ++i) { 3000 switch ((NonZeros & (0x3 << i*2)) >> (i*2)) { 3001 default: break; 3002 case 0: 3003 V[i] = V[i*2]; // Must be a zero vector. 3004 break; 3005 case 1: 3006 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2], 3007 getMOVLMask(NumElems, DAG)); 3008 break; 3009 case 2: 3010 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1], 3011 getMOVLMask(NumElems, DAG)); 3012 break; 3013 case 3: 3014 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1], 3015 getUnpacklMask(NumElems, DAG)); 3016 break; 3017 } 3018 } 3019 3020 // Take advantage of the fact GR32 to VR128 scalar_to_vector (i.e. movd) 3021 // clears the upper bits. 3022 // FIXME: we can do the same for v4f32 case when we know both parts of 3023 // the lower half come from scalar_to_vector (loadf32). We should do 3024 // that in post legalizer dag combiner with target specific hooks. 3025 if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0) 3026 return V[0]; 3027 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 3028 MVT::ValueType EVT = MVT::getVectorElementType(MaskVT); 3029 SmallVector<SDOperand, 8> MaskVec; 3030 bool Reverse = (NonZeros & 0x3) == 2; 3031 for (unsigned i = 0; i < 2; ++i) 3032 if (Reverse) 3033 MaskVec.push_back(DAG.getConstant(1-i, EVT)); 3034 else 3035 MaskVec.push_back(DAG.getConstant(i, EVT)); 3036 Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2; 3037 for (unsigned i = 0; i < 2; ++i) 3038 if (Reverse) 3039 MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT)); 3040 else 3041 MaskVec.push_back(DAG.getConstant(i+NumElems, EVT)); 3042 SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3043 &MaskVec[0], MaskVec.size()); 3044 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask); 3045 } 3046 3047 if (Values.size() > 2) { 3048 // Expand into a number of unpckl*. 3049 // e.g. for v4f32 3050 // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0> 3051 // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1> 3052 // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> 3053 SDOperand UnpckMask = getUnpacklMask(NumElems, DAG); 3054 for (unsigned i = 0; i < NumElems; ++i) 3055 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 3056 NumElems >>= 1; 3057 while (NumElems != 0) { 3058 for (unsigned i = 0; i < NumElems; ++i) 3059 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems], 3060 UnpckMask); 3061 NumElems >>= 1; 3062 } 3063 return V[0]; 3064 } 3065 3066 return SDOperand(); 3067} 3068 3069static 3070SDOperand LowerVECTOR_SHUFFLEv8i16(SDOperand V1, SDOperand V2, 3071 SDOperand PermMask, SelectionDAG &DAG, 3072 TargetLowering &TLI) { 3073 SDOperand NewV; 3074 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(8); 3075 MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT); 3076 MVT::ValueType PtrVT = TLI.getPointerTy(); 3077 SmallVector<SDOperand, 8> MaskElts(PermMask.Val->op_begin(), 3078 PermMask.Val->op_end()); 3079 3080 // First record which half of which vector the low elements come from. 
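// Mask indices 0-7 select V1 and 8-15 select V2, so EltIdx / 4 yields a quad
// number: 0 = V1 low half, 1 = V1 high half, 2 = V2 low, 3 = V2 high.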
3081 SmallVector<unsigned, 4> LowQuad(4); 3082 for (unsigned i = 0; i < 4; ++i) { 3083 SDOperand Elt = MaskElts[i]; 3084 if (Elt.getOpcode() == ISD::UNDEF) 3085 continue; 3086 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); 3087 int QuadIdx = EltIdx / 4; 3088 ++LowQuad[QuadIdx]; 3089 } 3090 int BestLowQuad = -1; 3091 unsigned MaxQuad = 1; 3092 for (unsigned i = 0; i < 4; ++i) { 3093 if (LowQuad[i] > MaxQuad) { 3094 BestLowQuad = i; 3095 MaxQuad = LowQuad[i]; 3096 } 3097 } 3098 3099 // Record which half of which vector the high elements come from. 3100 SmallVector<unsigned, 4> HighQuad(4); 3101 for (unsigned i = 4; i < 8; ++i) { 3102 SDOperand Elt = MaskElts[i]; 3103 if (Elt.getOpcode() == ISD::UNDEF) 3104 continue; 3105 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); 3106 int QuadIdx = EltIdx / 4; 3107 ++HighQuad[QuadIdx]; 3108 } 3109 int BestHighQuad = -1; 3110 MaxQuad = 1; 3111 for (unsigned i = 0; i < 4; ++i) { 3112 if (HighQuad[i] > MaxQuad) { 3113 BestHighQuad = i; 3114 MaxQuad = HighQuad[i]; 3115 } 3116 } 3117 3118 // If it's possible to sort parts of either half with PSHUF{H|L}W, then do it. 3119 if (BestLowQuad != -1 || BestHighQuad != -1) { 3120 // First sort the 4 chunks in order using shufpd. 3121 SmallVector<SDOperand, 8> MaskVec; 3122 if (BestLowQuad != -1) 3123 MaskVec.push_back(DAG.getConstant(BestLowQuad, MVT::i32)); 3124 else 3125 MaskVec.push_back(DAG.getConstant(0, MVT::i32)); 3126 if (BestHighQuad != -1) 3127 MaskVec.push_back(DAG.getConstant(BestHighQuad, MVT::i32)); 3128 else 3129 MaskVec.push_back(DAG.getConstant(1, MVT::i32)); 3130 SDOperand Mask= DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, &MaskVec[0],2); 3131 NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v2i64, 3132 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, V1), 3133 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, V2), Mask); 3134 NewV = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, NewV); 3135 3136 // Now sort high and low parts separately. 3137 BitVector InOrder(8); 3138 if (BestLowQuad != -1) { 3139 // Sort lower half in order using PSHUFLW. 3140 MaskVec.clear(); 3141 bool AnyOutOrder = false; 3142 for (unsigned i = 0; i != 4; ++i) { 3143 SDOperand Elt = MaskElts[i]; 3144 if (Elt.getOpcode() == ISD::UNDEF) { 3145 MaskVec.push_back(Elt); 3146 InOrder.set(i); 3147 } else { 3148 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); 3149 if (EltIdx != i) 3150 AnyOutOrder = true; 3151 MaskVec.push_back(DAG.getConstant(EltIdx % 4, MaskEVT)); 3152 // If this element is in the right place after this shuffle, then 3153 // remember it. 3154 if ((int)(EltIdx / 4) == BestLowQuad) 3155 InOrder.set(i); 3156 } 3157 } 3158 if (AnyOutOrder) { 3159 for (unsigned i = 4; i != 8; ++i) 3160 MaskVec.push_back(DAG.getConstant(i, MaskEVT)); 3161 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8); 3162 NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, NewV, NewV, Mask); 3163 } 3164 } 3165 3166 if (BestHighQuad != -1) { 3167 // Sort high half in order using PSHUFHW if possible. 
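// PSHUFHW leaves elements 0-3 untouched, which is why the mask built below
// starts with the identity entries <0,1,2,3>.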
3168 MaskVec.clear(); 3169 for (unsigned i = 0; i != 4; ++i) 3170 MaskVec.push_back(DAG.getConstant(i, MaskEVT)); 3171 bool AnyOutOrder = false; 3172 for (unsigned i = 4; i != 8; ++i) { 3173 SDOperand Elt = MaskElts[i]; 3174 if (Elt.getOpcode() == ISD::UNDEF) { 3175 MaskVec.push_back(Elt); 3176 InOrder.set(i); 3177 } else { 3178 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); 3179 if (EltIdx != i) 3180 AnyOutOrder = true; 3181 MaskVec.push_back(DAG.getConstant((EltIdx % 4) + 4, MaskEVT)); 3182 // If this element is in the right place after this shuffle, then 3183 // remember it. 3184 if ((int)(EltIdx / 4) == BestHighQuad) 3185 InOrder.set(i); 3186 } 3187 } 3188 if (AnyOutOrder) { 3189 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8); 3190 NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, NewV, NewV, Mask); 3191 } 3192 } 3193 3194 // The other elements are put in the right place using pextrw and pinsrw. 3195 for (unsigned i = 0; i != 8; ++i) { 3196 if (InOrder[i]) 3197 continue; 3198 SDOperand Elt = MaskElts[i]; 3199 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); 3200 if (EltIdx == i) 3201 continue; 3202 SDOperand ExtOp = (EltIdx < 8) 3203 ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1, 3204 DAG.getConstant(EltIdx, PtrVT)) 3205 : DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2, 3206 DAG.getConstant(EltIdx - 8, PtrVT)); 3207 NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp, 3208 DAG.getConstant(i, PtrVT)); 3209 } 3210 return NewV; 3211 } 3212 3213 // PSHUF{H|L}W are not used. Lower into extracts and inserts but try to use 3214 ///as few as possible. 3215 // First, let's find out how many elements are already in the right order. 3216 unsigned V1InOrder = 0; 3217 unsigned V1FromV1 = 0; 3218 unsigned V2InOrder = 0; 3219 unsigned V2FromV2 = 0; 3220 SmallVector<SDOperand, 8> V1Elts; 3221 SmallVector<SDOperand, 8> V2Elts; 3222 for (unsigned i = 0; i < 8; ++i) { 3223 SDOperand Elt = MaskElts[i]; 3224 if (Elt.getOpcode() == ISD::UNDEF) { 3225 V1Elts.push_back(Elt); 3226 V2Elts.push_back(Elt); 3227 ++V1InOrder; 3228 ++V2InOrder; 3229 continue; 3230 } 3231 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); 3232 if (EltIdx == i) { 3233 V1Elts.push_back(Elt); 3234 V2Elts.push_back(DAG.getConstant(i+8, MaskEVT)); 3235 ++V1InOrder; 3236 } else if (EltIdx == i+8) { 3237 V1Elts.push_back(Elt); 3238 V2Elts.push_back(DAG.getConstant(i, MaskEVT)); 3239 ++V2InOrder; 3240 } else if (EltIdx < 8) { 3241 V1Elts.push_back(Elt); 3242 ++V1FromV1; 3243 } else { 3244 V2Elts.push_back(DAG.getConstant(EltIdx-8, MaskEVT)); 3245 ++V2FromV2; 3246 } 3247 } 3248 3249 if (V2InOrder > V1InOrder) { 3250 PermMask = CommuteVectorShuffleMask(PermMask, DAG); 3251 std::swap(V1, V2); 3252 std::swap(V1Elts, V2Elts); 3253 std::swap(V1FromV1, V2FromV2); 3254 } 3255 3256 if ((V1FromV1 + V1InOrder) != 8) { 3257 // Some elements are from V2. 
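// First shuffle the out-of-place V1 elements into position, then insert the
// V2 elements one at a time with pextrw/pinsrw.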
3258    if (V1FromV1) {
3259      // If there are elements that are from V1 but out of place,
3260      // then first sort them in place.
3261      SmallVector<SDOperand, 8> MaskVec;
3262      for (unsigned i = 0; i < 8; ++i) {
3263        SDOperand Elt = V1Elts[i];
3264        if (Elt.getOpcode() == ISD::UNDEF) {
3265          MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
3266          continue;
3267        }
3268        unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
3269        if (EltIdx >= 8)
3270          MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
3271        else
3272          MaskVec.push_back(DAG.getConstant(EltIdx, MaskEVT));
3273      }
3274      SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
3275      V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, V1, V1, Mask);
3276    }
3277
3278    NewV = V1;
3279    for (unsigned i = 0; i < 8; ++i) {
3280      SDOperand Elt = V1Elts[i];
3281      if (Elt.getOpcode() == ISD::UNDEF)
3282        continue;
3283      unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
3284      if (EltIdx < 8)
3285        continue;
3286      SDOperand ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2,
3287                                    DAG.getConstant(EltIdx - 8, PtrVT));
3288      NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
3289                         DAG.getConstant(i, PtrVT));
3290    }
3291    return NewV;
3292  } else {
3293    // All elements are from V1.
3294    NewV = V1;
3295    for (unsigned i = 0; i < 8; ++i) {
3296      SDOperand Elt = V1Elts[i];
3297      if (Elt.getOpcode() == ISD::UNDEF)
3298        continue;
3299      unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
3300      SDOperand ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1,
3301                                    DAG.getConstant(EltIdx, PtrVT));
3302      NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
3303                         DAG.getConstant(i, PtrVT));
3304    }
3305    return NewV;
3306  }
3307}
3308
3309/// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
3310/// ones, or rewriting v4i32 / v4f32 as 2 wide ones if possible. This can be
3311/// done when every pair / quad of shuffle mask elements points to elements in
3312/// the right sequence. e.g.
3313/// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15>
3314static
3315SDOperand RewriteAsNarrowerShuffle(SDOperand V1, SDOperand V2,
3316                                   MVT::ValueType VT,
3317                                   SDOperand PermMask, SelectionDAG &DAG,
3318                                   TargetLowering &TLI) {
3319  unsigned NumElems = PermMask.getNumOperands();
3320  unsigned NewWidth = (NumElems == 4) ?
2 : 4; 3321 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NewWidth); 3322 MVT::ValueType NewVT = MaskVT; 3323 switch (VT) { 3324 case MVT::v4f32: NewVT = MVT::v2f64; break; 3325 case MVT::v4i32: NewVT = MVT::v2i64; break; 3326 case MVT::v8i16: NewVT = MVT::v4i32; break; 3327 case MVT::v16i8: NewVT = MVT::v4i32; break; 3328 default: assert(false && "Unexpected!"); 3329 } 3330 3331 if (NewWidth == 2) 3332 if (MVT::isInteger(VT)) 3333 NewVT = MVT::v2i64; 3334 else 3335 NewVT = MVT::v2f64; 3336 unsigned Scale = NumElems / NewWidth; 3337 SmallVector<SDOperand, 8> MaskVec; 3338 for (unsigned i = 0; i < NumElems; i += Scale) { 3339 unsigned StartIdx = ~0U; 3340 for (unsigned j = 0; j < Scale; ++j) { 3341 SDOperand Elt = PermMask.getOperand(i+j); 3342 if (Elt.getOpcode() == ISD::UNDEF) 3343 continue; 3344 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); 3345 if (StartIdx == ~0U) 3346 StartIdx = EltIdx - (EltIdx % Scale); 3347 if (EltIdx != StartIdx + j) 3348 return SDOperand(); 3349 } 3350 if (StartIdx == ~0U) 3351 MaskVec.push_back(DAG.getNode(ISD::UNDEF, MVT::i32)); 3352 else 3353 MaskVec.push_back(DAG.getConstant(StartIdx / Scale, MVT::i32)); 3354 } 3355 3356 V1 = DAG.getNode(ISD::BIT_CONVERT, NewVT, V1); 3357 V2 = DAG.getNode(ISD::BIT_CONVERT, NewVT, V2); 3358 return DAG.getNode(ISD::VECTOR_SHUFFLE, NewVT, V1, V2, 3359 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3360 &MaskVec[0], MaskVec.size())); 3361} 3362 3363SDOperand 3364X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { 3365 SDOperand V1 = Op.getOperand(0); 3366 SDOperand V2 = Op.getOperand(1); 3367 SDOperand PermMask = Op.getOperand(2); 3368 MVT::ValueType VT = Op.getValueType(); 3369 unsigned NumElems = PermMask.getNumOperands(); 3370 bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; 3371 bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; 3372 bool V1IsSplat = false; 3373 bool V2IsSplat = false; 3374 3375 if (isUndefShuffle(Op.Val)) 3376 return DAG.getNode(ISD::UNDEF, VT); 3377 3378 if (isZeroShuffle(Op.Val)) 3379 return getZeroVector(VT, DAG); 3380 3381 if (isIdentityMask(PermMask.Val)) 3382 return V1; 3383 else if (isIdentityMask(PermMask.Val, true)) 3384 return V2; 3385 3386 if (isSplatMask(PermMask.Val)) { 3387 if (NumElems <= 4) return Op; 3388 // Promote it to a v4i32 splat. 3389 return PromoteSplat(Op, DAG); 3390 } 3391 3392 // If the shuffle can be profitably rewritten as a narrower shuffle, then 3393 // do it! 3394 if (VT == MVT::v8i16 || VT == MVT::v16i8) { 3395 SDOperand NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this); 3396 if (NewOp.Val) 3397 return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG)); 3398 } else if ((VT == MVT::v4i32 || (VT == MVT::v4f32 && Subtarget->hasSSE2()))) { 3399 // FIXME: Figure out a cleaner way to do this. 3400 // Try to make use of movq to zero out the top part. 
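// movq moves the low 64 bits and zeroes the upper 64, so a narrowed 2-element
// shuffle whose mask matches a (possibly commuted) MOVL can be selected as a
// single movq.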
3401    if (ISD::isBuildVectorAllZeros(V2.Val)) {
3402      SDOperand NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this);
3403      if (NewOp.Val) {
3404        SDOperand NewV1 = NewOp.getOperand(0);
3405        SDOperand NewV2 = NewOp.getOperand(1);
3406        SDOperand NewMask = NewOp.getOperand(2);
3407        if (isCommutedMOVL(NewMask.Val, true, false)) {
3408          NewOp = CommuteVectorShuffle(NewOp, NewV1, NewV2, NewMask, DAG);
3409          NewOp = DAG.getNode(ISD::VECTOR_SHUFFLE, NewOp.getValueType(),
3410                              NewV1, NewV2, getMOVLMask(2, DAG));
3411          return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
3412        }
3413      }
3414    } else if (ISD::isBuildVectorAllZeros(V1.Val)) {
3415      SDOperand NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this);
3416      if (NewOp.Val && X86::isMOVLMask(NewOp.getOperand(2).Val))
3417        return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
3418    }
3419  }
3420
3421  if (X86::isMOVLMask(PermMask.Val))
3422    return (V1IsUndef) ? V2 : Op;
3423
3424  if (X86::isMOVSHDUPMask(PermMask.Val) ||
3425      X86::isMOVSLDUPMask(PermMask.Val) ||
3426      X86::isMOVHLPSMask(PermMask.Val) ||
3427      X86::isMOVHPMask(PermMask.Val) ||
3428      X86::isMOVLPMask(PermMask.Val))
3429    return Op;
3430
3431  if (ShouldXformToMOVHLPS(PermMask.Val) ||
3432      ShouldXformToMOVLP(V1.Val, V2.Val, PermMask.Val))
3433    return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
3434
3435  bool Commuted = false;
3436  // FIXME: This should also accept a bitcast of a splat? Be careful, not
3437  // 1,1,1,1 -> v8i16 though.
3438  V1IsSplat = isSplatVector(V1.Val);
3439  V2IsSplat = isSplatVector(V2.Val);
3440
3441  // Canonicalize the splat or undef, if present, to be on the RHS.
3442  if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
3443    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
3444    std::swap(V1IsSplat, V2IsSplat);
3445    std::swap(V1IsUndef, V2IsUndef);
3446    Commuted = true;
3447  }
3448
3449  // FIXME: Figure out a cleaner way to do this.
3450  if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) {
3451    if (V2IsUndef) return V1;
3452    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
3453    if (V2IsSplat) {
3454      // V2 is a splat, so the mask may be malformed. That is, it may point
3455      // to any V2 element. The instruction selector won't like this. Get
3456      // a corrected mask and commute to form a proper MOVS{S|D}.
3457      SDOperand NewMask = getMOVLMask(NumElems, DAG);
3458      if (NewMask.Val != PermMask.Val)
3459        Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
3460    }
3461    return Op;
3462  }
3463
3464  if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
3465      X86::isUNPCKH_v_undef_Mask(PermMask.Val) ||
3466      X86::isUNPCKLMask(PermMask.Val) ||
3467      X86::isUNPCKHMask(PermMask.Val))
3468    return Op;
3469
3470  if (V2IsSplat) {
3471    // Normalize the mask so all entries that point to V2 point to its first
3472    // element, then try to match unpck{h|l} again. If they match, return a
3473    // new vector_shuffle with the corrected mask.
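// e.g. for v4 with a splat V2, <0,5,1,7> is effectively an unpckl: the V2
// indices may point at any V2 element, so the shuffle is rebuilt with the
// canonical unpckl mask <0,4,1,5>.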
3474    SDOperand NewMask = NormalizeMask(PermMask, DAG);
3475    if (NewMask.Val != PermMask.Val) {
3476      if (X86::isUNPCKLMask(PermMask.Val, true)) {
3477        SDOperand NewMask = getUnpacklMask(NumElems, DAG);
3478        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
3479      } else if (X86::isUNPCKHMask(PermMask.Val, true)) {
3480        SDOperand NewMask = getUnpackhMask(NumElems, DAG);
3481        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
3482      }
3483    }
3484  }
3485
3486  // Normalize the node to match x86 shuffle ops if needed.
3487  if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.Val))
3488    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
3489
3490  if (Commuted) {
3491    // Commute it back and try unpck* again.
3492    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
3493    if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
3494        X86::isUNPCKH_v_undef_Mask(PermMask.Val) ||
3495        X86::isUNPCKLMask(PermMask.Val) ||
3496        X86::isUNPCKHMask(PermMask.Val))
3497      return Op;
3498  }
3499
3500  // If VT is integer, try PSHUF* first, then SHUFP*.
3501  if (MVT::isInteger(VT)) {
3502    // MMX doesn't have PSHUFD; it does have PSHUFW. While it's theoretically
3503    // possible to shuffle a v2i32 using PSHUFW, that's not yet implemented.
3504    if (((MVT::getSizeInBits(VT) != 64 || NumElems == 4) &&
3505         X86::isPSHUFDMask(PermMask.Val)) ||
3506        X86::isPSHUFHWMask(PermMask.Val) ||
3507        X86::isPSHUFLWMask(PermMask.Val)) {
3508      if (V2.getOpcode() != ISD::UNDEF)
3509        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
3510                           DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
3511      return Op;
3512    }
3513
3514    if (X86::isSHUFPMask(PermMask.Val) &&
3515        MVT::getSizeInBits(VT) != 64) // Don't do this for MMX.
3516      return Op;
3517  } else {
3518    // Floating point cases in the other order.
3519    if (X86::isSHUFPMask(PermMask.Val))
3520      return Op;
3521    if (X86::isPSHUFDMask(PermMask.Val) ||
3522        X86::isPSHUFHWMask(PermMask.Val) ||
3523        X86::isPSHUFLWMask(PermMask.Val)) {
3524      if (V2.getOpcode() != ISD::UNDEF)
3525        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
3526                           DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
3527      return Op;
3528    }
3529  }
3530
3531  // Handle v8i16 specifically since SSE can do byte extraction and insertion.
3532  if (VT == MVT::v8i16) {
3533    SDOperand NewOp = LowerVECTOR_SHUFFLEv8i16(V1, V2, PermMask, DAG, *this);
3534    if (NewOp.Val)
3535      return NewOp;
3536  }
3537
3538  // Handle all 4 wide cases with a number of shuffles.
3539  if (NumElems == 4 && MVT::getSizeInBits(VT) != 64) {
3540    // Don't do this for MMX.
3541    MVT::ValueType MaskVT = PermMask.getValueType();
3542    MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
3543    SmallVector<std::pair<int, int>, 8> Locs;
3544    Locs.reserve(NumElems);
3545    SmallVector<SDOperand, 8> Mask1(NumElems,
3546                                    DAG.getNode(ISD::UNDEF, MaskEVT));
3547    SmallVector<SDOperand, 8> Mask2(NumElems,
3548                                    DAG.getNode(ISD::UNDEF, MaskEVT));
3549    unsigned NumHi = 0;
3550    unsigned NumLo = 0;
3551    // If no more than two elements come from either vector, this can be
3552    // implemented with two shuffles. The first shuffle gathers the elements;
3553    // the second shuffle, which takes the first shuffle as both of its
3554    // vector operands, puts the elements into the right order.
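// e.g. mask <0,4,2,5>: the first shuffle gathers <0,2,4,5> into one value;
// the second shuffle of that value with itself, using mask <0,2,5,7>,
// restores the requested order.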
3555 for (unsigned i = 0; i != NumElems; ++i) { 3556 SDOperand Elt = PermMask.getOperand(i); 3557 if (Elt.getOpcode() == ISD::UNDEF) { 3558 Locs[i] = std::make_pair(-1, -1); 3559 } else { 3560 unsigned Val = cast<ConstantSDNode>(Elt)->getValue(); 3561 if (Val < NumElems) { 3562 Locs[i] = std::make_pair(0, NumLo); 3563 Mask1[NumLo] = Elt; 3564 NumLo++; 3565 } else { 3566 Locs[i] = std::make_pair(1, NumHi); 3567 if (2+NumHi < NumElems) 3568 Mask1[2+NumHi] = Elt; 3569 NumHi++; 3570 } 3571 } 3572 } 3573 if (NumLo <= 2 && NumHi <= 2) { 3574 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 3575 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3576 &Mask1[0], Mask1.size())); 3577 for (unsigned i = 0; i != NumElems; ++i) { 3578 if (Locs[i].first == -1) 3579 continue; 3580 else { 3581 unsigned Idx = (i < NumElems/2) ? 0 : NumElems; 3582 Idx += Locs[i].first * (NumElems/2) + Locs[i].second; 3583 Mask2[i] = DAG.getConstant(Idx, MaskEVT); 3584 } 3585 } 3586 3587 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, 3588 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3589 &Mask2[0], Mask2.size())); 3590 } 3591 3592 // Break it into (shuffle shuffle_hi, shuffle_lo). 3593 Locs.clear(); 3594 SmallVector<SDOperand,8> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 3595 SmallVector<SDOperand,8> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 3596 SmallVector<SDOperand,8> *MaskPtr = &LoMask; 3597 unsigned MaskIdx = 0; 3598 unsigned LoIdx = 0; 3599 unsigned HiIdx = NumElems/2; 3600 for (unsigned i = 0; i != NumElems; ++i) { 3601 if (i == NumElems/2) { 3602 MaskPtr = &HiMask; 3603 MaskIdx = 1; 3604 LoIdx = 0; 3605 HiIdx = NumElems/2; 3606 } 3607 SDOperand Elt = PermMask.getOperand(i); 3608 if (Elt.getOpcode() == ISD::UNDEF) { 3609 Locs[i] = std::make_pair(-1, -1); 3610 } else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) { 3611 Locs[i] = std::make_pair(MaskIdx, LoIdx); 3612 (*MaskPtr)[LoIdx] = Elt; 3613 LoIdx++; 3614 } else { 3615 Locs[i] = std::make_pair(MaskIdx, HiIdx); 3616 (*MaskPtr)[HiIdx] = Elt; 3617 HiIdx++; 3618 } 3619 } 3620 3621 SDOperand LoShuffle = 3622 DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 3623 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3624 &LoMask[0], LoMask.size())); 3625 SDOperand HiShuffle = 3626 DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 3627 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3628 &HiMask[0], HiMask.size())); 3629 SmallVector<SDOperand, 8> MaskOps; 3630 for (unsigned i = 0; i != NumElems; ++i) { 3631 if (Locs[i].first == -1) { 3632 MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT)); 3633 } else { 3634 unsigned Idx = Locs[i].first * NumElems + Locs[i].second; 3635 MaskOps.push_back(DAG.getConstant(Idx, MaskEVT)); 3636 } 3637 } 3638 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle, 3639 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3640 &MaskOps[0], MaskOps.size())); 3641 } 3642 3643 return SDOperand(); 3644} 3645 3646SDOperand 3647X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { 3648 if (!isa<ConstantSDNode>(Op.getOperand(1))) 3649 return SDOperand(); 3650 3651 MVT::ValueType VT = Op.getValueType(); 3652 // TODO: handle v16i8. 3653 if (MVT::getSizeInBits(VT) == 16) { 3654 SDOperand Vec = Op.getOperand(0); 3655 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 3656 if (Idx == 0) 3657 return DAG.getNode(ISD::TRUNCATE, MVT::i16, 3658 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, 3659 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, Vec), 3660 Op.getOperand(1))); 3661 // Transform it so it match pextrw which produces a 32-bit result. 
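// pextrw zero-extends the 16-bit element into a 32-bit register, so the
// extract is performed at i32 and an AssertZext records the known-zero high
// bits before truncating back to i16.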
3662    MVT::ValueType EVT = (MVT::ValueType)(VT+1);
3663    SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
3664                                    Op.getOperand(0), Op.getOperand(1));
3665    SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract,
3666                                   DAG.getValueType(VT));
3667    return DAG.getNode(ISD::TRUNCATE, VT, Assert);
3668  } else if (MVT::getSizeInBits(VT) == 32) {
3669    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
3670    if (Idx == 0)
3671      return Op;
3672    // SHUFPS the element to the lowest double word, then movss.
3673    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
3674    SmallVector<SDOperand, 8> IdxVec;
3675    IdxVec.
3676      push_back(DAG.getConstant(Idx, MVT::getVectorElementType(MaskVT)));
3677    IdxVec.
3678      push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
3679    IdxVec.
3680      push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
3681    IdxVec.
3682      push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
3683    SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3684                                 &IdxVec[0], IdxVec.size());
3685    SDOperand Vec = Op.getOperand(0);
3686    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
3687                      Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
3688    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
3689                       DAG.getIntPtrConstant(0));
3690  } else if (MVT::getSizeInBits(VT) == 64) {
3691    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
3692    if (Idx == 0)
3693      return Op;
3694
3695    // UNPCKHPD the element to the lowest double word, then movsd.
3696    // Note if the lower 64 bits of the result of the UNPCKHPD are then stored
3697    // to a f64mem, the whole operation is folded into a single MOVHPDmr.
3698    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
3699    SmallVector<SDOperand, 8> IdxVec;
3700    IdxVec.push_back(DAG.getConstant(1, MVT::getVectorElementType(MaskVT)));
3701    IdxVec.
3702      push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
3703    SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3704                                 &IdxVec[0], IdxVec.size());
3705    SDOperand Vec = Op.getOperand(0);
3706    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
3707                      Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
3708    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
3709                       DAG.getIntPtrConstant(0));
3710  }
3711
3712  return SDOperand();
3713}
3714
3715SDOperand
3716X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
3717  MVT::ValueType VT = Op.getValueType();
3718  MVT::ValueType EVT = MVT::getVectorElementType(VT);
3719  if (EVT == MVT::i8)
3720    return SDOperand();
3721
3722  SDOperand N0 = Op.getOperand(0);
3723  SDOperand N1 = Op.getOperand(1);
3724  SDOperand N2 = Op.getOperand(2);
3725
3726  if (MVT::getSizeInBits(EVT) == 16) {
3727    // Transform it so it matches pinsrw which expects a 16-bit value in a GR32
3728    // as its second argument.
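// pinsrw only reads the low 16 bits of the GR32, so an ANY_EXTEND of the
// value is sufficient.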
3729    if (N1.getValueType() != MVT::i32)
3730      N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
3731    if (N2.getValueType() != MVT::i32)
3732      N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getValue());
3733    return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
3734  }
3735  return SDOperand();
3736}
3737
3738SDOperand
3739X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
3740  SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
3741  return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
3742}
3743
3744// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
3745// their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is
3746// one of the above mentioned nodes. It has to be wrapped because otherwise
3747// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
3748// be used to form an addressing mode. These wrapped nodes will be selected
3749// into MOV32ri.
3750SDOperand
3751X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
3752  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3753  SDOperand Result = DAG.getTargetConstantPool(CP->getConstVal(),
3754                                               getPointerTy(),
3755                                               CP->getAlignment());
3756  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
3757  // With PIC, the address is actually $g + Offset.
3758  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
3759      !Subtarget->isPICStyleRIPRel()) {
3760    Result = DAG.getNode(ISD::ADD, getPointerTy(),
3761                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3762                         Result);
3763  }
3764
3765  return Result;
3766}
3767
3768SDOperand
3769X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
3770  GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3771  SDOperand Result = DAG.getTargetGlobalAddress(GV, getPointerTy());
3772  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
3773  // With PIC, the address is actually $g + Offset.
3774  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
3775      !Subtarget->isPICStyleRIPRel()) {
3776    Result = DAG.getNode(ISD::ADD, getPointerTy(),
3777                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3778                         Result);
3779  }
3780
3781  // For Darwin & Mingw32, external and weak symbols are indirect, so we want to
3782  // load the value at address GV, not the value of GV itself. This means that
3783  // the GlobalAddress must be in the base or index register of the address, not
3784  // the GV offset field.
Platform check is inside GVRequiresExtraLoad() call 3785 // The same applies for external symbols during PIC codegen 3786 if (Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false)) 3787 Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result, NULL, 0); 3788 3789 return Result; 3790} 3791 3792// Lower ISD::GlobalTLSAddress using the "general dynamic" model 3793static SDOperand 3794LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, 3795 const MVT::ValueType PtrVT) { 3796 SDOperand InFlag; 3797 SDOperand Chain = DAG.getCopyToReg(DAG.getEntryNode(), X86::EBX, 3798 DAG.getNode(X86ISD::GlobalBaseReg, 3799 PtrVT), InFlag); 3800 InFlag = Chain.getValue(1); 3801 3802 // emit leal symbol@TLSGD(,%ebx,1), %eax 3803 SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag); 3804 SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), 3805 GA->getValueType(0), 3806 GA->getOffset()); 3807 SDOperand Ops[] = { Chain, TGA, InFlag }; 3808 SDOperand Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 3); 3809 InFlag = Result.getValue(2); 3810 Chain = Result.getValue(1); 3811 3812 // call ___tls_get_addr. This function receives its argument in 3813 // the register EAX. 3814 Chain = DAG.getCopyToReg(Chain, X86::EAX, Result, InFlag); 3815 InFlag = Chain.getValue(1); 3816 3817 NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 3818 SDOperand Ops1[] = { Chain, 3819 DAG.getTargetExternalSymbol("___tls_get_addr", 3820 PtrVT), 3821 DAG.getRegister(X86::EAX, PtrVT), 3822 DAG.getRegister(X86::EBX, PtrVT), 3823 InFlag }; 3824 Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 5); 3825 InFlag = Chain.getValue(1); 3826 3827 return DAG.getCopyFromReg(Chain, X86::EAX, PtrVT, InFlag); 3828} 3829 3830// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or 3831// "local exec" model. 3832static SDOperand 3833LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, 3834 const MVT::ValueType PtrVT) { 3835 // Get the Thread Pointer 3836 SDOperand ThreadPointer = DAG.getNode(X86ISD::THREAD_POINTER, PtrVT); 3837 // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial 3838 // exec) 3839 SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), 3840 GA->getValueType(0), 3841 GA->getOffset()); 3842 SDOperand Offset = DAG.getNode(X86ISD::Wrapper, PtrVT, TGA); 3843 3844 if (GA->getGlobal()->isDeclaration()) // initial exec TLS model 3845 Offset = DAG.getLoad(PtrVT, DAG.getEntryNode(), Offset, NULL, 0); 3846 3847 // The address of the thread local variable is the add of the thread 3848 // pointer with the offset of the variable. 
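// On x86-32 ELF the thread pointer is read through %gs; the offset is either
// the @ntpoff constant (local exec) or a value loaded from the GOT via
// @indntpoff (initial exec).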
3849  return DAG.getNode(ISD::ADD, PtrVT, ThreadPointer, Offset);
3850}
3851
3852SDOperand
3853X86TargetLowering::LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG) {
3854  // TODO: implement the "local dynamic" model
3855  // TODO: implement the "initial exec" model for pic executables
3856  assert(!Subtarget->is64Bit() && Subtarget->isTargetELF() &&
3857         "TLS not implemented for non-ELF and 64-bit targets");
3858  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3859  // If the relocation model is PIC, use the "General Dynamic" TLS Model,
3860  // otherwise use the "Local Exec" TLS Model.
3861  if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
3862    return LowerToTLSGeneralDynamicModel(GA, DAG, getPointerTy());
3863  else
3864    return LowerToTLSExecModel(GA, DAG, getPointerTy());
3865}
3866
3867SDOperand
3868X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) {
3869  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
3870  SDOperand Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
3871  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
3872  // With PIC, the address is actually $g + Offset.
3873  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
3874      !Subtarget->isPICStyleRIPRel()) {
3875    Result = DAG.getNode(ISD::ADD, getPointerTy(),
3876                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3877                         Result);
3878  }
3879
3880  return Result;
3881}
3882
3883SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
3884  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3885  SDOperand Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
3886  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
3887  // With PIC, the address is actually $g + Offset.
3888  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
3889      !Subtarget->isPICStyleRIPRel()) {
3890    Result = DAG.getNode(ISD::ADD, getPointerTy(),
3891                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3892                         Result);
3893  }
3894
3895  return Result;
3896}
3897
3898/// LowerShift - Lower SRA_PARTS and friends, which return two i32 values and
3899/// take a 2 x i32 value to shift plus a shift amount.
3900SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
3901  assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
3902         "Not an i64 shift!");
3903  bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
3904  SDOperand ShOpLo = Op.getOperand(0);
3905  SDOperand ShOpHi = Op.getOperand(1);
3906  SDOperand ShAmt = Op.getOperand(2);
3907  SDOperand Tmp1 = isSRA ?
3908    DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) :
3909    DAG.getConstant(0, MVT::i32);
3910
3911  SDOperand Tmp2, Tmp3;
3912  if (Op.getOpcode() == ISD::SHL_PARTS) {
3913    Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
3914    Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
3915  } else {
3916    Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
3917    Tmp3 = DAG.getNode(isSRA ?
ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt); 3918 } 3919 3920 const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 3921 SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt, 3922 DAG.getConstant(32, MVT::i8)); 3923 SDOperand Cond = DAG.getNode(X86ISD::CMP, MVT::i32, 3924 AndNode, DAG.getConstant(0, MVT::i8)); 3925 3926 SDOperand Hi, Lo; 3927 SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8); 3928 VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag); 3929 SmallVector<SDOperand, 4> Ops; 3930 if (Op.getOpcode() == ISD::SHL_PARTS) { 3931 Ops.push_back(Tmp2); 3932 Ops.push_back(Tmp3); 3933 Ops.push_back(CC); 3934 Ops.push_back(Cond); 3935 Hi = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size()); 3936 3937 Ops.clear(); 3938 Ops.push_back(Tmp3); 3939 Ops.push_back(Tmp1); 3940 Ops.push_back(CC); 3941 Ops.push_back(Cond); 3942 Lo = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size()); 3943 } else { 3944 Ops.push_back(Tmp2); 3945 Ops.push_back(Tmp3); 3946 Ops.push_back(CC); 3947 Ops.push_back(Cond); 3948 Lo = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size()); 3949 3950 Ops.clear(); 3951 Ops.push_back(Tmp3); 3952 Ops.push_back(Tmp1); 3953 Ops.push_back(CC); 3954 Ops.push_back(Cond); 3955 Hi = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size()); 3956 } 3957 3958 VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32); 3959 Ops.clear(); 3960 Ops.push_back(Lo); 3961 Ops.push_back(Hi); 3962 return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size()); 3963} 3964 3965SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) { 3966 assert(Op.getOperand(0).getValueType() <= MVT::i64 && 3967 Op.getOperand(0).getValueType() >= MVT::i16 && 3968 "Unknown SINT_TO_FP to lower!"); 3969 3970 SDOperand Result; 3971 MVT::ValueType SrcVT = Op.getOperand(0).getValueType(); 3972 unsigned Size = MVT::getSizeInBits(SrcVT)/8; 3973 MachineFunction &MF = DAG.getMachineFunction(); 3974 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 3975 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3976 SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0), 3977 StackSlot, NULL, 0); 3978 3979 // These are really Legal; caller falls through into that case. 3980 if (SrcVT == MVT::i32 && isScalarFPTypeInSSEReg(Op.getValueType())) 3981 return Result; 3982 if (SrcVT == MVT::i64 && Op.getValueType() != MVT::f80 && 3983 Subtarget->is64Bit()) 3984 return Result; 3985 3986 // Build the FILD 3987 SDVTList Tys; 3988 bool useSSE = isScalarFPTypeInSSEReg(Op.getValueType()); 3989 if (useSSE) 3990 Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag); 3991 else 3992 Tys = DAG.getVTList(Op.getValueType(), MVT::Other); 3993 SmallVector<SDOperand, 8> Ops; 3994 Ops.push_back(Chain); 3995 Ops.push_back(StackSlot); 3996 Ops.push_back(DAG.getValueType(SrcVT)); 3997 Result = DAG.getNode(useSSE ? X86ISD::FILD_FLAG :X86ISD::FILD, 3998 Tys, &Ops[0], Ops.size()); 3999 4000 if (useSSE) { 4001 Chain = Result.getValue(1); 4002 SDOperand InFlag = Result.getValue(2); 4003 4004 // FIXME: Currently the FST is flagged to the FILD_FLAG. This 4005 // shouldn't be necessary except that RFP cannot be live across 4006 // multiple blocks. When stackifier is fixed, they can be uncoupled. 
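// The coupling is done by storing the x87 result to a fresh stack slot with
// FST and immediately reloading it as the SSE value of the final type.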
4007 MachineFunction &MF = DAG.getMachineFunction(); 4008 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 4009 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 4010 Tys = DAG.getVTList(MVT::Other); 4011 SmallVector<SDOperand, 8> Ops; 4012 Ops.push_back(Chain); 4013 Ops.push_back(Result); 4014 Ops.push_back(StackSlot); 4015 Ops.push_back(DAG.getValueType(Op.getValueType())); 4016 Ops.push_back(InFlag); 4017 Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size()); 4018 Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, NULL, 0); 4019 } 4020 4021 return Result; 4022} 4023 4024std::pair<SDOperand,SDOperand> X86TargetLowering:: 4025FP_TO_SINTHelper(SDOperand Op, SelectionDAG &DAG) { 4026 assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 && 4027 "Unknown FP_TO_SINT to lower!"); 4028 4029 // These are really Legal. 4030 if (Op.getValueType() == MVT::i32 && 4031 isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) 4032 return std::make_pair(SDOperand(), SDOperand()); 4033 if (Subtarget->is64Bit() && 4034 Op.getValueType() == MVT::i64 && 4035 Op.getOperand(0).getValueType() != MVT::f80) 4036 return std::make_pair(SDOperand(), SDOperand()); 4037 4038 // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary 4039 // stack slot. 4040 MachineFunction &MF = DAG.getMachineFunction(); 4041 unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8; 4042 int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 4043 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 4044 unsigned Opc; 4045 switch (Op.getValueType()) { 4046 default: assert(0 && "Invalid FP_TO_SINT to lower!"); 4047 case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; 4048 case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; 4049 case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; 4050 } 4051 4052 SDOperand Chain = DAG.getEntryNode(); 4053 SDOperand Value = Op.getOperand(0); 4054 if (isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) { 4055 assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!"); 4056 Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0); 4057 SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other); 4058 SDOperand Ops[] = { 4059 Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType()) 4060 }; 4061 Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3); 4062 Chain = Value.getValue(1); 4063 SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 4064 StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 4065 } 4066 4067 // Build the FP_TO_INT*_IN_MEM 4068 SDOperand Ops[] = { Chain, Value, StackSlot }; 4069 SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops, 3); 4070 4071 return std::make_pair(FIST, StackSlot); 4072} 4073 4074SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) { 4075 std::pair<SDOperand,SDOperand> Vals = FP_TO_SINTHelper(Op, DAG); 4076 SDOperand FIST = Vals.first, StackSlot = Vals.second; 4077 if (FIST.Val == 0) return SDOperand(); 4078 4079 // Load the result. 4080 return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0); 4081} 4082 4083SDNode *X86TargetLowering::ExpandFP_TO_SINT(SDNode *N, SelectionDAG &DAG) { 4084 std::pair<SDOperand,SDOperand> Vals = FP_TO_SINTHelper(SDOperand(N, 0), DAG); 4085 SDOperand FIST = Vals.first, StackSlot = Vals.second; 4086 if (FIST.Val == 0) return 0; 4087 4088 // Return an i64 load from the stack slot. 
4089 SDOperand Res = DAG.getLoad(MVT::i64, FIST, StackSlot, NULL, 0); 4090 4091 // Use a MERGE_VALUES node to drop the chain result value. 4092 return DAG.getNode(ISD::MERGE_VALUES, MVT::i64, Res).Val; 4093} 4094 4095SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) { 4096 MVT::ValueType VT = Op.getValueType(); 4097 MVT::ValueType EltVT = VT; 4098 if (MVT::isVector(VT)) 4099 EltVT = MVT::getVectorElementType(VT); 4100 const Type *OpNTy = MVT::getTypeForValueType(EltVT); 4101 std::vector<Constant*> CV; 4102 if (EltVT == MVT::f64) { 4103 Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(64, ~(1ULL << 63)))); 4104 CV.push_back(C); 4105 CV.push_back(C); 4106 } else { 4107 Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(32, ~(1U << 31)))); 4108 CV.push_back(C); 4109 CV.push_back(C); 4110 CV.push_back(C); 4111 CV.push_back(C); 4112 } 4113 Constant *C = ConstantVector::get(CV); 4114 SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 4115 SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0, 4116 false, 16); 4117 return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask); 4118} 4119 4120SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) { 4121 MVT::ValueType VT = Op.getValueType(); 4122 MVT::ValueType EltVT = VT; 4123 unsigned EltNum = 1; 4124 if (MVT::isVector(VT)) { 4125 EltVT = MVT::getVectorElementType(VT); 4126 EltNum = MVT::getVectorNumElements(VT); 4127 } 4128 const Type *OpNTy = MVT::getTypeForValueType(EltVT); 4129 std::vector<Constant*> CV; 4130 if (EltVT == MVT::f64) { 4131 Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(64, 1ULL << 63))); 4132 CV.push_back(C); 4133 CV.push_back(C); 4134 } else { 4135 Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(32, 1U << 31))); 4136 CV.push_back(C); 4137 CV.push_back(C); 4138 CV.push_back(C); 4139 CV.push_back(C); 4140 } 4141 Constant *C = ConstantVector::get(CV); 4142 SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 4143 SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0, 4144 false, 16); 4145 if (MVT::isVector(VT)) { 4146 return DAG.getNode(ISD::BIT_CONVERT, VT, 4147 DAG.getNode(ISD::XOR, MVT::v2i64, 4148 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Op.getOperand(0)), 4149 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Mask))); 4150 } else { 4151 return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask); 4152 } 4153} 4154 4155SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) { 4156 SDOperand Op0 = Op.getOperand(0); 4157 SDOperand Op1 = Op.getOperand(1); 4158 MVT::ValueType VT = Op.getValueType(); 4159 MVT::ValueType SrcVT = Op1.getValueType(); 4160 const Type *SrcTy = MVT::getTypeForValueType(SrcVT); 4161 4162 // If second operand is smaller, extend it first. 4163 if (MVT::getSizeInBits(SrcVT) < MVT::getSizeInBits(VT)) { 4164 Op1 = DAG.getNode(ISD::FP_EXTEND, VT, Op1); 4165 SrcVT = VT; 4166 SrcTy = MVT::getTypeForValueType(SrcVT); 4167 } 4168 // And if it is bigger, shrink it first. 4169 if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) { 4170 Op1 = DAG.getNode(ISD::FP_ROUND, VT, Op1, DAG.getIntPtrConstant(1)); 4171 SrcVT = VT; 4172 SrcTy = MVT::getTypeForValueType(SrcVT); 4173 } 4174 4175 // At this point the operands and the result should have the same 4176 // type, and that won't be f80 since that is not custom lowered. 4177 4178 // First get the sign bit of second operand. 
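// The mask has only the sign bit of element 0 set: <1<<63, 0> for f64, or
// <1<<31, 0, 0, 0> for f32; FANDing Op1 with it isolates the sign.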
4179 std::vector<Constant*> CV; 4180 if (SrcVT == MVT::f64) { 4181 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 1ULL << 63)))); 4182 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 0)))); 4183 } else { 4184 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 1U << 31)))); 4185 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 4186 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 4187 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 4188 } 4189 Constant *C = ConstantVector::get(CV); 4190 SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 4191 SDOperand Mask1 = DAG.getLoad(SrcVT, DAG.getEntryNode(), CPIdx, NULL, 0, 4192 false, 16); 4193 SDOperand SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1); 4194 4195 // Shift sign bit right or left if the two operands have different types. 4196 if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) { 4197 // Op0 is MVT::f32, Op1 is MVT::f64. 4198 SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, SignBit); 4199 SignBit = DAG.getNode(X86ISD::FSRL, MVT::v2f64, SignBit, 4200 DAG.getConstant(32, MVT::i32)); 4201 SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, SignBit); 4202 SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f32, SignBit, 4203 DAG.getIntPtrConstant(0)); 4204 } 4205 4206 // Clear first operand sign bit. 4207 CV.clear(); 4208 if (VT == MVT::f64) { 4209 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, ~(1ULL << 63))))); 4210 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 0)))); 4211 } else { 4212 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, ~(1U << 31))))); 4213 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 4214 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 4215 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 4216 } 4217 C = ConstantVector::get(CV); 4218 CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 4219 SDOperand Mask2 = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0, 4220 false, 16); 4221 SDOperand Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2); 4222 4223 // Or the value with the sign bit. 
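// i.e. copysign(Op0, Op1) = (Op0 & ~signmask) | (Op1 & signmask), performed
// with FAND and FOR.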
4224 return DAG.getNode(X86ISD::FOR, VT, Val, SignBit); 4225} 4226 4227SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG) { 4228 assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); 4229 SDOperand Cond; 4230 SDOperand Op0 = Op.getOperand(0); 4231 SDOperand Op1 = Op.getOperand(1); 4232 SDOperand CC = Op.getOperand(2); 4233 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 4234 bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType()); 4235 unsigned X86CC; 4236 4237 if (translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC, 4238 Op0, Op1, DAG)) { 4239 Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Op0, Op1); 4240 return DAG.getNode(X86ISD::SETCC, MVT::i8, 4241 DAG.getConstant(X86CC, MVT::i8), Cond); 4242 } 4243 4244 assert(isFP && "Illegal integer SetCC!"); 4245 4246 Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Op0, Op1); 4247 switch (SetCCOpcode) { 4248 default: assert(false && "Illegal floating point SetCC!"); 4249 case ISD::SETOEQ: { // !PF & ZF 4250 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, MVT::i8, 4251 DAG.getConstant(X86::COND_NP, MVT::i8), Cond); 4252 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8, 4253 DAG.getConstant(X86::COND_E, MVT::i8), Cond); 4254 return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2); 4255 } 4256 case ISD::SETUNE: { // PF | !ZF 4257 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, MVT::i8, 4258 DAG.getConstant(X86::COND_P, MVT::i8), Cond); 4259 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8, 4260 DAG.getConstant(X86::COND_NE, MVT::i8), Cond); 4261 return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2); 4262 } 4263 } 4264} 4265 4266 4267SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) { 4268 bool addTest = true; 4269 SDOperand Cond = Op.getOperand(0); 4270 SDOperand CC; 4271 4272 if (Cond.getOpcode() == ISD::SETCC) 4273 Cond = LowerSETCC(Cond, DAG); 4274 4275 // If condition flag is set by a X86ISD::CMP, then use it as the condition 4276 // setting operand in place of the X86ISD::SETCC. 4277 if (Cond.getOpcode() == X86ISD::SETCC) { 4278 CC = Cond.getOperand(0); 4279 4280 SDOperand Cmp = Cond.getOperand(1); 4281 unsigned Opc = Cmp.getOpcode(); 4282 MVT::ValueType VT = Op.getValueType(); 4283 4284 bool IllegalFPCMov = false; 4285 if (MVT::isFloatingPoint(VT) && !MVT::isVector(VT) && 4286 !isScalarFPTypeInSSEReg(VT)) // FPStack? 4287 IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended()); 4288 4289 if ((Opc == X86ISD::CMP || 4290 Opc == X86ISD::COMI || 4291 Opc == X86ISD::UCOMI) && !IllegalFPCMov) { 4292 Cond = Cmp; 4293 addTest = false; 4294 } 4295 } 4296 4297 if (addTest) { 4298 CC = DAG.getConstant(X86::COND_NE, MVT::i8); 4299 Cond= DAG.getNode(X86ISD::CMP, MVT::i32, Cond, DAG.getConstant(0, MVT::i8)); 4300 } 4301 4302 const MVT::ValueType *VTs = DAG.getNodeValueTypes(Op.getValueType(), 4303 MVT::Flag); 4304 SmallVector<SDOperand, 4> Ops; 4305 // X86ISD::CMOV means set the result (which is operand 1) to the RHS if 4306 // condition is true. 
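// SELECT's operands are (Cond, TrueVal, FalseVal); the false value is pushed
// first so the true value ends up as CMOV operand 1.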
4307  Ops.push_back(Op.getOperand(2));
4308  Ops.push_back(Op.getOperand(1));
4309  Ops.push_back(CC);
4310  Ops.push_back(Cond);
4311  return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
4312}
4313
4314SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) {
4315  bool addTest = true;
4316  SDOperand Chain = Op.getOperand(0);
4317  SDOperand Cond = Op.getOperand(1);
4318  SDOperand Dest = Op.getOperand(2);
4319  SDOperand CC;
4320
4321  if (Cond.getOpcode() == ISD::SETCC)
4322    Cond = LowerSETCC(Cond, DAG);
4323
4324  // If condition flag is set by a X86ISD::CMP, then use it as the condition
4325  // setting operand in place of the X86ISD::SETCC.
4326  if (Cond.getOpcode() == X86ISD::SETCC) {
4327    CC = Cond.getOperand(0);
4328
4329    SDOperand Cmp = Cond.getOperand(1);
4330    unsigned Opc = Cmp.getOpcode();
4331    if (Opc == X86ISD::CMP ||
4332        Opc == X86ISD::COMI ||
4333        Opc == X86ISD::UCOMI) {
4334      Cond = Cmp;
4335      addTest = false;
4336    }
4337  }
4338
4339  if (addTest) {
4340    CC = DAG.getConstant(X86::COND_NE, MVT::i8);
4341    Cond= DAG.getNode(X86ISD::CMP, MVT::i32, Cond, DAG.getConstant(0, MVT::i8));
4342  }
4343  return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
4344                     Chain, Dest, CC, Cond);
4345}
4346
4347
4348// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets.
4349// Calls to _alloca are needed to probe the stack when allocating more than 4k
4350// bytes in one go. Touching the stack at 4K increments is necessary to ensure
4351// that the guard pages used by the OS virtual memory manager are allocated in
4352// the correct sequence.
4353SDOperand
4354X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op,
4355                                           SelectionDAG &DAG) {
4356  assert(Subtarget->isTargetCygMing() &&
4357         "This should be used only on Cygwin/Mingw targets");
4358
4359  // Get the inputs.
4360  SDOperand Chain = Op.getOperand(0);
4361  SDOperand Size = Op.getOperand(1);
4362  // FIXME: Ensure alignment here
4363
4364  SDOperand Flag;
4365
4366  MVT::ValueType IntPtr = getPointerTy();
4367  MVT::ValueType SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
4368
4369  Chain = DAG.getCopyToReg(Chain, X86::EAX, Size, Flag);
4370  Flag = Chain.getValue(1);
4371
4372  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
4373  SDOperand Ops[] = { Chain,
4374                      DAG.getTargetExternalSymbol("_alloca", IntPtr),
4375                      DAG.getRegister(X86::EAX, IntPtr),
4376                      Flag };
4377  Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops, 4);
4378  Flag = Chain.getValue(1);
4379
4380  Chain = DAG.getCopyFromReg(Chain, X86StackPtr, SPTy).getValue(1);
4381
4382  std::vector<MVT::ValueType> Tys;
4383  Tys.push_back(SPTy);
4384  Tys.push_back(MVT::Other);
4385  SDOperand Ops1[2] = { Chain.getValue(0), Chain };
4386  return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops1, 2);
4387}
4388
4389SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
4390  SDOperand InFlag(0, 0);
4391  SDOperand Chain = Op.getOperand(0);
4392  unsigned Align =
4393    (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
4394  if (Align == 0) Align = 1;
4395
4396  ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
4397  // If not DWORD aligned or size is more than the threshold, call memset.
4398  // The libc version is likely to be faster for these cases. It can use the
4399  // address value and run time information about the CPU.
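// Otherwise lower to rep;stos: the constant byte is replicated to the widest
// store unit the alignment allows (i8/i16/i32/i64), and any remaining 1-7
// byte tail is emitted as individual stores.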
4400   if ((Align & 3) != 0 ||
4401       (I && I->getValue() > Subtarget->getMaxInlineSizeThreshold())) {
4402     MVT::ValueType IntPtr = getPointerTy();
4403     const Type *IntPtrTy = getTargetData()->getIntPtrType();
4404     TargetLowering::ArgListTy Args;
4405     TargetLowering::ArgListEntry Entry;
4406     Entry.Node = Op.getOperand(1);
4407     Entry.Ty = IntPtrTy;
4408     Args.push_back(Entry);
4409     // Extend the unsigned i8 argument to be an int value for the call.
4410     Entry.Node = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2));
4411     Entry.Ty = IntPtrTy;
4412     Args.push_back(Entry);
4413     Entry.Node = Op.getOperand(3);
4414     Args.push_back(Entry);
4415     std::pair<SDOperand,SDOperand> CallResult =
4416       LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false,
4417                   DAG.getExternalSymbol("memset", IntPtr), Args, DAG);
4418     return CallResult.second;
4419   }
4420 
4421   MVT::ValueType AVT;
4422   SDOperand Count;
4423   ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2));
4424   unsigned BytesLeft = 0;
4425   bool TwoRepStos = false;
4426   if (ValC) {
4427     unsigned ValReg;
4428     uint64_t Val = ValC->getValue() & 255;
4429 
4430     // If the value is a constant, then we can potentially use larger sets.
4431     switch (Align & 3) {
4432     case 2:   // WORD aligned
4433       AVT = MVT::i16;
4434       ValReg = X86::AX;
4435       Val = (Val << 8) | Val;
4436       break;
4437     case 0:   // DWORD aligned
4438       AVT = MVT::i32;
4439       ValReg = X86::EAX;
4440       Val = (Val << 8)  | Val;
4441       Val = (Val << 16) | Val;
4442       if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) {  // QWORD aligned
4443         AVT = MVT::i64;
4444         ValReg = X86::RAX;
4445         Val = (Val << 32) | Val;
4446       }
4447       break;
4448     default:  // Byte aligned
4449       AVT = MVT::i8;
4450       ValReg = X86::AL;
4451       Count = Op.getOperand(3);
4452       break;
4453     }
4454 
4455     if (AVT > MVT::i8) {
4456       if (I) {
4457         unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
4458         Count = DAG.getIntPtrConstant(I->getValue() / UBytes);
4459         BytesLeft = I->getValue() % UBytes;
4460       } else {
4461         assert(AVT >= MVT::i32 &&
4462                "Do not use rep;stos if not at least DWORD aligned");
               // Divide the byte count by the element size: shift by 3 for
               // QWORD stores, by 2 for DWORD stores (matching the & 7 / & 3
               // remainder computation in the TwoRepStos path below).
4463         Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
4464                             Op.getOperand(3),
                              DAG.getConstant((AVT == MVT::i64) ? 3 : 2,
                                              MVT::i8));
4465         TwoRepStos = true;
4466       }
4467     }
4468 
4469     Chain  = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT),
4470                               InFlag);
4471     InFlag = Chain.getValue(1);
4472   } else {
4473     AVT = MVT::i8;
4474     Count  = Op.getOperand(3);
4475     Chain  = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag);
4476     InFlag = Chain.getValue(1);
4477   }
4478 
4479   Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
4480                            Count, InFlag);
4481   InFlag = Chain.getValue(1);
4482   Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
4483                            Op.getOperand(1), InFlag);
4484   InFlag = Chain.getValue(1);
4485 
4486   SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
4487   SmallVector<SDOperand, 8> Ops;
4488   Ops.push_back(Chain);
4489   Ops.push_back(DAG.getValueType(AVT));
4490   Ops.push_back(InFlag);
4491   Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
4492 
4493   if (TwoRepStos) {
4494     InFlag = Chain.getValue(1);
4495     Count = Op.getOperand(3);
4496     MVT::ValueType CVT = Count.getValueType();
4497     SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
4498                                  DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
4499     Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ?
X86::RCX : X86::ECX, 4500 Left, InFlag); 4501 InFlag = Chain.getValue(1); 4502 Tys = DAG.getVTList(MVT::Other, MVT::Flag); 4503 Ops.clear(); 4504 Ops.push_back(Chain); 4505 Ops.push_back(DAG.getValueType(MVT::i8)); 4506 Ops.push_back(InFlag); 4507 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); 4508 } else if (BytesLeft) { 4509 // Issue stores for the last 1 - 7 bytes. 4510 SDOperand Value; 4511 unsigned Val = ValC->getValue() & 255; 4512 unsigned Offset = I->getValue() - BytesLeft; 4513 SDOperand DstAddr = Op.getOperand(1); 4514 MVT::ValueType AddrVT = DstAddr.getValueType(); 4515 if (BytesLeft >= 4) { 4516 Val = (Val << 8) | Val; 4517 Val = (Val << 16) | Val; 4518 Value = DAG.getConstant(Val, MVT::i32); 4519 Chain = DAG.getStore(Chain, Value, 4520 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 4521 DAG.getConstant(Offset, AddrVT)), 4522 NULL, 0); 4523 BytesLeft -= 4; 4524 Offset += 4; 4525 } 4526 if (BytesLeft >= 2) { 4527 Value = DAG.getConstant((Val << 8) | Val, MVT::i16); 4528 Chain = DAG.getStore(Chain, Value, 4529 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 4530 DAG.getConstant(Offset, AddrVT)), 4531 NULL, 0); 4532 BytesLeft -= 2; 4533 Offset += 2; 4534 } 4535 if (BytesLeft == 1) { 4536 Value = DAG.getConstant(Val, MVT::i8); 4537 Chain = DAG.getStore(Chain, Value, 4538 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 4539 DAG.getConstant(Offset, AddrVT)), 4540 NULL, 0); 4541 } 4542 } 4543 4544 return Chain; 4545} 4546 4547SDOperand X86TargetLowering::LowerMEMCPYInline(SDOperand Chain, 4548 SDOperand Dest, 4549 SDOperand Source, 4550 unsigned Size, 4551 unsigned Align, 4552 SelectionDAG &DAG) { 4553 MVT::ValueType AVT; 4554 unsigned BytesLeft = 0; 4555 switch (Align & 3) { 4556 case 2: // WORD aligned 4557 AVT = MVT::i16; 4558 break; 4559 case 0: // DWORD aligned 4560 AVT = MVT::i32; 4561 if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) // QWORD aligned 4562 AVT = MVT::i64; 4563 break; 4564 default: // Byte aligned 4565 AVT = MVT::i8; 4566 break; 4567 } 4568 4569 unsigned UBytes = MVT::getSizeInBits(AVT) / 8; 4570 SDOperand Count = DAG.getIntPtrConstant(Size / UBytes); 4571 BytesLeft = Size % UBytes; 4572 4573 SDOperand InFlag(0, 0); 4574 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, 4575 Count, InFlag); 4576 InFlag = Chain.getValue(1); 4577 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, 4578 Dest, InFlag); 4579 InFlag = Chain.getValue(1); 4580 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI, 4581 Source, InFlag); 4582 InFlag = Chain.getValue(1); 4583 4584 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 4585 SmallVector<SDOperand, 8> Ops; 4586 Ops.push_back(Chain); 4587 Ops.push_back(DAG.getValueType(AVT)); 4588 Ops.push_back(InFlag); 4589 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()); 4590 4591 if (BytesLeft) { 4592 // Issue loads and stores for the last 1 - 7 bytes. 
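    // Worked example: a DWORD-aligned 15-byte copy uses AVT = i32, so the
    // rep;movs above copies Count = 15/4 = 3 dwords and leaves BytesLeft = 3,
    // handled below as one 2-byte and one 1-byte load/store pair at offsets
    // 12 and 14.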
4593 unsigned Offset = Size - BytesLeft; 4594 SDOperand DstAddr = Dest; 4595 MVT::ValueType DstVT = DstAddr.getValueType(); 4596 SDOperand SrcAddr = Source; 4597 MVT::ValueType SrcVT = SrcAddr.getValueType(); 4598 SDOperand Value; 4599 if (BytesLeft >= 4) { 4600 Value = DAG.getLoad(MVT::i32, Chain, 4601 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 4602 DAG.getConstant(Offset, SrcVT)), 4603 NULL, 0); 4604 Chain = Value.getValue(1); 4605 Chain = DAG.getStore(Chain, Value, 4606 DAG.getNode(ISD::ADD, DstVT, DstAddr, 4607 DAG.getConstant(Offset, DstVT)), 4608 NULL, 0); 4609 BytesLeft -= 4; 4610 Offset += 4; 4611 } 4612 if (BytesLeft >= 2) { 4613 Value = DAG.getLoad(MVT::i16, Chain, 4614 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 4615 DAG.getConstant(Offset, SrcVT)), 4616 NULL, 0); 4617 Chain = Value.getValue(1); 4618 Chain = DAG.getStore(Chain, Value, 4619 DAG.getNode(ISD::ADD, DstVT, DstAddr, 4620 DAG.getConstant(Offset, DstVT)), 4621 NULL, 0); 4622 BytesLeft -= 2; 4623 Offset += 2; 4624 } 4625 4626 if (BytesLeft == 1) { 4627 Value = DAG.getLoad(MVT::i8, Chain, 4628 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 4629 DAG.getConstant(Offset, SrcVT)), 4630 NULL, 0); 4631 Chain = Value.getValue(1); 4632 Chain = DAG.getStore(Chain, Value, 4633 DAG.getNode(ISD::ADD, DstVT, DstAddr, 4634 DAG.getConstant(Offset, DstVT)), 4635 NULL, 0); 4636 } 4637 } 4638 4639 return Chain; 4640} 4641 4642/// Expand the result of: i64,outchain = READCYCLECOUNTER inchain 4643SDNode *X86TargetLowering::ExpandREADCYCLECOUNTER(SDNode *N, SelectionDAG &DAG){ 4644 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 4645 SDOperand TheChain = N->getOperand(0); 4646 SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheChain, 1); 4647 if (Subtarget->is64Bit()) { 4648 SDOperand rax = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1)); 4649 SDOperand rdx = DAG.getCopyFromReg(rax.getValue(1), X86::RDX, 4650 MVT::i64, rax.getValue(2)); 4651 SDOperand Tmp = DAG.getNode(ISD::SHL, MVT::i64, rdx, 4652 DAG.getConstant(32, MVT::i8)); 4653 SDOperand Ops[] = { 4654 DAG.getNode(ISD::OR, MVT::i64, rax, Tmp), rdx.getValue(1) 4655 }; 4656 4657 Tys = DAG.getVTList(MVT::i64, MVT::Other); 4658 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2).Val; 4659 } 4660 4661 SDOperand eax = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1)); 4662 SDOperand edx = DAG.getCopyFromReg(eax.getValue(1), X86::EDX, 4663 MVT::i32, eax.getValue(2)); 4664 // Use a buildpair to merge the two 32-bit values into a 64-bit one. 4665 SDOperand Ops[] = { eax, edx }; 4666 Ops[0] = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Ops, 2); 4667 4668 // Use a MERGE_VALUES to return the value and chain. 4669 Ops[1] = edx.getValue(1); 4670 Tys = DAG.getVTList(MVT::i64, MVT::Other); 4671 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2).Val; 4672} 4673 4674SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) { 4675 SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2)); 4676 4677 if (!Subtarget->is64Bit()) { 4678 // vastart just stores the address of the VarArgsFrameIndex slot into the 4679 // memory location argument. 4680 SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 4681 return DAG.getStore(Op.getOperand(0), FR,Op.getOperand(1), SV->getValue(), 4682 SV->getOffset()); 4683 } 4684 4685 // __va_list_tag: 4686 // gp_offset (0 - 6 * 8) 4687 // fp_offset (48 - 48 + 8 * 16) 4688 // overflow_arg_area (point to parameters coming in memory). 
4689 // reg_save_area 4690 SmallVector<SDOperand, 8> MemOps; 4691 SDOperand FIN = Op.getOperand(1); 4692 // Store gp_offset 4693 SDOperand Store = DAG.getStore(Op.getOperand(0), 4694 DAG.getConstant(VarArgsGPOffset, MVT::i32), 4695 FIN, SV->getValue(), SV->getOffset()); 4696 MemOps.push_back(Store); 4697 4698 // Store fp_offset 4699 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(4)); 4700 Store = DAG.getStore(Op.getOperand(0), 4701 DAG.getConstant(VarArgsFPOffset, MVT::i32), 4702 FIN, SV->getValue(), SV->getOffset()); 4703 MemOps.push_back(Store); 4704 4705 // Store ptr to overflow_arg_area 4706 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(4)); 4707 SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 4708 Store = DAG.getStore(Op.getOperand(0), OVFIN, FIN, SV->getValue(), 4709 SV->getOffset()); 4710 MemOps.push_back(Store); 4711 4712 // Store ptr to reg_save_area. 4713 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(8)); 4714 SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); 4715 Store = DAG.getStore(Op.getOperand(0), RSFIN, FIN, SV->getValue(), 4716 SV->getOffset()); 4717 MemOps.push_back(Store); 4718 return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size()); 4719} 4720 4721SDOperand X86TargetLowering::LowerVACOPY(SDOperand Op, SelectionDAG &DAG) { 4722 // X86-64 va_list is a struct { i32, i32, i8*, i8* }. 4723 SDOperand Chain = Op.getOperand(0); 4724 SDOperand DstPtr = Op.getOperand(1); 4725 SDOperand SrcPtr = Op.getOperand(2); 4726 SrcValueSDNode *DstSV = cast<SrcValueSDNode>(Op.getOperand(3)); 4727 SrcValueSDNode *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4)); 4728 4729 SrcPtr = DAG.getLoad(getPointerTy(), Chain, SrcPtr, 4730 SrcSV->getValue(), SrcSV->getOffset()); 4731 Chain = SrcPtr.getValue(1); 4732 for (unsigned i = 0; i < 3; ++i) { 4733 SDOperand Val = DAG.getLoad(MVT::i64, Chain, SrcPtr, 4734 SrcSV->getValue(), SrcSV->getOffset()); 4735 Chain = Val.getValue(1); 4736 Chain = DAG.getStore(Chain, Val, DstPtr, 4737 DstSV->getValue(), DstSV->getOffset()); 4738 if (i == 2) 4739 break; 4740 SrcPtr = DAG.getNode(ISD::ADD, getPointerTy(), SrcPtr, 4741 DAG.getIntPtrConstant(8)); 4742 DstPtr = DAG.getNode(ISD::ADD, getPointerTy(), DstPtr, 4743 DAG.getIntPtrConstant(8)); 4744 } 4745 return Chain; 4746} 4747 4748SDOperand 4749X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) { 4750 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue(); 4751 switch (IntNo) { 4752 default: return SDOperand(); // Don't custom lower most intrinsics. 4753 // Comparison intrinsics. 
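  // Each of these is lowered to an X86ISD::COMI or X86ISD::UCOMI node on the
  // two scalar operands plus an X86ISD::SETCC reading the resulting EFLAGS;
  // e.g. comieq becomes COMI followed by a SETCC with X86::COND_E.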
4754 case Intrinsic::x86_sse_comieq_ss: 4755 case Intrinsic::x86_sse_comilt_ss: 4756 case Intrinsic::x86_sse_comile_ss: 4757 case Intrinsic::x86_sse_comigt_ss: 4758 case Intrinsic::x86_sse_comige_ss: 4759 case Intrinsic::x86_sse_comineq_ss: 4760 case Intrinsic::x86_sse_ucomieq_ss: 4761 case Intrinsic::x86_sse_ucomilt_ss: 4762 case Intrinsic::x86_sse_ucomile_ss: 4763 case Intrinsic::x86_sse_ucomigt_ss: 4764 case Intrinsic::x86_sse_ucomige_ss: 4765 case Intrinsic::x86_sse_ucomineq_ss: 4766 case Intrinsic::x86_sse2_comieq_sd: 4767 case Intrinsic::x86_sse2_comilt_sd: 4768 case Intrinsic::x86_sse2_comile_sd: 4769 case Intrinsic::x86_sse2_comigt_sd: 4770 case Intrinsic::x86_sse2_comige_sd: 4771 case Intrinsic::x86_sse2_comineq_sd: 4772 case Intrinsic::x86_sse2_ucomieq_sd: 4773 case Intrinsic::x86_sse2_ucomilt_sd: 4774 case Intrinsic::x86_sse2_ucomile_sd: 4775 case Intrinsic::x86_sse2_ucomigt_sd: 4776 case Intrinsic::x86_sse2_ucomige_sd: 4777 case Intrinsic::x86_sse2_ucomineq_sd: { 4778 unsigned Opc = 0; 4779 ISD::CondCode CC = ISD::SETCC_INVALID; 4780 switch (IntNo) { 4781 default: break; 4782 case Intrinsic::x86_sse_comieq_ss: 4783 case Intrinsic::x86_sse2_comieq_sd: 4784 Opc = X86ISD::COMI; 4785 CC = ISD::SETEQ; 4786 break; 4787 case Intrinsic::x86_sse_comilt_ss: 4788 case Intrinsic::x86_sse2_comilt_sd: 4789 Opc = X86ISD::COMI; 4790 CC = ISD::SETLT; 4791 break; 4792 case Intrinsic::x86_sse_comile_ss: 4793 case Intrinsic::x86_sse2_comile_sd: 4794 Opc = X86ISD::COMI; 4795 CC = ISD::SETLE; 4796 break; 4797 case Intrinsic::x86_sse_comigt_ss: 4798 case Intrinsic::x86_sse2_comigt_sd: 4799 Opc = X86ISD::COMI; 4800 CC = ISD::SETGT; 4801 break; 4802 case Intrinsic::x86_sse_comige_ss: 4803 case Intrinsic::x86_sse2_comige_sd: 4804 Opc = X86ISD::COMI; 4805 CC = ISD::SETGE; 4806 break; 4807 case Intrinsic::x86_sse_comineq_ss: 4808 case Intrinsic::x86_sse2_comineq_sd: 4809 Opc = X86ISD::COMI; 4810 CC = ISD::SETNE; 4811 break; 4812 case Intrinsic::x86_sse_ucomieq_ss: 4813 case Intrinsic::x86_sse2_ucomieq_sd: 4814 Opc = X86ISD::UCOMI; 4815 CC = ISD::SETEQ; 4816 break; 4817 case Intrinsic::x86_sse_ucomilt_ss: 4818 case Intrinsic::x86_sse2_ucomilt_sd: 4819 Opc = X86ISD::UCOMI; 4820 CC = ISD::SETLT; 4821 break; 4822 case Intrinsic::x86_sse_ucomile_ss: 4823 case Intrinsic::x86_sse2_ucomile_sd: 4824 Opc = X86ISD::UCOMI; 4825 CC = ISD::SETLE; 4826 break; 4827 case Intrinsic::x86_sse_ucomigt_ss: 4828 case Intrinsic::x86_sse2_ucomigt_sd: 4829 Opc = X86ISD::UCOMI; 4830 CC = ISD::SETGT; 4831 break; 4832 case Intrinsic::x86_sse_ucomige_ss: 4833 case Intrinsic::x86_sse2_ucomige_sd: 4834 Opc = X86ISD::UCOMI; 4835 CC = ISD::SETGE; 4836 break; 4837 case Intrinsic::x86_sse_ucomineq_ss: 4838 case Intrinsic::x86_sse2_ucomineq_sd: 4839 Opc = X86ISD::UCOMI; 4840 CC = ISD::SETNE; 4841 break; 4842 } 4843 4844 unsigned X86CC; 4845 SDOperand LHS = Op.getOperand(1); 4846 SDOperand RHS = Op.getOperand(2); 4847 translateX86CC(CC, true, X86CC, LHS, RHS, DAG); 4848 4849 SDOperand Cond = DAG.getNode(Opc, MVT::i32, LHS, RHS); 4850 SDOperand SetCC = DAG.getNode(X86ISD::SETCC, MVT::i8, 4851 DAG.getConstant(X86CC, MVT::i8), Cond); 4852 return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC); 4853 } 4854 } 4855} 4856 4857SDOperand X86TargetLowering::LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG) { 4858 // Depths > 0 not supported yet! 
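  // Walking outer frames would mean chasing saved frame pointers; until that
  // is implemented, any non-zero depth just returns an empty SDOperand.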
4859 if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0) 4860 return SDOperand(); 4861 4862 // Just load the return address 4863 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG); 4864 return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0); 4865} 4866 4867SDOperand X86TargetLowering::LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG) { 4868 // Depths > 0 not supported yet! 4869 if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0) 4870 return SDOperand(); 4871 4872 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG); 4873 return DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI, 4874 DAG.getIntPtrConstant(4)); 4875} 4876 4877SDOperand X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDOperand Op, 4878 SelectionDAG &DAG) { 4879 // Is not yet supported on x86-64 4880 if (Subtarget->is64Bit()) 4881 return SDOperand(); 4882 4883 return DAG.getIntPtrConstant(8); 4884} 4885 4886SDOperand X86TargetLowering::LowerEH_RETURN(SDOperand Op, SelectionDAG &DAG) 4887{ 4888 assert(!Subtarget->is64Bit() && 4889 "Lowering of eh_return builtin is not supported yet on x86-64"); 4890 4891 MachineFunction &MF = DAG.getMachineFunction(); 4892 SDOperand Chain = Op.getOperand(0); 4893 SDOperand Offset = Op.getOperand(1); 4894 SDOperand Handler = Op.getOperand(2); 4895 4896 SDOperand Frame = DAG.getRegister(RegInfo->getFrameRegister(MF), 4897 getPointerTy()); 4898 4899 SDOperand StoreAddr = DAG.getNode(ISD::SUB, getPointerTy(), Frame, 4900 DAG.getIntPtrConstant(-4UL)); 4901 StoreAddr = DAG.getNode(ISD::ADD, getPointerTy(), StoreAddr, Offset); 4902 Chain = DAG.getStore(Chain, Handler, StoreAddr, NULL, 0); 4903 Chain = DAG.getCopyToReg(Chain, X86::ECX, StoreAddr); 4904 MF.getRegInfo().addLiveOut(X86::ECX); 4905 4906 return DAG.getNode(X86ISD::EH_RETURN, MVT::Other, 4907 Chain, DAG.getRegister(X86::ECX, getPointerTy())); 4908} 4909 4910SDOperand X86TargetLowering::LowerTRAMPOLINE(SDOperand Op, 4911 SelectionDAG &DAG) { 4912 SDOperand Root = Op.getOperand(0); 4913 SDOperand Trmp = Op.getOperand(1); // trampoline 4914 SDOperand FPtr = Op.getOperand(2); // nested function 4915 SDOperand Nest = Op.getOperand(3); // 'nest' parameter value 4916 4917 SrcValueSDNode *TrmpSV = cast<SrcValueSDNode>(Op.getOperand(4)); 4918 4919 const X86InstrInfo *TII = 4920 ((X86TargetMachine&)getTargetMachine()).getInstrInfo(); 4921 4922 if (Subtarget->is64Bit()) { 4923 SDOperand OutChains[6]; 4924 4925 // Large code-model. 4926 4927 const unsigned char JMP64r = TII->getBaseOpcodeFor(X86::JMP64r); 4928 const unsigned char MOV64ri = TII->getBaseOpcodeFor(X86::MOV64ri); 4929 4930 const unsigned char N86R10 = 4931 ((X86RegisterInfo*)RegInfo)->getX86RegNum(X86::R10); 4932 const unsigned char N86R11 = 4933 ((X86RegisterInfo*)RegInfo)->getX86RegNum(X86::R11); 4934 4935 const unsigned char REX_WB = 0x40 | 0x08 | 0x01; // REX prefix 4936 4937 // Load the pointer to the nested function into R11. 4938 unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11 4939 SDOperand Addr = Trmp; 4940 OutChains[0] = DAG.getStore(Root, DAG.getConstant(OpCode, MVT::i16), Addr, 4941 TrmpSV->getValue(), TrmpSV->getOffset()); 4942 4943 Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(2, MVT::i64)); 4944 OutChains[1] = DAG.getStore(Root, FPtr, Addr, TrmpSV->getValue(), 4945 TrmpSV->getOffset() + 2, false, 2); 4946 4947 // Load the 'nest' parameter value into R10. 
4948 // R10 is specified in X86CallingConv.td 4949 OpCode = ((MOV64ri | N86R10) << 8) | REX_WB; // movabsq r10 4950 Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(10, MVT::i64)); 4951 OutChains[2] = DAG.getStore(Root, DAG.getConstant(OpCode, MVT::i16), Addr, 4952 TrmpSV->getValue(), TrmpSV->getOffset() + 10); 4953 4954 Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(12, MVT::i64)); 4955 OutChains[3] = DAG.getStore(Root, Nest, Addr, TrmpSV->getValue(), 4956 TrmpSV->getOffset() + 12, false, 2); 4957 4958 // Jump to the nested function. 4959 OpCode = (JMP64r << 8) | REX_WB; // jmpq *... 4960 Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(20, MVT::i64)); 4961 OutChains[4] = DAG.getStore(Root, DAG.getConstant(OpCode, MVT::i16), Addr, 4962 TrmpSV->getValue(), TrmpSV->getOffset() + 20); 4963 4964 unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11 4965 Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(22, MVT::i64)); 4966 OutChains[5] = DAG.getStore(Root, DAG.getConstant(ModRM, MVT::i8), Addr, 4967 TrmpSV->getValue(), TrmpSV->getOffset() + 22); 4968 4969 SDOperand Ops[] = 4970 { Trmp, DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains, 6) }; 4971 return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(), Ops, 2); 4972 } else { 4973 Function *Func = (Function *) 4974 cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue()); 4975 unsigned CC = Func->getCallingConv(); 4976 unsigned NestReg; 4977 4978 switch (CC) { 4979 default: 4980 assert(0 && "Unsupported calling convention"); 4981 case CallingConv::C: 4982 case CallingConv::X86_StdCall: { 4983 // Pass 'nest' parameter in ECX. 4984 // Must be kept in sync with X86CallingConv.td 4985 NestReg = X86::ECX; 4986 4987 // Check that ECX wasn't needed by an 'inreg' parameter. 4988 const FunctionType *FTy = Func->getFunctionType(); 4989 const ParamAttrsList *Attrs = Func->getParamAttrs(); 4990 4991 if (Attrs && !Func->isVarArg()) { 4992 unsigned InRegCount = 0; 4993 unsigned Idx = 1; 4994 4995 for (FunctionType::param_iterator I = FTy->param_begin(), 4996 E = FTy->param_end(); I != E; ++I, ++Idx) 4997 if (Attrs->paramHasAttr(Idx, ParamAttr::InReg)) 4998 // FIXME: should only count parameters that are lowered to integers. 4999 InRegCount += (getTargetData()->getTypeSizeInBits(*I) + 31) / 32; 5000 5001 if (InRegCount > 2) { 5002 cerr << "Nest register in use - reduce number of inreg parameters!\n"; 5003 abort(); 5004 } 5005 } 5006 break; 5007 } 5008 case CallingConv::X86_FastCall: 5009 // Pass 'nest' parameter in EAX. 
5010     // Must be kept in sync with X86CallingConv.td
5011     NestReg = X86::EAX;
5012     break;
5013   }
5014 
5015   SDOperand OutChains[4];
5016   SDOperand Addr, Disp;
5017 
5018   Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(10, MVT::i32));
5019   Disp = DAG.getNode(ISD::SUB, MVT::i32, FPtr, Addr);
5020 
5021   const unsigned char MOV32ri = TII->getBaseOpcodeFor(X86::MOV32ri);
5022   const unsigned char N86Reg =
5023     ((X86RegisterInfo*)RegInfo)->getX86RegNum(NestReg);
5024   OutChains[0] = DAG.getStore(Root, DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
5025                               Trmp, TrmpSV->getValue(), TrmpSV->getOffset());
5026 
5027   Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(1, MVT::i32));
5028   OutChains[1] = DAG.getStore(Root, Nest, Addr, TrmpSV->getValue(),
5029                               TrmpSV->getOffset() + 1, false, 1);
5030 
5031   const unsigned char JMP = TII->getBaseOpcodeFor(X86::JMP);
5032   Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(5, MVT::i32));
5033   OutChains[2] = DAG.getStore(Root, DAG.getConstant(JMP, MVT::i8), Addr,
5034                               TrmpSV->getValue(), TrmpSV->getOffset() + 5);
5035 
5036   Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(6, MVT::i32));
5037   OutChains[3] = DAG.getStore(Root, Disp, Addr, TrmpSV->getValue(),
5038                               TrmpSV->getOffset() + 6, false, 1);
5039 
5040   SDOperand Ops[] =
5041     { Trmp, DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains, 4) };
5042   return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(), Ops, 2);
5043   }
5044 }
5045 
5046 SDOperand X86TargetLowering::LowerFLT_ROUNDS(SDOperand Op, SelectionDAG &DAG) {
5047   /*
5048    The rounding mode is in bits 11:10 of the FP control word (FPCW, saved
5049    by fnstcw), and has the following settings:
5050      00 Round to nearest
5051      01 Round to -inf
5052      10 Round to +inf
5053      11 Round to 0
5054 
5055   FLT_ROUNDS, on the other hand, expects the following:
5056     -1 Undefined
5057      0 Round to 0
5058      1 Round to nearest
5059      2 Round to +inf
5060      3 Round to -inf
5061 
5062   To perform the conversion, we do:
5063     (((((FPCW & 0x800) >> 11) | ((FPCW & 0x400) >> 9)) + 1) & 3)
     E.g. for rounding mode 01 (round to -inf): ((0 >> 11) | (0x400 >> 9)) + 1
     = 3, masked with 3 is still 3, which is FLT_ROUNDS' encoding of -inf.
5064   */
5065 
5066   MachineFunction &MF = DAG.getMachineFunction();
5067   const TargetMachine &TM = MF.getTarget();
5068   const TargetFrameInfo &TFI = *TM.getFrameInfo();
5069   unsigned StackAlignment = TFI.getStackAlignment();
5070   MVT::ValueType VT = Op.getValueType();
5071 
5072   // Save FP Control Word to stack slot
5073   int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment);
5074   SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
5075 
5076   SDOperand Chain = DAG.getNode(X86ISD::FNSTCW16m, MVT::Other,
5077                                 DAG.getEntryNode(), StackSlot);
5078 
5079   // Load FP Control Word from stack slot
5080   SDOperand CWD = DAG.getLoad(MVT::i16, Chain, StackSlot, NULL, 0);
5081 
5082   // Transform as necessary
5083   SDOperand CWD1 =
5084     DAG.getNode(ISD::SRL, MVT::i16,
5085                 DAG.getNode(ISD::AND, MVT::i16,
5086                             CWD, DAG.getConstant(0x800, MVT::i16)),
5087                 DAG.getConstant(11, MVT::i8));
5088   SDOperand CWD2 =
5089     DAG.getNode(ISD::SRL, MVT::i16,
5090                 DAG.getNode(ISD::AND, MVT::i16,
5091                             CWD, DAG.getConstant(0x400, MVT::i16)),
5092                 DAG.getConstant(9, MVT::i8));
5093 
5094   SDOperand RetVal =
5095     DAG.getNode(ISD::AND, MVT::i16,
5096                 DAG.getNode(ISD::ADD, MVT::i16,
5097                             DAG.getNode(ISD::OR, MVT::i16, CWD1, CWD2),
5098                             DAG.getConstant(1, MVT::i16)),
5099                 DAG.getConstant(3, MVT::i16));
5100 
5101 
5102   return DAG.getNode((MVT::getSizeInBits(VT) < 16 ?
5103 ISD::TRUNCATE : ISD::ZERO_EXTEND), VT, RetVal); 5104} 5105 5106SDOperand X86TargetLowering::LowerCTLZ(SDOperand Op, SelectionDAG &DAG) { 5107 MVT::ValueType VT = Op.getValueType(); 5108 MVT::ValueType OpVT = VT; 5109 unsigned NumBits = MVT::getSizeInBits(VT); 5110 5111 Op = Op.getOperand(0); 5112 if (VT == MVT::i8) { 5113 // Zero extend to i32 since there is not an i8 bsr. 5114 OpVT = MVT::i32; 5115 Op = DAG.getNode(ISD::ZERO_EXTEND, OpVT, Op); 5116 } 5117 5118 // Issue a bsr (scan bits in reverse) which also sets EFLAGS. 5119 SDVTList VTs = DAG.getVTList(OpVT, MVT::i32); 5120 Op = DAG.getNode(X86ISD::BSR, VTs, Op); 5121 5122 // If src is zero (i.e. bsr sets ZF), returns NumBits. 5123 SmallVector<SDOperand, 4> Ops; 5124 Ops.push_back(Op); 5125 Ops.push_back(DAG.getConstant(NumBits+NumBits-1, OpVT)); 5126 Ops.push_back(DAG.getConstant(X86::COND_E, MVT::i8)); 5127 Ops.push_back(Op.getValue(1)); 5128 Op = DAG.getNode(X86ISD::CMOV, OpVT, &Ops[0], 4); 5129 5130 // Finally xor with NumBits-1. 5131 Op = DAG.getNode(ISD::XOR, OpVT, Op, DAG.getConstant(NumBits-1, OpVT)); 5132 5133 if (VT == MVT::i8) 5134 Op = DAG.getNode(ISD::TRUNCATE, MVT::i8, Op); 5135 return Op; 5136} 5137 5138SDOperand X86TargetLowering::LowerCTTZ(SDOperand Op, SelectionDAG &DAG) { 5139 MVT::ValueType VT = Op.getValueType(); 5140 MVT::ValueType OpVT = VT; 5141 unsigned NumBits = MVT::getSizeInBits(VT); 5142 5143 Op = Op.getOperand(0); 5144 if (VT == MVT::i8) { 5145 OpVT = MVT::i32; 5146 Op = DAG.getNode(ISD::ZERO_EXTEND, OpVT, Op); 5147 } 5148 5149 // Issue a bsf (scan bits forward) which also sets EFLAGS. 5150 SDVTList VTs = DAG.getVTList(OpVT, MVT::i32); 5151 Op = DAG.getNode(X86ISD::BSF, VTs, Op); 5152 5153 // If src is zero (i.e. bsf sets ZF), returns NumBits. 5154 SmallVector<SDOperand, 4> Ops; 5155 Ops.push_back(Op); 5156 Ops.push_back(DAG.getConstant(NumBits, OpVT)); 5157 Ops.push_back(DAG.getConstant(X86::COND_E, MVT::i8)); 5158 Ops.push_back(Op.getValue(1)); 5159 Op = DAG.getNode(X86ISD::CMOV, OpVT, &Ops[0], 4); 5160 5161 if (VT == MVT::i8) 5162 Op = DAG.getNode(ISD::TRUNCATE, MVT::i8, Op); 5163 return Op; 5164} 5165 5166/// LowerOperation - Provide custom lowering hooks for some operations. 
5167/// 5168SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { 5169 switch (Op.getOpcode()) { 5170 default: assert(0 && "Should not custom lower this!"); 5171 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 5172 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 5173 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 5174 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); 5175 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 5176 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 5177 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 5178 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 5179 case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG); 5180 case ISD::SHL_PARTS: 5181 case ISD::SRA_PARTS: 5182 case ISD::SRL_PARTS: return LowerShift(Op, DAG); 5183 case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); 5184 case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); 5185 case ISD::FABS: return LowerFABS(Op, DAG); 5186 case ISD::FNEG: return LowerFNEG(Op, DAG); 5187 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); 5188 case ISD::SETCC: return LowerSETCC(Op, DAG); 5189 case ISD::SELECT: return LowerSELECT(Op, DAG); 5190 case ISD::BRCOND: return LowerBRCOND(Op, DAG); 5191 case ISD::JumpTable: return LowerJumpTable(Op, DAG); 5192 case ISD::CALL: return LowerCALL(Op, DAG); 5193 case ISD::RET: return LowerRET(Op, DAG); 5194 case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG); 5195 case ISD::MEMSET: return LowerMEMSET(Op, DAG); 5196 case ISD::MEMCPY: return LowerMEMCPY(Op, DAG); 5197 case ISD::VASTART: return LowerVASTART(Op, DAG); 5198 case ISD::VACOPY: return LowerVACOPY(Op, DAG); 5199 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 5200 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 5201 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 5202 case ISD::FRAME_TO_ARGS_OFFSET: 5203 return LowerFRAME_TO_ARGS_OFFSET(Op, DAG); 5204 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); 5205 case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); 5206 case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG); 5207 case ISD::FLT_ROUNDS: return LowerFLT_ROUNDS(Op, DAG); 5208 case ISD::CTLZ: return LowerCTLZ(Op, DAG); 5209 case ISD::CTTZ: return LowerCTTZ(Op, DAG); 5210 5211 // FIXME: REMOVE THIS WHEN LegalizeDAGTypes lands. 5212 case ISD::READCYCLECOUNTER: 5213 return SDOperand(ExpandREADCYCLECOUNTER(Op.Val, DAG), 0); 5214 } 5215} 5216 5217/// ExpandOperation - Provide custom lowering hooks for expanding operations. 
5218SDNode *X86TargetLowering::ExpandOperationResult(SDNode *N, SelectionDAG &DAG) { 5219 switch (N->getOpcode()) { 5220 default: assert(0 && "Should not custom lower this!"); 5221 case ISD::FP_TO_SINT: return ExpandFP_TO_SINT(N, DAG); 5222 case ISD::READCYCLECOUNTER: return ExpandREADCYCLECOUNTER(N, DAG); 5223 } 5224} 5225 5226const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { 5227 switch (Opcode) { 5228 default: return NULL; 5229 case X86ISD::BSF: return "X86ISD::BSF"; 5230 case X86ISD::BSR: return "X86ISD::BSR"; 5231 case X86ISD::SHLD: return "X86ISD::SHLD"; 5232 case X86ISD::SHRD: return "X86ISD::SHRD"; 5233 case X86ISD::FAND: return "X86ISD::FAND"; 5234 case X86ISD::FOR: return "X86ISD::FOR"; 5235 case X86ISD::FXOR: return "X86ISD::FXOR"; 5236 case X86ISD::FSRL: return "X86ISD::FSRL"; 5237 case X86ISD::FILD: return "X86ISD::FILD"; 5238 case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG"; 5239 case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM"; 5240 case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM"; 5241 case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM"; 5242 case X86ISD::FLD: return "X86ISD::FLD"; 5243 case X86ISD::FST: return "X86ISD::FST"; 5244 case X86ISD::FP_GET_RESULT: return "X86ISD::FP_GET_RESULT"; 5245 case X86ISD::FP_GET_RESULT2: return "X86ISD::FP_GET_RESULT2"; 5246 case X86ISD::FP_SET_RESULT: return "X86ISD::FP_SET_RESULT"; 5247 case X86ISD::CALL: return "X86ISD::CALL"; 5248 case X86ISD::TAILCALL: return "X86ISD::TAILCALL"; 5249 case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG"; 5250 case X86ISD::CMP: return "X86ISD::CMP"; 5251 case X86ISD::COMI: return "X86ISD::COMI"; 5252 case X86ISD::UCOMI: return "X86ISD::UCOMI"; 5253 case X86ISD::SETCC: return "X86ISD::SETCC"; 5254 case X86ISD::CMOV: return "X86ISD::CMOV"; 5255 case X86ISD::BRCOND: return "X86ISD::BRCOND"; 5256 case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; 5257 case X86ISD::REP_STOS: return "X86ISD::REP_STOS"; 5258 case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS"; 5259 case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg"; 5260 case X86ISD::Wrapper: return "X86ISD::Wrapper"; 5261 case X86ISD::S2VEC: return "X86ISD::S2VEC"; 5262 case X86ISD::PEXTRW: return "X86ISD::PEXTRW"; 5263 case X86ISD::PINSRW: return "X86ISD::PINSRW"; 5264 case X86ISD::FMAX: return "X86ISD::FMAX"; 5265 case X86ISD::FMIN: return "X86ISD::FMIN"; 5266 case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; 5267 case X86ISD::FRCP: return "X86ISD::FRCP"; 5268 case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; 5269 case X86ISD::THREAD_POINTER: return "X86ISD::THREAD_POINTER"; 5270 case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN"; 5271 case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN"; 5272 case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m"; 5273 } 5274} 5275 5276// isLegalAddressingMode - Return true if the addressing mode represented 5277// by AM is legal for this target, for a load/store of the specified type. 5278bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM, 5279 const Type *Ty) const { 5280 // X86 supports extremely general addressing modes. 5281 5282 // X86 allows a sign-extended 32-bit immediate field as a displacement. 5283 if (AM.BaseOffs <= -(1LL << 32) || AM.BaseOffs >= (1LL << 32)-1) 5284 return false; 5285 5286 if (AM.BaseGV) { 5287 // We can only fold this if we don't need an extra load. 
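    // (In PIC mode, for instance, most globals are reached through an extra
    // indirection such as the GOT, so their address is not a link-time
    // constant that can be folded into the addressing mode.)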
5288 if (Subtarget->GVRequiresExtraLoad(AM.BaseGV, getTargetMachine(), false)) 5289 return false; 5290 5291 // X86-64 only supports addr of globals in small code model. 5292 if (Subtarget->is64Bit()) { 5293 if (getTargetMachine().getCodeModel() != CodeModel::Small) 5294 return false; 5295 // If lower 4G is not available, then we must use rip-relative addressing. 5296 if (AM.BaseOffs || AM.Scale > 1) 5297 return false; 5298 } 5299 } 5300 5301 switch (AM.Scale) { 5302 case 0: 5303 case 1: 5304 case 2: 5305 case 4: 5306 case 8: 5307 // These scales always work. 5308 break; 5309 case 3: 5310 case 5: 5311 case 9: 5312 // These scales are formed with basereg+scalereg. Only accept if there is 5313 // no basereg yet. 5314 if (AM.HasBaseReg) 5315 return false; 5316 break; 5317 default: // Other stuff never works. 5318 return false; 5319 } 5320 5321 return true; 5322} 5323 5324 5325bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const { 5326 if (!Ty1->isInteger() || !Ty2->isInteger()) 5327 return false; 5328 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); 5329 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); 5330 if (NumBits1 <= NumBits2) 5331 return false; 5332 return Subtarget->is64Bit() || NumBits1 < 64; 5333} 5334 5335bool X86TargetLowering::isTruncateFree(MVT::ValueType VT1, 5336 MVT::ValueType VT2) const { 5337 if (!MVT::isInteger(VT1) || !MVT::isInteger(VT2)) 5338 return false; 5339 unsigned NumBits1 = MVT::getSizeInBits(VT1); 5340 unsigned NumBits2 = MVT::getSizeInBits(VT2); 5341 if (NumBits1 <= NumBits2) 5342 return false; 5343 return Subtarget->is64Bit() || NumBits1 < 64; 5344} 5345 5346/// isShuffleMaskLegal - Targets can use this to indicate that they only 5347/// support *some* VECTOR_SHUFFLE operations, those with specific masks. 5348/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 5349/// are assumed to be legal. 5350bool 5351X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const { 5352 // Only do shuffles on 128-bit vector types for now. 5353 if (MVT::getSizeInBits(VT) == 64) return false; 5354 return (Mask.Val->getNumOperands() <= 4 || 5355 isIdentityMask(Mask.Val) || 5356 isIdentityMask(Mask.Val, true) || 5357 isSplatMask(Mask.Val) || 5358 isPSHUFHW_PSHUFLWMask(Mask.Val) || 5359 X86::isUNPCKLMask(Mask.Val) || 5360 X86::isUNPCKHMask(Mask.Val) || 5361 X86::isUNPCKL_v_undef_Mask(Mask.Val) || 5362 X86::isUNPCKH_v_undef_Mask(Mask.Val)); 5363} 5364 5365bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps, 5366 MVT::ValueType EVT, 5367 SelectionDAG &DAG) const { 5368 unsigned NumElts = BVOps.size(); 5369 // Only do shuffles on 128-bit vector types for now. 
5370 if (MVT::getSizeInBits(EVT) * NumElts == 64) return false; 5371 if (NumElts == 2) return true; 5372 if (NumElts == 4) { 5373 return (isMOVLMask(&BVOps[0], 4) || 5374 isCommutedMOVL(&BVOps[0], 4, true) || 5375 isSHUFPMask(&BVOps[0], 4) || 5376 isCommutedSHUFP(&BVOps[0], 4)); 5377 } 5378 return false; 5379} 5380 5381//===----------------------------------------------------------------------===// 5382// X86 Scheduler Hooks 5383//===----------------------------------------------------------------------===// 5384 5385MachineBasicBlock * 5386X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI, 5387 MachineBasicBlock *BB) { 5388 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 5389 switch (MI->getOpcode()) { 5390 default: assert(false && "Unexpected instr type to insert"); 5391 case X86::CMOV_FR32: 5392 case X86::CMOV_FR64: 5393 case X86::CMOV_V4F32: 5394 case X86::CMOV_V2F64: 5395 case X86::CMOV_V2I64: { 5396 // To "insert" a SELECT_CC instruction, we actually have to insert the 5397 // diamond control-flow pattern. The incoming instruction knows the 5398 // destination vreg to set, the condition code register to branch on, the 5399 // true/false values to select between, and a branch opcode to use. 5400 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 5401 ilist<MachineBasicBlock>::iterator It = BB; 5402 ++It; 5403 5404 // thisMBB: 5405 // ... 5406 // TrueVal = ... 5407 // cmpTY ccX, r1, r2 5408 // bCC copy1MBB 5409 // fallthrough --> copy0MBB 5410 MachineBasicBlock *thisMBB = BB; 5411 MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB); 5412 MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB); 5413 unsigned Opc = 5414 X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm()); 5415 BuildMI(BB, TII->get(Opc)).addMBB(sinkMBB); 5416 MachineFunction *F = BB->getParent(); 5417 F->getBasicBlockList().insert(It, copy0MBB); 5418 F->getBasicBlockList().insert(It, sinkMBB); 5419 // Update machine-CFG edges by first adding all successors of the current 5420 // block to the new block which will contain the Phi node for the select. 5421 for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), 5422 e = BB->succ_end(); i != e; ++i) 5423 sinkMBB->addSuccessor(*i); 5424 // Next, remove all successors of the current block, and add the true 5425 // and fallthrough blocks as its successors. 5426 while(!BB->succ_empty()) 5427 BB->removeSuccessor(BB->succ_begin()); 5428 BB->addSuccessor(copy0MBB); 5429 BB->addSuccessor(sinkMBB); 5430 5431 // copy0MBB: 5432 // %FalseValue = ... 5433 // # fallthrough to sinkMBB 5434 BB = copy0MBB; 5435 5436 // Update machine-CFG edges 5437 BB->addSuccessor(sinkMBB); 5438 5439 // sinkMBB: 5440 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 5441 // ... 5442 BB = sinkMBB; 5443 BuildMI(BB, TII->get(X86::PHI), MI->getOperand(0).getReg()) 5444 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) 5445 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 5446 5447 delete MI; // The pseudo instruction is gone now. 5448 return BB; 5449 } 5450 5451 case X86::FP32_TO_INT16_IN_MEM: 5452 case X86::FP32_TO_INT32_IN_MEM: 5453 case X86::FP32_TO_INT64_IN_MEM: 5454 case X86::FP64_TO_INT16_IN_MEM: 5455 case X86::FP64_TO_INT32_IN_MEM: 5456 case X86::FP64_TO_INT64_IN_MEM: 5457 case X86::FP80_TO_INT16_IN_MEM: 5458 case X86::FP80_TO_INT32_IN_MEM: 5459 case X86::FP80_TO_INT64_IN_MEM: { 5460 // Change the floating point control register to use "round towards zero" 5461 // mode when truncating to an integer value. 
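    // Sequence: fnstcw spills the current control word to a stack slot, a
    // copy is saved in a vreg, the slot is overwritten with an image whose
    // rounding-control field is 11b (round toward zero, via 0xC7F) and
    // reloaded with fldcw, the memory image is restored from the copy, the
    // truncating store is issued, and the original word is reloaded.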
5462     MachineFunction *F = BB->getParent();
5463     int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
5464     addFrameReference(BuildMI(BB, TII->get(X86::FNSTCW16m)), CWFrameIdx);
5465 
5466     // Save the old value of the control word...
5467     unsigned OldCW =
5468       F->getRegInfo().createVirtualRegister(X86::GR16RegisterClass);
5469     addFrameReference(BuildMI(BB, TII->get(X86::MOV16rm), OldCW), CWFrameIdx);
5470 
5471     // Store a control word image with rounding control set to round to zero...
5472     addFrameReference(BuildMI(BB, TII->get(X86::MOV16mi)), CWFrameIdx)
5473       .addImm(0xC7F);
5474 
5475     // Reload the modified control word now...
5476     addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);
5477 
5478     // Restore the memory image of the control word to its original value
5479     addFrameReference(BuildMI(BB, TII->get(X86::MOV16mr)), CWFrameIdx)
5480       .addReg(OldCW);
5481 
5482     // Get the X86 opcode to use.
5483     unsigned Opc;
5484     switch (MI->getOpcode()) {
5485     default: assert(0 && "illegal opcode!");
5486     case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
5487     case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
5488     case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
5489     case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break;
5490     case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break;
5491     case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break;
5492     case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break;
5493     case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break;
5494     case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break;
5495     }
5496 
5497     X86AddressMode AM;
5498     MachineOperand &Op = MI->getOperand(0);
5499     if (Op.isRegister()) {
5500       AM.BaseType = X86AddressMode::RegBase;
5501       AM.Base.Reg = Op.getReg();
5502     } else {
5503       AM.BaseType = X86AddressMode::FrameIndexBase;
5504       AM.Base.FrameIndex = Op.getIndex();
5505     }
5506     Op = MI->getOperand(1);
5507     if (Op.isImmediate())
5508       AM.Scale = Op.getImm();
5509     Op = MI->getOperand(2);
5510     if (Op.isImmediate())
5511       AM.IndexReg = Op.getImm();
5512     Op = MI->getOperand(3);
5513     if (Op.isGlobalAddress()) {
5514       AM.GV = Op.getGlobal();
5515     } else {
5516       AM.Disp = Op.getImm();
5517     }
5518     addFullAddress(BuildMI(BB, TII->get(Opc)), AM)
5519       .addReg(MI->getOperand(4).getReg());
5520 
5521     // Reload the original control word now.
5522     addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);
5523 
5524     delete MI;   // The pseudo instruction is gone now.
5525     return BB;
5526   }
5527   }
5528 }
5529 
5530 //===----------------------------------------------------------------------===//
5531 //                           X86 Optimization Hooks
5532 //===----------------------------------------------------------------------===//
5533 
5534 void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
5535                                                        uint64_t Mask,
5536                                                        uint64_t &KnownZero,
5537                                                        uint64_t &KnownOne,
5538                                                        const SelectionDAG &DAG,
5539                                                        unsigned Depth) const {
5540   unsigned Opc = Op.getOpcode();
5541   assert((Opc >= ISD::BUILTIN_OP_END ||
5542           Opc == ISD::INTRINSIC_WO_CHAIN ||
5543           Opc == ISD::INTRINSIC_W_CHAIN ||
5544           Opc == ISD::INTRINSIC_VOID) &&
5545          "Should use MaskedValueIsZero if you don't know whether Op"
5546          " is a target node!");
5547 
5548   KnownZero = KnownOne = 0;   // Don't know anything.
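  // The only target node with known bits modeled here is X86ISD::SETCC, whose
  // result is always 0 or 1, so every bit above bit zero is known zero.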
5549 switch (Opc) { 5550 default: break; 5551 case X86ISD::SETCC: 5552 KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL); 5553 break; 5554 } 5555} 5556 5557/// getShuffleScalarElt - Returns the scalar element that will make up the ith 5558/// element of the result of the vector shuffle. 5559static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) { 5560 MVT::ValueType VT = N->getValueType(0); 5561 SDOperand PermMask = N->getOperand(2); 5562 unsigned NumElems = PermMask.getNumOperands(); 5563 SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1); 5564 i %= NumElems; 5565 if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) { 5566 return (i == 0) 5567 ? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT)); 5568 } else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) { 5569 SDOperand Idx = PermMask.getOperand(i); 5570 if (Idx.getOpcode() == ISD::UNDEF) 5571 return DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT)); 5572 return getShuffleScalarElt(V.Val,cast<ConstantSDNode>(Idx)->getValue(),DAG); 5573 } 5574 return SDOperand(); 5575} 5576 5577/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the 5578/// node is a GlobalAddress + an offset. 5579static bool isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) { 5580 unsigned Opc = N->getOpcode(); 5581 if (Opc == X86ISD::Wrapper) { 5582 if (dyn_cast<GlobalAddressSDNode>(N->getOperand(0))) { 5583 GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal(); 5584 return true; 5585 } 5586 } else if (Opc == ISD::ADD) { 5587 SDOperand N1 = N->getOperand(0); 5588 SDOperand N2 = N->getOperand(1); 5589 if (isGAPlusOffset(N1.Val, GA, Offset)) { 5590 ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2); 5591 if (V) { 5592 Offset += V->getSignExtended(); 5593 return true; 5594 } 5595 } else if (isGAPlusOffset(N2.Val, GA, Offset)) { 5596 ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1); 5597 if (V) { 5598 Offset += V->getSignExtended(); 5599 return true; 5600 } 5601 } 5602 } 5603 return false; 5604} 5605 5606/// isConsecutiveLoad - Returns true if N is loading from an address of Base 5607/// + Dist * Size. 5608static bool isConsecutiveLoad(SDNode *N, SDNode *Base, int Dist, int Size, 5609 MachineFrameInfo *MFI) { 5610 if (N->getOperand(0).Val != Base->getOperand(0).Val) 5611 return false; 5612 5613 SDOperand Loc = N->getOperand(1); 5614 SDOperand BaseLoc = Base->getOperand(1); 5615 if (Loc.getOpcode() == ISD::FrameIndex) { 5616 if (BaseLoc.getOpcode() != ISD::FrameIndex) 5617 return false; 5618 int FI = cast<FrameIndexSDNode>(Loc)->getIndex(); 5619 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex(); 5620 int FS = MFI->getObjectSize(FI); 5621 int BFS = MFI->getObjectSize(BFI); 5622 if (FS != BFS || FS != Size) return false; 5623 return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Size); 5624 } else { 5625 GlobalValue *GV1 = NULL; 5626 GlobalValue *GV2 = NULL; 5627 int64_t Offset1 = 0; 5628 int64_t Offset2 = 0; 5629 bool isGA1 = isGAPlusOffset(Loc.Val, GV1, Offset1); 5630 bool isGA2 = isGAPlusOffset(BaseLoc.Val, GV2, Offset2); 5631 if (isGA1 && isGA2 && GV1 == GV2) 5632 return Offset1 == (Offset2 + Dist*Size); 5633 } 5634 5635 return false; 5636} 5637 5638static bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI, 5639 const X86Subtarget *Subtarget) { 5640 GlobalValue *GV; 5641 int64_t Offset; 5642 if (isGAPlusOffset(Base, GV, Offset)) 5643 return (GV->getAlignment() >= 16 && (Offset % 16) == 0); 5644 // DAG combine handles the stack object case. 
5645 return false; 5646} 5647 5648 5649/// PerformShuffleCombine - Combine a vector_shuffle that is equal to 5650/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load 5651/// if the load addresses are consecutive, non-overlapping, and in the right 5652/// order. 5653static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, 5654 const X86Subtarget *Subtarget) { 5655 MachineFunction &MF = DAG.getMachineFunction(); 5656 MachineFrameInfo *MFI = MF.getFrameInfo(); 5657 MVT::ValueType VT = N->getValueType(0); 5658 MVT::ValueType EVT = MVT::getVectorElementType(VT); 5659 SDOperand PermMask = N->getOperand(2); 5660 int NumElems = (int)PermMask.getNumOperands(); 5661 SDNode *Base = NULL; 5662 for (int i = 0; i < NumElems; ++i) { 5663 SDOperand Idx = PermMask.getOperand(i); 5664 if (Idx.getOpcode() == ISD::UNDEF) { 5665 if (!Base) return SDOperand(); 5666 } else { 5667 SDOperand Arg = 5668 getShuffleScalarElt(N, cast<ConstantSDNode>(Idx)->getValue(), DAG); 5669 if (!Arg.Val || !ISD::isNON_EXTLoad(Arg.Val)) 5670 return SDOperand(); 5671 if (!Base) 5672 Base = Arg.Val; 5673 else if (!isConsecutiveLoad(Arg.Val, Base, 5674 i, MVT::getSizeInBits(EVT)/8,MFI)) 5675 return SDOperand(); 5676 } 5677 } 5678 5679 bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget); 5680 LoadSDNode *LD = cast<LoadSDNode>(Base); 5681 if (isAlign16) { 5682 return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(), 5683 LD->getSrcValueOffset(), LD->isVolatile()); 5684 } else { 5685 return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(), 5686 LD->getSrcValueOffset(), LD->isVolatile(), 5687 LD->getAlignment()); 5688 } 5689} 5690 5691/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes. 5692static SDOperand PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, 5693 const X86Subtarget *Subtarget) { 5694 SDOperand Cond = N->getOperand(0); 5695 5696 // If we have SSE[12] support, try to form min/max nodes. 5697 if (Subtarget->hasSSE2() && 5698 (N->getValueType(0) == MVT::f32 || N->getValueType(0) == MVT::f64)) { 5699 if (Cond.getOpcode() == ISD::SETCC) { 5700 // Get the LHS/RHS of the select. 5701 SDOperand LHS = N->getOperand(1); 5702 SDOperand RHS = N->getOperand(2); 5703 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); 5704 5705 unsigned Opcode = 0; 5706 if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) { 5707 switch (CC) { 5708 default: break; 5709 case ISD::SETOLE: // (X <= Y) ? X : Y -> min 5710 case ISD::SETULE: 5711 case ISD::SETLE: 5712 if (!UnsafeFPMath) break; 5713 // FALL THROUGH. 5714 case ISD::SETOLT: // (X olt/lt Y) ? X : Y -> min 5715 case ISD::SETLT: 5716 Opcode = X86ISD::FMIN; 5717 break; 5718 5719 case ISD::SETOGT: // (X > Y) ? X : Y -> max 5720 case ISD::SETUGT: 5721 case ISD::SETGT: 5722 if (!UnsafeFPMath) break; 5723 // FALL THROUGH. 5724 case ISD::SETUGE: // (X uge/ge Y) ? X : Y -> max 5725 case ISD::SETGE: 5726 Opcode = X86ISD::FMAX; 5727 break; 5728 } 5729 } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) { 5730 switch (CC) { 5731 default: break; 5732 case ISD::SETOGT: // (X > Y) ? Y : X -> min 5733 case ISD::SETUGT: 5734 case ISD::SETGT: 5735 if (!UnsafeFPMath) break; 5736 // FALL THROUGH. 5737 case ISD::SETUGE: // (X uge/ge Y) ? Y : X -> min 5738 case ISD::SETGE: 5739 Opcode = X86ISD::FMIN; 5740 break; 5741 5742 case ISD::SETOLE: // (X <= Y) ? Y : X -> max 5743 case ISD::SETULE: 5744 case ISD::SETLE: 5745 if (!UnsafeFPMath) break; 5746 // FALL THROUGH. 
5747 case ISD::SETOLT: // (X olt/lt Y) ? Y : X -> max 5748 case ISD::SETLT: 5749 Opcode = X86ISD::FMAX; 5750 break; 5751 } 5752 } 5753 5754 if (Opcode) 5755 return DAG.getNode(Opcode, N->getValueType(0), LHS, RHS); 5756 } 5757 5758 } 5759 5760 return SDOperand(); 5761} 5762 5763/// PerformFORCombine - Do target-specific dag combines on X86ISD::FOR and 5764/// X86ISD::FXOR nodes. 5765static SDOperand PerformFORCombine(SDNode *N, SelectionDAG &DAG) { 5766 assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR); 5767 // F[X]OR(0.0, x) -> x 5768 // F[X]OR(x, 0.0) -> x 5769 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0))) 5770 if (C->getValueAPF().isPosZero()) 5771 return N->getOperand(1); 5772 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1))) 5773 if (C->getValueAPF().isPosZero()) 5774 return N->getOperand(0); 5775 return SDOperand(); 5776} 5777 5778/// PerformFANDCombine - Do target-specific dag combines on X86ISD::FAND nodes. 5779static SDOperand PerformFANDCombine(SDNode *N, SelectionDAG &DAG) { 5780 // FAND(0.0, x) -> 0.0 5781 // FAND(x, 0.0) -> 0.0 5782 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0))) 5783 if (C->getValueAPF().isPosZero()) 5784 return N->getOperand(0); 5785 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1))) 5786 if (C->getValueAPF().isPosZero()) 5787 return N->getOperand(1); 5788 return SDOperand(); 5789} 5790 5791 5792SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N, 5793 DAGCombinerInfo &DCI) const { 5794 SelectionDAG &DAG = DCI.DAG; 5795 switch (N->getOpcode()) { 5796 default: break; 5797 case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, Subtarget); 5798 case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget); 5799 case X86ISD::FXOR: 5800 case X86ISD::FOR: return PerformFORCombine(N, DAG); 5801 case X86ISD::FAND: return PerformFANDCombine(N, DAG); 5802 } 5803 5804 return SDOperand(); 5805} 5806 5807//===----------------------------------------------------------------------===// 5808// X86 Inline Assembly Support 5809//===----------------------------------------------------------------------===// 5810 5811/// getConstraintType - Given a constraint letter, return the type of 5812/// constraint it is for this target. 5813X86TargetLowering::ConstraintType 5814X86TargetLowering::getConstraintType(const std::string &Constraint) const { 5815 if (Constraint.size() == 1) { 5816 switch (Constraint[0]) { 5817 case 'A': 5818 case 'r': 5819 case 'R': 5820 case 'l': 5821 case 'q': 5822 case 'Q': 5823 case 'x': 5824 case 'Y': 5825 return C_RegisterClass; 5826 default: 5827 break; 5828 } 5829 } 5830 return TargetLowering::getConstraintType(Constraint); 5831} 5832 5833/// LowerXConstraint - try to replace an X constraint, which matches anything, 5834/// with another that has more specific requirements based on the type of the 5835/// corresponding operand. 5836void X86TargetLowering::lowerXConstraint(MVT::ValueType ConstraintVT, 5837 std::string& s) const { 5838 if (MVT::isFloatingPoint(ConstraintVT)) { 5839 if (Subtarget->hasSSE2()) 5840 s = "Y"; 5841 else if (Subtarget->hasSSE1()) 5842 s = "x"; 5843 else 5844 s = "f"; 5845 } else 5846 return TargetLowering::lowerXConstraint(ConstraintVT, s); 5847} 5848 5849/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 5850/// vector. If it is invalid, don't add anything to Ops. 
5851 void X86TargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
5852                                                      char Constraint,
5853                                                      std::vector<SDOperand>&Ops,
5854                                                      SelectionDAG &DAG) {
5855   SDOperand Result(0, 0);
5856 
5857   switch (Constraint) {
5858   default: break;
5859   case 'I':
5860     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
5861       if (C->getValue() <= 31) {
5862         Result = DAG.getTargetConstant(C->getValue(), Op.getValueType());
5863         break;
5864       }
5865     }
5866     return;
5867   case 'N':
5868     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
5869       if (C->getValue() <= 255) {
5870         Result = DAG.getTargetConstant(C->getValue(), Op.getValueType());
5871         break;
5872       }
5873     }
5874     return;
5875   case 'i': {
5876     // Literal immediates are always ok.
5877     if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op)) {
5878       Result = DAG.getTargetConstant(CST->getValue(), Op.getValueType());
5879       break;
5880     }
5881 
5882     // If we are in non-pic codegen mode, we allow the address of a global
5883     // (with an optional displacement) to be used with 'i'.
5884     GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
5885     int64_t Offset = 0;
5886 
5887     // Match (GA), (GA+C) or (C+GA)
5888     if (GA) {
5889       Offset = GA->getOffset();
5890     } else if (Op.getOpcode() == ISD::ADD) {
5891       ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
5892       GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
5893       if (C && GA) {
5894         Offset = GA->getOffset()+C->getValue();
5895       } else {
           // Try the commuted form, (C+GA).
5896         C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
5897         GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
5898         if (C && GA)
5899           Offset = GA->getOffset()+C->getValue();
5900         else
5901           C = 0, GA = 0;
5902       }
5903     }
5904 
5905     if (GA) {
5906       // If addressing this global requires a load (e.g. in PIC mode), we
5907       // can't match.
5908       if (Subtarget->GVRequiresExtraLoad(GA->getGlobal(), getTargetMachine(),
5909                                          false))
5910         return;
5911 
5912       Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
5913                                       Offset);
5914       Result = Op;
5915       break;
5916     }
5917 
5918     // Otherwise, not valid for this mode.
5919     return;
5920   }
5921   }
5922 
5923   if (Result.Val) {
5924     Ops.push_back(Result);
5925     return;
5926   }
5927   return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
5928 }
5929 
5930 std::vector<unsigned> X86TargetLowering::
5931 getRegClassForInlineAsmConstraint(const std::string &Constraint,
5932                                   MVT::ValueType VT) const {
5933   if (Constraint.size() == 1) {
5934     // FIXME: not handling fp-stack yet!
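    // E.g. the 'A' constraint below names the EAX:EDX pair, which GCC uses
    // for 64-bit values on 32-bit x86.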
5935 switch (Constraint[0]) { // GCC X86 Constraint Letters 5936 default: break; // Unknown constraint letter 5937 case 'A': // EAX/EDX 5938 if (VT == MVT::i32 || VT == MVT::i64) 5939 return make_vector<unsigned>(X86::EAX, X86::EDX, 0); 5940 break; 5941 case 'q': // Q_REGS (GENERAL_REGS in 64-bit mode) 5942 case 'Q': // Q_REGS 5943 if (VT == MVT::i32) 5944 return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0); 5945 else if (VT == MVT::i16) 5946 return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0); 5947 else if (VT == MVT::i8) 5948 return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0); 5949 else if (VT == MVT::i64) 5950 return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX, 0); 5951 break; 5952 } 5953 } 5954 5955 return std::vector<unsigned>(); 5956} 5957 5958std::pair<unsigned, const TargetRegisterClass*> 5959X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, 5960 MVT::ValueType VT) const { 5961 // First, see if this is a constraint that directly corresponds to an LLVM 5962 // register class. 5963 if (Constraint.size() == 1) { 5964 // GCC Constraint Letters 5965 switch (Constraint[0]) { 5966 default: break; 5967 case 'r': // GENERAL_REGS 5968 case 'R': // LEGACY_REGS 5969 case 'l': // INDEX_REGS 5970 if (VT == MVT::i64 && Subtarget->is64Bit()) 5971 return std::make_pair(0U, X86::GR64RegisterClass); 5972 if (VT == MVT::i32) 5973 return std::make_pair(0U, X86::GR32RegisterClass); 5974 else if (VT == MVT::i16) 5975 return std::make_pair(0U, X86::GR16RegisterClass); 5976 else if (VT == MVT::i8) 5977 return std::make_pair(0U, X86::GR8RegisterClass); 5978 break; 5979 case 'y': // MMX_REGS if MMX allowed. 5980 if (!Subtarget->hasMMX()) break; 5981 return std::make_pair(0U, X86::VR64RegisterClass); 5982 break; 5983 case 'Y': // SSE_REGS if SSE2 allowed 5984 if (!Subtarget->hasSSE2()) break; 5985 // FALL THROUGH. 5986 case 'x': // SSE_REGS if SSE1 allowed 5987 if (!Subtarget->hasSSE1()) break; 5988 5989 switch (VT) { 5990 default: break; 5991 // Scalar SSE types. 5992 case MVT::f32: 5993 case MVT::i32: 5994 return std::make_pair(0U, X86::FR32RegisterClass); 5995 case MVT::f64: 5996 case MVT::i64: 5997 return std::make_pair(0U, X86::FR64RegisterClass); 5998 // Vector types. 5999 case MVT::v16i8: 6000 case MVT::v8i16: 6001 case MVT::v4i32: 6002 case MVT::v2i64: 6003 case MVT::v4f32: 6004 case MVT::v2f64: 6005 return std::make_pair(0U, X86::VR128RegisterClass); 6006 } 6007 break; 6008 } 6009 } 6010 6011 // Use the default implementation in TargetLowering to convert the register 6012 // constraint into a member of a register class. 6013 std::pair<unsigned, const TargetRegisterClass*> Res; 6014 Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 6015 6016 // Not found as a standard register? 6017 if (Res.second == 0) { 6018 // GCC calls "st(0)" just plain "st". 6019 if (StringsEqualNoCase("{st}", Constraint)) { 6020 Res.first = X86::ST0; 6021 Res.second = X86::RFP80RegisterClass; 6022 } 6023 6024 return Res; 6025 } 6026 6027 // Otherwise, check to see if this is a register class of the wrong value 6028 // type. For example, we want to map "{ax},i32" -> {eax}, we don't want it to 6029 // turn into {ax},{dx}. 6030 if (Res.second->hasType(VT)) 6031 return Res; // Correct type already, nothing to do. 6032 6033 // All of the single-register GCC register classes map their values onto 6034 // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp". 
If we
6035   // really want an 8-bit, 32-bit or 64-bit register, map to the appropriate
6036   // register class and return the appropriate register.
6037   if (Res.second != X86::GR16RegisterClass)
6038     return Res;
6039 
6040   if (VT == MVT::i8) {
6041     unsigned DestReg = 0;
6042     switch (Res.first) {
6043     default: break;
6044     case X86::AX: DestReg = X86::AL; break;
6045     case X86::DX: DestReg = X86::DL; break;
6046     case X86::CX: DestReg = X86::CL; break;
6047     case X86::BX: DestReg = X86::BL; break;
6048     }
6049     if (DestReg) {
6050       Res.first = DestReg;
6051       Res.second = X86::GR8RegisterClass;
6052     }
6053   } else if (VT == MVT::i32) {
6054     unsigned DestReg = 0;
6055     switch (Res.first) {
6056     default: break;
6057     case X86::AX: DestReg = X86::EAX; break;
6058     case X86::DX: DestReg = X86::EDX; break;
6059     case X86::CX: DestReg = X86::ECX; break;
6060     case X86::BX: DestReg = X86::EBX; break;
6061     case X86::SI: DestReg = X86::ESI; break;
6062     case X86::DI: DestReg = X86::EDI; break;
6063     case X86::BP: DestReg = X86::EBP; break;
6064     case X86::SP: DestReg = X86::ESP; break;
6065     }
6066     if (DestReg) {
6067       Res.first = DestReg;
6068       Res.second = X86::GR32RegisterClass;
6069     }
6070   } else if (VT == MVT::i64) {
6071     unsigned DestReg = 0;
6072     switch (Res.first) {
6073     default: break;
6074     case X86::AX: DestReg = X86::RAX; break;
6075     case X86::DX: DestReg = X86::RDX; break;
6076     case X86::CX: DestReg = X86::RCX; break;
6077     case X86::BX: DestReg = X86::RBX; break;
6078     case X86::SI: DestReg = X86::RSI; break;
6079     case X86::DI: DestReg = X86::RDI; break;
6080     case X86::BP: DestReg = X86::RBP; break;
6081     case X86::SP: DestReg = X86::RSP; break;
6082     }
6083     if (DestReg) {
6084       Res.first = DestReg;
6085       Res.second = X86::GR64RegisterClass;
6086     }
6087   }
6088 
6089   return Res;
6090 }
6091 