X86ISelLowering.cpp revision 2f9bb1a0a47622d959ec0cee25a7346c55066817
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/StringExtras.h"
using namespace llvm;

X86TargetLowering::X86TargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  X86ScalarSSE = Subtarget->hasSSE2();
  X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;

  // Set up the TargetLowering object.

  // X86 is weird, it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setSetCCResultType(MVT::i8);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setSchedulingPreference(SchedulingForRegPressure);
  setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
  setStackPointerRegisterToSaveRestore(X86StackPtr);

  if (Subtarget->isTargetDarwin()) {
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmp(false);
    setUseUnderscoreLongJmp(false);
  } else if (Subtarget->isTargetMingw()) {
    // MS runtime is weird: it exports _setjmp, but longjmp!
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(false);
  } else {
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(true);
  }

  // Set up the register classes.
  addRegisterClass(MVT::i8, X86::GR8RegisterClass);
  addRegisterClass(MVT::i16, X86::GR16RegisterClass);
  addRegisterClass(MVT::i32, X86::GR32RegisterClass);
  if (Subtarget->is64Bit())
    addRegisterClass(MVT::i64, X86::GR64RegisterClass);

  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Expand);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i8   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i16  , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::UINT_TO_FP     , MVT::i64  , Expand);
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Promote);
  } else {
    if (X86ScalarSSE)
      // If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
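      // (Note: converting an unsigned value via a wider *signed* conversion,
      // as the Promote actions above do, is safe because the zero-extended
      // value is always non-negative; e.g. the u16 value 0xFFFF zero-extends
      // to the i32 value 65535, which converts exactly to 65535.0.)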
      setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Expand);
    else
      setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Promote);
  }

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);
  // SSE has no i16 to fp conversion, only i32
  if (X86ScalarSSE)
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
  else {
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Custom);
    setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
  }

  if (!Subtarget->is64Bit()) {
    // Custom lower SINT_TO_FP and FP_TO_SINT from/to i64 in 32-bit mode.
    setOperationAction(ISD::SINT_TO_FP     , MVT::i64  , Custom);
    setOperationAction(ISD::FP_TO_SINT     , MVT::i64  , Custom);
  }

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTs, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_SINT       , MVT::i8   , Promote);

  if (X86ScalarSSE) {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
  } else {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Custom);
    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i8   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i16  , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT     , MVT::i64  , Expand);
    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Promote);
  } else {
    if (X86ScalarSSE && !Subtarget->hasSSE3())
      // Expand FP_TO_UINT into a select.
      // FIXME: We would like to use a Custom expander here eventually to do
      // the optimal thing for SSE vs. the default expansion in the legalizer.
      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Expand);
    else
      // With SSE3 we can use fisttpll to convert to a signed i64.
      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Promote);
  }

  // TODO: when we have SSE, these could be more efficient, by using movd/movq.
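  // (movd/movq would move the raw bits directly between a GPR and an XMM
  // register; the Expand action below instead goes through a store and a
  // reload on the stack.)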
  if (!X86ScalarSSE) {
    setOperationAction(ISD::BIT_CONVERT      , MVT::f32  , Expand);
    setOperationAction(ISD::BIT_CONVERT      , MVT::i32  , Expand);
  }

  setOperationAction(ISD::BR_JT            , MVT::Other, Expand);
  setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
  setOperationAction(ISD::BR_CC            , MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC        , MVT::Other, Expand);
  setOperationAction(ISD::MEMMOVE          , MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16  , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8   , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
  setOperationAction(ISD::FP_ROUND_INREG   , MVT::f32  , Expand);
  setOperationAction(ISD::FREM             , MVT::f64  , Expand);

  setOperationAction(ISD::CTPOP            , MVT::i8   , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i8   , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i8   , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i16  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i16  , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i16  , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i32  , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i32  , Expand);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::CTPOP          , MVT::i64  , Expand);
    setOperationAction(ISD::CTTZ           , MVT::i64  , Expand);
    setOperationAction(ISD::CTLZ           , MVT::i64  , Expand);
  }

  setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);
  setOperationAction(ISD::BSWAP            , MVT::i16  , Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT           , MVT::i1   , Promote);
  setOperationAction(ISD::SELECT           , MVT::i8   , Promote);
  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT           , MVT::i16  , Custom);
  setOperationAction(ISD::SELECT           , MVT::i32  , Custom);
  setOperationAction(ISD::SELECT           , MVT::f32  , Custom);
  setOperationAction(ISD::SELECT           , MVT::f64  , Custom);
  setOperationAction(ISD::SETCC            , MVT::i8   , Custom);
  setOperationAction(ISD::SETCC            , MVT::i16  , Custom);
  setOperationAction(ISD::SETCC            , MVT::i32  , Custom);
  setOperationAction(ISD::SETCC            , MVT::f32  , Custom);
  setOperationAction(ISD::SETCC            , MVT::f64  , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SELECT         , MVT::i64  , Custom);
    setOperationAction(ISD::SETCC          , MVT::i64  , Custom);
  }
  // X86 ret instruction may pop stack.
  setOperationAction(ISD::RET              , MVT::Other, Custom);
  // Darwin ABI issue.
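  // (On Darwin with PIC, global and constant-pool addresses are formed
  // relative to the PIC base register, and some globals must be reached
  // through an extra non-lazy-pointer load, hence the custom lowering of
  // these address nodes.)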
  setOperationAction(ISD::ConstantPool    , MVT::i32  , Custom);
  setOperationAction(ISD::JumpTable       , MVT::i32  , Custom);
  setOperationAction(ISD::GlobalAddress   , MVT::i32  , Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32  , Custom);
  setOperationAction(ISD::ExternalSymbol  , MVT::i32  , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::ConstantPool  , MVT::i64  , Custom);
    setOperationAction(ISD::JumpTable     , MVT::i64  , Custom);
    setOperationAction(ISD::GlobalAddress , MVT::i64  , Custom);
    setOperationAction(ISD::ExternalSymbol, MVT::i64  , Custom);
  }
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
  setOperationAction(ISD::SHL_PARTS       , MVT::i32  , Custom);
  setOperationAction(ISD::SRA_PARTS       , MVT::i32  , Custom);
  setOperationAction(ISD::SRL_PARTS       , MVT::i32  , Custom);
  // X86 wants to expand memset / memcpy itself.
  setOperationAction(ISD::MEMSET          , MVT::Other, Custom);
  setOperationAction(ISD::MEMCPY          , MVT::Other, Custom);

  // We don't have line number support yet.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin() &&
      !Subtarget->isTargetELF() &&
      !Subtarget->isTargetCygMing())
    setOperationAction(ISD::LABEL, MVT::Other, Expand);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART         , MVT::Other, Custom);
  setOperationAction(ISD::VAARG           , MVT::Other, Expand);
  setOperationAction(ISD::VAEND           , MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::VACOPY        , MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY        , MVT::Other, Expand);

  setOperationAction(ISD::STACKSAVE,    MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
  if (Subtarget->isTargetCygMing())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  else
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);

  if (X86ScalarSSE) {
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f64, Custom);
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f64, Custom);
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    // Use ANDPD and ORPD to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // We don't support sin/cos/fmod
    setOperationAction(ISD::FSIN , MVT::f64, Expand);
    setOperationAction(ISD::FCOS , MVT::f64, Expand);
    setOperationAction(ISD::FREM , MVT::f64, Expand);
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FREM , MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(+0.0); // xorps / xorpd
  } else {
    // Set up the FP register classes.
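    // (Only f64 gets a register class on this x87 path; f32 then has no
    // legal register class and is, in effect, handled as f64, which is
    // harmless since the x87 unit computes in extended precision anyway.)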
    addRegisterClass(MVT::f64, X86::RFPRegisterClass);

    setOperationAction(ISD::UNDEF,     MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN           , MVT::f64  , Expand);
      setOperationAction(ISD::FCOS           , MVT::f64  , Expand);
    }

    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    addLegalFPImmediate(+0.0); // FLD0
    addLegalFPImmediate(+1.0); // FLD1
    addLegalFPImmediate(-0.0); // FLD0/FCHS
    addLegalFPImmediate(-1.0); // FLD1/FCHS
  }

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::Vector + 1;
       VT != (unsigned)MVT::LAST_VALUETYPE; VT++) {
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FADD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSUB, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FMUL, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  (MVT::ValueType)VT, Expand);
  }

  if (Subtarget->hasMMX()) {
    addRegisterClass(MVT::v8i8,  X86::VR64RegisterClass);
    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);
    addRegisterClass(MVT::v1i64, X86::VR64RegisterClass);

    // FIXME: add MMX packed arithmetics

    setOperationAction(ISD::ADD, MVT::v8i8,  Legal);
    setOperationAction(ISD::ADD, MVT::v4i16, Legal);
    setOperationAction(ISD::ADD, MVT::v2i32, Legal);
    setOperationAction(ISD::ADD, MVT::v1i64, Legal);

    setOperationAction(ISD::SUB, MVT::v8i8,  Legal);
    setOperationAction(ISD::SUB, MVT::v4i16, Legal);
    setOperationAction(ISD::SUB, MVT::v2i32, Legal);

    setOperationAction(ISD::MULHS, MVT::v4i16, Legal);
    setOperationAction(ISD::MUL,   MVT::v4i16, Legal);

    setOperationAction(ISD::AND, MVT::v8i8,  Promote);
    AddPromotedToType (ISD::AND, MVT::v8i8,  MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v4i16, Promote);
    AddPromotedToType (ISD::AND, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v2i32, Promote);
    AddPromotedToType (ISD::AND, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v1i64, Legal);

    setOperationAction(ISD::OR,  MVT::v8i8,  Promote);
    AddPromotedToType (ISD::OR,  MVT::v8i8,  MVT::v1i64);
    setOperationAction(ISD::OR,  MVT::v4i16, Promote);
    AddPromotedToType (ISD::OR,  MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::OR,  MVT::v2i32, Promote);
    AddPromotedToType (ISD::OR,  MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::OR,  MVT::v1i64, Legal);

    setOperationAction(ISD::XOR, MVT::v8i8,  Promote);
    AddPromotedToType (ISD::XOR, MVT::v8i8,  MVT::v1i64);

    setOperationAction(ISD::XOR, MVT::v4i16, Promote);
    AddPromotedToType (ISD::XOR, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v2i32, Promote);
    AddPromotedToType (ISD::XOR, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v1i64, Legal);

    setOperationAction(ISD::LOAD, MVT::v8i8,  Promote);
    AddPromotedToType (ISD::LOAD, MVT::v8i8,  MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v4i16, Promote);
    AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v2i32, Promote);
    AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v1i64, Legal);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8,  Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);

    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8,  Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8,  Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom);
  }

  if (Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::FADD,               MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB,               MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL,               MVT::v4f32, Legal);
    setOperationAction(ISD::FDIV,               MVT::v4f32, Legal);
    setOperationAction(ISD::LOAD,               MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
  }

  if (Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);

    setOperationAction(ISD::ADD, MVT::v16i8, Legal);
    setOperationAction(ISD::ADD, MVT::v8i16, Legal);
    setOperationAction(ISD::ADD, MVT::v4i32, Legal);
    setOperationAction(ISD::ADD, MVT::v2i64, Legal);
    setOperationAction(ISD::SUB, MVT::v16i8, Legal);
    setOperationAction(ISD::SUB, MVT::v8i16, Legal);
    setOperationAction(ISD::SUB, MVT::v4i32, Legal);
    setOperationAction(ISD::SUB, MVT::v2i64, Legal);
    setOperationAction(ISD::MUL, MVT::v8i16, Legal);
    setOperationAction(ISD::FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV, MVT::v2f64, Legal);

    setOperationAction(ISD::SCALAR_TO_VECTOR,  MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,  MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
    // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
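    // (An f32 element occupies two adjacent i16 lanes, so the custom lowering
    // can bitcast to v8i16 and use a pair of PINSRW operations on the
    // corresponding lanes.)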
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::BUILD_VECTOR,       (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    }
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::AND,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::OR,     (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR,     (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::XOR,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::LOAD,   (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD,   (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
    }

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD,   MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD,   MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
  setTargetDAGCombine(ISD::SELECT);

  computeRegisterProperties();

  // FIXME: These should be based on subtarget info. Plus, the values should
  // be smaller when optimizing for size.
  maxStoresPerMemset = 16;  // For %llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 16;  // For %llvm.memcpy -> sequence of stores
  maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
  allowUnalignedMemoryAccesses = true; // x86 supports it!
}


//===----------------------------------------------------------------------===//
//               Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "X86GenCallingConv.inc"

/// LowerRET - Lower an ISD::RET node.
SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
  assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args");

  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  CCState CCInfo(CC, getTargetMachine(), RVLocs);
  CCInfo.AnalyzeReturn(Op.Val, RetCC_X86);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
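  // (Marking the return registers as live-out keeps later passes from
  // treating the copies into them as dead code.)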
  if (DAG.getMachineFunction().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      if (RVLocs[i].isRegLoc())
        DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
  }

  SDOperand Chain = Op.getOperand(0);
  SDOperand Flag;

  // Copy the result values into the output registers.
  if (RVLocs.size() != 1 || !RVLocs[0].isRegLoc() ||
      RVLocs[0].getLocReg() != X86::ST0) {
    for (unsigned i = 0; i != RVLocs.size(); ++i) {
      CCValAssign &VA = RVLocs[i];
      assert(VA.isRegLoc() && "Can only return in registers!");
      Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1),
                               Flag);
      Flag = Chain.getValue(1);
    }
  } else {
    // We need to handle a destination of ST0 specially, because it isn't
    // really a register.
    SDOperand Value = Op.getOperand(1);

    // If this is an FP return with ScalarSSE, we need to move the value from
    // an XMM register onto the fp-stack.
    if (X86ScalarSSE) {
      SDOperand MemLoc;

      // If this is a load into a scalar-sse value, don't store the loaded
      // value back to the stack only to reload it: just reuse the scalar-sse
      // load's memory location.
      if (ISD::isNON_EXTLoad(Value.Val) &&
          (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) {
        Chain  = Value.getOperand(0);
        MemLoc = Value.getOperand(1);
      } else {
        // Spill the value to memory and reload it into top of stack.
        unsigned Size = MVT::getSizeInBits(RVLocs[0].getValVT())/8;
        MachineFunction &MF = DAG.getMachineFunction();
        int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
        MemLoc = DAG.getFrameIndex(SSFI, getPointerTy());
        Chain = DAG.getStore(Op.getOperand(0), Value, MemLoc, NULL, 0);
      }
      SDVTList Tys = DAG.getVTList(MVT::f64, MVT::Other);
      SDOperand Ops[] = {Chain, MemLoc, DAG.getValueType(RVLocs[0].getValVT())};
      Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
      Chain = Value.getValue(1);
    }

    SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
    SDOperand Ops[] = { Chain, Value };
    Chain = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops, 2);
    Flag = Chain.getValue(1);
  }

  SDOperand BytesToPop = DAG.getConstant(getBytesToPopOnReturn(), MVT::i16);
  if (Flag.Val)
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop, Flag);
  else
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop);
}


/// LowerCallResult - Lower the result values of an ISD::CALL into the
/// appropriate copies out of the appropriate physical registers.  This assumes
/// that Chain/InFlag are the input chain/flag to use, and that TheCall is the
/// call being lowered.  This returns an SDNode with the same number of values
/// as the ISD::CALL.
SDNode *X86TargetLowering::
LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall,
                unsigned CallingConv, SelectionDAG &DAG) {

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallingConv, getTargetMachine(), RVLocs);
  CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);

  SmallVector<SDOperand, 8> ResultVals;

  // Copy all of the result registers out of their specified physreg.
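  // (Each CopyFromReg consumes the flag produced by its predecessor, gluing
  // the copies directly to the call so the result registers cannot be
  // clobbered in between.)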
  if (RVLocs.size() != 1 || RVLocs[0].getLocReg() != X86::ST0) {
    for (unsigned i = 0; i != RVLocs.size(); ++i) {
      Chain = DAG.getCopyFromReg(Chain, RVLocs[i].getLocReg(),
                                 RVLocs[i].getValVT(), InFlag).getValue(1);
      InFlag = Chain.getValue(2);
      ResultVals.push_back(Chain.getValue(0));
    }
  } else {
    // Copies from the FP stack are special, as ST0 isn't a valid register
    // before the fp stackifier runs.

    // Copy ST0 into an RFP register with FP_GET_RESULT.
    SDVTList Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
    SDOperand GROps[] = { Chain, InFlag };
    SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2);
    Chain  = RetVal.getValue(1);
    InFlag = RetVal.getValue(2);

    // If we are using ScalarSSE, store ST(0) to the stack and reload it into
    // an XMM register.
    if (X86ScalarSSE) {
      // FIXME: Currently the FST is flagged to the FP_GET_RESULT.  This
      // shouldn't be necessary except that RFP cannot be live across
      // multiple blocks.  When the stackifier is fixed, they can be uncoupled.
      MachineFunction &MF = DAG.getMachineFunction();
      int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
      SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
      SDOperand Ops[] = {
        Chain, RetVal, StackSlot, DAG.getValueType(RVLocs[0].getValVT()), InFlag
      };
      Chain = DAG.getNode(X86ISD::FST, MVT::Other, Ops, 5);
      RetVal = DAG.getLoad(RVLocs[0].getValVT(), Chain, StackSlot, NULL, 0);
      Chain = RetVal.getValue(1);
    }

    if (RVLocs[0].getValVT() == MVT::f32 && !X86ScalarSSE)
      // FIXME: we would really like to remember that this FP_ROUND
      // operation is okay to eliminate if we allow excess FP precision.
      RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
    ResultVals.push_back(RetVal);
  }

  // Merge everything together with a MERGE_VALUES node.
  ResultVals.push_back(Chain);
  return DAG.getNode(ISD::MERGE_VALUES, TheCall->getVTList(),
                     &ResultVals[0], ResultVals.size()).Val;
}


//===----------------------------------------------------------------------===//
//                C & StdCall Calling Convention implementation
//===----------------------------------------------------------------------===//
//  StdCall is the standard calling convention for most of the Windows API.
//  It differs from the C calling convention just a little: the callee, not
//  the caller, should clean up the stack, and symbols should also be
//  decorated in some fancy way :) It doesn't support any vector arguments.

/// AddLiveIn - This helper function adds the specified physical register to
/// the MachineFunction as a live-in value.  It also creates a corresponding
/// virtual register for it.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
                          const TargetRegisterClass *RC) {
  assert(RC->contains(PReg) && "Not the correct regclass!");
  unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
  MF.addLiveIn(PReg, VReg);
  return VReg;
}

SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG,
                                               bool isStdCall) {
  unsigned NumArgs = Op.Val->getNumValues() - 1;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  // Assign locations to all of the incoming arguments.
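  // (CCState, driven by the tablegen'd CC_X86_32_C table, assigns each formal
  // argument either a physical register or a fixed stack offset; the loop
  // below materializes a DAG value for each assignment.)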
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(MF.getFunction()->getCallingConv(), getTargetMachine(),
                 ArgLocs);
  CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_C);

  SmallVector<SDOperand, 8> ArgValues;
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
    // places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();

    if (VA.isRegLoc()) {
      MVT::ValueType RegVT = VA.getLocVT();
      TargetRegisterClass *RC;
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else {
        assert(MVT::isVector(RegVT));
        RC = X86::VR128RegisterClass;
      }

      unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
      SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);

      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits.  Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));

      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());

      // Create the nodes corresponding to a load from this parameter slot.
      int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
                                      VA.getLocMemOffset());
      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
      ArgValues.push_back(DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0));
    }
  }

  unsigned StackSize = CCInfo.getNextStackOffset();

  ArgValues.push_back(Root);

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg)
    VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);

  if (isStdCall && !isVarArg) {
    BytesToPopOnReturn  = StackSize;  // Callee pops everything.
    BytesCallerReserves = 0;
  } else {
    BytesToPopOnReturn  = 0;          // Callee pops nothing.

    // If this is an sret function, the return should pop the hidden pointer.
    if (NumArgs &&
        (cast<ConstantSDNode>(Op.getOperand(3))->getValue() &
         ISD::ParamFlags::StructReturn))
      BytesToPopOnReturn = 4;

    BytesCallerReserves = StackSize;
  }

  RegSaveFrameIndex = 0xAAAAAAA;  // X86-64 only.
  ReturnAddrIndex = 0;            // No return address slot generated yet.

  MF.getInfo<X86MachineFunctionInfo>()
    ->setBytesToPopOnReturn(BytesToPopOnReturn);

  // Return the new list of results.
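  // (MERGE_VALUES bundles the argument values together with the token chain,
  // so its results line up one-for-one with the values of the
  // FORMAL_ARGUMENTS node being lowered.)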
  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}

SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG,
                                            unsigned CC) {
  SDOperand Chain  = Op.getOperand(0);
  bool isVarArg    = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall  = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee = Op.getOperand(4);
  unsigned NumOps  = (Op.getNumOperands() - 5) / 2;

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_C);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, getPointerTy()));

  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;

  SDOperand StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());
      if (StackPtr.Val == 0)
        StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
      SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
    }
  }

  // If the first argument is an sret pointer, remember it.
  bool isSRet = NumOps &&
                (cast<ConstantSDNode>(Op.getOperand(6))->getValue() &
                 ISD::ParamFlags::StructReturn);

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // ELF / PIC requires GOT in the EBX register before function calls via PLT
  // GOT pointer.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT()) {
    Chain = DAG.getCopyToReg(Chain, X86::EBX,
                             DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
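  // (Target* address nodes are already in their final form and are ignored by
  // the legalizer, so wrapping the callee here keeps it from being lowered a
  // second time.)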
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use an extra load for direct calls to dllimported functions
    // in non-JIT mode.
    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
                                        getTargetMachine(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add an implicit use of the GOT pointer in EBX.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT())
    Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  unsigned NumBytesForCalleeToPush = 0;

  if (CC == CallingConv::X86_StdCall) {
    if (isVarArg)
      NumBytesForCalleeToPush = isSRet ? 4 : 0;
    else
      NumBytesForCalleeToPush = NumBytes;
  } else {
    // If this is a call to a struct-return function, the callee
    // pops the hidden struct pointer, so we have to push it back.
    // This is common for Darwin/X86, Linux & Mingw32 targets.
    NumBytesForCalleeToPush = isSRet ? 4 : 0;
  }

  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
}


//===----------------------------------------------------------------------===//
//                   FastCall Calling Convention implementation
//===----------------------------------------------------------------------===//
//
// The X86 'fastcall' calling convention passes up to two integer arguments in
// registers (an appropriate portion of ECX/EDX), passes arguments in C order,
// requires that the callee pop its arguments off the stack (allowing proper
// tail calls), and uses the same return value conventions as the C calling
// convention.
//
// This calling convention always arranges for the callee pop value to be 8n+4
// bytes, which is needed for tail recursion elimination and stack alignment
// reasons.
SDOperand
X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);

  // Assign locations to all of the incoming arguments.
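  // (CC_X86_32_FastCall assigns the first two 32-bit integer arguments to ECX
  // and EDX; everything else gets a stack slot.)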
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(MF.getFunction()->getCallingConv(), getTargetMachine(),
                 ArgLocs);
  CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_FastCall);

  SmallVector<SDOperand, 8> ArgValues;
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
    // places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();

    if (VA.isRegLoc()) {
      MVT::ValueType RegVT = VA.getLocVT();
      TargetRegisterClass *RC;
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else {
        assert(MVT::isVector(RegVT));
        RC = X86::VR128RegisterClass;
      }

      unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
      SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);

      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits.  Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));

      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());

      // Create the nodes corresponding to a load from this parameter slot.
      int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
                                      VA.getLocMemOffset());
      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
      ArgValues.push_back(DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0));
    }
  }

  ArgValues.push_back(Root);

  unsigned StackSize = CCInfo.getNextStackOffset();

  if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
    // Make sure the stack-argument area is 8n+4 bytes, so that the arguments
    // stay aligned once the 4-byte return address has been pushed.
    if ((StackSize & 7) == 0)
      StackSize += 4;
  }

  VarArgsFrameIndex = 0xAAAAAAA;   // fastcc functions can't have varargs.
  RegSaveFrameIndex = 0xAAAAAAA;   // X86-64 only.
  ReturnAddrIndex = 0;             // No return address slot generated yet.
  BytesToPopOnReturn = StackSize;  // Callee pops all stack arguments.
  BytesCallerReserves = 0;

  MF.getInfo<X86MachineFunctionInfo>()
    ->setBytesToPopOnReturn(BytesToPopOnReturn);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}

SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG,
                                               unsigned CC) {
  SDOperand Chain  = Op.getOperand(0);
  bool isTailCall  = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee = Op.getOperand(4);

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_FastCall);

  // Get a count of how many bytes are to be pushed on the stack.
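  // (getNextStackOffset() is the high-water mark left by AnalyzeCallOperands:
  // the total size of the outgoing stack-argument area.  E.g. with 8 bytes of
  // stack arguments, 4 bytes of padding are added below, so 12 bytes plus the
  // 4-byte return address keep the stack 8-byte aligned.)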
  unsigned NumBytes = CCInfo.getNextStackOffset();

  if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
    // Make sure the stack-argument area is 8n+4 bytes, so that the arguments
    // stay aligned once the 4-byte return address has been pushed.
    if ((NumBytes & 7) == 0)
      NumBytes += 4;
  }

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, getPointerTy()));

  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;

  SDOperand StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());
      if (StackPtr.Val == 0)
        StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
      SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use an extra load for direct calls to dllimported functions
    // in non-JIT mode.
    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
                                        getTargetMachine(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  // ELF / PIC requires GOT in the EBX register before function calls via PLT
  // GOT pointer.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT()) {
    Chain = DAG.getCopyToReg(Chain, X86::EBX,
                             DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add an implicit use of the GOT pointer in EBX.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT())
    Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  // FIXME: Do not generate X86ISD::TAILCALL for now.
  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Returns a flag for retval copy to use.
  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
}


//===----------------------------------------------------------------------===//
//                  X86-64 C Calling Convention implementation
//===----------------------------------------------------------------------===//

SDOperand
X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  static const unsigned GPR64ArgRegs[] = {
    X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
  };
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
    X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
  };

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(MF.getFunction()->getCallingConv(), getTargetMachine(),
                 ArgLocs);
  CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_C);

  SmallVector<SDOperand, 8> ArgValues;
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
    // places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();

    if (VA.isRegLoc()) {
      MVT::ValueType RegVT = VA.getLocVT();
      TargetRegisterClass *RC;
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else if (RegVT == MVT::i64)
        RC = X86::GR64RegisterClass;
      else if (RegVT == MVT::f32)
        RC = X86::FR32RegisterClass;
      else if (RegVT == MVT::f64)
        RC = X86::FR64RegisterClass;
      else {
        assert(MVT::isVector(RegVT));
        RC = X86::VR128RegisterClass;
      }

      unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
      SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);

      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits.  Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));

      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());

      // Create the nodes corresponding to a load from this parameter slot.
      int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
                                      VA.getLocMemOffset());
      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
      ArgValues.push_back(DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0));
    }
  }

  unsigned StackSize = CCInfo.getNextStackOffset();

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 6);
    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);

    // For X86-64, if there are vararg parameters that are passed via
    // registers, then we must store them to their spots on the stack so they
    // may be loaded by dereferencing the result of va_next.
    VarArgsGPOffset = NumIntRegs * 8;
    VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16;
    VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
    RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16);

    // Store the integer parameter registers.
    SmallVector<SDOperand, 8> MemOps;
    SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
    SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                              DAG.getConstant(VarArgsGPOffset, getPointerTy()));
    for (; NumIntRegs != 6; ++NumIntRegs) {
      unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
                                X86::GR64RegisterClass);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                        DAG.getConstant(8, getPointerTy()));
    }

    // Now store the XMM (fp + vector) parameter registers.
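    // (The register save area is laid out as 48 bytes of GPRs (6 x 8)
    // followed by 128 bytes of XMM registers (8 x 16); VarArgsGPOffset and
    // VarArgsFPOffset are the corresponding gp_offset/fp_offset values that
    // va_arg will consult.)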
    FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                      DAG.getConstant(VarArgsFPOffset, getPointerTy()));
    for (; NumXMMRegs != 8; ++NumXMMRegs) {
      unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
                                X86::VR128RegisterClass);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                        DAG.getConstant(16, getPointerTy()));
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                         &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);

  ReturnAddrIndex = 0;     // No return address slot generated yet.
  BytesToPopOnReturn = 0;  // Callee pops nothing.
  BytesCallerReserves = StackSize;

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}

SDOperand
X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG,
                                        unsigned CC) {
  SDOperand Chain  = Op.getOperand(0);
  bool isVarArg    = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall  = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee = Op.getOperand(4);

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_C);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, getPointerTy()));

  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;

  SDOperand StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());
      if (StackPtr.Val == 0)
        StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
      SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  if (isVarArg) {
    // From the AMD64 ABI document:
    // For calls that may call functions that use varargs or stdargs
    // (prototype-less calls or calls to functions containing ellipsis (...) in
    // the declaration) %al is used as a hidden argument to specify the number
    // of SSE registers used. The contents of %al do not need to match exactly
    // the number of registers, but must be an upper bound on the number of SSE
    // registers used and is in the range 0 - 8 inclusive.

    // Count the number of XMM registers allocated.
    static const unsigned XMMArgRegs[] = {
      X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
      X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
    };
    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);

    Chain = DAG.getCopyToReg(Chain, X86::AL,
                             DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use an extra load for direct calls to dllimported functions
    // in non-JIT mode.
    if (getTargetMachine().getCodeModel() != CodeModel::Large
        && !Subtarget->GVRequiresExtraLoad(G->getGlobal(),
                                           getTargetMachine(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    if (getTargetMachine().getCodeModel() != CodeModel::Large)
      Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  // FIXME: Do not generate X86ISD::TAILCALL for now.
  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Returns a flag for retval copy to use.
  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(0, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
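  // (LowerCallResult runs the return-side calling-convention analysis and
  // builds the MERGE_VALUES node that stands in for the results of the
  // original ISD::CALL.)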
  return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
}


//===----------------------------------------------------------------------===//
//  Other Lowering Hooks
//===----------------------------------------------------------------------===//


SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
  if (ReturnAddrIndex == 0) {
    // Set up a frame object for the return address.
    MachineFunction &MF = DAG.getMachineFunction();
    if (Subtarget->is64Bit())
      ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(8, -8);
    else
      ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);
  }

  return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
}



/// translateX86CC - Do a one-to-one translation of an ISD::CondCode to the
/// X86-specific condition code. It returns false if it cannot do a direct
/// translation. X86CC is the translated CondCode. LHS/RHS are modified as
/// needed.
static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
                           unsigned &X86CC, SDOperand &LHS, SDOperand &RHS,
                           SelectionDAG &DAG) {
  X86CC = X86::COND_INVALID;
  if (!isFP) {
    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
      if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
        // X > -1  -> X == 0, jump !sign.
        RHS = DAG.getConstant(0, RHS.getValueType());
        X86CC = X86::COND_NS;
        return true;
      } else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
        // X < 0  -> X == 0, jump on sign.
        X86CC = X86::COND_S;
        return true;
      }
    }

    switch (SetCCOpcode) {
    default: break;
    case ISD::SETEQ:  X86CC = X86::COND_E;  break;
    case ISD::SETGT:  X86CC = X86::COND_G;  break;
    case ISD::SETGE:  X86CC = X86::COND_GE; break;
    case ISD::SETLT:  X86CC = X86::COND_L;  break;
    case ISD::SETLE:  X86CC = X86::COND_LE; break;
    case ISD::SETNE:  X86CC = X86::COND_NE; break;
    case ISD::SETULT: X86CC = X86::COND_B;  break;
    case ISD::SETUGT: X86CC = X86::COND_A;  break;
    case ISD::SETULE: X86CC = X86::COND_BE; break;
    case ISD::SETUGE: X86CC = X86::COND_AE; break;
    }
  } else {
    // On a floating point condition, the flags are set as follows:
    //  ZF  PF  CF   op
    //   0 | 0 | 0 | X > Y
    //   0 | 0 | 1 | X < Y
    //   1 | 0 | 0 | X == Y
    //   1 | 1 | 1 | unordered
    bool Flip = false;
    switch (SetCCOpcode) {
    default: break;
    case ISD::SETUEQ:
    case ISD::SETEQ:  X86CC = X86::COND_E;  break;
    case ISD::SETOLT: Flip = true; // Fallthrough
    case ISD::SETOGT:
    case ISD::SETGT:  X86CC = X86::COND_A;  break;
    case ISD::SETOLE: Flip = true; // Fallthrough
    case ISD::SETOGE:
    case ISD::SETGE:  X86CC = X86::COND_AE; break;
    case ISD::SETUGT: Flip = true; // Fallthrough
    case ISD::SETULT:
    case ISD::SETLT:  X86CC = X86::COND_B;  break;
    case ISD::SETUGE: Flip = true; // Fallthrough
    case ISD::SETULE:
    case ISD::SETLE:  X86CC = X86::COND_BE; break;
    case ISD::SETONE:
    case ISD::SETNE:  X86CC = X86::COND_NE; break;
    case ISD::SETUO:  X86CC = X86::COND_P;  break;
    case ISD::SETO:   X86CC = X86::COND_NP; break;
    }
    if (Flip)
      std::swap(LHS, RHS);
  }

  return X86CC != X86::COND_INVALID;
}

/// hasFPCMov - is there a floating point cmov for the specific X86 condition
/// code.
/// The current x86 ISA includes the following FP cmov instructions:
/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
static bool hasFPCMov(unsigned X86CC) {
  switch (X86CC) {
  default:
    return false;
  case X86::COND_B:
  case X86::COND_BE:
  case X86::COND_E:
  case X86::COND_P:
  case X86::COND_A:
  case X86::COND_AE:
  case X86::COND_NE:
  case X86::COND_NP:
    return true;
  }
}

/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if its value falls within the specified range
/// [Low, Hi).
static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) {
  if (Op.getOpcode() == ISD::UNDEF)
    return true;

  unsigned Val = cast<ConstantSDNode>(Op)->getValue();
  return (Val >= Low && Val < Hi);
}

/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if its value is equal to the specified value.
static bool isUndefOrEqual(SDOperand Op, unsigned Val) {
  if (Op.getOpcode() == ISD::UNDEF)
    return true;
  return cast<ConstantSDNode>(Op)->getValue() == Val;
}

/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFD.
bool X86::isPSHUFDMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Check that the mask doesn't reference the second vector.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    if (cast<ConstantSDNode>(Arg)->getValue() >= 4)
      return false;
  }

  return true;
}

/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFHW.
bool X86::isPSHUFHWMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 8)
    return false;

  // The lower quadword must be copied in order.
  for (unsigned i = 0; i != 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    if (cast<ConstantSDNode>(Arg)->getValue() != i)
      return false;
  }

  // The upper quadword may be shuffled.
  for (unsigned i = 4; i != 8; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val < 4 || Val > 7)
      return false;
  }

  return true;
}

/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFLW.
bool X86::isPSHUFLWMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 8)
    return false;

  // The upper quadword must be copied in order.
  for (unsigned i = 4; i != 8; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;

  // The lower quadword may be shuffled.
  for (unsigned i = 0; i != 4; ++i)
    if (!isUndefOrInRange(N->getOperand(i), 0, 4))
      return false;

  return true;
}

/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to SHUFP*.
static bool isSHUFPMask(const SDOperand *Elems, unsigned NumElems) {
  if (NumElems != 2 && NumElems != 4) return false;

  unsigned Half = NumElems / 2;
  for (unsigned i = 0; i < Half; ++i)
    if (!isUndefOrInRange(Elems[i], 0, NumElems))
      return false;
  for (unsigned i = Half; i < NumElems; ++i)
    if (!isUndefOrInRange(Elems[i], NumElems, NumElems*2))
      return false;

  return true;
}

bool X86::isSHUFPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return ::isSHUFPMask(N->op_begin(), N->getNumOperands());
}

/// isCommutedSHUFP - Returns true if the shuffle mask is exactly
/// the reverse of what x86 shuffles want. x86 shuffles require the lower
/// half elements to come from vector 1 (which would equal the dest.) and
/// the upper half to come from vector 2.
static bool isCommutedSHUFP(const SDOperand *Ops, unsigned NumOps) {
  if (NumOps != 2 && NumOps != 4) return false;

  unsigned Half = NumOps / 2;
  for (unsigned i = 0; i < Half; ++i)
    if (!isUndefOrInRange(Ops[i], NumOps, NumOps*2))
      return false;
  for (unsigned i = Half; i < NumOps; ++i)
    if (!isUndefOrInRange(Ops[i], 0, NumOps))
      return false;
  return true;
}

static bool isCommutedSHUFP(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return isCommutedSHUFP(N->op_begin(), N->getNumOperands());
}

/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
bool X86::isMOVHLPSMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3
  return isUndefOrEqual(N->getOperand(0), 6) &&
         isUndefOrEqual(N->getOperand(1), 7) &&
         isUndefOrEqual(N->getOperand(2), 2) &&
         isUndefOrEqual(N->getOperand(3), 3);
}

/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
/// <2, 3, 2, 3>
bool X86::isMOVHLPS_v_undef_Mask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect bit0 == 2, bit1 == 3, bit2 == 2, bit3 == 3
  return isUndefOrEqual(N->getOperand(0), 2) &&
         isUndefOrEqual(N->getOperand(1), 3) &&
         isUndefOrEqual(N->getOperand(2), 2) &&
         isUndefOrEqual(N->getOperand(3), 3);
}

/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
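/// For v4f32, for instance, this accepts masks like <4, 5, 2, 3>: the lower
/// half of the result is taken from the lower half of V2 while the upper
/// half stays the upper half of V1.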
bool X86::isMOVLPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4)
    return false;

  for (unsigned i = 0; i < NumElems/2; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i + NumElems))
      return false;

  for (unsigned i = NumElems/2; i < NumElems; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;

  return true;
}

/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}
/// and MOVLHPS.
bool X86::isMOVHPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4)
    return false;

  for (unsigned i = 0; i < NumElems/2; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;

  for (unsigned i = 0; i < NumElems/2; ++i) {
    SDOperand Arg = N->getOperand(i + NumElems/2);
    if (!isUndefOrEqual(Arg, i + NumElems))
      return false;
  }

  return true;
}

/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
static bool isUNPCKLMask(const SDOperand *Elts, unsigned NumElts,
                         bool V2IsSplat = false) {
  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
    return false;

  for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
    SDOperand BitI  = Elts[i];
    SDOperand BitI1 = Elts[i+1];
    if (!isUndefOrEqual(BitI, j))
      return false;
    if (V2IsSplat) {
      if (isUndefOrEqual(BitI1, NumElts))
        return false;
    } else {
      if (!isUndefOrEqual(BitI1, j + NumElts))
        return false;
    }
  }

  return true;
}

bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return ::isUNPCKLMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
}

/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKH.
static bool isUNPCKHMask(const SDOperand *Elts, unsigned NumElts,
                         bool V2IsSplat = false) {
  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
    return false;

  for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
    SDOperand BitI  = Elts[i];
    SDOperand BitI1 = Elts[i+1];
    if (!isUndefOrEqual(BitI, j + NumElts/2))
      return false;
    if (V2IsSplat) {
      if (isUndefOrEqual(BitI1, NumElts))
        return false;
    } else {
      if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts))
        return false;
    }
  }

  return true;
}

bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return ::isUNPCKHMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
}

/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e.
/// vector_shuffle v, undef, <0, 0, 1, 1>
bool X86::isUNPCKL_v_undef_Mask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
    return false;

  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
    SDOperand BitI  = N->getOperand(i);
    SDOperand BitI1 = N->getOperand(i+1);

    if (!isUndefOrEqual(BitI, j))
      return false;
    if (!isUndefOrEqual(BitI1, j))
      return false;
  }

  return true;
}

/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
/// <2, 2, 3, 3>
bool X86::isUNPCKH_v_undef_Mask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
    return false;

  for (unsigned i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) {
    SDOperand BitI  = N->getOperand(i);
    SDOperand BitI1 = N->getOperand(i + 1);

    if (!isUndefOrEqual(BitI, j))
      return false;
    if (!isUndefOrEqual(BitI1, j))
      return false;
  }

  return true;
}

/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSS,
/// MOVSD, and MOVD, i.e. setting the lowest element.
static bool isMOVLMask(const SDOperand *Elts, unsigned NumElts) {
  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
    return false;

  if (!isUndefOrEqual(Elts[0], NumElts))
    return false;

  for (unsigned i = 1; i < NumElts; ++i) {
    if (!isUndefOrEqual(Elts[i], i))
      return false;
  }

  return true;
}

bool X86::isMOVLMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return ::isMOVLMask(N->op_begin(), N->getNumOperands());
}

/// isCommutedMOVL - Returns true if the shuffle mask is exactly the reverse
/// of what x86 movss wants. x86 movs requires the lowest element to be the
/// lowest element of vector 2 and the other elements to come from vector 1
/// in order.
static bool isCommutedMOVL(const SDOperand *Ops, unsigned NumOps,
                           bool V2IsSplat = false,
                           bool V2IsUndef = false) {
  if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
    return false;

  if (!isUndefOrEqual(Ops[0], 0))
    return false;

  for (unsigned i = 1; i < NumOps; ++i) {
    SDOperand Arg = Ops[i];
    if (!(isUndefOrEqual(Arg, i+NumOps) ||
          (V2IsUndef && isUndefOrInRange(Arg, NumOps, NumOps*2)) ||
          (V2IsSplat && isUndefOrEqual(Arg, NumOps))))
      return false;
  }

  return true;
}

static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false,
                           bool V2IsUndef = false) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return isCommutedMOVL(N->op_begin(), N->getNumOperands(),
                        V2IsSplat, V2IsUndef);
}

/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
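/// For v4f32 the only accepted mask is <1, 1, 3, 3> (allowing undefs), which
/// duplicates each odd element into the even lane below it.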
bool X86::isMOVSHDUPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect 1, 1, 3, 3
  for (unsigned i = 0; i < 2; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 1) return false;
  }

  bool HasHi = false;
  for (unsigned i = 2; i < 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 3) return false;
    HasHi = true;
  }

  // Don't use movshdup if it can be done with a shufps.
  return HasHi;
}

/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
bool X86::isMOVSLDUPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect 0, 0, 2, 2
  for (unsigned i = 0; i < 2; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 0) return false;
  }

  bool HasHi = false;
  for (unsigned i = 2; i < 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 2) return false;
    HasHi = true;
  }

  // Don't use movsldup if it can be done with a shufps.
  return HasHi;
}

/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
/// a splat of a single element.
static bool isSplatMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned NumElems = N->getNumOperands();
  SDOperand ElementBase;
  unsigned i = 0;
  for (; i != NumElems; ++i) {
    SDOperand Elt = N->getOperand(i);
    if (isa<ConstantSDNode>(Elt)) {
      ElementBase = Elt;
      break;
    }
  }

  if (!ElementBase.Val)
    return false;

  for (; i != NumElems; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    if (Arg != ElementBase) return false;
  }

  // Make sure it is a splat of the first vector operand.
  return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems;
}

/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
/// a splat of a single element and it's a 2 or 4 element mask.
bool X86::isSplatMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  // We can only splat 64-bit and 32-bit quantities with a single instruction.
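  // (A v4i32 splat, for instance, is a single pshufd; v8i16 and v16i8
  // splats are widened to v4i32 first -- see PromoteSplat.)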
  if (N->getNumOperands() != 4 && N->getNumOperands() != 2)
    return false;
  return ::isSplatMask(N);
}

/// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of element zero.
bool X86::isSplatLoMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
    if (!isUndefOrEqual(N->getOperand(i), 0))
      return false;
  return true;
}

/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
/// instructions.
unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
  unsigned NumOperands = N->getNumOperands();
  unsigned Shift = (NumOperands == 4) ? 2 : 1;
  unsigned Mask = 0;
  for (unsigned i = 0; i < NumOperands; ++i) {
    unsigned Val = 0;
    SDOperand Arg = N->getOperand(NumOperands-i-1);
    if (Arg.getOpcode() != ISD::UNDEF)
      Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val >= NumOperands) Val -= NumOperands;
    Mask |= Val;
    if (i != NumOperands - 1)
      Mask <<= Shift;
  }

  return Mask;
}

/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with the PSHUFHW
/// instruction.
unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
  unsigned Mask = 0;
  // 8 nodes, but we only care about the last 4.
  for (unsigned i = 7; i >= 4; --i) {
    unsigned Val = 0;
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() != ISD::UNDEF)
      Val = cast<ConstantSDNode>(Arg)->getValue();
    Mask |= (Val - 4);
    if (i != 4)
      Mask <<= 2;
  }

  return Mask;
}

/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with the PSHUFLW
/// instruction.
unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
  unsigned Mask = 0;
  // 8 nodes, but we only care about the first 4.
  for (int i = 3; i >= 0; --i) {
    unsigned Val = 0;
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() != ISD::UNDEF)
      Val = cast<ConstantSDNode>(Arg)->getValue();
    Mask |= Val;
    if (i != 0)
      Mask <<= 2;
  }

  return Mask;
}

/// isPSHUFHW_PSHUFLWMask - Return true if the specified VECTOR_SHUFFLE
/// operand specifies an 8-element shuffle that can be broken into a pair of
/// PSHUFHW and PSHUFLW.
static bool isPSHUFHW_PSHUFLWMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 8)
    return false;

  // The lower quadword may only be shuffled within itself.
  for (unsigned i = 0; i != 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val >= 4)
      return false;
  }

  // The upper quadword may only be shuffled within itself.
  for (unsigned i = 4; i != 8; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val < 4 || Val > 7)
      return false;
  }

  return true;
}

/// CommuteVectorShuffle - Swap vector_shuffle operands as well as the
/// values in their permute mask.
static SDOperand CommuteVectorShuffle(SDOperand Op, SDOperand &V1,
                                      SDOperand &V2, SDOperand &Mask,
                                      SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  MVT::ValueType MaskVT = Mask.getValueType();
  MVT::ValueType EltVT = MVT::getVectorBaseType(MaskVT);
  unsigned NumElems = Mask.getNumOperands();
  SmallVector<SDOperand, 8> MaskVec;

  for (unsigned i = 0; i != NumElems; ++i) {
    SDOperand Arg = Mask.getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) {
      MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
      continue;
    }
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val < NumElems)
      MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
    else
      MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
  }

  std::swap(V1, V2);
  Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
}

/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
/// match movhlps. The lower half elements should come from the upper half of
/// V1 (and in order), and the upper half elements should come from the upper
/// half of V2 (and in order).
static bool ShouldXformToMOVHLPS(SDNode *Mask) {
  unsigned NumElems = Mask->getNumOperands();
  if (NumElems != 4)
    return false;
  for (unsigned i = 0, e = 2; i != e; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i+2))
      return false;
  for (unsigned i = 2; i != 4; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i+4))
      return false;
  return true;
}

/// isScalarLoadToVector - Returns true if the node is a scalar load that
/// is promoted to a vector.
static inline bool isScalarLoadToVector(SDNode *N) {
  if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
    N = N->getOperand(0).Val;
    return ISD::isNON_EXTLoad(N);
  }
  return false;
}

/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
/// match movlp{s|d}. The lower half elements should come from the lower half
/// of V1 (and in order), and the upper half elements should come from the
/// upper half of V2 (and in order). Since V1 will become the source of the
/// MOVLP, it must be either a vector load or a scalar load to vector.
static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, SDNode *Mask) {
  if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
    return false;
  // If V2 is a vector load, don't do this transformation; we will instead
  // try to fold the load into a shufps.
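  // (shufps can take its second source directly from memory, e.g.
  // "shufps $imm, (mem), %xmm0", so the separate load goes away.)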
  if (ISD::isNON_EXTLoad(V2))
    return false;

  unsigned NumElems = Mask->getNumOperands();
  if (NumElems != 2 && NumElems != 4)
    return false;
  for (unsigned i = 0, e = NumElems/2; i != e; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i))
      return false;
  for (unsigned i = NumElems/2; i != NumElems; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems))
      return false;
  return true;
}

/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
/// all the same.
static bool isSplatVector(SDNode *N) {
  if (N->getOpcode() != ISD::BUILD_VECTOR)
    return false;

  SDOperand SplatValue = N->getOperand(0);
  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
    if (N->getOperand(i) != SplatValue)
      return false;
  return true;
}

/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
/// to an undef.
static bool isUndefShuffle(SDNode *N) {
  if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
    return false;

  SDOperand V1 = N->getOperand(0);
  SDOperand V2 = N->getOperand(1);
  SDOperand Mask = N->getOperand(2);
  unsigned NumElems = Mask.getNumOperands();
  for (unsigned i = 0; i != NumElems; ++i) {
    SDOperand Arg = Mask.getOperand(i);
    if (Arg.getOpcode() != ISD::UNDEF) {
      unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
      if (Val < NumElems && V1.getOpcode() != ISD::UNDEF)
        return false;
      else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF)
        return false;
    }
  }
  return true;
}

/// NormalizeMask - V2 is a splat, so modify the mask (if needed) so that all
/// elements that point to V2 point to its first element.
static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) {
  assert(Mask.getOpcode() == ISD::BUILD_VECTOR);

  bool Changed = false;
  SmallVector<SDOperand, 8> MaskVec;
  unsigned NumElems = Mask.getNumOperands();
  for (unsigned i = 0; i != NumElems; ++i) {
    SDOperand Arg = Mask.getOperand(i);
    if (Arg.getOpcode() != ISD::UNDEF) {
      unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
      if (Val > NumElems) {
        Arg = DAG.getConstant(NumElems, Arg.getValueType());
        Changed = true;
      }
    }
    MaskVec.push_back(Arg);
  }

  if (Changed)
    Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(),
                       &MaskVec[0], MaskVec.size());
  return Mask;
}

/// getMOVLMask - Returns a vector_shuffle mask for a movs{s|d}, movd
/// operation of specified width.
static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) {
  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
  MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);

  SmallVector<SDOperand, 8> MaskVec;
  MaskVec.push_back(DAG.getConstant(NumElems, BaseVT));
  for (unsigned i = 1; i != NumElems; ++i)
    MaskVec.push_back(DAG.getConstant(i, BaseVT));
  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
}

/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation
/// of specified width.
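/// For NumElems == 4, for example, this builds the mask <0, 4, 1, 5>.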
static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) {
  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
  MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
  SmallVector<SDOperand, 8> MaskVec;
  for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
    MaskVec.push_back(DAG.getConstant(i, BaseVT));
    MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT));
  }
  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
}

/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation
/// of specified width.
static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) {
  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
  MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
  unsigned Half = NumElems/2;
  SmallVector<SDOperand, 8> MaskVec;
  for (unsigned i = 0; i != Half; ++i) {
    MaskVec.push_back(DAG.getConstant(i + Half, BaseVT));
    MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT));
  }
  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
}

/// getZeroVector - Returns a vector of specified type with all zero elements.
///
static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) {
  assert(MVT::isVector(VT) && "Expected a vector type");
  unsigned NumElems = getVectorNumElements(VT);
  MVT::ValueType EVT = MVT::getVectorBaseType(VT);
  bool isFP = MVT::isFloatingPoint(EVT);
  SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT);
  SmallVector<SDOperand, 8> ZeroVec(NumElems, Zero);
  return DAG.getNode(ISD::BUILD_VECTOR, VT, &ZeroVec[0], ZeroVec.size());
}

/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32.
///
static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) {
  SDOperand V1 = Op.getOperand(0);
  SDOperand Mask = Op.getOperand(2);
  MVT::ValueType VT = Op.getValueType();
  unsigned NumElems = Mask.getNumOperands();
  Mask = getUnpacklMask(NumElems, DAG);
  while (NumElems != 4) {
    V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
    NumElems >>= 1;
  }
  V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);

  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
  Mask = getZeroVector(MaskVT, DAG);
  SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1,
                                  DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask);
  return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
}

/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0.
static inline bool isZeroNode(SDOperand Elt) {
  return ((isa<ConstantSDNode>(Elt) &&
           cast<ConstantSDNode>(Elt)->getValue() == 0) ||
          (isa<ConstantFPSDNode>(Elt) &&
           cast<ConstantFPSDNode>(Elt)->isExactlyValue(0.0)));
}

/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
/// vector with a zero or undef vector.
static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT,
                                             unsigned NumElems, unsigned Idx,
                                             bool isZero, SelectionDAG &DAG) {
  SDOperand V1 = isZero ?
    getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT);
  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
  MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT);
  SDOperand Zero = DAG.getConstant(0, EVT);
  SmallVector<SDOperand, 8> MaskVec(NumElems, Zero);
  MaskVec[Idx] = DAG.getConstant(NumElems, EVT);
  SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                               &MaskVec[0], MaskVec.size());
  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
}

/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
///
static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros,
                                       unsigned NumNonZero, unsigned NumZero,
                                       SelectionDAG &DAG, TargetLowering &TLI) {
  if (NumNonZero > 8)
    return SDOperand();

  SDOperand V(0, 0);
  bool First = true;
  for (unsigned i = 0; i < 16; ++i) {
    bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
    if (ThisIsNonZero && First) {
      if (NumZero)
        V = getZeroVector(MVT::v8i16, DAG);
      else
        V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
      First = false;
    }

    if ((i & 1) != 0) {
      SDOperand ThisElt(0, 0), LastElt(0, 0);
      bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
      if (LastIsNonZero) {
        LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1));
      }
      if (ThisIsNonZero) {
        ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i));
        ThisElt = DAG.getNode(ISD::SHL, MVT::i16,
                              ThisElt, DAG.getConstant(8, MVT::i8));
        if (LastIsNonZero)
          ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt);
      } else
        ThisElt = LastElt;

      if (ThisElt.Val)
        V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt,
                        DAG.getConstant(i/2, TLI.getPointerTy()));
    }
  }

  return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V);
}

/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
///
static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros,
                                       unsigned NumNonZero, unsigned NumZero,
                                       SelectionDAG &DAG, TargetLowering &TLI) {
  if (NumNonZero > 4)
    return SDOperand();

  SDOperand V(0, 0);
  bool First = true;
  for (unsigned i = 0; i < 8; ++i) {
    bool isNonZero = (NonZeros & (1 << i)) != 0;
    if (isNonZero) {
      if (First) {
        if (NumZero)
          V = getZeroVector(MVT::v8i16, DAG);
        else
          V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
        First = false;
      }
      V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i),
                      DAG.getConstant(i, TLI.getPointerTy()));
    }
  }

  return V;
}

SDOperand
X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  // All zeros are handled with pxor.
  if (ISD::isBuildVectorAllZeros(Op.Val))
    return Op;

  // All ones are handled with pcmpeqd.
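  // (pcmpeqd of a register against itself sets every bit, so the constant
  // needs no load from the constant pool.)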
  if (ISD::isBuildVectorAllOnes(Op.Val))
    return Op;

  MVT::ValueType VT = Op.getValueType();
  MVT::ValueType EVT = MVT::getVectorBaseType(VT);
  unsigned EVTBits = MVT::getSizeInBits(EVT);

  unsigned NumElems = Op.getNumOperands();
  unsigned NumZero = 0;
  unsigned NumNonZero = 0;
  unsigned NonZeros = 0;
  std::set<SDOperand> Values;
  for (unsigned i = 0; i < NumElems; ++i) {
    SDOperand Elt = Op.getOperand(i);
    if (Elt.getOpcode() != ISD::UNDEF) {
      Values.insert(Elt);
      if (isZeroNode(Elt))
        NumZero++;
      else {
        NonZeros |= (1 << i);
        NumNonZero++;
      }
    }
  }

  if (NumNonZero == 0)
    // Must be a mix of zero and undef. Return a zero vector.
    return getZeroVector(VT, DAG);

  // A splat is obviously ok; let the legalizer expand it to a shuffle.
  if (Values.size() == 1)
    return SDOperand();

  // Special case for a single non-zero element.
  if (NumNonZero == 1) {
    unsigned Idx = CountTrailingZeros_32(NonZeros);
    SDOperand Item = Op.getOperand(Idx);
    Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
    if (Idx == 0)
      // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
      return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx,
                                         NumZero > 0, DAG);

    if (EVTBits == 32) {
      // Turn it into a shuffle of zero and zero-extended scalar to vector.
      Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0,
                                         DAG);
      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
      MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT);
      SmallVector<SDOperand, 8> MaskVec;
      for (unsigned i = 0; i < NumElems; i++)
        MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT));
      SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                   &MaskVec[0], MaskVec.size());
      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item,
                         DAG.getNode(ISD::UNDEF, VT), Mask);
    }
  }

  // Let the legalizer expand 2-wide build_vectors.
  if (EVTBits == 64)
    return SDOperand();

  // If the element VT is < 32 bits, convert it to inserts into a zero vector.
  if (EVTBits == 8 && NumElems == 16) {
    SDOperand V = LowerBuildVectorv16i8(Op, NonZeros, NumNonZero, NumZero, DAG,
                                        *this);
    if (V.Val) return V;
  }

  if (EVTBits == 16 && NumElems == 8) {
    SDOperand V = LowerBuildVectorv8i16(Op, NonZeros, NumNonZero, NumZero, DAG,
                                        *this);
    if (V.Val) return V;
  }

  // If the element VT is == 32 bits, turn it into a number of shuffles.
  SmallVector<SDOperand, 8> V;
  V.resize(NumElems);
  if (NumElems == 4 && NumZero > 0) {
    for (unsigned i = 0; i < 4; ++i) {
      bool isZero = !(NonZeros & (1 << i));
      if (isZero)
        V[i] = getZeroVector(VT, DAG);
      else
        V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
    }

    for (unsigned i = 0; i < 2; ++i) {
      switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
        default: break;
        case 0:
          V[i] = V[i*2];  // Must be a zero vector.
          break;
        case 1:
          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2],
                             getMOVLMask(NumElems, DAG));
          break;
        case 2:
          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
                             getMOVLMask(NumElems, DAG));
          break;
        case 3:
          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
                             getUnpacklMask(NumElems, DAG));
          break;
      }
    }

    // Take advantage of the fact that a GR32 to VR128 scalar_to_vector
    // (i.e. movd) clears the upper bits.
    // FIXME: we can do the same for the v4f32 case when we know both parts of
    // the lower half come from scalar_to_vector (loadf32). We should do
    // that in the post-legalizer dag combiner with target specific hooks.
    if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0)
      return V[0];
    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
    MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT);
    SmallVector<SDOperand, 8> MaskVec;
    bool Reverse = (NonZeros & 0x3) == 2;
    for (unsigned i = 0; i < 2; ++i)
      if (Reverse)
        MaskVec.push_back(DAG.getConstant(1-i, EVT));
      else
        MaskVec.push_back(DAG.getConstant(i, EVT));
    Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
    for (unsigned i = 0; i < 2; ++i)
      if (Reverse)
        MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT));
      else
        MaskVec.push_back(DAG.getConstant(i+NumElems, EVT));
    SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                     &MaskVec[0], MaskVec.size());
    return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask);
  }

  if (Values.size() > 2) {
    // Expand into a number of unpckl*.
    // e.g. for v4f32
    //   Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
    //         : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
    //   Step 2: unpcklps X, Y ==>    <3, 2, 1, 0>
    SDOperand UnpckMask = getUnpacklMask(NumElems, DAG);
    for (unsigned i = 0; i < NumElems; ++i)
      V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
    NumElems >>= 1;
    while (NumElems != 0) {
      for (unsigned i = 0; i < NumElems; ++i)
        V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems],
                           UnpckMask);
      NumElems >>= 1;
    }
    return V[0];
  }

  return SDOperand();
}

SDOperand
X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
  SDOperand V1 = Op.getOperand(0);
  SDOperand V2 = Op.getOperand(1);
  SDOperand PermMask = Op.getOperand(2);
  MVT::ValueType VT = Op.getValueType();
  unsigned NumElems = PermMask.getNumOperands();
  bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
  bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
  bool V1IsSplat = false;
  bool V2IsSplat = false;

  if (isUndefShuffle(Op.Val))
    return DAG.getNode(ISD::UNDEF, VT);

  if (isSplatMask(PermMask.Val)) {
    if (NumElems <= 4) return Op;
    // Promote it to a v4i32 splat.
    return PromoteSplat(Op, DAG);
  }

  if (X86::isMOVLMask(PermMask.Val))
    return (V1IsUndef) ?
      V2 : Op;

  if (X86::isMOVSHDUPMask(PermMask.Val) ||
      X86::isMOVSLDUPMask(PermMask.Val) ||
      X86::isMOVHLPSMask(PermMask.Val) ||
      X86::isMOVHPMask(PermMask.Val) ||
      X86::isMOVLPMask(PermMask.Val))
    return Op;

  if (ShouldXformToMOVHLPS(PermMask.Val) ||
      ShouldXformToMOVLP(V1.Val, V2.Val, PermMask.Val))
    return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);

  bool Commuted = false;
  V1IsSplat = isSplatVector(V1.Val);
  V2IsSplat = isSplatVector(V2.Val);
  if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
    std::swap(V1IsSplat, V2IsSplat);
    std::swap(V1IsUndef, V2IsUndef);
    Commuted = true;
  }

  if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) {
    if (V2IsUndef) return V1;
    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
    if (V2IsSplat) {
      // V2 is a splat, so the mask may be malformed. That is, it may point
      // to any V2 element. The instruction selector won't like this. Get
      // a corrected mask and commute to form a proper MOVS{S|D}.
      SDOperand NewMask = getMOVLMask(NumElems, DAG);
      if (NewMask.Val != PermMask.Val)
        Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
    }
    return Op;
  }

  if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
      X86::isUNPCKH_v_undef_Mask(PermMask.Val) ||
      X86::isUNPCKLMask(PermMask.Val) ||
      X86::isUNPCKHMask(PermMask.Val))
    return Op;

  if (V2IsSplat) {
    // Normalize the mask so all entries that point to V2 point to its first
    // element, then try to match unpck{h|l} again. If a match is found,
    // return a new vector_shuffle with the corrected mask.
    SDOperand NewMask = NormalizeMask(PermMask, DAG);
    if (NewMask.Val != PermMask.Val) {
      if (X86::isUNPCKLMask(PermMask.Val, true)) {
        SDOperand NewMask = getUnpacklMask(NumElems, DAG);
        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
      } else if (X86::isUNPCKHMask(PermMask.Val, true)) {
        SDOperand NewMask = getUnpackhMask(NumElems, DAG);
        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
      }
    }
  }

  // Normalize the node to match x86 shuffle ops if needed.
  if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.Val))
    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);

  if (Commuted) {
    // Commute it back and try unpck* again.
    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
    if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
        X86::isUNPCKH_v_undef_Mask(PermMask.Val) ||
        X86::isUNPCKLMask(PermMask.Val) ||
        X86::isUNPCKHMask(PermMask.Val))
      return Op;
  }

  // If VT is integer, try PSHUF* first, then SHUFP*.
  if (MVT::isInteger(VT)) {
    if (X86::isPSHUFDMask(PermMask.Val) ||
        X86::isPSHUFHWMask(PermMask.Val) ||
        X86::isPSHUFLWMask(PermMask.Val)) {
      if (V2.getOpcode() != ISD::UNDEF)
        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
                           DAG.getNode(ISD::UNDEF, V1.getValueType()), PermMask);
      return Op;
    }

    if (X86::isSHUFPMask(PermMask.Val))
      return Op;

    // Handle v8i16 shuffle high / low shuffle node pair.
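    // Such a mask is split into two shuffles: first <m0, m1, m2, m3, 4, 5,
    // 6, 7>, which reorders the low quadword and keeps the high one fixed,
    // then <0, 1, 2, 3, m4, m5, m6, m7> for the high quadword.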
    if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) {
      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
      MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
      SmallVector<SDOperand, 8> MaskVec;
      for (unsigned i = 0; i != 4; ++i)
        MaskVec.push_back(PermMask.getOperand(i));
      for (unsigned i = 4; i != 8; ++i)
        MaskVec.push_back(DAG.getConstant(i, BaseVT));
      SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                   &MaskVec[0], MaskVec.size());
      V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
      MaskVec.clear();
      for (unsigned i = 0; i != 4; ++i)
        MaskVec.push_back(DAG.getConstant(i, BaseVT));
      for (unsigned i = 4; i != 8; ++i)
        MaskVec.push_back(PermMask.getOperand(i));
      Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
    }
  } else {
    // Floating point cases in the other order.
    if (X86::isSHUFPMask(PermMask.Val))
      return Op;
    if (X86::isPSHUFDMask(PermMask.Val) ||
        X86::isPSHUFHWMask(PermMask.Val) ||
        X86::isPSHUFLWMask(PermMask.Val)) {
      if (V2.getOpcode() != ISD::UNDEF)
        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
                           DAG.getNode(ISD::UNDEF, V1.getValueType()), PermMask);
      return Op;
    }
  }

  if (NumElems == 4) {
    MVT::ValueType MaskVT = PermMask.getValueType();
    MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT);
    SmallVector<std::pair<int, int>, 8> Locs;
    Locs.reserve(NumElems);
    SmallVector<SDOperand, 8> Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
    SmallVector<SDOperand, 8> Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
    unsigned NumHi = 0;
    unsigned NumLo = 0;
    // If no more than two elements come from either vector, the shuffle can
    // be implemented with two shuffles. The first shuffle gathers the
    // elements; the second shuffle, which takes the first shuffle as both of
    // its vector operands, puts the elements into the right order.
    for (unsigned i = 0; i != NumElems; ++i) {
      SDOperand Elt = PermMask.getOperand(i);
      if (Elt.getOpcode() == ISD::UNDEF) {
        Locs[i] = std::make_pair(-1, -1);
      } else {
        unsigned Val = cast<ConstantSDNode>(Elt)->getValue();
        if (Val < NumElems) {
          Locs[i] = std::make_pair(0, NumLo);
          Mask1[NumLo] = Elt;
          NumLo++;
        } else {
          Locs[i] = std::make_pair(1, NumHi);
          if (2+NumHi < NumElems)
            Mask1[2+NumHi] = Elt;
          NumHi++;
        }
      }
    }
    if (NumLo <= 2 && NumHi <= 2) {
      V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
                       DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                   &Mask1[0], Mask1.size()));
      for (unsigned i = 0; i != NumElems; ++i) {
        if (Locs[i].first == -1)
          continue;
        else {
          unsigned Idx = (i < NumElems/2) ? 0 : NumElems;
          Idx += Locs[i].first * (NumElems/2) + Locs[i].second;
          Mask2[i] = DAG.getConstant(Idx, MaskEVT);
        }
      }

      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1,
                         DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                     &Mask2[0], Mask2.size()));
    }

    // Break it into (shuffle shuffle_hi, shuffle_lo).
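    // Gather the elements destined for each half first: LoMask fills result
    // slots 0..NumElems/2-1 and HiMask the rest. The final shuffle then
    // merges LoShuffle and HiShuffle, taking element Locs[i].second from
    // whichever intermediate shuffle Locs[i].first selects.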
    Locs.clear();
    SmallVector<SDOperand, 8> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
    SmallVector<SDOperand, 8> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
    SmallVector<SDOperand, 8> *MaskPtr = &LoMask;
    unsigned MaskIdx = 0;
    unsigned LoIdx = 0;
    unsigned HiIdx = NumElems/2;
    for (unsigned i = 0; i != NumElems; ++i) {
      if (i == NumElems/2) {
        MaskPtr = &HiMask;
        MaskIdx = 1;
        LoIdx = 0;
        HiIdx = NumElems/2;
      }
      SDOperand Elt = PermMask.getOperand(i);
      if (Elt.getOpcode() == ISD::UNDEF) {
        Locs[i] = std::make_pair(-1, -1);
      } else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) {
        Locs[i] = std::make_pair(MaskIdx, LoIdx);
        (*MaskPtr)[LoIdx] = Elt;
        LoIdx++;
      } else {
        Locs[i] = std::make_pair(MaskIdx, HiIdx);
        (*MaskPtr)[HiIdx] = Elt;
        HiIdx++;
      }
    }

    SDOperand LoShuffle =
      DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
                  DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                              &LoMask[0], LoMask.size()));
    SDOperand HiShuffle =
      DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
                  DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                              &HiMask[0], HiMask.size()));
    SmallVector<SDOperand, 8> MaskOps;
    for (unsigned i = 0; i != NumElems; ++i) {
      if (Locs[i].first == -1) {
        MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
      } else {
        unsigned Idx = Locs[i].first * NumElems + Locs[i].second;
        MaskOps.push_back(DAG.getConstant(Idx, MaskEVT));
      }
    }
    return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle,
                       DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                   &MaskOps[0], MaskOps.size()));
  }

  return SDOperand();
}

SDOperand
X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
  if (!isa<ConstantSDNode>(Op.getOperand(1)))
    return SDOperand();

  MVT::ValueType VT = Op.getValueType();
  // TODO: handle v16i8.
  if (MVT::getSizeInBits(VT) == 16) {
    // Transform it so it matches pextrw, which produces a 32-bit result.
    MVT::ValueType EVT = (MVT::ValueType)(VT+1);
    SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
                                    Op.getOperand(0), Op.getOperand(1));
    SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract,
                                   DAG.getValueType(VT));
    return DAG.getNode(ISD::TRUNCATE, VT, Assert);
  } else if (MVT::getSizeInBits(VT) == 32) {
    SDOperand Vec = Op.getOperand(0);
    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
    if (Idx == 0)
      return Op;
    // SHUFPS the element to the lowest double word, then movss.
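    // For example, extracting element 2 of a v4f32 shuffles with the mask
    // <2, u, u, u> and then extracts element 0 of the result.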
    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
    SmallVector<SDOperand, 8> IdxVec;
    IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorBaseType(MaskVT)));
    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
    SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                 &IdxVec[0], IdxVec.size());
    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
                      Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
                       DAG.getConstant(0, getPointerTy()));
  } else if (MVT::getSizeInBits(VT) == 64) {
    SDOperand Vec = Op.getOperand(0);
    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
    if (Idx == 0)
      return Op;

    // UNPCKHPD the element to the lowest double word, then movsd.
    // Note if the lower 64 bits of the result of the UNPCKHPD is then stored
    // to an f64mem, the whole operation is folded into a single MOVHPDmr.
    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
    SmallVector<SDOperand, 8> IdxVec;
    IdxVec.push_back(DAG.getConstant(1, MVT::getVectorBaseType(MaskVT)));
    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
    SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                 &IdxVec[0], IdxVec.size());
    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
                      Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
                       DAG.getConstant(0, getPointerTy()));
  }

  return SDOperand();
}

SDOperand
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
  // Transform it so it matches pinsrw, which expects a 16-bit value in a GR32
  // as its second argument.
  MVT::ValueType VT = Op.getValueType();
  MVT::ValueType BaseVT = MVT::getVectorBaseType(VT);
  SDOperand N0 = Op.getOperand(0);
  SDOperand N1 = Op.getOperand(1);
  SDOperand N2 = Op.getOperand(2);
  if (MVT::getSizeInBits(BaseVT) == 16) {
    if (N1.getValueType() != MVT::i32)
      N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
    if (N2.getValueType() != MVT::i32)
      N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(), MVT::i32);
    return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
  } else if (MVT::getSizeInBits(BaseVT) == 32) {
    unsigned Idx = cast<ConstantSDNode>(N2)->getValue();
    if (Idx == 0) {
      // Use a movss.
      N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1);
      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
      MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
      SmallVector<SDOperand, 8> MaskVec;
      MaskVec.push_back(DAG.getConstant(4, BaseVT));
      for (unsigned i = 1; i <= 3; ++i)
        MaskVec.push_back(DAG.getConstant(i, BaseVT));
      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1,
                         DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                     &MaskVec[0], MaskVec.size()));
    } else {
      // Use two pinsrw instructions to insert a 32-bit value.
      Idx <<= 1;
      if (MVT::isFloatingPoint(N1.getValueType())) {
        if (ISD::isNON_EXTLoad(N1.Val)) {
          // Just load directly from f32mem to GR32.
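          // The i32 load reads the same address and the same bits as the
          // original f32 load, so the value reaches pinsrw without a round
          // trip through an XMM register.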
          LoadSDNode *LD = cast<LoadSDNode>(N1);
          N1 = DAG.getLoad(MVT::i32, LD->getChain(), LD->getBasePtr(),
                           LD->getSrcValue(), LD->getSrcValueOffset());
        } else {
          N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1);
          N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1);
          N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1,
                           DAG.getConstant(0, getPointerTy()));
        }
      }
      N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0);
      N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
                       DAG.getConstant(Idx, getPointerTy()));
      N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8));
      N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
                       DAG.getConstant(Idx+1, getPointerTy()));
      return DAG.getNode(ISD::BIT_CONVERT, VT, N0);
    }
  }

  return SDOperand();
}

SDOperand
X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
  return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
}

// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is
// one of the above-mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing modes. These wrapped nodes will be selected
// into MOV32ri.
SDOperand
X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  SDOperand Result = DAG.getTargetConstantPool(CP->getConstVal(),
                                               getPointerTy(),
                                               CP->getAlignment());
  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
  // With PIC, the address is actually $g + Offset.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      !Subtarget->isPICStyleRIPRel()) {
    Result = DAG.getNode(ISD::ADD, getPointerTy(),
                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                         Result);
  }

  return Result;
}

SDOperand
X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
  GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  SDOperand Result = DAG.getTargetGlobalAddress(GV, getPointerTy());
  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
  // With PIC, the address is actually $g + Offset.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      !Subtarget->isPICStyleRIPRel()) {
    Result = DAG.getNode(ISD::ADD, getPointerTy(),
                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                         Result);
  }

  // For Darwin & Mingw32, external and weak symbols are indirect, so we want
  // to load the value at address GV, not the value of GV itself. This means
  // that the GlobalAddress must be in the base or index register of the
  // address, not the GV offset field.
Platform check is inside GVRequiresExtraLoad() call 2968 // The same applies for external symbols during PIC codegen 2969 if (Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false)) 2970 Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result, NULL, 0); 2971 2972 return Result; 2973} 2974 2975// Lower ISD::GlobalTLSAddress using the "general dynamic" model 2976static SDOperand 2977LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, 2978 const MVT::ValueType PtrVT) { 2979 SDOperand InFlag; 2980 SDOperand Chain = DAG.getCopyToReg(DAG.getEntryNode(), X86::EBX, 2981 DAG.getNode(X86ISD::GlobalBaseReg, 2982 PtrVT), InFlag); 2983 InFlag = Chain.getValue(1); 2984 2985 // emit leal symbol@TLSGD(,%ebx,1), %eax 2986 SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag); 2987 SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), 2988 GA->getValueType(0), 2989 GA->getOffset()); 2990 SDOperand Ops[] = { Chain, TGA, InFlag }; 2991 SDOperand Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 3); 2992 InFlag = Result.getValue(2); 2993 Chain = Result.getValue(1); 2994 2995 // call ___tls_get_addr. This function receives its argument in 2996 // the register EAX. 2997 Chain = DAG.getCopyToReg(Chain, X86::EAX, Result, InFlag); 2998 InFlag = Chain.getValue(1); 2999 3000 NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 3001 SDOperand Ops1[] = { Chain, 3002 DAG.getTargetExternalSymbol("___tls_get_addr", 3003 PtrVT), 3004 DAG.getRegister(X86::EAX, PtrVT), 3005 DAG.getRegister(X86::EBX, PtrVT), 3006 InFlag }; 3007 Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 5); 3008 InFlag = Chain.getValue(1); 3009 3010 return DAG.getCopyFromReg(Chain, X86::EAX, PtrVT, InFlag); 3011} 3012 3013// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or 3014// "local exec" model. 3015static SDOperand 3016LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, 3017 const MVT::ValueType PtrVT) { 3018 // Get the Thread Pointer 3019 SDOperand ThreadPointer = DAG.getNode(X86ISD::THREAD_POINTER, PtrVT); 3020 // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial 3021 // exec) 3022 SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), 3023 GA->getValueType(0), 3024 GA->getOffset()); 3025 SDOperand Offset = DAG.getNode(X86ISD::Wrapper, PtrVT, TGA); 3026 3027 if (GA->getGlobal()->isDeclaration()) // initial exec TLS model 3028 Offset = DAG.getLoad(PtrVT, DAG.getEntryNode(), Offset, NULL, 0); 3029 3030 // The address of the thread local variable is the add of the thread 3031 // pointer with the offset of the variable. 
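// A rough sketch, assuming the usual IA-32 ELF TLS conventions; the two
// models come out approximately as:
//   local exec:    movl %gs:0, %eax
//                  addl $x@ntpoff, %eax
//   initial exec:  movl x@indntpoff, %ecx    # offset loaded from memory
//                  movl %gs:0, %eax
//                  addl %ecx, %eax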
3032 return DAG.getNode(ISD::ADD, PtrVT, ThreadPointer, Offset);
3033}
3034
3035SDOperand
3036X86TargetLowering::LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG) {
3037 // TODO: implement the "local dynamic" model
3038 // TODO: implement the "initial exec" model for PIC executables
3039 assert(!Subtarget->is64Bit() && Subtarget->isTargetELF() &&
3040 "TLS not implemented for non-ELF and 64-bit targets");
3041 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3042 // If the relocation model is PIC, use the "General Dynamic" TLS model;
3043 // otherwise use the "Local Exec" TLS model.
3044 if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
3045 return LowerToTLSGeneralDynamicModel(GA, DAG, getPointerTy());
3046 else
3047 return LowerToTLSExecModel(GA, DAG, getPointerTy());
3048}
3049
3050SDOperand
3051X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) {
3052 const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
3053 SDOperand Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
3054 Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
3055 // With PIC, the address is actually $g + Offset.
3056 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
3057 !Subtarget->isPICStyleRIPRel()) {
3058 Result = DAG.getNode(ISD::ADD, getPointerTy(),
3059 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3060 Result);
3061 }
3062
3063 return Result;
3064}
3065
3066SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
3067 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3068 SDOperand Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
3069 Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
3070 // With PIC, the address is actually $g + Offset.
3071 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
3072 !Subtarget->isPICStyleRIPRel()) {
3073 Result = DAG.getNode(ISD::ADD, getPointerTy(),
3074 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3075 Result);
3076 }
3077
3078 return Result;
3079}
3080
3081SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
3082 assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
3083 "Not an i64 shift!");
3084 bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
3085 SDOperand ShOpLo = Op.getOperand(0);
3086 SDOperand ShOpHi = Op.getOperand(1);
3087 SDOperand ShAmt = Op.getOperand(2);
3088 SDOperand Tmp1 = isSRA ?
3089 DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) :
3090 DAG.getConstant(0, MVT::i32);
3091
3092 SDOperand Tmp2, Tmp3;
3093 if (Op.getOpcode() == ISD::SHL_PARTS) {
3094 Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
3095 Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
3096 } else {
3097 Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
3098 Tmp3 = DAG.getNode(isSRA ?
ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt); 3099 } 3100 3101 const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 3102 SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt, 3103 DAG.getConstant(32, MVT::i8)); 3104 SDOperand COps[]={DAG.getEntryNode(), AndNode, DAG.getConstant(0, MVT::i8)}; 3105 SDOperand InFlag = DAG.getNode(X86ISD::CMP, VTs, 2, COps, 3).getValue(1); 3106 3107 SDOperand Hi, Lo; 3108 SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8); 3109 3110 VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag); 3111 SmallVector<SDOperand, 4> Ops; 3112 if (Op.getOpcode() == ISD::SHL_PARTS) { 3113 Ops.push_back(Tmp2); 3114 Ops.push_back(Tmp3); 3115 Ops.push_back(CC); 3116 Ops.push_back(InFlag); 3117 Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 3118 InFlag = Hi.getValue(1); 3119 3120 Ops.clear(); 3121 Ops.push_back(Tmp3); 3122 Ops.push_back(Tmp1); 3123 Ops.push_back(CC); 3124 Ops.push_back(InFlag); 3125 Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 3126 } else { 3127 Ops.push_back(Tmp2); 3128 Ops.push_back(Tmp3); 3129 Ops.push_back(CC); 3130 Ops.push_back(InFlag); 3131 Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 3132 InFlag = Lo.getValue(1); 3133 3134 Ops.clear(); 3135 Ops.push_back(Tmp3); 3136 Ops.push_back(Tmp1); 3137 Ops.push_back(CC); 3138 Ops.push_back(InFlag); 3139 Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 3140 } 3141 3142 VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32); 3143 Ops.clear(); 3144 Ops.push_back(Lo); 3145 Ops.push_back(Hi); 3146 return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size()); 3147} 3148 3149SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) { 3150 assert(Op.getOperand(0).getValueType() <= MVT::i64 && 3151 Op.getOperand(0).getValueType() >= MVT::i16 && 3152 "Unknown SINT_TO_FP to lower!"); 3153 3154 SDOperand Result; 3155 MVT::ValueType SrcVT = Op.getOperand(0).getValueType(); 3156 unsigned Size = MVT::getSizeInBits(SrcVT)/8; 3157 MachineFunction &MF = DAG.getMachineFunction(); 3158 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 3159 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3160 SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0), 3161 StackSlot, NULL, 0); 3162 3163 // Build the FILD 3164 SDVTList Tys; 3165 if (X86ScalarSSE) 3166 Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag); 3167 else 3168 Tys = DAG.getVTList(MVT::f64, MVT::Other); 3169 SmallVector<SDOperand, 8> Ops; 3170 Ops.push_back(Chain); 3171 Ops.push_back(StackSlot); 3172 Ops.push_back(DAG.getValueType(SrcVT)); 3173 Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG :X86ISD::FILD, 3174 Tys, &Ops[0], Ops.size()); 3175 3176 if (X86ScalarSSE) { 3177 Chain = Result.getValue(1); 3178 SDOperand InFlag = Result.getValue(2); 3179 3180 // FIXME: Currently the FST is flagged to the FILD_FLAG. This 3181 // shouldn't be necessary except that RFP cannot be live across 3182 // multiple blocks. When stackifier is fixed, they can be uncoupled. 
3183 MachineFunction &MF = DAG.getMachineFunction(); 3184 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 3185 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3186 Tys = DAG.getVTList(MVT::Other); 3187 SmallVector<SDOperand, 8> Ops; 3188 Ops.push_back(Chain); 3189 Ops.push_back(Result); 3190 Ops.push_back(StackSlot); 3191 Ops.push_back(DAG.getValueType(Op.getValueType())); 3192 Ops.push_back(InFlag); 3193 Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size()); 3194 Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, NULL, 0); 3195 } 3196 3197 return Result; 3198} 3199 3200SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) { 3201 assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 && 3202 "Unknown FP_TO_SINT to lower!"); 3203 // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary 3204 // stack slot. 3205 MachineFunction &MF = DAG.getMachineFunction(); 3206 unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8; 3207 int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 3208 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3209 3210 unsigned Opc; 3211 switch (Op.getValueType()) { 3212 default: assert(0 && "Invalid FP_TO_SINT to lower!"); 3213 case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; 3214 case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; 3215 case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; 3216 } 3217 3218 SDOperand Chain = DAG.getEntryNode(); 3219 SDOperand Value = Op.getOperand(0); 3220 if (X86ScalarSSE) { 3221 assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!"); 3222 Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0); 3223 SDVTList Tys = DAG.getVTList(MVT::f64, MVT::Other); 3224 SDOperand Ops[] = { 3225 Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType()) 3226 }; 3227 Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3); 3228 Chain = Value.getValue(1); 3229 SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 3230 StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3231 } 3232 3233 // Build the FP_TO_INT*_IN_MEM 3234 SDOperand Ops[] = { Chain, Value, StackSlot }; 3235 SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops, 3); 3236 3237 // Load the result. 
3238 return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0); 3239} 3240 3241SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) { 3242 MVT::ValueType VT = Op.getValueType(); 3243 const Type *OpNTy = MVT::getTypeForValueType(VT); 3244 std::vector<Constant*> CV; 3245 if (VT == MVT::f64) { 3246 CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63)))); 3247 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3248 } else { 3249 CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31)))); 3250 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3251 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3252 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3253 } 3254 Constant *CS = ConstantStruct::get(CV); 3255 SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); 3256 SDVTList Tys = DAG.getVTList(VT, MVT::Other); 3257 SmallVector<SDOperand, 3> Ops; 3258 Ops.push_back(DAG.getEntryNode()); 3259 Ops.push_back(CPIdx); 3260 Ops.push_back(DAG.getSrcValue(NULL)); 3261 SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size()); 3262 return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask); 3263} 3264 3265SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) { 3266 MVT::ValueType VT = Op.getValueType(); 3267 const Type *OpNTy = MVT::getTypeForValueType(VT); 3268 std::vector<Constant*> CV; 3269 if (VT == MVT::f64) { 3270 CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63))); 3271 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3272 } else { 3273 CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31))); 3274 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3275 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3276 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3277 } 3278 Constant *CS = ConstantStruct::get(CV); 3279 SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); 3280 SDVTList Tys = DAG.getVTList(VT, MVT::Other); 3281 SmallVector<SDOperand, 3> Ops; 3282 Ops.push_back(DAG.getEntryNode()); 3283 Ops.push_back(CPIdx); 3284 Ops.push_back(DAG.getSrcValue(NULL)); 3285 SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size()); 3286 return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask); 3287} 3288 3289SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) { 3290 SDOperand Op0 = Op.getOperand(0); 3291 SDOperand Op1 = Op.getOperand(1); 3292 MVT::ValueType VT = Op.getValueType(); 3293 MVT::ValueType SrcVT = Op1.getValueType(); 3294 const Type *SrcTy = MVT::getTypeForValueType(SrcVT); 3295 3296 // If second operand is smaller, extend it first. 3297 if (MVT::getSizeInBits(SrcVT) < MVT::getSizeInBits(VT)) { 3298 Op1 = DAG.getNode(ISD::FP_EXTEND, VT, Op1); 3299 SrcVT = VT; 3300 } 3301 3302 // First get the sign bit of second operand. 
3303 std::vector<Constant*> CV; 3304 if (SrcVT == MVT::f64) { 3305 CV.push_back(ConstantFP::get(SrcTy, BitsToDouble(1ULL << 63))); 3306 CV.push_back(ConstantFP::get(SrcTy, 0.0)); 3307 } else { 3308 CV.push_back(ConstantFP::get(SrcTy, BitsToFloat(1U << 31))); 3309 CV.push_back(ConstantFP::get(SrcTy, 0.0)); 3310 CV.push_back(ConstantFP::get(SrcTy, 0.0)); 3311 CV.push_back(ConstantFP::get(SrcTy, 0.0)); 3312 } 3313 Constant *CS = ConstantStruct::get(CV); 3314 SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); 3315 SDVTList Tys = DAG.getVTList(SrcVT, MVT::Other); 3316 SmallVector<SDOperand, 3> Ops; 3317 Ops.push_back(DAG.getEntryNode()); 3318 Ops.push_back(CPIdx); 3319 Ops.push_back(DAG.getSrcValue(NULL)); 3320 SDOperand Mask1 = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size()); 3321 SDOperand SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1); 3322 3323 // Shift sign bit right or left if the two operands have different types. 3324 if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) { 3325 // Op0 is MVT::f32, Op1 is MVT::f64. 3326 SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, SignBit); 3327 SignBit = DAG.getNode(X86ISD::FSRL, MVT::v2f64, SignBit, 3328 DAG.getConstant(32, MVT::i32)); 3329 SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, SignBit); 3330 SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f32, SignBit, 3331 DAG.getConstant(0, getPointerTy())); 3332 } 3333 3334 // Clear first operand sign bit. 3335 CV.clear(); 3336 if (VT == MVT::f64) { 3337 CV.push_back(ConstantFP::get(SrcTy, BitsToDouble(~(1ULL << 63)))); 3338 CV.push_back(ConstantFP::get(SrcTy, 0.0)); 3339 } else { 3340 CV.push_back(ConstantFP::get(SrcTy, BitsToFloat(~(1U << 31)))); 3341 CV.push_back(ConstantFP::get(SrcTy, 0.0)); 3342 CV.push_back(ConstantFP::get(SrcTy, 0.0)); 3343 CV.push_back(ConstantFP::get(SrcTy, 0.0)); 3344 } 3345 CS = ConstantStruct::get(CV); 3346 CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); 3347 Tys = DAG.getVTList(VT, MVT::Other); 3348 Ops.clear(); 3349 Ops.push_back(DAG.getEntryNode()); 3350 Ops.push_back(CPIdx); 3351 Ops.push_back(DAG.getSrcValue(NULL)); 3352 SDOperand Mask2 = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size()); 3353 SDOperand Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2); 3354 3355 // Or the value with the sign bit. 
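// Net effect: copysign(x, y) = (x & ~SignMask) | (y & SignMask), computed
// with packed logical ops; for f64 the sign mask is the 1ULL << 63 bit
// pattern loaded from the constant pool above.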
3356 return DAG.getNode(X86ISD::FOR, VT, Val, SignBit);
3357}
3358
3359SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG,
3360 SDOperand Chain) {
3361 assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
3362 SDOperand Cond;
3363 SDOperand Op0 = Op.getOperand(0);
3364 SDOperand Op1 = Op.getOperand(1);
3365 SDOperand CC = Op.getOperand(2);
3366 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
3367 const MVT::ValueType *VTs1 = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
3368 const MVT::ValueType *VTs2 = DAG.getNodeValueTypes(MVT::i8, MVT::Flag);
3369 bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType());
3370 unsigned X86CC;
3371
3372 if (translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC,
3373 Op0, Op1, DAG)) {
3374 SDOperand Ops1[] = { Chain, Op0, Op1 };
3375 Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, Ops1, 3).getValue(1);
3376 SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond };
3377 return DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
3378 }
3379
3380 assert(isFP && "Illegal integer SetCC!");
3381
3382 SDOperand COps[] = { Chain, Op0, Op1 };
3383 Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, COps, 3).getValue(1);
3384
3385 switch (SetCCOpcode) {
3386 default: assert(false && "Illegal floating point SetCC!");
3387 case ISD::SETOEQ: { // !PF & ZF
3388 SDOperand Ops1[] = { DAG.getConstant(X86::COND_NP, MVT::i8), Cond };
3389 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2);
3390 SDOperand Ops2[] = { DAG.getConstant(X86::COND_E, MVT::i8),
3391 Tmp1.getValue(1) };
3392 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
3393 return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2);
3394 }
3395 case ISD::SETUNE: { // PF | !ZF
3396 SDOperand Ops1[] = { DAG.getConstant(X86::COND_P, MVT::i8), Cond };
3397 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2);
3398 SDOperand Ops2[] = { DAG.getConstant(X86::COND_NE, MVT::i8),
3399 Tmp1.getValue(1) };
3400 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
3401 return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2);
3402 }
3403 }
3404}
3405
3406SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) {
3407 bool addTest = true;
3408 SDOperand Chain = DAG.getEntryNode();
3409 SDOperand Cond = Op.getOperand(0);
3410 SDOperand CC;
3411 const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
3412
3413 if (Cond.getOpcode() == ISD::SETCC)
3414 Cond = LowerSETCC(Cond, DAG, Chain);
3415
3416 if (Cond.getOpcode() == X86ISD::SETCC) {
3417 CC = Cond.getOperand(0);
3418
3419 // If the condition flag is set by an X86ISD::CMP, then make a copy of it
3420 // (since the flag operand cannot be shared). Use it as the condition-setting
3421 // operand in place of the X86ISD::SETCC.
3422 // If the X86ISD::SETCC has more than one use, then perhaps it's better
3423 // to use a test instead of duplicating the X86ISD::CMP (for register
3424 // pressure reasons)?
3425 SDOperand Cmp = Cond.getOperand(1);
3426 unsigned Opc = Cmp.getOpcode();
3427 bool IllegalFPCMov = !X86ScalarSSE &&
3428 MVT::isFloatingPoint(Op.getValueType()) &&
3429 !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
3430 if ((Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) &&
3431 !IllegalFPCMov) {
3432 SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) };
3433 Cond = DAG.getNode(Opc, VTs, 2, Ops, 3);
3434 addTest = false;
3435 }
3436 }
3437
3438 if (addTest) {
3439 CC = DAG.getConstant(X86::COND_NE, MVT::i8);
3440 SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) };
3441 Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3);
3442 }
3443
3444 VTs = DAG.getNodeValueTypes(Op.getValueType(), MVT::Flag);
3445 SmallVector<SDOperand, 4> Ops;
3446 // X86ISD::CMOV means set the result (which is operand 1) to the RHS if
3447 // condition is true.
3448 Ops.push_back(Op.getOperand(2));
3449 Ops.push_back(Op.getOperand(1));
3450 Ops.push_back(CC);
3451 Ops.push_back(Cond.getValue(1));
3452 return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
3453}
3454
3455SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) {
3456 bool addTest = true;
3457 SDOperand Chain = Op.getOperand(0);
3458 SDOperand Cond = Op.getOperand(1);
3459 SDOperand Dest = Op.getOperand(2);
3460 SDOperand CC;
3461 const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
3462
3463 if (Cond.getOpcode() == ISD::SETCC)
3464 Cond = LowerSETCC(Cond, DAG, Chain);
3465
3466 if (Cond.getOpcode() == X86ISD::SETCC) {
3467 CC = Cond.getOperand(0);
3468
3469 // If the condition flag is set by an X86ISD::CMP, then make a copy of it
3470 // (since the flag operand cannot be shared). Use it as the condition-setting
3471 // operand in place of the X86ISD::SETCC.
3472 // If the X86ISD::SETCC has more than one use, then perhaps it's better
3473 // to use a test instead of duplicating the X86ISD::CMP (for register
3474 // pressure reasons)?
3475 SDOperand Cmp = Cond.getOperand(1);
3476 unsigned Opc = Cmp.getOpcode();
3477 if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) {
3478 SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) };
3479 Cond = DAG.getNode(Opc, VTs, 2, Ops, 3);
3480 addTest = false;
3481 }
3482 }
3483
3484 if (addTest) {
3485 CC = DAG.getConstant(X86::COND_NE, MVT::i8);
3486 SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) };
3487 Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3);
3488 }
3489 return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
3490 Cond, Op.getOperand(2), CC, Cond.getValue(1));
3491}
3492
3493SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
3494 unsigned CallingConv = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
3495
3496 if (Subtarget->is64Bit())
3497 return LowerX86_64CCCCallTo(Op, DAG, CallingConv);
3498 else
3499 switch (CallingConv) {
3500 default:
3501 assert(0 && "Unsupported calling convention");
3502 case CallingConv::Fast:
3503 // TODO: Implement fastcc
3504 // Falls through
3505 case CallingConv::C:
3506 case CallingConv::X86_StdCall:
3507 return LowerCCCCallTo(Op, DAG, CallingConv);
3508 case CallingConv::X86_FastCall:
3509 return LowerFastCCCallTo(Op, DAG, CallingConv);
3510 }
3511}
3512
3513
3514// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets.
3515// Calls to _alloca are needed to probe the stack when allocating more than 4k
3516// bytes in one go.
Touching the stack at 4K increments is necessary to ensure 3517// that the guard pages used by the OS virtual memory manager are allocated in 3518// correct sequence. 3519SDOperand X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op, 3520 SelectionDAG &DAG) { 3521 assert(Subtarget->isTargetCygMing() && 3522 "This should be used only on Cygwin/Mingw targets"); 3523 3524 // Get the inputs. 3525 SDOperand Chain = Op.getOperand(0); 3526 SDOperand Size = Op.getOperand(1); 3527 // FIXME: Ensure alignment here 3528 3529 TargetLowering::ArgListTy Args; 3530 TargetLowering::ArgListEntry Entry; 3531 MVT::ValueType IntPtr = getPointerTy(); 3532 MVT::ValueType SPTy = (Subtarget->is64Bit() ? MVT::i64 : MVT::i32); 3533 const Type *IntPtrTy = getTargetData()->getIntPtrType(); 3534 3535 Entry.Node = Size; 3536 Entry.Ty = IntPtrTy; 3537 Entry.isInReg = true; // Should pass in EAX 3538 Args.push_back(Entry); 3539 std::pair<SDOperand, SDOperand> CallResult = 3540 LowerCallTo(Chain, IntPtrTy, false, false, CallingConv::C, false, 3541 DAG.getExternalSymbol("_alloca", IntPtr), Args, DAG); 3542 3543 SDOperand SP = DAG.getCopyFromReg(CallResult.second, X86StackPtr, SPTy); 3544 3545 std::vector<MVT::ValueType> Tys; 3546 Tys.push_back(SPTy); 3547 Tys.push_back(MVT::Other); 3548 SDOperand Ops[2] = { SP, CallResult.second }; 3549 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2); 3550} 3551 3552SDOperand 3553X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) { 3554 MachineFunction &MF = DAG.getMachineFunction(); 3555 const Function* Fn = MF.getFunction(); 3556 if (Fn->hasExternalLinkage() && 3557 Subtarget->isTargetCygMing() && 3558 Fn->getName() == "main") 3559 MF.getInfo<X86MachineFunctionInfo>()->setForceFramePointer(true); 3560 3561 unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 3562 if (Subtarget->is64Bit()) 3563 return LowerX86_64CCCArguments(Op, DAG); 3564 else 3565 switch(CC) { 3566 default: 3567 assert(0 && "Unsupported calling convention"); 3568 case CallingConv::Fast: 3569 // TODO: implement fastcc. 3570 3571 // Falls through 3572 case CallingConv::C: 3573 return LowerCCCArguments(Op, DAG); 3574 case CallingConv::X86_StdCall: 3575 MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(StdCall); 3576 return LowerCCCArguments(Op, DAG, true); 3577 case CallingConv::X86_FastCall: 3578 MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(FastCall); 3579 return LowerFastCCArguments(Op, DAG); 3580 } 3581} 3582 3583SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { 3584 SDOperand InFlag(0, 0); 3585 SDOperand Chain = Op.getOperand(0); 3586 unsigned Align = 3587 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); 3588 if (Align == 0) Align = 1; 3589 3590 ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); 3591 // If not DWORD aligned, call memset if size is less than the threshold. 3592 // It knows how to align to the right boundary first. 3593 if ((Align & 3) != 0 || 3594 (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) { 3595 MVT::ValueType IntPtr = getPointerTy(); 3596 const Type *IntPtrTy = getTargetData()->getIntPtrType(); 3597 TargetLowering::ArgListTy Args; 3598 TargetLowering::ArgListEntry Entry; 3599 Entry.Node = Op.getOperand(1); 3600 Entry.Ty = IntPtrTy; 3601 Args.push_back(Entry); 3602 // Extend the unsigned i8 argument to be an int value for the call. 
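// (The C prototype is void *memset(void *dst, int c, size_t n), so the i8
// fill value is widened to an int before the call.)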
3603 Entry.Node = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2)); 3604 Entry.Ty = IntPtrTy; 3605 Args.push_back(Entry); 3606 Entry.Node = Op.getOperand(3); 3607 Args.push_back(Entry); 3608 std::pair<SDOperand,SDOperand> CallResult = 3609 LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false, 3610 DAG.getExternalSymbol("memset", IntPtr), Args, DAG); 3611 return CallResult.second; 3612 } 3613 3614 MVT::ValueType AVT; 3615 SDOperand Count; 3616 ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 3617 unsigned BytesLeft = 0; 3618 bool TwoRepStos = false; 3619 if (ValC) { 3620 unsigned ValReg; 3621 uint64_t Val = ValC->getValue() & 255; 3622 3623 // If the value is a constant, then we can potentially use larger sets. 3624 switch (Align & 3) { 3625 case 2: // WORD aligned 3626 AVT = MVT::i16; 3627 ValReg = X86::AX; 3628 Val = (Val << 8) | Val; 3629 break; 3630 case 0: // DWORD aligned 3631 AVT = MVT::i32; 3632 ValReg = X86::EAX; 3633 Val = (Val << 8) | Val; 3634 Val = (Val << 16) | Val; 3635 if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) { // QWORD aligned 3636 AVT = MVT::i64; 3637 ValReg = X86::RAX; 3638 Val = (Val << 32) | Val; 3639 } 3640 break; 3641 default: // Byte aligned 3642 AVT = MVT::i8; 3643 ValReg = X86::AL; 3644 Count = Op.getOperand(3); 3645 break; 3646 } 3647 3648 if (AVT > MVT::i8) { 3649 if (I) { 3650 unsigned UBytes = MVT::getSizeInBits(AVT) / 8; 3651 Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy()); 3652 BytesLeft = I->getValue() % UBytes; 3653 } else { 3654 assert(AVT >= MVT::i32 && 3655 "Do not use rep;stos if not at least DWORD aligned"); 3656 Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(), 3657 Op.getOperand(3), DAG.getConstant(2, MVT::i8)); 3658 TwoRepStos = true; 3659 } 3660 } 3661 3662 Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT), 3663 InFlag); 3664 InFlag = Chain.getValue(1); 3665 } else { 3666 AVT = MVT::i8; 3667 Count = Op.getOperand(3); 3668 Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag); 3669 InFlag = Chain.getValue(1); 3670 } 3671 3672 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, 3673 Count, InFlag); 3674 InFlag = Chain.getValue(1); 3675 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, 3676 Op.getOperand(1), InFlag); 3677 InFlag = Chain.getValue(1); 3678 3679 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 3680 SmallVector<SDOperand, 8> Ops; 3681 Ops.push_back(Chain); 3682 Ops.push_back(DAG.getValueType(AVT)); 3683 Ops.push_back(InFlag); 3684 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); 3685 3686 if (TwoRepStos) { 3687 InFlag = Chain.getValue(1); 3688 Count = Op.getOperand(3); 3689 MVT::ValueType CVT = Count.getValueType(); 3690 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 3691 DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); 3692 Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX, 3693 Left, InFlag); 3694 InFlag = Chain.getValue(1); 3695 Tys = DAG.getVTList(MVT::Other, MVT::Flag); 3696 Ops.clear(); 3697 Ops.push_back(Chain); 3698 Ops.push_back(DAG.getValueType(MVT::i8)); 3699 Ops.push_back(InFlag); 3700 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); 3701 } else if (BytesLeft) { 3702 // Issue stores for the last 1 - 7 bytes. 
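// E.g. a DWORD-aligned 15-byte memset becomes rep;stosl with ECX = 3 for
// the first 12 bytes, then one 2-byte and one 1-byte store below; at most
// one trailing store is issued per power of two below the rep element size.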
3703 SDOperand Value; 3704 unsigned Val = ValC->getValue() & 255; 3705 unsigned Offset = I->getValue() - BytesLeft; 3706 SDOperand DstAddr = Op.getOperand(1); 3707 MVT::ValueType AddrVT = DstAddr.getValueType(); 3708 if (BytesLeft >= 4) { 3709 Val = (Val << 8) | Val; 3710 Val = (Val << 16) | Val; 3711 Value = DAG.getConstant(Val, MVT::i32); 3712 Chain = DAG.getStore(Chain, Value, 3713 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 3714 DAG.getConstant(Offset, AddrVT)), 3715 NULL, 0); 3716 BytesLeft -= 4; 3717 Offset += 4; 3718 } 3719 if (BytesLeft >= 2) { 3720 Value = DAG.getConstant((Val << 8) | Val, MVT::i16); 3721 Chain = DAG.getStore(Chain, Value, 3722 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 3723 DAG.getConstant(Offset, AddrVT)), 3724 NULL, 0); 3725 BytesLeft -= 2; 3726 Offset += 2; 3727 } 3728 if (BytesLeft == 1) { 3729 Value = DAG.getConstant(Val, MVT::i8); 3730 Chain = DAG.getStore(Chain, Value, 3731 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 3732 DAG.getConstant(Offset, AddrVT)), 3733 NULL, 0); 3734 } 3735 } 3736 3737 return Chain; 3738} 3739 3740SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) { 3741 SDOperand Chain = Op.getOperand(0); 3742 unsigned Align = 3743 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); 3744 if (Align == 0) Align = 1; 3745 3746 ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); 3747 // If not DWORD aligned, call memcpy if size is less than the threshold. 3748 // It knows how to align to the right boundary first. 3749 if ((Align & 3) != 0 || 3750 (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) { 3751 MVT::ValueType IntPtr = getPointerTy(); 3752 TargetLowering::ArgListTy Args; 3753 TargetLowering::ArgListEntry Entry; 3754 Entry.Ty = getTargetData()->getIntPtrType(); 3755 Entry.Node = Op.getOperand(1); Args.push_back(Entry); 3756 Entry.Node = Op.getOperand(2); Args.push_back(Entry); 3757 Entry.Node = Op.getOperand(3); Args.push_back(Entry); 3758 std::pair<SDOperand,SDOperand> CallResult = 3759 LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false, 3760 DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG); 3761 return CallResult.second; 3762 } 3763 3764 MVT::ValueType AVT; 3765 SDOperand Count; 3766 unsigned BytesLeft = 0; 3767 bool TwoRepMovs = false; 3768 switch (Align & 3) { 3769 case 2: // WORD aligned 3770 AVT = MVT::i16; 3771 break; 3772 case 0: // DWORD aligned 3773 AVT = MVT::i32; 3774 if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) // QWORD aligned 3775 AVT = MVT::i64; 3776 break; 3777 default: // Byte aligned 3778 AVT = MVT::i8; 3779 Count = Op.getOperand(3); 3780 break; 3781 } 3782 3783 if (AVT > MVT::i8) { 3784 if (I) { 3785 unsigned UBytes = MVT::getSizeInBits(AVT) / 8; 3786 Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy()); 3787 BytesLeft = I->getValue() % UBytes; 3788 } else { 3789 assert(AVT >= MVT::i32 && 3790 "Do not use rep;movs if not at least DWORD aligned"); 3791 Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(), 3792 Op.getOperand(3), DAG.getConstant(2, MVT::i8)); 3793 TwoRepMovs = true; 3794 } 3795 } 3796 3797 SDOperand InFlag(0, 0); 3798 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, 3799 Count, InFlag); 3800 InFlag = Chain.getValue(1); 3801 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, 3802 Op.getOperand(1), InFlag); 3803 InFlag = Chain.getValue(1); 3804 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? 
X86::RSI : X86::ESI, 3805 Op.getOperand(2), InFlag); 3806 InFlag = Chain.getValue(1); 3807 3808 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 3809 SmallVector<SDOperand, 8> Ops; 3810 Ops.push_back(Chain); 3811 Ops.push_back(DAG.getValueType(AVT)); 3812 Ops.push_back(InFlag); 3813 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()); 3814 3815 if (TwoRepMovs) { 3816 InFlag = Chain.getValue(1); 3817 Count = Op.getOperand(3); 3818 MVT::ValueType CVT = Count.getValueType(); 3819 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 3820 DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); 3821 Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX, 3822 Left, InFlag); 3823 InFlag = Chain.getValue(1); 3824 Tys = DAG.getVTList(MVT::Other, MVT::Flag); 3825 Ops.clear(); 3826 Ops.push_back(Chain); 3827 Ops.push_back(DAG.getValueType(MVT::i8)); 3828 Ops.push_back(InFlag); 3829 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()); 3830 } else if (BytesLeft) { 3831 // Issue loads and stores for the last 1 - 7 bytes. 3832 unsigned Offset = I->getValue() - BytesLeft; 3833 SDOperand DstAddr = Op.getOperand(1); 3834 MVT::ValueType DstVT = DstAddr.getValueType(); 3835 SDOperand SrcAddr = Op.getOperand(2); 3836 MVT::ValueType SrcVT = SrcAddr.getValueType(); 3837 SDOperand Value; 3838 if (BytesLeft >= 4) { 3839 Value = DAG.getLoad(MVT::i32, Chain, 3840 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 3841 DAG.getConstant(Offset, SrcVT)), 3842 NULL, 0); 3843 Chain = Value.getValue(1); 3844 Chain = DAG.getStore(Chain, Value, 3845 DAG.getNode(ISD::ADD, DstVT, DstAddr, 3846 DAG.getConstant(Offset, DstVT)), 3847 NULL, 0); 3848 BytesLeft -= 4; 3849 Offset += 4; 3850 } 3851 if (BytesLeft >= 2) { 3852 Value = DAG.getLoad(MVT::i16, Chain, 3853 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 3854 DAG.getConstant(Offset, SrcVT)), 3855 NULL, 0); 3856 Chain = Value.getValue(1); 3857 Chain = DAG.getStore(Chain, Value, 3858 DAG.getNode(ISD::ADD, DstVT, DstAddr, 3859 DAG.getConstant(Offset, DstVT)), 3860 NULL, 0); 3861 BytesLeft -= 2; 3862 Offset += 2; 3863 } 3864 3865 if (BytesLeft == 1) { 3866 Value = DAG.getLoad(MVT::i8, Chain, 3867 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 3868 DAG.getConstant(Offset, SrcVT)), 3869 NULL, 0); 3870 Chain = Value.getValue(1); 3871 Chain = DAG.getStore(Chain, Value, 3872 DAG.getNode(ISD::ADD, DstVT, DstAddr, 3873 DAG.getConstant(Offset, DstVT)), 3874 NULL, 0); 3875 } 3876 } 3877 3878 return Chain; 3879} 3880 3881SDOperand 3882X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) { 3883 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 3884 SDOperand TheOp = Op.getOperand(0); 3885 SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheOp, 1); 3886 if (Subtarget->is64Bit()) { 3887 SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1)); 3888 SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::RDX, 3889 MVT::i64, Copy1.getValue(2)); 3890 SDOperand Tmp = DAG.getNode(ISD::SHL, MVT::i64, Copy2, 3891 DAG.getConstant(32, MVT::i8)); 3892 SDOperand Ops[] = { 3893 DAG.getNode(ISD::OR, MVT::i64, Copy1, Tmp), Copy2.getValue(1) 3894 }; 3895 3896 Tys = DAG.getVTList(MVT::i64, MVT::Other); 3897 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2); 3898 } 3899 3900 SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1)); 3901 SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::EDX, 3902 MVT::i32, Copy1.getValue(2)); 3903 SDOperand Ops[] = { Copy1, Copy2, Copy2.getValue(1) }; 3904 Tys = DAG.getVTList(MVT::i32, 
MVT::i32, MVT::Other); 3905 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 3); 3906} 3907 3908SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) { 3909 SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2)); 3910 3911 if (!Subtarget->is64Bit()) { 3912 // vastart just stores the address of the VarArgsFrameIndex slot into the 3913 // memory location argument. 3914 SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 3915 return DAG.getStore(Op.getOperand(0), FR,Op.getOperand(1), SV->getValue(), 3916 SV->getOffset()); 3917 } 3918 3919 // __va_list_tag: 3920 // gp_offset (0 - 6 * 8) 3921 // fp_offset (48 - 48 + 8 * 16) 3922 // overflow_arg_area (point to parameters coming in memory). 3923 // reg_save_area 3924 SmallVector<SDOperand, 8> MemOps; 3925 SDOperand FIN = Op.getOperand(1); 3926 // Store gp_offset 3927 SDOperand Store = DAG.getStore(Op.getOperand(0), 3928 DAG.getConstant(VarArgsGPOffset, MVT::i32), 3929 FIN, SV->getValue(), SV->getOffset()); 3930 MemOps.push_back(Store); 3931 3932 // Store fp_offset 3933 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 3934 DAG.getConstant(4, getPointerTy())); 3935 Store = DAG.getStore(Op.getOperand(0), 3936 DAG.getConstant(VarArgsFPOffset, MVT::i32), 3937 FIN, SV->getValue(), SV->getOffset()); 3938 MemOps.push_back(Store); 3939 3940 // Store ptr to overflow_arg_area 3941 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 3942 DAG.getConstant(4, getPointerTy())); 3943 SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 3944 Store = DAG.getStore(Op.getOperand(0), OVFIN, FIN, SV->getValue(), 3945 SV->getOffset()); 3946 MemOps.push_back(Store); 3947 3948 // Store ptr to reg_save_area. 3949 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 3950 DAG.getConstant(8, getPointerTy())); 3951 SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); 3952 Store = DAG.getStore(Op.getOperand(0), RSFIN, FIN, SV->getValue(), 3953 SV->getOffset()); 3954 MemOps.push_back(Store); 3955 return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size()); 3956} 3957 3958SDOperand X86TargetLowering::LowerVACOPY(SDOperand Op, SelectionDAG &DAG) { 3959 // X86-64 va_list is a struct { i32, i32, i8*, i8* }. 3960 SDOperand Chain = Op.getOperand(0); 3961 SDOperand DstPtr = Op.getOperand(1); 3962 SDOperand SrcPtr = Op.getOperand(2); 3963 SrcValueSDNode *DstSV = cast<SrcValueSDNode>(Op.getOperand(3)); 3964 SrcValueSDNode *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4)); 3965 3966 SrcPtr = DAG.getLoad(getPointerTy(), Chain, SrcPtr, 3967 SrcSV->getValue(), SrcSV->getOffset()); 3968 Chain = SrcPtr.getValue(1); 3969 for (unsigned i = 0; i < 3; ++i) { 3970 SDOperand Val = DAG.getLoad(MVT::i64, Chain, SrcPtr, 3971 SrcSV->getValue(), SrcSV->getOffset()); 3972 Chain = Val.getValue(1); 3973 Chain = DAG.getStore(Chain, Val, DstPtr, 3974 DstSV->getValue(), DstSV->getOffset()); 3975 if (i == 2) 3976 break; 3977 SrcPtr = DAG.getNode(ISD::ADD, getPointerTy(), SrcPtr, 3978 DAG.getConstant(8, getPointerTy())); 3979 DstPtr = DAG.getNode(ISD::ADD, getPointerTy(), DstPtr, 3980 DAG.getConstant(8, getPointerTy())); 3981 } 3982 return Chain; 3983} 3984 3985SDOperand 3986X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) { 3987 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue(); 3988 switch (IntNo) { 3989 default: return SDOperand(); // Don't custom lower most intrinsics. 3990 // Comparison intrinsics. 
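// These all map onto comiss/ucomiss (SSE) or comisd/ucomisd (SSE2), which
// compare two scalars and set EFLAGS; the lowering below translates the
// intrinsic's predicate into an X86 condition code and materializes the
// result with a setcc. E.g. comieq becomes, roughly:
//   comiss %xmm1, %xmm0
//   sete   %al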
3991 case Intrinsic::x86_sse_comieq_ss: 3992 case Intrinsic::x86_sse_comilt_ss: 3993 case Intrinsic::x86_sse_comile_ss: 3994 case Intrinsic::x86_sse_comigt_ss: 3995 case Intrinsic::x86_sse_comige_ss: 3996 case Intrinsic::x86_sse_comineq_ss: 3997 case Intrinsic::x86_sse_ucomieq_ss: 3998 case Intrinsic::x86_sse_ucomilt_ss: 3999 case Intrinsic::x86_sse_ucomile_ss: 4000 case Intrinsic::x86_sse_ucomigt_ss: 4001 case Intrinsic::x86_sse_ucomige_ss: 4002 case Intrinsic::x86_sse_ucomineq_ss: 4003 case Intrinsic::x86_sse2_comieq_sd: 4004 case Intrinsic::x86_sse2_comilt_sd: 4005 case Intrinsic::x86_sse2_comile_sd: 4006 case Intrinsic::x86_sse2_comigt_sd: 4007 case Intrinsic::x86_sse2_comige_sd: 4008 case Intrinsic::x86_sse2_comineq_sd: 4009 case Intrinsic::x86_sse2_ucomieq_sd: 4010 case Intrinsic::x86_sse2_ucomilt_sd: 4011 case Intrinsic::x86_sse2_ucomile_sd: 4012 case Intrinsic::x86_sse2_ucomigt_sd: 4013 case Intrinsic::x86_sse2_ucomige_sd: 4014 case Intrinsic::x86_sse2_ucomineq_sd: { 4015 unsigned Opc = 0; 4016 ISD::CondCode CC = ISD::SETCC_INVALID; 4017 switch (IntNo) { 4018 default: break; 4019 case Intrinsic::x86_sse_comieq_ss: 4020 case Intrinsic::x86_sse2_comieq_sd: 4021 Opc = X86ISD::COMI; 4022 CC = ISD::SETEQ; 4023 break; 4024 case Intrinsic::x86_sse_comilt_ss: 4025 case Intrinsic::x86_sse2_comilt_sd: 4026 Opc = X86ISD::COMI; 4027 CC = ISD::SETLT; 4028 break; 4029 case Intrinsic::x86_sse_comile_ss: 4030 case Intrinsic::x86_sse2_comile_sd: 4031 Opc = X86ISD::COMI; 4032 CC = ISD::SETLE; 4033 break; 4034 case Intrinsic::x86_sse_comigt_ss: 4035 case Intrinsic::x86_sse2_comigt_sd: 4036 Opc = X86ISD::COMI; 4037 CC = ISD::SETGT; 4038 break; 4039 case Intrinsic::x86_sse_comige_ss: 4040 case Intrinsic::x86_sse2_comige_sd: 4041 Opc = X86ISD::COMI; 4042 CC = ISD::SETGE; 4043 break; 4044 case Intrinsic::x86_sse_comineq_ss: 4045 case Intrinsic::x86_sse2_comineq_sd: 4046 Opc = X86ISD::COMI; 4047 CC = ISD::SETNE; 4048 break; 4049 case Intrinsic::x86_sse_ucomieq_ss: 4050 case Intrinsic::x86_sse2_ucomieq_sd: 4051 Opc = X86ISD::UCOMI; 4052 CC = ISD::SETEQ; 4053 break; 4054 case Intrinsic::x86_sse_ucomilt_ss: 4055 case Intrinsic::x86_sse2_ucomilt_sd: 4056 Opc = X86ISD::UCOMI; 4057 CC = ISD::SETLT; 4058 break; 4059 case Intrinsic::x86_sse_ucomile_ss: 4060 case Intrinsic::x86_sse2_ucomile_sd: 4061 Opc = X86ISD::UCOMI; 4062 CC = ISD::SETLE; 4063 break; 4064 case Intrinsic::x86_sse_ucomigt_ss: 4065 case Intrinsic::x86_sse2_ucomigt_sd: 4066 Opc = X86ISD::UCOMI; 4067 CC = ISD::SETGT; 4068 break; 4069 case Intrinsic::x86_sse_ucomige_ss: 4070 case Intrinsic::x86_sse2_ucomige_sd: 4071 Opc = X86ISD::UCOMI; 4072 CC = ISD::SETGE; 4073 break; 4074 case Intrinsic::x86_sse_ucomineq_ss: 4075 case Intrinsic::x86_sse2_ucomineq_sd: 4076 Opc = X86ISD::UCOMI; 4077 CC = ISD::SETNE; 4078 break; 4079 } 4080 4081 unsigned X86CC; 4082 SDOperand LHS = Op.getOperand(1); 4083 SDOperand RHS = Op.getOperand(2); 4084 translateX86CC(CC, true, X86CC, LHS, RHS, DAG); 4085 4086 const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 4087 SDOperand Ops1[] = { DAG.getEntryNode(), LHS, RHS }; 4088 SDOperand Cond = DAG.getNode(Opc, VTs, 2, Ops1, 3); 4089 VTs = DAG.getNodeValueTypes(MVT::i8, MVT::Flag); 4090 SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond }; 4091 SDOperand SetCC = DAG.getNode(X86ISD::SETCC, VTs, 2, Ops2, 2); 4092 return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC); 4093 } 4094 } 4095} 4096 4097SDOperand X86TargetLowering::LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG) { 4098 // Depths > 0 not supported 
yet! 4099 if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0) 4100 return SDOperand(); 4101 4102 // Just load the return address 4103 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG); 4104 return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0); 4105} 4106 4107SDOperand X86TargetLowering::LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG) { 4108 // Depths > 0 not supported yet! 4109 if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0) 4110 return SDOperand(); 4111 4112 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG); 4113 return DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI, 4114 DAG.getConstant(4, getPointerTy())); 4115} 4116 4117/// LowerOperation - Provide custom lowering hooks for some operations. 4118/// 4119SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { 4120 switch (Op.getOpcode()) { 4121 default: assert(0 && "Should not custom lower this!"); 4122 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 4123 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 4124 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 4125 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); 4126 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 4127 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 4128 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 4129 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 4130 case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG); 4131 case ISD::SHL_PARTS: 4132 case ISD::SRA_PARTS: 4133 case ISD::SRL_PARTS: return LowerShift(Op, DAG); 4134 case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); 4135 case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); 4136 case ISD::FABS: return LowerFABS(Op, DAG); 4137 case ISD::FNEG: return LowerFNEG(Op, DAG); 4138 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); 4139 case ISD::SETCC: return LowerSETCC(Op, DAG, DAG.getEntryNode()); 4140 case ISD::SELECT: return LowerSELECT(Op, DAG); 4141 case ISD::BRCOND: return LowerBRCOND(Op, DAG); 4142 case ISD::JumpTable: return LowerJumpTable(Op, DAG); 4143 case ISD::CALL: return LowerCALL(Op, DAG); 4144 case ISD::RET: return LowerRET(Op, DAG); 4145 case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG); 4146 case ISD::MEMSET: return LowerMEMSET(Op, DAG); 4147 case ISD::MEMCPY: return LowerMEMCPY(Op, DAG); 4148 case ISD::READCYCLECOUNTER: return LowerREADCYCLCECOUNTER(Op, DAG); 4149 case ISD::VASTART: return LowerVASTART(Op, DAG); 4150 case ISD::VACOPY: return LowerVACOPY(Op, DAG); 4151 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 4152 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 4153 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 4154 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); 4155 } 4156 return SDOperand(); 4157} 4158 4159const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { 4160 switch (Opcode) { 4161 default: return NULL; 4162 case X86ISD::SHLD: return "X86ISD::SHLD"; 4163 case X86ISD::SHRD: return "X86ISD::SHRD"; 4164 case X86ISD::FAND: return "X86ISD::FAND"; 4165 case X86ISD::FOR: return "X86ISD::FOR"; 4166 case X86ISD::FXOR: return "X86ISD::FXOR"; 4167 case X86ISD::FSRL: return "X86ISD::FSRL"; 4168 case X86ISD::FILD: return "X86ISD::FILD"; 4169 case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG"; 4170 case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM"; 4171 case 
X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM"; 4172 case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM"; 4173 case X86ISD::FLD: return "X86ISD::FLD"; 4174 case X86ISD::FST: return "X86ISD::FST"; 4175 case X86ISD::FP_GET_RESULT: return "X86ISD::FP_GET_RESULT"; 4176 case X86ISD::FP_SET_RESULT: return "X86ISD::FP_SET_RESULT"; 4177 case X86ISD::CALL: return "X86ISD::CALL"; 4178 case X86ISD::TAILCALL: return "X86ISD::TAILCALL"; 4179 case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG"; 4180 case X86ISD::CMP: return "X86ISD::CMP"; 4181 case X86ISD::COMI: return "X86ISD::COMI"; 4182 case X86ISD::UCOMI: return "X86ISD::UCOMI"; 4183 case X86ISD::SETCC: return "X86ISD::SETCC"; 4184 case X86ISD::CMOV: return "X86ISD::CMOV"; 4185 case X86ISD::BRCOND: return "X86ISD::BRCOND"; 4186 case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; 4187 case X86ISD::REP_STOS: return "X86ISD::REP_STOS"; 4188 case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS"; 4189 case X86ISD::LOAD_PACK: return "X86ISD::LOAD_PACK"; 4190 case X86ISD::LOAD_UA: return "X86ISD::LOAD_UA"; 4191 case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg"; 4192 case X86ISD::Wrapper: return "X86ISD::Wrapper"; 4193 case X86ISD::S2VEC: return "X86ISD::S2VEC"; 4194 case X86ISD::PEXTRW: return "X86ISD::PEXTRW"; 4195 case X86ISD::PINSRW: return "X86ISD::PINSRW"; 4196 case X86ISD::FMAX: return "X86ISD::FMAX"; 4197 case X86ISD::FMIN: return "X86ISD::FMIN"; 4198 case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; 4199 case X86ISD::THREAD_POINTER: return "X86ISD::THREAD_POINTER"; 4200 } 4201} 4202 4203// isLegalAddressingMode - Return true if the addressing mode represented 4204// by AM is legal for this target, for a load/store of the specified type. 4205bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM, 4206 const Type *Ty) const { 4207 // X86 supports extremely general addressing modes. 4208 4209 // X86 allows a sign-extended 32-bit immediate field as a displacement. 4210 if (AM.BaseOffs <= -(1LL << 32) || AM.BaseOffs >= (1LL << 32)-1) 4211 return false; 4212 4213 if (AM.BaseGV) { 4214 // X86-64 only supports addr of globals in small code model. 4215 if (Subtarget->is64Bit() && 4216 getTargetMachine().getCodeModel() != CodeModel::Small) 4217 return false; 4218 4219 // We can only fold this if we don't need a load either. 4220 if (Subtarget->GVRequiresExtraLoad(AM.BaseGV, getTargetMachine(), false)) 4221 return false; 4222 } 4223 4224 switch (AM.Scale) { 4225 case 0: 4226 case 1: 4227 case 2: 4228 case 4: 4229 case 8: 4230 // These scales always work. 4231 break; 4232 case 3: 4233 case 5: 4234 case 9: 4235 // These scales are formed with basereg+scalereg. Only accept if there is 4236 // no basereg yet. 4237 if (AM.HasBaseReg) 4238 return false; 4239 break; 4240 default: // Other stuff never works. 4241 return false; 4242 } 4243 4244 return true; 4245} 4246 4247 4248/// isShuffleMaskLegal - Targets can use this to indicate that they only 4249/// support *some* VECTOR_SHUFFLE operations, those with specific masks. 4250/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 4251/// are assumed to be legal. 4252bool 4253X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const { 4254 // Only do shuffles on 128-bit vector types for now. 
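// The masks accepted here appear to be the ones the lowering above can
// select to a single SSE instruction: an arbitrary shuffle of four or fewer
// elements, a splat, pshufhw/pshuflw, or one of the unpck forms.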
4255 if (MVT::getSizeInBits(VT) == 64) return false; 4256 return (Mask.Val->getNumOperands() <= 4 || 4257 isSplatMask(Mask.Val) || 4258 isPSHUFHW_PSHUFLWMask(Mask.Val) || 4259 X86::isUNPCKLMask(Mask.Val) || 4260 X86::isUNPCKL_v_undef_Mask(Mask.Val) || 4261 X86::isUNPCKH_v_undef_Mask(Mask.Val) || 4262 X86::isUNPCKHMask(Mask.Val)); 4263} 4264 4265bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps, 4266 MVT::ValueType EVT, 4267 SelectionDAG &DAG) const { 4268 unsigned NumElts = BVOps.size(); 4269 // Only do shuffles on 128-bit vector types for now. 4270 if (MVT::getSizeInBits(EVT) * NumElts == 64) return false; 4271 if (NumElts == 2) return true; 4272 if (NumElts == 4) { 4273 return (isMOVLMask(&BVOps[0], 4) || 4274 isCommutedMOVL(&BVOps[0], 4, true) || 4275 isSHUFPMask(&BVOps[0], 4) || 4276 isCommutedSHUFP(&BVOps[0], 4)); 4277 } 4278 return false; 4279} 4280 4281//===----------------------------------------------------------------------===// 4282// X86 Scheduler Hooks 4283//===----------------------------------------------------------------------===// 4284 4285MachineBasicBlock * 4286X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI, 4287 MachineBasicBlock *BB) { 4288 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 4289 switch (MI->getOpcode()) { 4290 default: assert(false && "Unexpected instr type to insert"); 4291 case X86::CMOV_FR32: 4292 case X86::CMOV_FR64: 4293 case X86::CMOV_V4F32: 4294 case X86::CMOV_V2F64: 4295 case X86::CMOV_V2I64: { 4296 // To "insert" a SELECT_CC instruction, we actually have to insert the 4297 // diamond control-flow pattern. The incoming instruction knows the 4298 // destination vreg to set, the condition code register to branch on, the 4299 // true/false values to select between, and a branch opcode to use. 4300 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 4301 ilist<MachineBasicBlock>::iterator It = BB; 4302 ++It; 4303 4304 // thisMBB: 4305 // ... 4306 // TrueVal = ... 4307 // cmpTY ccX, r1, r2 4308 // bCC copy1MBB 4309 // fallthrough --> copy0MBB 4310 MachineBasicBlock *thisMBB = BB; 4311 MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB); 4312 MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB); 4313 unsigned Opc = 4314 X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm()); 4315 BuildMI(BB, TII->get(Opc)).addMBB(sinkMBB); 4316 MachineFunction *F = BB->getParent(); 4317 F->getBasicBlockList().insert(It, copy0MBB); 4318 F->getBasicBlockList().insert(It, sinkMBB); 4319 // Update machine-CFG edges by first adding all successors of the current 4320 // block to the new block which will contain the Phi node for the select. 4321 for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), 4322 e = BB->succ_end(); i != e; ++i) 4323 sinkMBB->addSuccessor(*i); 4324 // Next, remove all successors of the current block, and add the true 4325 // and fallthrough blocks as its successors. 4326 while(!BB->succ_empty()) 4327 BB->removeSuccessor(BB->succ_begin()); 4328 BB->addSuccessor(copy0MBB); 4329 BB->addSuccessor(sinkMBB); 4330 4331 // copy0MBB: 4332 // %FalseValue = ... 4333 // # fallthrough to sinkMBB 4334 BB = copy0MBB; 4335 4336 // Update machine-CFG edges 4337 BB->addSuccessor(sinkMBB); 4338 4339 // sinkMBB: 4340 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 4341 // ... 
4342 BB = sinkMBB; 4343 BuildMI(BB, TII->get(X86::PHI), MI->getOperand(0).getReg()) 4344 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) 4345 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 4346 4347 delete MI; // The pseudo instruction is gone now. 4348 return BB; 4349 } 4350 4351 case X86::FP_TO_INT16_IN_MEM: 4352 case X86::FP_TO_INT32_IN_MEM: 4353 case X86::FP_TO_INT64_IN_MEM: { 4354 // Change the floating point control register to use "round towards zero" 4355 // mode when truncating to an integer value. 4356 MachineFunction *F = BB->getParent(); 4357 int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2); 4358 addFrameReference(BuildMI(BB, TII->get(X86::FNSTCW16m)), CWFrameIdx); 4359 4360 // Load the old value of the high byte of the control word... 4361 unsigned OldCW = 4362 F->getSSARegMap()->createVirtualRegister(X86::GR16RegisterClass); 4363 addFrameReference(BuildMI(BB, TII->get(X86::MOV16rm), OldCW), CWFrameIdx); 4364 4365 // Set the high part to be round to zero... 4366 addFrameReference(BuildMI(BB, TII->get(X86::MOV16mi)), CWFrameIdx) 4367 .addImm(0xC7F); 4368 4369 // Reload the modified control word now... 4370 addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx); 4371 4372 // Restore the memory image of control word to original value 4373 addFrameReference(BuildMI(BB, TII->get(X86::MOV16mr)), CWFrameIdx) 4374 .addReg(OldCW); 4375 4376 // Get the X86 opcode to use. 4377 unsigned Opc; 4378 switch (MI->getOpcode()) { 4379 default: assert(0 && "illegal opcode!"); 4380 case X86::FP_TO_INT16_IN_MEM: Opc = X86::FpIST16m; break; 4381 case X86::FP_TO_INT32_IN_MEM: Opc = X86::FpIST32m; break; 4382 case X86::FP_TO_INT64_IN_MEM: Opc = X86::FpIST64m; break; 4383 } 4384 4385 X86AddressMode AM; 4386 MachineOperand &Op = MI->getOperand(0); 4387 if (Op.isRegister()) { 4388 AM.BaseType = X86AddressMode::RegBase; 4389 AM.Base.Reg = Op.getReg(); 4390 } else { 4391 AM.BaseType = X86AddressMode::FrameIndexBase; 4392 AM.Base.FrameIndex = Op.getFrameIndex(); 4393 } 4394 Op = MI->getOperand(1); 4395 if (Op.isImmediate()) 4396 AM.Scale = Op.getImm(); 4397 Op = MI->getOperand(2); 4398 if (Op.isImmediate()) 4399 AM.IndexReg = Op.getImm(); 4400 Op = MI->getOperand(3); 4401 if (Op.isGlobalAddress()) { 4402 AM.GV = Op.getGlobal(); 4403 } else { 4404 AM.Disp = Op.getImm(); 4405 } 4406 addFullAddress(BuildMI(BB, TII->get(Opc)), AM) 4407 .addReg(MI->getOperand(4).getReg()); 4408 4409 // Reload the original control word now. 4410 addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx); 4411 4412 delete MI; // The pseudo instruction is gone now. 4413 return BB; 4414 } 4415 } 4416} 4417 4418//===----------------------------------------------------------------------===// 4419// X86 Optimization Hooks 4420//===----------------------------------------------------------------------===// 4421 4422void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op, 4423 uint64_t Mask, 4424 uint64_t &KnownZero, 4425 uint64_t &KnownOne, 4426 unsigned Depth) const { 4427 unsigned Opc = Op.getOpcode(); 4428 assert((Opc >= ISD::BUILTIN_OP_END || 4429 Opc == ISD::INTRINSIC_WO_CHAIN || 4430 Opc == ISD::INTRINSIC_W_CHAIN || 4431 Opc == ISD::INTRINSIC_VOID) && 4432 "Should use MaskedValueIsZero if you don't know whether Op" 4433 " is a target node!"); 4434 4435 KnownZero = KnownOne = 0; // Don't know anything. 
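// X86ISD::SETCC produces 0 or 1 in an i8, so every bit above bit zero is
// known zero; that is exactly what the case below reports to the combiner.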
4436 switch (Opc) { 4437 default: break; 4438 case X86ISD::SETCC: 4439 KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL); 4440 break; 4441 } 4442} 4443 4444/// getShuffleScalarElt - Returns the scalar element that will make up the ith 4445/// element of the result of the vector shuffle. 4446static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) { 4447 MVT::ValueType VT = N->getValueType(0); 4448 SDOperand PermMask = N->getOperand(2); 4449 unsigned NumElems = PermMask.getNumOperands(); 4450 SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1); 4451 i %= NumElems; 4452 if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) { 4453 return (i == 0) 4454 ? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(VT)); 4455 } else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) { 4456 SDOperand Idx = PermMask.getOperand(i); 4457 if (Idx.getOpcode() == ISD::UNDEF) 4458 return DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(VT)); 4459 return getShuffleScalarElt(V.Val,cast<ConstantSDNode>(Idx)->getValue(),DAG); 4460 } 4461 return SDOperand(); 4462} 4463 4464/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the 4465/// node is a GlobalAddress + an offset. 4466static bool isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) { 4467 unsigned Opc = N->getOpcode(); 4468 if (Opc == X86ISD::Wrapper) { 4469 if (dyn_cast<GlobalAddressSDNode>(N->getOperand(0))) { 4470 GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal(); 4471 return true; 4472 } 4473 } else if (Opc == ISD::ADD) { 4474 SDOperand N1 = N->getOperand(0); 4475 SDOperand N2 = N->getOperand(1); 4476 if (isGAPlusOffset(N1.Val, GA, Offset)) { 4477 ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2); 4478 if (V) { 4479 Offset += V->getSignExtended(); 4480 return true; 4481 } 4482 } else if (isGAPlusOffset(N2.Val, GA, Offset)) { 4483 ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1); 4484 if (V) { 4485 Offset += V->getSignExtended(); 4486 return true; 4487 } 4488 } 4489 } 4490 return false; 4491} 4492 4493/// isConsecutiveLoad - Returns true if N is loading from an address of Base 4494/// + Dist * Size. 
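/// For example, with Size == 4 a load of Base + 8 is consecutive at
/// Dist == 2. Both frame-index addresses and global + offset addresses are
/// recognized.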
static bool isConsecutiveLoad(SDNode *N, SDNode *Base, int Dist, int Size,
                              MachineFrameInfo *MFI) {
  // The loads must be on the same chain.
  if (N->getOperand(0).Val != Base->getOperand(0).Val)
    return false;

  SDOperand Loc = N->getOperand(1);
  SDOperand BaseLoc = Base->getOperand(1);
  if (Loc.getOpcode() == ISD::FrameIndex) {
    if (BaseLoc.getOpcode() != ISD::FrameIndex)
      return false;
    int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
    int FS  = MFI->getObjectSize(FI);
    int BFS = MFI->getObjectSize(BFI);
    if (FS != BFS || FS != Size) return false;
    return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Size);
  } else {
    GlobalValue *GV1 = NULL;
    GlobalValue *GV2 = NULL;
    int64_t Offset1 = 0;
    int64_t Offset2 = 0;
    bool isGA1 = isGAPlusOffset(Loc.Val, GV1, Offset1);
    bool isGA2 = isGAPlusOffset(BaseLoc.Val, GV2, Offset2);
    if (isGA1 && isGA2 && GV1 == GV2)
      return Offset1 == (Offset2 + Dist*Size);
  }

  return false;
}

static bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI,
                              const X86Subtarget *Subtarget) {
  GlobalValue *GV = NULL;
  int64_t Offset = 0;   // Must start at zero; isGAPlusOffset accumulates.
  if (isGAPlusOffset(Base, GV, Offset))
    return (GV->getAlignment() >= 16 && (Offset % 16) == 0);

  assert(Base->getOpcode() == ISD::FrameIndex && "Unexpected base node!");
  int BFI = cast<FrameIndexSDNode>(Base)->getIndex();
  if (BFI < 0)
    // Fixed objects do not specify alignment, however the offsets are known.
    return ((Subtarget->getStackAlignment() % 16) == 0 &&
            (MFI->getObjectOffset(BFI) % 16) == 0);
  return MFI->getObjectAlignment(BFI) >= 16;
}

/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
/// if the load addresses are consecutive, non-overlapping, and in the right
/// order.
static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
                                       const X86Subtarget *Subtarget) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MVT::ValueType VT = N->getValueType(0);
  MVT::ValueType EVT = MVT::getVectorBaseType(VT);
  SDOperand PermMask = N->getOperand(2);
  int NumElems = (int)PermMask.getNumOperands();
  SDNode *Base = NULL;
  for (int i = 0; i < NumElems; ++i) {
    SDOperand Idx = PermMask.getOperand(i);
    if (Idx.getOpcode() == ISD::UNDEF) {
      if (!Base) return SDOperand();
    } else {
      SDOperand Arg =
        getShuffleScalarElt(N, cast<ConstantSDNode>(Idx)->getValue(), DAG);
      if (!Arg.Val || !ISD::isNON_EXTLoad(Arg.Val))
        return SDOperand();
      if (!Base)
        Base = Arg.Val;
      else if (!isConsecutiveLoad(Arg.Val, Base,
                                  i, MVT::getSizeInBits(EVT)/8, MFI))
        return SDOperand();
    }
  }

  bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget);
  if (isAlign16) {
    LoadSDNode *LD = cast<LoadSDNode>(Base);
    return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
                       LD->getSrcValueOffset());
  } else {
    // The base is not known to be 16-byte aligned, so use movups, the
    // unaligned 128-bit load; it is still shorter than four scalar loads.
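    // The unaligned load is emitted as an X86ISD::LOAD_UA node on v4f32 and
    // then bit_converted back to the shuffle's type, e.g.:
    //   (bit_convert:v4i32 (X86ISD::LOAD_UA chain, ptr, srcvalue))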
    SDVTList Tys = DAG.getVTList(MVT::v4f32, MVT::Other);
    SmallVector<SDOperand, 3> Ops;
    Ops.push_back(Base->getOperand(0));
    Ops.push_back(Base->getOperand(1));
    Ops.push_back(Base->getOperand(2));
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       DAG.getNode(X86ISD::LOAD_UA, Tys, &Ops[0], Ops.size()));
  }
}

/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
static SDOperand PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
                                      const X86Subtarget *Subtarget) {
  SDOperand Cond = N->getOperand(0);

  // If we have SSE2 support, try to form min/max nodes.
  if (Subtarget->hasSSE2() &&
      (N->getValueType(0) == MVT::f32 || N->getValueType(0) == MVT::f64)) {
    if (Cond.getOpcode() == ISD::SETCC) {
      // Get the LHS/RHS of the select.
      SDOperand LHS = N->getOperand(1);
      SDOperand RHS = N->getOperand(2);
      ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();

      unsigned Opcode = 0;
      if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) {
        switch (CC) {
        default: break;
        case ISD::SETOLE: // (X <= Y) ? X : Y -> min
        case ISD::SETULE:
        case ISD::SETLE:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETOLT: // (X olt/lt Y) ? X : Y -> min
        case ISD::SETLT:
          Opcode = X86ISD::FMIN;
          break;

        case ISD::SETOGT: // (X > Y) ? X : Y -> max
        case ISD::SETUGT:
        case ISD::SETGT:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETUGE: // (X uge/ge Y) ? X : Y -> max
        case ISD::SETGE:
          Opcode = X86ISD::FMAX;
          break;
        }
      } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) {
        switch (CC) {
        default: break;
        case ISD::SETOGT: // (X > Y) ? Y : X -> min
        case ISD::SETUGT:
        case ISD::SETGT:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETUGE: // (X uge/ge Y) ? Y : X -> min
        case ISD::SETGE:
          Opcode = X86ISD::FMIN;
          break;

        case ISD::SETOLE: // (X <= Y) ? Y : X -> max
        case ISD::SETULE:
        case ISD::SETLE:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETOLT: // (X olt/lt Y) ? Y : X -> max
        case ISD::SETLT:
          Opcode = X86ISD::FMAX;
          break;
        }
      }

      if (Opcode)
        return DAG.getNode(Opcode, N->getValueType(0), LHS, RHS);
    }
  }

  return SDOperand();
}

SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  switch (N->getOpcode()) {
  default: break;
  case ISD::VECTOR_SHUFFLE:
    return PerformShuffleCombine(N, DAG, Subtarget);
  case ISD::SELECT:
    return PerformSELECTCombine(N, DAG, Subtarget);
  }

  return SDOperand();
}

//===----------------------------------------------------------------------===//
//                           X86 Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
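/// For example, single-letter register constraints such as 'r' and 'x' are
/// classified as C_RegisterClass below; anything not special-cased here
/// (e.g. the memory constraint 'm') is left to the default TargetLowering
/// implementation.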
X86TargetLowering::ConstraintType
X86TargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'A':
    case 'r':
    case 'R':
    case 'l':
    case 'q':
    case 'Q':
    case 'x':
    case 'Y':
      return C_RegisterClass;
    default:
      break;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

/// isOperandValidForConstraint - Return the specified operand (possibly
/// modified) if the specified SDOperand is valid for the specified target
/// constraint letter, otherwise return null.
SDOperand X86TargetLowering::
isOperandValidForConstraint(SDOperand Op, char Constraint, SelectionDAG &DAG) {
  switch (Constraint) {
  default: break;
  case 'I':   // Constant in the range 0..31 (e.g. a shift count).
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getValue() <= 31)
        return Op;
    }
    return SDOperand(0,0);
  case 'N':   // Constant in the range 0..255.
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getValue() <= 255)
        return Op;
    }
    return SDOperand(0,0);
  case 'i':
    // Literal immediates are always ok.
    if (isa<ConstantSDNode>(Op)) return Op;

    // If we are in non-pic codegen mode, we allow the address of a global to
    // be used with 'i'.
    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
      if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
        return SDOperand(0, 0);

      if (GA->getOpcode() != ISD::TargetGlobalAddress)
        Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
                                        GA->getOffset());
      return Op;
    }

    // Otherwise, not valid for this mode.
    return SDOperand(0, 0);
  }
  return TargetLowering::isOperandValidForConstraint(Op, Constraint, DAG);
}

std::vector<unsigned> X86TargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                  MVT::ValueType VT) const {
  if (Constraint.size() == 1) {
    // FIXME: not handling fp-stack yet!
    switch (Constraint[0]) {      // GCC X86 Constraint Letters
    default: break;  // Unknown constraint letter
    case 'A':   // EAX/EDX
      if (VT == MVT::i32 || VT == MVT::i64)
        return make_vector<unsigned>(X86::EAX, X86::EDX, 0);
      break;
    case 'q':   // Q_REGS (GENERAL_REGS in 64-bit mode)
    case 'Q':   // Q_REGS
      if (VT == MVT::i32)
        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
      else if (VT == MVT::i16)
        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
      else if (VT == MVT::i8)
        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
      break;
    }
  }

  return std::vector<unsigned>();
}

std::pair<unsigned, const TargetRegisterClass*>
X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT::ValueType VT) const {
  // First, see if this is a constraint that directly corresponds to an LLVM
  // register class.
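  // For example, 'r' with an i32 operand maps directly to GR32 below, while
  // an explicit register reference such as "{ax}" falls through to the
  // generic TargetLowering handling further down.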
  if (Constraint.size() == 1) {
    // GCC Constraint Letters
    switch (Constraint[0]) {
    default: break;
    case 'r':   // GENERAL_REGS
    case 'R':   // LEGACY_REGS
    case 'l':   // INDEX_REGS
      if (VT == MVT::i64 && Subtarget->is64Bit())
        return std::make_pair(0U, X86::GR64RegisterClass);
      if (VT == MVT::i32)
        return std::make_pair(0U, X86::GR32RegisterClass);
      else if (VT == MVT::i16)
        return std::make_pair(0U, X86::GR16RegisterClass);
      else if (VT == MVT::i8)
        return std::make_pair(0U, X86::GR8RegisterClass);
      break;
    case 'y':   // MMX_REGS if MMX allowed.
      if (Subtarget->hasMMX())
        return std::make_pair(0U, X86::VR64RegisterClass);
      break;
    case 'Y':   // SSE_REGS if SSE2 allowed
      if (!Subtarget->hasSSE2()) break;
      // FALL THROUGH.
    case 'x':   // SSE_REGS if SSE1 allowed
      if (!Subtarget->hasSSE1()) break;

      switch (VT) {
      default: break;
      // Scalar SSE types.
      case MVT::f32:
      case MVT::i32:
        return std::make_pair(0U, X86::FR32RegisterClass);
      case MVT::f64:
      case MVT::i64:
        return std::make_pair(0U, X86::FR64RegisterClass);
      // Vector types.
      case MVT::Vector:
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
        return std::make_pair(0U, X86::VR128RegisterClass);
      }
      break;
    }
  }

  // Use the default implementation in TargetLowering to convert the register
  // constraint into a member of a register class.
  std::pair<unsigned, const TargetRegisterClass*> Res;
  Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);

  // Not found as a standard register?
  if (Res.second == 0) {
    // GCC calls "st(0)" just plain "st".
    if (StringsEqualNoCase("{st}", Constraint)) {
      Res.first = X86::ST0;
      Res.second = X86::RSTRegisterClass;
    }

    return Res;
  }

  // Otherwise, check to see if this is a register class of the wrong value
  // type.  For example, we want to map "{ax},i32" -> {eax}; we don't want it
  // to turn into {ax},{dx}.
  if (Res.second->hasType(VT))
    return Res;   // Correct type already, nothing to do.

  // All of the single-register GCC register classes map their values onto
  // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp".  If we
  // really want an 8-bit or 32-bit register, map to the appropriate register
  // class and return the appropriate register.
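  // For example, "{ax}" with an i8 operand should yield AL in GR8, with an
  // i32 operand EAX in GR32, and with an i64 operand RAX in GR64.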
  if (Res.second != X86::GR16RegisterClass)
    return Res;

  if (VT == MVT::i8) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::AL; break;
    case X86::DX: DestReg = X86::DL; break;
    case X86::CX: DestReg = X86::CL; break;
    case X86::BX: DestReg = X86::BL; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR8RegisterClass;
    }
  } else if (VT == MVT::i32) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::EAX; break;
    case X86::DX: DestReg = X86::EDX; break;
    case X86::CX: DestReg = X86::ECX; break;
    case X86::BX: DestReg = X86::EBX; break;
    case X86::SI: DestReg = X86::ESI; break;
    case X86::DI: DestReg = X86::EDI; break;
    case X86::BP: DestReg = X86::EBP; break;
    case X86::SP: DestReg = X86::ESP; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR32RegisterClass;
    }
  } else if (VT == MVT::i64) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::RAX; break;
    case X86::DX: DestReg = X86::RDX; break;
    case X86::CX: DestReg = X86::RCX; break;
    case X86::BX: DestReg = X86::RBX; break;
    case X86::SI: DestReg = X86::RSI; break;
    case X86::DI: DestReg = X86::RDI; break;
    case X86::BP: DestReg = X86::RBP; break;
    case X86::SP: DestReg = X86::RSP; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR64RegisterClass;
    }
  }

  return Res;
}