X86ISelLowering.cpp revision 85e3800e427fd5367df7a46ce4ad37ad901f894c
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

// FIXME: temporary.
#include "llvm/Support/CommandLine.h"
static cl::opt<bool> EnableFastCC("enable-x86-fastcc", cl::Hidden,
                                  cl::desc("Enable fastcc on X86"));

X86TargetLowering::X86TargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  X86ScalarSSE = Subtarget->hasSSE2();

  // Set up the TargetLowering object.

  // X86 is weird: it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setSetCCResultType(MVT::i8);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setSchedulingPreference(SchedulingForRegPressure);
  setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
  setStackPointerRegisterToSaveRestore(X86::ESP);

  if (!Subtarget->isTargetDarwin())
    // Darwin uses plain setjmp/longjmp; everyone else uses _setjmp/_longjmp.
    setUseUnderscoreSetJmpLongJmp(true);

  // Add legal addressing mode scale values.
  addLegalAddressScale(8);
  addLegalAddressScale(4);
  addLegalAddressScale(2);
  // Enter the ones which require both scale + index last. These are more
  // expensive.
  addLegalAddressScale(9);
  addLegalAddressScale(5);
  addLegalAddressScale(3);

  // Set up the register classes.
  addRegisterClass(MVT::i8, X86::R8RegisterClass);
  addRegisterClass(MVT::i16, X86::R16RegisterClass);
  addRegisterClass(MVT::i32, X86::R32RegisterClass);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i8   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i16  , Promote);

  if (X86ScalarSSE)
    // No SSE i64 SINT_TO_FP, so expand i32 UINT_TO_FP instead.
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Expand);
  else
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Promote);

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);
  // SSE has no i16 to fp conversion, only i32.
  if (X86ScalarSSE)
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
  else {
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Custom);
    setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
  }

  // We can handle SINT_TO_FP and FP_TO_SINT from/to i64 even though i64
  // isn't legal.
  setOperationAction(ISD::SINT_TO_FP       , MVT::i64  , Custom);
  setOperationAction(ISD::FP_TO_SINT       , MVT::i64  , Custom);

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTs, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_SINT       , MVT::i8   , Promote);

  if (X86ScalarSSE) {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
  } else {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Custom);
    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i8   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i16  , Promote);

  if (X86ScalarSSE && !Subtarget->hasSSE3())
    // Expand FP_TO_UINT into a select.
    // FIXME: We would like to use a Custom expander here eventually to do
    // the optimal thing for SSE vs. the default expansion in the legalizer.
    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Expand);
  else
    // With SSE3 we can use fisttpll to convert to a signed i64.
    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Promote);

  setOperationAction(ISD::BIT_CONVERT      , MVT::f32  , Expand);
  setOperationAction(ISD::BIT_CONVERT      , MVT::i32  , Expand);

  setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
  setOperationAction(ISD::BR_CC            , MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC        , MVT::Other, Expand);
  setOperationAction(ISD::MEMMOVE          , MVT::Other, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16  , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8   , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
  setOperationAction(ISD::FP_ROUND_INREG   , MVT::f32  , Expand);
  setOperationAction(ISD::SEXTLOAD         , MVT::i1   , Expand);
  setOperationAction(ISD::FREM             , MVT::f64  , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i8   , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i8   , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i8   , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i16  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i16  , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i16  , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i32  , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i32  , Expand);
  setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);
  setOperationAction(ISD::BSWAP            , MVT::i16  , Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT           , MVT::i1   , Promote);
  setOperationAction(ISD::SELECT           , MVT::i8   , Promote);

  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT           , MVT::i16  , Custom);
  setOperationAction(ISD::SELECT           , MVT::i32  , Custom);
  setOperationAction(ISD::SELECT           , MVT::f32  , Custom);
  setOperationAction(ISD::SELECT           , MVT::f64  , Custom);
  setOperationAction(ISD::SETCC            , MVT::i8   , Custom);
  setOperationAction(ISD::SETCC            , MVT::i16  , Custom);
  setOperationAction(ISD::SETCC            , MVT::i32  , Custom);
  setOperationAction(ISD::SETCC            , MVT::f32  , Custom);
  setOperationAction(ISD::SETCC            , MVT::f64  , Custom);
  // X86 ret instruction may pop stack.
  setOperationAction(ISD::RET              , MVT::Other, Custom);
  // Darwin ABI issue.
  setOperationAction(ISD::ConstantPool     , MVT::i32  , Custom);
  setOperationAction(ISD::JumpTable        , MVT::i32  , Custom);
  setOperationAction(ISD::GlobalAddress    , MVT::i32  , Custom);
  setOperationAction(ISD::ExternalSymbol   , MVT::i32  , Custom);
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
  setOperationAction(ISD::SHL_PARTS        , MVT::i32  , Custom);
  setOperationAction(ISD::SRA_PARTS        , MVT::i32  , Custom);
  setOperationAction(ISD::SRL_PARTS        , MVT::i32  , Custom);
  // X86 wants to expand memset / memcpy itself.
  setOperationAction(ISD::MEMSET           , MVT::Other, Custom);
  setOperationAction(ISD::MEMCPY           , MVT::Other, Custom);

  // We don't have line number support yet.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin())
    setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART          , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG            , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY           , MVT::Other, Expand);
  setOperationAction(ISD::VAEND            , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE        , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE     , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  if (X86ScalarSSE) {
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // SSE has no load+extend ops.
    setOperationAction(ISD::EXTLOAD,  MVT::f32, Expand);
    setOperationAction(ISD::ZEXTLOAD, MVT::f32, Expand);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f64, Custom);
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f64, Custom);
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    // We don't support sin/cos/fmod.
    setOperationAction(ISD::FSIN , MVT::f64, Expand);
    setOperationAction(ISD::FCOS , MVT::f64, Expand);
    setOperationAction(ISD::FREM , MVT::f64, Expand);
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FREM , MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
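    // (For SSE the only such special case is +0.0, registered just below,
    // which can be materialized with a single xorps/xorpd.)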
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(+0.0); // xorps / xorpd
  } else {
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, X86::RFPRegisterClass);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN , MVT::f64 , Expand);
      setOperationAction(ISD::FCOS , MVT::f64 , Expand);
    }

    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    addLegalFPImmediate(+0.0); // FLD0
    addLegalFPImmediate(+1.0); // FLD1
    addLegalFPImmediate(-0.0); // FLD0/FCHS
    addLegalFPImmediate(-1.0); // FLD1/FCHS
  }

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::Vector + 1;
       VT != (unsigned)MVT::LAST_VALUETYPE; VT++) {
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  (MVT::ValueType)VT, Expand);
  }

  if (Subtarget->hasMMX()) {
    addRegisterClass(MVT::v8i8,  X86::VR64RegisterClass);
    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);

    // FIXME: add MMX packed arithmetic.
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8,  Expand);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Expand);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Expand);
  }

  if (Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::AND,                MVT::v4f32, Legal);
    setOperationAction(ISD::OR,                 MVT::v4f32, Legal);
    setOperationAction(ISD::XOR,                MVT::v4f32, Legal);
    setOperationAction(ISD::ADD,                MVT::v4f32, Legal);
    setOperationAction(ISD::SUB,                MVT::v4f32, Legal);
    setOperationAction(ISD::MUL,                MVT::v4f32, Legal);
    setOperationAction(ISD::LOAD,               MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
  }

  if (Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);

    setOperationAction(ISD::ADD, MVT::v2f64, Legal);
    setOperationAction(ISD::ADD, MVT::v16i8, Legal);
    setOperationAction(ISD::ADD, MVT::v8i16, Legal);
    setOperationAction(ISD::ADD, MVT::v4i32, Legal);
    setOperationAction(ISD::SUB, MVT::v2f64, Legal);
    setOperationAction(ISD::SUB, MVT::v16i8, Legal);
    setOperationAction(ISD::SUB, MVT::v8i16, Legal);
    setOperationAction(ISD::SUB, MVT::v4i32, Legal);
    setOperationAction(ISD::MUL, MVT::v8i16, Legal);
    setOperationAction(ISD::MUL, MVT::v2f64, Legal);

    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i32, Custom);
    // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f32, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::BUILD_VECTOR,       (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    }
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::AND,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::OR,     (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR,     (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::XOR,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::LOAD,   (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD,   (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
    }

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD,   MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD,   MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  computeRegisterProperties();

  // FIXME: These should be based on subtarget info. Plus, the values should
  // be smaller when we are in optimizing for size mode.
  maxStoresPerMemset = 16;  // For %llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 16;  // For %llvm.memcpy -> sequence of stores
  maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
  allowUnalignedMemoryAccesses = true; // x86 supports it!
}

std::vector<SDOperand>
X86TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
  std::vector<SDOperand> Args = TargetLowering::LowerArguments(F, DAG);

  FormalArgs.clear();
  FormalArgLocs.clear();

  // This sets BytesToPopOnReturn, BytesCallerReserves, etc. which have to be
  // set before the rest of the function can be lowered.
  if (F.getCallingConv() == CallingConv::Fast && EnableFastCC)
    PreprocessFastCCArguments(Args, F, DAG);
  else
    PreprocessCCCArguments(Args, F, DAG);
  return Args;
}

std::pair<SDOperand, SDOperand>
X86TargetLowering::LowerCallTo(SDOperand Chain, const Type *RetTy,
                               bool isVarArg, unsigned CallingConv,
                               bool isTailCall,
                               SDOperand Callee, ArgListTy &Args,
                               SelectionDAG &DAG) {
  assert((!isVarArg || CallingConv == CallingConv::C) &&
         "Only C takes varargs!");

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  if (CallingConv == CallingConv::Fast && EnableFastCC)
    return LowerFastCCCallTo(Chain, RetTy, isTailCall, Callee, Args, DAG);
  return LowerCCCCallTo(Chain, RetTy, isVarArg, isTailCall, Callee, Args, DAG);
}

//===----------------------------------------------------------------------===//
//                  C Calling Convention implementation
//===----------------------------------------------------------------------===//

/// AddLiveIn - This helper function adds the specified physical register to
/// the MachineFunction as a live-in value. It also creates a corresponding
/// virtual register for it.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
                          TargetRegisterClass *RC) {
  assert(RC->contains(PReg) && "Not the correct regclass!");
  unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
  MF.addLiveIn(PReg, VReg);
  return VReg;
}

/// getFormalArgSize - Return the minimum size of the stack frame needed to
/// store an object of the specified type.
static unsigned getFormalArgSize(MVT::ValueType ObjectVT) {
  unsigned ObjSize = 0;
  switch (ObjectVT) {
  default: assert(0 && "Unhandled argument type!");
  case MVT::i1:
  case MVT::i8:  ObjSize = 1; break;
  case MVT::i16: ObjSize = 2; break;
  case MVT::i32: ObjSize = 4; break;
  case MVT::i64: ObjSize = 8; break;
  case MVT::f32: ObjSize = 4; break;
  case MVT::f64: ObjSize = 8; break;
  }
  return ObjSize;
}

/// getFormalArgObjects - Returns itself if Op is a FORMAL_ARGUMENTS, otherwise
/// returns the FORMAL_ARGUMENTS node(s) that made up parts of the node.
static std::vector<SDOperand> getFormalArgObjects(SDOperand Op) {
  unsigned Opc = Op.getOpcode();
  std::vector<SDOperand> Objs;
  if (Opc == ISD::TRUNCATE) {
    Op = Op.getOperand(0);
    assert(Op.getOpcode() == ISD::AssertSext ||
           Op.getOpcode() == ISD::AssertZext);
    Objs.push_back(Op.getOperand(0));
  } else if (Opc == ISD::FP_ROUND) {
    Objs.push_back(Op.getOperand(0));
  } else if (Opc == ISD::BUILD_PAIR) {
    Objs.push_back(Op.getOperand(0));
    Objs.push_back(Op.getOperand(1));
  } else {
    Objs.push_back(Op);
  }
  return Objs;
}

void X86TargetLowering::PreprocessCCCArguments(std::vector<SDOperand> Args,
                                               Function &F, SelectionDAG &DAG) {
  unsigned NumArgs = Args.size();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Add DAG nodes to load the arguments... On entry to a function on the X86,
  // the stack frame looks like this:
  //
  // [ESP]     -- return address
  // [ESP + 4] -- first argument (leftmost lexically)
  // [ESP + 8] -- second argument, if first argument is four bytes in size
  //    ...
  //
  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot.
  for (unsigned i = 0; i < NumArgs; ++i) {
    SDOperand Op = Args[i];
    std::vector<SDOperand> Objs = getFormalArgObjects(Op);
    for (std::vector<SDOperand>::iterator I = Objs.begin(), E = Objs.end();
         I != E; ++I) {
      SDOperand Obj = *I;
      MVT::ValueType ObjectVT = Obj.getValueType();
      unsigned ArgIncrement = 4;
      unsigned ObjSize = getFormalArgSize(ObjectVT);
      if (ObjSize == 8)
        ArgIncrement = 8;

      // Create the frame index object for this incoming parameter...
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      std::pair<FALocInfo, FALocInfo> Loc =
        std::make_pair(FALocInfo(FALocInfo::StackFrameLoc, FI), FALocInfo());
      FormalArgLocs.push_back(Loc);
      ArgOffset += ArgIncrement;   // Move on to the next argument...
    }
  }

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  if (F.isVarArg())
    VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
  ReturnAddrIndex = 0;     // No return address slot generated yet.
  BytesToPopOnReturn = 0;  // Callee pops nothing.
  BytesCallerReserves = ArgOffset;
}

void X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG) {
  unsigned NumArgs = Op.Val->getNumValues();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();

  for (unsigned i = 0; i < NumArgs; ++i) {
    // Create the SelectionDAG nodes corresponding to a load from this
    // parameter.
    unsigned FI = FormalArgLocs[i].first.Loc;
    SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
    SDOperand ArgValue = DAG.getLoad(Op.Val->getValueType(i),
                                     DAG.getEntryNode(), FIN,
                                     DAG.getSrcValue(NULL));
    FormalArgs.push_back(ArgValue);
  }
}

std::pair<SDOperand, SDOperand>
X86TargetLowering::LowerCCCCallTo(SDOperand Chain, const Type *RetTy,
                                  bool isVarArg, bool isTailCall,
                                  SDOperand Callee, ArgListTy &Args,
                                  SelectionDAG &DAG) {
  // Count how many bytes are to be pushed on the stack.
  unsigned NumBytes = 0;

  if (Args.empty()) {
    // Save zero bytes.
    Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(0, getPointerTy()));
  } else {
    for (unsigned i = 0, e = Args.size(); i != e; ++i)
      switch (getValueType(Args[i].second)) {
      default: assert(0 && "Unknown value type!");
      case MVT::i1:
      case MVT::i8:
      case MVT::i16:
      case MVT::i32:
      case MVT::f32:
        NumBytes += 4;
        break;
      case MVT::i64:
      case MVT::f64:
        NumBytes += 8;
        break;
      }

    Chain = DAG.getCALLSEQ_START(Chain,
                                 DAG.getConstant(NumBytes, getPointerTy()));

    // Arguments go on the stack in reverse order, as specified by the ABI.
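    // Rather than emitting pushes, the lowering below stores each argument
    // at its fixed offset from ESP inside the frame reserved by CALLSEQ_START.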
    unsigned ArgOffset = 0;
    SDOperand StackPtr = DAG.getRegister(X86::ESP, MVT::i32);
    std::vector<SDOperand> Stores;

    for (unsigned i = 0, e = Args.size(); i != e; ++i) {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);

      switch (getValueType(Args[i].second)) {
      default: assert(0 && "Unexpected ValueType for argument!");
      case MVT::i1:
      case MVT::i8:
      case MVT::i16:
        // Promote the integer to 32 bits. If the input type is signed, use a
        // sign extend; otherwise use a zero extend.
        if (Args[i].second->isSigned())
          Args[i].first = DAG.getNode(ISD::SIGN_EXTEND, MVT::i32, Args[i].first);
        else
          Args[i].first = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Args[i].first);

        // FALL THROUGH
      case MVT::i32:
      case MVT::f32:
        Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                     Args[i].first, PtrOff,
                                     DAG.getSrcValue(NULL)));
        ArgOffset += 4;
        break;
      case MVT::i64:
      case MVT::f64:
        Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                     Args[i].first, PtrOff,
                                     DAG.getSrcValue(NULL)));
        ArgOffset += 8;
        break;
      }
    }
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, Stores);
  }

  std::vector<MVT::ValueType> RetVals;
  MVT::ValueType RetTyVT = getValueType(RetTy);
  RetVals.push_back(MVT::Other);

  // The result values produced have to be legal. Promote the result.
  switch (RetTyVT) {
  case MVT::isVoid: break;
  default:
    RetVals.push_back(RetTyVT);
    break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
    RetVals.push_back(MVT::i32);
    break;
  case MVT::f32:
    if (X86ScalarSSE)
      RetVals.push_back(MVT::f32);
    else
      RetVals.push_back(MVT::f64);
    break;
  case MVT::i64:
    RetVals.push_back(MVT::i32);
    RetVals.push_back(MVT::i32);
    break;
  }

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // FIXME: Do not generate X86ISD::TAILCALL for now.
  Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops);
  SDOperand InFlag = Chain.getValue(1);

  NodeTys.clear();
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(0, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, Ops);
  InFlag = Chain.getValue(1);

  SDOperand RetVal;
  if (RetTyVT != MVT::isVoid) {
    switch (RetTyVT) {
    default: assert(0 && "Unknown value type to return!");
    case MVT::i1:
    case MVT::i8:
      RetVal = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag);
      Chain = RetVal.getValue(1);
      if (RetTyVT == MVT::i1)
        RetVal = DAG.getNode(ISD::TRUNCATE, MVT::i1, RetVal);
      break;
    case MVT::i16:
      RetVal = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag);
      Chain = RetVal.getValue(1);
      break;
    case MVT::i32:
      RetVal = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
      Chain = RetVal.getValue(1);
      break;
    case MVT::i64: {
      SDOperand Lo = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
      SDOperand Hi = DAG.getCopyFromReg(Lo.getValue(1), X86::EDX, MVT::i32,
                                        Lo.getValue(2));
      RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi);
      Chain = Hi.getValue(1);
      break;
    }
    case MVT::f32:
    case MVT::f64: {
      std::vector<MVT::ValueType> Tys;
      Tys.push_back(MVT::f64);
      Tys.push_back(MVT::Other);
      Tys.push_back(MVT::Flag);
      std::vector<SDOperand> Ops;
      Ops.push_back(Chain);
      Ops.push_back(InFlag);
      RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, Ops);
      Chain = RetVal.getValue(1);
      InFlag = RetVal.getValue(2);
      if (X86ScalarSSE) {
        // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
        // shouldn't be necessary except that RFP cannot be live across
        // multiple blocks. When the stackifier is fixed, they can be
        // uncoupled.
        MachineFunction &MF = DAG.getMachineFunction();
        int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
        SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
        Tys.clear();
        Tys.push_back(MVT::Other);
        Ops.clear();
        Ops.push_back(Chain);
        Ops.push_back(RetVal);
        Ops.push_back(StackSlot);
        Ops.push_back(DAG.getValueType(RetTyVT));
        Ops.push_back(InFlag);
        Chain = DAG.getNode(X86ISD::FST, Tys, Ops);
        RetVal = DAG.getLoad(RetTyVT, Chain, StackSlot,
                             DAG.getSrcValue(NULL));
        Chain = RetVal.getValue(1);
      }

      if (RetTyVT == MVT::f32 && !X86ScalarSSE)
        // FIXME: we would really like to remember that this FP_ROUND
        // operation is okay to eliminate if we allow excess FP precision.
        RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
      break;
    }
    }
  }

  return std::make_pair(RetVal, Chain);
}

//===----------------------------------------------------------------------===//
//                 Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
//
// The X86 'fast' calling convention passes up to two integer arguments in
// registers (an appropriate portion of EAX/EDX), passes arguments in C order,
// requires that the callee pop its arguments off the stack (allowing proper
// tail calls), and has the same return value conventions as the C calling
// convention.
//
// This calling convention always arranges for the callee-pop value to be
// 8n+4 bytes, which is needed for tail recursion elimination and stack
// alignment reasons.
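//
// For example (an illustration, not from the original source): a fastcc
// call passing two i32 arguments on the stack needs 8 bytes of argument
// space; the 8n+4 rule pads this to 12 so that, together with the 4-byte
// return address, ESP moves by 16 and 8-byte stack alignment is preserved.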
719// 720// Note that this can be enhanced in the future to pass fp vals in registers 721// (when we have a global fp allocator) and do other tricks. 722// 723 724// FASTCC_NUM_INT_ARGS_INREGS - This is the max number of integer arguments 725// to pass in registers. 0 is none, 1 is is "use EAX", 2 is "use EAX and 726// EDX". Anything more is illegal. 727// 728// FIXME: The linscan register allocator currently has problem with 729// coalescing. At the time of this writing, whenever it decides to coalesce 730// a physreg with a virtreg, this increases the size of the physreg's live 731// range, and the live range cannot ever be reduced. This causes problems if 732// too many physregs are coaleced with virtregs, which can cause the register 733// allocator to wedge itself. 734// 735// This code triggers this problem more often if we pass args in registers, 736// so disable it until this is fixed. 737// 738// NOTE: this isn't marked const, so that GCC doesn't emit annoying warnings 739// about code being dead. 740// 741static unsigned FASTCC_NUM_INT_ARGS_INREGS = 0; 742 743 744static void 745HowToPassFastCCArgument(MVT::ValueType ObjectVT, unsigned NumIntRegs, 746 unsigned &ObjSize, unsigned &ObjIntRegs) { 747 ObjSize = 0; 748 NumIntRegs = 0; 749 750 switch (ObjectVT) { 751 default: assert(0 && "Unhandled argument type!"); 752 case MVT::i1: 753 case MVT::i8: 754 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) 755 ObjIntRegs = 1; 756 else 757 ObjSize = 1; 758 break; 759 case MVT::i16: 760 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) 761 ObjIntRegs = 1; 762 else 763 ObjSize = 2; 764 break; 765 case MVT::i32: 766 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) 767 ObjIntRegs = 1; 768 else 769 ObjSize = 4; 770 break; 771 case MVT::i64: 772 if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) { 773 ObjIntRegs = 2; 774 } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) { 775 ObjIntRegs = 1; 776 ObjSize = 4; 777 } else 778 ObjSize = 8; 779 case MVT::f32: 780 ObjSize = 4; 781 break; 782 case MVT::f64: 783 ObjSize = 8; 784 break; 785 } 786} 787 788void 789X86TargetLowering::PreprocessFastCCArguments(std::vector<SDOperand>Args, 790 Function &F, SelectionDAG &DAG) { 791 unsigned NumArgs = Args.size(); 792 MachineFunction &MF = DAG.getMachineFunction(); 793 MachineFrameInfo *MFI = MF.getFrameInfo(); 794 795 // Add DAG nodes to load the arguments... On entry to a function the stack 796 // frame looks like this: 797 // 798 // [ESP] -- return address 799 // [ESP + 4] -- first nonreg argument (leftmost lexically) 800 // [ESP + 8] -- second nonreg argument, if first argument is 4 bytes in size 801 // ... 802 unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot 803 804 // Keep track of the number of integer regs passed so far. This can be either 805 // 0 (neither EAX or EDX used), 1 (EAX is used) or 2 (EAX and EDX are both 806 // used). 
  unsigned NumIntRegs = 0;

  for (unsigned i = 0; i < NumArgs; ++i) {
    SDOperand Op = Args[i];
    std::vector<SDOperand> Objs = getFormalArgObjects(Op);
    for (std::vector<SDOperand>::iterator I = Objs.begin(), E = Objs.end();
         I != E; ++I) {
      SDOperand Obj = *I;
      MVT::ValueType ObjectVT = Obj.getValueType();
      unsigned ArgIncrement = 4;
      unsigned ObjSize = 0;
      unsigned ObjIntRegs = 0;

      HowToPassFastCCArgument(ObjectVT, NumIntRegs, ObjSize, ObjIntRegs);
      if (ObjSize == 8)
        ArgIncrement = 8;

      unsigned Reg;
      std::pair<FALocInfo,FALocInfo> Loc = std::make_pair(FALocInfo(),
                                                          FALocInfo());
      if (ObjIntRegs) {
        switch (ObjectVT) {
        default: assert(0 && "Unhandled argument type!");
        case MVT::i1:
        case MVT::i8:
          Reg = AddLiveIn(MF, NumIntRegs ? X86::DL : X86::AL,
                          X86::R8RegisterClass);
          Loc.first.Kind = FALocInfo::LiveInRegLoc;
          Loc.first.Loc = Reg;
          Loc.first.Typ = MVT::i8;
          break;
        case MVT::i16:
          Reg = AddLiveIn(MF, NumIntRegs ? X86::DX : X86::AX,
                          X86::R16RegisterClass);
          Loc.first.Kind = FALocInfo::LiveInRegLoc;
          Loc.first.Loc = Reg;
          Loc.first.Typ = MVT::i16;
          break;
        case MVT::i32:
          Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX,
                          X86::R32RegisterClass);
          Loc.first.Kind = FALocInfo::LiveInRegLoc;
          Loc.first.Loc = Reg;
          Loc.first.Typ = MVT::i32;
          break;
        case MVT::i64:
          Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX,
                          X86::R32RegisterClass);
          Loc.first.Kind = FALocInfo::LiveInRegLoc;
          Loc.first.Loc = Reg;
          Loc.first.Typ = MVT::i32;
          if (ObjIntRegs == 2) {
            Reg = AddLiveIn(MF, X86::EDX, X86::R32RegisterClass);
            Loc.second.Kind = FALocInfo::LiveInRegLoc;
            Loc.second.Loc = Reg;
            Loc.second.Typ = MVT::i32;
          }
          break;
        }
        // Update the count only after the switch, so that the first register
        // argument correctly lands in the EAX portion.
        NumIntRegs += ObjIntRegs;
      }
      if (ObjSize) {
        int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
        if (ObjectVT == MVT::i64 && ObjIntRegs) {
          Loc.second.Kind = FALocInfo::StackFrameLoc;
          Loc.second.Loc = FI;
        } else {
          Loc.first.Kind = FALocInfo::StackFrameLoc;
          Loc.first.Loc = FI;
        }
        ArgOffset += ArgIncrement;   // Move on to the next argument.
      }

      FormalArgLocs.push_back(Loc);
    }
  }

  // Make sure the incoming-argument area occupies 8n+4 bytes, so that the
  // arguments stay 8-byte aligned once the return address has been pushed.
  if ((ArgOffset & 7) == 0)
    ArgOffset += 4;

  VarArgsFrameIndex = 0xAAAAAAA;   // fastcc functions can't have varargs.
  ReturnAddrIndex = 0;             // No return address slot generated yet.
  BytesToPopOnReturn = ArgOffset;  // Callee pops all stack arguments.
  BytesCallerReserves = 0;

  // Finally, inform the code generator which regs we return values in.
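  // (EAX, or the EAX/EDX pair for i64, for integer returns, and ST0 for
  // floating point returns, matching the switch below.)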
  switch (getValueType(F.getReturnType())) {
  default: assert(0 && "Unknown type!");
  case MVT::isVoid: break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    MF.addLiveOut(X86::EAX);
    break;
  case MVT::i64:
    MF.addLiveOut(X86::EAX);
    MF.addLiveOut(X86::EDX);
    break;
  case MVT::f32:
  case MVT::f64:
    MF.addLiveOut(X86::ST0);
    break;
  }
}

void
X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) {
  unsigned NumArgs = Op.Val->getNumValues();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();

  for (unsigned i = 0; i < NumArgs; ++i) {
    MVT::ValueType VT = Op.Val->getValueType(i);
    std::pair<FALocInfo, FALocInfo> Loc = FormalArgLocs[i];
    SDOperand ArgValue;
    if (Loc.first.Kind == FALocInfo::StackFrameLoc) {
      // Create the SelectionDAG nodes corresponding to a load from this
      // parameter.
      SDOperand FIN = DAG.getFrameIndex(Loc.first.Loc, MVT::i32);
      ArgValue = DAG.getLoad(Op.Val->getValueType(i), DAG.getEntryNode(), FIN,
                             DAG.getSrcValue(NULL));
    } else {
      // Must be a CopyFromReg.
      ArgValue = DAG.getCopyFromReg(DAG.getRoot(), Loc.first.Loc,
                                    Loc.first.Typ);
    }

    if (Loc.second.Kind != FALocInfo::None) {
      SDOperand ArgValue2;
      if (Loc.second.Kind == FALocInfo::StackFrameLoc) {
        // Create the SelectionDAG nodes corresponding to a load from this
        // parameter.
        SDOperand FIN = DAG.getFrameIndex(Loc.second.Loc, MVT::i32);
        ArgValue2 = DAG.getLoad(Op.Val->getValueType(i), DAG.getEntryNode(),
                                FIN, DAG.getSrcValue(NULL));
      } else {
        // Must be a CopyFromReg.
        ArgValue2 = DAG.getCopyFromReg(DAG.getRoot(),
                                       Loc.second.Loc, Loc.second.Typ);
      }
      ArgValue = DAG.getNode(ISD::BUILD_PAIR, VT, ArgValue, ArgValue2);
    }
    FormalArgs.push_back(ArgValue);
  }
}

std::pair<SDOperand, SDOperand>
X86TargetLowering::LowerFastCCCallTo(SDOperand Chain, const Type *RetTy,
                                     bool isTailCall, SDOperand Callee,
                                     ArgListTy &Args, SelectionDAG &DAG) {
  // Count how many bytes are to be pushed on the stack.
  unsigned NumBytes = 0;

  // Keep track of the number of integer regs passed so far. This can be
  // either 0 (neither EAX nor EDX used), 1 (EAX is used) or 2 (EAX and EDX
  // are both used).
  unsigned NumIntRegs = 0;

  for (unsigned i = 0, e = Args.size(); i != e; ++i)
    switch (getValueType(Args[i].second)) {
    default: assert(0 && "Unknown value type!");
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        ++NumIntRegs;
        break;
      }
      // fall through
    case MVT::f32:
      NumBytes += 4;
      break;
    case MVT::i64:
      if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) {
        NumIntRegs += 2;
        break;
      } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) {
        NumIntRegs = FASTCC_NUM_INT_ARGS_INREGS;
        NumBytes += 4;
        break;
      }

      // fall through
    case MVT::f64:
      NumBytes += 8;
      break;
    }

  // Make sure the outgoing-argument area occupies 8n+4 bytes, so that the
  // arguments stay 8-byte aligned once the return address has been pushed.
  if ((NumBytes & 7) == 0)
    NumBytes += 4;

  Chain = DAG.getCALLSEQ_START(Chain,
                               DAG.getConstant(NumBytes, getPointerTy()));

  // Arguments go on the stack in reverse order, as specified by the ABI.
  unsigned ArgOffset = 0;
  SDOperand StackPtr = DAG.getRegister(X86::ESP, MVT::i32);
  NumIntRegs = 0;
  std::vector<SDOperand> Stores;
  std::vector<SDOperand> RegValuesToPass;
  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
    switch (getValueType(Args[i].second)) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i1:
      Args[i].first = DAG.getNode(ISD::ANY_EXTEND, MVT::i8, Args[i].first);
      // Fall through.
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        RegValuesToPass.push_back(Args[i].first);
        ++NumIntRegs;
        break;
      }
      // Fall through.
    case MVT::f32: {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
      Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                   Args[i].first, PtrOff,
                                   DAG.getSrcValue(NULL)));
      ArgOffset += 4;
      break;
    }
    case MVT::i64:
      // Can pass (at least) part of it in regs?
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
                                   Args[i].first, DAG.getConstant(1, MVT::i32));
        SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
                                   Args[i].first, DAG.getConstant(0, MVT::i32));
        RegValuesToPass.push_back(Lo);
        ++NumIntRegs;

        // Pass both parts in regs?
        if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
          RegValuesToPass.push_back(Hi);
          ++NumIntRegs;
        } else {
          // Pass the high part in memory.
          SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
          PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
          Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                       Hi, PtrOff, DAG.getSrcValue(NULL)));
          ArgOffset += 4;
        }
        break;
      }
      // Fall through.
    case MVT::f64: {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
      Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                   Args[i].first, PtrOff,
                                   DAG.getSrcValue(NULL)));
      ArgOffset += 8;
      break;
    }
    }
  }
  if (!Stores.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, Stores);

  // Make sure the outgoing-argument area occupies 8n+4 bytes, so that the
  // arguments stay 8-byte aligned once the return address has been pushed.
  if ((ArgOffset & 7) == 0)
    ArgOffset += 4;

  std::vector<MVT::ValueType> RetVals;
  MVT::ValueType RetTyVT = getValueType(RetTy);

  RetVals.push_back(MVT::Other);

  // The result values produced have to be legal. Promote the result.
  switch (RetTyVT) {
  case MVT::isVoid: break;
  default:
    RetVals.push_back(RetTyVT);
    break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
    RetVals.push_back(MVT::i32);
    break;
  case MVT::f32:
    if (X86ScalarSSE)
      RetVals.push_back(MVT::f32);
    else
      RetVals.push_back(MVT::f64);
    break;
  case MVT::i64:
    RetVals.push_back(MVT::i32);
    RetVals.push_back(MVT::i32);
    break;
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
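  // The flag value glues each copy to the next one and finally to the CALL
  // node itself, so the scheduler cannot separate the copies from the call.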
  SDOperand InFlag;
  for (unsigned i = 0, e = RegValuesToPass.size(); i != e; ++i) {
    unsigned CCReg;
    SDOperand RegToPass = RegValuesToPass[i];
    switch (RegToPass.getValueType()) {
    default: assert(0 && "Bad thing to pass in regs");
    case MVT::i8:
      CCReg = (i == 0) ? X86::AL  : X86::DL;
      break;
    case MVT::i16:
      CCReg = (i == 0) ? X86::AX  : X86::DX;
      break;
    case MVT::i32:
      CCReg = (i == 0) ? X86::EAX : X86::EDX;
      break;
    }

    Chain = DAG.getCopyToReg(Chain, CCReg, RegToPass, InFlag);
    InFlag = Chain.getValue(1);
  }

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);
  if (InFlag.Val)
    Ops.push_back(InFlag);

  // FIXME: Do not generate X86ISD::TAILCALL for now.
  Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops);
  InFlag = Chain.getValue(1);

  NodeTys.clear();
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(ArgOffset, getPointerTy()));
  Ops.push_back(DAG.getConstant(ArgOffset, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, Ops);
  InFlag = Chain.getValue(1);

  SDOperand RetVal;
  if (RetTyVT != MVT::isVoid) {
    switch (RetTyVT) {
    default: assert(0 && "Unknown value type to return!");
    case MVT::i1:
    case MVT::i8:
      RetVal = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag);
      Chain = RetVal.getValue(1);
      if (RetTyVT == MVT::i1)
        RetVal = DAG.getNode(ISD::TRUNCATE, MVT::i1, RetVal);
      break;
    case MVT::i16:
      RetVal = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag);
      Chain = RetVal.getValue(1);
      break;
    case MVT::i32:
      RetVal = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
      Chain = RetVal.getValue(1);
      break;
    case MVT::i64: {
      SDOperand Lo = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
      SDOperand Hi = DAG.getCopyFromReg(Lo.getValue(1), X86::EDX, MVT::i32,
                                        Lo.getValue(2));
      RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi);
      Chain = Hi.getValue(1);
      break;
    }
    case MVT::f32:
    case MVT::f64: {
      std::vector<MVT::ValueType> Tys;
      Tys.push_back(MVT::f64);
      Tys.push_back(MVT::Other);
      Tys.push_back(MVT::Flag);
      std::vector<SDOperand> Ops;
      Ops.push_back(Chain);
      Ops.push_back(InFlag);
      RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, Ops);
      Chain = RetVal.getValue(1);
      InFlag = RetVal.getValue(2);
      if (X86ScalarSSE) {
        // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
        // shouldn't be necessary except that RFP cannot be live across
        // multiple blocks. When the stackifier is fixed, they can be
        // uncoupled.
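        // The x87 return value is spilled to a fresh stack slot with FST and
        // immediately reloaded below, which moves it into an SSE register.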
        MachineFunction &MF = DAG.getMachineFunction();
        int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
        SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
        Tys.clear();
        Tys.push_back(MVT::Other);
        Ops.clear();
        Ops.push_back(Chain);
        Ops.push_back(RetVal);
        Ops.push_back(StackSlot);
        Ops.push_back(DAG.getValueType(RetTyVT));
        Ops.push_back(InFlag);
        Chain = DAG.getNode(X86ISD::FST, Tys, Ops);
        RetVal = DAG.getLoad(RetTyVT, Chain, StackSlot,
                             DAG.getSrcValue(NULL));
        Chain = RetVal.getValue(1);
      }

      if (RetTyVT == MVT::f32 && !X86ScalarSSE)
        // FIXME: we would really like to remember that this FP_ROUND
        // operation is okay to eliminate if we allow excess FP precision.
        RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
      break;
    }
    }
  }

  return std::make_pair(RetVal, Chain);
}

SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
  if (ReturnAddrIndex == 0) {
    // Set up a frame object for the return address.
    MachineFunction &MF = DAG.getMachineFunction();
    ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);
  }

  return DAG.getFrameIndex(ReturnAddrIndex, MVT::i32);
}

std::pair<SDOperand, SDOperand> X86TargetLowering::
LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth,
                        SelectionDAG &DAG) {
  SDOperand Result;
  if (Depth)   // Depths > 0 not supported yet!
    Result = DAG.getConstant(0, getPointerTy());
  else {
    SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
    if (!isFrameAddress)
      // Just load the return address.
      Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), RetAddrFI,
                           DAG.getSrcValue(NULL));
    else
      Result = DAG.getNode(ISD::SUB, MVT::i32, RetAddrFI,
                           DAG.getConstant(4, MVT::i32));
  }
  return std::make_pair(Result, Chain);
}

/// getCondBrOpcodeForX86CC - Returns the X86 conditional branch opcode
/// which corresponds to the given condition code.
static unsigned getCondBrOpcodeForX86CC(unsigned X86CC) {
  switch (X86CC) {
  default: assert(0 && "Unknown X86 conditional code!");
  case X86ISD::COND_A:  return X86::JA;
  case X86ISD::COND_AE: return X86::JAE;
  case X86ISD::COND_B:  return X86::JB;
  case X86ISD::COND_BE: return X86::JBE;
  case X86ISD::COND_E:  return X86::JE;
  case X86ISD::COND_G:  return X86::JG;
  case X86ISD::COND_GE: return X86::JGE;
  case X86ISD::COND_L:  return X86::JL;
  case X86ISD::COND_LE: return X86::JLE;
  case X86ISD::COND_NE: return X86::JNE;
  case X86ISD::COND_NO: return X86::JNO;
  case X86ISD::COND_NP: return X86::JNP;
  case X86ISD::COND_NS: return X86::JNS;
  case X86ISD::COND_O:  return X86::JO;
  case X86ISD::COND_P:  return X86::JP;
  case X86ISD::COND_S:  return X86::JS;
  }
}

/// translateX86CC - Do a one-to-one translation of an ISD::CondCode to the
/// X86-specific condition code. It returns false if it cannot do a direct
/// translation. X86CC is the translated CondCode. Flip is set to true if
/// the order of the comparison operands should be flipped.
static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
                           unsigned &X86CC, bool &Flip) {
  Flip = false;
  X86CC = X86ISD::COND_INVALID;
  if (!isFP) {
    switch (SetCCOpcode) {
    default: break;
    case ISD::SETEQ:  X86CC = X86ISD::COND_E;  break;
    case ISD::SETGT:  X86CC = X86ISD::COND_G;  break;
    case ISD::SETGE:  X86CC = X86ISD::COND_GE; break;
    case ISD::SETLT:  X86CC = X86ISD::COND_L;  break;
    case ISD::SETLE:  X86CC = X86ISD::COND_LE; break;
    case ISD::SETNE:  X86CC = X86ISD::COND_NE; break;
    case ISD::SETULT: X86CC = X86ISD::COND_B;  break;
    case ISD::SETUGT: X86CC = X86ISD::COND_A;  break;
    case ISD::SETULE: X86CC = X86ISD::COND_BE; break;
    case ISD::SETUGE: X86CC = X86ISD::COND_AE; break;
    }
  } else {
    // On a floating point condition, the flags are set as follows:
    //  ZF  PF  CF   op
    //   0 | 0 | 0 | X > Y
    //   0 | 0 | 1 | X < Y
    //   1 | 0 | 0 | X == Y
    //   1 | 1 | 1 | unordered
    switch (SetCCOpcode) {
    default: break;
    case ISD::SETUEQ:
    case ISD::SETEQ:  X86CC = X86ISD::COND_E;  break;
    case ISD::SETOLT: Flip = true; // Fallthrough
    case ISD::SETOGT:
    case ISD::SETGT:  X86CC = X86ISD::COND_A;  break;
    case ISD::SETOLE: Flip = true; // Fallthrough
    case ISD::SETOGE:
    case ISD::SETGE:  X86CC = X86ISD::COND_AE; break;
    case ISD::SETUGT: Flip = true; // Fallthrough
    case ISD::SETULT:
    case ISD::SETLT:  X86CC = X86ISD::COND_B;  break;
    case ISD::SETUGE: Flip = true; // Fallthrough
    case ISD::SETULE:
    case ISD::SETLE:  X86CC = X86ISD::COND_BE; break;
    case ISD::SETONE:
    case ISD::SETNE:  X86CC = X86ISD::COND_NE; break;
    case ISD::SETUO:  X86CC = X86ISD::COND_P;  break;
    case ISD::SETO:   X86CC = X86ISD::COND_NP; break;
    }
  }

  return X86CC != X86ISD::COND_INVALID;
}

static bool translateX86CC(SDOperand CC, bool isFP, unsigned &X86CC,
                           bool &Flip) {
  return translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC, Flip);
}

/// hasFPCMov - Is there a floating point cmov for the specific X86 condition
/// code? The current x86 ISA includes the following FP cmov instructions:
/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
static bool hasFPCMov(unsigned X86CC) {
  switch (X86CC) {
  default:
    return false;
  case X86ISD::COND_B:
  case X86ISD::COND_BE:
  case X86ISD::COND_E:
  case X86ISD::COND_P:
  case X86ISD::COND_A:
  case X86ISD::COND_AE:
  case X86ISD::COND_NE:
  case X86ISD::COND_NP:
    return true;
  }
}

MachineBasicBlock *
X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                           MachineBasicBlock *BB) {
  switch (MI->getOpcode()) {
  default: assert(false && "Unexpected instr type to insert");
  case X86::CMOV_FR32:
  case X86::CMOV_FR64:
  case X86::CMOV_V4F32:
  case X86::CMOV_V2F64:
  case X86::CMOV_V2I64: {
    // To "insert" a SELECT_CC instruction, we actually have to insert the
    // diamond control-flow pattern. The incoming instruction knows the
    // destination vreg to set, the condition code register to branch on, the
    // true/false values to select between, and a branch opcode to use.
    const BasicBlock *LLVM_BB = BB->getBasicBlock();
    ilist<MachineBasicBlock>::iterator It = BB;
    ++It;

    // thisMBB:
    //  ...
    //   TrueVal = ...
    //   cmpTY ccX, r1, r2
    //   bCC sinkMBB
    //   fallthrough --> copy0MBB
    MachineBasicBlock *thisMBB = BB;
    MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
    unsigned Opc = getCondBrOpcodeForX86CC(MI->getOperand(3).getImmedValue());
    BuildMI(BB, Opc, 1).addMBB(sinkMBB);
    MachineFunction *F = BB->getParent();
    F->getBasicBlockList().insert(It, copy0MBB);
    F->getBasicBlockList().insert(It, sinkMBB);
    // Update machine-CFG edges by first adding all successors of the current
    // block to the new block which will contain the Phi node for the select.
    for (MachineBasicBlock::succ_iterator i = BB->succ_begin(),
         e = BB->succ_end(); i != e; ++i)
      sinkMBB->addSuccessor(*i);
    // Next, remove all successors of the current block, and add the true
    // and fallthrough blocks as its successors.
    while (!BB->succ_empty())
      BB->removeSuccessor(BB->succ_begin());
    BB->addSuccessor(copy0MBB);
    BB->addSuccessor(sinkMBB);

    // copy0MBB:
    //   %FalseValue = ...
    //   # fallthrough to sinkMBB
    BB = copy0MBB;

    // Update machine-CFG edges.
    BB->addSuccessor(sinkMBB);

    // sinkMBB:
    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
    //  ...
    BB = sinkMBB;
    BuildMI(BB, X86::PHI, 4, MI->getOperand(0).getReg())
      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);

    delete MI;   // The pseudo instruction is gone now.
    return BB;
  }

  case X86::FP_TO_INT16_IN_MEM:
  case X86::FP_TO_INT32_IN_MEM:
  case X86::FP_TO_INT64_IN_MEM: {
    // Change the floating point control register to use "round towards zero"
    // mode when truncating to an integer value.
    MachineFunction *F = BB->getParent();
    int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
    addFrameReference(BuildMI(BB, X86::FNSTCW16m, 4), CWFrameIdx);

    // Load the old value of the control word...
    unsigned OldCW =
      F->getSSARegMap()->createVirtualRegister(X86::R16RegisterClass);
    addFrameReference(BuildMI(BB, X86::MOV16rm, 4, OldCW), CWFrameIdx);

    // Set the control word to round toward zero (0xC7F also keeps all FP
    // exceptions masked)...
    addFrameReference(BuildMI(BB, X86::MOV16mi, 5), CWFrameIdx).addImm(0xC7F);

    // Reload the modified control word now...
    addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);

    // Restore the memory image of the control word to its original value.
    addFrameReference(BuildMI(BB, X86::MOV16mr, 5), CWFrameIdx).addReg(OldCW);

    // Get the X86 opcode to use.
    unsigned Opc;
    switch (MI->getOpcode()) {
    default: assert(0 && "illegal opcode!");
    case X86::FP_TO_INT16_IN_MEM: Opc = X86::FpIST16m; break;
    case X86::FP_TO_INT32_IN_MEM: Opc = X86::FpIST32m; break;
    case X86::FP_TO_INT64_IN_MEM: Opc = X86::FpIST64m; break;
    }

    X86AddressMode AM;
    MachineOperand &Op = MI->getOperand(0);
    if (Op.isRegister()) {
      AM.BaseType = X86AddressMode::RegBase;
      AM.Base.Reg = Op.getReg();
    } else {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = Op.getFrameIndex();
    }
    Op = MI->getOperand(1);
    if (Op.isImmediate())
      AM.Scale = Op.getImmedValue();
    Op = MI->getOperand(2);
    if (Op.isImmediate())
      AM.IndexReg = Op.getImmedValue();
    Op = MI->getOperand(3);
    if (Op.isGlobalAddress()) {
      AM.GV = Op.getGlobal();
    } else {
      AM.Disp = Op.getImmedValue();
    }
    addFullAddress(BuildMI(BB, Opc, 5), AM).addReg(MI->getOperand(4).getReg());

    // Reload the original control word now.
    addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);

    delete MI;   // The pseudo instruction is gone now.
    return BB;
  }
  }
}


//===----------------------------------------------------------------------===//
//                      X86 Custom Lowering Hooks
//===----------------------------------------------------------------------===//

/// DarwinGVRequiresExtraLoad - true if accessing the GV requires an extra
/// load. For Darwin, external and weak symbols are indirect, loading the value
/// at address GV rather than the value of GV itself. This means that the
/// GlobalAddress must be in the base or index register of the address, not the
/// GV offset field.
static bool DarwinGVRequiresExtraLoad(GlobalValue *GV) {
  return (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
          (GV->isExternal() && !GV->hasNotBeenReadFromBytecode()));
}

/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if its value falls within the specified range
/// [Low, Hi).
static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) {
  if (Op.getOpcode() == ISD::UNDEF)
    return true;

  unsigned Val = cast<ConstantSDNode>(Op)->getValue();
  return (Val >= Low && Val < Hi);
}

/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if its value equals the specified value.
static bool isUndefOrEqual(SDOperand Op, unsigned Val) {
  if (Op.getOpcode() == ISD::UNDEF)
    return true;
  return cast<ConstantSDNode>(Op)->getValue() == Val;
}

/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFD.
bool X86::isPSHUFDMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Check that the mask doesn't reference the second vector.
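  // (For a 4-element shuffle, mask values 0-3 select elements of the first
  // vector; values 4-7 would select from the second.)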
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    if (cast<ConstantSDNode>(Arg)->getValue() >= 4)
      return false;
  }

  return true;
}

/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFHW.
bool X86::isPSHUFHWMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 8)
    return false;

  // Lower quadword copied in order.
  for (unsigned i = 0; i != 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    if (cast<ConstantSDNode>(Arg)->getValue() != i)
      return false;
  }

  // Upper quadword shuffled.
  for (unsigned i = 4; i != 8; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val < 4 || Val > 7)
      return false;
  }

  return true;
}

/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFLW.
bool X86::isPSHUFLWMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 8)
    return false;

  // Upper quadword copied in order.
  for (unsigned i = 4; i != 8; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;

  // Lower quadword shuffled.
  for (unsigned i = 0; i != 4; ++i)
    if (!isUndefOrInRange(N->getOperand(i), 0, 4))
      return false;

  return true;
}

/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to SHUFP*.
static bool isSHUFPMask(std::vector<SDOperand> &N) {
  unsigned NumElems = N.size();
  if (NumElems != 2 && NumElems != 4) return false;

  unsigned Half = NumElems / 2;
  for (unsigned i = 0; i < Half; ++i)
    if (!isUndefOrInRange(N[i], 0, NumElems))
      return false;
  for (unsigned i = Half; i < NumElems; ++i)
    if (!isUndefOrInRange(N[i], NumElems, NumElems*2))
      return false;

  return true;
}

bool X86::isSHUFPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
  return ::isSHUFPMask(Ops);
}

/// isCommutedSHUFP - Returns true if the shuffle mask is exactly the reverse
/// of what x86 shuffles want: x86 shuffles require the lower half elements to
/// come from vector 1 (which would equal the dest.) and the upper half to
/// come from vector 2.
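/// e.g. for 4 elements, <4,5,0,1> is a commuted SHUFP mask; commuting the
/// operands turns it into the SHUFP-compatible <0,1,4,5>.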
static bool isCommutedSHUFP(std::vector<SDOperand> &Ops) {
  unsigned NumElems = Ops.size();
  if (NumElems != 2 && NumElems != 4) return false;

  unsigned Half = NumElems / 2;
  for (unsigned i = 0; i < Half; ++i)
    if (!isUndefOrInRange(Ops[i], NumElems, NumElems*2))
      return false;
  for (unsigned i = Half; i < NumElems; ++i)
    if (!isUndefOrInRange(Ops[i], 0, NumElems))
      return false;
  return true;
}

static bool isCommutedSHUFP(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
  return isCommutedSHUFP(Ops);
}

/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
bool X86::isMOVHLPSMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect the mask elements to be <6, 7, 2, 3>.
  return isUndefOrEqual(N->getOperand(0), 6) &&
         isUndefOrEqual(N->getOperand(1), 7) &&
         isUndefOrEqual(N->getOperand(2), 2) &&
         isUndefOrEqual(N->getOperand(3), 3);
}

/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
bool X86::isMOVLPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4)
    return false;

  for (unsigned i = 0; i < NumElems/2; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i + NumElems))
      return false;

  for (unsigned i = NumElems/2; i < NumElems; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;

  return true;
}

/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}
/// and MOVLHPS.
bool X86::isMOVHPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4)
    return false;

  for (unsigned i = 0; i < NumElems/2; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;

  for (unsigned i = 0; i < NumElems/2; ++i) {
    SDOperand Arg = N->getOperand(i + NumElems/2);
    if (!isUndefOrEqual(Arg, i + NumElems))
      return false;
  }

  return true;
}

/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
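/// For 4 elements this is the interleave-low pattern <0,4,1,5>; undef is
/// permitted in any position.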
static bool isUNPCKLMask(std::vector<SDOperand> &N, bool V2IsSplat = false) {
  unsigned NumElems = N.size();
  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
    return false;

  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
    SDOperand BitI  = N[i];
    SDOperand BitI1 = N[i+1];
    if (!isUndefOrEqual(BitI, j))
      return false;
    if (V2IsSplat) {
      if (isUndefOrEqual(BitI1, NumElems))
        return false;
    } else {
      if (!isUndefOrEqual(BitI1, j + NumElems))
        return false;
    }
  }

  return true;
}

bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
  return ::isUNPCKLMask(Ops, V2IsSplat);
}

/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKH.
static bool isUNPCKHMask(std::vector<SDOperand> &N, bool V2IsSplat = false) {
  unsigned NumElems = N.size();
  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
    return false;

  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
    SDOperand BitI  = N[i];
    SDOperand BitI1 = N[i+1];
    if (!isUndefOrEqual(BitI, j + NumElems/2))
      return false;
    if (V2IsSplat) {
      if (isUndefOrEqual(BitI1, NumElems))
        return false;
    } else {
      if (!isUndefOrEqual(BitI1, j + NumElems/2 + NumElems))
        return false;
    }
  }

  return true;
}

bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
  return ::isUNPCKHMask(Ops, V2IsSplat);
}

/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
/// <0, 0, 1, 1>
bool X86::isUNPCKL_v_undef_Mask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 4 && NumElems != 8 && NumElems != 16)
    return false;

  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
    SDOperand BitI  = N->getOperand(i);
    SDOperand BitI1 = N->getOperand(i+1);

    if (!isUndefOrEqual(BitI, j))
      return false;
    if (!isUndefOrEqual(BitI1, j))
      return false;
  }

  return true;
}

/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSS,
/// MOVSD, and MOVD, i.e. setting the lowest element.
static bool isMOVLMask(std::vector<SDOperand> &N) {
  unsigned NumElems = N.size();
  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
    return false;

  if (!isUndefOrEqual(N[0], NumElems))
    return false;

  for (unsigned i = 1; i < NumElems; ++i) {
    SDOperand Arg = N[i];
    if (!isUndefOrEqual(Arg, i))
      return false;
  }

  return true;
}

bool X86::isMOVLMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
  return ::isMOVLMask(Ops);
}

/// isCommutedMOVL - Returns true if the shuffle mask is exactly the reverse
/// of what x86 movss wants: x86 movss requires the lowest element to be the
/// lowest element of vector 2 and the other elements to come from vector 1 in
/// order.
static bool isCommutedMOVL(std::vector<SDOperand> &Ops,
                           bool V2IsSplat = false) {
  unsigned NumElems = Ops.size();
  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
    return false;

  if (!isUndefOrEqual(Ops[0], 0))
    return false;

  for (unsigned i = 1; i < NumElems; ++i) {
    SDOperand Arg = Ops[i];
    if (V2IsSplat) {
      if (!isUndefOrEqual(Arg, NumElems))
        return false;
    } else {
      if (!isUndefOrEqual(Arg, i+NumElems))
        return false;
    }
  }

  return true;
}

static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
  return isCommutedMOVL(Ops, V2IsSplat);
}

/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
bool X86::isMOVSHDUPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect 1, 1, 3, 3
  for (unsigned i = 0; i < 2; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 1) return false;
  }

  bool HasHi = false;
  for (unsigned i = 2; i < 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 3) return false;
    HasHi = true;
  }

  // Don't use movshdup if it can be done with a shufps.
  return HasHi;
}

/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
bool X86::isMOVSLDUPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect 0, 0, 2, 2
  for (unsigned i = 0; i < 2; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 0) return false;
  }

  bool HasHi = false;
  for (unsigned i = 2; i < 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 2) return false;
    HasHi = true;
  }

  // Don't use movsldup if it can be done with a shufps.
  return HasHi;
}

/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
/// a splat of a single element.
static bool isSplatMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
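  // e.g. <2,2,2,2> splats element 2 of the first operand; undef entries are
  // ignored, so <1,undef,1,1> also qualifies.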
  unsigned NumElems = N->getNumOperands();
  SDOperand ElementBase;
  unsigned i = 0;
  for (; i != NumElems; ++i) {
    SDOperand Elt = N->getOperand(i);
    if (isa<ConstantSDNode>(Elt)) {
      ElementBase = Elt;
      break;
    }
  }

  if (!ElementBase.Val)
    return false;

  for (; i != NumElems; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    if (Arg != ElementBase) return false;
  }

  // Make sure it is a splat of the first vector operand.
  return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems;
}

/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
/// a splat of a single element and it's a 2 or 4 element mask.
bool X86::isSplatMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  // We can only splat 64-bit and 32-bit quantities with a single instruction.
  if (N->getNumOperands() != 4 && N->getNumOperands() != 2)
    return false;
  return ::isSplatMask(N);
}

/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
/// instructions.
unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
  unsigned NumOperands = N->getNumOperands();
  unsigned Shift = (NumOperands == 4) ? 2 : 1;
  unsigned Mask = 0;
  for (unsigned i = 0; i < NumOperands; ++i) {
    unsigned Val = 0;
    SDOperand Arg = N->getOperand(NumOperands-i-1);
    if (Arg.getOpcode() != ISD::UNDEF)
      Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val >= NumOperands) Val -= NumOperands;
    Mask |= Val;
    if (i != NumOperands - 1)
      Mask <<= Shift;
  }

  return Mask;
}

/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
/// instructions.
unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
  unsigned Mask = 0;
  // 8 nodes, but we only care about the last 4.
  for (unsigned i = 7; i >= 4; --i) {
    // An undef element may take any value; default to 4 so it contributes a
    // zero field to the immediate instead of underflowing below.
    unsigned Val = 4;
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() != ISD::UNDEF)
      Val = cast<ConstantSDNode>(Arg)->getValue();
    Mask |= (Val - 4);
    if (i != 4)
      Mask <<= 2;
  }

  return Mask;
}

/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
/// instructions.
unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
  unsigned Mask = 0;
  // 8 nodes, but we only care about the first 4.
  for (int i = 3; i >= 0; --i) {
    unsigned Val = 0;
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() != ISD::UNDEF)
      Val = cast<ConstantSDNode>(Arg)->getValue();
    Mask |= Val;
    if (i != 0)
      Mask <<= 2;
  }

  return Mask;
}

/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand
/// specifies an 8-element shuffle that can be broken into a pair of
/// PSHUFHW and PSHUFLW.
static bool isPSHUFHW_PSHUFLWMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 8)
    return false;

  // Lower quadword shuffled.
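  // (each of the first four mask entries must select from elements 0-3)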
  for (unsigned i = 0; i != 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val >= 4)
      return false;
  }

  // Upper quadword shuffled.
  for (unsigned i = 4; i != 8; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val < 4 || Val > 7)
      return false;
  }

  return true;
}

/// CommuteVectorShuffle - Swap vector_shuffle operands as well as the values
/// in their permute mask.
static SDOperand CommuteVectorShuffle(SDOperand Op, SelectionDAG &DAG) {
  SDOperand V1 = Op.getOperand(0);
  SDOperand V2 = Op.getOperand(1);
  SDOperand Mask = Op.getOperand(2);
  MVT::ValueType VT = Op.getValueType();
  MVT::ValueType MaskVT = Mask.getValueType();
  MVT::ValueType EltVT = MVT::getVectorBaseType(MaskVT);
  unsigned NumElems = Mask.getNumOperands();
  std::vector<SDOperand> MaskVec;

  for (unsigned i = 0; i != NumElems; ++i) {
    SDOperand Arg = Mask.getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) {
      MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
      continue;
    }
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val < NumElems)
      MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
    else
      MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
  }

  Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1, Mask);
}

/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
/// match movhlps. The lower half elements should come from upper half of
/// V1 (and in order), and the upper half elements should come from the upper
/// half of V2 (and in order).
static bool ShouldXformToMOVHLPS(SDNode *Mask) {
  unsigned NumElems = Mask->getNumOperands();
  if (NumElems != 4)
    return false;
  for (unsigned i = 0, e = 2; i != e; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i+2))
      return false;
  for (unsigned i = 2; i != 4; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i+4))
      return false;
  return true;
}

/// isScalarLoadToVector - Returns true if the node is a scalar load that
/// is promoted to a vector.
static inline bool isScalarLoadToVector(SDNode *N) {
  if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
    N = N->getOperand(0).Val;
    return (N->getOpcode() == ISD::LOAD);
  }
  return false;
}

/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
/// match movlp{s|d}. The lower half elements should come from lower half of
/// V1 (and in order), and the upper half elements should come from the upper
/// half of V2 (and in order). And since V1 will become the source of the
/// MOVLP, it must be either a vector load or a scalar load to vector.
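/// For 4 elements this means the mask <0,1,6,7>.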
static bool ShouldXformToMOVLP(SDNode *V1, SDNode *Mask) {
  if (V1->getOpcode() != ISD::LOAD && !isScalarLoadToVector(V1))
    return false;

  unsigned NumElems = Mask->getNumOperands();
  if (NumElems != 2 && NumElems != 4)
    return false;
  for (unsigned i = 0, e = NumElems/2; i != e; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i))
      return false;
  for (unsigned i = NumElems/2; i != NumElems; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems))
      return false;
  return true;
}

/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
/// all the same.
static bool isSplatVector(SDNode *N) {
  if (N->getOpcode() != ISD::BUILD_VECTOR)
    return false;

  SDOperand SplatValue = N->getOperand(0);
  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
    if (N->getOperand(i) != SplatValue)
      return false;
  return true;
}

/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
/// that point to V2 point to its first element.
static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) {
  assert(Mask.getOpcode() == ISD::BUILD_VECTOR);

  bool Changed = false;
  std::vector<SDOperand> MaskVec;
  unsigned NumElems = Mask.getNumOperands();
  for (unsigned i = 0; i != NumElems; ++i) {
    SDOperand Arg = Mask.getOperand(i);
    if (Arg.getOpcode() != ISD::UNDEF) {
      unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
      if (Val > NumElems) {
        Arg = DAG.getConstant(NumElems, Arg.getValueType());
        Changed = true;
      }
    }
    MaskVec.push_back(Arg);
  }

  if (Changed)
    Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(), MaskVec);
  return Mask;
}

/// getMOVLMask - Returns a vector_shuffle mask for a movs{s|d} or movd
/// operation of specified width.
static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) {
  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
  MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);

  std::vector<SDOperand> MaskVec;
  MaskVec.push_back(DAG.getConstant(NumElems, BaseVT));
  for (unsigned i = 1; i != NumElems; ++i)
    MaskVec.push_back(DAG.getConstant(i, BaseVT));
  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
}

/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation
/// of specified width.
static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) {
  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
  MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
  std::vector<SDOperand> MaskVec;
  for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
    MaskVec.push_back(DAG.getConstant(i, BaseVT));
    MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT));
  }
  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
}

/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation
/// of specified width.
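/// (e.g. <2,6,3,7> for a 4-element mask).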
static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) {
  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
  MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
  unsigned Half = NumElems/2;
  std::vector<SDOperand> MaskVec;
  for (unsigned i = 0; i != Half; ++i) {
    MaskVec.push_back(DAG.getConstant(i + Half, BaseVT));
    MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT));
  }
  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
}

/// getZeroVector - Returns a vector of specified type with all zero elements.
///
static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) {
  assert(MVT::isVector(VT) && "Expected a vector type");
  unsigned NumElems = getVectorNumElements(VT);
  MVT::ValueType EVT = MVT::getVectorBaseType(VT);
  bool isFP = MVT::isFloatingPoint(EVT);
  SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT);
  std::vector<SDOperand> ZeroVec(NumElems, Zero);
  return DAG.getNode(ISD::BUILD_VECTOR, VT, ZeroVec);
}

/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32.
///
static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) {
  SDOperand V1 = Op.getOperand(0);
  SDOperand Mask = Op.getOperand(2);
  MVT::ValueType VT = Op.getValueType();
  unsigned NumElems = Mask.getNumOperands();
  Mask = getUnpacklMask(NumElems, DAG);
  while (NumElems != 4) {
    V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
    NumElems >>= 1;
  }
  V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);

  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
  Mask = getZeroVector(MaskVT, DAG);
  SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1,
                                  DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask);
  return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
}

/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0.
static inline bool isZeroNode(SDOperand Elt) {
  return ((isa<ConstantSDNode>(Elt) &&
           cast<ConstantSDNode>(Elt)->getValue() == 0) ||
          (isa<ConstantFPSDNode>(Elt) &&
           cast<ConstantFPSDNode>(Elt)->isExactlyValue(0.0)));
}

/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
/// vector and zero or undef vector.
static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT,
                                             unsigned NumElems, unsigned Idx,
                                             bool isZero, SelectionDAG &DAG) {
  SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT);
  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
  MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT);
  SDOperand Zero = DAG.getConstant(0, EVT);
  std::vector<SDOperand> MaskVec(NumElems, Zero);
  MaskVec[Idx] = DAG.getConstant(NumElems, EVT);
  SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
}

/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
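/// Nonzero bytes are zero-extended to i16 and packed in pairs (the even byte
/// in the low half, the odd byte shifted into the high half) so that each
/// pinsrw inserts two of the original v16i8 elements.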
2243/// 2244static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros, 2245 unsigned NumNonZero, unsigned NumZero, 2246 SelectionDAG &DAG) { 2247 if (NumNonZero > 8) 2248 return SDOperand(); 2249 2250 SDOperand V(0, 0); 2251 bool First = true; 2252 for (unsigned i = 0; i < 16; ++i) { 2253 bool ThisIsNonZero = (NonZeros & (1 << i)) != 0; 2254 if (ThisIsNonZero && First) { 2255 if (NumZero) 2256 V = getZeroVector(MVT::v8i16, DAG); 2257 else 2258 V = DAG.getNode(ISD::UNDEF, MVT::v8i16); 2259 First = false; 2260 } 2261 2262 if ((i & 1) != 0) { 2263 SDOperand ThisElt(0, 0), LastElt(0, 0); 2264 bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0; 2265 if (LastIsNonZero) { 2266 LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1)); 2267 } 2268 if (ThisIsNonZero) { 2269 ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i)); 2270 ThisElt = DAG.getNode(ISD::SHL, MVT::i16, 2271 ThisElt, DAG.getConstant(8, MVT::i8)); 2272 if (LastIsNonZero) 2273 ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt); 2274 } else 2275 ThisElt = LastElt; 2276 2277 if (ThisElt.Val) 2278 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt, 2279 DAG.getConstant(i/2, MVT::i32)); 2280 } 2281 } 2282 2283 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V); 2284} 2285 2286/// LowerBuildVectorv16i8 - Custom lower build_vector of v8i16. 2287/// 2288static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros, 2289 unsigned NumNonZero, unsigned NumZero, 2290 SelectionDAG &DAG) { 2291 if (NumNonZero > 4) 2292 return SDOperand(); 2293 2294 SDOperand V(0, 0); 2295 bool First = true; 2296 for (unsigned i = 0; i < 8; ++i) { 2297 bool isNonZero = (NonZeros & (1 << i)) != 0; 2298 if (isNonZero) { 2299 if (First) { 2300 if (NumZero) 2301 V = getZeroVector(MVT::v8i16, DAG); 2302 else 2303 V = DAG.getNode(ISD::UNDEF, MVT::v8i16); 2304 First = false; 2305 } 2306 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i), 2307 DAG.getConstant(i, MVT::i32)); 2308 } 2309 } 2310 2311 return V; 2312} 2313 2314SDOperand 2315X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) { 2316 // All zero's are handled with pxor. 2317 if (ISD::isBuildVectorAllZeros(Op.Val)) 2318 return Op; 2319 2320 // All one's are handled with pcmpeqd. 2321 if (ISD::isBuildVectorAllOnes(Op.Val)) 2322 return Op; 2323 2324 MVT::ValueType VT = Op.getValueType(); 2325 MVT::ValueType EVT = MVT::getVectorBaseType(VT); 2326 unsigned EVTBits = MVT::getSizeInBits(EVT); 2327 2328 unsigned NumElems = Op.getNumOperands(); 2329 unsigned NumZero = 0; 2330 unsigned NumNonZero = 0; 2331 unsigned NonZeros = 0; 2332 std::set<SDOperand> Values; 2333 for (unsigned i = 0; i < NumElems; ++i) { 2334 SDOperand Elt = Op.getOperand(i); 2335 if (Elt.getOpcode() != ISD::UNDEF) { 2336 Values.insert(Elt); 2337 if (isZeroNode(Elt)) 2338 NumZero++; 2339 else { 2340 NonZeros |= (1 << i); 2341 NumNonZero++; 2342 } 2343 } 2344 } 2345 2346 if (NumNonZero == 0) 2347 // Must be a mix of zero and undef. Return a zero vector. 2348 return getZeroVector(VT, DAG); 2349 2350 // Splat is obviously ok. Let legalizer expand it to a shuffle. 2351 if (Values.size() == 1) 2352 return SDOperand(); 2353 2354 // Special case for single non-zero element. 2355 if (NumNonZero == 1) { 2356 unsigned Idx = CountTrailingZeros_32(NonZeros); 2357 SDOperand Item = Op.getOperand(Idx); 2358 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item); 2359 if (Idx == 0) 2360 // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. 
      return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx,
                                         NumZero > 0, DAG);

    if (EVTBits == 32) {
      // Turn it into a shuffle of zero and zero-extended scalar to vector.
      Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0,
                                         DAG);
      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
      MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT);
      std::vector<SDOperand> MaskVec;
      for (unsigned i = 0; i < NumElems; i++)
        MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT));
      SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item,
                         DAG.getNode(ISD::UNDEF, VT), Mask);
    }
  }

  // Let the legalizer expand 2-wide build_vectors.
  if (EVTBits == 64)
    return SDOperand();

  // If element VT is < 32 bits, convert it to inserts into a zero vector.
  if (EVTBits == 8) {
    SDOperand V = LowerBuildVectorv16i8(Op, NonZeros, NumNonZero, NumZero,
                                        DAG);
    if (V.Val) return V;
  }

  if (EVTBits == 16) {
    SDOperand V = LowerBuildVectorv8i16(Op, NonZeros, NumNonZero, NumZero,
                                        DAG);
    if (V.Val) return V;
  }

  // If element VT is == 32 bits, turn it into a number of shuffles.
  std::vector<SDOperand> V(NumElems);
  if (NumElems == 4 && NumZero > 0) {
    for (unsigned i = 0; i < 4; ++i) {
      bool isZero = !(NonZeros & (1 << i));
      if (isZero)
        V[i] = getZeroVector(VT, DAG);
      else
        V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
    }

    for (unsigned i = 0; i < 2; ++i) {
      switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
      default: break;
      case 0:
        V[i] = V[i*2];  // Must be a zero vector.
        break;
      case 1:
        V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2],
                           getMOVLMask(NumElems, DAG));
        break;
      case 2:
        V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
                           getMOVLMask(NumElems, DAG));
        break;
      case 3:
        V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
                           getUnpacklMask(NumElems, DAG));
        break;
      }
    }

    // Take advantage of the fact R32 to VR128 scalar_to_vector (i.e. movd)
    // clears the upper bits.
    // FIXME: we can do the same for v4f32 case when we know both parts of
    // the lower half come from scalar_to_vector (loadf32). We should do
    // that in post legalizer dag combiner with target specific hooks.
    if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0)
      return V[0];
    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
    MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT);
    std::vector<SDOperand> MaskVec;
    bool Reverse = (NonZeros & 0x3) == 2;
    for (unsigned i = 0; i < 2; ++i)
      if (Reverse)
        MaskVec.push_back(DAG.getConstant(1-i, EVT));
      else
        MaskVec.push_back(DAG.getConstant(i, EVT));
    Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
    for (unsigned i = 0; i < 2; ++i)
      if (Reverse)
        MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT));
      else
        MaskVec.push_back(DAG.getConstant(i+NumElems, EVT));
    SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
    return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask);
  }

  if (Values.size() > 2) {
    // Expand into a number of unpckl*.
    // e.g. for v4f32
    //   Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
    //         : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
    //   Step 2: unpcklps X, Y ==>    <3, 2, 1, 0>
    SDOperand UnpckMask = getUnpacklMask(NumElems, DAG);
    for (unsigned i = 0; i < NumElems; ++i)
      V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
    NumElems >>= 1;
    while (NumElems != 0) {
      for (unsigned i = 0; i < NumElems; ++i)
        V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems],
                           UnpckMask);
      NumElems >>= 1;
    }
    return V[0];
  }

  return SDOperand();
}

SDOperand
X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
  SDOperand V1 = Op.getOperand(0);
  SDOperand V2 = Op.getOperand(1);
  SDOperand PermMask = Op.getOperand(2);
  MVT::ValueType VT = Op.getValueType();
  unsigned NumElems = PermMask.getNumOperands();
  bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
  bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;

  if (isSplatMask(PermMask.Val)) {
    if (NumElems <= 4) return Op;
    // Promote it to a v4i32 splat.
    return PromoteSplat(Op, DAG);
  }

  if (X86::isMOVLMask(PermMask.Val))
    return (V1IsUndef) ? V2 : Op;

  if (X86::isMOVSHDUPMask(PermMask.Val) ||
      X86::isMOVSLDUPMask(PermMask.Val) ||
      X86::isMOVHLPSMask(PermMask.Val) ||
      X86::isMOVHPMask(PermMask.Val) ||
      X86::isMOVLPMask(PermMask.Val))
    return Op;

  if (ShouldXformToMOVHLPS(PermMask.Val) ||
      ShouldXformToMOVLP(V1.Val, PermMask.Val))
    return CommuteVectorShuffle(Op, DAG);

  bool V1IsSplat = isSplatVector(V1.Val) || V1.getOpcode() == ISD::UNDEF;
  bool V2IsSplat = isSplatVector(V2.Val) || V2.getOpcode() == ISD::UNDEF;
  if (V1IsSplat && !V2IsSplat) {
    Op = CommuteVectorShuffle(Op, DAG);
    V1 = Op.getOperand(0);
    V2 = Op.getOperand(1);
    PermMask = Op.getOperand(2);
    V2IsSplat = true;
  }

  if (isCommutedMOVL(PermMask.Val, V2IsSplat)) {
    if (V2IsUndef) return V1;
    Op = CommuteVectorShuffle(Op, DAG);
    V1 = Op.getOperand(0);
    V2 = Op.getOperand(1);
    PermMask = Op.getOperand(2);
    if (V2IsSplat) {
      // V2 is a splat, so the mask may be malformed. That is, it may point
      // to any V2 element. The instruction selector won't like this. Get
      // a corrected mask and commute to form a proper MOVS{S|D}.
      SDOperand NewMask = getMOVLMask(NumElems, DAG);
      if (NewMask.Val != PermMask.Val)
        Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
    }
    return Op;
  }

  if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
      X86::isUNPCKLMask(PermMask.Val) ||
      X86::isUNPCKHMask(PermMask.Val))
    return Op;

  if (V2IsSplat) {
    // Normalize mask so all entries that point to V2 point to its first
    // element then try to match unpck{h|l} again. If match, return a
    // new vector_shuffle with the corrected mask.
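    // (with a splat V2 the masks <0,6,1,7> and <0,4,1,5> describe the same
    // shuffle, since every V2 element holds the same value)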
    SDOperand NewMask = NormalizeMask(PermMask, DAG);
    if (NewMask.Val != PermMask.Val) {
      if (X86::isUNPCKLMask(PermMask.Val, true)) {
        SDOperand NewMask = getUnpacklMask(NumElems, DAG);
        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
      } else if (X86::isUNPCKHMask(PermMask.Val, true)) {
        SDOperand NewMask = getUnpackhMask(NumElems, DAG);
        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
      }
    }
  }

  // Normalize the node to match x86 shuffle ops if needed
  if (V2.getOpcode() != ISD::UNDEF)
    if (isCommutedSHUFP(PermMask.Val)) {
      Op = CommuteVectorShuffle(Op, DAG);
      V1 = Op.getOperand(0);
      V2 = Op.getOperand(1);
      PermMask = Op.getOperand(2);
    }

  // If VT is integer, try PSHUF* first, then SHUFP*.
  if (MVT::isInteger(VT)) {
    if (X86::isPSHUFDMask(PermMask.Val) ||
        X86::isPSHUFHWMask(PermMask.Val) ||
        X86::isPSHUFLWMask(PermMask.Val)) {
      if (V2.getOpcode() != ISD::UNDEF)
        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
                           DAG.getNode(ISD::UNDEF, V1.getValueType()),
                           PermMask);
      return Op;
    }

    if (X86::isSHUFPMask(PermMask.Val))
      return Op;

    // Handle v8i16 shuffle high / low shuffle node pair.
    if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) {
      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
      MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
      std::vector<SDOperand> MaskVec;
      for (unsigned i = 0; i != 4; ++i)
        MaskVec.push_back(PermMask.getOperand(i));
      for (unsigned i = 4; i != 8; ++i)
        MaskVec.push_back(DAG.getConstant(i, BaseVT));
      SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
      V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
      MaskVec.clear();
      for (unsigned i = 0; i != 4; ++i)
        MaskVec.push_back(DAG.getConstant(i, BaseVT));
      for (unsigned i = 4; i != 8; ++i)
        MaskVec.push_back(PermMask.getOperand(i));
      Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
    }
  } else {
    // Floating point cases in the other order.
    if (X86::isSHUFPMask(PermMask.Val))
      return Op;
    if (X86::isPSHUFDMask(PermMask.Val) ||
        X86::isPSHUFHWMask(PermMask.Val) ||
        X86::isPSHUFLWMask(PermMask.Val)) {
      if (V2.getOpcode() != ISD::UNDEF)
        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
                           DAG.getNode(ISD::UNDEF, V1.getValueType()),
                           PermMask);
      return Op;
    }
  }

  if (NumElems == 4) {
    // Break it into (shuffle shuffle_hi, shuffle_lo).
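    // The elements destined for the low half of the result are gathered by
    // one shuffle of V1/V2 and those for the high half by another; a final
    // shuffle then combines the two intermediates.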
    MVT::ValueType MaskVT = PermMask.getValueType();
    MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT);
    std::map<unsigned, std::pair<int, int> > Locs;
    std::vector<SDOperand> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
    std::vector<SDOperand> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
    std::vector<SDOperand> *MaskPtr = &LoMask;
    unsigned MaskIdx = 0;
    unsigned LoIdx = 0;
    unsigned HiIdx = NumElems/2;
    for (unsigned i = 0; i != NumElems; ++i) {
      if (i == NumElems/2) {
        MaskPtr = &HiMask;
        MaskIdx = 1;
        LoIdx = 0;
        HiIdx = NumElems/2;
      }
      SDOperand Elt = PermMask.getOperand(i);
      if (Elt.getOpcode() == ISD::UNDEF) {
        Locs[i] = std::make_pair(-1, -1);
      } else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) {
        Locs[i] = std::make_pair(MaskIdx, LoIdx);
        (*MaskPtr)[LoIdx] = Elt;
        LoIdx++;
      } else {
        Locs[i] = std::make_pair(MaskIdx, HiIdx);
        (*MaskPtr)[HiIdx] = Elt;
        HiIdx++;
      }
    }

    SDOperand LoShuffle =
      DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
                  DAG.getNode(ISD::BUILD_VECTOR, MaskVT, LoMask));
    SDOperand HiShuffle =
      DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
                  DAG.getNode(ISD::BUILD_VECTOR, MaskVT, HiMask));
    std::vector<SDOperand> MaskOps;
    for (unsigned i = 0; i != NumElems; ++i) {
      if (Locs[i].first == -1) {
        MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
      } else {
        unsigned Idx = Locs[i].first * NumElems + Locs[i].second;
        MaskOps.push_back(DAG.getConstant(Idx, MaskEVT));
      }
    }
    return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle,
                       DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskOps));
  }

  return SDOperand();
}

SDOperand
X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
  if (!isa<ConstantSDNode>(Op.getOperand(1)))
    return SDOperand();

  MVT::ValueType VT = Op.getValueType();
  // TODO: handle v16i8.
  if (MVT::getSizeInBits(VT) == 16) {
    // Transform it so it matches pextrw, which produces a 32-bit result.
    MVT::ValueType EVT = (MVT::ValueType)(VT+1);
    SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
                                    Op.getOperand(0), Op.getOperand(1));
    SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract,
                                   DAG.getValueType(VT));
    return DAG.getNode(ISD::TRUNCATE, VT, Assert);
  } else if (MVT::getSizeInBits(VT) == 32) {
    SDOperand Vec = Op.getOperand(0);
    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
    if (Idx == 0)
      return Op;

    // SHUFPS the element to the lowest double word, then movss.
    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
    std::vector<SDOperand> IdxVec;
    IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorBaseType(MaskVT)));
    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
    SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec);
    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
                      Vec, Vec, Mask);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
                       DAG.getConstant(0, MVT::i32));
  } else if (MVT::getSizeInBits(VT) == 64) {
    SDOperand Vec = Op.getOperand(0);
    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
    if (Idx == 0)
      return Op;

    // UNPCKHPD the element to the lowest double word, then movsd.
    // Note if the lower 64 bits of the result of the UNPCKHPD is then stored
    // to a f64mem, the whole operation is folded into a single MOVHPDmr.
    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
    std::vector<SDOperand> IdxVec;
    IdxVec.push_back(DAG.getConstant(1, MVT::getVectorBaseType(MaskVT)));
    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
    SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec);
    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
                      Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
                       DAG.getConstant(0, MVT::i32));
  }

  return SDOperand();
}

SDOperand
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
  // Transform it so it matches pinsrw, which expects a 16-bit value in an R32
  // register as its second argument.
  MVT::ValueType VT = Op.getValueType();
  MVT::ValueType BaseVT = MVT::getVectorBaseType(VT);
  SDOperand N0 = Op.getOperand(0);
  SDOperand N1 = Op.getOperand(1);
  SDOperand N2 = Op.getOperand(2);
  if (MVT::getSizeInBits(BaseVT) == 16) {
    if (N1.getValueType() != MVT::i32)
      N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
    if (N2.getValueType() != MVT::i32)
      N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(), MVT::i32);
    return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
  } else if (MVT::getSizeInBits(BaseVT) == 32) {
    unsigned Idx = cast<ConstantSDNode>(N2)->getValue();
    if (Idx == 0) {
      // Use a movss.
      N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1);
      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
      MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
      std::vector<SDOperand> MaskVec;
      MaskVec.push_back(DAG.getConstant(4, BaseVT));
      for (unsigned i = 1; i <= 3; ++i)
        MaskVec.push_back(DAG.getConstant(i, BaseVT));
      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1,
                         DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec));
    } else {
      // Use two pinsrw instructions to insert a 32-bit value.
      Idx <<= 1;
      if (MVT::isFloatingPoint(N1.getValueType())) {
        if (N1.getOpcode() == ISD::LOAD) {
          // Just load directly from f32mem to R32.
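          // (reuse the chain, pointer, and SrcValue operands of the original
          // f32 load; only the result type changes)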
          N1 = DAG.getLoad(MVT::i32, N1.getOperand(0), N1.getOperand(1),
                           N1.getOperand(2));
        } else {
          N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1);
          N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1);
          N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1,
                           DAG.getConstant(0, MVT::i32));
        }
      }
      N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0);
      N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
                       DAG.getConstant(Idx, MVT::i32));
      N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8));
      N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
                       DAG.getConstant(Idx+1, MVT::i32));
      return DAG.getNode(ISD::BIT_CONVERT, VT, N0);
    }
  }

  return SDOperand();
}

SDOperand
X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
  return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
}

// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form an addressing mode. These wrapped nodes will be selected
// into MOV32ri.
SDOperand
X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
                                 DAG.getTargetConstantPool(CP->get(),
                                                           getPointerTy(),
                                                           CP->getAlignment()));
  if (Subtarget->isTargetDarwin()) {
    // With PIC, the address is actually $g + Offset.
    if (getTargetMachine().getRelocationModel() == Reloc::PIC)
      Result = DAG.getNode(ISD::ADD, getPointerTy(),
                           DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                           Result);
  }

  return Result;
}

SDOperand
X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
  GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
                                 DAG.getTargetGlobalAddress(GV,
                                                            getPointerTy()));
  if (Subtarget->isTargetDarwin()) {
    // With PIC, the address is actually $g + Offset.
    if (getTargetMachine().getRelocationModel() == Reloc::PIC)
      Result = DAG.getNode(ISD::ADD, getPointerTy(),
                           DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                           Result);

    // For Darwin, external and weak symbols are indirect, so we want to load
    // the value at address GV, not the value of GV itself. This means that
    // the GlobalAddress must be in the base or index register of the address,
    // not the GV offset field.
    if (getTargetMachine().getRelocationModel() != Reloc::Static &&
        DarwinGVRequiresExtraLoad(GV))
      Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(),
                           Result, DAG.getSrcValue(NULL));
  }

  return Result;
}

SDOperand
X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) {
  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
  SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
                                 DAG.getTargetExternalSymbol(Sym,
                                                             getPointerTy()));
  if (Subtarget->isTargetDarwin()) {
    // With PIC, the address is actually $g + Offset.
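    // ($g is the PIC base register, materialized here as an
    // X86ISD::GlobalBaseReg node.)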
    if (getTargetMachine().getRelocationModel() == Reloc::PIC)
      Result = DAG.getNode(ISD::ADD, getPointerTy(),
                           DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                           Result);
  }

  return Result;
}

SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
         "Not an i64 shift!");
  bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
  SDOperand ShOpLo = Op.getOperand(0);
  SDOperand ShOpHi = Op.getOperand(1);
  SDOperand ShAmt  = Op.getOperand(2);
  SDOperand Tmp1 = isSRA ? DAG.getNode(ISD::SRA, MVT::i32, ShOpHi,
                                       DAG.getConstant(31, MVT::i8))
                         : DAG.getConstant(0, MVT::i32);

  SDOperand Tmp2, Tmp3;
  if (Op.getOpcode() == ISD::SHL_PARTS) {
    Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
    Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
  } else {
    Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
    Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt);
  }

  SDOperand InFlag = DAG.getNode(X86ISD::TEST, MVT::Flag,
                                 ShAmt, DAG.getConstant(32, MVT::i8));

  SDOperand Hi, Lo;
  SDOperand CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8);

  std::vector<MVT::ValueType> Tys;
  Tys.push_back(MVT::i32);
  Tys.push_back(MVT::Flag);
  std::vector<SDOperand> Ops;
  if (Op.getOpcode() == ISD::SHL_PARTS) {
    Ops.push_back(Tmp2);
    Ops.push_back(Tmp3);
    Ops.push_back(CC);
    Ops.push_back(InFlag);
    Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops);
    InFlag = Hi.getValue(1);

    Ops.clear();
    Ops.push_back(Tmp3);
    Ops.push_back(Tmp1);
    Ops.push_back(CC);
    Ops.push_back(InFlag);
    Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops);
  } else {
    Ops.push_back(Tmp2);
    Ops.push_back(Tmp3);
    Ops.push_back(CC);
    Ops.push_back(InFlag);
    Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops);
    InFlag = Lo.getValue(1);

    Ops.clear();
    Ops.push_back(Tmp3);
    Ops.push_back(Tmp1);
    Ops.push_back(CC);
    Ops.push_back(InFlag);
    Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops);
  }

  Tys.clear();
  Tys.push_back(MVT::i32);
  Tys.push_back(MVT::i32);
  Ops.clear();
  Ops.push_back(Lo);
  Ops.push_back(Hi);
  return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops);
}

SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getOperand(0).getValueType() <= MVT::i64 &&
         Op.getOperand(0).getValueType() >= MVT::i16 &&
         "Unknown SINT_TO_FP to lower!");

  SDOperand Result;
  MVT::ValueType SrcVT = Op.getOperand(0).getValueType();
  unsigned Size = MVT::getSizeInBits(SrcVT)/8;
  MachineFunction &MF = DAG.getMachineFunction();
  int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
  SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
  SDOperand Chain = DAG.getNode(ISD::STORE, MVT::Other,
                                DAG.getEntryNode(), Op.getOperand(0),
                                StackSlot, DAG.getSrcValue(NULL));

  // Build the FILD
  std::vector<MVT::ValueType> Tys;
  Tys.push_back(MVT::f64);
  Tys.push_back(MVT::Other);
  if (X86ScalarSSE) Tys.push_back(MVT::Flag);
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(StackSlot);
  Ops.push_back(DAG.getValueType(SrcVT));
  Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG : X86ISD::FILD,
                       Tys, Ops);

  if (X86ScalarSSE) {
    Chain = Result.getValue(1);
    SDOperand InFlag = Result.getValue(2);

    // FIXME: Currently the FST is flagged to the FILD_FLAG. This
    // shouldn't be necessary except that RFP cannot be live across
    // multiple blocks. When stackifier is fixed, they can be uncoupled.
    MachineFunction &MF = DAG.getMachineFunction();
    int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
    SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
    std::vector<MVT::ValueType> Tys;
    Tys.push_back(MVT::Other);
    std::vector<SDOperand> Ops;
    Ops.push_back(Chain);
    Ops.push_back(Result);
    Ops.push_back(StackSlot);
    Ops.push_back(DAG.getValueType(Op.getValueType()));
    Ops.push_back(InFlag);
    Chain = DAG.getNode(X86ISD::FST, Tys, Ops);
    Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot,
                         DAG.getSrcValue(NULL));
  }

  return Result;
}

SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 &&
         "Unknown FP_TO_SINT to lower!");
  // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary
  // stack slot.
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8;
  int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
  SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());

  unsigned Opc;
  switch (Op.getValueType()) {
  default: assert(0 && "Invalid FP_TO_SINT to lower!");
  case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
  case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
  case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
  }

  SDOperand Chain = DAG.getEntryNode();
  SDOperand Value = Op.getOperand(0);
  if (X86ScalarSSE) {
    assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!");
    Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, StackSlot,
                        DAG.getSrcValue(0));
    std::vector<MVT::ValueType> Tys;
    Tys.push_back(MVT::f64);
    Tys.push_back(MVT::Other);
    std::vector<SDOperand> Ops;
    Ops.push_back(Chain);
    Ops.push_back(StackSlot);
    Ops.push_back(DAG.getValueType(Op.getOperand(0).getValueType()));
    Value = DAG.getNode(X86ISD::FLD, Tys, Ops);
    Chain = Value.getValue(1);
    SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
    StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
  }

  // Build the FP_TO_INT*_IN_MEM
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Value);
  Ops.push_back(StackSlot);
  SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops);

  // Load the result.
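  // (FIST is the chain result of the in-memory conversion; the integer is
  // read back from the same stack slot it was just stored to.)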
3008 return DAG.getLoad(Op.getValueType(), FIST, StackSlot, 3009 DAG.getSrcValue(NULL)); 3010} 3011 3012SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) { 3013 MVT::ValueType VT = Op.getValueType(); 3014 const Type *OpNTy = MVT::getTypeForValueType(VT); 3015 std::vector<Constant*> CV; 3016 if (VT == MVT::f64) { 3017 CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63)))); 3018 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3019 } else { 3020 CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31)))); 3021 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3022 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3023 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3024 } 3025 Constant *CS = ConstantStruct::get(CV); 3026 SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); 3027 SDOperand Mask 3028 = DAG.getNode(X86ISD::LOAD_PACK, 3029 VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL)); 3030 return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask); 3031} 3032 3033SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) { 3034 MVT::ValueType VT = Op.getValueType(); 3035 const Type *OpNTy = MVT::getTypeForValueType(VT); 3036 std::vector<Constant*> CV; 3037 if (VT == MVT::f64) { 3038 CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63))); 3039 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3040 } else { 3041 CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31))); 3042 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3043 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3044 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3045 } 3046 Constant *CS = ConstantStruct::get(CV); 3047 SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); 3048 SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, 3049 VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL)); 3050 return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask); 3051} 3052 3053SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG) { 3054 assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); 3055 SDOperand Cond; 3056 SDOperand CC = Op.getOperand(2); 3057 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 3058 bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType()); 3059 bool Flip; 3060 unsigned X86CC; 3061 if (translateX86CC(CC, isFP, X86CC, Flip)) { 3062 if (Flip) 3063 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, 3064 Op.getOperand(1), Op.getOperand(0)); 3065 else 3066 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, 3067 Op.getOperand(0), Op.getOperand(1)); 3068 return DAG.getNode(X86ISD::SETCC, MVT::i8, 3069 DAG.getConstant(X86CC, MVT::i8), Cond); 3070 } else { 3071 assert(isFP && "Illegal integer SetCC!"); 3072 3073 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, 3074 Op.getOperand(0), Op.getOperand(1)); 3075 std::vector<MVT::ValueType> Tys; 3076 std::vector<SDOperand> Ops; 3077 switch (SetCCOpcode) { 3078 default: assert(false && "Illegal floating point SetCC!"); 3079 case ISD::SETOEQ: { // !PF & ZF 3080 Tys.push_back(MVT::i8); 3081 Tys.push_back(MVT::Flag); 3082 Ops.push_back(DAG.getConstant(X86ISD::COND_NP, MVT::i8)); 3083 Ops.push_back(Cond); 3084 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, Ops); 3085 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8, 3086 DAG.getConstant(X86ISD::COND_E, MVT::i8), 3087 Tmp1.getValue(1)); 3088 return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2); 3089 } 3090 case ISD::SETUNE: { // PF | !ZF 3091 Tys.push_back(MVT::i8); 3092 Tys.push_back(MVT::Flag); 3093 Ops.push_back(DAG.getConstant(X86ISD::COND_P, MVT::i8)); 3094 
SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
  SDOperand Cond;
  SDOperand CC = Op.getOperand(2);
  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
  bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType());
  bool Flip;
  unsigned X86CC;
  if (translateX86CC(CC, isFP, X86CC, Flip)) {
    if (Flip)
      Cond = DAG.getNode(X86ISD::CMP, MVT::Flag,
                         Op.getOperand(1), Op.getOperand(0));
    else
      Cond = DAG.getNode(X86ISD::CMP, MVT::Flag,
                         Op.getOperand(0), Op.getOperand(1));
    return DAG.getNode(X86ISD::SETCC, MVT::i8,
                       DAG.getConstant(X86CC, MVT::i8), Cond);
  } else {
    assert(isFP && "Illegal integer SetCC!");

    Cond = DAG.getNode(X86ISD::CMP, MVT::Flag,
                       Op.getOperand(0), Op.getOperand(1));
    std::vector<MVT::ValueType> Tys;
    std::vector<SDOperand> Ops;
    switch (SetCCOpcode) {
    default: assert(false && "Illegal floating point SetCC!");
    case ISD::SETOEQ: {  // !PF & ZF
      Tys.push_back(MVT::i8);
      Tys.push_back(MVT::Flag);
      Ops.push_back(DAG.getConstant(X86ISD::COND_NP, MVT::i8));
      Ops.push_back(Cond);
      SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, Ops);
      SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8,
                                   DAG.getConstant(X86ISD::COND_E, MVT::i8),
                                   Tmp1.getValue(1));
      return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2);
    }
    case ISD::SETUNE: {  // PF | !ZF
      Tys.push_back(MVT::i8);
      Tys.push_back(MVT::Flag);
      Ops.push_back(DAG.getConstant(X86ISD::COND_P, MVT::i8));
      Ops.push_back(Cond);
      SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, Ops);
      SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8,
                                   DAG.getConstant(X86ISD::COND_NE, MVT::i8),
                                   Tmp1.getValue(1));
      return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2);
    }
    }
  }
}

SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  bool isFPStack = MVT::isFloatingPoint(VT) && !X86ScalarSSE;
  bool addTest = false;
  SDOperand Op0 = Op.getOperand(0);
  SDOperand Cond, CC;
  if (Op0.getOpcode() == ISD::SETCC)
    Op0 = LowerOperation(Op0, DAG);

  if (Op0.getOpcode() == X86ISD::SETCC) {
    // If the condition flag is set by a X86ISD::CMP, then make a copy of it
    // (since the flag operand cannot be shared).  If the X86ISD::SETCC has
    // no other use it will be eliminated.
    // If the X86ISD::SETCC has more than one use, then it's probably better
    // to use a test instead of duplicating the X86ISD::CMP (for register
    // pressure reasons).
    unsigned CmpOpc = Op0.getOperand(1).getOpcode();
    if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI ||
        CmpOpc == X86ISD::UCOMI) {
      if (!Op0.hasOneUse()) {
        std::vector<MVT::ValueType> Tys;
        for (unsigned i = 0; i < Op0.Val->getNumValues(); ++i)
          Tys.push_back(Op0.Val->getValueType(i));
        std::vector<SDOperand> Ops;
        for (unsigned i = 0; i < Op0.getNumOperands(); ++i)
          Ops.push_back(Op0.getOperand(i));
        Op0 = DAG.getNode(X86ISD::SETCC, Tys, Ops);
      }

      CC   = Op0.getOperand(0);
      Cond = Op0.getOperand(1);
      // Make a copy as flag result cannot be used by more than one.
      Cond = DAG.getNode(CmpOpc, MVT::Flag,
                         Cond.getOperand(0), Cond.getOperand(1));
      addTest =
        isFPStack && !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
    } else
      addTest = true;
  } else
    addTest = true;

  if (addTest) {
    CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8);
    Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Op0, Op0);
  }

  std::vector<MVT::ValueType> Tys;
  Tys.push_back(Op.getValueType());
  Tys.push_back(MVT::Flag);
  std::vector<SDOperand> Ops;
  // X86ISD::CMOV means set the result (which is operand 1) to the RHS if
  // the condition is true.
  Ops.push_back(Op.getOperand(2));
  Ops.push_back(Op.getOperand(1));
  Ops.push_back(CC);
  Ops.push_back(Cond);
  return DAG.getNode(X86ISD::CMOV, Tys, Ops);
}
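
// BRCOND is handled like SELECT above: if the condition comes from an
// X86ISD::SETCC whose flags were produced by a CMP/COMI/UCOMI, reuse (or
// copy) those flags and branch on the same condition code; otherwise fall
// back to testing the boolean value itself, roughly:
//   brcond (setcc a, b, cc), dest  ->  CMP a, b        ; Jcc dest
//   brcond bool, dest              ->  TEST bool, bool ; JNE dest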
SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) {
  bool addTest = false;
  SDOperand Cond = Op.getOperand(1);
  SDOperand Dest = Op.getOperand(2);
  SDOperand CC;
  if (Cond.getOpcode() == ISD::SETCC)
    Cond = LowerOperation(Cond, DAG);

  if (Cond.getOpcode() == X86ISD::SETCC) {
    // If the condition flag is set by a X86ISD::CMP, then make a copy of it
    // (since the flag operand cannot be shared).  If the X86ISD::SETCC has
    // no other use it will be eliminated.
    // If the X86ISD::SETCC has more than one use, then it's probably better
    // to use a test instead of duplicating the X86ISD::CMP (for register
    // pressure reasons).
    unsigned CmpOpc = Cond.getOperand(1).getOpcode();
    if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI ||
        CmpOpc == X86ISD::UCOMI) {
      if (!Cond.hasOneUse()) {
        std::vector<MVT::ValueType> Tys;
        for (unsigned i = 0; i < Cond.Val->getNumValues(); ++i)
          Tys.push_back(Cond.Val->getValueType(i));
        std::vector<SDOperand> Ops;
        for (unsigned i = 0; i < Cond.getNumOperands(); ++i)
          Ops.push_back(Cond.getOperand(i));
        Cond = DAG.getNode(X86ISD::SETCC, Tys, Ops);
      }

      CC   = Cond.getOperand(0);
      Cond = Cond.getOperand(1);
      // Make a copy as flag result cannot be used by more than one.
      Cond = DAG.getNode(CmpOpc, MVT::Flag,
                         Cond.getOperand(0), Cond.getOperand(1));
    } else
      addTest = true;
  } else
    addTest = true;

  if (addTest) {
    CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8);
    Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Cond, Cond);
  }
  return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
                     Op.getOperand(0), Dest, CC, Cond);
}

SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
                                 DAG.getTargetJumpTable(JT->getIndex(),
                                                        getPointerTy()));
  if (Subtarget->isTargetDarwin()) {
    // With PIC, the address is actually $g + Offset.
    if (getTargetMachine().getRelocationModel() == Reloc::PIC)
      Result = DAG.getNode(ISD::ADD, getPointerTy(),
                           DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                           Result);
  }

  return Result;
}
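
// Return values land in fixed registers: vectors in XMM0, integers in EAX
// (EDX:EAX for the two-operand case 3 below), and floating point on the
// fp-stack in ST(0).  With scalar SSE the value is computed in an XMM
// register but must still be returned in ST(0), hence the store-then-FLD
// sequence in the SSE path below.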
SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Copy;

  switch (Op.getNumOperands()) {
  default:
    assert(0 && "Do not know how to return this many arguments!");
    abort();
  case 1:    // ret void.
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Op.getOperand(0),
                       DAG.getConstant(getBytesToPopOnReturn(), MVT::i16));
  case 2: {
    MVT::ValueType ArgVT = Op.getOperand(1).getValueType();

    if (MVT::isVector(ArgVT)) {
      // Integer or FP vector result -> XMM0.
      if (DAG.getMachineFunction().liveout_empty())
        DAG.getMachineFunction().addLiveOut(X86::XMM0);
      Copy = DAG.getCopyToReg(Op.getOperand(0), X86::XMM0, Op.getOperand(1),
                              SDOperand());
    } else if (MVT::isInteger(ArgVT)) {
      // Integer result -> EAX.
      if (DAG.getMachineFunction().liveout_empty())
        DAG.getMachineFunction().addLiveOut(X86::EAX);

      Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EAX, Op.getOperand(1),
                              SDOperand());
    } else if (!X86ScalarSSE) {
      // FP return with fp-stack value.
      if (DAG.getMachineFunction().liveout_empty())
        DAG.getMachineFunction().addLiveOut(X86::ST0);

      std::vector<MVT::ValueType> Tys;
      Tys.push_back(MVT::Other);
      Tys.push_back(MVT::Flag);
      std::vector<SDOperand> Ops;
      Ops.push_back(Op.getOperand(0));
      Ops.push_back(Op.getOperand(1));
      Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops);
    } else {
      // FP return with ScalarSSE: the value is in an XMM register, but it
      // must still be returned on the fp-stack.
      if (DAG.getMachineFunction().liveout_empty())
        DAG.getMachineFunction().addLiveOut(X86::ST0);

      SDOperand MemLoc;
      SDOperand Chain = Op.getOperand(0);
      SDOperand Value = Op.getOperand(1);

      if (Value.getOpcode() == ISD::LOAD &&
          (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) {
        Chain  = Value.getOperand(0);
        MemLoc = Value.getOperand(1);
      } else {
        // Spill the value to memory and reload it into top of stack.
        unsigned Size = MVT::getSizeInBits(ArgVT)/8;
        MachineFunction &MF = DAG.getMachineFunction();
        int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
        MemLoc = DAG.getFrameIndex(SSFI, getPointerTy());
        Chain = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0),
                            Value, MemLoc, DAG.getSrcValue(0));
      }
      std::vector<MVT::ValueType> Tys;
      Tys.push_back(MVT::f64);
      Tys.push_back(MVT::Other);
      std::vector<SDOperand> Ops;
      Ops.push_back(Chain);
      Ops.push_back(MemLoc);
      Ops.push_back(DAG.getValueType(ArgVT));
      Copy = DAG.getNode(X86ISD::FLD, Tys, Ops);
      Tys.clear();
      Tys.push_back(MVT::Other);
      Tys.push_back(MVT::Flag);
      Ops.clear();
      Ops.push_back(Copy.getValue(1));
      Ops.push_back(Copy);
      Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops);
    }
    break;
  }
  case 3:
    if (DAG.getMachineFunction().liveout_empty()) {
      DAG.getMachineFunction().addLiveOut(X86::EAX);
      DAG.getMachineFunction().addLiveOut(X86::EDX);
    }

    Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EDX, Op.getOperand(2),
                            SDOperand());
    Copy = DAG.getCopyToReg(Copy, X86::EAX, Op.getOperand(1),
                            Copy.getValue(1));
    break;
  }
  return DAG.getNode(X86ISD::RET_FLAG, MVT::Other,
                     Copy, DAG.getConstant(getBytesToPopOnReturn(), MVT::i16),
                     Copy.getValue(1));
}

SDOperand
X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
  if (FormalArgs.size() == 0) {
    unsigned CC = cast<ConstantSDNode>(Op.getOperand(0))->getValue();
    if (CC == CallingConv::Fast && EnableFastCC)
      LowerFastCCArguments(Op, DAG);
    else
      LowerCCCArguments(Op, DAG);
  }
  return FormalArgs[Op.ResNo];
}

SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
  SDOperand InFlag(0, 0);
  SDOperand Chain = Op.getOperand(0);
  unsigned Align =
    (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
  if (Align == 0) Align = 1;

  ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
  // If not DWORD aligned, call memset if size is less than the threshold.
  // It knows how to align to the right boundary first.
  if ((Align & 3) != 0 ||
      (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
    MVT::ValueType IntPtr = getPointerTy();
    const Type *IntPtrTy = getTargetData().getIntPtrType();
    std::vector<std::pair<SDOperand, const Type*> > Args;
    Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy));
    // Extend the ubyte argument to be an int value for the call.
    SDOperand Val = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2));
    Args.push_back(std::make_pair(Val, IntPtrTy));
    Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy));
    std::pair<SDOperand,SDOperand> CallResult =
      LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false,
                  DAG.getExternalSymbol("memset", IntPtr), Args, DAG);
    return CallResult.second;
  }

  MVT::ValueType AVT;
  SDOperand Count;
  ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2));
  unsigned BytesLeft = 0;
  bool TwoRepStos = false;
  if (ValC) {
    unsigned ValReg;
    unsigned Val = ValC->getValue() & 255;

    // If the value is a constant, then we can potentially use larger sets.
    switch (Align & 3) {
    case 2:   // WORD aligned
      AVT = MVT::i16;
      Count = DAG.getConstant(I->getValue() / 2, MVT::i32);
      BytesLeft = I->getValue() % 2;
      Val    = (Val << 8) | Val;
      ValReg = X86::AX;
      break;
    case 0:   // DWORD aligned
      AVT = MVT::i32;
      if (I) {
        Count = DAG.getConstant(I->getValue() / 4, MVT::i32);
        BytesLeft = I->getValue() % 4;
      } else {
        Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3),
                            DAG.getConstant(2, MVT::i8));
        TwoRepStos = true;
      }
      Val = (Val << 8)  | Val;
      Val = (Val << 16) | Val;
      ValReg = X86::EAX;
      break;
    default:  // Byte aligned
      AVT = MVT::i8;
      Count = Op.getOperand(3);
      ValReg = X86::AL;
      break;
    }

    Chain  = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT),
                              InFlag);
    InFlag = Chain.getValue(1);
  } else {
    AVT = MVT::i8;
    Count  = Op.getOperand(3);
    Chain  = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag);
    InFlag = Chain.getValue(1);
  }

  Chain  = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag);
  InFlag = Chain.getValue(1);
  Chain  = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag);
  InFlag = Chain.getValue(1);

  std::vector<MVT::ValueType> Tys;
  Tys.push_back(MVT::Other);
  Tys.push_back(MVT::Flag);
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(DAG.getValueType(AVT));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(X86ISD::REP_STOS, Tys, Ops);

  if (TwoRepStos) {
    InFlag = Chain.getValue(1);
    Count = Op.getOperand(3);
    MVT::ValueType CVT = Count.getValueType();
    SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
                                 DAG.getConstant(3, CVT));
    Chain  = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag);
    InFlag = Chain.getValue(1);
    Tys.clear();
    Tys.push_back(MVT::Other);
    Tys.push_back(MVT::Flag);
    Ops.clear();
    Ops.push_back(Chain);
    Ops.push_back(DAG.getValueType(MVT::i8));
    Ops.push_back(InFlag);
    Chain = DAG.getNode(X86ISD::REP_STOS, Tys, Ops);
  } else if (BytesLeft) {
    // Issue stores for the last 1 - 3 bytes.
    SDOperand Value;
    unsigned Val = ValC->getValue() & 255;
    unsigned Offset = I->getValue() - BytesLeft;
    SDOperand DstAddr = Op.getOperand(1);
    MVT::ValueType AddrVT = DstAddr.getValueType();
    if (BytesLeft >= 2) {
      Value = DAG.getConstant((Val << 8) | Val, MVT::i16);
      Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
                          DAG.getNode(ISD::ADD, AddrVT, DstAddr,
                                      DAG.getConstant(Offset, AddrVT)),
                          DAG.getSrcValue(NULL));
      BytesLeft -= 2;
      Offset += 2;
    }

    if (BytesLeft == 1) {
      Value = DAG.getConstant(Val, MVT::i8);
      Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
                          DAG.getNode(ISD::ADD, AddrVT, DstAddr,
                                      DAG.getConstant(Offset, AddrVT)),
                          DAG.getSrcValue(NULL));
    }
  }

  return Chain;
}

SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Chain = Op.getOperand(0);
  unsigned Align =
    (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
  if (Align == 0) Align = 1;

  ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
  // If not DWORD aligned, call memcpy if size is less than the threshold.
  // It knows how to align to the right boundary first.
  if ((Align & 3) != 0 ||
      (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
    MVT::ValueType IntPtr = getPointerTy();
    const Type *IntPtrTy = getTargetData().getIntPtrType();
    std::vector<std::pair<SDOperand, const Type*> > Args;
    Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy));
    Args.push_back(std::make_pair(Op.getOperand(2), IntPtrTy));
    Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy));
    std::pair<SDOperand,SDOperand> CallResult =
      LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false,
                  DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG);
    return CallResult.second;
  }

  MVT::ValueType AVT;
  SDOperand Count;
  unsigned BytesLeft = 0;
  bool TwoRepMovs = false;
  switch (Align & 3) {
  case 2:   // WORD aligned
    AVT = MVT::i16;
    Count = DAG.getConstant(I->getValue() / 2, MVT::i32);
    BytesLeft = I->getValue() % 2;
    break;
  case 0:   // DWORD aligned
    AVT = MVT::i32;
    if (I) {
      Count = DAG.getConstant(I->getValue() / 4, MVT::i32);
      BytesLeft = I->getValue() % 4;
    } else {
      Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3),
                          DAG.getConstant(2, MVT::i8));
      TwoRepMovs = true;
    }
    break;
  default:  // Byte aligned
    AVT = MVT::i8;
    Count = Op.getOperand(3);
    break;
  }

  SDOperand InFlag(0, 0);
  Chain  = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag);
  InFlag = Chain.getValue(1);
  Chain  = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag);
  InFlag = Chain.getValue(1);
  Chain  = DAG.getCopyToReg(Chain, X86::ESI, Op.getOperand(2), InFlag);
  InFlag = Chain.getValue(1);

  std::vector<MVT::ValueType> Tys;
  Tys.push_back(MVT::Other);
  Tys.push_back(MVT::Flag);
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(DAG.getValueType(AVT));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, Ops);

  if (TwoRepMovs) {
    InFlag = Chain.getValue(1);
    Count = Op.getOperand(3);
    MVT::ValueType CVT = Count.getValueType();
    SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
                                 DAG.getConstant(3, CVT));
    Chain  = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag);
    InFlag = Chain.getValue(1);
    Tys.clear();
    Tys.push_back(MVT::Other);
    Tys.push_back(MVT::Flag);
    Ops.clear();
    Ops.push_back(Chain);
    Ops.push_back(DAG.getValueType(MVT::i8));
    Ops.push_back(InFlag);
    Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, Ops);
  } else if (BytesLeft) {
    // Issue loads and stores for the last 1 - 3 bytes.
    unsigned Offset = I->getValue() - BytesLeft;
    SDOperand DstAddr = Op.getOperand(1);
    MVT::ValueType DstVT = DstAddr.getValueType();
    SDOperand SrcAddr = Op.getOperand(2);
    MVT::ValueType SrcVT = SrcAddr.getValueType();
    SDOperand Value;
    if (BytesLeft >= 2) {
      Value = DAG.getLoad(MVT::i16, Chain,
                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
                                      DAG.getConstant(Offset, SrcVT)),
                          DAG.getSrcValue(NULL));
      Chain = Value.getValue(1);
      Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
                          DAG.getNode(ISD::ADD, DstVT, DstAddr,
                                      DAG.getConstant(Offset, DstVT)),
                          DAG.getSrcValue(NULL));
      BytesLeft -= 2;
      Offset += 2;
    }

    if (BytesLeft == 1) {
      Value = DAG.getLoad(MVT::i8, Chain,
                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
                                      DAG.getConstant(Offset, SrcVT)),
                          DAG.getSrcValue(NULL));
      Chain = Value.getValue(1);
      Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
                          DAG.getNode(ISD::ADD, DstVT, DstAddr,
                                      DAG.getConstant(Offset, DstVT)),
                          DAG.getSrcValue(NULL));
    }
  }

  return Chain;
}
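
// Note on the two lowerings above: for a constant length the bulk of the
// fill or copy is done with a single rep;stos / rep;movs at the widest
// unit the alignment allows, and the 1-3 leftover bytes are emitted as
// explicit stores.  E.g. a DWORD-aligned 15-byte memcpy becomes, roughly,
// rep;movsd with ECX = 3, then an i16 copy at offset 12 and an i8 copy at
// offset 14.

// RDTSC returns the 64-bit cycle counter in EDX:EAX.  The lowering below
// glues two CopyFromReg nodes onto the RDTSC_DAG node and merges the two
// i32 halves (plus the chain) with MERGE_VALUES.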
SDOperand
X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) {
  std::vector<MVT::ValueType> Tys;
  Tys.push_back(MVT::Other);
  Tys.push_back(MVT::Flag);
  std::vector<SDOperand> Ops;
  Ops.push_back(Op.getOperand(0));
  SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, Ops);
  Ops.clear();
  Ops.push_back(DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1)));
  Ops.push_back(DAG.getCopyFromReg(Ops[0].getValue(1), X86::EDX,
                                   MVT::i32, Ops[0].getValue(2)));
  Ops.push_back(Ops[1].getValue(1));
  Tys[0] = Tys[1] = MVT::i32;
  Tys.push_back(MVT::Other);
  return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops);
}

SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) {
  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  // FIXME: Replace MVT::i32 with PointerTy
  SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
  return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR,
                     Op.getOperand(1), Op.getOperand(2));
}

SDOperand
X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue();
  switch (IntNo) {
  default: return SDOperand();    // Don't custom lower most intrinsics.
  // Comparison intrinsics.
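  // Each comi/ucomi intrinsic is lowered to a COMI or UCOMI node plus an
  // X86ISD::SETCC that reads its flag result; e.g. comieq lowers, roughly,
  // to:
  //   COMI a, b ; SETE al ; ANY_EXTEND to i32
  // translateX86CC may request that the two vector operands be swapped
  // (Flip) when the reversed compare is easier to test.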
  case Intrinsic::x86_sse_comieq_ss:
  case Intrinsic::x86_sse_comilt_ss:
  case Intrinsic::x86_sse_comile_ss:
  case Intrinsic::x86_sse_comigt_ss:
  case Intrinsic::x86_sse_comige_ss:
  case Intrinsic::x86_sse_comineq_ss:
  case Intrinsic::x86_sse_ucomieq_ss:
  case Intrinsic::x86_sse_ucomilt_ss:
  case Intrinsic::x86_sse_ucomile_ss:
  case Intrinsic::x86_sse_ucomigt_ss:
  case Intrinsic::x86_sse_ucomige_ss:
  case Intrinsic::x86_sse_ucomineq_ss:
  case Intrinsic::x86_sse2_comieq_sd:
  case Intrinsic::x86_sse2_comilt_sd:
  case Intrinsic::x86_sse2_comile_sd:
  case Intrinsic::x86_sse2_comigt_sd:
  case Intrinsic::x86_sse2_comige_sd:
  case Intrinsic::x86_sse2_comineq_sd:
  case Intrinsic::x86_sse2_ucomieq_sd:
  case Intrinsic::x86_sse2_ucomilt_sd:
  case Intrinsic::x86_sse2_ucomile_sd:
  case Intrinsic::x86_sse2_ucomigt_sd:
  case Intrinsic::x86_sse2_ucomige_sd:
  case Intrinsic::x86_sse2_ucomineq_sd: {
    unsigned Opc = 0;
    ISD::CondCode CC = ISD::SETCC_INVALID;
    switch (IntNo) {
    default: break;
    case Intrinsic::x86_sse_comieq_ss:
    case Intrinsic::x86_sse2_comieq_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETEQ;
      break;
    case Intrinsic::x86_sse_comilt_ss:
    case Intrinsic::x86_sse2_comilt_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETLT;
      break;
    case Intrinsic::x86_sse_comile_ss:
    case Intrinsic::x86_sse2_comile_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETLE;
      break;
    case Intrinsic::x86_sse_comigt_ss:
    case Intrinsic::x86_sse2_comigt_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETGT;
      break;
    case Intrinsic::x86_sse_comige_ss:
    case Intrinsic::x86_sse2_comige_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETGE;
      break;
    case Intrinsic::x86_sse_comineq_ss:
    case Intrinsic::x86_sse2_comineq_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETNE;
      break;
    case Intrinsic::x86_sse_ucomieq_ss:
    case Intrinsic::x86_sse2_ucomieq_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETEQ;
      break;
    case Intrinsic::x86_sse_ucomilt_ss:
    case Intrinsic::x86_sse2_ucomilt_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETLT;
      break;
    case Intrinsic::x86_sse_ucomile_ss:
    case Intrinsic::x86_sse2_ucomile_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETLE;
      break;
    case Intrinsic::x86_sse_ucomigt_ss:
    case Intrinsic::x86_sse2_ucomigt_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETGT;
      break;
    case Intrinsic::x86_sse_ucomige_ss:
    case Intrinsic::x86_sse2_ucomige_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETGE;
      break;
    case Intrinsic::x86_sse_ucomineq_ss:
    case Intrinsic::x86_sse2_ucomineq_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETNE;
      break;
    }
    bool Flip;
    unsigned X86CC;
    translateX86CC(CC, true, X86CC, Flip);
    SDOperand Cond = DAG.getNode(Opc, MVT::Flag, Op.getOperand(Flip?2:1),
                                 Op.getOperand(Flip?1:2));
    SDOperand SetCC = DAG.getNode(X86ISD::SETCC, MVT::i8,
                                  DAG.getConstant(X86CC, MVT::i8), Cond);
    return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC);
  }
  }
}
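
// Note that every opcode dispatched below must also have been registered
// with setOperationAction(..., Custom) in the constructor; any node that
// reaches LowerOperation without a case here trips the default assert.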
3721/// 3722SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { 3723 switch (Op.getOpcode()) { 3724 default: assert(0 && "Should not custom lower this!"); 3725 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 3726 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 3727 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 3728 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); 3729 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 3730 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 3731 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 3732 case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG); 3733 case ISD::SHL_PARTS: 3734 case ISD::SRA_PARTS: 3735 case ISD::SRL_PARTS: return LowerShift(Op, DAG); 3736 case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); 3737 case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); 3738 case ISD::FABS: return LowerFABS(Op, DAG); 3739 case ISD::FNEG: return LowerFNEG(Op, DAG); 3740 case ISD::SETCC: return LowerSETCC(Op, DAG); 3741 case ISD::SELECT: return LowerSELECT(Op, DAG); 3742 case ISD::BRCOND: return LowerBRCOND(Op, DAG); 3743 case ISD::JumpTable: return LowerJumpTable(Op, DAG); 3744 case ISD::RET: return LowerRET(Op, DAG); 3745 case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG); 3746 case ISD::MEMSET: return LowerMEMSET(Op, DAG); 3747 case ISD::MEMCPY: return LowerMEMCPY(Op, DAG); 3748 case ISD::READCYCLECOUNTER: return LowerREADCYCLCECOUNTER(Op, DAG); 3749 case ISD::VASTART: return LowerVASTART(Op, DAG); 3750 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 3751 } 3752} 3753 3754const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { 3755 switch (Opcode) { 3756 default: return NULL; 3757 case X86ISD::SHLD: return "X86ISD::SHLD"; 3758 case X86ISD::SHRD: return "X86ISD::SHRD"; 3759 case X86ISD::FAND: return "X86ISD::FAND"; 3760 case X86ISD::FXOR: return "X86ISD::FXOR"; 3761 case X86ISD::FILD: return "X86ISD::FILD"; 3762 case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG"; 3763 case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM"; 3764 case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM"; 3765 case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM"; 3766 case X86ISD::FLD: return "X86ISD::FLD"; 3767 case X86ISD::FST: return "X86ISD::FST"; 3768 case X86ISD::FP_GET_RESULT: return "X86ISD::FP_GET_RESULT"; 3769 case X86ISD::FP_SET_RESULT: return "X86ISD::FP_SET_RESULT"; 3770 case X86ISD::CALL: return "X86ISD::CALL"; 3771 case X86ISD::TAILCALL: return "X86ISD::TAILCALL"; 3772 case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG"; 3773 case X86ISD::CMP: return "X86ISD::CMP"; 3774 case X86ISD::TEST: return "X86ISD::TEST"; 3775 case X86ISD::COMI: return "X86ISD::COMI"; 3776 case X86ISD::UCOMI: return "X86ISD::UCOMI"; 3777 case X86ISD::SETCC: return "X86ISD::SETCC"; 3778 case X86ISD::CMOV: return "X86ISD::CMOV"; 3779 case X86ISD::BRCOND: return "X86ISD::BRCOND"; 3780 case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; 3781 case X86ISD::REP_STOS: return "X86ISD::REP_STOS"; 3782 case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS"; 3783 case X86ISD::LOAD_PACK: return "X86ISD::LOAD_PACK"; 3784 case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg"; 3785 case X86ISD::Wrapper: return "X86ISD::Wrapper"; 3786 case X86ISD::S2VEC: return "X86ISD::S2VEC"; 3787 case X86ISD::PEXTRW: return "X86ISD::PEXTRW"; 3788 case X86ISD::PINSRW: return 
"X86ISD::PINSRW"; 3789 } 3790} 3791 3792void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op, 3793 uint64_t Mask, 3794 uint64_t &KnownZero, 3795 uint64_t &KnownOne, 3796 unsigned Depth) const { 3797 unsigned Opc = Op.getOpcode(); 3798 assert((Opc >= ISD::BUILTIN_OP_END || 3799 Opc == ISD::INTRINSIC_WO_CHAIN || 3800 Opc == ISD::INTRINSIC_W_CHAIN || 3801 Opc == ISD::INTRINSIC_VOID) && 3802 "Should use MaskedValueIsZero if you don't know whether Op" 3803 " is a target node!"); 3804 3805 KnownZero = KnownOne = 0; // Don't know anything. 3806 switch (Opc) { 3807 default: break; 3808 case X86ISD::SETCC: 3809 KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL); 3810 break; 3811 } 3812} 3813 3814std::vector<unsigned> X86TargetLowering:: 3815getRegClassForInlineAsmConstraint(const std::string &Constraint, 3816 MVT::ValueType VT) const { 3817 if (Constraint.size() == 1) { 3818 // FIXME: not handling fp-stack yet! 3819 // FIXME: not handling MMX registers yet ('y' constraint). 3820 switch (Constraint[0]) { // GCC X86 Constraint Letters 3821 default: break; // Unknown constriant letter 3822 case 'r': // GENERAL_REGS 3823 case 'R': // LEGACY_REGS 3824 return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX, 3825 X86::ESI, X86::EDI, X86::EBP, X86::ESP, 0); 3826 case 'l': // INDEX_REGS 3827 return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX, 3828 X86::ESI, X86::EDI, X86::EBP, 0); 3829 case 'q': // Q_REGS (GENERAL_REGS in 64-bit mode) 3830 case 'Q': // Q_REGS 3831 return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX, 0); 3832 case 'x': // SSE_REGS if SSE1 allowed 3833 if (Subtarget->hasSSE1()) 3834 return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 3835 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7, 3836 0); 3837 return std::vector<unsigned>(); 3838 case 'Y': // SSE_REGS if SSE2 allowed 3839 if (Subtarget->hasSSE2()) 3840 return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 3841 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7, 3842 0); 3843 return std::vector<unsigned>(); 3844 } 3845 } 3846 3847 return std::vector<unsigned>(); 3848} 3849 3850/// isLegalAddressImmediate - Return true if the integer value or 3851/// GlobalValue can be used as the offset of the target addressing mode. 3852bool X86TargetLowering::isLegalAddressImmediate(int64_t V) const { 3853 // X86 allows a sign-extended 32-bit immediate field. 3854 return (V > -(1LL << 32) && V < (1LL << 32)-1); 3855} 3856 3857bool X86TargetLowering::isLegalAddressImmediate(GlobalValue *GV) const { 3858 if (Subtarget->isTargetDarwin()) { 3859 Reloc::Model RModel = getTargetMachine().getRelocationModel(); 3860 if (RModel == Reloc::Static) 3861 return true; 3862 else if (RModel == Reloc::DynamicNoPIC) 3863 return !DarwinGVRequiresExtraLoad(GV); 3864 else 3865 return false; 3866 } else 3867 return true; 3868} 3869 3870/// isShuffleMaskLegal - Targets can use this to indicate that they only 3871/// support *some* VECTOR_SHUFFLE operations, those with specific masks. 3872/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 3873/// are assumed to be legal. 3874bool 3875X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const { 3876 // Only do shuffles on 128-bit vector types for now. 
/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
/// are assumed to be legal.
bool
X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const {
  // Only do shuffles on 128-bit vector types for now.
  if (MVT::getSizeInBits(VT) == 64) return false;
  return (Mask.Val->getNumOperands() <= 4 ||
          isSplatMask(Mask.Val) ||
          isPSHUFHW_PSHUFLWMask(Mask.Val) ||
          X86::isUNPCKLMask(Mask.Val) ||
          X86::isUNPCKL_v_undef_Mask(Mask.Val) ||
          X86::isUNPCKHMask(Mask.Val));
}

bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps,
                                               MVT::ValueType EVT,
                                               SelectionDAG &DAG) const {
  unsigned NumElts = BVOps.size();
  // Only do shuffles on 128-bit vector types for now.
  if (MVT::getSizeInBits(EVT) * NumElts == 64) return false;
  if (NumElts == 2) return true;
  if (NumElts == 4) {
    return (isMOVLMask(BVOps) || isCommutedMOVL(BVOps, true) ||
            isSHUFPMask(BVOps) || isCommutedSHUFP(BVOps));
  }
  return false;
}