SelectionDAGBuilder.cpp revision d0716b064744598ba7df33b8b47de0375c450570
1//===-- SelectionDAGBuilder.cpp - Selection-DAG building ------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This implements routines for translating from LLVM IR into SelectionDAG IR. 11// 12//===----------------------------------------------------------------------===// 13 14#define DEBUG_TYPE "isel" 15#include "SelectionDAGBuilder.h" 16#include "SDNodeDbgValue.h" 17#include "llvm/ADT/BitVector.h" 18#include "llvm/ADT/Optional.h" 19#include "llvm/ADT/SmallSet.h" 20#include "llvm/Analysis/AliasAnalysis.h" 21#include "llvm/Analysis/BranchProbabilityInfo.h" 22#include "llvm/Analysis/ConstantFolding.h" 23#include "llvm/Analysis/ValueTracking.h" 24#include "llvm/CodeGen/Analysis.h" 25#include "llvm/CodeGen/FastISel.h" 26#include "llvm/CodeGen/FunctionLoweringInfo.h" 27#include "llvm/CodeGen/GCMetadata.h" 28#include "llvm/CodeGen/GCStrategy.h" 29#include "llvm/CodeGen/MachineFrameInfo.h" 30#include "llvm/CodeGen/MachineFunction.h" 31#include "llvm/CodeGen/MachineInstrBuilder.h" 32#include "llvm/CodeGen/MachineJumpTableInfo.h" 33#include "llvm/CodeGen/MachineModuleInfo.h" 34#include "llvm/CodeGen/MachineRegisterInfo.h" 35#include "llvm/CodeGen/SelectionDAG.h" 36#include "llvm/DebugInfo.h" 37#include "llvm/IR/CallingConv.h" 38#include "llvm/IR/Constants.h" 39#include "llvm/IR/DataLayout.h" 40#include "llvm/IR/DerivedTypes.h" 41#include "llvm/IR/Function.h" 42#include "llvm/IR/GlobalVariable.h" 43#include "llvm/IR/InlineAsm.h" 44#include "llvm/IR/Instructions.h" 45#include "llvm/IR/IntrinsicInst.h" 46#include "llvm/IR/Intrinsics.h" 47#include "llvm/IR/LLVMContext.h" 48#include "llvm/IR/Module.h" 49#include "llvm/Support/CommandLine.h" 50#include "llvm/Support/Debug.h" 51#include "llvm/Support/ErrorHandling.h" 52#include "llvm/Support/MathExtras.h" 
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetSelectionDAGInfo.h"
#include <algorithm>
using namespace llvm;

/// LimitFloatPrecision - Generate low-precision inline sequences for
/// some float libcalls (6, 8 or 12 bits).
static unsigned LimitFloatPrecision;

// Command-line flag "-limit-float-precision". The parsed value is stored
// externally in LimitFloatPrecision (via cl::location) and defaults to 0.
static cl::opt<unsigned, true>
LimitFPPrecision("limit-float-precision",
                 cl::desc("Generate low-precision inline sequences "
                          "for some float libcalls"),
                 cl::location(LimitFloatPrecision),
                 cl::init(0));

// Limit the width of DAG chains. This is important in general to prevent
// DAG-based analysis from blowing up. For example, alias analysis and
// load clustering may not complete in reasonable time. It is difficult to
// recognize and avoid this situation within each individual analysis, and
// future analyses are likely to have the same behavior. Limiting DAG width is
// the safe approach, and will be especially important with global DAGs.
//
// MaxParallelChains default is arbitrarily high to avoid affecting
// optimization, but could be lowered to improve compile time. Any ld-ld-st-st
// sequence over this should have been converted to llvm.memcpy by the
// frontend.
// It is easy to induce this behavior with .ll code such as:
// %buffer = alloca [4096 x i8]
// %data = load [4096 x i8]* %argPtr
// store [4096 x i8] %data, [4096 x i8]* %buffer
static const unsigned MaxParallelChains = 64;

static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL,
                                      const SDValue *Parts, unsigned NumParts,
                                      MVT PartVT, EVT ValueVT, const Value *V);

/// getCopyFromParts - Create a value that contains the specified legal parts
/// combined into the value they represent.  If the parts combine to a type
/// larger than ValueVT then AssertOp can be used to specify whether the extra
/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
/// (ISD::AssertSext).
///
/// \param Parts     Array of NumParts values to combine; Parts[0] is always
///                  read, so NumParts must be non-zero (asserted).
/// \param PartVT    Type of each element of Parts.
/// \param ValueVT   Type of the value to produce.  Vector types are forwarded
///                  to getCopyFromPartsVector.
/// \param V         Only forwarded to the recursive and vector helpers here.
/// \param AssertOp  When not ISD::DELETED_NODE and the final correction is an
///                  integer truncate, a node with this opcode is inserted in
///                  front of the TRUNCATE.
/// \returns A value of type ValueVT; hits llvm_unreachable if no conversion
///          from the assembled part type to ValueVT is known.
static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL,
                                const SDValue *Parts,
                                unsigned NumParts, MVT PartVT, EVT ValueVT,
                                const Value *V,
                                ISD::NodeType AssertOp = ISD::DELETED_NODE) {
  if (ValueVT.isVector())
    return getCopyFromPartsVector(DAG, DL, Parts, NumParts,
                                  PartVT, ValueVT, V);

  assert(NumParts > 0 && "No parts to assemble!");
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue Val = Parts[0];

  if (NumParts > 1) {
    // Assemble the value from multiple parts.
    if (ValueVT.isInteger()) {
      unsigned PartBits = PartVT.getSizeInBits();
      unsigned ValueBits = ValueVT.getSizeInBits();

      // Assemble the power of 2 part.  RoundParts is NumParts itself when it
      // is already a power of two, otherwise 1 << Log2_32(NumParts).
      unsigned RoundParts = NumParts & (NumParts - 1) ?
        1 << Log2_32(NumParts) : NumParts;
      unsigned RoundBits = PartBits * RoundParts;
      EVT RoundVT = RoundBits == ValueBits ?
        ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
      SDValue Lo, Hi;

      EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);

      if (RoundParts > 2) {
        // Recursively assemble each half from its own run of parts.
        Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
                              PartVT, HalfVT, V);
        Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
                              RoundParts / 2, PartVT, HalfVT, V);
      } else {
        // Exactly two parts: each one is a half.
        Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
        Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
      }

      // On big-endian targets the first part is the high half.
      if (TLI.isBigEndian())
        std::swap(Lo, Hi);

      Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi);

      if (RoundParts < NumParts) {
        // Assemble the trailing non-power-of-2 part.
        unsigned OddParts = NumParts - RoundParts;
        EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
        Hi = getCopyFromParts(DAG, DL,
                              Parts + RoundParts, OddParts, PartVT, OddVT, V);

        // Combine the round and odd parts.
        Lo = Val;
        if (TLI.isBigEndian())
          std::swap(Lo, Hi);
        EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
        // Widen Hi, shift it above Lo's bits, then OR with the zero-extended
        // Lo so the two pieces do not overlap.
        Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
        Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
                         DAG.getConstant(Lo.getValueType().getSizeInBits(),
                                         TLI.getPointerTy()));
        Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
        Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
      }
    } else if (PartVT.isFloatingPoint()) {
      // FP split into multiple FP parts (for ppcf128)
      assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 &&
             "Unexpected split");
      SDValue Lo, Hi;
      Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
      Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
      if (TLI.isBigEndian())
        std::swap(Lo, Hi);
      Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
    } else {
      // FP split into integer parts (soft fp)
      assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
             !PartVT.isVector() && "Unexpected split");
      // Assemble an integer of the same width; the size-equal BITCAST at the
      // end of this function then converts it to ValueVT.
      EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
      Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V);
    }
  }

  // There is now one part, held in Val.  Correct it to match ValueVT.
  EVT PartEVT = Val.getValueType();

  if (PartEVT == ValueVT)
    return Val;

  if (PartEVT.isInteger() && ValueVT.isInteger()) {
    if (ValueVT.bitsLT(PartEVT)) {
      // For a truncate, see if we have any information to
      // indicate whether the truncated bits will always be
      // zero or sign-extension.
      if (AssertOp != ISD::DELETED_NODE)
        Val = DAG.getNode(AssertOp, DL, PartEVT, Val,
                          DAG.getValueType(ValueVT));
      return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
    }
    return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
  }

  if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
    // FP_ROUND's are always exact here.
    if (ValueVT.bitsLT(Val.getValueType()))
      return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val,
                         DAG.getTargetConstant(1, TLI.getPointerTy()));

    return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
  }

  if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits())
    return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);

  llvm_unreachable("Unknown mismatch!");
}

/// getCopyFromPartsVector - Create a vector value that contains the specified
/// legal parts combined into the value they represent.  Unlike
/// getCopyFromParts, this routine has no AssertOp parameter.
221static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, 222 const SDValue *Parts, unsigned NumParts, 223 MVT PartVT, EVT ValueVT, const Value *V) { 224 assert(ValueVT.isVector() && "Not a vector value"); 225 assert(NumParts > 0 && "No parts to assemble!"); 226 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 227 SDValue Val = Parts[0]; 228 229 // Handle a multi-element vector. 230 if (NumParts > 1) { 231 EVT IntermediateVT; 232 MVT RegisterVT; 233 unsigned NumIntermediates; 234 unsigned NumRegs = 235 TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, 236 NumIntermediates, RegisterVT); 237 assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); 238 NumParts = NumRegs; // Silence a compiler warning. 239 assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); 240 assert(RegisterVT == Parts[0].getSimpleValueType() && 241 "Part type doesn't match part!"); 242 243 // Assemble the parts into intermediate operands. 244 SmallVector<SDValue, 8> Ops(NumIntermediates); 245 if (NumIntermediates == NumParts) { 246 // If the register was not expanded, truncate or copy the value, 247 // as appropriate. 248 for (unsigned i = 0; i != NumParts; ++i) 249 Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1, 250 PartVT, IntermediateVT, V); 251 } else if (NumParts > 0) { 252 // If the intermediate type was expanded, build the intermediate 253 // operands from the parts. 254 assert(NumParts % NumIntermediates == 0 && 255 "Must expand into a divisible number of parts!"); 256 unsigned Factor = NumParts / NumIntermediates; 257 for (unsigned i = 0; i != NumIntermediates; ++i) 258 Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor, 259 PartVT, IntermediateVT, V); 260 } 261 262 // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the 263 // intermediate operands. 264 Val = DAG.getNode(IntermediateVT.isVector() ? 
265 ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, DL, 266 ValueVT, &Ops[0], NumIntermediates); 267 } 268 269 // There is now one part, held in Val. Correct it to match ValueVT. 270 EVT PartEVT = Val.getValueType(); 271 272 if (PartEVT == ValueVT) 273 return Val; 274 275 if (PartEVT.isVector()) { 276 // If the element type of the source/dest vectors are the same, but the 277 // parts vector has more elements than the value vector, then we have a 278 // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the 279 // elements we want. 280 if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) { 281 assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() && 282 "Cannot narrow, it would be a lossy transformation"); 283 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, 284 DAG.getConstant(0, TLI.getVectorIdxTy())); 285 } 286 287 // Vector/Vector bitcast. 288 if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) 289 return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); 290 291 assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() && 292 "Cannot handle this kind of promotion"); 293 // Promoted vector extract 294 bool Smaller = ValueVT.bitsLE(PartEVT); 295 return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), 296 DL, ValueVT, Val); 297 298 } 299 300 // Trivial bitcast if the types are the same size and the destination 301 // vector type is legal. 
302 if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits() && 303 TLI.isTypeLegal(ValueVT)) 304 return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); 305 306 // Handle cases such as i8 -> <1 x i1> 307 if (ValueVT.getVectorNumElements() != 1) { 308 LLVMContext &Ctx = *DAG.getContext(); 309 Twine ErrMsg("non-trivial scalar-to-vector conversion"); 310 if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) { 311 if (const CallInst *CI = dyn_cast<CallInst>(I)) 312 if (isa<InlineAsm>(CI->getCalledValue())) 313 ErrMsg = ErrMsg + ", possible invalid constraint for vector type"; 314 Ctx.emitError(I, ErrMsg); 315 } else { 316 Ctx.emitError(ErrMsg); 317 } 318 return DAG.getUNDEF(ValueVT); 319 } 320 321 if (ValueVT.getVectorNumElements() == 1 && 322 ValueVT.getVectorElementType() != PartEVT) { 323 bool Smaller = ValueVT.bitsLE(PartEVT); 324 Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), 325 DL, ValueVT.getScalarType(), Val); 326 } 327 328 return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); 329} 330 331static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc dl, 332 SDValue Val, SDValue *Parts, unsigned NumParts, 333 MVT PartVT, const Value *V); 334 335/// getCopyToParts - Create a series of nodes that contain the specified value 336/// split into legal parts. If the parts contain more bits than Val, then, for 337/// integers, ExtendKind can be used to specify how to generate the extra bits. 338static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, 339 SDValue Val, SDValue *Parts, unsigned NumParts, 340 MVT PartVT, const Value *V, 341 ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { 342 EVT ValueVT = Val.getValueType(); 343 344 // Handle the vector case separately. 
345 if (ValueVT.isVector()) 346 return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V); 347 348 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 349 unsigned PartBits = PartVT.getSizeInBits(); 350 unsigned OrigNumParts = NumParts; 351 assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!"); 352 353 if (NumParts == 0) 354 return; 355 356 assert(!ValueVT.isVector() && "Vector case handled elsewhere"); 357 EVT PartEVT = PartVT; 358 if (PartEVT == ValueVT) { 359 assert(NumParts == 1 && "No-op copy with multiple parts!"); 360 Parts[0] = Val; 361 return; 362 } 363 364 if (NumParts * PartBits > ValueVT.getSizeInBits()) { 365 // If the parts cover more bits than the value has, promote the value. 366 if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { 367 assert(NumParts == 1 && "Do not know what to promote to!"); 368 Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val); 369 } else { 370 assert((PartVT.isInteger() || PartVT == MVT::x86mmx) && 371 ValueVT.isInteger() && 372 "Unknown mismatch!"); 373 ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); 374 Val = DAG.getNode(ExtendKind, DL, ValueVT, Val); 375 if (PartVT == MVT::x86mmx) 376 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); 377 } 378 } else if (PartBits == ValueVT.getSizeInBits()) { 379 // Different types of the same size. 380 assert(NumParts == 1 && PartEVT != ValueVT); 381 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); 382 } else if (NumParts * PartBits < ValueVT.getSizeInBits()) { 383 // If the parts cover less bits than value has, truncate the value. 384 assert((PartVT.isInteger() || PartVT == MVT::x86mmx) && 385 ValueVT.isInteger() && 386 "Unknown mismatch!"); 387 ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); 388 Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); 389 if (PartVT == MVT::x86mmx) 390 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); 391 } 392 393 // The value may have changed - recompute ValueVT. 
394 ValueVT = Val.getValueType(); 395 assert(NumParts * PartBits == ValueVT.getSizeInBits() && 396 "Failed to tile the value with PartVT!"); 397 398 if (NumParts == 1) { 399 if (PartEVT != ValueVT) { 400 LLVMContext &Ctx = *DAG.getContext(); 401 Twine ErrMsg("scalar-to-vector conversion failed"); 402 if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) { 403 if (const CallInst *CI = dyn_cast<CallInst>(I)) 404 if (isa<InlineAsm>(CI->getCalledValue())) 405 ErrMsg = ErrMsg + ", possible invalid constraint for vector type"; 406 Ctx.emitError(I, ErrMsg); 407 } else { 408 Ctx.emitError(ErrMsg); 409 } 410 } 411 412 Parts[0] = Val; 413 return; 414 } 415 416 // Expand the value into multiple parts. 417 if (NumParts & (NumParts - 1)) { 418 // The number of parts is not a power of 2. Split off and copy the tail. 419 assert(PartVT.isInteger() && ValueVT.isInteger() && 420 "Do not know what to expand to!"); 421 unsigned RoundParts = 1 << Log2_32(NumParts); 422 unsigned RoundBits = RoundParts * PartBits; 423 unsigned OddParts = NumParts - RoundParts; 424 SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val, 425 DAG.getIntPtrConstant(RoundBits)); 426 getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V); 427 428 if (TLI.isBigEndian()) 429 // The odd parts were reversed by getCopyToParts - unreverse them. 430 std::reverse(Parts + RoundParts, Parts + NumParts); 431 432 NumParts = RoundParts; 433 ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); 434 Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); 435 } 436 437 // The number of parts is a power of 2. Repeatedly bisect the value using 438 // EXTRACT_ELEMENT. 
439 Parts[0] = DAG.getNode(ISD::BITCAST, DL, 440 EVT::getIntegerVT(*DAG.getContext(), 441 ValueVT.getSizeInBits()), 442 Val); 443 444 for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) { 445 for (unsigned i = 0; i < NumParts; i += StepSize) { 446 unsigned ThisBits = StepSize * PartBits / 2; 447 EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits); 448 SDValue &Part0 = Parts[i]; 449 SDValue &Part1 = Parts[i+StepSize/2]; 450 451 Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, 452 ThisVT, Part0, DAG.getIntPtrConstant(1)); 453 Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, 454 ThisVT, Part0, DAG.getIntPtrConstant(0)); 455 456 if (ThisBits == PartBits && ThisVT != PartVT) { 457 Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0); 458 Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1); 459 } 460 } 461 } 462 463 if (TLI.isBigEndian()) 464 std::reverse(Parts, Parts + OrigNumParts); 465} 466 467 468/// getCopyToPartsVector - Create a series of nodes that contain the specified 469/// value split into legal parts. 470static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, 471 SDValue Val, SDValue *Parts, unsigned NumParts, 472 MVT PartVT, const Value *V) { 473 EVT ValueVT = Val.getValueType(); 474 assert(ValueVT.isVector() && "Not a vector"); 475 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 476 477 if (NumParts == 1) { 478 EVT PartEVT = PartVT; 479 if (PartEVT == ValueVT) { 480 // Nothing to do. 481 } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) { 482 // Bitconvert vector->vector case. 483 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); 484 } else if (PartVT.isVector() && 485 PartEVT.getVectorElementType() == ValueVT.getVectorElementType() && 486 PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements()) { 487 EVT ElementVT = PartVT.getVectorElementType(); 488 // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in 489 // undef elements. 
490 SmallVector<SDValue, 16> Ops; 491 for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i) 492 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, 493 ElementVT, Val, DAG.getConstant(i, 494 TLI.getVectorIdxTy()))); 495 496 for (unsigned i = ValueVT.getVectorNumElements(), 497 e = PartVT.getVectorNumElements(); i != e; ++i) 498 Ops.push_back(DAG.getUNDEF(ElementVT)); 499 500 Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, &Ops[0], Ops.size()); 501 502 // FIXME: Use CONCAT for 2x -> 4x. 503 504 //SDValue UndefElts = DAG.getUNDEF(VectorTy); 505 //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts); 506 } else if (PartVT.isVector() && 507 PartEVT.getVectorElementType().bitsGE( 508 ValueVT.getVectorElementType()) && 509 PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) { 510 511 // Promoted vector extract 512 bool Smaller = PartEVT.bitsLE(ValueVT); 513 Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), 514 DL, PartVT, Val); 515 } else{ 516 // Vector -> scalar conversion. 517 assert(ValueVT.getVectorNumElements() == 1 && 518 "Only trivial vector-to-scalar conversions should get here!"); 519 Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, 520 PartVT, Val, DAG.getConstant(0, TLI.getVectorIdxTy())); 521 522 bool Smaller = ValueVT.bitsLE(PartVT); 523 Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), 524 DL, PartVT, Val); 525 } 526 527 Parts[0] = Val; 528 return; 529 } 530 531 // Handle a multi-element vector. 532 EVT IntermediateVT; 533 MVT RegisterVT; 534 unsigned NumIntermediates; 535 unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, 536 IntermediateVT, 537 NumIntermediates, RegisterVT); 538 unsigned NumElements = ValueVT.getVectorNumElements(); 539 540 assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); 541 NumParts = NumRegs; // Silence a compiler warning. 
542 assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); 543 544 // Split the vector into intermediate operands. 545 SmallVector<SDValue, 8> Ops(NumIntermediates); 546 for (unsigned i = 0; i != NumIntermediates; ++i) { 547 if (IntermediateVT.isVector()) 548 Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, 549 IntermediateVT, Val, 550 DAG.getConstant(i * (NumElements / NumIntermediates), 551 TLI.getVectorIdxTy())); 552 else 553 Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, 554 IntermediateVT, Val, 555 DAG.getConstant(i, TLI.getVectorIdxTy())); 556 } 557 558 // Split the intermediate operands into legal parts. 559 if (NumParts == NumIntermediates) { 560 // If the register was not expanded, promote or copy the value, 561 // as appropriate. 562 for (unsigned i = 0; i != NumParts; ++i) 563 getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V); 564 } else if (NumParts > 0) { 565 // If the intermediate type was expanded, split each the value into 566 // legal parts. 567 assert(NumParts % NumIntermediates == 0 && 568 "Must expand into a divisible number of parts!"); 569 unsigned Factor = NumParts / NumIntermediates; 570 for (unsigned i = 0; i != NumIntermediates; ++i) 571 getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT, V); 572 } 573} 574 575namespace { 576 /// RegsForValue - This struct represents the registers (physical or virtual) 577 /// that a particular set of values is assigned, and the type information 578 /// about the value. The most common situation is to represent one value at a 579 /// time, but struct or array values are handled element-wise as multiple 580 /// values. The splitting of aggregates is performed recursively, so that we 581 /// never have aggregate-typed registers. The values at this point do not 582 /// necessarily have legal types, so each value may require one or more 583 /// registers of some legal type. 
584 /// 585 struct RegsForValue { 586 /// ValueVTs - The value types of the values, which may not be legal, and 587 /// may need be promoted or synthesized from one or more registers. 588 /// 589 SmallVector<EVT, 4> ValueVTs; 590 591 /// RegVTs - The value types of the registers. This is the same size as 592 /// ValueVTs and it records, for each value, what the type of the assigned 593 /// register or registers are. (Individual values are never synthesized 594 /// from more than one type of register.) 595 /// 596 /// With virtual registers, the contents of RegVTs is redundant with TLI's 597 /// getRegisterType member function, however when with physical registers 598 /// it is necessary to have a separate record of the types. 599 /// 600 SmallVector<MVT, 4> RegVTs; 601 602 /// Regs - This list holds the registers assigned to the values. 603 /// Each legal or promoted value requires one register, and each 604 /// expanded value requires multiple registers. 605 /// 606 SmallVector<unsigned, 4> Regs; 607 608 RegsForValue() {} 609 610 RegsForValue(const SmallVector<unsigned, 4> ®s, 611 MVT regvt, EVT valuevt) 612 : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} 613 614 RegsForValue(LLVMContext &Context, const TargetLowering &tli, 615 unsigned Reg, Type *Ty) { 616 ComputeValueVTs(tli, Ty, ValueVTs); 617 618 for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { 619 EVT ValueVT = ValueVTs[Value]; 620 unsigned NumRegs = tli.getNumRegisters(Context, ValueVT); 621 MVT RegisterVT = tli.getRegisterType(Context, ValueVT); 622 for (unsigned i = 0; i != NumRegs; ++i) 623 Regs.push_back(Reg + i); 624 RegVTs.push_back(RegisterVT); 625 Reg += NumRegs; 626 } 627 } 628 629 /// areValueTypesLegal - Return true if types of all the values are legal. 
630 bool areValueTypesLegal(const TargetLowering &TLI) { 631 for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { 632 MVT RegisterVT = RegVTs[Value]; 633 if (!TLI.isTypeLegal(RegisterVT)) 634 return false; 635 } 636 return true; 637 } 638 639 /// append - Add the specified values to this one. 640 void append(const RegsForValue &RHS) { 641 ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end()); 642 RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end()); 643 Regs.append(RHS.Regs.begin(), RHS.Regs.end()); 644 } 645 646 /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from 647 /// this value and returns the result as a ValueVTs value. This uses 648 /// Chain/Flag as the input and updates them for the output Chain/Flag. 649 /// If the Flag pointer is NULL, no flag is used. 650 SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, 651 SDLoc dl, 652 SDValue &Chain, SDValue *Flag, 653 const Value *V = 0) const; 654 655 /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the 656 /// specified value into the registers specified by this object. This uses 657 /// Chain/Flag as the input and updates them for the output Chain/Flag. 658 /// If the Flag pointer is NULL, no flag is used. 659 void getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, 660 SDValue &Chain, SDValue *Flag, const Value *V) const; 661 662 /// AddInlineAsmOperands - Add this value to the specified inlineasm node 663 /// operand list. This adds the code marker, matching input operand index 664 /// (if applicable), and includes the number of values added into it. 665 void AddInlineAsmOperands(unsigned Kind, 666 bool HasMatching, unsigned MatchingIdx, 667 SelectionDAG &DAG, 668 std::vector<SDValue> &Ops) const; 669 }; 670} 671 672/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from 673/// this value and returns the result as a ValueVT value. 
This uses 674/// Chain/Flag as the input and updates them for the output Chain/Flag. 675/// If the Flag pointer is NULL, no flag is used. 676SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, 677 FunctionLoweringInfo &FuncInfo, 678 SDLoc dl, 679 SDValue &Chain, SDValue *Flag, 680 const Value *V) const { 681 // A Value with type {} or [0 x %t] needs no registers. 682 if (ValueVTs.empty()) 683 return SDValue(); 684 685 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 686 687 // Assemble the legal parts into the final values. 688 SmallVector<SDValue, 4> Values(ValueVTs.size()); 689 SmallVector<SDValue, 8> Parts; 690 for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { 691 // Copy the legal parts from the registers. 692 EVT ValueVT = ValueVTs[Value]; 693 unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT); 694 MVT RegisterVT = RegVTs[Value]; 695 696 Parts.resize(NumRegs); 697 for (unsigned i = 0; i != NumRegs; ++i) { 698 SDValue P; 699 if (Flag == 0) { 700 P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT); 701 } else { 702 P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag); 703 *Flag = P.getValue(2); 704 } 705 706 Chain = P.getValue(1); 707 Parts[i] = P; 708 709 // If the source register was virtual and if we know something about it, 710 // add an assert node. 711 if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) || 712 !RegisterVT.isInteger() || RegisterVT.isVector()) 713 continue; 714 715 const FunctionLoweringInfo::LiveOutInfo *LOI = 716 FuncInfo.GetLiveOutRegInfo(Regs[Part+i]); 717 if (!LOI) 718 continue; 719 720 unsigned RegSize = RegisterVT.getSizeInBits(); 721 unsigned NumSignBits = LOI->NumSignBits; 722 unsigned NumZeroBits = LOI->KnownZero.countLeadingOnes(); 723 724 if (NumZeroBits == RegSize) { 725 // The current value is a zero. 726 // Explicitly express that as it would be easier for 727 // optimizations to kick in. 
728 Parts[i] = DAG.getConstant(0, RegisterVT); 729 continue; 730 } 731 732 // FIXME: We capture more information than the dag can represent. For 733 // now, just use the tightest assertzext/assertsext possible. 734 bool isSExt = true; 735 EVT FromVT(MVT::Other); 736 if (NumSignBits == RegSize) 737 isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1 738 else if (NumZeroBits >= RegSize-1) 739 isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1 740 else if (NumSignBits > RegSize-8) 741 isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8 742 else if (NumZeroBits >= RegSize-8) 743 isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8 744 else if (NumSignBits > RegSize-16) 745 isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16 746 else if (NumZeroBits >= RegSize-16) 747 isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16 748 else if (NumSignBits > RegSize-32) 749 isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32 750 else if (NumZeroBits >= RegSize-32) 751 isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32 752 else 753 continue; 754 755 // Add an assertion node. 756 assert(FromVT != MVT::Other); 757 Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl, 758 RegisterVT, P, DAG.getValueType(FromVT)); 759 } 760 761 Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), 762 NumRegs, RegisterVT, ValueVT, V); 763 Part += NumRegs; 764 Parts.clear(); 765 } 766 767 return DAG.getNode(ISD::MERGE_VALUES, dl, 768 DAG.getVTList(&ValueVTs[0], ValueVTs.size()), 769 &Values[0], ValueVTs.size()); 770} 771 772/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the 773/// specified value into the registers specified by this object. This uses 774/// Chain/Flag as the input and updates them for the output Chain/Flag. 775/// If the Flag pointer is NULL, no flag is used. 
776void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, 777 SDValue &Chain, SDValue *Flag, 778 const Value *V) const { 779 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 780 781 // Get the list of the values's legal parts. 782 unsigned NumRegs = Regs.size(); 783 SmallVector<SDValue, 8> Parts(NumRegs); 784 for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { 785 EVT ValueVT = ValueVTs[Value]; 786 unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT); 787 MVT RegisterVT = RegVTs[Value]; 788 ISD::NodeType ExtendKind = 789 TLI.isZExtFree(Val, RegisterVT)? ISD::ZERO_EXTEND: ISD::ANY_EXTEND; 790 791 getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), 792 &Parts[Part], NumParts, RegisterVT, V, ExtendKind); 793 Part += NumParts; 794 } 795 796 // Copy the parts into the registers. 797 SmallVector<SDValue, 8> Chains(NumRegs); 798 for (unsigned i = 0; i != NumRegs; ++i) { 799 SDValue Part; 800 if (Flag == 0) { 801 Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]); 802 } else { 803 Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag); 804 *Flag = Part.getValue(1); 805 } 806 807 Chains[i] = Part.getValue(0); 808 } 809 810 if (NumRegs == 1 || Flag) 811 // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is 812 // flagged to it. That is the CopyToReg nodes and the user are considered 813 // a single scheduling unit. If we create a TokenFactor and return it as 814 // chain, then the TokenFactor is both a predecessor (operand) of the 815 // user as well as a successor (the TF operands are flagged to the user). 816 // c1, f1 = CopyToReg 817 // c2, f2 = CopyToReg 818 // c3 = TokenFactor c1, c2 819 // ... 820 // = op c3, ..., f2 821 Chain = Chains[NumRegs-1]; 822 else 823 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs); 824} 825 826/// AddInlineAsmOperands - Add this value to the specified inlineasm node 827/// operand list. 
This adds the code marker and includes the number of 828/// values added into it. 829void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, 830 unsigned MatchingIdx, 831 SelectionDAG &DAG, 832 std::vector<SDValue> &Ops) const { 833 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 834 835 unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size()); 836 if (HasMatching) 837 Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx); 838 else if (!Regs.empty() && 839 TargetRegisterInfo::isVirtualRegister(Regs.front())) { 840 // Put the register class of the virtual registers in the flag word. That 841 // way, later passes can recompute register class constraints for inline 842 // assembly as well as normal instructions. 843 // Don't do this for tied operands that can use the regclass information 844 // from the def. 845 const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); 846 const TargetRegisterClass *RC = MRI.getRegClass(Regs.front()); 847 Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID()); 848 } 849 850 SDValue Res = DAG.getTargetConstant(Flag, MVT::i32); 851 Ops.push_back(Res); 852 853 for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { 854 unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]); 855 MVT RegisterVT = RegVTs[Value]; 856 for (unsigned i = 0; i != NumRegs; ++i) { 857 assert(Reg < Regs.size() && "Mismatch in # registers expected"); 858 Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT)); 859 } 860 } 861} 862 863void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, 864 const TargetLibraryInfo *li) { 865 AA = &aa; 866 GFI = gfi; 867 LibInfo = li; 868 TD = DAG.getTarget().getDataLayout(); 869 Context = DAG.getContext(); 870 LPadToCallSiteMap.clear(); 871} 872 873/// clear - Clear out the current SelectionDAG and the associated 874/// state and prepare this SelectionDAGBuilder object to be used 875/// for a new block. 
This doesn't clear out information about 876/// additional blocks that are needed to complete switch lowering 877/// or PHI node updating; that information is cleared out as it is 878/// consumed. 879void SelectionDAGBuilder::clear() { 880 NodeMap.clear(); 881 UnusedArgNodeMap.clear(); 882 PendingLoads.clear(); 883 PendingExports.clear(); 884 CurInst = NULL; 885 HasTailCall = false; 886} 887 888/// clearDanglingDebugInfo - Clear the dangling debug information 889/// map. This function is separated from the clear so that debug 890/// information that is dangling in a basic block can be properly 891/// resolved in a different basic block. This allows the 892/// SelectionDAG to resolve dangling debug information attached 893/// to PHI nodes. 894void SelectionDAGBuilder::clearDanglingDebugInfo() { 895 DanglingDebugInfoMap.clear(); 896} 897 898/// getRoot - Return the current virtual root of the Selection DAG, 899/// flushing any PendingLoad items. This must be done before emitting 900/// a store or any other node that may need to be ordered after any 901/// prior load instructions. 902/// 903SDValue SelectionDAGBuilder::getRoot() { 904 if (PendingLoads.empty()) 905 return DAG.getRoot(); 906 907 if (PendingLoads.size() == 1) { 908 SDValue Root = PendingLoads[0]; 909 DAG.setRoot(Root); 910 PendingLoads.clear(); 911 return Root; 912 } 913 914 // Otherwise, we have to make a token factor node. 915 SDValue Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, 916 &PendingLoads[0], PendingLoads.size()); 917 PendingLoads.clear(); 918 DAG.setRoot(Root); 919 return Root; 920} 921 922/// getControlRoot - Similar to getRoot, but instead of flushing all the 923/// PendingLoad items, flush all the PendingExports items. It is necessary 924/// to do this before emitting a terminator instruction. 
925/// 926SDValue SelectionDAGBuilder::getControlRoot() { 927 SDValue Root = DAG.getRoot(); 928 929 if (PendingExports.empty()) 930 return Root; 931 932 // Turn all of the CopyToReg chains into one factored node. 933 if (Root.getOpcode() != ISD::EntryToken) { 934 unsigned i = 0, e = PendingExports.size(); 935 for (; i != e; ++i) { 936 assert(PendingExports[i].getNode()->getNumOperands() > 1); 937 if (PendingExports[i].getNode()->getOperand(0) == Root) 938 break; // Don't add the root if we already indirectly depend on it. 939 } 940 941 if (i == e) 942 PendingExports.push_back(Root); 943 } 944 945 Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, 946 &PendingExports[0], 947 PendingExports.size()); 948 PendingExports.clear(); 949 DAG.setRoot(Root); 950 return Root; 951} 952 953void SelectionDAGBuilder::visit(const Instruction &I) { 954 // Set up outgoing PHI node register values before emitting the terminator. 955 if (isa<TerminatorInst>(&I)) 956 HandlePHINodesInSuccessorBlocks(I.getParent()); 957 958 ++SDNodeOrder; 959 960 CurInst = &I; 961 962 visit(I.getOpcode(), I); 963 964 if (!isa<TerminatorInst>(&I) && !HasTailCall) 965 CopyToExportRegsIfNeeded(&I); 966 967 CurInst = NULL; 968} 969 970void SelectionDAGBuilder::visitPHI(const PHINode &) { 971 llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!"); 972} 973 974void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) { 975 // Note: this doesn't use InstVisitor, because it has to work with 976 // ConstantExpr's in addition to instructions. 977 switch (Opcode) { 978 default: llvm_unreachable("Unknown instruction type encountered!"); 979 // Build the switch statement using the Instruction.def file. 
980#define HANDLE_INST(NUM, OPCODE, CLASS) \ 981 case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break; 982#include "llvm/IR/Instruction.def" 983 } 984} 985 986// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V, 987// generate the debug data structures now that we've seen its definition. 988void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, 989 SDValue Val) { 990 DanglingDebugInfo &DDI = DanglingDebugInfoMap[V]; 991 if (DDI.getDI()) { 992 const DbgValueInst *DI = DDI.getDI(); 993 DebugLoc dl = DDI.getdl(); 994 unsigned DbgSDNodeOrder = DDI.getSDNodeOrder(); 995 MDNode *Variable = DI->getVariable(); 996 uint64_t Offset = DI->getOffset(); 997 SDDbgValue *SDV; 998 if (Val.getNode()) { 999 if (!EmitFuncArgumentDbgValue(V, Variable, Offset, Val)) { 1000 SDV = DAG.getDbgValue(Variable, Val.getNode(), 1001 Val.getResNo(), Offset, dl, DbgSDNodeOrder); 1002 DAG.AddDbgValue(SDV, Val.getNode(), false); 1003 } 1004 } else 1005 DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); 1006 DanglingDebugInfoMap[V] = DanglingDebugInfo(); 1007 } 1008} 1009 1010/// getValue - Return an SDValue for the given Value. 1011SDValue SelectionDAGBuilder::getValue(const Value *V) { 1012 // If we already have an SDValue for this value, use it. It's important 1013 // to do this first, so that we don't create a CopyFromReg if we already 1014 // have a regular SDValue. 1015 SDValue &N = NodeMap[V]; 1016 if (N.getNode()) return N; 1017 1018 // If there's a virtual register allocated and initialized for this 1019 // value, use it. 
1020 DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V); 1021 if (It != FuncInfo.ValueMap.end()) { 1022 unsigned InReg = It->second; 1023 RegsForValue RFV(*DAG.getContext(), *TM.getTargetLowering(), 1024 InReg, V->getType()); 1025 SDValue Chain = DAG.getEntryNode(); 1026 N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, NULL, V); 1027 resolveDanglingDebugInfo(V, N); 1028 return N; 1029 } 1030 1031 // Otherwise create a new SDValue and remember it. 1032 SDValue Val = getValueImpl(V); 1033 NodeMap[V] = Val; 1034 resolveDanglingDebugInfo(V, Val); 1035 return Val; 1036} 1037 1038/// getNonRegisterValue - Return an SDValue for the given Value, but 1039/// don't look in FuncInfo.ValueMap for a virtual register. 1040SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { 1041 // If we already have an SDValue for this value, use it. 1042 SDValue &N = NodeMap[V]; 1043 if (N.getNode()) return N; 1044 1045 // Otherwise create a new SDValue and remember it. 1046 SDValue Val = getValueImpl(V); 1047 NodeMap[V] = Val; 1048 resolveDanglingDebugInfo(V, Val); 1049 return Val; 1050} 1051 1052/// getValueImpl - Helper function for getValue and getNonRegisterValue. 1053/// Create an SDValue for the given value. 
1054SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { 1055 const TargetLowering *TLI = TM.getTargetLowering(); 1056 1057 if (const Constant *C = dyn_cast<Constant>(V)) { 1058 EVT VT = TLI->getValueType(V->getType(), true); 1059 1060 if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) 1061 return DAG.getConstant(*CI, VT); 1062 1063 if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) 1064 return DAG.getGlobalAddress(GV, getCurSDLoc(), VT); 1065 1066 if (isa<ConstantPointerNull>(C)) 1067 return DAG.getConstant(0, TLI->getPointerTy()); 1068 1069 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) 1070 return DAG.getConstantFP(*CFP, VT); 1071 1072 if (isa<UndefValue>(C) && !V->getType()->isAggregateType()) 1073 return DAG.getUNDEF(VT); 1074 1075 if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { 1076 visit(CE->getOpcode(), *CE); 1077 SDValue N1 = NodeMap[V]; 1078 assert(N1.getNode() && "visit didn't populate the NodeMap!"); 1079 return N1; 1080 } 1081 1082 if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) { 1083 SmallVector<SDValue, 4> Constants; 1084 for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end(); 1085 OI != OE; ++OI) { 1086 SDNode *Val = getValue(*OI).getNode(); 1087 // If the operand is an empty aggregate, there are no values. 1088 if (!Val) continue; 1089 // Add each leaf value from the operand to the Constants list 1090 // to form a flattened list of all the values. 
1091 for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i) 1092 Constants.push_back(SDValue(Val, i)); 1093 } 1094 1095 return DAG.getMergeValues(&Constants[0], Constants.size(), 1096 getCurSDLoc()); 1097 } 1098 1099 if (const ConstantDataSequential *CDS = 1100 dyn_cast<ConstantDataSequential>(C)) { 1101 SmallVector<SDValue, 4> Ops; 1102 for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { 1103 SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode(); 1104 // Add each leaf value from the operand to the Constants list 1105 // to form a flattened list of all the values. 1106 for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i) 1107 Ops.push_back(SDValue(Val, i)); 1108 } 1109 1110 if (isa<ArrayType>(CDS->getType())) 1111 return DAG.getMergeValues(&Ops[0], Ops.size(), getCurSDLoc()); 1112 return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), 1113 VT, &Ops[0], Ops.size()); 1114 } 1115 1116 if (C->getType()->isStructTy() || C->getType()->isArrayTy()) { 1117 assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) && 1118 "Unknown struct or array constant!"); 1119 1120 SmallVector<EVT, 4> ValueVTs; 1121 ComputeValueVTs(*TLI, C->getType(), ValueVTs); 1122 unsigned NumElts = ValueVTs.size(); 1123 if (NumElts == 0) 1124 return SDValue(); // empty struct 1125 SmallVector<SDValue, 4> Constants(NumElts); 1126 for (unsigned i = 0; i != NumElts; ++i) { 1127 EVT EltVT = ValueVTs[i]; 1128 if (isa<UndefValue>(C)) 1129 Constants[i] = DAG.getUNDEF(EltVT); 1130 else if (EltVT.isFloatingPoint()) 1131 Constants[i] = DAG.getConstantFP(0, EltVT); 1132 else 1133 Constants[i] = DAG.getConstant(0, EltVT); 1134 } 1135 1136 return DAG.getMergeValues(&Constants[0], NumElts, 1137 getCurSDLoc()); 1138 } 1139 1140 if (const BlockAddress *BA = dyn_cast<BlockAddress>(C)) 1141 return DAG.getBlockAddress(BA, VT); 1142 1143 VectorType *VecTy = cast<VectorType>(V->getType()); 1144 unsigned NumElements = VecTy->getNumElements(); 1145 1146 // Now that we know the 
number and type of the elements, get that number of 1147 // elements into the Ops array based on what kind of constant it is. 1148 SmallVector<SDValue, 16> Ops; 1149 if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) { 1150 for (unsigned i = 0; i != NumElements; ++i) 1151 Ops.push_back(getValue(CV->getOperand(i))); 1152 } else { 1153 assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!"); 1154 EVT EltVT = TLI->getValueType(VecTy->getElementType()); 1155 1156 SDValue Op; 1157 if (EltVT.isFloatingPoint()) 1158 Op = DAG.getConstantFP(0, EltVT); 1159 else 1160 Op = DAG.getConstant(0, EltVT); 1161 Ops.assign(NumElements, Op); 1162 } 1163 1164 // Create a BUILD_VECTOR node. 1165 return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), 1166 VT, &Ops[0], Ops.size()); 1167 } 1168 1169 // If this is a static alloca, generate it as the frameindex instead of 1170 // computation. 1171 if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) { 1172 DenseMap<const AllocaInst*, int>::iterator SI = 1173 FuncInfo.StaticAllocaMap.find(AI); 1174 if (SI != FuncInfo.StaticAllocaMap.end()) 1175 return DAG.getFrameIndex(SI->second, TLI->getPointerTy()); 1176 } 1177 1178 // If this is an instruction which fast-isel has deferred, select it now. 
1179 if (const Instruction *Inst = dyn_cast<Instruction>(V)) { 1180 unsigned InReg = FuncInfo.InitializeRegForValue(Inst); 1181 RegsForValue RFV(*DAG.getContext(), *TLI, InReg, Inst->getType()); 1182 SDValue Chain = DAG.getEntryNode(); 1183 return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, NULL, V); 1184 } 1185 1186 llvm_unreachable("Can't get register for value!"); 1187} 1188 1189void SelectionDAGBuilder::visitRet(const ReturnInst &I) { 1190 const TargetLowering *TLI = TM.getTargetLowering(); 1191 SDValue Chain = getControlRoot(); 1192 SmallVector<ISD::OutputArg, 8> Outs; 1193 SmallVector<SDValue, 8> OutVals; 1194 1195 if (!FuncInfo.CanLowerReturn) { 1196 unsigned DemoteReg = FuncInfo.DemoteRegister; 1197 const Function *F = I.getParent()->getParent(); 1198 1199 // Emit a store of the return value through the virtual register. 1200 // Leave Outs empty so that LowerReturn won't try to load return 1201 // registers the usual way. 1202 SmallVector<EVT, 1> PtrValueVTs; 1203 ComputeValueVTs(*TLI, PointerType::getUnqual(F->getReturnType()), 1204 PtrValueVTs); 1205 1206 SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]); 1207 SDValue RetOp = getValue(I.getOperand(0)); 1208 1209 SmallVector<EVT, 4> ValueVTs; 1210 SmallVector<uint64_t, 4> Offsets; 1211 ComputeValueVTs(*TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets); 1212 unsigned NumValues = ValueVTs.size(); 1213 1214 SmallVector<SDValue, 4> Chains(NumValues); 1215 for (unsigned i = 0; i != NumValues; ++i) { 1216 SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), 1217 RetPtr.getValueType(), RetPtr, 1218 DAG.getIntPtrConstant(Offsets[i])); 1219 Chains[i] = 1220 DAG.getStore(Chain, getCurSDLoc(), 1221 SDValue(RetOp.getNode(), RetOp.getResNo() + i), 1222 // FIXME: better loc info would be nice. 
1223 Add, MachinePointerInfo(), false, false, 0); 1224 } 1225 1226 Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), 1227 MVT::Other, &Chains[0], NumValues); 1228 } else if (I.getNumOperands() != 0) { 1229 SmallVector<EVT, 4> ValueVTs; 1230 ComputeValueVTs(*TLI, I.getOperand(0)->getType(), ValueVTs); 1231 unsigned NumValues = ValueVTs.size(); 1232 if (NumValues) { 1233 SDValue RetOp = getValue(I.getOperand(0)); 1234 for (unsigned j = 0, f = NumValues; j != f; ++j) { 1235 EVT VT = ValueVTs[j]; 1236 1237 ISD::NodeType ExtendKind = ISD::ANY_EXTEND; 1238 1239 const Function *F = I.getParent()->getParent(); 1240 if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, 1241 Attribute::SExt)) 1242 ExtendKind = ISD::SIGN_EXTEND; 1243 else if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, 1244 Attribute::ZExt)) 1245 ExtendKind = ISD::ZERO_EXTEND; 1246 1247 if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) 1248 VT = TLI->getTypeForExtArgOrReturn(VT.getSimpleVT(), ExtendKind); 1249 1250 unsigned NumParts = TLI->getNumRegisters(*DAG.getContext(), VT); 1251 MVT PartVT = TLI->getRegisterType(*DAG.getContext(), VT); 1252 SmallVector<SDValue, 4> Parts(NumParts); 1253 getCopyToParts(DAG, getCurSDLoc(), 1254 SDValue(RetOp.getNode(), RetOp.getResNo() + j), 1255 &Parts[0], NumParts, PartVT, &I, ExtendKind); 1256 1257 // 'inreg' on function refers to return value 1258 ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); 1259 if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, 1260 Attribute::InReg)) 1261 Flags.setInReg(); 1262 1263 // Propagate extension type if any 1264 if (ExtendKind == ISD::SIGN_EXTEND) 1265 Flags.setSExt(); 1266 else if (ExtendKind == ISD::ZERO_EXTEND) 1267 Flags.setZExt(); 1268 1269 for (unsigned i = 0; i < NumParts; ++i) { 1270 Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(), 1271 VT, /*isfixed=*/true, 0, 0)); 1272 OutVals.push_back(Parts[i]); 1273 } 1274 } 1275 } 1276 } 1277 1278 bool isVarArg = 
DAG.getMachineFunction().getFunction()->isVarArg(); 1279 CallingConv::ID CallConv = 1280 DAG.getMachineFunction().getFunction()->getCallingConv(); 1281 Chain = TM.getTargetLowering()->LowerReturn(Chain, CallConv, isVarArg, 1282 Outs, OutVals, getCurSDLoc(), 1283 DAG); 1284 1285 // Verify that the target's LowerReturn behaved as expected. 1286 assert(Chain.getNode() && Chain.getValueType() == MVT::Other && 1287 "LowerReturn didn't return a valid chain!"); 1288 1289 // Update the DAG with the new chain value resulting from return lowering. 1290 DAG.setRoot(Chain); 1291} 1292 1293/// CopyToExportRegsIfNeeded - If the given value has virtual registers 1294/// created for it, emit nodes to copy the value into the virtual 1295/// registers. 1296void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) { 1297 // Skip empty types 1298 if (V->getType()->isEmptyTy()) 1299 return; 1300 1301 DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); 1302 if (VMI != FuncInfo.ValueMap.end()) { 1303 assert(!V->use_empty() && "Unused value assigned virtual registers!"); 1304 CopyValueToVirtualRegister(V, VMI->second); 1305 } 1306} 1307 1308/// ExportFromCurrentBlock - If this condition isn't known to be exported from 1309/// the current basic block, add it to ValueMap now so that we'll get a 1310/// CopyTo/FromReg. 1311void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) { 1312 // No need to export constants. 1313 if (!isa<Instruction>(V) && !isa<Argument>(V)) return; 1314 1315 // Already exported? 1316 if (FuncInfo.isExportedInst(V)) return; 1317 1318 unsigned Reg = FuncInfo.InitializeRegForValue(V); 1319 CopyValueToVirtualRegister(V, Reg); 1320} 1321 1322bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V, 1323 const BasicBlock *FromBB) { 1324 // The operands of the setcc have to be in this block. We don't know 1325 // how to export them from some other block. 
1326 if (const Instruction *VI = dyn_cast<Instruction>(V)) { 1327 // Can export from current BB. 1328 if (VI->getParent() == FromBB) 1329 return true; 1330 1331 // Is already exported, noop. 1332 return FuncInfo.isExportedInst(V); 1333 } 1334 1335 // If this is an argument, we can export it if the BB is the entry block or 1336 // if it is already exported. 1337 if (isa<Argument>(V)) { 1338 if (FromBB == &FromBB->getParent()->getEntryBlock()) 1339 return true; 1340 1341 // Otherwise, can only export this if it is already exported. 1342 return FuncInfo.isExportedInst(V); 1343 } 1344 1345 // Otherwise, constants can always be exported. 1346 return true; 1347} 1348 1349/// Return branch probability calculated by BranchProbabilityInfo for IR blocks. 1350uint32_t SelectionDAGBuilder::getEdgeWeight(const MachineBasicBlock *Src, 1351 const MachineBasicBlock *Dst) const { 1352 BranchProbabilityInfo *BPI = FuncInfo.BPI; 1353 if (!BPI) 1354 return 0; 1355 const BasicBlock *SrcBB = Src->getBasicBlock(); 1356 const BasicBlock *DstBB = Dst->getBasicBlock(); 1357 return BPI->getEdgeWeight(SrcBB, DstBB); 1358} 1359 1360void SelectionDAGBuilder:: 1361addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst, 1362 uint32_t Weight /* = 0 */) { 1363 if (!Weight) 1364 Weight = getEdgeWeight(Src, Dst); 1365 Src->addSuccessor(Dst, Weight); 1366} 1367 1368 1369static bool InBlock(const Value *V, const BasicBlock *BB) { 1370 if (const Instruction *I = dyn_cast<Instruction>(V)) 1371 return I->getParent() == BB; 1372 return true; 1373} 1374 1375/// EmitBranchForMergedCondition - Helper method for FindMergedConditions. 1376/// This function emits a branch and is used at the leaves of an OR or an 1377/// AND operator tree. 
1378/// 1379void 1380SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, 1381 MachineBasicBlock *TBB, 1382 MachineBasicBlock *FBB, 1383 MachineBasicBlock *CurBB, 1384 MachineBasicBlock *SwitchBB) { 1385 const BasicBlock *BB = CurBB->getBasicBlock(); 1386 1387 // If the leaf of the tree is a comparison, merge the condition into 1388 // the caseblock. 1389 if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) { 1390 // The operands of the cmp have to be in this block. We don't know 1391 // how to export them from some other block. If this is the first block 1392 // of the sequence, no exporting is needed. 1393 if (CurBB == SwitchBB || 1394 (isExportableFromCurrentBlock(BOp->getOperand(0), BB) && 1395 isExportableFromCurrentBlock(BOp->getOperand(1), BB))) { 1396 ISD::CondCode Condition; 1397 if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) { 1398 Condition = getICmpCondCode(IC->getPredicate()); 1399 } else if (const FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) { 1400 Condition = getFCmpCondCode(FC->getPredicate()); 1401 if (TM.Options.NoNaNsFPMath) 1402 Condition = getFCmpCodeWithoutNaN(Condition); 1403 } else { 1404 Condition = ISD::SETEQ; // silence warning. 1405 llvm_unreachable("Unknown compare instruction"); 1406 } 1407 1408 CaseBlock CB(Condition, BOp->getOperand(0), 1409 BOp->getOperand(1), NULL, TBB, FBB, CurBB); 1410 SwitchCases.push_back(CB); 1411 return; 1412 } 1413 } 1414 1415 // Create a CaseBlock record representing this branch. 1416 CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()), 1417 NULL, TBB, FBB, CurBB); 1418 SwitchCases.push_back(CB); 1419} 1420 1421/// FindMergedConditions - If Cond is an expression like 1422void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, 1423 MachineBasicBlock *TBB, 1424 MachineBasicBlock *FBB, 1425 MachineBasicBlock *CurBB, 1426 MachineBasicBlock *SwitchBB, 1427 unsigned Opc) { 1428 // If this node is not part of the or/and tree, emit it as a branch. 
1429 const Instruction *BOp = dyn_cast<Instruction>(Cond); 1430 if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) || 1431 (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() || 1432 BOp->getParent() != CurBB->getBasicBlock() || 1433 !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) || 1434 !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) { 1435 EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB); 1436 return; 1437 } 1438 1439 // Create TmpBB after CurBB. 1440 MachineFunction::iterator BBI = CurBB; 1441 MachineFunction &MF = DAG.getMachineFunction(); 1442 MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock()); 1443 CurBB->getParent()->insert(++BBI, TmpBB); 1444 1445 if (Opc == Instruction::Or) { 1446 // Codegen X | Y as: 1447 // jmp_if_X TBB 1448 // jmp TmpBB 1449 // TmpBB: 1450 // jmp_if_Y TBB 1451 // jmp FBB 1452 // 1453 1454 // Emit the LHS condition. 1455 FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc); 1456 1457 // Emit the RHS condition into TmpBB. 1458 FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc); 1459 } else { 1460 assert(Opc == Instruction::And && "Unknown merge op!"); 1461 // Codegen X & Y as: 1462 // jmp_if_X TmpBB 1463 // jmp FBB 1464 // TmpBB: 1465 // jmp_if_Y TBB 1466 // jmp FBB 1467 // 1468 // This requires creation of TmpBB after CurBB. 1469 1470 // Emit the LHS condition. 1471 FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc); 1472 1473 // Emit the RHS condition into TmpBB. 1474 FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc); 1475 } 1476} 1477 1478/// If the set of cases should be emitted as a series of branches, return true. 1479/// If we should emit this as a bunch of and/or'd together conditions, return 1480/// false. 
1481bool 1482SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases) { 1483 if (Cases.size() != 2) return true; 1484 1485 // If this is two comparisons of the same values or'd or and'd together, they 1486 // will get folded into a single comparison, so don't emit two blocks. 1487 if ((Cases[0].CmpLHS == Cases[1].CmpLHS && 1488 Cases[0].CmpRHS == Cases[1].CmpRHS) || 1489 (Cases[0].CmpRHS == Cases[1].CmpLHS && 1490 Cases[0].CmpLHS == Cases[1].CmpRHS)) { 1491 return false; 1492 } 1493 1494 // Handle: (X != null) | (Y != null) --> (X|Y) != 0 1495 // Handle: (X == null) & (Y == null) --> (X|Y) == 0 1496 if (Cases[0].CmpRHS == Cases[1].CmpRHS && 1497 Cases[0].CC == Cases[1].CC && 1498 isa<Constant>(Cases[0].CmpRHS) && 1499 cast<Constant>(Cases[0].CmpRHS)->isNullValue()) { 1500 if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB) 1501 return false; 1502 if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB) 1503 return false; 1504 } 1505 1506 return true; 1507} 1508 1509void SelectionDAGBuilder::visitBr(const BranchInst &I) { 1510 MachineBasicBlock *BrMBB = FuncInfo.MBB; 1511 1512 // Update machine-CFG edges. 1513 MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)]; 1514 1515 // Figure out which block is immediately after the current one. 1516 MachineBasicBlock *NextBlock = 0; 1517 MachineFunction::iterator BBI = BrMBB; 1518 if (++BBI != FuncInfo.MF->end()) 1519 NextBlock = BBI; 1520 1521 if (I.isUnconditional()) { 1522 // Update machine-CFG edges. 1523 BrMBB->addSuccessor(Succ0MBB); 1524 1525 // If this is not a fall-through branch, emit the branch. 1526 if (Succ0MBB != NextBlock) 1527 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), 1528 MVT::Other, getControlRoot(), 1529 DAG.getBasicBlock(Succ0MBB))); 1530 1531 return; 1532 } 1533 1534 // If this condition is one of the special cases we handle, do special stuff 1535 // now. 
1536 const Value *CondVal = I.getCondition(); 1537 MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)]; 1538 1539 // If this is a series of conditions that are or'd or and'd together, emit 1540 // this as a sequence of branches instead of setcc's with and/or operations. 1541 // As long as jumps are not expensive, this should improve performance. 1542 // For example, instead of something like: 1543 // cmp A, B 1544 // C = seteq 1545 // cmp D, E 1546 // F = setle 1547 // or C, F 1548 // jnz foo 1549 // Emit: 1550 // cmp A, B 1551 // je foo 1552 // cmp D, E 1553 // jle foo 1554 // 1555 if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { 1556 if (!TM.getTargetLowering()->isJumpExpensive() && 1557 BOp->hasOneUse() && 1558 (BOp->getOpcode() == Instruction::And || 1559 BOp->getOpcode() == Instruction::Or)) { 1560 FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, 1561 BOp->getOpcode()); 1562 // If the compares in later blocks need to use values not currently 1563 // exported from this block, export them now. This block should always 1564 // be the first entry. 1565 assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!"); 1566 1567 // Allow some cases to be rejected. 1568 if (ShouldEmitAsBranches(SwitchCases)) { 1569 for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) { 1570 ExportFromCurrentBlock(SwitchCases[i].CmpLHS); 1571 ExportFromCurrentBlock(SwitchCases[i].CmpRHS); 1572 } 1573 1574 // Emit the branch for this block. 1575 visitSwitchCase(SwitchCases[0], BrMBB); 1576 SwitchCases.erase(SwitchCases.begin()); 1577 return; 1578 } 1579 1580 // Okay, we decided not to do this, remove any inserted MBB's and clear 1581 // SwitchCases. 1582 for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) 1583 FuncInfo.MF->erase(SwitchCases[i].ThisBB); 1584 1585 SwitchCases.clear(); 1586 } 1587 } 1588 1589 // Create a CaseBlock record representing this branch. 
1590 CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()), 1591 NULL, Succ0MBB, Succ1MBB, BrMBB); 1592 1593 // Use visitSwitchCase to actually insert the fast branch sequence for this 1594 // cond branch. 1595 visitSwitchCase(CB, BrMBB); 1596} 1597 1598/// visitSwitchCase - Emits the necessary code to represent a single node in 1599/// the binary search tree resulting from lowering a switch instruction. 1600void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, 1601 MachineBasicBlock *SwitchBB) { 1602 SDValue Cond; 1603 SDValue CondLHS = getValue(CB.CmpLHS); 1604 SDLoc dl = getCurSDLoc(); 1605 1606 // Build the setcc now. 1607 if (CB.CmpMHS == NULL) { 1608 // Fold "(X == true)" to X and "(X == false)" to !X to 1609 // handle common cases produced by branch lowering. 1610 if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) && 1611 CB.CC == ISD::SETEQ) 1612 Cond = CondLHS; 1613 else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) && 1614 CB.CC == ISD::SETEQ) { 1615 SDValue True = DAG.getConstant(1, CondLHS.getValueType()); 1616 Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True); 1617 } else 1618 Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC); 1619 } else { 1620 assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now"); 1621 1622 const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue(); 1623 const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue(); 1624 1625 SDValue CmpOp = getValue(CB.CmpMHS); 1626 EVT VT = CmpOp.getValueType(); 1627 1628 if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) { 1629 Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT), 1630 ISD::SETLE); 1631 } else { 1632 SDValue SUB = DAG.getNode(ISD::SUB, dl, 1633 VT, CmpOp, DAG.getConstant(Low, VT)); 1634 Cond = DAG.getSetCC(dl, MVT::i1, SUB, 1635 DAG.getConstant(High-Low, VT), ISD::SETULE); 1636 } 1637 } 1638 1639 // Update successor info 1640 addSuccessorWithWeight(SwitchBB, CB.TrueBB, 
CB.TrueWeight); 1641 // TrueBB and FalseBB are always different unless the incoming IR is 1642 // degenerate. This only happens when running llc on weird IR. 1643 if (CB.TrueBB != CB.FalseBB) 1644 addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight); 1645 1646 // Set NextBlock to be the MBB immediately after the current one, if any. 1647 // This is used to avoid emitting unnecessary branches to the next block. 1648 MachineBasicBlock *NextBlock = 0; 1649 MachineFunction::iterator BBI = SwitchBB; 1650 if (++BBI != FuncInfo.MF->end()) 1651 NextBlock = BBI; 1652 1653 // If the lhs block is the next block, invert the condition so that we can 1654 // fall through to the lhs instead of the rhs block. 1655 if (CB.TrueBB == NextBlock) { 1656 std::swap(CB.TrueBB, CB.FalseBB); 1657 SDValue True = DAG.getConstant(1, Cond.getValueType()); 1658 Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True); 1659 } 1660 1661 SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, 1662 MVT::Other, getControlRoot(), Cond, 1663 DAG.getBasicBlock(CB.TrueBB)); 1664 1665 // Insert the false branch. Do this even if it's a fall through branch, 1666 // this makes it easier to do DAG optimizations which require inverting 1667 // the branch condition. 
1668 BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, 1669 DAG.getBasicBlock(CB.FalseBB)); 1670 1671 DAG.setRoot(BrCond); 1672} 1673 1674/// visitJumpTable - Emit JumpTable node in the current MBB 1675void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) { 1676 // Emit the code for the jump table 1677 assert(JT.Reg != -1U && "Should lower JT Header first!"); 1678 EVT PTy = TM.getTargetLowering()->getPointerTy(); 1679 SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(), 1680 JT.Reg, PTy); 1681 SDValue Table = DAG.getJumpTable(JT.JTI, PTy); 1682 SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurSDLoc(), 1683 MVT::Other, Index.getValue(1), 1684 Table, Index); 1685 DAG.setRoot(BrJumpTable); 1686} 1687 1688/// visitJumpTableHeader - This function emits necessary code to produce index 1689/// in the JumpTable from switch case. 1690void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, 1691 JumpTableHeader &JTH, 1692 MachineBasicBlock *SwitchBB) { 1693 // Subtract the lowest switch case value from the value being switched on and 1694 // conditional branch to default mbb if the result is greater than the 1695 // difference between smallest and largest cases. 1696 SDValue SwitchOp = getValue(JTH.SValue); 1697 EVT VT = SwitchOp.getValueType(); 1698 SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, SwitchOp, 1699 DAG.getConstant(JTH.First, VT)); 1700 1701 // The SDNode we just created, which holds the value being switched on minus 1702 // the smallest case value, needs to be copied to a virtual register so it 1703 // can be used as an index into the jump table in a subsequent basic block. 1704 // This value may be smaller or larger than the target's pointer type, and 1705 // therefore require extension or truncating. 
1706 const TargetLowering *TLI = TM.getTargetLowering(); 1707 SwitchOp = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), TLI->getPointerTy()); 1708 1709 unsigned JumpTableReg = FuncInfo.CreateReg(TLI->getPointerTy()); 1710 SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurSDLoc(), 1711 JumpTableReg, SwitchOp); 1712 JT.Reg = JumpTableReg; 1713 1714 // Emit the range check for the jump table, and branch to the default block 1715 // for the switch statement if the value being switched on exceeds the largest 1716 // case in the switch. 1717 SDValue CMP = DAG.getSetCC(getCurSDLoc(), 1718 TLI->getSetCCResultType(*DAG.getContext(), 1719 Sub.getValueType()), 1720 Sub, 1721 DAG.getConstant(JTH.Last - JTH.First,VT), 1722 ISD::SETUGT); 1723 1724 // Set NextBlock to be the MBB immediately after the current one, if any. 1725 // This is used to avoid emitting unnecessary branches to the next block. 1726 MachineBasicBlock *NextBlock = 0; 1727 MachineFunction::iterator BBI = SwitchBB; 1728 1729 if (++BBI != FuncInfo.MF->end()) 1730 NextBlock = BBI; 1731 1732 SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurSDLoc(), 1733 MVT::Other, CopyTo, CMP, 1734 DAG.getBasicBlock(JT.Default)); 1735 1736 if (JT.MBB != NextBlock) 1737 BrCond = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, BrCond, 1738 DAG.getBasicBlock(JT.MBB)); 1739 1740 DAG.setRoot(BrCond); 1741} 1742 1743/// Codegen a new tail for a stack protector check ParentMBB which has had its 1744/// tail spliced into a stack protector check success bb. 1745/// 1746/// For a high level explanation of how this fits into the stack protector 1747/// generation see the comment on the declaration of class 1748/// StackProtectorDescriptor. 1749void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, 1750 MachineBasicBlock *ParentBB) { 1751 1752 // First create the loads to the guard/stack slot for the comparison. 
1753 const TargetLowering *TLI = TM.getTargetLowering(); 1754 EVT PtrTy = TLI->getPointerTy(); 1755 1756 MachineFrameInfo *MFI = ParentBB->getParent()->getFrameInfo(); 1757 int FI = MFI->getStackProtectorIndex(); 1758 1759 const Value *IRGuard = SPD.getGuard(); 1760 SDValue GuardPtr = getValue(IRGuard); 1761 SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy); 1762 1763 unsigned Align = 1764 TLI->getDataLayout()->getPrefTypeAlignment(IRGuard->getType()); 1765 SDValue Guard = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(), 1766 GuardPtr, MachinePointerInfo(IRGuard, 0), 1767 true, false, false, Align); 1768 1769 SDValue StackSlot = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(), 1770 StackSlotPtr, 1771 MachinePointerInfo::getFixedStack(FI), 1772 true, false, false, Align); 1773 1774 // Perform the comparison via a subtract/getsetcc. 1775 EVT VT = Guard.getValueType(); 1776 SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, Guard, StackSlot); 1777 1778 SDValue Cmp = DAG.getSetCC(getCurSDLoc(), 1779 TLI->getSetCCResultType(*DAG.getContext(), 1780 Sub.getValueType()), 1781 Sub, DAG.getConstant(0, VT), 1782 ISD::SETNE); 1783 1784 // If the sub is not 0, then we know the guard/stackslot do not equal, so 1785 // branch to failure MBB. 1786 SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurSDLoc(), 1787 MVT::Other, StackSlot.getOperand(0), 1788 Cmp, DAG.getBasicBlock(SPD.getFailureMBB())); 1789 // Otherwise branch to success MBB. 1790 SDValue Br = DAG.getNode(ISD::BR, getCurSDLoc(), 1791 MVT::Other, BrCond, 1792 DAG.getBasicBlock(SPD.getSuccessMBB())); 1793 1794 DAG.setRoot(Br); 1795} 1796 1797/// Codegen the failure basic block for a stack protector check. 1798/// 1799/// A failure stack protector machine basic block consists simply of a call to 1800/// __stack_chk_fail(). 1801/// 1802/// For a high level explanation of how this fits into the stack protector 1803/// generation see the comment on the declaration of class 1804/// StackProtectorDescriptor. 
1805void 1806SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) { 1807 const TargetLowering *TLI = TM.getTargetLowering(); 1808 SDValue Chain = TLI->makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, 1809 MVT::isVoid, 0, 0, false, getCurSDLoc(), 1810 false, false).second; 1811 DAG.setRoot(Chain); 1812} 1813 1814/// visitBitTestHeader - This function emits necessary code to produce value 1815/// suitable for "bit tests" 1816void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, 1817 MachineBasicBlock *SwitchBB) { 1818 // Subtract the minimum value 1819 SDValue SwitchOp = getValue(B.SValue); 1820 EVT VT = SwitchOp.getValueType(); 1821 SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, SwitchOp, 1822 DAG.getConstant(B.First, VT)); 1823 1824 // Check range 1825 const TargetLowering *TLI = TM.getTargetLowering(); 1826 SDValue RangeCmp = DAG.getSetCC(getCurSDLoc(), 1827 TLI->getSetCCResultType(*DAG.getContext(), 1828 Sub.getValueType()), 1829 Sub, DAG.getConstant(B.Range, VT), 1830 ISD::SETUGT); 1831 1832 // Determine the type of the test operands. 1833 bool UsePtrType = false; 1834 if (!TLI->isTypeLegal(VT)) 1835 UsePtrType = true; 1836 else { 1837 for (unsigned i = 0, e = B.Cases.size(); i != e; ++i) 1838 if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) { 1839 // Switch table case range are encoded into series of masks. 1840 // Just use pointer type, it's guaranteed to fit. 1841 UsePtrType = true; 1842 break; 1843 } 1844 } 1845 if (UsePtrType) { 1846 VT = TLI->getPointerTy(); 1847 Sub = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), VT); 1848 } 1849 1850 B.RegVT = VT.getSimpleVT(); 1851 B.Reg = FuncInfo.CreateReg(B.RegVT); 1852 SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurSDLoc(), 1853 B.Reg, Sub); 1854 1855 // Set NextBlock to be the MBB immediately after the current one, if any. 1856 // This is used to avoid emitting unnecessary branches to the next block. 
1857 MachineBasicBlock *NextBlock = 0; 1858 MachineFunction::iterator BBI = SwitchBB; 1859 if (++BBI != FuncInfo.MF->end()) 1860 NextBlock = BBI; 1861 1862 MachineBasicBlock* MBB = B.Cases[0].ThisBB; 1863 1864 addSuccessorWithWeight(SwitchBB, B.Default); 1865 addSuccessorWithWeight(SwitchBB, MBB); 1866 1867 SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurSDLoc(), 1868 MVT::Other, CopyTo, RangeCmp, 1869 DAG.getBasicBlock(B.Default)); 1870 1871 if (MBB != NextBlock) 1872 BrRange = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, CopyTo, 1873 DAG.getBasicBlock(MBB)); 1874 1875 DAG.setRoot(BrRange); 1876} 1877 1878/// visitBitTestCase - this function produces one "bit test" 1879void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, 1880 MachineBasicBlock* NextMBB, 1881 uint32_t BranchWeightToNext, 1882 unsigned Reg, 1883 BitTestCase &B, 1884 MachineBasicBlock *SwitchBB) { 1885 MVT VT = BB.RegVT; 1886 SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(), 1887 Reg, VT); 1888 SDValue Cmp; 1889 unsigned PopCount = CountPopulation_64(B.Mask); 1890 const TargetLowering *TLI = TM.getTargetLowering(); 1891 if (PopCount == 1) { 1892 // Testing for a single bit; just compare the shift count with what it 1893 // would need to be to shift a 1 bit in that position. 1894 Cmp = DAG.getSetCC(getCurSDLoc(), 1895 TLI->getSetCCResultType(*DAG.getContext(), VT), 1896 ShiftOp, 1897 DAG.getConstant(countTrailingZeros(B.Mask), VT), 1898 ISD::SETEQ); 1899 } else if (PopCount == BB.Range) { 1900 // There is only one zero bit in the range, test for it directly. 
1901 Cmp = DAG.getSetCC(getCurSDLoc(), 1902 TLI->getSetCCResultType(*DAG.getContext(), VT), 1903 ShiftOp, 1904 DAG.getConstant(CountTrailingOnes_64(B.Mask), VT), 1905 ISD::SETNE); 1906 } else { 1907 // Make desired shift 1908 SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurSDLoc(), VT, 1909 DAG.getConstant(1, VT), ShiftOp); 1910 1911 // Emit bit tests and jumps 1912 SDValue AndOp = DAG.getNode(ISD::AND, getCurSDLoc(), 1913 VT, SwitchVal, DAG.getConstant(B.Mask, VT)); 1914 Cmp = DAG.getSetCC(getCurSDLoc(), 1915 TLI->getSetCCResultType(*DAG.getContext(), VT), 1916 AndOp, DAG.getConstant(0, VT), 1917 ISD::SETNE); 1918 } 1919 1920 // The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight. 1921 addSuccessorWithWeight(SwitchBB, B.TargetBB, B.ExtraWeight); 1922 // The branch weight from SwitchBB to NextMBB is BranchWeightToNext. 1923 addSuccessorWithWeight(SwitchBB, NextMBB, BranchWeightToNext); 1924 1925 SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurSDLoc(), 1926 MVT::Other, getControlRoot(), 1927 Cmp, DAG.getBasicBlock(B.TargetBB)); 1928 1929 // Set NextBlock to be the MBB immediately after the current one, if any. 1930 // This is used to avoid emitting unnecessary branches to the next block. 1931 MachineBasicBlock *NextBlock = 0; 1932 MachineFunction::iterator BBI = SwitchBB; 1933 if (++BBI != FuncInfo.MF->end()) 1934 NextBlock = BBI; 1935 1936 if (NextMBB != NextBlock) 1937 BrAnd = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, BrAnd, 1938 DAG.getBasicBlock(NextMBB)); 1939 1940 DAG.setRoot(BrAnd); 1941} 1942 1943void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { 1944 MachineBasicBlock *InvokeMBB = FuncInfo.MBB; 1945 1946 // Retrieve successors. 
1947 MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)]; 1948 MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)]; 1949 1950 const Value *Callee(I.getCalledValue()); 1951 const Function *Fn = dyn_cast<Function>(Callee); 1952 if (isa<InlineAsm>(Callee)) 1953 visitInlineAsm(&I); 1954 else if (Fn && Fn->isIntrinsic()) { 1955 assert(Fn->getIntrinsicID() == Intrinsic::donothing); 1956 // Ignore invokes to @llvm.donothing: jump directly to the next BB. 1957 } else 1958 LowerCallTo(&I, getValue(Callee), false, LandingPad); 1959 1960 // If the value of the invoke is used outside of its defining block, make it 1961 // available as a virtual register. 1962 CopyToExportRegsIfNeeded(&I); 1963 1964 // Update successor info 1965 addSuccessorWithWeight(InvokeMBB, Return); 1966 addSuccessorWithWeight(InvokeMBB, LandingPad); 1967 1968 // Drop into normal successor. 1969 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), 1970 MVT::Other, getControlRoot(), 1971 DAG.getBasicBlock(Return))); 1972} 1973 1974void SelectionDAGBuilder::visitResume(const ResumeInst &RI) { 1975 llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!"); 1976} 1977 1978void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { 1979 assert(FuncInfo.MBB->isLandingPad() && 1980 "Call to landingpad not in landing pad!"); 1981 1982 MachineBasicBlock *MBB = FuncInfo.MBB; 1983 MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); 1984 AddLandingPadInfo(LP, MMI, MBB); 1985 1986 // If there aren't registers to copy the values into (e.g., during SjLj 1987 // exceptions), then don't bother to create these DAG nodes. 
1988 const TargetLowering *TLI = TM.getTargetLowering(); 1989 if (TLI->getExceptionPointerRegister() == 0 && 1990 TLI->getExceptionSelectorRegister() == 0) 1991 return; 1992 1993 SmallVector<EVT, 2> ValueVTs; 1994 ComputeValueVTs(*TLI, LP.getType(), ValueVTs); 1995 assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported"); 1996 1997 // Get the two live-in registers as SDValues. The physregs have already been 1998 // copied into virtual registers. 1999 SDValue Ops[2]; 2000 Ops[0] = DAG.getZExtOrTrunc( 2001 DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), 2002 FuncInfo.ExceptionPointerVirtReg, TLI->getPointerTy()), 2003 getCurSDLoc(), ValueVTs[0]); 2004 Ops[1] = DAG.getZExtOrTrunc( 2005 DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), 2006 FuncInfo.ExceptionSelectorVirtReg, TLI->getPointerTy()), 2007 getCurSDLoc(), ValueVTs[1]); 2008 2009 // Merge into one. 2010 SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), 2011 DAG.getVTList(&ValueVTs[0], ValueVTs.size()), 2012 &Ops[0], 2); 2013 setValue(&LP, Res); 2014} 2015 2016/// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for 2017/// small case ranges). 2018bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, 2019 CaseRecVector& WorkList, 2020 const Value* SV, 2021 MachineBasicBlock *Default, 2022 MachineBasicBlock *SwitchBB) { 2023 // Size is the number of Cases represented by this range. 2024 size_t Size = CR.Range.second - CR.Range.first; 2025 if (Size > 3) 2026 return false; 2027 2028 // Get the MachineFunction which holds the current MBB. This is used when 2029 // inserting any additional MBBs necessary to represent the switch. 2030 MachineFunction *CurMF = FuncInfo.MF; 2031 2032 // Figure out which block is immediately after the current one. 
MachineBasicBlock *NextBlock = 0;
  MachineFunction::iterator BBI = CR.CaseBB;

  if (++BBI != FuncInfo.MF->end())
    NextBlock = BBI;

  BranchProbabilityInfo *BPI = FuncInfo.BPI;
  // If two cases have the same destination, and if one value is the same as
  // the other but has one bit unset that the other has set, use bit
  // manipulation to do two compares at once.  For example:
  // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
  // TODO: This could be extended to merge any 2 cases in switches with 3 cases.
  // TODO: Handle cases where CR.CaseBB != SwitchBB.
  if (Size == 2 && CR.CaseBB == SwitchBB) {
    Case &Small = *CR.Range.first;
    Case &Big = *(CR.Range.second-1);

    // Both entries must be single values (not ranges) with one destination.
    if (Small.Low == Small.High && Big.Low == Big.High && Small.BB == Big.BB) {
      const APInt& SmallValue = cast<ConstantInt>(Small.Low)->getValue();
      const APInt& BigValue = cast<ConstantInt>(Big.Low)->getValue();

      // Check that there is only one bit different.
      if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 &&
          (SmallValue | BigValue) == BigValue) {
        // Isolate the single bit that is set in BigValue but not in
        // SmallValue.
        APInt CommonBit = BigValue & ~SmallValue;
        assert((SmallValue | CommonBit) == BigValue &&
               CommonBit.countPopulation() == 1 && "Not a common bit?");

        SDValue CondLHS = getValue(SV);
        EVT VT = CondLHS.getValueType();
        SDLoc DL = getCurSDLoc();

        // (SV | CommonBit) == BigValue holds exactly for the two case values.
        SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
                                 DAG.getConstant(CommonBit, VT));
        SDValue Cond = DAG.getSetCC(DL, MVT::i1,
                                    Or, DAG.getConstant(BigValue, VT),
                                    ISD::SETEQ);

        // Update successor info.
        // Both Small and Big will jump to Small.BB, so we sum up the weights.
        addSuccessorWithWeight(SwitchBB, Small.BB,
                               Small.ExtraWeight + Big.ExtraWeight);
        addSuccessorWithWeight(SwitchBB, Default,
          // The default destination is the first successor in IR.
          BPI ? BPI->getEdgeWeight(SwitchBB->getBasicBlock(), (unsigned)0) : 0);

        // Insert the true branch.
        SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other,
                                     getControlRoot(), Cond,
                                     DAG.getBasicBlock(Small.BB));

        // Insert the false branch.
        BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
                             DAG.getBasicBlock(Default));

        DAG.setRoot(BrCond);
        return true;
      }
    }
  }

  // Order cases by weight so the most likely case will be checked first.
  // UnhandledWeights accumulates the total weight of all cases in the range.
  uint32_t UnhandledWeights = 0;
  if (BPI) {
    for (CaseItr I = CR.Range.first, IE = CR.Range.second; I != IE; ++I) {
      uint32_t IWeight = I->ExtraWeight;
      UnhandledWeights += IWeight;
      for (CaseItr J = CR.Range.first; J < I; ++J) {
        uint32_t JWeight = J->ExtraWeight;
        if (IWeight > JWeight)
          std::swap(*I, *J);
      }
    }
  }
  // Rearrange the case blocks so that the last one falls through if possible.
  Case &BackCase = *(CR.Range.second-1);
  if (Size > 1 &&
      NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
    // The last case block won't fall through into 'NextBlock' if we emit the
    // branches in this order.  See if rearranging a case value would help.
    // We start at the bottom as it's the case with the least weight.
    for (Case *I = &*(CR.Range.second-2), *E = &*CR.Range.first-1; I != E; --I)
      if (I->BB == NextBlock) {
        std::swap(*I, BackCase);
        break;
      }
  }

  // Create a CaseBlock record representing a conditional branch to
  // the Case's target mbb if the value being switched on SV is equal
  // to C.
  MachineBasicBlock *CurBlock = CR.CaseBB;
  for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
    MachineBasicBlock *FallThrough;
    if (I != E-1) {
      // Not the last case: a failed comparison continues in a new block
      // inserted before BBI.
      FallThrough = CurMF->CreateMachineBasicBlock(CurBlock->getBasicBlock());
      CurMF->insert(BBI, FallThrough);

      // Put SV in a virtual register to make it available from the new blocks.
      ExportFromCurrentBlock(SV);
    } else {
      // If the last case doesn't match, go to the default block.
      FallThrough = Default;
    }

    const Value *RHS, *LHS, *MHS;
    ISD::CondCode CC;
    if (I->High == I->Low) {
      // This is just a small case range containing exactly 1 case: SV == High.
      CC = ISD::SETEQ;
      LHS = SV; RHS = I->High; MHS = NULL;
    } else {
      // A real range: Low <= SV <= High, with SV passed as the middle operand.
      CC = ISD::SETLE;
      LHS = I->Low; MHS = SV; RHS = I->High;
    }

    // The false weight should be sum of all un-handled cases.
    UnhandledWeights -= I->ExtraWeight;
    CaseBlock CB(CC, LHS, RHS, MHS, /* truebb */ I->BB, /* falsebb */ FallThrough,
                 /* me */ CurBlock,
                 /* trueweight */ I->ExtraWeight,
                 /* falseweight */ UnhandledWeights);

    // If emitting the first comparison, just call visitSwitchCase to emit the
    // code into the current block.  Otherwise, push the CaseBlock onto the
    // vector to be later processed by SDISel, and insert the node's MBB
    // before the next MBB.
    if (CurBlock == SwitchBB)
      visitSwitchCase(CB, SwitchBB);
    else
      SwitchCases.push_back(CB);

    CurBlock = FallThrough;
  }

  return true;
}

/// areJTsAllowed - Return true if the target supports jump tables and at
/// least one of BR_JT or BRIND is legal or custom-lowered.
static inline bool areJTsAllowed(const TargetLowering &TLI) {
  return TLI.supportJumpTables() &&
          (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
           TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
}

/// ComputeRange - Return Last - First + 1.  Both operands are sign-extended
/// to one bit more than the wider of the two before subtracting, and the
/// result has that widened bit width.
static APInt ComputeRange(const APInt &First, const APInt &Last) {
  uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1;
  APInt LastExt = Last.sext(BitWidth), FirstExt = First.sext(BitWidth);
  return (LastExt - FirstExt + 1ULL);
}

/// handleJTSwitchCase - Emit a jump table for the current switch case range.
/// Returns false (emitting nothing) when jump tables are not allowed, the
/// range has too few entries, or it is not dense enough.
bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
                                             CaseRecVector &WorkList,
                                             const Value *SV,
                                             MachineBasicBlock *Default,
                                             MachineBasicBlock *SwitchBB) {
  Case& FrontCase = *CR.Range.first;
  Case& BackCase = *(CR.Range.second-1);

  const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
  const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue();

  // TSize is the sum of the sizes of all cases in the range.
  APInt TSize(First.getBitWidth(), 0);
  for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I)
    TSize += I->size();

  const TargetLowering *TLI = TM.getTargetLowering();
  if (!areJTsAllowed(*TLI) || TSize.ult(TLI->getMinimumJumpTableEntries()))
    return false;

  APInt Range = ComputeRange(First, Last);
  // The density is TSize / Range. Require at least 40%.
  // It should not be possible for IntTSize to saturate for sane code, but make
  // sure we handle Range saturation correctly.
uint64_t IntRange = Range.getLimitedValue(UINT64_MAX/10);
  uint64_t IntTSize = TSize.getLimitedValue(UINT64_MAX/10);
  // Integer form of "TSize / Range < 0.4"; both values were clamped to
  // UINT64_MAX/10 above.
  if (IntTSize * 10 < IntRange * 4)
    return false;

  DEBUG(dbgs() << "Lowering jump table\n"
               << "First entry: " << First << ". Last entry: " << Last << '\n'
               << "Range: " << Range << ". Size: " << TSize << ".\n\n");

  // Get the MachineFunction which holds the current MBB.  This is used when
  // inserting any additional MBBs necessary to represent the switch.
  MachineFunction *CurMF = FuncInfo.MF;

  // Figure out which block is immediately after the current one.
  MachineFunction::iterator BBI = CR.CaseBB;
  ++BBI;

  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();

  // Create a new basic block to hold the code for loading the address
  // of the jump table, and jumping to it.  Update successor information;
  // we will either branch to the default case for the switch, or the jump
  // table.
  MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB);
  CurMF->insert(BBI, JumpTableBB);

  addSuccessorWithWeight(CR.CaseBB, Default);
  addSuccessorWithWeight(CR.CaseBB, JumpTableBB);

  // Build a vector of destination BBs, corresponding to each target
  // of the jump table. If the value of the jump table slot corresponds to
  // a case statement, push the case's BB onto the vector, otherwise, push
  // the default BB.
  std::vector<MachineBasicBlock*> DestBBs;
  APInt TEI = First;
  // TEI walks every table slot value starting at First; I advances to the
  // next case only once TEI reaches the current case's High.
  for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) {
    const APInt &Low = cast<ConstantInt>(I->Low)->getValue();
    const APInt &High = cast<ConstantInt>(I->High)->getValue();

    if (Low.sle(TEI) && TEI.sle(High)) {
      DestBBs.push_back(I->BB);
      if (TEI==High)
        ++I;
    } else {
      DestBBs.push_back(Default);
    }
  }

  // Calculate weight for each unique destination in CR.
  DenseMap<MachineBasicBlock*, uint32_t> DestWeights;
  if (FuncInfo.BPI)
    for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
      DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr =
          DestWeights.find(I->BB);
      if (Itr != DestWeights.end())
        Itr->second += I->ExtraWeight;
      else
        DestWeights[I->BB] = I->ExtraWeight;
    }

  // Update successor info. Add one edge to each unique successor.
  // SuccsHandled is indexed by block number and marks destinations that
  // already received their edge.
  BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs());
  for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(),
         E = DestBBs.end(); I != E; ++I) {
    if (!SuccsHandled[(*I)->getNumber()]) {
      SuccsHandled[(*I)->getNumber()] = true;
      DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr =
          DestWeights.find(*I);
      // Destinations without a recorded weight get weight 0.
      addSuccessorWithWeight(JumpTableBB, *I,
                             Itr != DestWeights.end() ? Itr->second : 0);
    }
  }

  // Create a jump table index for this jump table.
  unsigned JTEncoding = TLI->getJumpTableEncoding();
  unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding)
                       ->createJumpTableIndex(DestBBs);

  // Set the jump table information so that we can codegen it as a second
  // MachineBasicBlock
  JumpTable JT(-1U, JTI, JumpTableBB, Default);
  JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == SwitchBB));
  // The header is emitted right away only when it belongs to the block
  // currently being built.
  if (CR.CaseBB == SwitchBB)
    visitJumpTableHeader(JT, JTH, SwitchBB);

  JTCases.push_back(JumpTableBlock(JTH, JT));
  return true;
}

/// handleBTSplitSwitchCase - Emit a comparison and split the binary search
/// tree into 2 subtrees.  Subtrees that cannot be branched to directly are
/// pushed onto \p WorkList for later processing.
bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
                                                  CaseRecVector& WorkList,
                                                  const Value* SV,
                                                  MachineBasicBlock* Default,
                                                  MachineBasicBlock* SwitchBB) {
  // Get the MachineFunction which holds the current MBB.  This is used when
  // inserting any additional MBBs necessary to represent the switch.
  MachineFunction *CurMF = FuncInfo.MF;

  // Figure out which block is immediately after the current one.
  MachineFunction::iterator BBI = CR.CaseBB;
  ++BBI;

  Case& FrontCase = *CR.Range.first;
  Case& BackCase = *(CR.Range.second-1);
  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();

  // Size is the number of Cases represented by this range.
  unsigned Size = CR.Range.second - CR.Range.first;

  const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
  const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue();
  double FMetric = 0;
  CaseItr Pivot = CR.Range.first + Size/2;

  // Select optimal pivot, maximizing sum density of LHS and RHS. This will
  // (heuristically) allow us to emit JumpTable's later.
APInt TSize(First.getBitWidth(), 0);
  for (CaseItr I = CR.Range.first, E = CR.Range.second;
       I!=E; ++I)
    TSize += I->size();

  // LSize/RSize track the total case size on each side of the candidate
  // split point; they are updated as the split point moves right.
  APInt LSize = FrontCase.size();
  APInt RSize = TSize-LSize;
  DEBUG(dbgs() << "Selecting best pivot: \n"
               << "First: " << First << ", Last: " << Last <<'\n'
               << "LSize: " << LSize << ", RSize: " << RSize << '\n');
  for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second;
       J!=E; ++I, ++J) {
    const APInt &LEnd = cast<ConstantInt>(I->High)->getValue();
    const APInt &RBegin = cast<ConstantInt>(J->Low)->getValue();
    APInt Range = ComputeRange(LEnd, RBegin);
    assert((Range - 2ULL).isNonNegative() &&
           "Invalid case distance");
    // Use volatile double here to avoid excess precision issues on some hosts,
    // e.g. that use 80-bit X87 registers.
    volatile double LDensity =
       (double)LSize.roundToDouble() /
                           (LEnd - First + 1ULL).roundToDouble();
    volatile double RDensity =
      (double)RSize.roundToDouble() /
                           (Last - RBegin + 1ULL).roundToDouble();
    // Weight the summed densities by log2 of the gap between the two halves.
    double Metric = Range.logBase2()*(LDensity+RDensity);
    // Should always split in some non-trivial place
    DEBUG(dbgs() <<"=>Step\n"
                 << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n'
                 << "LDensity: " << LDensity
                 << ", RDensity: " << RDensity << '\n'
                 << "Metric: " << Metric << '\n');
    if (FMetric < Metric) {
      Pivot = J;
      FMetric = Metric;
      DEBUG(dbgs() << "Current metric set to: " << FMetric << '\n');
    }

    LSize += J->size();
    RSize -= J->size();
  }

  const TargetLowering *TLI = TM.getTargetLowering();
  if (areJTsAllowed(*TLI)) {
    // If our case is dense we *really* should handle it earlier!
    assert((FMetric > 0) && "Should handle dense range earlier!");
  } else {
    // Without jump tables the density heuristic is ignored: split in the
    // middle.
    Pivot = CR.Range.first + Size/2;
  }

  CaseRange LHSR(CR.Range.first, Pivot);
  CaseRange RHSR(Pivot, CR.Range.second);
  const Constant *C = Pivot->Low;
  MachineBasicBlock *FalseBB = 0, *TrueBB = 0;

  // We know that we branch to the LHS if the Value being switched on is
  // less than the Pivot value, C.  We use this to optimize our binary
  // tree a bit, by recognizing that if SV is greater than or equal to the
  // LHS's Case Value, and that Case Value is exactly one less than the
  // Pivot's Value, then we can branch directly to the LHS's Target,
  // rather than creating a leaf node for it.
  if ((LHSR.second - LHSR.first) == 1 &&
      LHSR.first->High == CR.GE &&
      cast<ConstantInt>(C)->getValue() ==
      (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) {
    TrueBB = LHSR.first->BB;
  } else {
    TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB);
    CurMF->insert(BBI, TrueBB);
    WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR));

    // Put SV in a virtual register to make it available from the new blocks.
    ExportFromCurrentBlock(SV);
  }

  // Similar to the optimization above, if the Value being switched on is
  // known to be less than the Constant CR.LT, and the current Case Value
  // is CR.LT - 1, then we can branch directly to the target block for
  // the current Case Value, rather than emitting a RHS leaf node for it.
  if ((RHSR.second - RHSR.first) == 1 && CR.LT &&
      cast<ConstantInt>(RHSR.first->Low)->getValue() ==
      (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) {
    FalseBB = RHSR.first->BB;
  } else {
    FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
    CurMF->insert(BBI, FalseBB);
    WorkList.push_back(CaseRec(FalseBB,CR.LT,C,RHSR));

    // Put SV in a virtual register to make it available from the new blocks.
    ExportFromCurrentBlock(SV);
  }

  // Create a CaseBlock record representing a conditional branch to
  // the LHS node if the value being switched on SV is less than C.
  // Otherwise, branch to the RHS node.
  CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);

  // Emit immediately when this is the block being built; otherwise queue it.
  if (CR.CaseBB == SwitchBB)
    visitSwitchCase(CB, SwitchBB);
  else
    SwitchCases.push_back(CB);

  return true;
}

/// handleBitTestsSwitchCase - If the current case range has few destinations
/// and spans fewer values than the machine word bitwidth, encode the case
/// range into a series of masks and emit bit tests with these masks.
/// Returns false when the range is not suitable for bit tests.
bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
                                                   CaseRecVector& WorkList,
                                                   const Value* SV,
                                                   MachineBasicBlock* Default,
                                                   MachineBasicBlock* SwitchBB) {
  const TargetLowering *TLI = TM.getTargetLowering();
  EVT PTy = TLI->getPointerTy();
  unsigned IntPtrBits = PTy.getSizeInBits();

  Case& FrontCase = *CR.Range.first;
  Case& BackCase = *(CR.Range.second-1);

  // Get the MachineFunction which holds the current MBB.  This is used when
  // inserting any additional MBBs necessary to represent the switch.
  MachineFunction *CurMF = FuncInfo.MF;

  // If target does not have legal shift left, do not emit bit tests at all.
  if (!TLI->isOperationLegal(ISD::SHL, PTy))
    return false;

  size_t numCmps = 0;
  for (CaseItr I = CR.Range.first, E = CR.Range.second;
       I!=E; ++I) {
    // Single case counts one, case range - two.
    numCmps += (I->Low == I->High ?
1 : 2);
  }

  // Count unique destinations
  SmallSet<MachineBasicBlock*, 4> Dests;
  for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
    Dests.insert(I->BB);
    if (Dests.size() > 3)
      // Don't bother with the code below if there are too many unique
      // destinations.
      return false;
  }
  DEBUG(dbgs() << "Total number of unique destinations: "
        << Dests.size() << '\n'
        << "Total number of comparisons: " << numCmps << '\n');

  // Compute span of values.
  const APInt& minValue = cast<ConstantInt>(FrontCase.Low)->getValue();
  const APInt& maxValue = cast<ConstantInt>(BackCase.High)->getValue();
  APInt cmpRange = maxValue - minValue;

  DEBUG(dbgs() << "Compare range: " << cmpRange << '\n'
               << "Low bound: " << minValue << '\n'
               << "High bound: " << maxValue << '\n');

  // Give up when the span does not fit in a pointer-sized mask, or when the
  // number of comparisons saved is too small for the number of destinations.
  if (cmpRange.uge(IntPtrBits) ||
      (!(Dests.size() == 1 && numCmps >= 3) &&
       !(Dests.size() == 2 && numCmps >= 5) &&
       !(Dests.size() >= 3 && numCmps >= 6)))
    return false;

  DEBUG(dbgs() << "Emitting bit tests\n");
  APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth());

  // Optimize the case where all the case values fit in a
  // word without having to subtract minValue. In this case,
  // we can optimize away the subtraction.
  if (minValue.isNonNegative() && maxValue.slt(IntPtrBits)) {
    cmpRange = maxValue;
  } else {
    lowBound = minValue;
  }

  CaseBitsVector CasesBits;
  unsigned i, count = 0;

  for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
    MachineBasicBlock* Dest = I->BB;
    // Look for an existing entry for this destination; i == count afterwards
    // means none was found.
    for (i = 0; i < count; ++i)
      if (Dest == CasesBits[i].BB)
        break;

    if (i == count) {
      assert((count < 3) && "Too much destinations to test!");
      CasesBits.push_back(CaseBits(0, Dest, 0, 0/*Weight*/));
      count++;
    }

    const APInt& lowValue = cast<ConstantInt>(I->Low)->getValue();
    const APInt& highValue = cast<ConstantInt>(I->High)->getValue();

    uint64_t lo = (lowValue - lowBound).getZExtValue();
    uint64_t hi = (highValue - lowBound).getZExtValue();
    CasesBits[i].ExtraWeight += I->ExtraWeight;

    // Set one mask bit per value covered by this case.
    for (uint64_t j = lo; j <= hi; j++) {
      CasesBits[i].Mask |=  1ULL << j;
      CasesBits[i].Bits++;
    }

  }
  std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp());

  BitTestInfo BTC;

  // Figure out which block is immediately after the current one.
  MachineFunction::iterator BBI = CR.CaseBB;
  ++BBI;

  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();

  DEBUG(dbgs() << "Cases:\n");
  // Create one new MBB per destination mask and record it in BTC.
  for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) {
    DEBUG(dbgs() << "Mask: " << CasesBits[i].Mask
                 << ", Bits: " << CasesBits[i].Bits
                 << ", BB: " << CasesBits[i].BB << '\n');

    MachineBasicBlock *CaseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
    CurMF->insert(BBI, CaseBB);
    BTC.push_back(BitTestCase(CasesBits[i].Mask,
                              CaseBB,
                              CasesBits[i].BB, CasesBits[i].ExtraWeight));

    // Put SV in a virtual register to make it available from the new blocks.
    ExportFromCurrentBlock(SV);
  }

  BitTestBlock BTB(lowBound, cmpRange, SV,
                   -1U, MVT::Other, (CR.CaseBB == SwitchBB),
                   CR.CaseBB, Default, BTC);

  // The header is emitted right away only when it belongs to the block
  // currently being built.
  if (CR.CaseBB == SwitchBB)
    visitBitTestHeader(BTB, SwitchBB);

  BitTestCases.push_back(BTB);

  return true;
}

/// Clusterify - Transform a simple list of Cases into a list of CaseRanges:
/// one Case per switch case value, sorted, with adjacent values that share a
/// destination merged into a single range.  Returns the number of
/// comparisons, counting a range twice.
size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
                                       const SwitchInst& SI) {
  size_t numCmps = 0;

  BranchProbabilityInfo *BPI = FuncInfo.BPI;
  // Start with "simple" cases
  for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end();
       i != e; ++i) {
    const BasicBlock *SuccBB = i.getCaseSuccessor();
    MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB];

    // Without branch probability info every case gets weight 0.
    uint32_t ExtraWeight =
      BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0;

    Cases.push_back(Case(i.getCaseValue(), i.getCaseValue(),
                         SMBB, ExtraWeight));
  }
  std::sort(Cases.begin(), Cases.end(), CaseCmp());

  // Merge case into clusters
  if (Cases.size() >= 2)
    // Must recompute end() each iteration because it may be
    // invalidated by erase if we hold on to it
    for (CaseItr I = Cases.begin(), J = llvm::next(Cases.begin());
         J != Cases.end(); ) {
      const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
      const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
      MachineBasicBlock* nextBB = J->BB;
      MachineBasicBlock* currentBB = I->BB;

      // If the two neighboring cases go to the same destination, merge them
      // into a single case.
2599 if ((nextValue - currentValue == 1) && (currentBB == nextBB)) { 2600 I->High = J->High; 2601 I->ExtraWeight += J->ExtraWeight; 2602 J = Cases.erase(J); 2603 } else { 2604 I = J++; 2605 } 2606 } 2607 2608 for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) { 2609 if (I->Low != I->High) 2610 // A range counts double, since it requires two compares. 2611 ++numCmps; 2612 } 2613 2614 return numCmps; 2615} 2616 2617void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First, 2618 MachineBasicBlock *Last) { 2619 // Update JTCases. 2620 for (unsigned i = 0, e = JTCases.size(); i != e; ++i) 2621 if (JTCases[i].first.HeaderBB == First) 2622 JTCases[i].first.HeaderBB = Last; 2623 2624 // Update BitTestCases. 2625 for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i) 2626 if (BitTestCases[i].Parent == First) 2627 BitTestCases[i].Parent = Last; 2628} 2629 2630void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { 2631 MachineBasicBlock *SwitchMBB = FuncInfo.MBB; 2632 2633 // Figure out which block is immediately after the current one. 2634 MachineBasicBlock *NextBlock = 0; 2635 MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()]; 2636 2637 // If there is only the default destination, branch to it if it is not the 2638 // next basic block. Otherwise, just fall through. 2639 if (!SI.getNumCases()) { 2640 // Update machine-CFG edges. 2641 2642 // If this is not a fall-through branch, emit the branch. 2643 SwitchMBB->addSuccessor(Default); 2644 if (Default != NextBlock) 2645 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), 2646 MVT::Other, getControlRoot(), 2647 DAG.getBasicBlock(Default))); 2648 2649 return; 2650 } 2651 2652 // If there are any non-default case statements, create a vector of Cases 2653 // representing each one, and sort the vector so that we can efficiently 2654 // create a binary search tree from them. 2655 CaseVector Cases; 2656 size_t numCmps = Clusterify(Cases, SI); 2657 DEBUG(dbgs() << "Clusterify finished. 
Total clusters: " << Cases.size() 2658 << ". Total compares: " << numCmps << '\n'); 2659 (void)numCmps; 2660 2661 // Get the Value to be switched on and default basic blocks, which will be 2662 // inserted into CaseBlock records, representing basic blocks in the binary 2663 // search tree. 2664 const Value *SV = SI.getCondition(); 2665 2666 // Push the initial CaseRec onto the worklist 2667 CaseRecVector WorkList; 2668 WorkList.push_back(CaseRec(SwitchMBB,0,0, 2669 CaseRange(Cases.begin(),Cases.end()))); 2670 2671 while (!WorkList.empty()) { 2672 // Grab a record representing a case range to process off the worklist 2673 CaseRec CR = WorkList.back(); 2674 WorkList.pop_back(); 2675 2676 if (handleBitTestsSwitchCase(CR, WorkList, SV, Default, SwitchMBB)) 2677 continue; 2678 2679 // If the range has few cases (two or less) emit a series of specific 2680 // tests. 2681 if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB)) 2682 continue; 2683 2684 // If the switch has more than N blocks, and is at least 40% dense, and the 2685 // target supports indirect branches, then emit a jump table rather than 2686 // lowering the switch to a binary tree of conditional branches. 2687 // N defaults to 4 and is controlled via TLS.getMinimumJumpTableEntries(). 2688 if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB)) 2689 continue; 2690 2691 // Emit binary tree. We need to pick a pivot, and push left and right ranges 2692 // onto the worklist. Leafs are handled via handleSmallSwitchRange() call. 2693 handleBTSplitSwitchCase(CR, WorkList, SV, Default, SwitchMBB); 2694 } 2695} 2696 2697void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { 2698 MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB; 2699 2700 // Update machine-CFG edges with unique successors. 
2701 SmallSet<BasicBlock*, 32> Done; 2702 for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) { 2703 BasicBlock *BB = I.getSuccessor(i); 2704 bool Inserted = Done.insert(BB); 2705 if (!Inserted) 2706 continue; 2707 2708 MachineBasicBlock *Succ = FuncInfo.MBBMap[BB]; 2709 addSuccessorWithWeight(IndirectBrMBB, Succ); 2710 } 2711 2712 DAG.setRoot(DAG.getNode(ISD::BRIND, getCurSDLoc(), 2713 MVT::Other, getControlRoot(), 2714 getValue(I.getAddress()))); 2715} 2716 2717void SelectionDAGBuilder::visitFSub(const User &I) { 2718 // -0.0 - X --> fneg 2719 Type *Ty = I.getType(); 2720 if (isa<Constant>(I.getOperand(0)) && 2721 I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) { 2722 SDValue Op2 = getValue(I.getOperand(1)); 2723 setValue(&I, DAG.getNode(ISD::FNEG, getCurSDLoc(), 2724 Op2.getValueType(), Op2)); 2725 return; 2726 } 2727 2728 visitBinary(I, ISD::FSUB); 2729} 2730 2731void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { 2732 SDValue Op1 = getValue(I.getOperand(0)); 2733 SDValue Op2 = getValue(I.getOperand(1)); 2734 setValue(&I, DAG.getNode(OpCode, getCurSDLoc(), 2735 Op1.getValueType(), Op1, Op2)); 2736} 2737 2738void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { 2739 SDValue Op1 = getValue(I.getOperand(0)); 2740 SDValue Op2 = getValue(I.getOperand(1)); 2741 2742 EVT ShiftTy = TM.getTargetLowering()->getShiftAmountTy(Op2.getValueType()); 2743 2744 // Coerce the shift amount to the right type if we can. 2745 if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) { 2746 unsigned ShiftSize = ShiftTy.getSizeInBits(); 2747 unsigned Op2Size = Op2.getValueType().getSizeInBits(); 2748 SDLoc DL = getCurSDLoc(); 2749 2750 // If the operand is smaller than the shift count type, promote it. 
2751 if (ShiftSize > Op2Size) 2752 Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2); 2753 2754 // If the operand is larger than the shift count type but the shift 2755 // count type has enough bits to represent any shift value, truncate 2756 // it now. This is a common case and it exposes the truncate to 2757 // optimization early. 2758 else if (ShiftSize >= Log2_32_Ceil(Op2.getValueType().getSizeInBits())) 2759 Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2); 2760 // Otherwise we'll need to temporarily settle for some other convenient 2761 // type. Type legalization will make adjustments once the shiftee is split. 2762 else 2763 Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32); 2764 } 2765 2766 setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), 2767 Op1.getValueType(), Op1, Op2)); 2768} 2769 2770void SelectionDAGBuilder::visitSDiv(const User &I) { 2771 SDValue Op1 = getValue(I.getOperand(0)); 2772 SDValue Op2 = getValue(I.getOperand(1)); 2773 2774 // Turn exact SDivs into multiplications. 2775 // FIXME: This should be in DAGCombiner, but it doesn't have access to the 2776 // exact bit. 
2777 if (isa<BinaryOperator>(&I) && cast<BinaryOperator>(&I)->isExact() && 2778 !isa<ConstantSDNode>(Op1) && 2779 isa<ConstantSDNode>(Op2) && !cast<ConstantSDNode>(Op2)->isNullValue()) 2780 setValue(&I, TM.getTargetLowering()->BuildExactSDIV(Op1, Op2, 2781 getCurSDLoc(), DAG)); 2782 else 2783 setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), 2784 Op1, Op2)); 2785} 2786 2787void SelectionDAGBuilder::visitICmp(const User &I) { 2788 ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE; 2789 if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I)) 2790 predicate = IC->getPredicate(); 2791 else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I)) 2792 predicate = ICmpInst::Predicate(IC->getPredicate()); 2793 SDValue Op1 = getValue(I.getOperand(0)); 2794 SDValue Op2 = getValue(I.getOperand(1)); 2795 ISD::CondCode Opcode = getICmpCondCode(predicate); 2796 2797 EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); 2798 setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode)); 2799} 2800 2801void SelectionDAGBuilder::visitFCmp(const User &I) { 2802 FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE; 2803 if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I)) 2804 predicate = FC->getPredicate(); 2805 else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I)) 2806 predicate = FCmpInst::Predicate(FC->getPredicate()); 2807 SDValue Op1 = getValue(I.getOperand(0)); 2808 SDValue Op2 = getValue(I.getOperand(1)); 2809 ISD::CondCode Condition = getFCmpCondCode(predicate); 2810 if (TM.Options.NoNaNsFPMath) 2811 Condition = getFCmpCodeWithoutNaN(Condition); 2812 EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); 2813 setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition)); 2814} 2815 2816void SelectionDAGBuilder::visitSelect(const User &I) { 2817 SmallVector<EVT, 4> ValueVTs; 2818 ComputeValueVTs(*TM.getTargetLowering(), I.getType(), ValueVTs); 2819 unsigned NumValues = ValueVTs.size(); 2820 if (NumValues 
== 0) return; 2821 2822 SmallVector<SDValue, 4> Values(NumValues); 2823 SDValue Cond = getValue(I.getOperand(0)); 2824 SDValue TrueVal = getValue(I.getOperand(1)); 2825 SDValue FalseVal = getValue(I.getOperand(2)); 2826 ISD::NodeType OpCode = Cond.getValueType().isVector() ? 2827 ISD::VSELECT : ISD::SELECT; 2828 2829 for (unsigned i = 0; i != NumValues; ++i) 2830 Values[i] = DAG.getNode(OpCode, getCurSDLoc(), 2831 TrueVal.getNode()->getValueType(TrueVal.getResNo()+i), 2832 Cond, 2833 SDValue(TrueVal.getNode(), 2834 TrueVal.getResNo() + i), 2835 SDValue(FalseVal.getNode(), 2836 FalseVal.getResNo() + i)); 2837 2838 setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), 2839 DAG.getVTList(&ValueVTs[0], NumValues), 2840 &Values[0], NumValues)); 2841} 2842 2843void SelectionDAGBuilder::visitTrunc(const User &I) { 2844 // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). 2845 SDValue N = getValue(I.getOperand(0)); 2846 EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); 2847 setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N)); 2848} 2849 2850void SelectionDAGBuilder::visitZExt(const User &I) { 2851 // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest). 2852 // ZExt also can't be a cast to bool for same reason. So, nothing much to do 2853 SDValue N = getValue(I.getOperand(0)); 2854 EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); 2855 setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N)); 2856} 2857 2858void SelectionDAGBuilder::visitSExt(const User &I) { 2859 // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest). 2860 // SExt also can't be a cast to bool for same reason. 
So, nothing much to do 2861 SDValue N = getValue(I.getOperand(0)); 2862 EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); 2863 setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N)); 2864} 2865 2866void SelectionDAGBuilder::visitFPTrunc(const User &I) { 2867 // FPTrunc is never a no-op cast, no need to check 2868 SDValue N = getValue(I.getOperand(0)); 2869 const TargetLowering *TLI = TM.getTargetLowering(); 2870 EVT DestVT = TLI->getValueType(I.getType()); 2871 setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurSDLoc(), 2872 DestVT, N, 2873 DAG.getTargetConstant(0, TLI->getPointerTy()))); 2874} 2875 2876void SelectionDAGBuilder::visitFPExt(const User &I) { 2877 // FPExt is never a no-op cast, no need to check 2878 SDValue N = getValue(I.getOperand(0)); 2879 EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); 2880 setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N)); 2881} 2882 2883void SelectionDAGBuilder::visitFPToUI(const User &I) { 2884 // FPToUI is never a no-op cast, no need to check 2885 SDValue N = getValue(I.getOperand(0)); 2886 EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); 2887 setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N)); 2888} 2889 2890void SelectionDAGBuilder::visitFPToSI(const User &I) { 2891 // FPToSI is never a no-op cast, no need to check 2892 SDValue N = getValue(I.getOperand(0)); 2893 EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); 2894 setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N)); 2895} 2896 2897void SelectionDAGBuilder::visitUIToFP(const User &I) { 2898 // UIToFP is never a no-op cast, no need to check 2899 SDValue N = getValue(I.getOperand(0)); 2900 EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); 2901 setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N)); 2902} 2903 2904void SelectionDAGBuilder::visitSIToFP(const User &I) { 2905 // SIToFP is never a no-op cast, no need to check 2906 
SDValue N = getValue(I.getOperand(0)); 2907 EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); 2908 setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N)); 2909} 2910 2911void SelectionDAGBuilder::visitPtrToInt(const User &I) { 2912 // What to do depends on the size of the integer and the size of the pointer. 2913 // We can either truncate, zero extend, or no-op, accordingly. 2914 SDValue N = getValue(I.getOperand(0)); 2915 EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); 2916 setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT)); 2917} 2918 2919void SelectionDAGBuilder::visitIntToPtr(const User &I) { 2920 // What to do depends on the size of the integer and the size of the pointer. 2921 // We can either truncate, zero extend, or no-op, accordingly. 2922 SDValue N = getValue(I.getOperand(0)); 2923 EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); 2924 setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT)); 2925} 2926 2927void SelectionDAGBuilder::visitBitCast(const User &I) { 2928 SDValue N = getValue(I.getOperand(0)); 2929 EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); 2930 2931 // BitCast assures us that source and destination are the same size so this is 2932 // either a BITCAST or a no-op. 2933 if (DestVT != N.getValueType()) 2934 setValue(&I, DAG.getNode(ISD::BITCAST, getCurSDLoc(), 2935 DestVT, N)); // convert types. 2936 else 2937 setValue(&I, N); // noop cast. 
2938} 2939 2940void SelectionDAGBuilder::visitInsertElement(const User &I) { 2941 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 2942 SDValue InVec = getValue(I.getOperand(0)); 2943 SDValue InVal = getValue(I.getOperand(1)); 2944 SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), 2945 getCurSDLoc(), TLI.getVectorIdxTy()); 2946 setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(), 2947 TM.getTargetLowering()->getValueType(I.getType()), 2948 InVec, InVal, InIdx)); 2949} 2950 2951void SelectionDAGBuilder::visitExtractElement(const User &I) { 2952 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 2953 SDValue InVec = getValue(I.getOperand(0)); 2954 SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), 2955 getCurSDLoc(), TLI.getVectorIdxTy()); 2956 setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(), 2957 TM.getTargetLowering()->getValueType(I.getType()), 2958 InVec, InIdx)); 2959} 2960 2961// Utility for visitShuffleVector - Return true if every element in Mask, 2962// beginning from position Pos and ending in Pos+Size, falls within the 2963// specified sequential range [L, L+Pos). or is undef. 
2964static bool isSequentialInRange(const SmallVectorImpl<int> &Mask, 2965 unsigned Pos, unsigned Size, int Low) { 2966 for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low) 2967 if (Mask[i] >= 0 && Mask[i] != Low) 2968 return false; 2969 return true; 2970} 2971 2972void SelectionDAGBuilder::visitShuffleVector(const User &I) { 2973 SDValue Src1 = getValue(I.getOperand(0)); 2974 SDValue Src2 = getValue(I.getOperand(1)); 2975 2976 SmallVector<int, 8> Mask; 2977 ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask); 2978 unsigned MaskNumElts = Mask.size(); 2979 2980 const TargetLowering *TLI = TM.getTargetLowering(); 2981 EVT VT = TLI->getValueType(I.getType()); 2982 EVT SrcVT = Src1.getValueType(); 2983 unsigned SrcNumElts = SrcVT.getVectorNumElements(); 2984 2985 if (SrcNumElts == MaskNumElts) { 2986 setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, 2987 &Mask[0])); 2988 return; 2989 } 2990 2991 // Normalize the shuffle vector since mask and vector length don't match. 2992 if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) { 2993 // Mask is longer than the source vectors and is a multiple of the source 2994 // vectors. We can use concatenate vector to make the mask and vectors 2995 // lengths match. 2996 if (SrcNumElts*2 == MaskNumElts) { 2997 // First check for Src1 in low and Src2 in high 2998 if (isSequentialInRange(Mask, 0, SrcNumElts, 0) && 2999 isSequentialInRange(Mask, SrcNumElts, SrcNumElts, SrcNumElts)) { 3000 // The shuffle is concatenating two vectors together. 3001 setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurSDLoc(), 3002 VT, Src1, Src2)); 3003 return; 3004 } 3005 // Then check for Src2 in low and Src1 in high 3006 if (isSequentialInRange(Mask, 0, SrcNumElts, SrcNumElts) && 3007 isSequentialInRange(Mask, SrcNumElts, SrcNumElts, 0)) { 3008 // The shuffle is concatenating two vectors together. 
3009 setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurSDLoc(), 3010 VT, Src2, Src1)); 3011 return; 3012 } 3013 } 3014 3015 // Pad both vectors with undefs to make them the same length as the mask. 3016 unsigned NumConcat = MaskNumElts / SrcNumElts; 3017 bool Src1U = Src1.getOpcode() == ISD::UNDEF; 3018 bool Src2U = Src2.getOpcode() == ISD::UNDEF; 3019 SDValue UndefVal = DAG.getUNDEF(SrcVT); 3020 3021 SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal); 3022 SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal); 3023 MOps1[0] = Src1; 3024 MOps2[0] = Src2; 3025 3026 Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, 3027 getCurSDLoc(), VT, 3028 &MOps1[0], NumConcat); 3029 Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, 3030 getCurSDLoc(), VT, 3031 &MOps2[0], NumConcat); 3032 3033 // Readjust mask for new input vector length. 3034 SmallVector<int, 8> MappedOps; 3035 for (unsigned i = 0; i != MaskNumElts; ++i) { 3036 int Idx = Mask[i]; 3037 if (Idx >= (int)SrcNumElts) 3038 Idx -= SrcNumElts - MaskNumElts; 3039 MappedOps.push_back(Idx); 3040 } 3041 3042 setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, 3043 &MappedOps[0])); 3044 return; 3045 } 3046 3047 if (SrcNumElts > MaskNumElts) { 3048 // Analyze the access pattern of the vector to see if we can extract 3049 // two subvectors and do the shuffle. The analysis is done by calculating 3050 // the range of elements the mask access on both vectors. 
3051 int MinRange[2] = { static_cast<int>(SrcNumElts), 3052 static_cast<int>(SrcNumElts)}; 3053 int MaxRange[2] = {-1, -1}; 3054 3055 for (unsigned i = 0; i != MaskNumElts; ++i) { 3056 int Idx = Mask[i]; 3057 unsigned Input = 0; 3058 if (Idx < 0) 3059 continue; 3060 3061 if (Idx >= (int)SrcNumElts) { 3062 Input = 1; 3063 Idx -= SrcNumElts; 3064 } 3065 if (Idx > MaxRange[Input]) 3066 MaxRange[Input] = Idx; 3067 if (Idx < MinRange[Input]) 3068 MinRange[Input] = Idx; 3069 } 3070 3071 // Check if the access is smaller than the vector size and can we find 3072 // a reasonable extract index. 3073 int RangeUse[2] = { -1, -1 }; // 0 = Unused, 1 = Extract, -1 = Can not 3074 // Extract. 3075 int StartIdx[2]; // StartIdx to extract from 3076 for (unsigned Input = 0; Input < 2; ++Input) { 3077 if (MinRange[Input] >= (int)SrcNumElts && MaxRange[Input] < 0) { 3078 RangeUse[Input] = 0; // Unused 3079 StartIdx[Input] = 0; 3080 continue; 3081 } 3082 3083 // Find a good start index that is a multiple of the mask length. Then 3084 // see if the rest of the elements are in range. 3085 StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts; 3086 if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts && 3087 StartIdx[Input] + MaskNumElts <= SrcNumElts) 3088 RangeUse[Input] = 1; // Extract from a multiple of the mask length. 3089 } 3090 3091 if (RangeUse[0] == 0 && RangeUse[1] == 0) { 3092 setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used. 3093 return; 3094 } 3095 if (RangeUse[0] >= 0 && RangeUse[1] >= 0) { 3096 // Extract appropriate subvector and generate a vector shuffle 3097 for (unsigned Input = 0; Input < 2; ++Input) { 3098 SDValue &Src = Input == 0 ? Src1 : Src2; 3099 if (RangeUse[Input] == 0) 3100 Src = DAG.getUNDEF(VT); 3101 else 3102 Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurSDLoc(), VT, 3103 Src, DAG.getConstant(StartIdx[Input], 3104 TLI->getVectorIdxTy())); 3105 } 3106 3107 // Calculate new mask. 
3108 SmallVector<int, 8> MappedOps; 3109 for (unsigned i = 0; i != MaskNumElts; ++i) { 3110 int Idx = Mask[i]; 3111 if (Idx >= 0) { 3112 if (Idx < (int)SrcNumElts) 3113 Idx -= StartIdx[0]; 3114 else 3115 Idx -= SrcNumElts + StartIdx[1] - MaskNumElts; 3116 } 3117 MappedOps.push_back(Idx); 3118 } 3119 3120 setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, 3121 &MappedOps[0])); 3122 return; 3123 } 3124 } 3125 3126 // We can't use either concat vectors or extract subvectors so fall back to 3127 // replacing the shuffle with extract and build vector. 3128 // to insert and build vector. 3129 EVT EltVT = VT.getVectorElementType(); 3130 EVT IdxVT = TLI->getVectorIdxTy(); 3131 SmallVector<SDValue,8> Ops; 3132 for (unsigned i = 0; i != MaskNumElts; ++i) { 3133 int Idx = Mask[i]; 3134 SDValue Res; 3135 3136 if (Idx < 0) { 3137 Res = DAG.getUNDEF(EltVT); 3138 } else { 3139 SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2; 3140 if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts; 3141 3142 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(), 3143 EltVT, Src, DAG.getConstant(Idx, IdxVT)); 3144 } 3145 3146 Ops.push_back(Res); 3147 } 3148 3149 setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), 3150 VT, &Ops[0], Ops.size())); 3151} 3152 3153void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { 3154 const Value *Op0 = I.getOperand(0); 3155 const Value *Op1 = I.getOperand(1); 3156 Type *AggTy = I.getType(); 3157 Type *ValTy = Op1->getType(); 3158 bool IntoUndef = isa<UndefValue>(Op0); 3159 bool FromUndef = isa<UndefValue>(Op1); 3160 3161 unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); 3162 3163 const TargetLowering *TLI = TM.getTargetLowering(); 3164 SmallVector<EVT, 4> AggValueVTs; 3165 ComputeValueVTs(*TLI, AggTy, AggValueVTs); 3166 SmallVector<EVT, 4> ValValueVTs; 3167 ComputeValueVTs(*TLI, ValTy, ValValueVTs); 3168 3169 unsigned NumAggValues = AggValueVTs.size(); 3170 unsigned NumValValues = ValValueVTs.size(); 3171 
SmallVector<SDValue, 4> Values(NumAggValues); 3172 3173 SDValue Agg = getValue(Op0); 3174 unsigned i = 0; 3175 // Copy the beginning value(s) from the original aggregate. 3176 for (; i != LinearIndex; ++i) 3177 Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) : 3178 SDValue(Agg.getNode(), Agg.getResNo() + i); 3179 // Copy values from the inserted value(s). 3180 if (NumValValues) { 3181 SDValue Val = getValue(Op1); 3182 for (; i != LinearIndex + NumValValues; ++i) 3183 Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) : 3184 SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex); 3185 } 3186 // Copy remaining value(s) from the original aggregate. 3187 for (; i != NumAggValues; ++i) 3188 Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) : 3189 SDValue(Agg.getNode(), Agg.getResNo() + i); 3190 3191 setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), 3192 DAG.getVTList(&AggValueVTs[0], NumAggValues), 3193 &Values[0], NumAggValues)); 3194} 3195 3196void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { 3197 const Value *Op0 = I.getOperand(0); 3198 Type *AggTy = Op0->getType(); 3199 Type *ValTy = I.getType(); 3200 bool OutOfUndef = isa<UndefValue>(Op0); 3201 3202 unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); 3203 3204 const TargetLowering *TLI = TM.getTargetLowering(); 3205 SmallVector<EVT, 4> ValValueVTs; 3206 ComputeValueVTs(*TLI, ValTy, ValValueVTs); 3207 3208 unsigned NumValValues = ValValueVTs.size(); 3209 3210 // Ignore a extractvalue that produces an empty object 3211 if (!NumValValues) { 3212 setValue(&I, DAG.getUNDEF(MVT(MVT::Other))); 3213 return; 3214 } 3215 3216 SmallVector<SDValue, 4> Values(NumValValues); 3217 3218 SDValue Agg = getValue(Op0); 3219 // Copy out the selected value(s). 3220 for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i) 3221 Values[i - LinearIndex] = 3222 OutOfUndef ? 
3223 DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) : 3224 SDValue(Agg.getNode(), Agg.getResNo() + i); 3225 3226 setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), 3227 DAG.getVTList(&ValValueVTs[0], NumValValues), 3228 &Values[0], NumValValues)); 3229} 3230 3231void SelectionDAGBuilder::visitGetElementPtr(const User &I) { 3232 Value *Op0 = I.getOperand(0); 3233 // Note that the pointer operand may be a vector of pointers. Take the scalar 3234 // element which holds a pointer. 3235 Type *Ty = Op0->getType()->getScalarType(); 3236 unsigned AS = Ty->getPointerAddressSpace(); 3237 SDValue N = getValue(Op0); 3238 3239 for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end(); 3240 OI != E; ++OI) { 3241 const Value *Idx = *OI; 3242 if (StructType *StTy = dyn_cast<StructType>(Ty)) { 3243 unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue(); 3244 if (Field) { 3245 // N = N + Offset 3246 uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field); 3247 N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, 3248 DAG.getConstant(Offset, N.getValueType())); 3249 } 3250 3251 Ty = StTy->getElementType(Field); 3252 } else { 3253 Ty = cast<SequentialType>(Ty)->getElementType(); 3254 3255 // If this is a constant subscript, handle it quickly. 
3256 const TargetLowering *TLI = TM.getTargetLowering(); 3257 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { 3258 if (CI->isZero()) continue; 3259 uint64_t Offs = 3260 TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); 3261 SDValue OffsVal; 3262 EVT PTy = TLI->getPointerTy(AS); 3263 unsigned PtrBits = PTy.getSizeInBits(); 3264 if (PtrBits < 64) 3265 OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), PTy, 3266 DAG.getConstant(Offs, MVT::i64)); 3267 else 3268 OffsVal = DAG.getConstant(Offs, PTy); 3269 3270 N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, 3271 OffsVal); 3272 continue; 3273 } 3274 3275 // N = N + Idx * ElementSize; 3276 APInt ElementSize = APInt(TLI->getPointerSizeInBits(AS), 3277 TD->getTypeAllocSize(Ty)); 3278 SDValue IdxN = getValue(Idx); 3279 3280 // If the index is smaller or larger than intptr_t, truncate or extend 3281 // it. 3282 IdxN = DAG.getSExtOrTrunc(IdxN, getCurSDLoc(), N.getValueType()); 3283 3284 // If this is a multiply by a power of two, turn it into a shl 3285 // immediately. This is a very common case. 3286 if (ElementSize != 1) { 3287 if (ElementSize.isPowerOf2()) { 3288 unsigned Amt = ElementSize.logBase2(); 3289 IdxN = DAG.getNode(ISD::SHL, getCurSDLoc(), 3290 N.getValueType(), IdxN, 3291 DAG.getConstant(Amt, IdxN.getValueType())); 3292 } else { 3293 SDValue Scale = DAG.getConstant(ElementSize, IdxN.getValueType()); 3294 IdxN = DAG.getNode(ISD::MUL, getCurSDLoc(), 3295 N.getValueType(), IdxN, Scale); 3296 } 3297 } 3298 3299 N = DAG.getNode(ISD::ADD, getCurSDLoc(), 3300 N.getValueType(), N, IdxN); 3301 } 3302 } 3303 3304 setValue(&I, N); 3305} 3306 3307void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { 3308 // If this is a fixed sized alloca in the entry block of the function, 3309 // allocate it statically on the stack. 3310 if (FuncInfo.StaticAllocaMap.count(&I)) 3311 return; // getValue will auto-populate this. 
3312 3313 Type *Ty = I.getAllocatedType(); 3314 const TargetLowering *TLI = TM.getTargetLowering(); 3315 uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty); 3316 unsigned Align = 3317 std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty), 3318 I.getAlignment()); 3319 3320 SDValue AllocSize = getValue(I.getArraySize()); 3321 3322 EVT IntPtr = TLI->getPointerTy(); 3323 if (AllocSize.getValueType() != IntPtr) 3324 AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurSDLoc(), IntPtr); 3325 3326 AllocSize = DAG.getNode(ISD::MUL, getCurSDLoc(), IntPtr, 3327 AllocSize, 3328 DAG.getConstant(TySize, IntPtr)); 3329 3330 // Handle alignment. If the requested alignment is less than or equal to 3331 // the stack alignment, ignore it. If the size is greater than or equal to 3332 // the stack alignment, we note this in the DYNAMIC_STACKALLOC node. 3333 unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); 3334 if (Align <= StackAlign) 3335 Align = 0; 3336 3337 // Round the size of the allocation up to the stack alignment size 3338 // by add SA-1 to the size. 3339 AllocSize = DAG.getNode(ISD::ADD, getCurSDLoc(), 3340 AllocSize.getValueType(), AllocSize, 3341 DAG.getIntPtrConstant(StackAlign-1)); 3342 3343 // Mask out the low bits for alignment purposes. 3344 AllocSize = DAG.getNode(ISD::AND, getCurSDLoc(), 3345 AllocSize.getValueType(), AllocSize, 3346 DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1))); 3347 3348 SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) }; 3349 SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other); 3350 SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurSDLoc(), 3351 VTs, Ops, 3); 3352 setValue(&I, DSA); 3353 DAG.setRoot(DSA.getValue(1)); 3354 3355 // Inform the Frame Information that we have just allocated a variable-sized 3356 // object. 3357 FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(Align ? 
Align : 1);
}

/// visitLoad - Lower a load instruction.  Atomic loads are forwarded to
/// visitAtomicLoad.  Otherwise one load node is emitted for every value type
/// ComputeValueVTs reports for the loaded type, each at its reported offset
/// from the pointer operand, and the pieces are combined with MERGE_VALUES.
/// Nothing is emitted when ComputeValueVTs reports no values.
void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
  if (I.isAtomic())
    return visitAtomicLoad(I);

  const Value *SV = I.getOperand(0);
  SDValue Ptr = getValue(SV);

  Type *Ty = I.getType();

  bool isVolatile = I.isVolatile();
  bool isNonTemporal = I.getMetadata("nontemporal") != 0;
  bool isInvariant = I.getMetadata("invariant.load") != 0;
  unsigned Alignment = I.getAlignment();
  const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);
  const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);

  SmallVector<EVT, 4> ValueVTs;
  SmallVector<uint64_t, 4> Offsets;
  ComputeValueVTs(*TM.getTargetLowering(), Ty, ValueVTs, &Offsets);
  unsigned NumValues = ValueVTs.size();
  if (NumValues == 0)
    return;

  // Pick the chain the loads hang off.  Three cases: the builder's getRoot(),
  // the DAG entry node, or the DAG's current root.
  SDValue Root;
  bool ConstantMemory = false;
  if (I.isVolatile() || NumValues > MaxParallelChains)
    // Serialize volatile loads with other side effects.
    Root = getRoot();
  else if (AA->pointsToConstantMemory(
             AliasAnalysis::Location(SV, AA->getTypeStoreSize(Ty), TBAAInfo))) {
    // Do not serialize (non-volatile) loads of constant memory with anything.
    Root = DAG.getEntryNode();
    ConstantMemory = true;
  } else {
    // Do not serialize non-volatile loads against each other.
    Root = DAG.getRoot();
  }

  SmallVector<SDValue, 4> Values(NumValues);
  // Chains holds at most MaxParallelChains entries; ChainI is the next free
  // slot and is reset whenever the slots are folded into a TokenFactor below.
  SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
                                          NumValues));
  EVT PtrVT = Ptr.getValueType();
  unsigned ChainI = 0;
  for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
    // Serializing loads here may result in excessive register pressure, and
    // TokenFactor places arbitrary choke points on the scheduler. SD scheduling
    // could recover a bit by hoisting nodes upward in the chain by recognizing
    // they are side-effect free or do not alias. The optimizer should really
    // avoid this case by converting large object/array copies to llvm.memcpy
    // (MaxParallelChains should always remain as failsafe).
    if (ChainI == MaxParallelChains) {
      assert(PendingLoads.empty() && "PendingLoads must be serialized first");
      SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
                                  MVT::Other, &Chains[0], ChainI);
      Root = Chain;
      ChainI = 0;
    }
    SDValue A = DAG.getNode(ISD::ADD, getCurSDLoc(),
                            PtrVT, Ptr,
                            DAG.getConstant(Offsets[i], PtrVT));
    SDValue L = DAG.getLoad(ValueVTs[i], getCurSDLoc(), Root,
                            A, MachinePointerInfo(SV, Offsets[i]), isVolatile,
                            isNonTemporal, isInvariant, Alignment, TBAAInfo,
                            Ranges);

    Values[i] = L;
    Chains[ChainI] = L.getValue(1);
  }

  // Loads of constant memory leave no chain behind.  Otherwise the joined
  // chain becomes the root (volatile) or is queued in PendingLoads.
  if (!ConstantMemory) {
    SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
                                MVT::Other, &Chains[0], ChainI);
    if (isVolatile)
      DAG.setRoot(Chain);
    else
      PendingLoads.push_back(Chain);
  }

  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
                           DAG.getVTList(&ValueVTs[0], NumValues),
                           &Values[0], NumValues));
}

/// visitStore - Lower a store instruction.  Atomic stores are forwarded to
/// visitAtomicStore.  Otherwise one store node is emitted for every value
/// type ComputeValueVTs reports for the stored value, and the TokenFactor
/// joining their chains becomes the new DAG root.
void SelectionDAGBuilder::visitStore(const StoreInst &I) {
  if (I.isAtomic())
    return visitAtomicStore(I);

  const Value *SrcV = I.getOperand(0);
  const Value *PtrV = I.getOperand(1);

  SmallVector<EVT, 4> ValueVTs;
  SmallVector<uint64_t, 4> Offsets;
  ComputeValueVTs(*TM.getTargetLowering(), SrcV->getType(), ValueVTs, &Offsets);
  unsigned NumValues = ValueVTs.size();
  if (NumValues == 0)
    return;

  // Get the lowered operands. Note that we do this after
  // checking if NumValues is zero, because with zero values
  // the operands won't have values in the map.
  SDValue Src = getValue(SrcV);
  SDValue Ptr = getValue(PtrV);

  SDValue Root = getRoot();
  SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
                                          NumValues));
  EVT PtrVT = Ptr.getValueType();
  bool isVolatile = I.isVolatile();
  bool isNonTemporal = I.getMetadata("nontemporal") != 0;
  unsigned Alignment = I.getAlignment();
  const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);

  unsigned ChainI = 0;
  for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
    // See visitLoad comments.
    if (ChainI == MaxParallelChains) {
      SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
                                  MVT::Other, &Chains[0], ChainI);
      Root = Chain;
      ChainI = 0;
    }
    SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), PtrVT, Ptr,
                              DAG.getConstant(Offsets[i], PtrVT));
    // The i-th piece of the source is result number getResNo() + i of the
    // node that produced Src.
    SDValue St = DAG.getStore(Root, getCurSDLoc(),
                              SDValue(Src.getNode(), Src.getResNo() + i),
                              Add, MachinePointerInfo(PtrV, Offsets[i]),
                              isVolatile, isNonTemporal, Alignment, TBAAInfo);
    Chains[ChainI] = St;
  }

  SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
                                  MVT::Other, &Chains[0], ChainI);
  DAG.setRoot(StoreNode);
}

/// InsertFenceForAtomic - Return \p Chain extended with an ATOMIC_FENCE if the
/// ordering calls for one on the requested side of an atomic operation, or
/// \p Chain unchanged otherwise.
///
/// With \p Before set: AcquireRelease and SequentiallyConsistent are fenced as
/// Release; Acquire and Monotonic get no fence.  With \p Before clear:
/// AcquireRelease is fenced as Acquire; Release and Monotonic get no fence.
/// Any other ordering is fenced with \p Order as given.  The ordering and the
/// scope are passed to the fence as pointer-typed constants.
static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order,
                                    SynchronizationScope Scope,
                                    bool Before, SDLoc dl,
                                    SelectionDAG &DAG,
                                    const TargetLowering &TLI) {
  // Fence, if necessary
  if (Before) {
    if (Order == AcquireRelease || Order == SequentiallyConsistent)
      Order = Release;
    else if (Order == Acquire || Order == Monotonic)
      return Chain;
  } else {
    if (Order == AcquireRelease)
      Order = Acquire;
    else if (Order == Release || Order == Monotonic)
      return Chain;
  }
  SDValue Ops[3];
  Ops[0] = Chain;
  Ops[1] = DAG.getConstant(Order, TLI.getPointerTy());
  Ops[2] = DAG.getConstant(Scope, TLI.getPointerTy());
  return DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops, 3);
}

/// visitAtomicCmpXchg - Lower a cmpxchg instruction to ATOMIC_CMP_SWAP.  When
/// the target reports getInsertFencesForAtomic(), the node itself is emitted
/// as Monotonic and InsertFenceForAtomic is asked for a fence on each side.
void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
  SDLoc dl = getCurSDLoc();
  AtomicOrdering Order = I.getOrdering();
  SynchronizationScope Scope = I.getSynchScope();

  SDValue InChain = getRoot();

  const TargetLowering *TLI = TM.getTargetLowering();
  if (TLI->getInsertFencesForAtomic())
    InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl,
                                   DAG, *TLI);

  SDValue L =
    DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl,
                  getValue(I.getCompareOperand()).getSimpleValueType(),
                  InChain,
                  getValue(I.getPointerOperand()),
                  getValue(I.getCompareOperand()),
                  getValue(I.getNewValOperand()),
                  MachinePointerInfo(I.getPointerOperand()), 0 /* Alignment */,
                  TLI->getInsertFencesForAtomic() ? Monotonic : Order,
                  Scope);

  SDValue OutChain = L.getValue(1);

  if (TLI->getInsertFencesForAtomic())
    OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
                                    DAG, *TLI);

  setValue(&I, L);
  DAG.setRoot(OutChain);
}

/// visitAtomicRMW - Lower an atomicrmw instruction to the ISD atomic node
/// matching its operation.  Fences are handled as in visitAtomicCmpXchg.
void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
  SDLoc dl = getCurSDLoc();
  ISD::NodeType NT;
  switch (I.getOperation()) {
  default: llvm_unreachable("Unknown atomicrmw operation");
  case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break;
  case AtomicRMWInst::Add:  NT = ISD::ATOMIC_LOAD_ADD; break;
  case AtomicRMWInst::Sub:  NT = ISD::ATOMIC_LOAD_SUB; break;
  case AtomicRMWInst::And:  NT = ISD::ATOMIC_LOAD_AND; break;
  case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break;
  case AtomicRMWInst::Or:   NT = ISD::ATOMIC_LOAD_OR; break;
  case AtomicRMWInst::Xor:  NT = ISD::ATOMIC_LOAD_XOR; break;
  case AtomicRMWInst::Max:  NT = ISD::ATOMIC_LOAD_MAX; break;
  case AtomicRMWInst::Min:  NT = ISD::ATOMIC_LOAD_MIN; break;
  case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break;
  case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
  }
  AtomicOrdering Order = I.getOrdering();
  SynchronizationScope Scope = I.getSynchScope();

  SDValue InChain = getRoot();

  const TargetLowering *TLI = TM.getTargetLowering();
  if (TLI->getInsertFencesForAtomic())
    InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl,
                                   DAG, *TLI);

  SDValue L =
    DAG.getAtomic(NT, dl,
                  getValue(I.getValOperand()).getSimpleValueType(),
                  InChain,
                  getValue(I.getPointerOperand()),
                  getValue(I.getValOperand()),
                  I.getPointerOperand(), 0 /* Alignment */,
                  TLI->getInsertFencesForAtomic() ? Monotonic : Order,
                  Scope);

  SDValue OutChain = L.getValue(1);

  if (TLI->getInsertFencesForAtomic())
    OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
                                    DAG, *TLI);

  setValue(&I, L);
  DAG.setRoot(OutChain);
}

/// visitFence - Lower a fence instruction to ATOMIC_FENCE.  The ordering and
/// the synchronization scope are passed as pointer-typed constant operands,
/// and the fence becomes the new DAG root.
void SelectionDAGBuilder::visitFence(const FenceInst &I) {
  SDLoc dl = getCurSDLoc();
  const TargetLowering *TLI = TM.getTargetLowering();
  SDValue Ops[3];
  Ops[0] = getRoot();
  Ops[1] = DAG.getConstant(I.getOrdering(), TLI->getPointerTy());
  Ops[2] = DAG.getConstant(I.getSynchScope(), TLI->getPointerTy());
  DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops, 3));
}

/// visitAtomicLoad - Lower an atomic load to ATOMIC_LOAD.  Reports a fatal
/// error when the instruction's alignment is smaller than the value's size in
/// bytes.  When the target reports getInsertFencesForAtomic(), the node is
/// emitted as Monotonic and only a trailing fence is requested.
void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
  SDLoc dl = getCurSDLoc();
  AtomicOrdering Order = I.getOrdering();
  SynchronizationScope Scope = I.getSynchScope();

  SDValue InChain = getRoot();

  const TargetLowering *TLI = TM.getTargetLowering();
  EVT VT = TLI->getValueType(I.getType());

  if (I.getAlignment() < VT.getSizeInBits() / 8)
    report_fatal_error("Cannot generate unaligned atomic load");

  SDValue L =
    DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain,
                  getValue(I.getPointerOperand()),
                  I.getPointerOperand(), I.getAlignment(),
                  TLI->getInsertFencesForAtomic() ? Monotonic : Order,
                  Scope);

  SDValue OutChain = L.getValue(1);

  if (TLI->getInsertFencesForAtomic())
    OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
                                    DAG, *TLI);

  setValue(&I, L);
  DAG.setRoot(OutChain);
}

/// visitAtomicStore - Lower an atomic store to ATOMIC_STORE.  Reports a fatal
/// error when the instruction's alignment is smaller than the value's size in
/// bytes.  When the target reports getInsertFencesForAtomic(), the node is
/// emitted as Monotonic and a fence is requested on each side of it.
void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
  SDLoc dl = getCurSDLoc();

  AtomicOrdering Order = I.getOrdering();
  SynchronizationScope Scope = I.getSynchScope();

  SDValue InChain = getRoot();

  const TargetLowering *TLI = TM.getTargetLowering();
  EVT VT = TLI->getValueType(I.getValueOperand()->getType());

  if (I.getAlignment() < VT.getSizeInBits() / 8)
    report_fatal_error("Cannot generate unaligned atomic store");

  if (TLI->getInsertFencesForAtomic())
    InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl,
                                   DAG, *TLI);

  SDValue OutChain =
    DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT,
                  InChain,
                  getValue(I.getPointerOperand()),
                  getValue(I.getValueOperand()),
                  I.getPointerOperand(), I.getAlignment(),
                  TLI->getInsertFencesForAtomic() ? Monotonic : Order,
                  Scope);

  if (TLI->getInsertFencesForAtomic())
    OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
                                    DAG, *TLI);

  DAG.setRoot(OutChain);
}

/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
/// node.
///
/// The call gets a chain unless it is marked as not accessing memory.  A call
/// that only reads memory is chained to the DAG's current root and its output
/// chain is queued in PendingLoads; any other chained call uses getRoot() and
/// its output chain becomes the new root.
void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
                                               unsigned Intrinsic) {
  bool HasChain = !I.doesNotAccessMemory();
  bool OnlyLoad = HasChain && I.onlyReadsMemory();

  // Build the operand list.
  SmallVector<SDValue, 8> Ops;
  if (HasChain) {  // If this intrinsic has side-effects, chainify it.
    if (OnlyLoad) {
      // We don't need to serialize loads against other loads.
      Ops.push_back(DAG.getRoot());
    } else {
      Ops.push_back(getRoot());
    }
  }

  // Info is set by getTgtMemIntrinsic
  TargetLowering::IntrinsicInfo Info;
  const TargetLowering *TLI = TM.getTargetLowering();
  bool IsTgtIntrinsic = TLI->getTgtMemIntrinsic(Info, I, Intrinsic);

  // Add the intrinsic ID as an integer operand if it's not a target intrinsic.
  // It is also added when the target asked for one of the generic
  // INTRINSIC_VOID / INTRINSIC_W_CHAIN opcodes.
  if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
      Info.opc == ISD::INTRINSIC_W_CHAIN)
    Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI->getPointerTy()));

  // Add all operands of the call to the operand list.
  for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
    SDValue Op = getValue(I.getArgOperand(i));
    Ops.push_back(Op);
  }

  SmallVector<EVT, 4> ValueVTs;
  ComputeValueVTs(*TLI, I.getType(), ValueVTs);

  // The chain, when present, is always the last result of the node.
  if (HasChain)
    ValueVTs.push_back(MVT::Other);

  SDVTList VTs = DAG.getVTList(ValueVTs.data(), ValueVTs.size());

  // Create the node.
  SDValue Result;
  if (IsTgtIntrinsic) {
    // This is a target intrinsic that touches memory
    Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(),
                                     VTs, &Ops[0], Ops.size(),
                                     Info.memVT,
                                     MachinePointerInfo(Info.ptrVal, Info.offset),
                                     Info.align, Info.vol,
                                     Info.readMem, Info.writeMem);
  } else if (!HasChain) {
    Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(),
                         VTs, &Ops[0], Ops.size());
  } else if (!I.getType()->isVoidTy()) {
    Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(),
                         VTs, &Ops[0], Ops.size());
  } else {
    Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(),
                         VTs, &Ops[0], Ops.size());
  }

  if (HasChain) {
    SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
    if (OnlyLoad)
      PendingLoads.push_back(Chain);
    else
      DAG.setRoot(Chain);
  }

  if (!I.getType()->isVoidTy()) {
    // Vector results are bitcast to the value type the target lowering
    // reports for the call's IR vector type.
    if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
      EVT VT = TLI->getValueType(PTy);
      Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result);
    }

    setValue(&I, Result);
  }
}

/// GetSignificand - Get the significand and build it into a floating-point
/// number with exponent of 1:
///
///   Op = (Op & 0x007fffff) | 0x3f800000;
///
/// where Op is the hexadecimal representation of floating point value.
static SDValue
GetSignificand(SelectionDAG &DAG, SDValue Op, SDLoc dl) {
  // Keep the low 23 bits of the i32 operand ...
  SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
                           DAG.getConstant(0x007fffff, MVT::i32));
  // ... OR in 0x3f800000, and reinterpret the result as f32.
  SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
                           DAG.getConstant(0x3f800000, MVT::i32));
  return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);
}

/// GetExponent - Get the exponent:
///
///   (float)(int)(((Op & 0x7f800000) >> 23) - 127);
///
/// where Op is the hexadecimal representation of floating point value.
3774static SDValue 3775GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI, 3776 SDLoc dl) { 3777 SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, 3778 DAG.getConstant(0x7f800000, MVT::i32)); 3779 SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0, 3780 DAG.getConstant(23, TLI.getPointerTy())); 3781 SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1, 3782 DAG.getConstant(127, MVT::i32)); 3783 return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2); 3784} 3785 3786/// getF32Constant - Get 32-bit floating point constant. 3787static SDValue 3788getF32Constant(SelectionDAG &DAG, unsigned Flt) { 3789 return DAG.getConstantFP(APFloat(APFloat::IEEEsingle, APInt(32, Flt)), 3790 MVT::f32); 3791} 3792 3793/// expandExp - Lower an exp intrinsic. Handles the special sequences for 3794/// limited-precision mode. 3795static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG, 3796 const TargetLowering &TLI) { 3797 if (Op.getValueType() == MVT::f32 && 3798 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { 3799 3800 // Put the exponent in the right bit position for later addition to the 3801 // final result: 3802 // 3803 // #define LOG2OFe 1.4426950f 3804 // IntegerPartOfX = ((int32_t)(X * LOG2OFe)); 3805 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, 3806 getF32Constant(DAG, 0x3fb8aa3b)); 3807 SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); 3808 3809 // FractionalPartOfX = (X * LOG2OFe) - (float)IntegerPartOfX; 3810 SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); 3811 SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); 3812 3813 // IntegerPartOfX <<= 23; 3814 IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, 3815 DAG.getConstant(23, TLI.getPointerTy())); 3816 3817 SDValue TwoToFracPartOfX; 3818 if (LimitFloatPrecision <= 6) { 3819 // For floating-point precision of 6: 3820 // 3821 // TwoToFractionalPartOfX = 3822 // 0.997535578f + 3823 // (0.735607626f + 0.252464424f 
* x) * x; 3824 // 3825 // error 0.0144103317, which is 6 bits 3826 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 3827 getF32Constant(DAG, 0x3e814304)); 3828 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, 3829 getF32Constant(DAG, 0x3f3c50c8)); 3830 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 3831 TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, 3832 getF32Constant(DAG, 0x3f7f5e7e)); 3833 } else if (LimitFloatPrecision <= 12) { 3834 // For floating-point precision of 12: 3835 // 3836 // TwoToFractionalPartOfX = 3837 // 0.999892986f + 3838 // (0.696457318f + 3839 // (0.224338339f + 0.792043434e-1f * x) * x) * x; 3840 // 3841 // 0.000107046256 error, which is 13 to 14 bits 3842 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 3843 getF32Constant(DAG, 0x3da235e3)); 3844 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, 3845 getF32Constant(DAG, 0x3e65b8f3)); 3846 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 3847 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, 3848 getF32Constant(DAG, 0x3f324b07)); 3849 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); 3850 TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, 3851 getF32Constant(DAG, 0x3f7ff8fd)); 3852 } else { // LimitFloatPrecision <= 18 3853 // For floating-point precision of 18: 3854 // 3855 // TwoToFractionalPartOfX = 3856 // 0.999999982f + 3857 // (0.693148872f + 3858 // (0.240227044f + 3859 // (0.554906021e-1f + 3860 // (0.961591928e-2f + 3861 // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; 3862 // 3863 // error 2.47208000*10^(-7), which is better than 18 bits 3864 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 3865 getF32Constant(DAG, 0x3924b03e)); 3866 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, 3867 getF32Constant(DAG, 0x3ab24b87)); 3868 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 3869 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, 3870 getF32Constant(DAG, 0x3c1d8c17)); 3871 SDValue t6 = 
DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); 3872 SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, 3873 getF32Constant(DAG, 0x3d634a1d)); 3874 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); 3875 SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, 3876 getF32Constant(DAG, 0x3e75fe14)); 3877 SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); 3878 SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, 3879 getF32Constant(DAG, 0x3f317234)); 3880 SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); 3881 TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, 3882 getF32Constant(DAG, 0x3f800000)); 3883 } 3884 3885 // Add the exponent into the result in integer domain. 3886 SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFracPartOfX); 3887 return DAG.getNode(ISD::BITCAST, dl, MVT::f32, 3888 DAG.getNode(ISD::ADD, dl, MVT::i32, 3889 t13, IntegerPartOfX)); 3890 } 3891 3892 // No special expansion. 3893 return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op); 3894} 3895 3896/// expandLog - Lower a log intrinsic. Handles the special sequences for 3897/// limited-precision mode. 3898static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, 3899 const TargetLowering &TLI) { 3900 if (Op.getValueType() == MVT::f32 && 3901 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { 3902 SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); 3903 3904 // Scale the exponent by log(2) [0.69314718f]. 3905 SDValue Exp = GetExponent(DAG, Op1, TLI, dl); 3906 SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, 3907 getF32Constant(DAG, 0x3f317218)); 3908 3909 // Get the significand and build it into a floating-point number with 3910 // exponent of 1. 
3911 SDValue X = GetSignificand(DAG, Op1, dl); 3912 3913 SDValue LogOfMantissa; 3914 if (LimitFloatPrecision <= 6) { 3915 // For floating-point precision of 6: 3916 // 3917 // LogofMantissa = 3918 // -1.1609546f + 3919 // (1.4034025f - 0.23903021f * x) * x; 3920 // 3921 // error 0.0034276066, which is better than 8 bits 3922 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 3923 getF32Constant(DAG, 0xbe74c456)); 3924 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, 3925 getF32Constant(DAG, 0x3fb3a2b1)); 3926 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); 3927 LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, 3928 getF32Constant(DAG, 0x3f949a29)); 3929 } else if (LimitFloatPrecision <= 12) { 3930 // For floating-point precision of 12: 3931 // 3932 // LogOfMantissa = 3933 // -1.7417939f + 3934 // (2.8212026f + 3935 // (-1.4699568f + 3936 // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x; 3937 // 3938 // error 0.000061011436, which is 14 bits 3939 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 3940 getF32Constant(DAG, 0xbd67b6d6)); 3941 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, 3942 getF32Constant(DAG, 0x3ee4f4b8)); 3943 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); 3944 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, 3945 getF32Constant(DAG, 0x3fbc278b)); 3946 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 3947 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, 3948 getF32Constant(DAG, 0x40348e95)); 3949 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); 3950 LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, 3951 getF32Constant(DAG, 0x3fdef31a)); 3952 } else { // LimitFloatPrecision <= 18 3953 // For floating-point precision of 18: 3954 // 3955 // LogOfMantissa = 3956 // -2.1072184f + 3957 // (4.2372794f + 3958 // (-3.7029485f + 3959 // (2.2781945f + 3960 // (-0.87823314f + 3961 // (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x; 3962 // 3963 // error 0.0000023660568, 
which is better than 18 bits 3964 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 3965 getF32Constant(DAG, 0xbc91e5ac)); 3966 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, 3967 getF32Constant(DAG, 0x3e4350aa)); 3968 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); 3969 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, 3970 getF32Constant(DAG, 0x3f60d3e3)); 3971 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 3972 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, 3973 getF32Constant(DAG, 0x4011cdf0)); 3974 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); 3975 SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, 3976 getF32Constant(DAG, 0x406cfd1c)); 3977 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); 3978 SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, 3979 getF32Constant(DAG, 0x408797cb)); 3980 SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); 3981 LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, 3982 getF32Constant(DAG, 0x4006dcab)); 3983 } 3984 3985 return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa); 3986 } 3987 3988 // No special expansion. 3989 return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op); 3990} 3991 3992/// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for 3993/// limited-precision mode. 3994static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG, 3995 const TargetLowering &TLI) { 3996 if (Op.getValueType() == MVT::f32 && 3997 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { 3998 SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); 3999 4000 // Get the exponent. 4001 SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl); 4002 4003 // Get the significand and build it into a floating-point number with 4004 // exponent of 1. 
4005 SDValue X = GetSignificand(DAG, Op1, dl); 4006 4007 // Different possible minimax approximations of significand in 4008 // floating-point for various degrees of accuracy over [1,2]. 4009 SDValue Log2ofMantissa; 4010 if (LimitFloatPrecision <= 6) { 4011 // For floating-point precision of 6: 4012 // 4013 // Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x; 4014 // 4015 // error 0.0049451742, which is more than 7 bits 4016 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 4017 getF32Constant(DAG, 0xbeb08fe0)); 4018 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, 4019 getF32Constant(DAG, 0x40019463)); 4020 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); 4021 Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, 4022 getF32Constant(DAG, 0x3fd6633d)); 4023 } else if (LimitFloatPrecision <= 12) { 4024 // For floating-point precision of 12: 4025 // 4026 // Log2ofMantissa = 4027 // -2.51285454f + 4028 // (4.07009056f + 4029 // (-2.12067489f + 4030 // (.645142248f - 0.816157886e-1f * x) * x) * x) * x; 4031 // 4032 // error 0.0000876136000, which is better than 13 bits 4033 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 4034 getF32Constant(DAG, 0xbda7262e)); 4035 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, 4036 getF32Constant(DAG, 0x3f25280b)); 4037 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); 4038 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, 4039 getF32Constant(DAG, 0x4007b923)); 4040 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 4041 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, 4042 getF32Constant(DAG, 0x40823e2f)); 4043 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); 4044 Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, 4045 getF32Constant(DAG, 0x4020d29c)); 4046 } else { // LimitFloatPrecision <= 18 4047 // For floating-point precision of 18: 4048 // 4049 // Log2ofMantissa = 4050 // -3.0400495f + 4051 // (6.1129976f + 4052 // (-5.3420409f + 4053 // 
(3.2865683f + 4054 // (-1.2669343f + 4055 // (0.27515199f - 4056 // 0.25691327e-1f * x) * x) * x) * x) * x) * x; 4057 // 4058 // error 0.0000018516, which is better than 18 bits 4059 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 4060 getF32Constant(DAG, 0xbcd2769e)); 4061 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, 4062 getF32Constant(DAG, 0x3e8ce0b9)); 4063 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); 4064 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, 4065 getF32Constant(DAG, 0x3fa22ae7)); 4066 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 4067 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, 4068 getF32Constant(DAG, 0x40525723)); 4069 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); 4070 SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, 4071 getF32Constant(DAG, 0x40aaf200)); 4072 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); 4073 SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, 4074 getF32Constant(DAG, 0x40c39dad)); 4075 SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); 4076 Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, 4077 getF32Constant(DAG, 0x4042902c)); 4078 } 4079 4080 return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa); 4081 } 4082 4083 // No special expansion. 4084 return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op); 4085} 4086 4087/// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for 4088/// limited-precision mode. 4089static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, 4090 const TargetLowering &TLI) { 4091 if (Op.getValueType() == MVT::f32 && 4092 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { 4093 SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); 4094 4095 // Scale the exponent by log10(2) [0.30102999f]. 
4096 SDValue Exp = GetExponent(DAG, Op1, TLI, dl); 4097 SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, 4098 getF32Constant(DAG, 0x3e9a209a)); 4099 4100 // Get the significand and build it into a floating-point number with 4101 // exponent of 1. 4102 SDValue X = GetSignificand(DAG, Op1, dl); 4103 4104 SDValue Log10ofMantissa; 4105 if (LimitFloatPrecision <= 6) { 4106 // For floating-point precision of 6: 4107 // 4108 // Log10ofMantissa = 4109 // -0.50419619f + 4110 // (0.60948995f - 0.10380950f * x) * x; 4111 // 4112 // error 0.0014886165, which is 6 bits 4113 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 4114 getF32Constant(DAG, 0xbdd49a13)); 4115 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, 4116 getF32Constant(DAG, 0x3f1c0789)); 4117 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); 4118 Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, 4119 getF32Constant(DAG, 0x3f011300)); 4120 } else if (LimitFloatPrecision <= 12) { 4121 // For floating-point precision of 12: 4122 // 4123 // Log10ofMantissa = 4124 // -0.64831180f + 4125 // (0.91751397f + 4126 // (-0.31664806f + 0.47637168e-1f * x) * x) * x; 4127 // 4128 // error 0.00019228036, which is better than 12 bits 4129 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 4130 getF32Constant(DAG, 0x3d431f31)); 4131 SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, 4132 getF32Constant(DAG, 0x3ea21fb2)); 4133 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); 4134 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, 4135 getF32Constant(DAG, 0x3f6ae232)); 4136 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 4137 Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, 4138 getF32Constant(DAG, 0x3f25f7c3)); 4139 } else { // LimitFloatPrecision <= 18 4140 // For floating-point precision of 18: 4141 // 4142 // Log10ofMantissa = 4143 // -0.84299375f + 4144 // (1.5327582f + 4145 // (-1.0688956f + 4146 // (0.49102474f + 4147 // (-0.12539807f + 
0.13508273e-1f * x) * x) * x) * x) * x; 4148 // 4149 // error 0.0000037995730, which is better than 18 bits 4150 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 4151 getF32Constant(DAG, 0x3c5d51ce)); 4152 SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, 4153 getF32Constant(DAG, 0x3e00685a)); 4154 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); 4155 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, 4156 getF32Constant(DAG, 0x3efb6798)); 4157 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 4158 SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, 4159 getF32Constant(DAG, 0x3f88d192)); 4160 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); 4161 SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, 4162 getF32Constant(DAG, 0x3fc4316c)); 4163 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); 4164 Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8, 4165 getF32Constant(DAG, 0x3f57ce70)); 4166 } 4167 4168 return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa); 4169 } 4170 4171 // No special expansion. 4172 return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op); 4173} 4174 4175/// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for 4176/// limited-precision mode. 
4177static SDValue expandExp2(SDLoc dl, SDValue Op, SelectionDAG &DAG, 4178 const TargetLowering &TLI) { 4179 if (Op.getValueType() == MVT::f32 && 4180 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { 4181 SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op); 4182 4183 // FractionalPartOfX = x - (float)IntegerPartOfX; 4184 SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); 4185 SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, Op, t1); 4186 4187 // IntegerPartOfX <<= 23; 4188 IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, 4189 DAG.getConstant(23, TLI.getPointerTy())); 4190 4191 SDValue TwoToFractionalPartOfX; 4192 if (LimitFloatPrecision <= 6) { 4193 // For floating-point precision of 6: 4194 // 4195 // TwoToFractionalPartOfX = 4196 // 0.997535578f + 4197 // (0.735607626f + 0.252464424f * x) * x; 4198 // 4199 // error 0.0144103317, which is 6 bits 4200 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 4201 getF32Constant(DAG, 0x3e814304)); 4202 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, 4203 getF32Constant(DAG, 0x3f3c50c8)); 4204 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 4205 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, 4206 getF32Constant(DAG, 0x3f7f5e7e)); 4207 } else if (LimitFloatPrecision <= 12) { 4208 // For floating-point precision of 12: 4209 // 4210 // TwoToFractionalPartOfX = 4211 // 0.999892986f + 4212 // (0.696457318f + 4213 // (0.224338339f + 0.792043434e-1f * x) * x) * x; 4214 // 4215 // error 0.000107046256, which is 13 to 14 bits 4216 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 4217 getF32Constant(DAG, 0x3da235e3)); 4218 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, 4219 getF32Constant(DAG, 0x3e65b8f3)); 4220 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 4221 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, 4222 getF32Constant(DAG, 0x3f324b07)); 4223 SDValue t6 = DAG.getNode(ISD::FMUL, dl, 
MVT::f32, t5, X); 4224 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, 4225 getF32Constant(DAG, 0x3f7ff8fd)); 4226 } else { // LimitFloatPrecision <= 18 4227 // For floating-point precision of 18: 4228 // 4229 // TwoToFractionalPartOfX = 4230 // 0.999999982f + 4231 // (0.693148872f + 4232 // (0.240227044f + 4233 // (0.554906021e-1f + 4234 // (0.961591928e-2f + 4235 // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; 4236 // error 2.47208000*10^(-7), which is better than 18 bits 4237 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 4238 getF32Constant(DAG, 0x3924b03e)); 4239 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, 4240 getF32Constant(DAG, 0x3ab24b87)); 4241 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 4242 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, 4243 getF32Constant(DAG, 0x3c1d8c17)); 4244 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); 4245 SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, 4246 getF32Constant(DAG, 0x3d634a1d)); 4247 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); 4248 SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, 4249 getF32Constant(DAG, 0x3e75fe14)); 4250 SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); 4251 SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, 4252 getF32Constant(DAG, 0x3f317234)); 4253 SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); 4254 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, 4255 getF32Constant(DAG, 0x3f800000)); 4256 } 4257 4258 // Add the exponent into the result in integer domain. 4259 SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, 4260 TwoToFractionalPartOfX); 4261 return DAG.getNode(ISD::BITCAST, dl, MVT::f32, 4262 DAG.getNode(ISD::ADD, dl, MVT::i32, 4263 t13, IntegerPartOfX)); 4264 } 4265 4266 // No special expansion. 4267 return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op); 4268} 4269 4270/// visitPow - Lower a pow intrinsic. 
Handles the special sequences for 4271/// limited-precision mode with x == 10.0f. 4272static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS, 4273 SelectionDAG &DAG, const TargetLowering &TLI) { 4274 bool IsExp10 = false; 4275 if (LHS.getValueType() == MVT::f32 && LHS.getValueType() == MVT::f32 && 4276 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { 4277 if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) { 4278 APFloat Ten(10.0f); 4279 IsExp10 = LHSC->isExactlyValue(Ten); 4280 } 4281 } 4282 4283 if (IsExp10) { 4284 // Put the exponent in the right bit position for later addition to the 4285 // final result: 4286 // 4287 // #define LOG2OF10 3.3219281f 4288 // IntegerPartOfX = (int32_t)(x * LOG2OF10); 4289 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS, 4290 getF32Constant(DAG, 0x40549a78)); 4291 SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); 4292 4293 // FractionalPartOfX = x - (float)IntegerPartOfX; 4294 SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); 4295 SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); 4296 4297 // IntegerPartOfX <<= 23; 4298 IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, 4299 DAG.getConstant(23, TLI.getPointerTy())); 4300 4301 SDValue TwoToFractionalPartOfX; 4302 if (LimitFloatPrecision <= 6) { 4303 // For floating-point precision of 6: 4304 // 4305 // twoToFractionalPartOfX = 4306 // 0.997535578f + 4307 // (0.735607626f + 0.252464424f * x) * x; 4308 // 4309 // error 0.0144103317, which is 6 bits 4310 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 4311 getF32Constant(DAG, 0x3e814304)); 4312 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, 4313 getF32Constant(DAG, 0x3f3c50c8)); 4314 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 4315 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, 4316 getF32Constant(DAG, 0x3f7f5e7e)); 4317 } else if (LimitFloatPrecision <= 12) { 4318 // For floating-point 
precision of 12: 4319 // 4320 // TwoToFractionalPartOfX = 4321 // 0.999892986f + 4322 // (0.696457318f + 4323 // (0.224338339f + 0.792043434e-1f * x) * x) * x; 4324 // 4325 // error 0.000107046256, which is 13 to 14 bits 4326 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 4327 getF32Constant(DAG, 0x3da235e3)); 4328 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, 4329 getF32Constant(DAG, 0x3e65b8f3)); 4330 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 4331 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, 4332 getF32Constant(DAG, 0x3f324b07)); 4333 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); 4334 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, 4335 getF32Constant(DAG, 0x3f7ff8fd)); 4336 } else { // LimitFloatPrecision <= 18 4337 // For floating-point precision of 18: 4338 // 4339 // TwoToFractionalPartOfX = 4340 // 0.999999982f + 4341 // (0.693148872f + 4342 // (0.240227044f + 4343 // (0.554906021e-1f + 4344 // (0.961591928e-2f + 4345 // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; 4346 // error 2.47208000*10^(-7), which is better than 18 bits 4347 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 4348 getF32Constant(DAG, 0x3924b03e)); 4349 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, 4350 getF32Constant(DAG, 0x3ab24b87)); 4351 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 4352 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, 4353 getF32Constant(DAG, 0x3c1d8c17)); 4354 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); 4355 SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, 4356 getF32Constant(DAG, 0x3d634a1d)); 4357 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); 4358 SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, 4359 getF32Constant(DAG, 0x3e75fe14)); 4360 SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); 4361 SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, 4362 getF32Constant(DAG, 0x3f317234)); 4363 SDValue t12 = 
DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); 4364 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, 4365 getF32Constant(DAG, 0x3f800000)); 4366 } 4367 4368 SDValue t13 = DAG.getNode(ISD::BITCAST, dl,MVT::i32,TwoToFractionalPartOfX); 4369 return DAG.getNode(ISD::BITCAST, dl, MVT::f32, 4370 DAG.getNode(ISD::ADD, dl, MVT::i32, 4371 t13, IntegerPartOfX)); 4372 } 4373 4374 // No special expansion. 4375 return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS); 4376} 4377 4378 4379/// ExpandPowI - Expand a llvm.powi intrinsic. 4380static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS, 4381 SelectionDAG &DAG) { 4382 // If RHS is a constant, we can expand this out to a multiplication tree, 4383 // otherwise we end up lowering to a call to __powidf2 (for example). When 4384 // optimizing for size, we only want to do this if the expansion would produce 4385 // a small number of multiplies, otherwise we do the full expansion. 4386 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) { 4387 // Get the exponent as a positive value. 4388 unsigned Val = RHSC->getSExtValue(); 4389 if ((int)Val < 0) Val = -Val; 4390 4391 // powi(x, 0) -> 1.0 4392 if (Val == 0) 4393 return DAG.getConstantFP(1.0, LHS.getValueType()); 4394 4395 const Function *F = DAG.getMachineFunction().getFunction(); 4396 if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, 4397 Attribute::OptimizeForSize) || 4398 // If optimizing for size, don't insert too many multiplies. This 4399 // inserts up to 5 multiplies. 4400 CountPopulation_32(Val)+Log2_32(Val) < 7) { 4401 // We use the simple binary decomposition method to generate the multiply 4402 // sequence. There are more optimal ways to do this (for example, 4403 // powi(x,15) generates one more multiply than it should), but this has 4404 // the benefit of being both really simple and much better than a libcall. 
4405 SDValue Res; // Logically starts equal to 1.0 4406 SDValue CurSquare = LHS; 4407 while (Val) { 4408 if (Val & 1) { 4409 if (Res.getNode()) 4410 Res = DAG.getNode(ISD::FMUL, DL,Res.getValueType(), Res, CurSquare); 4411 else 4412 Res = CurSquare; // 1.0*CurSquare. 4413 } 4414 4415 CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(), 4416 CurSquare, CurSquare); 4417 Val >>= 1; 4418 } 4419 4420 // If the original was negative, invert the result, producing 1/(x*x*x). 4421 if (RHSC->getSExtValue() < 0) 4422 Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(), 4423 DAG.getConstantFP(1.0, LHS.getValueType()), Res); 4424 return Res; 4425 } 4426 } 4427 4428 // Otherwise, expand to a libcall. 4429 return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS); 4430} 4431 4432// getTruncatedArgReg - Find underlying register used for an truncated 4433// argument. 4434static unsigned getTruncatedArgReg(const SDValue &N) { 4435 if (N.getOpcode() != ISD::TRUNCATE) 4436 return 0; 4437 4438 const SDValue &Ext = N.getOperand(0); 4439 if (Ext.getOpcode() == ISD::AssertZext || 4440 Ext.getOpcode() == ISD::AssertSext) { 4441 const SDValue &CFR = Ext.getOperand(0); 4442 if (CFR.getOpcode() == ISD::CopyFromReg) 4443 return cast<RegisterSDNode>(CFR.getOperand(1))->getReg(); 4444 if (CFR.getOpcode() == ISD::TRUNCATE) 4445 return getTruncatedArgReg(CFR); 4446 } 4447 return 0; 4448} 4449 4450/// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function 4451/// argument, create the corresponding DBG_VALUE machine instruction for it now. 4452/// At the end of instruction selection, they will be inserted to the entry BB. 
4453bool 4454SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, 4455 int64_t Offset, 4456 const SDValue &N) { 4457 const Argument *Arg = dyn_cast<Argument>(V); 4458 if (!Arg) 4459 return false; 4460 4461 MachineFunction &MF = DAG.getMachineFunction(); 4462 const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo(); 4463 4464 // Ignore inlined function arguments here. 4465 DIVariable DV(Variable); 4466 if (DV.isInlinedFnArgument(MF.getFunction())) 4467 return false; 4468 4469 Optional<MachineOperand> Op; 4470 // Some arguments' frame index is recorded during argument lowering. 4471 if (int FI = FuncInfo.getArgumentFrameIndex(Arg)) 4472 Op = MachineOperand::CreateFI(FI); 4473 4474 if (!Op && N.getNode()) { 4475 unsigned Reg; 4476 if (N.getOpcode() == ISD::CopyFromReg) 4477 Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg(); 4478 else 4479 Reg = getTruncatedArgReg(N); 4480 if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) { 4481 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 4482 unsigned PR = RegInfo.getLiveInPhysReg(Reg); 4483 if (PR) 4484 Reg = PR; 4485 } 4486 if (Reg) 4487 Op = MachineOperand::CreateReg(Reg, false); 4488 } 4489 4490 if (!Op) { 4491 // Check if ValueMap has reg number. 4492 DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); 4493 if (VMI != FuncInfo.ValueMap.end()) 4494 Op = MachineOperand::CreateReg(VMI->second, false); 4495 } 4496 4497 if (!Op && N.getNode()) 4498 // Check if frame index is available. 4499 if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode())) 4500 if (FrameIndexSDNode *FINode = 4501 dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) 4502 Op = MachineOperand::CreateFI(FINode->getIndex()); 4503 4504 if (!Op) 4505 return false; 4506 4507 // FIXME: This does not handle register-indirect values at offset 0. 
4508 bool IsIndirect = Offset != 0; 4509 if (Op->isReg()) 4510 FuncInfo.ArgDbgValues.push_back(BuildMI(MF, getCurDebugLoc(), 4511 TII->get(TargetOpcode::DBG_VALUE), 4512 IsIndirect, 4513 Op->getReg(), Offset, Variable)); 4514 else 4515 FuncInfo.ArgDbgValues.push_back( 4516 BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE)) 4517 .addOperand(*Op).addImm(Offset).addMetadata(Variable)); 4518 4519 return true; 4520} 4521 4522// VisualStudio defines setjmp as _setjmp 4523#if defined(_MSC_VER) && defined(setjmp) && \ 4524 !defined(setjmp_undefined_for_msvc) 4525# pragma push_macro("setjmp") 4526# undef setjmp 4527# define setjmp_undefined_for_msvc 4528#endif 4529 4530/// visitIntrinsicCall - Lower the call to the specified intrinsic function. If 4531/// we want to emit this as a call to a named external function, return the name 4532/// otherwise lower it and return null. 4533const char * 4534SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { 4535 const TargetLowering *TLI = TM.getTargetLowering(); 4536 SDLoc sdl = getCurSDLoc(); 4537 DebugLoc dl = getCurDebugLoc(); 4538 SDValue Res; 4539 4540 switch (Intrinsic) { 4541 default: 4542 // By default, turn this into a target intrinsic node. 
4543 visitTargetIntrinsic(I, Intrinsic); 4544 return 0; 4545 case Intrinsic::vastart: visitVAStart(I); return 0; 4546 case Intrinsic::vaend: visitVAEnd(I); return 0; 4547 case Intrinsic::vacopy: visitVACopy(I); return 0; 4548 case Intrinsic::returnaddress: 4549 setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, TLI->getPointerTy(), 4550 getValue(I.getArgOperand(0)))); 4551 return 0; 4552 case Intrinsic::frameaddress: 4553 setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI->getPointerTy(), 4554 getValue(I.getArgOperand(0)))); 4555 return 0; 4556 case Intrinsic::setjmp: 4557 return &"_setjmp"[!TLI->usesUnderscoreSetJmp()]; 4558 case Intrinsic::longjmp: 4559 return &"_longjmp"[!TLI->usesUnderscoreLongJmp()]; 4560 case Intrinsic::memcpy: { 4561 // Assert for address < 256 since we support only user defined address 4562 // spaces. 4563 assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() 4564 < 256 && 4565 cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace() 4566 < 256 && 4567 "Unknown address space"); 4568 SDValue Op1 = getValue(I.getArgOperand(0)); 4569 SDValue Op2 = getValue(I.getArgOperand(1)); 4570 SDValue Op3 = getValue(I.getArgOperand(2)); 4571 unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); 4572 if (!Align) 4573 Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment. 4574 bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); 4575 DAG.setRoot(DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, false, 4576 MachinePointerInfo(I.getArgOperand(0)), 4577 MachinePointerInfo(I.getArgOperand(1)))); 4578 return 0; 4579 } 4580 case Intrinsic::memset: { 4581 // Assert for address < 256 since we support only user defined address 4582 // spaces. 
4583 assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() 4584 < 256 && 4585 "Unknown address space"); 4586 SDValue Op1 = getValue(I.getArgOperand(0)); 4587 SDValue Op2 = getValue(I.getArgOperand(1)); 4588 SDValue Op3 = getValue(I.getArgOperand(2)); 4589 unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); 4590 if (!Align) 4591 Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment. 4592 bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); 4593 DAG.setRoot(DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, 4594 MachinePointerInfo(I.getArgOperand(0)))); 4595 return 0; 4596 } 4597 case Intrinsic::memmove: { 4598 // Assert for address < 256 since we support only user defined address 4599 // spaces. 4600 assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() 4601 < 256 && 4602 cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace() 4603 < 256 && 4604 "Unknown address space"); 4605 SDValue Op1 = getValue(I.getArgOperand(0)); 4606 SDValue Op2 = getValue(I.getArgOperand(1)); 4607 SDValue Op3 = getValue(I.getArgOperand(2)); 4608 unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); 4609 if (!Align) 4610 Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment. 
4611 bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); 4612 DAG.setRoot(DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, 4613 MachinePointerInfo(I.getArgOperand(0)), 4614 MachinePointerInfo(I.getArgOperand(1)))); 4615 return 0; 4616 } 4617 case Intrinsic::dbg_declare: { 4618 const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); 4619 MDNode *Variable = DI.getVariable(); 4620 const Value *Address = DI.getAddress(); 4621 DIVariable DIVar(Variable); 4622 assert((!DIVar || DIVar.isVariable()) && 4623 "Variable in DbgDeclareInst should be either null or a DIVariable."); 4624 if (!Address || !DIVar) { 4625 DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); 4626 return 0; 4627 } 4628 4629 // Check if address has undef value. 4630 if (isa<UndefValue>(Address) || 4631 (Address->use_empty() && !isa<Argument>(Address))) { 4632 DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); 4633 return 0; 4634 } 4635 4636 SDValue &N = NodeMap[Address]; 4637 if (!N.getNode() && isa<Argument>(Address)) 4638 // Check unused arguments map. 4639 N = UnusedArgNodeMap[Address]; 4640 SDDbgValue *SDV; 4641 if (N.getNode()) { 4642 if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) 4643 Address = BCI->getOperand(0); 4644 // Parameters are handled specially. 4645 bool isParameter = 4646 (DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable || 4647 isa<Argument>(Address)); 4648 4649 const AllocaInst *AI = dyn_cast<AllocaInst>(Address); 4650 4651 if (isParameter && !AI) { 4652 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode()); 4653 if (FINode) 4654 // Byval parameter. We have a frame index at this point. 4655 SDV = DAG.getDbgValue(Variable, FINode->getIndex(), 4656 0, dl, SDNodeOrder); 4657 else { 4658 // Address is an argument, so try to emit its dbg value using 4659 // virtual register info from the FuncInfo.ValueMap. 
4660 EmitFuncArgumentDbgValue(Address, Variable, 0, N); 4661 return 0; 4662 } 4663 } else if (AI) 4664 SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(), 4665 0, dl, SDNodeOrder); 4666 else { 4667 // Can't do anything with other non-AI cases yet. 4668 DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); 4669 DEBUG(dbgs() << "non-AllocaInst issue for Address: \n\t"); 4670 DEBUG(Address->dump()); 4671 return 0; 4672 } 4673 DAG.AddDbgValue(SDV, N.getNode(), isParameter); 4674 } else { 4675 // If Address is an argument then try to emit its dbg value using 4676 // virtual register info from the FuncInfo.ValueMap. 4677 if (!EmitFuncArgumentDbgValue(Address, Variable, 0, N)) { 4678 // If variable is pinned by a alloca in dominating bb then 4679 // use StaticAllocaMap. 4680 if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) { 4681 if (AI->getParent() != DI.getParent()) { 4682 DenseMap<const AllocaInst*, int>::iterator SI = 4683 FuncInfo.StaticAllocaMap.find(AI); 4684 if (SI != FuncInfo.StaticAllocaMap.end()) { 4685 SDV = DAG.getDbgValue(Variable, SI->second, 4686 0, dl, SDNodeOrder); 4687 DAG.AddDbgValue(SDV, 0, false); 4688 return 0; 4689 } 4690 } 4691 } 4692 DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); 4693 } 4694 } 4695 return 0; 4696 } 4697 case Intrinsic::dbg_value: { 4698 const DbgValueInst &DI = cast<DbgValueInst>(I); 4699 DIVariable DIVar(DI.getVariable()); 4700 assert((!DIVar || DIVar.isVariable()) && 4701 "Variable in DbgValueInst should be either null or a DIVariable."); 4702 if (!DIVar) 4703 return 0; 4704 4705 MDNode *Variable = DI.getVariable(); 4706 uint64_t Offset = DI.getOffset(); 4707 const Value *V = DI.getValue(); 4708 if (!V) 4709 return 0; 4710 4711 SDDbgValue *SDV; 4712 if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V)) { 4713 SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder); 4714 DAG.AddDbgValue(SDV, 0, false); 4715 } else { 4716 // Do not use getValue() in here; we don't want to 
generate code at 4717 // this point if it hasn't been done yet. 4718 SDValue N = NodeMap[V]; 4719 if (!N.getNode() && isa<Argument>(V)) 4720 // Check unused arguments map. 4721 N = UnusedArgNodeMap[V]; 4722 if (N.getNode()) { 4723 if (!EmitFuncArgumentDbgValue(V, Variable, Offset, N)) { 4724 SDV = DAG.getDbgValue(Variable, N.getNode(), 4725 N.getResNo(), Offset, dl, SDNodeOrder); 4726 DAG.AddDbgValue(SDV, N.getNode(), false); 4727 } 4728 } else if (!V->use_empty() ) { 4729 // Do not call getValue(V) yet, as we don't want to generate code. 4730 // Remember it for later. 4731 DanglingDebugInfo DDI(&DI, dl, SDNodeOrder); 4732 DanglingDebugInfoMap[V] = DDI; 4733 } else { 4734 // We may expand this to cover more cases. One case where we have no 4735 // data available is an unreferenced parameter. 4736 DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); 4737 } 4738 } 4739 4740 // Build a debug info table entry. 4741 if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V)) 4742 V = BCI->getOperand(0); 4743 const AllocaInst *AI = dyn_cast<AllocaInst>(V); 4744 // Don't handle byval struct arguments or VLAs, for example. 4745 if (!AI) { 4746 DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n"); 4747 DEBUG(dbgs() << " Last seen at:\n " << *V << "\n"); 4748 return 0; 4749 } 4750 DenseMap<const AllocaInst*, int>::iterator SI = 4751 FuncInfo.StaticAllocaMap.find(AI); 4752 if (SI == FuncInfo.StaticAllocaMap.end()) 4753 return 0; // VLAs. 4754 int FI = SI->second; 4755 4756 MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); 4757 if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo()) 4758 MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc()); 4759 return 0; 4760 } 4761 4762 case Intrinsic::eh_typeid_for: { 4763 // Find the type id for the given typeinfo. 
4764 GlobalVariable *GV = ExtractTypeInfo(I.getArgOperand(0)); 4765 unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV); 4766 Res = DAG.getConstant(TypeID, MVT::i32); 4767 setValue(&I, Res); 4768 return 0; 4769 } 4770 4771 case Intrinsic::eh_return_i32: 4772 case Intrinsic::eh_return_i64: 4773 DAG.getMachineFunction().getMMI().setCallsEHReturn(true); 4774 DAG.setRoot(DAG.getNode(ISD::EH_RETURN, sdl, 4775 MVT::Other, 4776 getControlRoot(), 4777 getValue(I.getArgOperand(0)), 4778 getValue(I.getArgOperand(1)))); 4779 return 0; 4780 case Intrinsic::eh_unwind_init: 4781 DAG.getMachineFunction().getMMI().setCallsUnwindInit(true); 4782 return 0; 4783 case Intrinsic::eh_dwarf_cfa: { 4784 SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), sdl, 4785 TLI->getPointerTy()); 4786 SDValue Offset = DAG.getNode(ISD::ADD, sdl, 4787 CfaArg.getValueType(), 4788 DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, sdl, 4789 CfaArg.getValueType()), 4790 CfaArg); 4791 SDValue FA = DAG.getNode(ISD::FRAMEADDR, sdl, 4792 TLI->getPointerTy(), 4793 DAG.getConstant(0, TLI->getPointerTy())); 4794 setValue(&I, DAG.getNode(ISD::ADD, sdl, FA.getValueType(), 4795 FA, Offset)); 4796 return 0; 4797 } 4798 case Intrinsic::eh_sjlj_callsite: { 4799 MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); 4800 ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0)); 4801 assert(CI && "Non-constant call site value in eh.sjlj.callsite!"); 4802 assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!"); 4803 4804 MMI.setCurrentCallSite(CI->getZExtValue()); 4805 return 0; 4806 } 4807 case Intrinsic::eh_sjlj_functioncontext: { 4808 // Get and store the index of the function context. 
4809 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 4810 AllocaInst *FnCtx = 4811 cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts()); 4812 int FI = FuncInfo.StaticAllocaMap[FnCtx]; 4813 MFI->setFunctionContextIndex(FI); 4814 return 0; 4815 } 4816 case Intrinsic::eh_sjlj_setjmp: { 4817 SDValue Ops[2]; 4818 Ops[0] = getRoot(); 4819 Ops[1] = getValue(I.getArgOperand(0)); 4820 SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, sdl, 4821 DAG.getVTList(MVT::i32, MVT::Other), 4822 Ops, 2); 4823 setValue(&I, Op.getValue(0)); 4824 DAG.setRoot(Op.getValue(1)); 4825 return 0; 4826 } 4827 case Intrinsic::eh_sjlj_longjmp: { 4828 DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other, 4829 getRoot(), getValue(I.getArgOperand(0)))); 4830 return 0; 4831 } 4832 4833 case Intrinsic::x86_mmx_pslli_w: 4834 case Intrinsic::x86_mmx_pslli_d: 4835 case Intrinsic::x86_mmx_pslli_q: 4836 case Intrinsic::x86_mmx_psrli_w: 4837 case Intrinsic::x86_mmx_psrli_d: 4838 case Intrinsic::x86_mmx_psrli_q: 4839 case Intrinsic::x86_mmx_psrai_w: 4840 case Intrinsic::x86_mmx_psrai_d: { 4841 SDValue ShAmt = getValue(I.getArgOperand(1)); 4842 if (isa<ConstantSDNode>(ShAmt)) { 4843 visitTargetIntrinsic(I, Intrinsic); 4844 return 0; 4845 } 4846 unsigned NewIntrinsic = 0; 4847 EVT ShAmtVT = MVT::v2i32; 4848 switch (Intrinsic) { 4849 case Intrinsic::x86_mmx_pslli_w: 4850 NewIntrinsic = Intrinsic::x86_mmx_psll_w; 4851 break; 4852 case Intrinsic::x86_mmx_pslli_d: 4853 NewIntrinsic = Intrinsic::x86_mmx_psll_d; 4854 break; 4855 case Intrinsic::x86_mmx_pslli_q: 4856 NewIntrinsic = Intrinsic::x86_mmx_psll_q; 4857 break; 4858 case Intrinsic::x86_mmx_psrli_w: 4859 NewIntrinsic = Intrinsic::x86_mmx_psrl_w; 4860 break; 4861 case Intrinsic::x86_mmx_psrli_d: 4862 NewIntrinsic = Intrinsic::x86_mmx_psrl_d; 4863 break; 4864 case Intrinsic::x86_mmx_psrli_q: 4865 NewIntrinsic = Intrinsic::x86_mmx_psrl_q; 4866 break; 4867 case Intrinsic::x86_mmx_psrai_w: 4868 NewIntrinsic = Intrinsic::x86_mmx_psra_w; 
4869 break; 4870 case Intrinsic::x86_mmx_psrai_d: 4871 NewIntrinsic = Intrinsic::x86_mmx_psra_d; 4872 break; 4873 default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 4874 } 4875 4876 // The vector shift intrinsics with scalars uses 32b shift amounts but 4877 // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits 4878 // to be zero. 4879 // We must do this early because v2i32 is not a legal type. 4880 SDValue ShOps[2]; 4881 ShOps[0] = ShAmt; 4882 ShOps[1] = DAG.getConstant(0, MVT::i32); 4883 ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, &ShOps[0], 2); 4884 EVT DestVT = TLI->getValueType(I.getType()); 4885 ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt); 4886 Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT, 4887 DAG.getConstant(NewIntrinsic, MVT::i32), 4888 getValue(I.getArgOperand(0)), ShAmt); 4889 setValue(&I, Res); 4890 return 0; 4891 } 4892 case Intrinsic::x86_avx_vinsertf128_pd_256: 4893 case Intrinsic::x86_avx_vinsertf128_ps_256: 4894 case Intrinsic::x86_avx_vinsertf128_si_256: 4895 case Intrinsic::x86_avx2_vinserti128: { 4896 EVT DestVT = TLI->getValueType(I.getType()); 4897 EVT ElVT = TLI->getValueType(I.getArgOperand(1)->getType()); 4898 uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) * 4899 ElVT.getVectorNumElements(); 4900 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, DestVT, 4901 getValue(I.getArgOperand(0)), 4902 getValue(I.getArgOperand(1)), 4903 DAG.getConstant(Idx, TLI->getVectorIdxTy())); 4904 setValue(&I, Res); 4905 return 0; 4906 } 4907 case Intrinsic::x86_avx_vextractf128_pd_256: 4908 case Intrinsic::x86_avx_vextractf128_ps_256: 4909 case Intrinsic::x86_avx_vextractf128_si_256: 4910 case Intrinsic::x86_avx2_vextracti128: { 4911 EVT DestVT = TLI->getValueType(I.getType()); 4912 uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) * 4913 DestVT.getVectorNumElements(); 4914 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, DestVT, 4915 
getValue(I.getArgOperand(0)), 4916 DAG.getConstant(Idx, TLI->getVectorIdxTy())); 4917 setValue(&I, Res); 4918 return 0; 4919 } 4920 case Intrinsic::convertff: 4921 case Intrinsic::convertfsi: 4922 case Intrinsic::convertfui: 4923 case Intrinsic::convertsif: 4924 case Intrinsic::convertuif: 4925 case Intrinsic::convertss: 4926 case Intrinsic::convertsu: 4927 case Intrinsic::convertus: 4928 case Intrinsic::convertuu: { 4929 ISD::CvtCode Code = ISD::CVT_INVALID; 4930 switch (Intrinsic) { 4931 default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 4932 case Intrinsic::convertff: Code = ISD::CVT_FF; break; 4933 case Intrinsic::convertfsi: Code = ISD::CVT_FS; break; 4934 case Intrinsic::convertfui: Code = ISD::CVT_FU; break; 4935 case Intrinsic::convertsif: Code = ISD::CVT_SF; break; 4936 case Intrinsic::convertuif: Code = ISD::CVT_UF; break; 4937 case Intrinsic::convertss: Code = ISD::CVT_SS; break; 4938 case Intrinsic::convertsu: Code = ISD::CVT_SU; break; 4939 case Intrinsic::convertus: Code = ISD::CVT_US; break; 4940 case Intrinsic::convertuu: Code = ISD::CVT_UU; break; 4941 } 4942 EVT DestVT = TLI->getValueType(I.getType()); 4943 const Value *Op1 = I.getArgOperand(0); 4944 Res = DAG.getConvertRndSat(DestVT, sdl, getValue(Op1), 4945 DAG.getValueType(DestVT), 4946 DAG.getValueType(getValue(Op1).getValueType()), 4947 getValue(I.getArgOperand(1)), 4948 getValue(I.getArgOperand(2)), 4949 Code); 4950 setValue(&I, Res); 4951 return 0; 4952 } 4953 case Intrinsic::powi: 4954 setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)), 4955 getValue(I.getArgOperand(1)), DAG)); 4956 return 0; 4957 case Intrinsic::log: 4958 setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); 4959 return 0; 4960 case Intrinsic::log2: 4961 setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); 4962 return 0; 4963 case Intrinsic::log10: 4964 setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); 4965 return 0; 4966 case 
Intrinsic::exp: 4967 setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); 4968 return 0; 4969 case Intrinsic::exp2: 4970 setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); 4971 return 0; 4972 case Intrinsic::pow: 4973 setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)), 4974 getValue(I.getArgOperand(1)), DAG, *TLI)); 4975 return 0; 4976 case Intrinsic::sqrt: 4977 case Intrinsic::fabs: 4978 case Intrinsic::sin: 4979 case Intrinsic::cos: 4980 case Intrinsic::floor: 4981 case Intrinsic::ceil: 4982 case Intrinsic::trunc: 4983 case Intrinsic::rint: 4984 case Intrinsic::nearbyint: 4985 case Intrinsic::round: { 4986 unsigned Opcode; 4987 switch (Intrinsic) { 4988 default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 4989 case Intrinsic::sqrt: Opcode = ISD::FSQRT; break; 4990 case Intrinsic::fabs: Opcode = ISD::FABS; break; 4991 case Intrinsic::sin: Opcode = ISD::FSIN; break; 4992 case Intrinsic::cos: Opcode = ISD::FCOS; break; 4993 case Intrinsic::floor: Opcode = ISD::FFLOOR; break; 4994 case Intrinsic::ceil: Opcode = ISD::FCEIL; break; 4995 case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; 4996 case Intrinsic::rint: Opcode = ISD::FRINT; break; 4997 case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; 4998 case Intrinsic::round: Opcode = ISD::FROUND; break; 4999 } 5000 5001 setValue(&I, DAG.getNode(Opcode, sdl, 5002 getValue(I.getArgOperand(0)).getValueType(), 5003 getValue(I.getArgOperand(0)))); 5004 return 0; 5005 } 5006 case Intrinsic::copysign: 5007 setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl, 5008 getValue(I.getArgOperand(0)).getValueType(), 5009 getValue(I.getArgOperand(0)), 5010 getValue(I.getArgOperand(1)))); 5011 return 0; 5012 case Intrinsic::fma: 5013 setValue(&I, DAG.getNode(ISD::FMA, sdl, 5014 getValue(I.getArgOperand(0)).getValueType(), 5015 getValue(I.getArgOperand(0)), 5016 getValue(I.getArgOperand(1)), 5017 getValue(I.getArgOperand(2)))); 5018 return 0; 5019 case Intrinsic::fmuladd: { 
5020 EVT VT = TLI->getValueType(I.getType()); 5021 if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && 5022 TLI->isFMAFasterThanFMulAndFAdd(VT)) { 5023 setValue(&I, DAG.getNode(ISD::FMA, sdl, 5024 getValue(I.getArgOperand(0)).getValueType(), 5025 getValue(I.getArgOperand(0)), 5026 getValue(I.getArgOperand(1)), 5027 getValue(I.getArgOperand(2)))); 5028 } else { 5029 SDValue Mul = DAG.getNode(ISD::FMUL, sdl, 5030 getValue(I.getArgOperand(0)).getValueType(), 5031 getValue(I.getArgOperand(0)), 5032 getValue(I.getArgOperand(1))); 5033 SDValue Add = DAG.getNode(ISD::FADD, sdl, 5034 getValue(I.getArgOperand(0)).getValueType(), 5035 Mul, 5036 getValue(I.getArgOperand(2))); 5037 setValue(&I, Add); 5038 } 5039 return 0; 5040 } 5041 case Intrinsic::convert_to_fp16: 5042 setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, sdl, 5043 MVT::i16, getValue(I.getArgOperand(0)))); 5044 return 0; 5045 case Intrinsic::convert_from_fp16: 5046 setValue(&I, DAG.getNode(ISD::FP16_TO_FP32, sdl, 5047 MVT::f32, getValue(I.getArgOperand(0)))); 5048 return 0; 5049 case Intrinsic::pcmarker: { 5050 SDValue Tmp = getValue(I.getArgOperand(0)); 5051 DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp)); 5052 return 0; 5053 } 5054 case Intrinsic::readcyclecounter: { 5055 SDValue Op = getRoot(); 5056 Res = DAG.getNode(ISD::READCYCLECOUNTER, sdl, 5057 DAG.getVTList(MVT::i64, MVT::Other), 5058 &Op, 1); 5059 setValue(&I, Res); 5060 DAG.setRoot(Res.getValue(1)); 5061 return 0; 5062 } 5063 case Intrinsic::bswap: 5064 setValue(&I, DAG.getNode(ISD::BSWAP, sdl, 5065 getValue(I.getArgOperand(0)).getValueType(), 5066 getValue(I.getArgOperand(0)))); 5067 return 0; 5068 case Intrinsic::cttz: { 5069 SDValue Arg = getValue(I.getArgOperand(0)); 5070 ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1)); 5071 EVT Ty = Arg.getValueType(); 5072 setValue(&I, DAG.getNode(CI->isZero() ? 
ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF, 5073 sdl, Ty, Arg)); 5074 return 0; 5075 } 5076 case Intrinsic::ctlz: { 5077 SDValue Arg = getValue(I.getArgOperand(0)); 5078 ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1)); 5079 EVT Ty = Arg.getValueType(); 5080 setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF, 5081 sdl, Ty, Arg)); 5082 return 0; 5083 } 5084 case Intrinsic::ctpop: { 5085 SDValue Arg = getValue(I.getArgOperand(0)); 5086 EVT Ty = Arg.getValueType(); 5087 setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg)); 5088 return 0; 5089 } 5090 case Intrinsic::stacksave: { 5091 SDValue Op = getRoot(); 5092 Res = DAG.getNode(ISD::STACKSAVE, sdl, 5093 DAG.getVTList(TLI->getPointerTy(), MVT::Other), &Op, 1); 5094 setValue(&I, Res); 5095 DAG.setRoot(Res.getValue(1)); 5096 return 0; 5097 } 5098 case Intrinsic::stackrestore: { 5099 Res = getValue(I.getArgOperand(0)); 5100 DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res)); 5101 return 0; 5102 } 5103 case Intrinsic::stackprotector: { 5104 // Emit code into the DAG to store the stack guard onto the stack. 5105 MachineFunction &MF = DAG.getMachineFunction(); 5106 MachineFrameInfo *MFI = MF.getFrameInfo(); 5107 EVT PtrTy = TLI->getPointerTy(); 5108 5109 SDValue Src = getValue(I.getArgOperand(0)); // The guard's value. 5110 AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1)); 5111 5112 int FI = FuncInfo.StaticAllocaMap[Slot]; 5113 MFI->setStackProtectorIndex(FI); 5114 5115 SDValue FIN = DAG.getFrameIndex(FI, PtrTy); 5116 5117 // Store the stack protector onto the stack. 5118 Res = DAG.getStore(getRoot(), sdl, Src, FIN, 5119 MachinePointerInfo::getFixedStack(FI), 5120 true, false, 0); 5121 setValue(&I, Res); 5122 DAG.setRoot(Res); 5123 return 0; 5124 } 5125 case Intrinsic::objectsize: { 5126 // If we don't know by now, we're never going to know. 
5127 ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1)); 5128 5129 assert(CI && "Non-constant type in __builtin_object_size?"); 5130 5131 SDValue Arg = getValue(I.getCalledValue()); 5132 EVT Ty = Arg.getValueType(); 5133 5134 if (CI->isZero()) 5135 Res = DAG.getConstant(-1ULL, Ty); 5136 else 5137 Res = DAG.getConstant(0, Ty); 5138 5139 setValue(&I, Res); 5140 return 0; 5141 } 5142 case Intrinsic::annotation: 5143 case Intrinsic::ptr_annotation: 5144 // Drop the intrinsic, but forward the value 5145 setValue(&I, getValue(I.getOperand(0))); 5146 return 0; 5147 case Intrinsic::var_annotation: 5148 // Discard annotate attributes 5149 return 0; 5150 5151 case Intrinsic::init_trampoline: { 5152 const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts()); 5153 5154 SDValue Ops[6]; 5155 Ops[0] = getRoot(); 5156 Ops[1] = getValue(I.getArgOperand(0)); 5157 Ops[2] = getValue(I.getArgOperand(1)); 5158 Ops[3] = getValue(I.getArgOperand(2)); 5159 Ops[4] = DAG.getSrcValue(I.getArgOperand(0)); 5160 Ops[5] = DAG.getSrcValue(F); 5161 5162 Res = DAG.getNode(ISD::INIT_TRAMPOLINE, sdl, MVT::Other, Ops, 6); 5163 5164 DAG.setRoot(Res); 5165 return 0; 5166 } 5167 case Intrinsic::adjust_trampoline: { 5168 setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl, 5169 TLI->getPointerTy(), 5170 getValue(I.getArgOperand(0)))); 5171 return 0; 5172 } 5173 case Intrinsic::gcroot: 5174 if (GFI) { 5175 const Value *Alloca = I.getArgOperand(0)->stripPointerCasts(); 5176 const Constant *TypeMap = cast<Constant>(I.getArgOperand(1)); 5177 5178 FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode()); 5179 GFI->addStackRoot(FI->getIndex(), TypeMap); 5180 } 5181 return 0; 5182 case Intrinsic::gcread: 5183 case Intrinsic::gcwrite: 5184 llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!"); 5185 case Intrinsic::flt_rounds: 5186 setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, sdl, MVT::i32)); 5187 return 0; 5188 5189 case Intrinsic::expect: { 5190 // Just 
replace __builtin_expect(exp, c) with EXP. 5191 setValue(&I, getValue(I.getArgOperand(0))); 5192 return 0; 5193 } 5194 5195 case Intrinsic::debugtrap: 5196 case Intrinsic::trap: { 5197 StringRef TrapFuncName = TM.Options.getTrapFunctionName(); 5198 if (TrapFuncName.empty()) { 5199 ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ? 5200 ISD::TRAP : ISD::DEBUGTRAP; 5201 DAG.setRoot(DAG.getNode(Op, sdl,MVT::Other, getRoot())); 5202 return 0; 5203 } 5204 TargetLowering::ArgListTy Args; 5205 TargetLowering:: 5206 CallLoweringInfo CLI(getRoot(), I.getType(), 5207 false, false, false, false, 0, CallingConv::C, 5208 /*isTailCall=*/false, 5209 /*doesNotRet=*/false, /*isReturnValueUsed=*/true, 5210 DAG.getExternalSymbol(TrapFuncName.data(), 5211 TLI->getPointerTy()), 5212 Args, DAG, sdl); 5213 std::pair<SDValue, SDValue> Result = TLI->LowerCallTo(CLI); 5214 DAG.setRoot(Result.second); 5215 return 0; 5216 } 5217 5218 case Intrinsic::uadd_with_overflow: 5219 case Intrinsic::sadd_with_overflow: 5220 case Intrinsic::usub_with_overflow: 5221 case Intrinsic::ssub_with_overflow: 5222 case Intrinsic::umul_with_overflow: 5223 case Intrinsic::smul_with_overflow: { 5224 ISD::NodeType Op; 5225 switch (Intrinsic) { 5226 default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
5227 case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break; 5228 case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break; 5229 case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break; 5230 case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break; 5231 case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break; 5232 case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break; 5233 } 5234 SDValue Op1 = getValue(I.getArgOperand(0)); 5235 SDValue Op2 = getValue(I.getArgOperand(1)); 5236 5237 SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); 5238 setValue(&I, DAG.getNode(Op, sdl, VTs, Op1, Op2)); 5239 return 0; 5240 } 5241 case Intrinsic::prefetch: { 5242 SDValue Ops[5]; 5243 unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); 5244 Ops[0] = getRoot(); 5245 Ops[1] = getValue(I.getArgOperand(0)); 5246 Ops[2] = getValue(I.getArgOperand(1)); 5247 Ops[3] = getValue(I.getArgOperand(2)); 5248 Ops[4] = getValue(I.getArgOperand(3)); 5249 DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, sdl, 5250 DAG.getVTList(MVT::Other), 5251 &Ops[0], 5, 5252 EVT::getIntegerVT(*Context, 8), 5253 MachinePointerInfo(I.getArgOperand(0)), 5254 0, /* align */ 5255 false, /* volatile */ 5256 rw==0, /* read */ 5257 rw==1)); /* write */ 5258 return 0; 5259 } 5260 case Intrinsic::lifetime_start: 5261 case Intrinsic::lifetime_end: { 5262 bool IsStart = (Intrinsic == Intrinsic::lifetime_start); 5263 // Stack coloring is not enabled in O0, discard region information. 5264 if (TM.getOptLevel() == CodeGenOpt::None) 5265 return 0; 5266 5267 SmallVector<Value *, 4> Allocas; 5268 GetUnderlyingObjects(I.getArgOperand(1), Allocas, TD); 5269 5270 for (SmallVectorImpl<Value*>::iterator Object = Allocas.begin(), 5271 E = Allocas.end(); Object != E; ++Object) { 5272 AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(*Object); 5273 5274 // Could not find an Alloca. 
5275 if (!LifetimeObject) 5276 continue; 5277 5278 int FI = FuncInfo.StaticAllocaMap[LifetimeObject]; 5279 5280 SDValue Ops[2]; 5281 Ops[0] = getRoot(); 5282 Ops[1] = DAG.getFrameIndex(FI, TLI->getPointerTy(), true); 5283 unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END); 5284 5285 Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops, 2); 5286 DAG.setRoot(Res); 5287 } 5288 return 0; 5289 } 5290 case Intrinsic::invariant_start: 5291 // Discard region information. 5292 setValue(&I, DAG.getUNDEF(TLI->getPointerTy())); 5293 return 0; 5294 case Intrinsic::invariant_end: 5295 // Discard region information. 5296 return 0; 5297 case Intrinsic::stackprotectorcheck: { 5298 // Do not actually emit anything for this basic block. Instead we initialize 5299 // the stack protector descriptor and export the guard variable so we can 5300 // access it in FinishBasicBlock. 5301 const BasicBlock *BB = I.getParent(); 5302 SPDescriptor.initialize(BB, FuncInfo.MBBMap[BB], I); 5303 ExportFromCurrentBlock(SPDescriptor.getGuard()); 5304 5305 // Flush our exports since we are going to process a terminator. 5306 (void)getControlRoot(); 5307 return 0; 5308 } 5309 case Intrinsic::donothing: 5310 // ignore 5311 return 0; 5312 } 5313} 5314 5315void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, 5316 bool isTailCall, 5317 MachineBasicBlock *LandingPad) { 5318 PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); 5319 FunctionType *FTy = cast<FunctionType>(PT->getElementType()); 5320 Type *RetTy = FTy->getReturnType(); 5321 MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); 5322 MCSymbol *BeginLabel = 0; 5323 5324 TargetLowering::ArgListTy Args; 5325 TargetLowering::ArgListEntry Entry; 5326 Args.reserve(CS.arg_size()); 5327 5328 // Check whether the function can return without sret-demotion. 
5329 SmallVector<ISD::OutputArg, 4> Outs; 5330 const TargetLowering *TLI = TM.getTargetLowering(); 5331 GetReturnInfo(RetTy, CS.getAttributes(), Outs, *TLI); 5332 5333 bool CanLowerReturn = TLI->CanLowerReturn(CS.getCallingConv(), 5334 DAG.getMachineFunction(), 5335 FTy->isVarArg(), Outs, 5336 FTy->getContext()); 5337 5338 SDValue DemoteStackSlot; 5339 int DemoteStackIdx = -100; 5340 5341 if (!CanLowerReturn) { 5342 uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize( 5343 FTy->getReturnType()); 5344 unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment( 5345 FTy->getReturnType()); 5346 MachineFunction &MF = DAG.getMachineFunction(); 5347 DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); 5348 Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType()); 5349 5350 DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI->getPointerTy()); 5351 Entry.Node = DemoteStackSlot; 5352 Entry.Ty = StackSlotPtrType; 5353 Entry.isSExt = false; 5354 Entry.isZExt = false; 5355 Entry.isInReg = false; 5356 Entry.isSRet = true; 5357 Entry.isNest = false; 5358 Entry.isByVal = false; 5359 Entry.isReturned = false; 5360 Entry.Alignment = Align; 5361 Args.push_back(Entry); 5362 RetTy = Type::getVoidTy(FTy->getContext()); 5363 } 5364 5365 for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); 5366 i != e; ++i) { 5367 const Value *V = *i; 5368 5369 // Skip empty types 5370 if (V->getType()->isEmptyTy()) 5371 continue; 5372 5373 SDValue ArgNode = getValue(V); 5374 Entry.Node = ArgNode; Entry.Ty = V->getType(); 5375 5376 unsigned attrInd = i - CS.arg_begin() + 1; 5377 Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt); 5378 Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt); 5379 Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg); 5380 Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet); 5381 Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest); 5382 Entry.isByVal = CS.paramHasAttr(attrInd, 
Attribute::ByVal); 5383 Entry.isReturned = CS.paramHasAttr(attrInd, Attribute::Returned); 5384 Entry.Alignment = CS.getParamAlignment(attrInd); 5385 Args.push_back(Entry); 5386 } 5387 5388 if (LandingPad) { 5389 // Insert a label before the invoke call to mark the try range. This can be 5390 // used to detect deletion of the invoke via the MachineModuleInfo. 5391 BeginLabel = MMI.getContext().CreateTempSymbol(); 5392 5393 // For SjLj, keep track of which landing pads go with which invokes 5394 // so as to maintain the ordering of pads in the LSDA. 5395 unsigned CallSiteIndex = MMI.getCurrentCallSite(); 5396 if (CallSiteIndex) { 5397 MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex); 5398 LPadToCallSiteMap[LandingPad].push_back(CallSiteIndex); 5399 5400 // Now that the call site is handled, stop tracking it. 5401 MMI.setCurrentCallSite(0); 5402 } 5403 5404 // Both PendingLoads and PendingExports must be flushed here; 5405 // this call might not return. 5406 (void)getRoot(); 5407 DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getControlRoot(), BeginLabel)); 5408 } 5409 5410 // Check if target-independent constraints permit a tail call here. 5411 // Target-dependent constraints are checked within TLI->LowerCallTo. 5412 if (isTailCall && !isInTailCallPosition(CS, *TLI)) 5413 isTailCall = false; 5414 5415 TargetLowering:: 5416 CallLoweringInfo CLI(getRoot(), RetTy, FTy, isTailCall, Callee, Args, DAG, 5417 getCurSDLoc(), CS); 5418 std::pair<SDValue,SDValue> Result = TLI->LowerCallTo(CLI); 5419 assert((isTailCall || Result.second.getNode()) && 5420 "Non-null chain expected with non-tail call!"); 5421 assert((Result.second.getNode() || !Result.first.getNode()) && 5422 "Null value expected with tail call!"); 5423 if (Result.first.getNode()) { 5424 setValue(CS.getInstruction(), Result.first); 5425 } else if (!CanLowerReturn && Result.second.getNode()) { 5426 // The instruction result is the result of loading from the 5427 // hidden sret parameter. 
5428 SmallVector<EVT, 1> PVTs; 5429 Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType()); 5430 5431 ComputeValueVTs(*TLI, PtrRetTy, PVTs); 5432 assert(PVTs.size() == 1 && "Pointers should fit in one register"); 5433 EVT PtrVT = PVTs[0]; 5434 5435 SmallVector<EVT, 4> RetTys; 5436 SmallVector<uint64_t, 4> Offsets; 5437 RetTy = FTy->getReturnType(); 5438 ComputeValueVTs(*TLI, RetTy, RetTys, &Offsets); 5439 5440 unsigned NumValues = RetTys.size(); 5441 SmallVector<SDValue, 4> Values(NumValues); 5442 SmallVector<SDValue, 4> Chains(NumValues); 5443 5444 for (unsigned i = 0; i < NumValues; ++i) { 5445 SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), PtrVT, 5446 DemoteStackSlot, 5447 DAG.getConstant(Offsets[i], PtrVT)); 5448 SDValue L = DAG.getLoad(RetTys[i], getCurSDLoc(), Result.second, Add, 5449 MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), 5450 false, false, false, 1); 5451 Values[i] = L; 5452 Chains[i] = L.getValue(1); 5453 } 5454 5455 SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), 5456 MVT::Other, &Chains[0], NumValues); 5457 PendingLoads.push_back(Chain); 5458 5459 setValue(CS.getInstruction(), 5460 DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), 5461 DAG.getVTList(&RetTys[0], RetTys.size()), 5462 &Values[0], Values.size())); 5463 } 5464 5465 if (!Result.second.getNode()) { 5466 // As a special case, a null chain means that a tail call has been emitted and 5467 // the DAG root is already updated. 5468 HasTailCall = true; 5469 5470 // Since there's no actual continuation from this block, nothing can be 5471 // relying on us setting vregs for them. 5472 PendingExports.clear(); 5473 } else { 5474 DAG.setRoot(Result.second); 5475 } 5476 5477 if (LandingPad) { 5478 // Insert a label at the end of the invoke call to mark the try range. This 5479 // can be used to detect deletion of the invoke via the MachineModuleInfo. 
5480 MCSymbol *EndLabel = MMI.getContext().CreateTempSymbol(); 5481 DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel)); 5482 5483 // Inform MachineModuleInfo of range. 5484 MMI.addInvoke(LandingPad, BeginLabel, EndLabel); 5485 } 5486} 5487 5488/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the 5489/// value is equal or not-equal to zero. 5490static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) { 5491 for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); 5492 UI != E; ++UI) { 5493 if (const ICmpInst *IC = dyn_cast<ICmpInst>(*UI)) 5494 if (IC->isEquality()) 5495 if (const Constant *C = dyn_cast<Constant>(IC->getOperand(1))) 5496 if (C->isNullValue()) 5497 continue; 5498 // Unknown instruction. 5499 return false; 5500 } 5501 return true; 5502} 5503 5504static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, 5505 Type *LoadTy, 5506 SelectionDAGBuilder &Builder) { 5507 5508 // Check to see if this load can be trivially constant folded, e.g. if the 5509 // input is from a string literal. 5510 if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) { 5511 // Cast pointer to the type we really want to load. 5512 LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput), 5513 PointerType::getUnqual(LoadTy)); 5514 5515 if (const Constant *LoadCst = 5516 ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput), 5517 Builder.TD)) 5518 return Builder.getValue(LoadCst); 5519 } 5520 5521 // Otherwise, we have to emit the load. If the pointer is to unfoldable but 5522 // still constant memory, the input chain can be the entry node. 5523 SDValue Root; 5524 bool ConstantMemory = false; 5525 5526 // Do not serialize (non-volatile) loads of constant memory with anything. 5527 if (Builder.AA->pointsToConstantMemory(PtrVal)) { 5528 Root = Builder.DAG.getEntryNode(); 5529 ConstantMemory = true; 5530 } else { 5531 // Do not serialize non-volatile loads against each other. 
5532 Root = Builder.DAG.getRoot(); 5533 } 5534 5535 SDValue Ptr = Builder.getValue(PtrVal); 5536 SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurSDLoc(), Root, 5537 Ptr, MachinePointerInfo(PtrVal), 5538 false /*volatile*/, 5539 false /*nontemporal*/, 5540 false /*isinvariant*/, 1 /* align=1 */); 5541 5542 if (!ConstantMemory) 5543 Builder.PendingLoads.push_back(LoadVal.getValue(1)); 5544 return LoadVal; 5545} 5546 5547/// processIntegerCallValue - Record the value for an instruction that 5548/// produces an integer result, converting the type where necessary. 5549void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I, 5550 SDValue Value, 5551 bool IsSigned) { 5552 EVT VT = TM.getTargetLowering()->getValueType(I.getType(), true); 5553 if (IsSigned) 5554 Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT); 5555 else 5556 Value = DAG.getZExtOrTrunc(Value, getCurSDLoc(), VT); 5557 setValue(&I, Value); 5558} 5559 5560/// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form. 5561/// If so, return true and lower it, otherwise return false and it will be 5562/// lowered like a normal call. 5563bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { 5564 // Verify that the prototype makes sense. 
int memcmp(void*,void*,size_t) 5565 if (I.getNumArgOperands() != 3) 5566 return false; 5567 5568 const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1); 5569 if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() || 5570 !I.getArgOperand(2)->getType()->isIntegerTy() || 5571 !I.getType()->isIntegerTy()) 5572 return false; 5573 5574 const Value *Size = I.getArgOperand(2); 5575 const ConstantInt *CSize = dyn_cast<ConstantInt>(Size); 5576 if (CSize && CSize->getZExtValue() == 0) { 5577 EVT CallVT = TM.getTargetLowering()->getValueType(I.getType(), true); 5578 setValue(&I, DAG.getConstant(0, CallVT)); 5579 return true; 5580 } 5581 5582 const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); 5583 std::pair<SDValue, SDValue> Res = 5584 TSI.EmitTargetCodeForMemcmp(DAG, getCurSDLoc(), DAG.getRoot(), 5585 getValue(LHS), getValue(RHS), getValue(Size), 5586 MachinePointerInfo(LHS), 5587 MachinePointerInfo(RHS)); 5588 if (Res.first.getNode()) { 5589 processIntegerCallValue(I, Res.first, true); 5590 PendingLoads.push_back(Res.second); 5591 return true; 5592 } 5593 5594 // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0 5595 // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0 5596 if (CSize && IsOnlyUsedInZeroEqualityComparison(&I)) { 5597 bool ActuallyDoIt = true; 5598 MVT LoadVT; 5599 Type *LoadTy; 5600 switch (CSize->getZExtValue()) { 5601 default: 5602 LoadVT = MVT::Other; 5603 LoadTy = 0; 5604 ActuallyDoIt = false; 5605 break; 5606 case 2: 5607 LoadVT = MVT::i16; 5608 LoadTy = Type::getInt16Ty(CSize->getContext()); 5609 break; 5610 case 4: 5611 LoadVT = MVT::i32; 5612 LoadTy = Type::getInt32Ty(CSize->getContext()); 5613 break; 5614 case 8: 5615 LoadVT = MVT::i64; 5616 LoadTy = Type::getInt64Ty(CSize->getContext()); 5617 break; 5618 /* 5619 case 16: 5620 LoadVT = MVT::v4i32; 5621 LoadTy = Type::getInt32Ty(CSize->getContext()); 5622 LoadTy = VectorType::get(LoadTy, 4); 5623 break; 5624 */ 5625 } 5626 5627 // This turns into 
unaligned loads. We only do this if the target natively 5628 // supports the MVT we'll be loading or if it is small enough (<= 4) that 5629 // we'll only produce a small number of byte loads. 5630 5631 // Require that we can find a legal MVT, and only do this if the target 5632 // supports unaligned loads of that type. Expanding into byte loads would 5633 // bloat the code. 5634 const TargetLowering *TLI = TM.getTargetLowering(); 5635 if (ActuallyDoIt && CSize->getZExtValue() > 4) { 5636 // TODO: Handle 5 byte compare as 4-byte + 1 byte. 5637 // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads. 5638 if (!TLI->isTypeLegal(LoadVT) ||!TLI->allowsUnalignedMemoryAccesses(LoadVT)) 5639 ActuallyDoIt = false; 5640 } 5641 5642 if (ActuallyDoIt) { 5643 SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this); 5644 SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this); 5645 5646 SDValue Res = DAG.getSetCC(getCurSDLoc(), MVT::i1, LHSVal, RHSVal, 5647 ISD::SETNE); 5648 processIntegerCallValue(I, Res, false); 5649 return true; 5650 } 5651 } 5652 5653 5654 return false; 5655} 5656 5657/// visitMemChrCall -- See if we can lower a memchr call into an optimized 5658/// form. If so, return true and lower it, otherwise return false and it 5659/// will be lowered like a normal call. 5660bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) { 5661 // Verify that the prototype makes sense. 
void *memchr(void *, int, size_t) 5662 if (I.getNumArgOperands() != 3) 5663 return false; 5664 5665 const Value *Src = I.getArgOperand(0); 5666 const Value *Char = I.getArgOperand(1); 5667 const Value *Length = I.getArgOperand(2); 5668 if (!Src->getType()->isPointerTy() || 5669 !Char->getType()->isIntegerTy() || 5670 !Length->getType()->isIntegerTy() || 5671 !I.getType()->isPointerTy()) 5672 return false; 5673 5674 const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); 5675 std::pair<SDValue, SDValue> Res = 5676 TSI.EmitTargetCodeForMemchr(DAG, getCurSDLoc(), DAG.getRoot(), 5677 getValue(Src), getValue(Char), getValue(Length), 5678 MachinePointerInfo(Src)); 5679 if (Res.first.getNode()) { 5680 setValue(&I, Res.first); 5681 PendingLoads.push_back(Res.second); 5682 return true; 5683 } 5684 5685 return false; 5686} 5687 5688/// visitStrCpyCall -- See if we can lower a strcpy or stpcpy call into an 5689/// optimized form. If so, return true and lower it, otherwise return false 5690/// and it will be lowered like a normal call. 5691bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) { 5692 // Verify that the prototype makes sense. char *strcpy(char *, char *) 5693 if (I.getNumArgOperands() != 2) 5694 return false; 5695 5696 const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); 5697 if (!Arg0->getType()->isPointerTy() || 5698 !Arg1->getType()->isPointerTy() || 5699 !I.getType()->isPointerTy()) 5700 return false; 5701 5702 const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); 5703 std::pair<SDValue, SDValue> Res = 5704 TSI.EmitTargetCodeForStrcpy(DAG, getCurSDLoc(), getRoot(), 5705 getValue(Arg0), getValue(Arg1), 5706 MachinePointerInfo(Arg0), 5707 MachinePointerInfo(Arg1), isStpcpy); 5708 if (Res.first.getNode()) { 5709 setValue(&I, Res.first); 5710 DAG.setRoot(Res.second); 5711 return true; 5712 } 5713 5714 return false; 5715} 5716 5717/// visitStrCmpCall - See if we can lower a call to strcmp in an optimized form. 
5718/// If so, return true and lower it, otherwise return false and it will be 5719/// lowered like a normal call. 5720bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) { 5721 // Verify that the prototype makes sense. int strcmp(void*,void*) 5722 if (I.getNumArgOperands() != 2) 5723 return false; 5724 5725 const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); 5726 if (!Arg0->getType()->isPointerTy() || 5727 !Arg1->getType()->isPointerTy() || 5728 !I.getType()->isIntegerTy()) 5729 return false; 5730 5731 const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); 5732 std::pair<SDValue, SDValue> Res = 5733 TSI.EmitTargetCodeForStrcmp(DAG, getCurSDLoc(), DAG.getRoot(), 5734 getValue(Arg0), getValue(Arg1), 5735 MachinePointerInfo(Arg0), 5736 MachinePointerInfo(Arg1)); 5737 if (Res.first.getNode()) { 5738 processIntegerCallValue(I, Res.first, true); 5739 PendingLoads.push_back(Res.second); 5740 return true; 5741 } 5742 5743 return false; 5744} 5745 5746/// visitStrLenCall -- See if we can lower a strlen call into an optimized 5747/// form. If so, return true and lower it, otherwise return false and it 5748/// will be lowered like a normal call. 5749bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) { 5750 // Verify that the prototype makes sense. 
size_t strlen(char *) 5751 if (I.getNumArgOperands() != 1) 5752 return false; 5753 5754 const Value *Arg0 = I.getArgOperand(0); 5755 if (!Arg0->getType()->isPointerTy() || !I.getType()->isIntegerTy()) 5756 return false; 5757 5758 const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); 5759 std::pair<SDValue, SDValue> Res = 5760 TSI.EmitTargetCodeForStrlen(DAG, getCurSDLoc(), DAG.getRoot(), 5761 getValue(Arg0), MachinePointerInfo(Arg0)); 5762 if (Res.first.getNode()) { 5763 processIntegerCallValue(I, Res.first, false); 5764 PendingLoads.push_back(Res.second); 5765 return true; 5766 } 5767 5768 return false; 5769} 5770 5771/// visitStrNLenCall -- See if we can lower a strnlen call into an optimized 5772/// form. If so, return true and lower it, otherwise return false and it 5773/// will be lowered like a normal call. 5774bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) { 5775 // Verify that the prototype makes sense. size_t strnlen(char *, size_t) 5776 if (I.getNumArgOperands() != 2) 5777 return false; 5778 5779 const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); 5780 if (!Arg0->getType()->isPointerTy() || 5781 !Arg1->getType()->isIntegerTy() || 5782 !I.getType()->isIntegerTy()) 5783 return false; 5784 5785 const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); 5786 std::pair<SDValue, SDValue> Res = 5787 TSI.EmitTargetCodeForStrnlen(DAG, getCurSDLoc(), DAG.getRoot(), 5788 getValue(Arg0), getValue(Arg1), 5789 MachinePointerInfo(Arg0)); 5790 if (Res.first.getNode()) { 5791 processIntegerCallValue(I, Res.first, false); 5792 PendingLoads.push_back(Res.second); 5793 return true; 5794 } 5795 5796 return false; 5797} 5798 5799/// visitUnaryFloatCall - If a call instruction is a unary floating-point 5800/// operation (as expected), translate it to an SDNode with the specified opcode 5801/// and return true. 
5802bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I, 5803 unsigned Opcode) { 5804 // Sanity check that it really is a unary floating-point call. 5805 if (I.getNumArgOperands() != 1 || 5806 !I.getArgOperand(0)->getType()->isFloatingPointTy() || 5807 I.getType() != I.getArgOperand(0)->getType() || 5808 !I.onlyReadsMemory()) 5809 return false; 5810 5811 SDValue Tmp = getValue(I.getArgOperand(0)); 5812 setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), Tmp.getValueType(), Tmp)); 5813 return true; 5814} 5815 5816void SelectionDAGBuilder::visitCall(const CallInst &I) { 5817 // Handle inline assembly differently. 5818 if (isa<InlineAsm>(I.getCalledValue())) { 5819 visitInlineAsm(&I); 5820 return; 5821 } 5822 5823 MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); 5824 ComputeUsesVAFloatArgument(I, &MMI); 5825 5826 const char *RenameFn = 0; 5827 if (Function *F = I.getCalledFunction()) { 5828 if (F->isDeclaration()) { 5829 if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo()) { 5830 if (unsigned IID = II->getIntrinsicID(F)) { 5831 RenameFn = visitIntrinsicCall(I, IID); 5832 if (!RenameFn) 5833 return; 5834 } 5835 } 5836 if (unsigned IID = F->getIntrinsicID()) { 5837 RenameFn = visitIntrinsicCall(I, IID); 5838 if (!RenameFn) 5839 return; 5840 } 5841 } 5842 5843 // Check for well-known libc/libm calls. If the function is internal, it 5844 // can't be a library call. 5845 LibFunc::Func Func; 5846 if (!F->hasLocalLinkage() && F->hasName() && 5847 LibInfo->getLibFunc(F->getName(), Func) && 5848 LibInfo->hasOptimizedCodeGen(Func)) { 5849 switch (Func) { 5850 default: break; 5851 case LibFunc::copysign: 5852 case LibFunc::copysignf: 5853 case LibFunc::copysignl: 5854 if (I.getNumArgOperands() == 2 && // Basic sanity checks. 
5855 I.getArgOperand(0)->getType()->isFloatingPointTy() && 5856 I.getType() == I.getArgOperand(0)->getType() && 5857 I.getType() == I.getArgOperand(1)->getType() && 5858 I.onlyReadsMemory()) { 5859 SDValue LHS = getValue(I.getArgOperand(0)); 5860 SDValue RHS = getValue(I.getArgOperand(1)); 5861 setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurSDLoc(), 5862 LHS.getValueType(), LHS, RHS)); 5863 return; 5864 } 5865 break; 5866 case LibFunc::fabs: 5867 case LibFunc::fabsf: 5868 case LibFunc::fabsl: 5869 if (visitUnaryFloatCall(I, ISD::FABS)) 5870 return; 5871 break; 5872 case LibFunc::sin: 5873 case LibFunc::sinf: 5874 case LibFunc::sinl: 5875 if (visitUnaryFloatCall(I, ISD::FSIN)) 5876 return; 5877 break; 5878 case LibFunc::cos: 5879 case LibFunc::cosf: 5880 case LibFunc::cosl: 5881 if (visitUnaryFloatCall(I, ISD::FCOS)) 5882 return; 5883 break; 5884 case LibFunc::sqrt: 5885 case LibFunc::sqrtf: 5886 case LibFunc::sqrtl: 5887 case LibFunc::sqrt_finite: 5888 case LibFunc::sqrtf_finite: 5889 case LibFunc::sqrtl_finite: 5890 if (visitUnaryFloatCall(I, ISD::FSQRT)) 5891 return; 5892 break; 5893 case LibFunc::floor: 5894 case LibFunc::floorf: 5895 case LibFunc::floorl: 5896 if (visitUnaryFloatCall(I, ISD::FFLOOR)) 5897 return; 5898 break; 5899 case LibFunc::nearbyint: 5900 case LibFunc::nearbyintf: 5901 case LibFunc::nearbyintl: 5902 if (visitUnaryFloatCall(I, ISD::FNEARBYINT)) 5903 return; 5904 break; 5905 case LibFunc::ceil: 5906 case LibFunc::ceilf: 5907 case LibFunc::ceill: 5908 if (visitUnaryFloatCall(I, ISD::FCEIL)) 5909 return; 5910 break; 5911 case LibFunc::rint: 5912 case LibFunc::rintf: 5913 case LibFunc::rintl: 5914 if (visitUnaryFloatCall(I, ISD::FRINT)) 5915 return; 5916 break; 5917 case LibFunc::round: 5918 case LibFunc::roundf: 5919 case LibFunc::roundl: 5920 if (visitUnaryFloatCall(I, ISD::FROUND)) 5921 return; 5922 break; 5923 case LibFunc::trunc: 5924 case LibFunc::truncf: 5925 case LibFunc::truncl: 5926 if (visitUnaryFloatCall(I, ISD::FTRUNC)) 5927 return; 
5928 break; 5929 case LibFunc::log2: 5930 case LibFunc::log2f: 5931 case LibFunc::log2l: 5932 if (visitUnaryFloatCall(I, ISD::FLOG2)) 5933 return; 5934 break; 5935 case LibFunc::exp2: 5936 case LibFunc::exp2f: 5937 case LibFunc::exp2l: 5938 if (visitUnaryFloatCall(I, ISD::FEXP2)) 5939 return; 5940 break; 5941 case LibFunc::memcmp: 5942 if (visitMemCmpCall(I)) 5943 return; 5944 break; 5945 case LibFunc::memchr: 5946 if (visitMemChrCall(I)) 5947 return; 5948 break; 5949 case LibFunc::strcpy: 5950 if (visitStrCpyCall(I, false)) 5951 return; 5952 break; 5953 case LibFunc::stpcpy: 5954 if (visitStrCpyCall(I, true)) 5955 return; 5956 break; 5957 case LibFunc::strcmp: 5958 if (visitStrCmpCall(I)) 5959 return; 5960 break; 5961 case LibFunc::strlen: 5962 if (visitStrLenCall(I)) 5963 return; 5964 break; 5965 case LibFunc::strnlen: 5966 if (visitStrNLenCall(I)) 5967 return; 5968 break; 5969 } 5970 } 5971 } 5972 5973 SDValue Callee; 5974 if (!RenameFn) 5975 Callee = getValue(I.getCalledValue()); 5976 else 5977 Callee = DAG.getExternalSymbol(RenameFn, 5978 TM.getTargetLowering()->getPointerTy()); 5979 5980 // Check if we can potentially perform a tail call. More detailed checking is 5981 // be done within LowerCallTo, after more information about the call is known. 5982 LowerCallTo(&I, Callee, I.isTailCall()); 5983} 5984 5985namespace { 5986 5987/// AsmOperandInfo - This contains information for each constraint that we are 5988/// lowering. 5989class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo { 5990public: 5991 /// CallOperand - If this is the result output operand or a clobber 5992 /// this is null, otherwise it is the incoming operand to the CallInst. 5993 /// This gets modified as the asm is processed. 5994 SDValue CallOperand; 5995 5996 /// AssignedRegs - If this is a register or register class operand, this 5997 /// contains the set of register corresponding to the operand. 
5998 RegsForValue AssignedRegs; 5999 6000 explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info) 6001 : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) { 6002 } 6003 6004 /// getCallOperandValEVT - Return the EVT of the Value* that this operand 6005 /// corresponds to. If there is no Value* for this operand, it returns 6006 /// MVT::Other. 6007 EVT getCallOperandValEVT(LLVMContext &Context, 6008 const TargetLowering &TLI, 6009 const DataLayout *TD) const { 6010 if (CallOperandVal == 0) return MVT::Other; 6011 6012 if (isa<BasicBlock>(CallOperandVal)) 6013 return TLI.getPointerTy(); 6014 6015 llvm::Type *OpTy = CallOperandVal->getType(); 6016 6017 // FIXME: code duplicated from TargetLowering::ParseConstraints(). 6018 // If this is an indirect operand, the operand is a pointer to the 6019 // accessed type. 6020 if (isIndirect) { 6021 llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy); 6022 if (!PtrTy) 6023 report_fatal_error("Indirect operand for inline asm not a pointer!"); 6024 OpTy = PtrTy->getElementType(); 6025 } 6026 6027 // Look for vector wrapped in a struct. e.g. { <16 x i8> }. 6028 if (StructType *STy = dyn_cast<StructType>(OpTy)) 6029 if (STy->getNumElements() == 1) 6030 OpTy = STy->getElementType(0); 6031 6032 // If OpTy is not a single value, it may be a struct/union that we 6033 // can tile with integers. 6034 if (!OpTy->isSingleValueType() && OpTy->isSized()) { 6035 unsigned BitSize = TD->getTypeSizeInBits(OpTy); 6036 switch (BitSize) { 6037 default: break; 6038 case 1: 6039 case 8: 6040 case 16: 6041 case 32: 6042 case 64: 6043 case 128: 6044 OpTy = IntegerType::get(Context, BitSize); 6045 break; 6046 } 6047 } 6048 6049 return TLI.getValueType(OpTy, true); 6050 } 6051}; 6052 6053typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector; 6054 6055} // end anonymous namespace 6056 6057/// GetRegistersForValue - Assign registers (virtual or physical) for the 6058/// specified operand. 
We prefer to assign virtual registers, to allow the 6059/// register allocator to handle the assignment process. However, if the asm 6060/// uses features that we can't model on machineinstrs, we have SDISel do the 6061/// allocation. This produces generally horrible, but correct, code. 6062/// 6063/// OpInfo describes the operand. Its CallOperand and ConstraintVT may be rewritten 6064/// by a bitcast, and AssignedRegs is set only when registers were found. 6065static void GetRegistersForValue(SelectionDAG &DAG, 6066 const TargetLowering &TLI, 6067 SDLoc DL, 6068 SDISelAsmOperandInfo &OpInfo) { 6069 LLVMContext &Context = *DAG.getContext(); 6070 6071 MachineFunction &MF = DAG.getMachineFunction(); 6072 SmallVector<unsigned, 4> Regs; 6073 6074 // If this is a constraint for a single physreg, or a constraint for a 6075 // register class, find it. 6076 std::pair<unsigned, const TargetRegisterClass*> PhysReg = 6077 TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, 6078 OpInfo.ConstraintVT); 6079 6080 unsigned NumRegs = 1; 6081 if (OpInfo.ConstraintVT != MVT::Other) { 6082 // If this is a FP input in an integer register (or vice versa) insert a bit 6083 // cast of the input value. More generally, handle any case where the input 6084 // value disagrees with the register class we plan to stick this in. 6085 if (OpInfo.Type == InlineAsm::isInput && 6086 PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) { 6087 // Try to convert to the first EVT that the reg class contains. If the 6088 // types are identical size, use a bitcast to convert (e.g. two differing 6089 // vector types). 6090 MVT RegVT = *PhysReg.second->vt_begin(); 6091 if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) { 6092 OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, 6093 RegVT, OpInfo.CallOperand); 6094 OpInfo.ConstraintVT = RegVT; 6095 } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) { 6096 // If the input is a FP value and we want it in integer registers, do a 6097 // bitcast to the corresponding integer type.
This turns an f64 value 6098 // into i64, which can be passed with two i32 values on a 32-bit 6099 // machine. 6100 RegVT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits()); 6101 OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, 6102 RegVT, OpInfo.CallOperand); 6103 OpInfo.ConstraintVT = RegVT; 6104 } 6105 } 6106 6107 NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT); 6108 } 6109 6110 MVT RegVT; 6111 EVT ValueVT = OpInfo.ConstraintVT; 6112 6113 // If this is a constraint for a specific physical register, like {r17}, 6114 // assign it now. 6115 if (unsigned AssignedReg = PhysReg.first) { 6116 const TargetRegisterClass *RC = PhysReg.second; 6117 if (OpInfo.ConstraintVT == MVT::Other) 6118 ValueVT = *RC->vt_begin(); 6119 6120 // Get the actual register value type. This is important, because the user 6121 // may have asked for (e.g.) the AX register in i32 type. We need to 6122 // remember that AX is actually i16 to get the right extension. 6123 RegVT = *RC->vt_begin(); 6124 6125 // This is an explicit reference to a physical register. 6126 Regs.push_back(AssignedReg); 6127 6128 // If this is an expanded reference, add the rest of the regs to Regs. If AssignedReg or enough successors are not in RC, return with AssignedRegs unset. 6129 if (NumRegs != 1) { 6130 TargetRegisterClass::iterator I = RC->begin(); 6131 for (; I != RC->end() && *I != AssignedReg; ++I) 6132 ; /* the bound is tested before *I is read */ 6133 assert(I != RC->end() && "Didn't find reg!"); if (I == RC->end()) return; 6134 // Already added the first reg. 6135 --NumRegs; ++I; 6136 for (; NumRegs; --NumRegs, ++I) { 6137 assert(I != RC->end() && "Ran out of registers to allocate!"); if (I == RC->end()) return; 6138 Regs.push_back(*I); 6139 } 6140 } 6141 6142 OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT); 6143 return; 6144 } 6145 6146 // Otherwise, if this was a reference to an LLVM register class, create vregs 6147 // for this reference. 6148 if (const TargetRegisterClass *RC = PhysReg.second) { 6149 RegVT = *RC->vt_begin(); 6150 if (OpInfo.ConstraintVT == MVT::Other) 6151 ValueVT = RegVT; 6152 6153 // Create the appropriate number of virtual registers.
6154 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 6155 for (; NumRegs; --NumRegs) 6156 Regs.push_back(RegInfo.createVirtualRegister(RC)); 6157 6158 OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT); 6159 return; 6160 } 6161 6162 // Otherwise, we couldn't allocate enough registers for this. 6163} 6164 6165/// visitInlineAsm - Handle a call to an InlineAsm object. 6166/// 6167void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { 6168 const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); 6169 6170 /// ConstraintOperands - Information about all of the constraints. 6171 SDISelAsmOperandInfoVector ConstraintOperands; 6172 6173 const TargetLowering *TLI = TM.getTargetLowering(); 6174 TargetLowering::AsmOperandInfoVector 6175 TargetConstraints = TLI->ParseConstraints(CS); 6176 6177 bool hasMemory = false; 6178 6179 unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. 6180 unsigned ResNo = 0; // ResNo - The result number of the next output. 6181 for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { 6182 ConstraintOperands.push_back(SDISelAsmOperandInfo(TargetConstraints[i])); 6183 SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back(); 6184 6185 MVT OpVT = MVT::Other; 6186 6187 // Compute the value type for each operand. 6188 switch (OpInfo.Type) { 6189 case InlineAsm::isOutput: 6190 // Indirect outputs just consume an argument. 6191 if (OpInfo.isIndirect) { 6192 OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); 6193 break; 6194 } 6195 6196 // The return value of the call is this value. As such, there is no 6197 // corresponding argument. 
6198 assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); 6199 if (StructType *STy = dyn_cast<StructType>(CS.getType())) { 6200 OpVT = TLI->getSimpleValueType(STy->getElementType(ResNo)); 6201 } else { 6202 assert(ResNo == 0 && "Asm only has one result!"); 6203 OpVT = TLI->getSimpleValueType(CS.getType()); 6204 } 6205 ++ResNo; 6206 break; 6207 case InlineAsm::isInput: 6208 OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); 6209 break; 6210 case InlineAsm::isClobber: 6211 // Nothing to do. 6212 break; 6213 } 6214 6215 // If this is an input or an indirect output, process the call argument. 6216 // BasicBlocks are labels, currently appearing only in asm's. 6217 if (OpInfo.CallOperandVal) { 6218 if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) { 6219 OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]); 6220 } else { 6221 OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); 6222 } 6223 6224 OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), *TLI, TD). 6225 getSimpleVT(); 6226 } 6227 6228 OpInfo.ConstraintVT = OpVT; 6229 6230 // Indirect operand accesses access memory. 6231 if (OpInfo.isIndirect) 6232 hasMemory = true; 6233 else { 6234 for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) { 6235 TargetLowering::ConstraintType 6236 CType = TLI->getConstraintType(OpInfo.Codes[j]); 6237 if (CType == TargetLowering::C_Memory) { 6238 hasMemory = true; 6239 break; 6240 } 6241 } 6242 } 6243 } 6244 6245 SDValue Chain, Flag; 6246 6247 // We won't need to flush pending loads if this asm doesn't touch 6248 // memory and is nonvolatile. 6249 if (hasMemory || IA->hasSideEffects()) 6250 Chain = getRoot(); 6251 else 6252 Chain = DAG.getRoot(); 6253 6254 // Second pass over the constraints: compute which constraint option to use 6255 // and assign registers to constraints that want a specific physreg. 
6256 for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { 6257 SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; 6258 6259 // If this is an output operand with a matching input operand, look up the 6260 // matching input. If their types differ, report a fatal error when only one 6261 // is an integer or when they resolve to different register classes; 6262 // otherwise the input adopts the output's type. 6263 if (OpInfo.hasMatchingInput()) { 6264 SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; 6265 6266 if (OpInfo.ConstraintVT != Input.ConstraintVT) { 6267 std::pair<unsigned, const TargetRegisterClass*> MatchRC = 6268 TLI->getRegForInlineAsmConstraint(OpInfo.ConstraintCode, 6269 OpInfo.ConstraintVT); 6270 std::pair<unsigned, const TargetRegisterClass*> InputRC = 6271 TLI->getRegForInlineAsmConstraint(Input.ConstraintCode, 6272 Input.ConstraintVT); 6273 if ((OpInfo.ConstraintVT.isInteger() != 6274 Input.ConstraintVT.isInteger()) || 6275 (MatchRC.second != InputRC.second)) { 6276 report_fatal_error("Unsupported asm: input constraint" 6277 " with a matching output constraint of" 6278 " incompatible type!"); 6279 } 6280 Input.ConstraintVT = OpInfo.ConstraintVT; 6281 } 6282 } 6283 6284 // Compute the constraint code and ConstraintType to use. 6285 TLI->ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG); 6286 // Memory clobbers skip the address and register setup below. 6287 if (OpInfo.ConstraintType == TargetLowering::C_Memory && 6288 OpInfo.Type == InlineAsm::isClobber) 6289 continue; 6290 6291 // If this is a memory input, and if the operand is not indirect, do what we 6292 // need to provide an address for the memory input. 6293 if (OpInfo.ConstraintType == TargetLowering::C_Memory && 6294 !OpInfo.isIndirect) { 6295 assert((OpInfo.isMultipleAlternative || 6296 (OpInfo.Type == InlineAsm::isInput)) && 6297 "Can only indirectify direct input operands!"); 6298 6299 // Memory operands really want the address of the value.
If we don't have 6300 // an indirect input, put it in the constpool if we can, otherwise spill 6301 // it to a stack slot. 6302 // TODO: This isn't quite right. We need to handle these according to 6303 // the addressing mode that the constraint wants. Also, this may take 6304 // an additional register for the computation and we don't want that 6305 // either. 6306 6307 // If the operand is a float, integer, or vector constant, spill to a 6308 // constant pool entry to get its address. 6309 const Value *OpVal = OpInfo.CallOperandVal; 6310 if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) || 6311 isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) { 6312 OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal), 6313 TLI->getPointerTy()); 6314 } else { 6315 // Otherwise, create a stack slot and emit a store to it before the 6316 // asm. 6317 Type *Ty = OpVal->getType(); 6318 uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty); 6319 unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment(Ty); 6320 MachineFunction &MF = DAG.getMachineFunction(); 6321 int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); 6322 SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI->getPointerTy()); 6323 Chain = DAG.getStore(Chain, getCurSDLoc(), 6324 OpInfo.CallOperand, StackSlot, 6325 MachinePointerInfo::getFixedStack(SSFI), 6326 false, false, 0); 6327 OpInfo.CallOperand = StackSlot; 6328 } 6329 6330 // There is no longer a Value* corresponding to this operand. 6331 OpInfo.CallOperandVal = 0; 6332 6333 // It is now an indirect operand. 6334 OpInfo.isIndirect = true; 6335 } 6336 6337 // If this constraint is for a specific register, allocate it before 6338 // anything else. 6339 if (OpInfo.ConstraintType == TargetLowering::C_Register) 6340 GetRegistersForValue(DAG, *TLI, getCurSDLoc(), OpInfo); 6341 } 6342 6343 // Third pass - Loop over all of the operands, assigning virtual or physregs 6344 // to register class operands.
6345 for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { 6346 SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; 6347 6348 // C_Register operands have already been allocated, Other/Memory don't need 6349 // to be. 6350 if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass) 6351 GetRegistersForValue(DAG, *TLI, getCurSDLoc(), OpInfo); 6352 } 6353 6354 // AsmNodeOperands - The operands for the ISD::INLINEASM node. 6355 std::vector<SDValue> AsmNodeOperands; 6356 AsmNodeOperands.push_back(SDValue()); // reserve space for input chain 6357 AsmNodeOperands.push_back( 6358 DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), 6359 TLI->getPointerTy())); 6360 6361 // If we have a !srcloc metadata node associated with it, we want to attach 6362 // this to the ultimately generated inline asm machineinstr. To do this, we 6363 // pass in the third operand as this (potentially null) inline asm MDNode. 6364 const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc"); 6365 AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc)); 6366 6367 // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore 6368 // bits as operand 3. 6369 unsigned ExtraInfo = 0; 6370 if (IA->hasSideEffects()) 6371 ExtraInfo |= InlineAsm::Extra_HasSideEffects; 6372 if (IA->isAlignStack()) 6373 ExtraInfo |= InlineAsm::Extra_IsAlignStack; 6374 // Set the asm dialect. 6375 ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect; 6376 6377 // Determine if this InlineAsm MayLoad or MayStore based on the constraints. 6378 for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { 6379 TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; 6380 6381 // Compute the constraint code and ConstraintType to use. 6382 TLI->ComputeConstraintToUse(OpInfo, SDValue()); 6383 6384 // Ideally, we would only check against memory constraints. However, the 6385 // meaning of an other constraint can be target-specific and we can't easily 6386 // reason about it. 
Therefore, be conservative and set MayLoad/MayStore 6387 // for other constraints as well. 6388 if (OpInfo.ConstraintType == TargetLowering::C_Memory || 6389 OpInfo.ConstraintType == TargetLowering::C_Other) { 6390 if (OpInfo.Type == InlineAsm::isInput) 6391 ExtraInfo |= InlineAsm::Extra_MayLoad; 6392 else if (OpInfo.Type == InlineAsm::isOutput) 6393 ExtraInfo |= InlineAsm::Extra_MayStore; 6394 else if (OpInfo.Type == InlineAsm::isClobber) 6395 ExtraInfo |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore); 6396 } 6397 } 6398 6399 AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, 6400 TLI->getPointerTy())); 6401 6402 // Loop over all of the inputs, copying the operand values into the 6403 // appropriate registers and processing the output regs. 6404 RegsForValue RetValRegs; 6405 6406 // IndirectStoresToEmit - The set of stores to emit after the inline asm node. 6407 std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit; 6408 6409 for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { 6410 SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; 6411 6412 switch (OpInfo.Type) { 6413 case InlineAsm::isOutput: { 6414 if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass && 6415 OpInfo.ConstraintType != TargetLowering::C_Register) { 6416 // Memory output, or 'other' output (e.g. 'X' constraint). 6417 assert(OpInfo.isIndirect && "Memory output must be indirect operand"); 6418 6419 // Add information to the INLINEASM node to know about this output. 6420 unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); 6421 AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, 6422 TLI->getPointerTy())); 6423 AsmNodeOperands.push_back(OpInfo.CallOperand); 6424 break; 6425 } 6426 6427 // Otherwise, this is a register or register class output. 6428 6429 // Copy the output from the appropriate register. Find a register that 6430 // we can use.
6431 if (OpInfo.AssignedRegs.Regs.empty()) { 6432 LLVMContext &Ctx = *DAG.getContext(); 6433 Ctx.emitError(CS.getInstruction(), 6434 "couldn't allocate output register for constraint '" + 6435 Twine(OpInfo.ConstraintCode) + "'"); 6436 return; 6437 } 6438 6439 // If this is an indirect operand, store through the pointer after the 6440 // asm. 6441 if (OpInfo.isIndirect) { 6442 IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs, 6443 OpInfo.CallOperandVal)); 6444 } else { 6445 // This is the result value of the call. 6446 assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); 6447 // Concatenate this output onto the outputs list. 6448 RetValRegs.append(OpInfo.AssignedRegs); 6449 } 6450 6451 // Add information to the INLINEASM node to know that this register is 6452 // set. 6453 OpInfo.AssignedRegs 6454 .AddInlineAsmOperands(OpInfo.isEarlyClobber 6455 ? InlineAsm::Kind_RegDefEarlyClobber 6456 : InlineAsm::Kind_RegDef, 6457 false, 0, DAG, AsmNodeOperands); 6458 break; 6459 } 6460 case InlineAsm::isInput: { 6461 SDValue InOperandVal = OpInfo.CallOperand; 6462 6463 if (OpInfo.isMatchingInputConstraint()) { // Matching constraint? 6464 // If this is required to match an output register we have already set, 6465 // just use its register. 6466 unsigned OperandNo = OpInfo.getMatchedOperand(); 6467 6468 // Scan until we find the definition we already emitted of this operand. 6469 // When we find it, create a RegsForValue operand. 6470 unsigned CurOp = InlineAsm::Op_FirstOperand; 6471 for (; OperandNo; --OperandNo) { 6472 // Advance to the next operand. 
6473 unsigned OpFlag = 6474 cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue(); 6475 assert((InlineAsm::isRegDefKind(OpFlag) || 6476 InlineAsm::isRegDefEarlyClobberKind(OpFlag) || 6477 InlineAsm::isMemKind(OpFlag)) && "Skipped past definitions?"); 6478 CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1; 6479 } 6480 6481 unsigned OpFlag = 6482 cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue(); 6483 if (InlineAsm::isRegDefKind(OpFlag) || 6484 InlineAsm::isRegDefEarlyClobberKind(OpFlag)) { 6485 // Add (OpFlag&0xffff)>>3 registers to MatchedRegs. 6486 if (OpInfo.isIndirect) { 6487 // This happens on gcc/testsuite/gcc.dg/pr8788-1.c 6488 LLVMContext &Ctx = *DAG.getContext(); 6489 Ctx.emitError(CS.getInstruction(), "inline asm not supported yet:" 6490 " don't know how to handle tied " 6491 "indirect register inputs"); 6492 return; 6493 } 6494 6495 RegsForValue MatchedRegs; 6496 MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType()); 6497 MVT RegVT = AsmNodeOperands[CurOp+1].getSimpleValueType(); 6498 MatchedRegs.RegVTs.push_back(RegVT); 6499 MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo(); 6500 for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag); 6501 i != e; ++i) { 6502 if (const TargetRegisterClass *RC = TLI->getRegClassFor(RegVT)) 6503 MatchedRegs.Regs.push_back(RegInfo.createVirtualRegister(RC)); 6504 else { 6505 LLVMContext &Ctx = *DAG.getContext(); 6506 Ctx.emitError(CS.getInstruction(), 6507 "inline asm error: This value" 6508 " type register class is not natively supported!"); 6509 return; 6510 } 6511 } 6512 // Use the produced MatchedRegs object to copy the input into the new vregs and add them as uses tied to the matched operand. 6513 MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurSDLoc(), 6514 Chain, &Flag, CS.getInstruction()); 6515 MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, 6516 true, OpInfo.getMatchedOperand(), 6517 DAG, AsmNodeOperands); 6518 break; 6519 } 6520 6521 assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!"); 6522
assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 && 6523 "Unexpected number of operands"); 6524 // Add information to the INLINEASM node to know about this input. 6525 // See InlineAsm.h isUseOperandTiedToDef. 6526 OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag, 6527 OpInfo.getMatchedOperand()); 6528 AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, 6529 TLI->getPointerTy())); 6530 AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); 6531 break; 6532 } 6533 6534 // Treat indirect 'X' constraint as memory. 6535 if (OpInfo.ConstraintType == TargetLowering::C_Other && 6536 OpInfo.isIndirect) 6537 OpInfo.ConstraintType = TargetLowering::C_Memory; 6538 6539 if (OpInfo.ConstraintType == TargetLowering::C_Other) { 6540 std::vector<SDValue> Ops; 6541 TLI->LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, 6542 Ops, DAG); 6543 if (Ops.empty()) { 6544 LLVMContext &Ctx = *DAG.getContext(); 6545 Ctx.emitError(CS.getInstruction(), 6546 "invalid operand for inline asm constraint '" + 6547 Twine(OpInfo.ConstraintCode) + "'"); 6548 return; 6549 } 6550 6551 // Add information to the INLINEASM node to know about this input. 6552 unsigned ResOpType = 6553 InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size()); 6554 AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, 6555 TLI->getPointerTy())); 6556 AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); 6557 break; 6558 } 6559 6560 if (OpInfo.ConstraintType == TargetLowering::C_Memory) { 6561 assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); 6562 assert(InOperandVal.getValueType() == TLI->getPointerTy() && 6563 "Memory operands expect pointer values"); 6564 6565 // Add information to the INLINEASM node to know about this input. 
6566 unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); 6567 AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, 6568 TLI->getPointerTy())); 6569 AsmNodeOperands.push_back(InOperandVal); 6570 break; 6571 } 6572 6573 assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass || 6574 OpInfo.ConstraintType == TargetLowering::C_Register) && 6575 "Unknown constraint type!"); 6576 6577 // TODO: Support this. 6578 if (OpInfo.isIndirect) { 6579 LLVMContext &Ctx = *DAG.getContext(); 6580 Ctx.emitError(CS.getInstruction(), 6581 "Don't know how to handle indirect register inputs yet " 6582 "for constraint '" + 6583 Twine(OpInfo.ConstraintCode) + "'"); 6584 return; 6585 } 6586 6587 // Copy the input into the appropriate registers. 6588 if (OpInfo.AssignedRegs.Regs.empty()) { 6589 LLVMContext &Ctx = *DAG.getContext(); 6590 Ctx.emitError(CS.getInstruction(), 6591 "couldn't allocate input reg for constraint '" + 6592 Twine(OpInfo.ConstraintCode) + "'"); 6593 return; 6594 } 6595 6596 OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurSDLoc(), 6597 Chain, &Flag, CS.getInstruction()); 6598 6599 OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0, 6600 DAG, AsmNodeOperands); 6601 break; 6602 } 6603 case InlineAsm::isClobber: { 6604 // Add the clobbered value to the operand list, so that the register 6605 // allocator is aware that the physreg got clobbered. 6606 if (!OpInfo.AssignedRegs.Regs.empty()) 6607 OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber, 6608 false, 0, DAG, 6609 AsmNodeOperands); 6610 break; 6611 } 6612 } 6613 } 6614 6615 // Finish up input operands. Set the input chain and add the flag last. 
6616 AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; 6617 if (Flag.getNode()) AsmNodeOperands.push_back(Flag); 6618 6619 Chain = DAG.getNode(ISD::INLINEASM, getCurSDLoc(), 6620 DAG.getVTList(MVT::Other, MVT::Glue), 6621 &AsmNodeOperands[0], AsmNodeOperands.size()); 6622 Flag = Chain.getValue(1); 6623 6624 // If this asm returns a register value, copy the result from that register 6625 // and set it as the value of the call. 6626 if (!RetValRegs.Regs.empty()) { 6627 SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), 6628 Chain, &Flag, CS.getInstruction()); 6629 6630 // FIXME: Why don't we do this for inline asms with MRVs? 6631 if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) { 6632 EVT ResultType = TLI->getValueType(CS.getType()); 6633 6634 // If any of the results of the inline asm is a vector, it may have the 6635 // wrong width/num elts. This can happen for register classes that can 6636 // contain multiple different value types. The preg or vreg allocated may 6637 // not have the same VT as was expected. Convert it to the right type 6638 // with bit_convert. 6639 if (ResultType != Val.getValueType() && Val.getValueType().isVector()) { 6640 Val = DAG.getNode(ISD::BITCAST, getCurSDLoc(), 6641 ResultType, Val); 6642 6643 } else if (ResultType != Val.getValueType() && 6644 ResultType.isInteger() && Val.getValueType().isInteger()) { 6645 // If a result value was tied to an input value, the computed result may 6646 // have a wider width than the expected result. Extract the relevant 6647 // portion. 6648 Val = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultType, Val); 6649 } 6650 6651 assert(ResultType == Val.getValueType() && "Asm result value mismatch!"); 6652 } 6653 6654 setValue(CS.getInstruction(), Val); 6655 // Don't need to use this as a chain in this case. 
6656 if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty()) 6657 return; 6658 } 6659 6660 std::vector<std::pair<SDValue, const Value *> > StoresToEmit; 6661 6662 // Process indirect outputs, first output all of the flagged copies out of 6663 // physregs. 6664 for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) { 6665 RegsForValue &OutRegs = IndirectStoresToEmit[i].first; 6666 const Value *Ptr = IndirectStoresToEmit[i].second; 6667 SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), 6668 Chain, &Flag, IA); 6669 StoresToEmit.push_back(std::make_pair(OutVal, Ptr)); 6670 } 6671 6672 // Emit the non-flagged stores from the physregs. 6673 SmallVector<SDValue, 8> OutChains; 6674 for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) { 6675 SDValue Val = DAG.getStore(Chain, getCurSDLoc(), 6676 StoresToEmit[i].first, 6677 getValue(StoresToEmit[i].second), 6678 MachinePointerInfo(StoresToEmit[i].second), 6679 false, false, 0); 6680 OutChains.push_back(Val); 6681 } 6682 6683 if (!OutChains.empty()) 6684 Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, 6685 &OutChains[0], OutChains.size()); 6686 6687 DAG.setRoot(Chain); 6688} 6689 6690void SelectionDAGBuilder::visitVAStart(const CallInst &I) { 6691 DAG.setRoot(DAG.getNode(ISD::VASTART, getCurSDLoc(), 6692 MVT::Other, getRoot(), 6693 getValue(I.getArgOperand(0)), 6694 DAG.getSrcValue(I.getArgOperand(0)))); 6695} 6696 6697void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { 6698 const TargetLowering *TLI = TM.getTargetLowering(); 6699 const DataLayout &TD = *TLI->getDataLayout(); 6700 SDValue V = DAG.getVAArg(TLI->getValueType(I.getType()), getCurSDLoc(), 6701 getRoot(), getValue(I.getOperand(0)), 6702 DAG.getSrcValue(I.getOperand(0)), 6703 TD.getABITypeAlignment(I.getType())); 6704 setValue(&I, V); 6705 DAG.setRoot(V.getValue(1)); 6706} 6707 6708void SelectionDAGBuilder::visitVAEnd(const CallInst &I) { 6709 DAG.setRoot(DAG.getNode(ISD::VAEND, 
getCurSDLoc(), 6710 MVT::Other, getRoot(), 6711 getValue(I.getArgOperand(0)), 6712 DAG.getSrcValue(I.getArgOperand(0)))); 6713} 6714/// visitVACopy - Emit an ISD::VACOPY node chained on getRoot() from the call's first two arguments and make it the DAG root. 6715void SelectionDAGBuilder::visitVACopy(const CallInst &I) { 6716 DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurSDLoc(), 6717 MVT::Other, getRoot(), 6718 getValue(I.getArgOperand(0)), 6719 getValue(I.getArgOperand(1)), 6720 DAG.getSrcValue(I.getArgOperand(0)), 6721 DAG.getSrcValue(I.getArgOperand(1)))); 6722} 6723 6724/// TargetLowering::LowerCallTo - This is the default LowerCallTo 6725/// implementation. It rebuilds CLI.Ins, CLI.Outs and CLI.OutVals, calls LowerCall, and returns the 6726/// (merged return value, chain) pair; the value is null for void returns and both are null for a tail call. FIXME: When all targets are 6727/// migrated to using LowerCall, this hook should be integrated into SDISel. 6728std::pair<SDValue, SDValue> 6729TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { 6730 // Handle the incoming return values from the call. 6731 CLI.Ins.clear(); 6732 SmallVector<EVT, 4> RetTys; 6733 ComputeValueVTs(*this, CLI.RetTy, RetTys); 6734 for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { 6735 EVT VT = RetTys[I]; 6736 MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); 6737 unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); 6738 for (unsigned i = 0; i != NumRegs; ++i) { 6739 ISD::InputArg MyFlags; 6740 MyFlags.VT = RegisterVT; 6741 MyFlags.ArgVT = VT; 6742 MyFlags.Used = CLI.IsReturnValueUsed; 6743 if (CLI.RetSExt) 6744 MyFlags.Flags.setSExt(); 6745 if (CLI.RetZExt) 6746 MyFlags.Flags.setZExt(); 6747 if (CLI.IsInReg) 6748 MyFlags.Flags.setInReg(); 6749 CLI.Ins.push_back(MyFlags); 6750 } 6751 } 6752 6753 // Handle all of the outgoing arguments.
6754 CLI.Outs.clear(); 6755 CLI.OutVals.clear(); 6756 ArgListTy &Args = CLI.Args; 6757 for (unsigned i = 0, e = Args.size(); i != e; ++i) { 6758 SmallVector<EVT, 4> ValueVTs; 6759 ComputeValueVTs(*this, Args[i].Ty, ValueVTs); 6760 for (unsigned Value = 0, NumValues = ValueVTs.size(); 6761 Value != NumValues; ++Value) { 6762 EVT VT = ValueVTs[Value]; 6763 Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext()); 6764 SDValue Op = SDValue(Args[i].Node.getNode(), 6765 Args[i].Node.getResNo() + Value); 6766 ISD::ArgFlagsTy Flags; 6767 unsigned OriginalAlignment = 6768 getDataLayout()->getABITypeAlignment(ArgTy); 6769 6770 if (Args[i].isZExt) 6771 Flags.setZExt(); 6772 if (Args[i].isSExt) 6773 Flags.setSExt(); 6774 if (Args[i].isInReg) 6775 Flags.setInReg(); 6776 if (Args[i].isSRet) 6777 Flags.setSRet(); 6778 if (Args[i].isByVal) { 6779 Flags.setByVal(); 6780 PointerType *Ty = cast<PointerType>(Args[i].Ty); 6781 Type *ElementTy = Ty->getElementType(); 6782 Flags.setByValSize(getDataLayout()->getTypeAllocSize(ElementTy)); 6783 // For ByVal, alignment should come from FE. BE will guess if this 6784 // info is not there but there are cases it cannot get right. 
6785 unsigned FrameAlign; 6786 if (Args[i].Alignment) 6787 FrameAlign = Args[i].Alignment; 6788 else 6789 FrameAlign = getByValTypeAlignment(ElementTy); 6790 Flags.setByValAlign(FrameAlign); 6791 } 6792 if (Args[i].isNest) 6793 Flags.setNest(); 6794 Flags.setOrigAlign(OriginalAlignment); 6795 6796 MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT); 6797 unsigned NumParts = getNumRegisters(CLI.RetTy->getContext(), VT); 6798 SmallVector<SDValue, 4> Parts(NumParts); 6799 ISD::NodeType ExtendKind = ISD::ANY_EXTEND; 6800 6801 if (Args[i].isSExt) 6802 ExtendKind = ISD::SIGN_EXTEND; 6803 else if (Args[i].isZExt) 6804 ExtendKind = ISD::ZERO_EXTEND; 6805 6806 // Conservatively only handle 'returned' on non-vectors for now 6807 if (Args[i].isReturned && !Op.getValueType().isVector()) { 6808 assert(CLI.RetTy == Args[i].Ty && RetTys.size() == NumValues && 6809 "unexpected use of 'returned'"); 6810 // Before passing 'returned' to the target lowering code, ensure that 6811 // either the register MVT and the actual EVT are the same size or that 6812 // the return value and argument are extended in the same way; in these 6813 // cases it's safe to pass the argument register value unchanged as the 6814 // return register value (although it's at the target's option whether 6815 // to do so) 6816 // TODO: allow code generation to take advantage of partially preserved 6817 // registers rather than clobbering the entire register when the 6818 // parameter extension method is not compatible with the return 6819 // extension method 6820 if ((NumParts * PartVT.getSizeInBits() == VT.getSizeInBits()) || 6821 (ExtendKind != ISD::ANY_EXTEND && 6822 CLI.RetSExt == Args[i].isSExt && CLI.RetZExt == Args[i].isZExt)) 6823 Flags.setReturned(); 6824 } 6825 6826 getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, 6827 PartVT, CLI.CS ? 
CLI.CS->getInstruction() : 0, ExtendKind); 6828 6829 for (unsigned j = 0; j != NumParts; ++j) { 6830 // if it isn't first piece, alignment must be 1 6831 ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), VT, 6832 i < CLI.NumFixedArgs, 6833 i, j*Parts[j].getValueType().getStoreSize()); 6834 if (NumParts > 1 && j == 0) 6835 MyFlags.Flags.setSplit(); 6836 else if (j != 0) 6837 MyFlags.Flags.setOrigAlign(1); 6838 6839 CLI.Outs.push_back(MyFlags); 6840 CLI.OutVals.push_back(Parts[j]); 6841 } 6842 } 6843 } 6844 6845 SmallVector<SDValue, 4> InVals; 6846 CLI.Chain = LowerCall(CLI, InVals); 6847 6848 // Verify that the target's LowerCall behaved as expected. 6849 assert(CLI.Chain.getNode() && CLI.Chain.getValueType() == MVT::Other && 6850 "LowerCall didn't return a valid chain!"); 6851 assert((!CLI.IsTailCall || InVals.empty()) && 6852 "LowerCall emitted a return value for a tail call!"); 6853 assert((CLI.IsTailCall || InVals.size() == CLI.Ins.size()) && 6854 "LowerCall didn't emit the correct number of values!"); 6855 6856 // For a tail call, the return value is merely live-out and there aren't 6857 // any nodes in the DAG representing it. Return a special value to 6858 // indicate that a tail call has been emitted and no more Instructions 6859 // should be processed in the current block. 6860 if (CLI.IsTailCall) { 6861 CLI.DAG.setRoot(CLI.Chain); 6862 return std::make_pair(SDValue(), SDValue()); 6863 } 6864 6865 DEBUG(for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) { 6866 assert(InVals[i].getNode() && 6867 "LowerCall emitted a null value!"); 6868 assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() && 6869 "LowerCall emitted a value with the wrong type!"); 6870 }); 6871 6872 // Collect the legal value parts into potentially illegal values 6873 // that correspond to the original function's return values. 
6874 ISD::NodeType AssertOp = ISD::DELETED_NODE; 6875 if (CLI.RetSExt) 6876 AssertOp = ISD::AssertSext; 6877 else if (CLI.RetZExt) 6878 AssertOp = ISD::AssertZext; 6879 SmallVector<SDValue, 4> ReturnValues; 6880 unsigned CurReg = 0; 6881 for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { 6882 EVT VT = RetTys[I]; 6883 MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); 6884 unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); 6885 6886 ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg], 6887 NumRegs, RegisterVT, VT, NULL, 6888 AssertOp)); 6889 CurReg += NumRegs; 6890 } 6891 6892 // For a function returning void, there is no return value. We can't create 6893 // such a node, so we just return a null return value in that case. In 6894 // that case, nothing will actually look at the value. 6895 if (ReturnValues.empty()) 6896 return std::make_pair(SDValue(), CLI.Chain); 6897 6898 SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL, 6899 CLI.DAG.getVTList(&RetTys[0], RetTys.size()), 6900 &ReturnValues[0], ReturnValues.size()); 6901 return std::make_pair(Res, CLI.Chain); 6902} 6903 6904void TargetLowering::LowerOperationWrapper(SDNode *N, 6905 SmallVectorImpl<SDValue> &Results, 6906 SelectionDAG &DAG) const { 6907 SDValue Res = LowerOperation(SDValue(N, 0), DAG); 6908 if (Res.getNode()) 6909 Results.push_back(Res); 6910} 6911 6912SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { 6913 llvm_unreachable("LowerOperation not implemented for this target!"); 6914} 6915 6916void 6917SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { 6918 SDValue Op = getNonRegisterValue(V); 6919 assert((Op.getOpcode() != ISD::CopyFromReg || 6920 cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) && 6921 "Copy from a reg to the same reg!"); 6922 assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg"); 6923 6924 const TargetLowering *TLI = TM.getTargetLowering(); 6925 
RegsForValue RFV(V->getContext(), *TLI, Reg, V->getType()); 6926 SDValue Chain = DAG.getEntryNode(); 6927 RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, 0, V); 6928 PendingExports.push_back(Chain); 6929} 6930 6931#include "llvm/CodeGen/SelectionDAGISel.h" 6932 6933/// isOnlyUsedInEntryBlock - If the specified argument is only used in the 6934/// entry block, return true. This includes arguments used by switches, since 6935/// the switch may expand into multiple basic blocks. 6936static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { 6937 // With FastISel active, we may be splitting blocks, so force creation 6938 // of virtual registers for all non-dead arguments. 6939 if (FastISel) 6940 return A->use_empty(); 6941 6942 const BasicBlock *Entry = A->getParent()->begin(); 6943 for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end(); 6944 UI != E; ++UI) { 6945 const User *U = *UI; 6946 if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U)) 6947 return false; // Use not in entry block. 6948 } 6949 return true; 6950} 6951 6952void SelectionDAGISel::LowerArguments(const Function &F) { 6953 SelectionDAG &DAG = SDB->DAG; 6954 SDLoc dl = SDB->getCurSDLoc(); 6955 const TargetLowering *TLI = getTargetLowering(); 6956 const DataLayout *TD = TLI->getDataLayout(); 6957 SmallVector<ISD::InputArg, 16> Ins; 6958 6959 if (!FuncInfo->CanLowerReturn) { 6960 // Put in an sret pointer parameter before all the other parameters. 6961 SmallVector<EVT, 1> ValueVTs; 6962 ComputeValueVTs(*getTargetLowering(), 6963 PointerType::getUnqual(F.getReturnType()), ValueVTs); 6964 6965 // NOTE: Assuming that a pointer will never break down to more than one VT 6966 // or one register. 
6967 ISD::ArgFlagsTy Flags; 6968 Flags.setSRet(); 6969 MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVTs[0]); 6970 ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true, 0, 0); 6971 Ins.push_back(RetArg); 6972 } 6973 6974 // Set up the incoming argument description vector. 6975 unsigned Idx = 1; 6976 for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); 6977 I != E; ++I, ++Idx) { 6978 SmallVector<EVT, 4> ValueVTs; 6979 ComputeValueVTs(*TLI, I->getType(), ValueVTs); 6980 bool isArgValueUsed = !I->use_empty(); 6981 unsigned PartBase = 0; 6982 for (unsigned Value = 0, NumValues = ValueVTs.size(); 6983 Value != NumValues; ++Value) { 6984 EVT VT = ValueVTs[Value]; 6985 Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); 6986 ISD::ArgFlagsTy Flags; 6987 unsigned OriginalAlignment = 6988 TD->getABITypeAlignment(ArgTy); 6989 6990 if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) 6991 Flags.setZExt(); 6992 if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) 6993 Flags.setSExt(); 6994 if (F.getAttributes().hasAttribute(Idx, Attribute::InReg)) 6995 Flags.setInReg(); 6996 if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet)) 6997 Flags.setSRet(); 6998 if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) { 6999 Flags.setByVal(); 7000 PointerType *Ty = cast<PointerType>(I->getType()); 7001 Type *ElementTy = Ty->getElementType(); 7002 Flags.setByValSize(TD->getTypeAllocSize(ElementTy)); 7003 // For ByVal, alignment should be passed from FE. BE will guess if 7004 // this info is not there but there are cases it cannot get right. 
7005 unsigned FrameAlign; 7006 if (F.getParamAlignment(Idx)) 7007 FrameAlign = F.getParamAlignment(Idx); 7008 else 7009 FrameAlign = TLI->getByValTypeAlignment(ElementTy); 7010 Flags.setByValAlign(FrameAlign); 7011 } 7012 if (F.getAttributes().hasAttribute(Idx, Attribute::Nest)) 7013 Flags.setNest(); 7014 Flags.setOrigAlign(OriginalAlignment); 7015 7016 MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT); 7017 unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT); 7018 for (unsigned i = 0; i != NumRegs; ++i) { 7019 ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed, 7020 Idx-1, PartBase+i*RegisterVT.getStoreSize()); 7021 if (NumRegs > 1 && i == 0) 7022 MyFlags.Flags.setSplit(); 7023 // if it isn't first piece, alignment must be 1 7024 else if (i > 0) 7025 MyFlags.Flags.setOrigAlign(1); 7026 Ins.push_back(MyFlags); 7027 } 7028 PartBase += VT.getStoreSize(); 7029 } 7030 } 7031 7032 // Call the target to set up the argument values. 7033 SmallVector<SDValue, 8> InVals; 7034 SDValue NewRoot = TLI->LowerFormalArguments(DAG.getRoot(), F.getCallingConv(), 7035 F.isVarArg(), Ins, 7036 dl, DAG, InVals); 7037 7038 // Verify that the target's LowerFormalArguments behaved as expected. 7039 assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other && 7040 "LowerFormalArguments didn't return a valid chain!"); 7041 assert(InVals.size() == Ins.size() && 7042 "LowerFormalArguments didn't emit the correct number of values!"); 7043 DEBUG({ 7044 for (unsigned i = 0, e = Ins.size(); i != e; ++i) { 7045 assert(InVals[i].getNode() && 7046 "LowerFormalArguments emitted a null value!"); 7047 assert(EVT(Ins[i].VT) == InVals[i].getValueType() && 7048 "LowerFormalArguments emitted a value with the wrong type!"); 7049 } 7050 }); 7051 7052 // Update the DAG with the new chain value resulting from argument lowering. 7053 DAG.setRoot(NewRoot); 7054 7055 // Set up the argument values. 
7056 unsigned i = 0; 7057 Idx = 1; 7058 if (!FuncInfo->CanLowerReturn) { 7059 // Create a virtual register for the sret pointer, and put in a copy 7060 // from the sret argument into it. 7061 SmallVector<EVT, 1> ValueVTs; 7062 ComputeValueVTs(*TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs); 7063 MVT VT = ValueVTs[0].getSimpleVT(); 7064 MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT); 7065 ISD::NodeType AssertOp = ISD::DELETED_NODE; 7066 SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, 7067 RegVT, VT, NULL, AssertOp); 7068 7069 MachineFunction& MF = SDB->DAG.getMachineFunction(); 7070 MachineRegisterInfo& RegInfo = MF.getRegInfo(); 7071 unsigned SRetReg = RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT)); 7072 FuncInfo->DemoteRegister = SRetReg; 7073 NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(), 7074 SRetReg, ArgValue); 7075 DAG.setRoot(NewRoot); 7076 7077 // i indexes lowered arguments. Bump it past the hidden sret argument. 7078 // Idx indexes LLVM arguments. Don't touch it. 7079 ++i; 7080 } 7081 7082 for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; 7083 ++I, ++Idx) { 7084 SmallVector<SDValue, 4> ArgValues; 7085 SmallVector<EVT, 4> ValueVTs; 7086 ComputeValueVTs(*TLI, I->getType(), ValueVTs); 7087 unsigned NumValues = ValueVTs.size(); 7088 7089 // If this argument is unused then remember its value. It is used to generate 7090 // debugging information. 7091 if (I->use_empty() && NumValues) { 7092 SDB->setUnusedArgValue(I, InVals[i]); 7093 7094 // Also remember any frame index for use in FastISel. 
7095 if (FrameIndexSDNode *FI = 7096 dyn_cast<FrameIndexSDNode>(InVals[i].getNode())) 7097 FuncInfo->setArgumentFrameIndex(I, FI->getIndex()); 7098 } 7099 7100 for (unsigned Val = 0; Val != NumValues; ++Val) { 7101 EVT VT = ValueVTs[Val]; 7102 MVT PartVT = TLI->getRegisterType(*CurDAG->getContext(), VT); 7103 unsigned NumParts = TLI->getNumRegisters(*CurDAG->getContext(), VT); 7104 7105 if (!I->use_empty()) { 7106 ISD::NodeType AssertOp = ISD::DELETED_NODE; 7107 if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) 7108 AssertOp = ISD::AssertSext; 7109 else if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) 7110 AssertOp = ISD::AssertZext; 7111 7112 ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], 7113 NumParts, PartVT, VT, 7114 NULL, AssertOp)); 7115 } 7116 7117 i += NumParts; 7118 } 7119 7120 // We don't need to do anything else for unused arguments. 7121 if (ArgValues.empty()) 7122 continue; 7123 7124 // Note down frame index. 7125 if (FrameIndexSDNode *FI = 7126 dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode())) 7127 FuncInfo->setArgumentFrameIndex(I, FI->getIndex()); 7128 7129 SDValue Res = DAG.getMergeValues(&ArgValues[0], NumValues, 7130 SDB->getCurSDLoc()); 7131 7132 SDB->setValue(I, Res); 7133 if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) { 7134 if (LoadSDNode *LNode = 7135 dyn_cast<LoadSDNode>(Res.getOperand(0).getNode())) 7136 if (FrameIndexSDNode *FI = 7137 dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) 7138 FuncInfo->setArgumentFrameIndex(I, FI->getIndex()); 7139 } 7140 7141 // If this argument is live outside of the entry block, insert a copy from 7142 // wherever we got it to the vreg that other BB's will reference it as. 7143 if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) { 7144 // If we can, though, try to skip creating an unnecessary vreg. 7145 // FIXME: This isn't very clean... it would be nice to make this more 7146 // general. 
It's also subtly incompatible with the hacks FastISel 7147 // uses with vregs. 7148 unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg(); 7149 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 7150 FuncInfo->ValueMap[I] = Reg; 7151 continue; 7152 } 7153 } 7154 if (!isOnlyUsedInEntryBlock(I, TM.Options.EnableFastISel)) { 7155 FuncInfo->InitializeRegForValue(I); 7156 SDB->CopyToExportRegsIfNeeded(I); 7157 } 7158 } 7159 7160 assert(i == InVals.size() && "Argument register count mismatch!"); 7161 7162 // Finally, if the target has anything special to do, allow it to do so. 7163 // FIXME: this should insert code into the DAG! 7164 EmitFunctionEntryCode(); 7165} 7166 7167/// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to 7168/// ensure constants are generated when needed. Remember the virtual registers 7169/// that need to be added to the Machine PHI nodes as input. We cannot just 7170/// directly add them, because expansion might result in multiple MBB's for one 7171/// BB. As such, the start of the BB might correspond to a different MBB than 7172/// the end. 7173/// 7174void 7175SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { 7176 const TerminatorInst *TI = LLVMBB->getTerminator(); 7177 7178 SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled; 7179 7180 // Check successor nodes' PHI nodes that expect a constant to be available 7181 // from this block. 7182 for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { 7183 const BasicBlock *SuccBB = TI->getSuccessor(succ); 7184 if (!isa<PHINode>(SuccBB->begin())) continue; 7185 MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB]; 7186 7187 // If this terminator has multiple identical successors (common for 7188 // switches), only handle each succ once. 
7189 if (!SuccsHandled.insert(SuccMBB)) continue; 7190 7191 MachineBasicBlock::iterator MBBI = SuccMBB->begin(); 7192 7193 // At this point we know that there is a 1-1 correspondence between LLVM PHI 7194 // nodes and Machine PHI nodes, but the incoming operands have not been 7195 // emitted yet. 7196 for (BasicBlock::const_iterator I = SuccBB->begin(); 7197 const PHINode *PN = dyn_cast<PHINode>(I); ++I) { 7198 // Ignore dead phi's. 7199 if (PN->use_empty()) continue; 7200 7201 // Skip empty types 7202 if (PN->getType()->isEmptyTy()) 7203 continue; 7204 7205 unsigned Reg; 7206 const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB); 7207 7208 if (const Constant *C = dyn_cast<Constant>(PHIOp)) { 7209 unsigned &RegOut = ConstantsOut[C]; 7210 if (RegOut == 0) { 7211 RegOut = FuncInfo.CreateRegs(C->getType()); 7212 CopyValueToVirtualRegister(C, RegOut); 7213 } 7214 Reg = RegOut; 7215 } else { 7216 DenseMap<const Value *, unsigned>::iterator I = 7217 FuncInfo.ValueMap.find(PHIOp); 7218 if (I != FuncInfo.ValueMap.end()) 7219 Reg = I->second; 7220 else { 7221 assert(isa<AllocaInst>(PHIOp) && 7222 FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) && 7223 "Didn't codegen value into a register!??"); 7224 Reg = FuncInfo.CreateRegs(PHIOp->getType()); 7225 CopyValueToVirtualRegister(PHIOp, Reg); 7226 } 7227 } 7228 7229 // Remember that this register needs to added to the machine PHI node as 7230 // the input for this MBB. 
7231 SmallVector<EVT, 4> ValueVTs; 7232 const TargetLowering *TLI = TM.getTargetLowering(); 7233 ComputeValueVTs(*TLI, PN->getType(), ValueVTs); 7234 for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { 7235 EVT VT = ValueVTs[vti]; 7236 unsigned NumRegisters = TLI->getNumRegisters(*DAG.getContext(), VT); 7237 for (unsigned i = 0, e = NumRegisters; i != e; ++i) 7238 FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i)); 7239 Reg += NumRegisters; 7240 } 7241 } 7242 } 7243 7244 ConstantsOut.clear(); 7245} 7246 7247/// Add a successor MBB to ParentMBB< creating a new MachineBB for BB if SuccMBB 7248/// is 0. 7249MachineBasicBlock * 7250SelectionDAGBuilder::StackProtectorDescriptor:: 7251AddSuccessorMBB(const BasicBlock *BB, 7252 MachineBasicBlock *ParentMBB, 7253 MachineBasicBlock *SuccMBB) { 7254 // If SuccBB has not been created yet, create it. 7255 if (!SuccMBB) { 7256 MachineFunction *MF = ParentMBB->getParent(); 7257 MachineFunction::iterator BBI = ParentMBB; 7258 SuccMBB = MF->CreateMachineBasicBlock(BB); 7259 MF->insert(++BBI, SuccMBB); 7260 } 7261 // Add it as a successor of ParentMBB. 7262 ParentMBB->addSuccessor(SuccMBB); 7263 return SuccMBB; 7264} 7265