InstCombineCalls.cpp revision dce4a407a24b04eebc6a376f8e62b41aaa7b071f
//===- InstCombineCalls.cpp -----------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the visitCall and visitInvoke functions.
//
//===----------------------------------------------------------------------===//

#include "InstCombine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "instcombine"

STATISTIC(NumSimplified, "Number of library calls simplified");

/// getPromotedType - Return the specified type promoted as it would be to pass
/// through a va_arg area.
static Type *getPromotedType(Type *Ty) {
  if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
    if (ITy->getBitWidth() < 32)
      return Type::getInt32Ty(Ty->getContext());
  }
  return Ty;
}

/// reduceToSingleValueType - Given an aggregate type which ultimately holds a
/// single scalar element, like {{{type}}} or [1 x type], return type.
static Type *reduceToSingleValueType(Type *T) {
  while (!T->isSingleValueType()) {
    if (StructType *STy = dyn_cast<StructType>(T)) {
      if (STy->getNumElements() == 1)
        T = STy->getElementType(0);
      else
        break;
    } else if (ArrayType *ATy = dyn_cast<ArrayType>(T)) {
      if (ATy->getNumElements() == 1)
        T = ATy->getElementType();
      else
        break;
    } else
      break;
  }

  return T;
}

Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
  unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL);
  unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL);
  unsigned MinAlign = std::min(DstAlign, SrcAlign);
  unsigned CopyAlign = MI->getAlignment();

  if (CopyAlign < MinAlign) {
    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
                                      MinAlign, false));
    return MI;
  }

  // If the MemCpyInst length is 1/2/4/8 bytes then replace the memcpy with
  // a load/store.
  ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getArgOperand(2));
  if (!MemOpLength) return nullptr;

  // Source and destination pointer types are always "i8*" for the intrinsic.
  // See if the size is something we can handle with a single primitive
  // load/store. A single load+store correctly handles overlapping memory in
  // the memmove case.
  uint64_t Size = MemOpLength->getLimitedValue();
  assert(Size && "0-sized memory transferring should be removed already.");

  if (Size > 8 || (Size&(Size-1)))
    return nullptr;  // If not 1/2/4/8 bytes, exit.

  // Use an integer load+store unless we can find something better.
  unsigned SrcAddrSp =
    cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
  unsigned DstAddrSp =
    cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace();

  IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
  Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
  Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);
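
  // Illustrative example (not from the original source): with Size == 8,
  //   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 8, i32 8, i1 false)
  // becomes, roughly,
  //   %v = load i64* %s.cast
  //   store i64 %v, i64* %d.cast
  // unless the search below finds a better element type than the integer.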

  // Memcpy forces the use of i8* for the source and destination. That means
  // that if you're using memcpy to move one double around, you'll get a cast
  // from double* to i8*. We'd much rather use a double load+store than an
  // i64 load+store here, because this improves the odds that the source or
  // dest address will be promotable. See if we can find a better type than
  // the integer datatype.
  Value *StrippedDest = MI->getArgOperand(0)->stripPointerCasts();
  MDNode *CopyMD = nullptr;
  if (StrippedDest != MI->getArgOperand(0)) {
    Type *SrcETy = cast<PointerType>(StrippedDest->getType())
                                    ->getElementType();
    if (DL && SrcETy->isSized() && DL->getTypeStoreSize(SrcETy) == Size) {
      // The SrcETy might be something like {{{double}}} or [1 x double]. Rip
      // down through these levels if so.
      SrcETy = reduceToSingleValueType(SrcETy);

      if (SrcETy->isSingleValueType()) {
        NewSrcPtrTy = PointerType::get(SrcETy, SrcAddrSp);
        NewDstPtrTy = PointerType::get(SrcETy, DstAddrSp);

        // If the memcpy has metadata describing the members, see if we can
        // get the TBAA tag describing our copy.
        if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) {
          if (M->getNumOperands() == 3 &&
              M->getOperand(0) &&
              isa<ConstantInt>(M->getOperand(0)) &&
              cast<ConstantInt>(M->getOperand(0))->isNullValue() &&
              M->getOperand(1) &&
              isa<ConstantInt>(M->getOperand(1)) &&
              cast<ConstantInt>(M->getOperand(1))->getValue() == Size &&
              M->getOperand(2) &&
              isa<MDNode>(M->getOperand(2)))
            CopyMD = cast<MDNode>(M->getOperand(2));
        }
      }
    }
  }

  // If the memcpy/memmove provides better alignment info than we can
  // infer, use it.
  SrcAlign = std::max(SrcAlign, CopyAlign);
  DstAlign = std::max(DstAlign, CopyAlign);

  Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
  Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
  LoadInst *L = Builder->CreateLoad(Src, MI->isVolatile());
  L->setAlignment(SrcAlign);
  if (CopyMD)
    L->setMetadata(LLVMContext::MD_tbaa, CopyMD);
  StoreInst *S = Builder->CreateStore(L, Dest, MI->isVolatile());
  S->setAlignment(DstAlign);
  if (CopyMD)
    S->setMetadata(LLVMContext::MD_tbaa, CopyMD);

  // Set the size of the copy to 0; it will be deleted on the next iteration.
  MI->setArgOperand(2, Constant::getNullValue(MemOpLength->getType()));
  return MI;
}

Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
  unsigned Alignment = getKnownAlignment(MI->getDest(), DL);
  if (MI->getAlignment() < Alignment) {
    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
                                      Alignment, false));
    return MI;
  }

  // Extract the length and alignment and fill if they are constant.
  ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
  ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
  if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
    return nullptr;
  uint64_t Len = LenC->getLimitedValue();
  Alignment = MI->getAlignment();
  assert(Len && "0-sized memory setting should be removed already.");

  // memset(s,c,n) -> store s, c (for n=1,2,4,8)
  if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
    Type *ITy = IntegerType::get(MI->getContext(), Len*8);  // n=1 -> i8.
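    // Illustrative example (not from the original source): for
    // memset(p, 0xAB, 4) the code below produces
    //   store i32 0xABABABAB, i32* %p.cast
    // i.e. the fill byte splatted across the store width.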

    Value *Dest = MI->getDest();
    unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
    Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
    Dest = Builder->CreateBitCast(Dest, NewDstPtrTy);

    // Alignment 0 is identity for alignment 1 for memset, but not store.
    if (Alignment == 0) Alignment = 1;

    // Extract the fill value and store.
    uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
    StoreInst *S = Builder->CreateStore(ConstantInt::get(ITy, Fill), Dest,
                                        MI->isVolatile());
    S->setAlignment(Alignment);

    // Set the size of the copy to 0; it will be deleted on the next iteration.
    MI->setLength(Constant::getNullValue(LenC->getType()));
    return MI;
  }

  return nullptr;
}

/// visitCallInst - CallInst simplification. This mostly only handles folding
/// of intrinsic instructions. For normal calls, it allows visitCallSite to do
/// the heavy lifting.
///
Instruction *InstCombiner::visitCallInst(CallInst &CI) {
  if (isFreeCall(&CI, TLI))
    return visitFree(CI);

  // If the caller function is nounwind, mark the call as nounwind, even if the
  // callee isn't.
  if (CI.getParent()->getParent()->doesNotThrow() &&
      !CI.doesNotThrow()) {
    CI.setDoesNotThrow();
    return &CI;
  }

  IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
  if (!II) return visitCallSite(&CI);

  // Intrinsics cannot occur in an invoke, so handle them here instead of in
  // visitCallSite.
  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) {
    bool Changed = false;

    // memmove/cpy/set of zero bytes is a noop.
    if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
      if (NumBytes->isNullValue())
        return EraseInstFromFunction(CI);

      if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
        if (CI->getZExtValue() == 1) {
          // Replace the instruction with just byte operations. We would
          // transform other cases to loads/stores, but we don't know if
          // alignment is sufficient.
        }
    }

    // No other transformations apply to volatile transfers.
    if (MI->isVolatile())
      return nullptr;

    // If we have a memmove and the source operation is a constant global,
    // then the source and dest pointers can't alias, so we can change this
    // into a call to memcpy.
    if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) {
      if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
        if (GVSrc->isConstant()) {
          Module *M = CI.getParent()->getParent()->getParent();
          Intrinsic::ID MemCpyID = Intrinsic::memcpy;
          Type *Tys[3] = { CI.getArgOperand(0)->getType(),
                           CI.getArgOperand(1)->getType(),
                           CI.getArgOperand(2)->getType() };
          CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys));
          Changed = true;
        }
    }

    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
      // memmove(x,x,size) -> noop.
      if (MTI->getSource() == MTI->getDest())
        return EraseInstFromFunction(CI);
    }

    // If we can determine a pointer alignment that is bigger than currently
    // set, update the alignment.
    if (isa<MemTransferInst>(MI)) {
      if (Instruction *I = SimplifyMemTransfer(MI))
        return I;
    } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) {
      if (Instruction *I = SimplifyMemSet(MSI))
        return I;
    }

    if (Changed) return II;
  }

  switch (II->getIntrinsicID()) {
  default: break;
  case Intrinsic::objectsize: {
    uint64_t Size;
    if (getObjectSize(II->getArgOperand(0), Size, DL, TLI))
      return ReplaceInstUsesWith(CI, ConstantInt::get(CI.getType(), Size));
    return nullptr;
  }
  case Intrinsic::bswap: {
    Value *IIOperand = II->getArgOperand(0);
    Value *X = nullptr;

    // bswap(bswap(x)) -> x
    if (match(IIOperand, m_BSwap(m_Value(X))))
      return ReplaceInstUsesWith(CI, X);

    // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
    if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
      unsigned C = X->getType()->getPrimitiveSizeInBits() -
        IIOperand->getType()->getPrimitiveSizeInBits();
      Value *CV = ConstantInt::get(X->getType(), C);
      Value *V = Builder->CreateLShr(X, CV);
      return new TruncInst(V, IIOperand->getType());
    }
    break;
  }

  case Intrinsic::powi:
    if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
      // powi(x, 0) -> 1.0
      if (Power->isZero())
        return ReplaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0));
      // powi(x, 1) -> x
      if (Power->isOne())
        return ReplaceInstUsesWith(CI, II->getArgOperand(0));
      // powi(x, -1) -> 1/x
      if (Power->isAllOnesValue())
        return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
                                          II->getArgOperand(0));
    }
    break;
  case Intrinsic::cttz: {
    // If all bits below the first known one are known zero,
    // this value is constant.
    IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType());
    // FIXME: Try to simplify vectors of integers.
    if (!IT) break;
    uint32_t BitWidth = IT->getBitWidth();
    APInt KnownZero(BitWidth, 0);
    APInt KnownOne(BitWidth, 0);
    computeKnownBits(II->getArgOperand(0), KnownZero, KnownOne);
    unsigned TrailingZeros = KnownOne.countTrailingZeros();
    APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros));
    if ((Mask & KnownZero) == Mask)
      return ReplaceInstUsesWith(CI, ConstantInt::get(IT,
                                 APInt(BitWidth, TrailingZeros)));

    }
    break;
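  // Illustrative example of the cttz fold (not from the original source): if
  // %x is known to be (%y << 3) | 8, bits 0-2 are known zero and bit 3 is
  // known one, so cttz(%x) folds to the constant 3; ctlz below is symmetric.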
  case Intrinsic::ctlz: {
    // If all bits above the first known one are known zero,
    // this value is constant.
    IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType());
    // FIXME: Try to simplify vectors of integers.
    if (!IT) break;
    uint32_t BitWidth = IT->getBitWidth();
    APInt KnownZero(BitWidth, 0);
    APInt KnownOne(BitWidth, 0);
    computeKnownBits(II->getArgOperand(0), KnownZero, KnownOne);
    unsigned LeadingZeros = KnownOne.countLeadingZeros();
    APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros));
    if ((Mask & KnownZero) == Mask)
      return ReplaceInstUsesWith(CI, ConstantInt::get(IT,
                                 APInt(BitWidth, LeadingZeros)));

    }
    break;
  case Intrinsic::uadd_with_overflow: {
    Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
    IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
    uint32_t BitWidth = IT->getBitWidth();
    APInt LHSKnownZero(BitWidth, 0);
    APInt LHSKnownOne(BitWidth, 0);
    computeKnownBits(LHS, LHSKnownZero, LHSKnownOne);
    bool LHSKnownNegative = LHSKnownOne[BitWidth - 1];
    bool LHSKnownPositive = LHSKnownZero[BitWidth - 1];

    if (LHSKnownNegative || LHSKnownPositive) {
      APInt RHSKnownZero(BitWidth, 0);
      APInt RHSKnownOne(BitWidth, 0);
      computeKnownBits(RHS, RHSKnownZero, RHSKnownOne);
      bool RHSKnownNegative = RHSKnownOne[BitWidth - 1];
      bool RHSKnownPositive = RHSKnownZero[BitWidth - 1];
      if (LHSKnownNegative && RHSKnownNegative) {
        // The sign bit is set in both cases: this MUST overflow.
        // Create a simple add instruction, and insert it into the struct.
        Value *Add = Builder->CreateAdd(LHS, RHS);
        Add->takeName(&CI);
        Constant *V[] = {
          UndefValue::get(LHS->getType()),
          ConstantInt::getTrue(II->getContext())
        };
        StructType *ST = cast<StructType>(II->getType());
        Constant *Struct = ConstantStruct::get(ST, V);
        return InsertValueInst::Create(Struct, Add, 0);
      }

      if (LHSKnownPositive && RHSKnownPositive) {
        // The sign bit is clear in both cases: this CANNOT overflow.
        // Create a simple add instruction, and insert it into the struct.
        Value *Add = Builder->CreateNUWAdd(LHS, RHS);
        Add->takeName(&CI);
        Constant *V[] = {
          UndefValue::get(LHS->getType()),
          ConstantInt::getFalse(II->getContext())
        };
        StructType *ST = cast<StructType>(II->getType());
        Constant *Struct = ConstantStruct::get(ST, V);
        return InsertValueInst::Create(Struct, Add, 0);
      }
    }
  }
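  // Illustrative example (not from the original source): when both operands of
  //   %r = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
  // have a known-clear sign bit, the fold above emits %s = add nuw i32 %a, %b
  // and inserts it into the constant struct { i32 undef, i1 false }.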
  // FALL THROUGH uadd into sadd
  case Intrinsic::sadd_with_overflow:
    // Canonicalize constants into the RHS.
    if (isa<Constant>(II->getArgOperand(0)) &&
        !isa<Constant>(II->getArgOperand(1))) {
      Value *LHS = II->getArgOperand(0);
      II->setArgOperand(0, II->getArgOperand(1));
      II->setArgOperand(1, LHS);
      return II;
    }

    // X + undef -> undef
    if (isa<UndefValue>(II->getArgOperand(1)))
      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));

    if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
      // X + 0 -> {X, false}
      if (RHS->isZero()) {
        Constant *V[] = {
          UndefValue::get(II->getArgOperand(0)->getType()),
          ConstantInt::getFalse(II->getContext())
        };
        Constant *Struct =
          ConstantStruct::get(cast<StructType>(II->getType()), V);
        return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
      }
    }
    break;
  case Intrinsic::usub_with_overflow:
  case Intrinsic::ssub_with_overflow:
    // undef - X -> undef
    // X - undef -> undef
    if (isa<UndefValue>(II->getArgOperand(0)) ||
        isa<UndefValue>(II->getArgOperand(1)))
      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));

    if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
      // X - 0 -> {X, false}
      if (RHS->isZero()) {
        Constant *V[] = {
          UndefValue::get(II->getArgOperand(0)->getType()),
          ConstantInt::getFalse(II->getContext())
        };
        Constant *Struct =
          ConstantStruct::get(cast<StructType>(II->getType()), V);
        return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
      }
    }
    break;
  case Intrinsic::umul_with_overflow: {
    Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
    unsigned BitWidth = cast<IntegerType>(LHS->getType())->getBitWidth();

    APInt LHSKnownZero(BitWidth, 0);
    APInt LHSKnownOne(BitWidth, 0);
    computeKnownBits(LHS, LHSKnownZero, LHSKnownOne);
    APInt RHSKnownZero(BitWidth, 0);
    APInt RHSKnownOne(BitWidth, 0);
    computeKnownBits(RHS, RHSKnownZero, RHSKnownOne);

    // Get the largest possible values for each operand.
    APInt LHSMax = ~LHSKnownZero;
    APInt RHSMax = ~RHSKnownZero;

    // If multiplying the maximum values does not overflow then we can turn
    // this into a plain NUW mul.
    bool Overflow;
    LHSMax.umul_ov(RHSMax, Overflow);
    if (!Overflow) {
      Value *Mul = Builder->CreateNUWMul(LHS, RHS, "umul_with_overflow");
      Constant *V[] = {
        UndefValue::get(LHS->getType()),
        Builder->getFalse()
      };
      Constant *Struct = ConstantStruct::get(cast<StructType>(II->getType()),V);
      return InsertValueInst::Create(Struct, Mul, 0);
    }
  } // FALL THROUGH
  case Intrinsic::smul_with_overflow:
    // Canonicalize constants into the RHS.
    if (isa<Constant>(II->getArgOperand(0)) &&
        !isa<Constant>(II->getArgOperand(1))) {
      Value *LHS = II->getArgOperand(0);
      II->setArgOperand(0, II->getArgOperand(1));
      II->setArgOperand(1, LHS);
      return II;
    }

    // X * undef -> undef
    if (isa<UndefValue>(II->getArgOperand(1)))
      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));

    if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
      // X*0 -> {0, false}
      if (RHSI->isZero())
        return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType()));

      // X * 1 -> {X, false}
      if (RHSI->equalsInt(1)) {
        Constant *V[] = {
          UndefValue::get(II->getArgOperand(0)->getType()),
          ConstantInt::getFalse(II->getContext())
        };
        Constant *Struct =
          ConstantStruct::get(cast<StructType>(II->getType()), V);
        return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
      }
    }
    break;
  case Intrinsic::ppc_altivec_lvx:
  case Intrinsic::ppc_altivec_lvxl:
    // Turn PPC lvx -> load if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL) >= 16) {
      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
                                          PointerType::getUnqual(II->getType()));
      return new LoadInst(Ptr);
    }
    break;
  case Intrinsic::ppc_altivec_stvx:
  case Intrinsic::ppc_altivec_stvxl:
    // Turn stvx -> store if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL) >= 16) {
      Type *OpPtrTy =
        PointerType::getUnqual(II->getArgOperand(0)->getType());
      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
      return new StoreInst(II->getArgOperand(0), Ptr);
    }
    break;
  case Intrinsic::x86_sse_storeu_ps:
  case Intrinsic::x86_sse2_storeu_pd:
  case Intrinsic::x86_sse2_storeu_dq:
    // Turn X86 storeu -> store if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL) >= 16) {
      Type *OpPtrTy =
        PointerType::getUnqual(II->getArgOperand(1)->getType());
      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy);
      return new StoreInst(II->getArgOperand(1), Ptr);
    }
    break;

  case Intrinsic::x86_sse_cvtss2si:
  case Intrinsic::x86_sse_cvtss2si64:
  case Intrinsic::x86_sse_cvttss2si:
  case Intrinsic::x86_sse_cvttss2si64:
  case Intrinsic::x86_sse2_cvtsd2si:
  case Intrinsic::x86_sse2_cvtsd2si64:
  case Intrinsic::x86_sse2_cvttsd2si:
  case Intrinsic::x86_sse2_cvttsd2si64: {
    // These intrinsics only demand the 0th element of their input vectors. If
    // we can simplify the input based on that, do so now.
    unsigned VWidth =
      cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
    APInt DemandedElts(VWidth, 1);
    APInt UndefElts(VWidth, 0);
    if (Value *V = SimplifyDemandedVectorElts(II->getArgOperand(0),
                                              DemandedElts, UndefElts)) {
      II->setArgOperand(0, V);
      return II;
    }
    break;
  }
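  // Illustrative example (not from the original source): cvttss2si reads only
  // lane 0, so an insertelement feeding lanes 1-3 of its operand is dead and
  // SimplifyDemandedVectorElts can replace those lanes with undef.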

  // Constant fold <A x Bi> << Ci.
  // FIXME: We don't handle _dq because it's a shift of an i128, but is
  // represented in the IR as <2 x i64>. A per element shift is wrong.
  case Intrinsic::x86_sse2_psll_d:
  case Intrinsic::x86_sse2_psll_q:
  case Intrinsic::x86_sse2_psll_w:
  case Intrinsic::x86_sse2_pslli_d:
  case Intrinsic::x86_sse2_pslli_q:
  case Intrinsic::x86_sse2_pslli_w:
  case Intrinsic::x86_avx2_psll_d:
  case Intrinsic::x86_avx2_psll_q:
  case Intrinsic::x86_avx2_psll_w:
  case Intrinsic::x86_avx2_pslli_d:
  case Intrinsic::x86_avx2_pslli_q:
  case Intrinsic::x86_avx2_pslli_w:
  case Intrinsic::x86_sse2_psrl_d:
  case Intrinsic::x86_sse2_psrl_q:
  case Intrinsic::x86_sse2_psrl_w:
  case Intrinsic::x86_sse2_psrli_d:
  case Intrinsic::x86_sse2_psrli_q:
  case Intrinsic::x86_sse2_psrli_w:
  case Intrinsic::x86_avx2_psrl_d:
  case Intrinsic::x86_avx2_psrl_q:
  case Intrinsic::x86_avx2_psrl_w:
  case Intrinsic::x86_avx2_psrli_d:
  case Intrinsic::x86_avx2_psrli_q:
  case Intrinsic::x86_avx2_psrli_w: {
    // Simplify if count is constant. To 0 if >= BitWidth,
    // otherwise to shl/lshr.
    auto CDV = dyn_cast<ConstantDataVector>(II->getArgOperand(1));
    auto CInt = dyn_cast<ConstantInt>(II->getArgOperand(1));
    if (!CDV && !CInt)
      break;
    ConstantInt *Count;
    if (CDV)
      Count = cast<ConstantInt>(CDV->getElementAsConstant(0));
    else
      Count = CInt;

    auto Vec = II->getArgOperand(0);
    auto VT = cast<VectorType>(Vec->getType());
    if (Count->getZExtValue() >
        VT->getElementType()->getPrimitiveSizeInBits() - 1)
      return ReplaceInstUsesWith(
          CI, ConstantAggregateZero::get(Vec->getType()));

    bool isPackedShiftLeft = true;
    switch (II->getIntrinsicID()) {
    default : break;
    case Intrinsic::x86_sse2_psrl_d:
    case Intrinsic::x86_sse2_psrl_q:
    case Intrinsic::x86_sse2_psrl_w:
    case Intrinsic::x86_sse2_psrli_d:
    case Intrinsic::x86_sse2_psrli_q:
    case Intrinsic::x86_sse2_psrli_w:
    case Intrinsic::x86_avx2_psrl_d:
    case Intrinsic::x86_avx2_psrl_q:
    case Intrinsic::x86_avx2_psrl_w:
    case Intrinsic::x86_avx2_psrli_d:
    case Intrinsic::x86_avx2_psrli_q:
    case Intrinsic::x86_avx2_psrli_w: isPackedShiftLeft = false; break;
    }

    unsigned VWidth = VT->getNumElements();
    // Get a constant vector of the same type as the first operand.
    auto VTCI = ConstantInt::get(VT->getElementType(), Count->getZExtValue());
    if (isPackedShiftLeft)
      return BinaryOperator::CreateShl(Vec,
          Builder->CreateVectorSplat(VWidth, VTCI));

    return BinaryOperator::CreateLShr(Vec,
        Builder->CreateVectorSplat(VWidth, VTCI));
  }
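  // Illustrative example (not from the original source): a constant count of 3
  // turns psrli.d(<4 x i32> %v, 3) into
  //   lshr <4 x i32> %v, <i32 3, i32 3, i32 3, i32 3>
  // and a count of 32 or more folds the whole result to zeroinitializer.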

  case Intrinsic::x86_sse41_pmovsxbw:
  case Intrinsic::x86_sse41_pmovsxwd:
  case Intrinsic::x86_sse41_pmovsxdq:
  case Intrinsic::x86_sse41_pmovzxbw:
  case Intrinsic::x86_sse41_pmovzxwd:
  case Intrinsic::x86_sse41_pmovzxdq: {
    // pmov{s|z}x ignores the upper half of their input vectors.
    unsigned VWidth =
      cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
    unsigned LowHalfElts = VWidth / 2;
    APInt InputDemandedElts(APInt::getBitsSet(VWidth, 0, LowHalfElts));
    APInt UndefElts(VWidth, 0);
    if (Value *TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0),
                                                 InputDemandedElts,
                                                 UndefElts)) {
      II->setArgOperand(0, TmpV);
      return II;
    }
    break;
  }

  case Intrinsic::x86_sse4a_insertqi: {
    // insertqi x, y, 64, 0 can just copy y's lower bits and leave the top
    // ones undef
    // TODO: eventually we should lower this intrinsic to IR
    if (auto CIWidth = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
      if (auto CIStart = dyn_cast<ConstantInt>(II->getArgOperand(3))) {
        if (CIWidth->equalsInt(64) && CIStart->isZero()) {
          Value *Vec = II->getArgOperand(1);
          Value *Undef = UndefValue::get(Vec->getType());
          const uint32_t Mask[] = { 0, 2 };
          return ReplaceInstUsesWith(
              CI,
              Builder->CreateShuffleVector(
                  Vec, Undef, ConstantDataVector::get(
                                  II->getContext(), ArrayRef<uint32_t>(Mask))));

        } else if (auto Source =
                       dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
          if (Source->hasOneUse() &&
              Source->getArgOperand(1) == II->getArgOperand(1)) {
            // If the source of the insert has only one use and it's another
            // insert (and they're both inserting from the same vector), try to
            // bundle both together.
            auto CISourceWidth =
                dyn_cast<ConstantInt>(Source->getArgOperand(2));
            auto CISourceStart =
                dyn_cast<ConstantInt>(Source->getArgOperand(3));
            if (CISourceStart && CISourceWidth) {
              unsigned Start = CIStart->getZExtValue();
              unsigned Width = CIWidth->getZExtValue();
              unsigned End = Start + Width;
              unsigned SourceStart = CISourceStart->getZExtValue();
              unsigned SourceWidth = CISourceWidth->getZExtValue();
              unsigned SourceEnd = SourceStart + SourceWidth;
              unsigned NewStart, NewWidth;
              bool ShouldReplace = false;
              if (Start <= SourceStart && SourceStart <= End) {
                NewStart = Start;
                NewWidth = std::max(End, SourceEnd) - NewStart;
                ShouldReplace = true;
              } else if (SourceStart <= Start && Start <= SourceEnd) {
                NewStart = SourceStart;
                NewWidth = std::max(SourceEnd, End) - NewStart;
                ShouldReplace = true;
              }

              if (ShouldReplace) {
                Constant *ConstantWidth = ConstantInt::get(
                    II->getArgOperand(2)->getType(), NewWidth, false);
                Constant *ConstantStart = ConstantInt::get(
                    II->getArgOperand(3)->getType(), NewStart, false);
                Value *Args[4] = { Source->getArgOperand(0),
                                   II->getArgOperand(1), ConstantWidth,
                                   ConstantStart };
                Module *M = CI.getParent()->getParent()->getParent();
                Value *F =
                    Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi);
                return ReplaceInstUsesWith(CI, Builder->CreateCall(F, Args));
              }
            }
          }
        }
      }
    }
    break;
  }

  case Intrinsic::x86_sse41_pblendvb:
  case Intrinsic::x86_sse41_blendvps:
  case Intrinsic::x86_sse41_blendvpd:
  case Intrinsic::x86_avx_blendv_ps_256:
  case Intrinsic::x86_avx_blendv_pd_256:
  case Intrinsic::x86_avx2_pblendvb: {
    // Convert blendv* to vector selects if the mask is constant.
    // This optimization is convoluted because the intrinsic is defined as
    // getting a vector of floats or doubles for the ps and pd versions.
    // FIXME: That should be changed.
    Value *Mask = II->getArgOperand(2);
    if (auto C = dyn_cast<ConstantDataVector>(Mask)) {
      auto Tyi1 = Builder->getInt1Ty();
      auto SelectorType = cast<VectorType>(Mask->getType());
      auto EltTy = SelectorType->getElementType();
      unsigned Size = SelectorType->getNumElements();
      unsigned BitWidth =
          EltTy->isFloatTy()
              ? 32
              : (EltTy->isDoubleTy() ? 64 : EltTy->getIntegerBitWidth());
      assert((BitWidth == 64 || BitWidth == 32 || BitWidth == 8) &&
             "Wrong arguments for variable blend intrinsic");
      SmallVector<Constant *, 32> Selectors;
      for (unsigned I = 0; I < Size; ++I) {
        // The intrinsics only read the top bit
        uint64_t Selector;
        if (BitWidth == 8)
          Selector = C->getElementAsInteger(I);
        else
          Selector = C->getElementAsAPFloat(I).bitcastToAPInt().getZExtValue();
        Selectors.push_back(ConstantInt::get(Tyi1, Selector >> (BitWidth - 1)));
      }
      auto NewSelector = ConstantVector::get(Selectors);
      return SelectInst::Create(NewSelector, II->getArgOperand(1),
                                II->getArgOperand(0), "blendv");
    } else {
      break;
    }
  }

  case Intrinsic::x86_avx_vpermilvar_ps:
  case Intrinsic::x86_avx_vpermilvar_ps_256:
  case Intrinsic::x86_avx_vpermilvar_pd:
  case Intrinsic::x86_avx_vpermilvar_pd_256: {
    // Convert vpermil* to shufflevector if the mask is constant.
    Value *V = II->getArgOperand(1);
    unsigned Size = cast<VectorType>(V->getType())->getNumElements();
    assert(Size == 8 || Size == 4 || Size == 2);
    uint32_t Indexes[8];
    if (auto C = dyn_cast<ConstantDataVector>(V)) {
      // The intrinsics only read one or two bits, clear the rest.
      for (unsigned I = 0; I < Size; ++I) {
        uint32_t Index = C->getElementAsInteger(I) & 0x3;
        if (II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd ||
            II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256)
          Index >>= 1;
        Indexes[I] = Index;
      }
    } else if (isa<ConstantAggregateZero>(V)) {
      for (unsigned I = 0; I < Size; ++I)
        Indexes[I] = 0;
    } else {
      break;
    }
    // The _256 variants are a bit trickier since the mask bits always index
    // into the corresponding 128 half. In order to convert to a generic
    // shuffle, we have to make that explicit.
    if (II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_ps_256 ||
        II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256) {
      for (unsigned I = Size / 2; I < Size; ++I)
        Indexes[I] += Size / 2;
    }
    auto NewC =
        ConstantDataVector::get(V->getContext(), makeArrayRef(Indexes, Size));
    auto V1 = II->getArgOperand(0);
    auto V2 = UndefValue::get(V1->getType());
    auto Shuffle = Builder->CreateShuffleVector(V1, V2, NewC);
    return ReplaceInstUsesWith(CI, Shuffle);
  }
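  // Illustrative example (not from the original source): vpermilvar.ps with
  // the constant mask <i32 3, i32 2, i32 1, i32 0> becomes
  //   shufflevector <4 x float> %x, <4 x float> undef,
  //                 <4 x i32> <i32 3, i32 2, i32 1, i32 0>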

  case Intrinsic::ppc_altivec_vperm:
    // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
    if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) {
      assert(Mask->getType()->getVectorNumElements() == 16 &&
             "Bad type for intrinsic!");

      // Check that all of the elements are integer constants or undefs.
      bool AllEltsOk = true;
      for (unsigned i = 0; i != 16; ++i) {
        Constant *Elt = Mask->getAggregateElement(i);
        if (!Elt || !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
          AllEltsOk = false;
          break;
        }
      }

      if (AllEltsOk) {
        // Cast the input vectors to byte vectors.
        Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0),
                                            Mask->getType());
        Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1),
                                            Mask->getType());
        Value *Result = UndefValue::get(Op0->getType());

        // Only extract each element once.
        Value *ExtractedElts[32];
        memset(ExtractedElts, 0, sizeof(ExtractedElts));

        for (unsigned i = 0; i != 16; ++i) {
          if (isa<UndefValue>(Mask->getAggregateElement(i)))
            continue;
          unsigned Idx =
            cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
          Idx &= 31;  // Match the hardware behavior.

          if (!ExtractedElts[Idx]) {
            ExtractedElts[Idx] =
              Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1,
                                            Builder->getInt32(Idx&15));
          }

          // Insert this value into the result vector.
          Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx],
                                                Builder->getInt32(i));
        }
        return CastInst::Create(Instruction::BitCast, Result, CI.getType());
      }
    }
    break;

  case Intrinsic::arm_neon_vld1:
  case Intrinsic::arm_neon_vld2:
  case Intrinsic::arm_neon_vld3:
  case Intrinsic::arm_neon_vld4:
  case Intrinsic::arm_neon_vld2lane:
  case Intrinsic::arm_neon_vld3lane:
  case Intrinsic::arm_neon_vld4lane:
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), DL);
    unsigned AlignArg = II->getNumArgOperands() - 1;
    ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
    if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
      II->setArgOperand(AlignArg,
                        ConstantInt::get(Type::getInt32Ty(II->getContext()),
                                         MemAlign, false));
      return II;
    }
    break;
  }

  case Intrinsic::arm_neon_vmulls:
  case Intrinsic::arm_neon_vmullu:
  case Intrinsic::aarch64_neon_smull:
  case Intrinsic::aarch64_neon_umull: {
    Value *Arg0 = II->getArgOperand(0);
    Value *Arg1 = II->getArgOperand(1);

    // Handle mul by zero first:
    if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
      return ReplaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
    }

    // Check for constant LHS & RHS - in this case we just simplify.
    bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu ||
                 II->getIntrinsicID() == Intrinsic::aarch64_neon_umull);
    VectorType *NewVT = cast<VectorType>(II->getType());
    if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
      if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
        CV0 = ConstantExpr::getIntegerCast(CV0, NewVT, /*isSigned=*/!Zext);
        CV1 = ConstantExpr::getIntegerCast(CV1, NewVT, /*isSigned=*/!Zext);

        return ReplaceInstUsesWith(CI, ConstantExpr::getMul(CV0, CV1));
      }

      // Couldn't simplify - canonicalize constant to the RHS.
      std::swap(Arg0, Arg1);
    }

    // Handle mul by one:
    if (Constant *CV1 = dyn_cast<Constant>(Arg1))
      if (ConstantInt *Splat =
              dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
        if (Splat->isOne())
          return CastInst::CreateIntegerCast(Arg0, II->getType(),
                                             /*isSigned=*/!Zext);

    break;
  }
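  // Illustrative example (not from the original source): for the mul-by-one
  // fold above, vmullu(%x, splat(1)) only widens each lane, so it becomes a
  // plain zext of %x to the wider vector type (sext for the signed variants).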

  case Intrinsic::stackrestore: {
    // If the save is right next to the restore, remove the restore. This can
    // happen when variable allocas are DCE'd.
    if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
      if (SS->getIntrinsicID() == Intrinsic::stacksave) {
        BasicBlock::iterator BI = SS;
        if (&*++BI == II)
          return EraseInstFromFunction(CI);
      }
    }

    // Scan down this block to see if there is another stack restore in the
    // same block without an intervening call/alloca.
    BasicBlock::iterator BI = II;
    TerminatorInst *TI = II->getParent()->getTerminator();
    bool CannotRemove = false;
    for (++BI; &*BI != TI; ++BI) {
      if (isa<AllocaInst>(BI)) {
        CannotRemove = true;
        break;
      }
      if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
        if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
          // If there is a stackrestore below this one, remove this one.
          if (II->getIntrinsicID() == Intrinsic::stackrestore)
            return EraseInstFromFunction(CI);
          // Otherwise, ignore the intrinsic.
        } else {
          // If we found a non-intrinsic call, we can't remove the stack
          // restore.
          CannotRemove = true;
          break;
        }
      }
    }

    // If the stack restore is in a return, resume, or unwind block and if
    // there are no allocas or calls between the restore and the return, nuke
    // the restore.
    if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
      return EraseInstFromFunction(CI);
    break;
  }
  }

  return visitCallSite(II);
}

// InvokeInst simplification
//
Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
  return visitCallSite(&II);
}

/// isSafeToEliminateVarargsCast - If this cast does not affect the value
/// passed through the varargs area, we can eliminate the use of the cast.
static bool isSafeToEliminateVarargsCast(const CallSite CS,
                                         const CastInst * const CI,
                                         const DataLayout * const DL,
                                         const int ix) {
  if (!CI->isLosslessCast())
    return false;

  // The size of ByVal or InAlloca arguments is derived from the type, so we
  // can't change to a type with a different size. If the size were
  // passed explicitly we could avoid this check.
  if (!CS.isByValOrInAllocaArgument(ix))
    return true;

  Type* SrcTy =
          cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
  Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
  if (!SrcTy->isSized() || !DstTy->isSized())
    return false;
  if (!DL || DL->getTypeAllocSize(SrcTy) != DL->getTypeAllocSize(DstTy))
    return false;
  return true;
}

// Try to fold some different types of calls here.
// Currently we're only working with the checking functions, memcpy_chk,
// mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk,
// strcat_chk and strncat_chk.
Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const DataLayout *DL) {
  if (!CI->getCalledFunction()) return nullptr;

  if (Value *With = Simplifier->optimizeCall(CI)) {
    ++NumSimplified;
    return CI->use_empty() ? CI : ReplaceInstUsesWith(*CI, With);
  }

  return nullptr;
}
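
// Illustrative example (an assumption about SimplifyLibCalls, not shown in
// this file): a fortified call such as __memcpy_chk(dst, src, n, objsize) is
// expected to fold to a plain memcpy when n is a constant that provably does
// not exceed objsize; the actual logic lives behind Simplifier->optimizeCall.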

static IntrinsicInst *FindInitTrampolineFromAlloca(Value *TrampMem) {
  // Strip off at most one level of pointer casts, looking for an alloca. This
  // is good enough in practice and simpler than handling any number of casts.
  Value *Underlying = TrampMem->stripPointerCasts();
  if (Underlying != TrampMem &&
      (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
    return nullptr;
  if (!isa<AllocaInst>(Underlying))
    return nullptr;

  IntrinsicInst *InitTrampoline = nullptr;
  for (User *U : TrampMem->users()) {
    IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
    if (!II)
      return nullptr;
    if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
      if (InitTrampoline)
        // More than one init_trampoline writes to this value. Give up.
        return nullptr;
      InitTrampoline = II;
      continue;
    }
    if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
      // Allow any number of calls to adjust.trampoline.
      continue;
    return nullptr;
  }

  // No call to init.trampoline found.
  if (!InitTrampoline)
    return nullptr;

  // Check that the alloca is being used in the expected way.
  if (InitTrampoline->getOperand(0) != TrampMem)
    return nullptr;

  return InitTrampoline;
}

static IntrinsicInst *FindInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
                                               Value *TrampMem) {
  // Visit all the previous instructions in the basic block, and try to find
  // an init.trampoline which has a direct path to the adjust.trampoline.
  for (BasicBlock::iterator I = AdjustTramp,
       E = AdjustTramp->getParent()->begin(); I != E; ) {
    Instruction *Inst = --I;
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
      if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
          II->getOperand(0) == TrampMem)
        return II;
    if (Inst->mayWriteToMemory())
      return nullptr;
  }
  return nullptr;
}

// Given a call to llvm.adjust.trampoline, find and return the corresponding
// call to llvm.init.trampoline if the call to the trampoline can be optimized
// to a direct call to a function. Otherwise return NULL.
//
static IntrinsicInst *FindInitTrampoline(Value *Callee) {
  Callee = Callee->stripPointerCasts();
  IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
  if (!AdjustTramp ||
      AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
    return nullptr;

  Value *TrampMem = AdjustTramp->getOperand(0);

  if (IntrinsicInst *IT = FindInitTrampolineFromAlloca(TrampMem))
    return IT;
  if (IntrinsicInst *IT = FindInitTrampolineFromBB(AdjustTramp, TrampMem))
    return IT;
  return nullptr;
}

// visitCallSite - Improvements for call and invoke instructions.
//
Instruction *InstCombiner::visitCallSite(CallSite CS) {
  if (isAllocLikeFn(CS.getInstruction(), TLI))
    return visitAllocSite(*CS.getInstruction());

  bool Changed = false;

  // If the callee is a pointer to a function, attempt to move any casts to the
  // arguments of the call/invoke.
  Value *Callee = CS.getCalledValue();
  if (!isa<Function>(Callee) && transformConstExprCastCall(CS))
    return nullptr;

  if (Function *CalleeF = dyn_cast<Function>(Callee))
    // If the call and callee calling conventions don't match, this call must
    // be unreachable, as the call is undefined.
    if (CalleeF->getCallingConv() != CS.getCallingConv() &&
        // Only do this for calls to a function with a body. A prototype may
        // not actually end up matching the implementation's calling conv for a
        // variety of reasons (e.g. it may be written in assembly).
        !CalleeF->isDeclaration()) {
      Instruction *OldCall = CS.getInstruction();
      new StoreInst(ConstantInt::getTrue(Callee->getContext()),
                UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
                                  OldCall);
      // If OldCall does not return void then replaceAllUsesWith undef.
      // This allows ValueHandlers and custom metadata to adjust themselves.
      if (!OldCall->getType()->isVoidTy())
        ReplaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType()));
      if (isa<CallInst>(OldCall))
        return EraseInstFromFunction(*OldCall);

      // We cannot remove an invoke, because that would change the CFG; just
      // change the callee to a null pointer.
      cast<InvokeInst>(OldCall)->setCalledFunction(
                                    Constant::getNullValue(CalleeF->getType()));
      return nullptr;
    }

  if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
    // If CS does not return void then replaceAllUsesWith undef.
    // This allows ValueHandlers and custom metadata to adjust themselves.
    if (!CS.getInstruction()->getType()->isVoidTy())
      ReplaceInstUsesWith(*CS.getInstruction(),
                          UndefValue::get(CS.getInstruction()->getType()));

    if (isa<InvokeInst>(CS.getInstruction())) {
      // Can't remove an invoke because we cannot change the CFG.
      return nullptr;
    }

    // This instruction is not reachable; just remove it. We insert a store to
    // undef so that we know that this code is not reachable, despite the fact
    // that we can't modify the CFG here.
    new StoreInst(ConstantInt::getTrue(Callee->getContext()),
                  UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
                  CS.getInstruction());

    return EraseInstFromFunction(*CS.getInstruction());
  }

  if (IntrinsicInst *II = FindInitTrampoline(Callee))
    return transformCallThroughTrampoline(CS, II);

  PointerType *PTy = cast<PointerType>(Callee->getType());
  FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
  if (FTy->isVarArg()) {
    int ix = FTy->getNumParams();
    // See if we can optimize any arguments passed through the varargs area of
    // the call.
    for (CallSite::arg_iterator I = CS.arg_begin() + FTy->getNumParams(),
           E = CS.arg_end(); I != E; ++I, ++ix) {
      CastInst *CI = dyn_cast<CastInst>(*I);
      if (CI && isSafeToEliminateVarargsCast(CS, CI, DL, ix)) {
        *I = CI->getOperand(0);
        Changed = true;
      }
    }
  }

  if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
    // Inline asm calls cannot throw - mark them 'nounwind'.
    CS.setDoesNotThrow();
    Changed = true;
  }

  // Try to optimize the call if possible; we require DataLayout for most of
  // this. None of these calls are seen as possibly dead so go ahead and
  // delete the instruction now.
  if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
    Instruction *I = tryOptimizeCall(CI, DL);
    // If we changed something, return the result; otherwise fall through to
    // the remaining checks.
    if (I) return EraseInstFromFunction(*I);
  }

  return Changed ? CS.getInstruction() : nullptr;
}

// transformConstExprCastCall - If the callee is a constexpr cast of a function,
// attempt to move the cast to the arguments of the call/invoke.
//
bool InstCombiner::transformConstExprCastCall(CallSite CS) {
  Function *Callee =
    dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
  if (!Callee)
    return false;
  Instruction *Caller = CS.getInstruction();
  const AttributeSet &CallerPAL = CS.getAttributes();

  // Okay, this is a cast from a function to a different type. Unless doing so
  // would cause a type conversion of one of our arguments, change this call to
  // be a direct call with arguments casted to the appropriate types.
  //
  FunctionType *FT = Callee->getFunctionType();
  Type *OldRetTy = Caller->getType();
  Type *NewRetTy = FT->getReturnType();

  // Check to see if we are changing the return type...
  if (OldRetTy != NewRetTy) {

    if (NewRetTy->isStructTy())
      return false; // TODO: Handle multiple return values.

    if (!CastInst::isBitCastable(NewRetTy, OldRetTy)) {
      if (Callee->isDeclaration())
        return false;   // Cannot transform this return value.

      if (!Caller->use_empty() &&
          // void -> non-void is handled specially
          !NewRetTy->isVoidTy())
        return false;   // Cannot transform this return value.
    }

    if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
      AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex);
      if (RAttrs.
          hasAttributes(AttributeFuncs::
                        typeIncompatible(NewRetTy, AttributeSet::ReturnIndex),
                        AttributeSet::ReturnIndex))
        return false;   // Attribute not compatible with transformed value.
    }

    // If the callsite is an invoke instruction, and the return value is used
    // by a PHI node in a successor, we cannot change the return type of the
    // call because there is no place to put the cast instruction (without
    // breaking the critical edge). Bail out in this case.
    if (!Caller->use_empty())
      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
        for (User *U : II->users())
          if (PHINode *PN = dyn_cast<PHINode>(U))
            if (PN->getParent() == II->getNormalDest() ||
                PN->getParent() == II->getUnwindDest())
              return false;
  }

  unsigned NumActualArgs = CS.arg_size();
  unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);

  CallSite::arg_iterator AI = CS.arg_begin();
  for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
    Type *ParamTy = FT->getParamType(i);
    Type *ActTy = (*AI)->getType();

    if (!CastInst::isBitCastable(ActTy, ParamTy))
      return false;   // Cannot transform this parameter value.

    if (AttrBuilder(CallerPAL.getParamAttributes(i + 1), i + 1).
          hasAttributes(AttributeFuncs::
                        typeIncompatible(ParamTy, i + 1), i + 1))
      return false;   // Attribute not compatible with transformed value.

    if (CS.isInAllocaArgument(i))
      return false;   // Cannot transform to and from inalloca.

    // If the parameter is passed as a byval argument, then we have to have a
    // sized type and the sized type has to have the same size as the old type.
    if (ParamTy != ActTy &&
        CallerPAL.getParamAttributes(i + 1).hasAttribute(i + 1,
                                                         Attribute::ByVal)) {
      PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
      if (!ParamPTy || !ParamPTy->getElementType()->isSized() || !DL)
        return false;

      Type *CurElTy = ActTy->getPointerElementType();
      if (DL->getTypeAllocSize(CurElTy) !=
          DL->getTypeAllocSize(ParamPTy->getElementType()))
        return false;
    }
  }

  if (Callee->isDeclaration()) {
    // Do not delete arguments unless we have a function body.
    if (FT->getNumParams() < NumActualArgs && !FT->isVarArg())
      return false;

    // If the callee is just a declaration, don't change the varargsness of the
    // call. We don't want to introduce a varargs call where one doesn't
    // already exist.
    PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType());
    if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg())
      return false;

    // If both the callee and the cast type are varargs, we still have to make
    // sure the number of fixed parameters is the same or we have the same
    // ABI issues as if we introduce a varargs call.
    if (FT->isVarArg() &&
        cast<FunctionType>(APTy->getElementType())->isVarArg() &&
        FT->getNumParams() !=
        cast<FunctionType>(APTy->getElementType())->getNumParams())
      return false;
  }

  if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
      !CallerPAL.isEmpty())
    // In this case we have more arguments than the new function type, but we
    // won't be dropping them. Check that these extra arguments have attributes
    // that are compatible with being a vararg call argument.
    for (unsigned i = CallerPAL.getNumSlots(); i; --i) {
      unsigned Index = CallerPAL.getSlotIndex(i - 1);
      if (Index <= FT->getNumParams())
        break;

      // Check if it has an attribute that's incompatible with varargs.
      AttributeSet PAttrs = CallerPAL.getSlotAttributes(i - 1);
      if (PAttrs.hasAttribute(Index, Attribute::StructRet))
        return false;
    }


  // Okay, we decided that this is a safe thing to do: go ahead and start
  // inserting cast instructions as necessary.
  std::vector<Value*> Args;
  Args.reserve(NumActualArgs);
  SmallVector<AttributeSet, 8> attrVec;
  attrVec.reserve(NumCommonArgs);

  // Get any return attributes.
  AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex);

  // If the return value is not being used, the type may not be compatible
  // with the existing attributes. Wipe out any problematic attributes.
  RAttrs.
    removeAttributes(AttributeFuncs::
                     typeIncompatible(NewRetTy, AttributeSet::ReturnIndex),
                     AttributeSet::ReturnIndex);

  // Add the new return attributes.
  if (RAttrs.hasAttributes())
    attrVec.push_back(AttributeSet::get(Caller->getContext(),
                                        AttributeSet::ReturnIndex, RAttrs));

  AI = CS.arg_begin();
  for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
    Type *ParamTy = FT->getParamType(i);

    if ((*AI)->getType() == ParamTy) {
      Args.push_back(*AI);
    } else {
      Args.push_back(Builder->CreateBitCast(*AI, ParamTy));
    }

    // Add any parameter attributes.
    AttrBuilder PAttrs(CallerPAL.getParamAttributes(i + 1), i + 1);
    if (PAttrs.hasAttributes())
      attrVec.push_back(AttributeSet::get(Caller->getContext(), i + 1,
                                          PAttrs));
  }

  // If the function takes more arguments than the call was taking, add them
  // now.
  for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i)
    Args.push_back(Constant::getNullValue(FT->getParamType(i)));

  // If we are removing arguments to the function, emit an obnoxious warning.
  if (FT->getNumParams() < NumActualArgs) {
    // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
    if (FT->isVarArg()) {
      // Add all of the arguments in their promoted form to the arg list.
      for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
        Type *PTy = getPromotedType((*AI)->getType());
        if (PTy != (*AI)->getType()) {
          // Must promote to pass through va_arg area!
          Instruction::CastOps opcode =
            CastInst::getCastOpcode(*AI, false, PTy, false);
          Args.push_back(Builder->CreateCast(opcode, *AI, PTy));
        } else {
          Args.push_back(*AI);
        }

        // Add any parameter attributes.
        AttrBuilder PAttrs(CallerPAL.getParamAttributes(i + 1), i + 1);
        if (PAttrs.hasAttributes())
          attrVec.push_back(AttributeSet::get(FT->getContext(), i + 1,
                                              PAttrs));
      }
    }
  }

  AttributeSet FnAttrs = CallerPAL.getFnAttributes();
  if (CallerPAL.hasAttributes(AttributeSet::FunctionIndex))
    attrVec.push_back(AttributeSet::get(Callee->getContext(), FnAttrs));

  if (NewRetTy->isVoidTy())
    Caller->setName("");   // Void type should not have a name.

  const AttributeSet &NewCallerPAL = AttributeSet::get(Callee->getContext(),
                                                       attrVec);

  Instruction *NC;
  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
    NC = Builder->CreateInvoke(Callee, II->getNormalDest(),
                               II->getUnwindDest(), Args);
    NC->takeName(II);
    cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
    cast<InvokeInst>(NC)->setAttributes(NewCallerPAL);
  } else {
    CallInst *CI = cast<CallInst>(Caller);
    NC = Builder->CreateCall(Callee, Args);
    NC->takeName(CI);
    if (CI->isTailCall())
      cast<CallInst>(NC)->setTailCall();
    cast<CallInst>(NC)->setCallingConv(CI->getCallingConv());
    cast<CallInst>(NC)->setAttributes(NewCallerPAL);
  }

  // Insert a cast of the return type as necessary.
  Value *NV = NC;
  if (OldRetTy != NV->getType() && !Caller->use_empty()) {
    if (!NV->getType()->isVoidTy()) {
      NV = NC = CastInst::Create(CastInst::BitCast, NC, OldRetTy);
      NC->setDebugLoc(Caller->getDebugLoc());

      // If this is an invoke instruction, we should insert it after the first
      // non-phi instruction in the normal successor block.
      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
        BasicBlock::iterator I = II->getNormalDest()->getFirstInsertionPt();
        InsertNewInstBefore(NC, *I);
      } else {
        // Otherwise, it's a call: just insert the cast right after the call.
        InsertNewInstBefore(NC, *Caller);
      }
      Worklist.AddUsersToWorkList(*Caller);
    } else {
      NV = UndefValue::get(Caller->getType());
    }
  }

  if (!Caller->use_empty())
    ReplaceInstUsesWith(*Caller, NV);
  else if (Caller->hasValueHandle())
    ValueHandleBase::ValueIsRAUWd(Caller, NV);

  EraseInstFromFunction(*Caller);
  return true;
}

// transformCallThroughTrampoline - Turn a call to a function created by
// init_trampoline / adjust_trampoline intrinsic pair into a direct call to the
// underlying function.
//
Instruction *
InstCombiner::transformCallThroughTrampoline(CallSite CS,
                                             IntrinsicInst *Tramp) {
  Value *Callee = CS.getCalledValue();
  PointerType *PTy = cast<PointerType>(Callee->getType());
  FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
  const AttributeSet &Attrs = CS.getAttributes();

  // If the call already has the 'nest' attribute somewhere then give up -
  // otherwise 'nest' would occur twice after splicing in the chain.
  if (Attrs.hasAttrSomewhere(Attribute::Nest))
    return nullptr;

  assert(Tramp &&
         "transformCallThroughTrampoline called with incorrect CallSite.");

  Function *NestF = cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
  PointerType *NestFPTy = cast<PointerType>(NestF->getType());
  FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());

  const AttributeSet &NestAttrs = NestF->getAttributes();
  if (!NestAttrs.isEmpty()) {
    unsigned NestIdx = 1;
    Type *NestTy = nullptr;
    AttributeSet NestAttr;

    // Look for a parameter marked with the 'nest' attribute.
    for (FunctionType::param_iterator I = NestFTy->param_begin(),
         E = NestFTy->param_end(); I != E; ++NestIdx, ++I)
      if (NestAttrs.hasAttribute(NestIdx, Attribute::Nest)) {
        // Record the parameter type and any other attributes.
        NestTy = *I;
        NestAttr = NestAttrs.getParamAttributes(NestIdx);
        break;
      }

    if (NestTy) {
      Instruction *Caller = CS.getInstruction();
      std::vector<Value*> NewArgs;
      NewArgs.reserve(CS.arg_size() + 1);

      SmallVector<AttributeSet, 8> NewAttrs;
      NewAttrs.reserve(Attrs.getNumSlots() + 1);

      // Insert the nest argument into the call argument list, which may
      // mean appending it. Likewise for attributes.

      // Add any result attributes.
      if (Attrs.hasAttributes(AttributeSet::ReturnIndex))
        NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
                                             Attrs.getRetAttributes()));

      {
        unsigned Idx = 1;
        CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
        do {
          if (Idx == NestIdx) {
            // Add the chain argument and attributes.
            Value *NestVal = Tramp->getArgOperand(2);
            if (NestVal->getType() != NestTy)
              NestVal = Builder->CreateBitCast(NestVal, NestTy, "nest");
            NewArgs.push_back(NestVal);
            NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
                                                 NestAttr));
          }

          if (I == E)
            break;

          // Add the original argument and attributes.
          NewArgs.push_back(*I);
          AttributeSet Attr = Attrs.getParamAttributes(Idx);
          if (Attr.hasAttributes(Idx)) {
            AttrBuilder B(Attr, Idx);
            NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
                                                 Idx + (Idx >= NestIdx), B));
          }

          ++Idx, ++I;
        } while (1);
      }

      // Add any function attributes.
      if (Attrs.hasAttributes(AttributeSet::FunctionIndex))
        NewAttrs.push_back(AttributeSet::get(FTy->getContext(),
                                             Attrs.getFnAttributes()));

      // The trampoline may have been bitcast to a bogus type (FTy).
      // Handle this by synthesizing a new function type, equal to FTy
      // with the chain parameter inserted.

      std::vector<Type*> NewTypes;
      NewTypes.reserve(FTy->getNumParams()+1);

      // Insert the chain's type into the list of parameter types, which may
      // mean appending it.
      {
        unsigned Idx = 1;
        FunctionType::param_iterator I = FTy->param_begin(),
          E = FTy->param_end();

        do {
          if (Idx == NestIdx)
            // Add the chain's type.
            NewTypes.push_back(NestTy);

          if (I == E)
            break;

          // Add the original type.
          NewTypes.push_back(*I);

          ++Idx, ++I;
        } while (1);
      }

      // Replace the trampoline call with a direct call. Let the generic
      // code sort out any function type mismatches.
      FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
                                               FTy->isVarArg());
      Constant *NewCallee =
        NestF->getType() == PointerType::getUnqual(NewFTy) ?
        NestF : ConstantExpr::getBitCast(NestF,
                                         PointerType::getUnqual(NewFTy));
      const AttributeSet &NewPAL =
          AttributeSet::get(FTy->getContext(), NewAttrs);

      Instruction *NewCaller;
      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
        NewCaller = InvokeInst::Create(NewCallee,
                                       II->getNormalDest(), II->getUnwindDest(),
                                       NewArgs);
        cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
        cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
      } else {
        NewCaller = CallInst::Create(NewCallee, NewArgs);
        if (cast<CallInst>(Caller)->isTailCall())
          cast<CallInst>(NewCaller)->setTailCall();
        cast<CallInst>(NewCaller)->
          setCallingConv(cast<CallInst>(Caller)->getCallingConv());
        cast<CallInst>(NewCaller)->setAttributes(NewPAL);
      }

      return NewCaller;
    }
  }

  // Replace the trampoline call with a direct call. Since there is no 'nest'
  // parameter, there is no need to adjust the argument list. Let the generic
  // code sort out any function type mismatches.
  Constant *NewCallee =
    NestF->getType() == PTy ? NestF :
                              ConstantExpr::getBitCast(NestF, PTy);
  CS.setCalledFunction(NewCallee);
  return CS.getInstruction();
}