InstructionCombining.cpp revision 02446fc99abb06d3117d65c0b1f5fba4f906db2e
1//===- InstructionCombining.cpp - Combine multiple instructions -----------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// InstructionCombining - Combine instructions to form fewer, simple 11// instructions. This pass does not modify the CFG. This pass is where 12// algebraic simplification happens. 13// 14// This pass combines things like: 15// %Y = add i32 %X, 1 16// %Z = add i32 %Y, 1 17// into: 18// %Z = add i32 %X, 2 19// 20// This is a simple worklist driven algorithm. 21// 22// This pass guarantees that the following canonicalizations are performed on 23// the program: 24// 1. If a binary operator has a constant operand, it is moved to the RHS 25// 2. Bitwise operators with constant operands are always grouped so that 26// shifts are performed first, then or's, then and's, then xor's. 27// 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible 28// 4. All cmp instructions on boolean values are replaced with logical ops 29// 5. add X, X is represented as (X*2) => (X << 1) 30// 6. Multiplies with a power-of-two constant argument are transformed into 31// shifts. 32// ... etc. 
33// 34//===----------------------------------------------------------------------===// 35 36#define DEBUG_TYPE "instcombine" 37#include "llvm/Transforms/Scalar.h" 38#include "InstCombine.h" 39#include "llvm/IntrinsicInst.h" 40#include "llvm/LLVMContext.h" 41#include "llvm/DerivedTypes.h" 42#include "llvm/GlobalVariable.h" 43#include "llvm/Operator.h" 44#include "llvm/Analysis/ConstantFolding.h" 45#include "llvm/Analysis/InstructionSimplify.h" 46#include "llvm/Analysis/MemoryBuiltins.h" 47#include "llvm/Target/TargetData.h" 48#include "llvm/Transforms/Utils/BasicBlockUtils.h" 49#include "llvm/Transforms/Utils/Local.h" 50#include "llvm/Support/CallSite.h" 51#include "llvm/Support/Debug.h" 52#include "llvm/Support/ErrorHandling.h" 53#include "llvm/Support/GetElementPtrTypeIterator.h" 54#include "llvm/Support/MathExtras.h" 55#include "llvm/Support/PatternMatch.h" 56#include "llvm/ADT/SmallPtrSet.h" 57#include "llvm/ADT/Statistic.h" 58#include "llvm/ADT/STLExtras.h" 59#include <algorithm> 60#include <climits> 61using namespace llvm; 62using namespace llvm::PatternMatch; 63 64STATISTIC(NumCombined , "Number of insts combined"); 65STATISTIC(NumConstProp, "Number of constant folds"); 66STATISTIC(NumDeadInst , "Number of dead inst eliminated"); 67STATISTIC(NumDeadStore, "Number of dead stores eliminated"); 68STATISTIC(NumSunkInst , "Number of instructions sunk"); 69 70 71char InstCombiner::ID = 0; 72static RegisterPass<InstCombiner> 73X("instcombine", "Combine redundant instructions"); 74 75void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const { 76 AU.addPreservedID(LCSSAID); 77 AU.setPreservesCFG(); 78} 79 80 81// isOnlyUse - Return true if this instruction will be deleted if we stop using 82// it. 83static bool isOnlyUse(Value *V) { 84 return V->hasOneUse() || isa<Constant>(V); 85} 86 87// getPromotedType - Return the specified type promoted as it would be to pass 88// though a va_arg area... 
89static const Type *getPromotedType(const Type *Ty) { 90 if (const IntegerType* ITy = dyn_cast<IntegerType>(Ty)) { 91 if (ITy->getBitWidth() < 32) 92 return Type::getInt32Ty(Ty->getContext()); 93 } 94 return Ty; 95} 96 97/// ShouldChangeType - Return true if it is desirable to convert a computation 98/// from 'From' to 'To'. We don't want to convert from a legal to an illegal 99/// type for example, or from a smaller to a larger illegal type. 100static bool ShouldChangeType(const Type *From, const Type *To, 101 const TargetData *TD) { 102 assert(isa<IntegerType>(From) && isa<IntegerType>(To)); 103 104 // If we don't have TD, we don't know if the source/dest are legal. 105 if (!TD) return false; 106 107 unsigned FromWidth = From->getPrimitiveSizeInBits(); 108 unsigned ToWidth = To->getPrimitiveSizeInBits(); 109 bool FromLegal = TD->isLegalInteger(FromWidth); 110 bool ToLegal = TD->isLegalInteger(ToWidth); 111 112 // If this is a legal integer from type, and the result would be an illegal 113 // type, don't do the transformation. 114 if (FromLegal && !ToLegal) 115 return false; 116 117 // Otherwise, if both are illegal, do not increase the size of the result. We 118 // do allow things like i160 -> i64, but not i64 -> i160. 119 if (!FromLegal && !ToLegal && ToWidth > FromWidth) 120 return false; 121 122 return true; 123} 124 125/// getBitCastOperand - If the specified operand is a CastInst, a constant 126/// expression bitcast, or a GetElementPtrInst with all zero indices, return the 127/// operand value, otherwise return null. 128static Value *getBitCastOperand(Value *V) { 129 if (Operator *O = dyn_cast<Operator>(V)) { 130 if (O->getOpcode() == Instruction::BitCast) 131 return O->getOperand(0); 132 if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) 133 if (GEP->hasAllZeroIndices()) 134 return GEP->getPointerOperand(); 135 } 136 return 0; 137} 138 139/// This function is a wrapper around CastInst::isEliminableCastPair. 
It 140/// simply extracts arguments and returns what that function returns. 141static Instruction::CastOps 142isEliminableCastPair( 143 const CastInst *CI, ///< The first cast instruction 144 unsigned opcode, ///< The opcode of the second cast instruction 145 const Type *DstTy, ///< The target type for the second cast instruction 146 TargetData *TD ///< The target data for pointer size 147) { 148 149 const Type *SrcTy = CI->getOperand(0)->getType(); // A from above 150 const Type *MidTy = CI->getType(); // B from above 151 152 // Get the opcodes of the two Cast instructions 153 Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode()); 154 Instruction::CastOps secondOp = Instruction::CastOps(opcode); 155 156 unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy, 157 DstTy, 158 TD ? TD->getIntPtrType(CI->getContext()) : 0); 159 160 // We don't want to form an inttoptr or ptrtoint that converts to an integer 161 // type that differs from the pointer size. 162 if ((Res == Instruction::IntToPtr && 163 (!TD || SrcTy != TD->getIntPtrType(CI->getContext()))) || 164 (Res == Instruction::PtrToInt && 165 (!TD || DstTy != TD->getIntPtrType(CI->getContext())))) 166 Res = 0; 167 168 return Instruction::CastOps(Res); 169} 170 171/// ValueRequiresCast - Return true if the cast from "V to Ty" actually results 172/// in any code being generated. It does not require codegen if V is simple 173/// enough or if the cast can be folded into other casts. 174static bool ValueRequiresCast(Instruction::CastOps opcode, const Value *V, 175 const Type *Ty, TargetData *TD) { 176 if (V->getType() == Ty || isa<Constant>(V)) return false; 177 178 // If this is another cast that can be eliminated, it isn't codegen either. 
179 if (const CastInst *CI = dyn_cast<CastInst>(V)) 180 if (isEliminableCastPair(CI, opcode, Ty, TD)) 181 return false; 182 return true; 183} 184 185// SimplifyCommutative - This performs a few simplifications for commutative 186// operators: 187// 188// 1. Order operands such that they are listed from right (least complex) to 189// left (most complex). This puts constants before unary operators before 190// binary operators. 191// 192// 2. Transform: (op (op V, C1), C2) ==> (op V, (op C1, C2)) 193// 3. Transform: (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2)) 194// 195bool InstCombiner::SimplifyCommutative(BinaryOperator &I) { 196 bool Changed = false; 197 if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) 198 Changed = !I.swapOperands(); 199 200 if (!I.isAssociative()) return Changed; 201 Instruction::BinaryOps Opcode = I.getOpcode(); 202 if (BinaryOperator *Op = dyn_cast<BinaryOperator>(I.getOperand(0))) 203 if (Op->getOpcode() == Opcode && isa<Constant>(Op->getOperand(1))) { 204 if (isa<Constant>(I.getOperand(1))) { 205 Constant *Folded = ConstantExpr::get(I.getOpcode(), 206 cast<Constant>(I.getOperand(1)), 207 cast<Constant>(Op->getOperand(1))); 208 I.setOperand(0, Op->getOperand(0)); 209 I.setOperand(1, Folded); 210 return true; 211 } else if (BinaryOperator *Op1=dyn_cast<BinaryOperator>(I.getOperand(1))) 212 if (Op1->getOpcode() == Opcode && isa<Constant>(Op1->getOperand(1)) && 213 isOnlyUse(Op) && isOnlyUse(Op1)) { 214 Constant *C1 = cast<Constant>(Op->getOperand(1)); 215 Constant *C2 = cast<Constant>(Op1->getOperand(1)); 216 217 // Fold (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2)) 218 Constant *Folded = ConstantExpr::get(I.getOpcode(), C1, C2); 219 Instruction *New = BinaryOperator::Create(Opcode, Op->getOperand(0), 220 Op1->getOperand(0), 221 Op1->getName(), &I); 222 Worklist.Add(New); 223 I.setOperand(0, New); 224 I.setOperand(1, Folded); 225 return true; 226 } 227 } 228 return Changed; 229} 230 231// 
dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction 232// if the LHS is a constant zero (which is the 'negate' form). 233// 234Value *InstCombiner::dyn_castNegVal(Value *V) const { 235 if (BinaryOperator::isNeg(V)) 236 return BinaryOperator::getNegArgument(V); 237 238 // Constants can be considered to be negated values if they can be folded. 239 if (ConstantInt *C = dyn_cast<ConstantInt>(V)) 240 return ConstantExpr::getNeg(C); 241 242 if (ConstantVector *C = dyn_cast<ConstantVector>(V)) 243 if (C->getType()->getElementType()->isInteger()) 244 return ConstantExpr::getNeg(C); 245 246 return 0; 247} 248 249// dyn_castFNegVal - Given a 'fsub' instruction, return the RHS of the 250// instruction if the LHS is a constant negative zero (which is the 'negate' 251// form). 252// 253static inline Value *dyn_castFNegVal(Value *V) { 254 if (BinaryOperator::isFNeg(V)) 255 return BinaryOperator::getFNegArgument(V); 256 257 // Constants can be considered to be negated values if they can be folded. 258 if (ConstantFP *C = dyn_cast<ConstantFP>(V)) 259 return ConstantExpr::getFNeg(C); 260 261 if (ConstantVector *C = dyn_cast<ConstantVector>(V)) 262 if (C->getType()->getElementType()->isFloatingPoint()) 263 return ConstantExpr::getFNeg(C); 264 265 return 0; 266} 267 268/// MatchSelectPattern - Pattern match integer [SU]MIN, [SU]MAX, and ABS idioms, 269/// returning the kind and providing the out parameter results if we 270/// successfully match. 271static SelectPatternFlavor 272MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) { 273 SelectInst *SI = dyn_cast<SelectInst>(V); 274 if (SI == 0) return SPF_UNKNOWN; 275 276 ICmpInst *ICI = dyn_cast<ICmpInst>(SI->getCondition()); 277 if (ICI == 0) return SPF_UNKNOWN; 278 279 LHS = ICI->getOperand(0); 280 RHS = ICI->getOperand(1); 281 282 // (icmp X, Y) ? 
X : Y 283 if (SI->getTrueValue() == ICI->getOperand(0) && 284 SI->getFalseValue() == ICI->getOperand(1)) { 285 switch (ICI->getPredicate()) { 286 default: return SPF_UNKNOWN; // Equality. 287 case ICmpInst::ICMP_UGT: 288 case ICmpInst::ICMP_UGE: return SPF_UMAX; 289 case ICmpInst::ICMP_SGT: 290 case ICmpInst::ICMP_SGE: return SPF_SMAX; 291 case ICmpInst::ICMP_ULT: 292 case ICmpInst::ICMP_ULE: return SPF_UMIN; 293 case ICmpInst::ICMP_SLT: 294 case ICmpInst::ICMP_SLE: return SPF_SMIN; 295 } 296 } 297 298 // (icmp X, Y) ? Y : X 299 if (SI->getTrueValue() == ICI->getOperand(1) && 300 SI->getFalseValue() == ICI->getOperand(0)) { 301 switch (ICI->getPredicate()) { 302 default: return SPF_UNKNOWN; // Equality. 303 case ICmpInst::ICMP_UGT: 304 case ICmpInst::ICMP_UGE: return SPF_UMIN; 305 case ICmpInst::ICMP_SGT: 306 case ICmpInst::ICMP_SGE: return SPF_SMIN; 307 case ICmpInst::ICMP_ULT: 308 case ICmpInst::ICMP_ULE: return SPF_UMAX; 309 case ICmpInst::ICMP_SLT: 310 case ICmpInst::ICMP_SLE: return SPF_SMAX; 311 } 312 } 313 314 // TODO: (X > 4) ? X : 5 --> (X >= 5) ? X : 5 --> MAX(X, 5) 315 316 return SPF_UNKNOWN; 317} 318 319/// isFreeToInvert - Return true if the specified value is free to invert (apply 320/// ~ to). This happens in cases where the ~ can be eliminated. 321static inline bool isFreeToInvert(Value *V) { 322 // ~(~(X)) -> X. 323 if (BinaryOperator::isNot(V)) 324 return true; 325 326 // Constants can be considered to be not'ed values. 327 if (isa<ConstantInt>(V)) 328 return true; 329 330 // Compares can be inverted if they have a single use. 331 if (CmpInst *CI = dyn_cast<CmpInst>(V)) 332 return CI->hasOneUse(); 333 334 return false; 335} 336 337static inline Value *dyn_castNotVal(Value *V) { 338 // If this is not(not(x)) don't return that this is a not: we want the two 339 // not's to be folded first. 
340 if (BinaryOperator::isNot(V)) { 341 Value *Operand = BinaryOperator::getNotArgument(V); 342 if (!isFreeToInvert(Operand)) 343 return Operand; 344 } 345 346 // Constants can be considered to be not'ed values... 347 if (ConstantInt *C = dyn_cast<ConstantInt>(V)) 348 return ConstantInt::get(C->getType(), ~C->getValue()); 349 return 0; 350} 351 352 353 354// dyn_castFoldableMul - If this value is a multiply that can be folded into 355// other computations (because it has a constant operand), return the 356// non-constant operand of the multiply, and set CST to point to the multiplier. 357// Otherwise, return null. 358// 359static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) { 360 if (V->hasOneUse() && V->getType()->isInteger()) 361 if (Instruction *I = dyn_cast<Instruction>(V)) { 362 if (I->getOpcode() == Instruction::Mul) 363 if ((CST = dyn_cast<ConstantInt>(I->getOperand(1)))) 364 return I->getOperand(0); 365 if (I->getOpcode() == Instruction::Shl) 366 if ((CST = dyn_cast<ConstantInt>(I->getOperand(1)))) { 367 // The multiplier is really 1 << CST. 368 uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth(); 369 uint32_t CSTVal = CST->getLimitedValue(BitWidth); 370 CST = ConstantInt::get(V->getType()->getContext(), 371 APInt(BitWidth, 1).shl(CSTVal)); 372 return I->getOperand(0); 373 } 374 } 375 return 0; 376} 377 378/// AddOne - Add one to a ConstantInt 379static Constant *AddOne(Constant *C) { 380 return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1)); 381} 382/// SubOne - Subtract one from a ConstantInt 383static Constant *SubOne(ConstantInt *C) { 384 return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1)); 385} 386/// MultiplyOverflows - True if the multiply can not be expressed in an int 387/// this size. 
388static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) { 389 uint32_t W = C1->getBitWidth(); 390 APInt LHSExt = C1->getValue(), RHSExt = C2->getValue(); 391 if (sign) { 392 LHSExt.sext(W * 2); 393 RHSExt.sext(W * 2); 394 } else { 395 LHSExt.zext(W * 2); 396 RHSExt.zext(W * 2); 397 } 398 399 APInt MulExt = LHSExt * RHSExt; 400 401 if (!sign) 402 return MulExt.ugt(APInt::getLowBitsSet(W * 2, W)); 403 404 APInt Min = APInt::getSignedMinValue(W).sext(W * 2); 405 APInt Max = APInt::getSignedMaxValue(W).sext(W * 2); 406 return MulExt.slt(Min) || MulExt.sgt(Max); 407} 408 409 410 411/// AssociativeOpt - Perform an optimization on an associative operator. This 412/// function is designed to check a chain of associative operators for a 413/// potential to apply a certain optimization. Since the optimization may be 414/// applicable if the expression was reassociated, this checks the chain, then 415/// reassociates the expression as necessary to expose the optimization 416/// opportunity. This makes use of a special Functor, which must define 417/// 'shouldApply' and 'apply' methods. 418/// 419template<typename Functor> 420static Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F) { 421 unsigned Opcode = Root.getOpcode(); 422 Value *LHS = Root.getOperand(0); 423 424 // Quick check, see if the immediate LHS matches... 425 if (F.shouldApply(LHS)) 426 return F.apply(Root); 427 428 // Otherwise, if the LHS is not of the same opcode as the root, return. 429 Instruction *LHSI = dyn_cast<Instruction>(LHS); 430 while (LHSI && LHSI->getOpcode() == Opcode && LHSI->hasOneUse()) { 431 // Should we apply this transform to the RHS? 432 bool ShouldApply = F.shouldApply(LHSI->getOperand(1)); 433 434 // If not to the RHS, check to see if we should apply to the LHS... 
435 if (!ShouldApply && F.shouldApply(LHSI->getOperand(0))) { 436 cast<BinaryOperator>(LHSI)->swapOperands(); // Make the LHS the RHS 437 ShouldApply = true; 438 } 439 440 // If the functor wants to apply the optimization to the RHS of LHSI, 441 // reassociate the expression from ((? op A) op B) to (? op (A op B)) 442 if (ShouldApply) { 443 // Now all of the instructions are in the current basic block, go ahead 444 // and perform the reassociation. 445 Instruction *TmpLHSI = cast<Instruction>(Root.getOperand(0)); 446 447 // First move the selected RHS to the LHS of the root... 448 Root.setOperand(0, LHSI->getOperand(1)); 449 450 // Make what used to be the LHS of the root be the user of the root... 451 Value *ExtraOperand = TmpLHSI->getOperand(1); 452 if (&Root == TmpLHSI) { 453 Root.replaceAllUsesWith(Constant::getNullValue(TmpLHSI->getType())); 454 return 0; 455 } 456 Root.replaceAllUsesWith(TmpLHSI); // Users now use TmpLHSI 457 TmpLHSI->setOperand(1, &Root); // TmpLHSI now uses the root 458 BasicBlock::iterator ARI = &Root; ++ARI; 459 TmpLHSI->moveBefore(ARI); // Move TmpLHSI to after Root 460 ARI = Root; 461 462 // Now propagate the ExtraOperand down the chain of instructions until we 463 // get to LHSI. 464 while (TmpLHSI != LHSI) { 465 Instruction *NextLHSI = cast<Instruction>(TmpLHSI->getOperand(0)); 466 // Move the instruction to immediately before the chain we are 467 // constructing to avoid breaking dominance properties. 468 NextLHSI->moveBefore(ARI); 469 ARI = NextLHSI; 470 471 Value *NextOp = NextLHSI->getOperand(1); 472 NextLHSI->setOperand(1, ExtraOperand); 473 TmpLHSI = NextLHSI; 474 ExtraOperand = NextOp; 475 } 476 477 // Now that the instructions are reassociated, have the functor perform 478 // the transformation... 
479 return F.apply(Root); 480 } 481 482 LHSI = dyn_cast<Instruction>(LHSI->getOperand(0)); 483 } 484 return 0; 485} 486 487namespace { 488 489// AddRHS - Implements: X + X --> X << 1 490struct AddRHS { 491 Value *RHS; 492 explicit AddRHS(Value *rhs) : RHS(rhs) {} 493 bool shouldApply(Value *LHS) const { return LHS == RHS; } 494 Instruction *apply(BinaryOperator &Add) const { 495 return BinaryOperator::CreateShl(Add.getOperand(0), 496 ConstantInt::get(Add.getType(), 1)); 497 } 498}; 499 500// AddMaskingAnd - Implements (A & C1)+(B & C2) --> (A & C1)|(B & C2) 501// iff C1&C2 == 0 502struct AddMaskingAnd { 503 Constant *C2; 504 explicit AddMaskingAnd(Constant *c) : C2(c) {} 505 bool shouldApply(Value *LHS) const { 506 ConstantInt *C1; 507 return match(LHS, m_And(m_Value(), m_ConstantInt(C1))) && 508 ConstantExpr::getAnd(C1, C2)->isNullValue(); 509 } 510 Instruction *apply(BinaryOperator &Add) const { 511 return BinaryOperator::CreateOr(Add.getOperand(0), Add.getOperand(1)); 512 } 513}; 514 515} 516 517static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO, 518 InstCombiner *IC) { 519 if (CastInst *CI = dyn_cast<CastInst>(&I)) 520 return IC->Builder->CreateCast(CI->getOpcode(), SO, I.getType()); 521 522 // Figure out if the constant is the left or the right argument. 
523 bool ConstIsRHS = isa<Constant>(I.getOperand(1)); 524 Constant *ConstOperand = cast<Constant>(I.getOperand(ConstIsRHS)); 525 526 if (Constant *SOC = dyn_cast<Constant>(SO)) { 527 if (ConstIsRHS) 528 return ConstantExpr::get(I.getOpcode(), SOC, ConstOperand); 529 return ConstantExpr::get(I.getOpcode(), ConstOperand, SOC); 530 } 531 532 Value *Op0 = SO, *Op1 = ConstOperand; 533 if (!ConstIsRHS) 534 std::swap(Op0, Op1); 535 536 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I)) 537 return IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1, 538 SO->getName()+".op"); 539 if (ICmpInst *CI = dyn_cast<ICmpInst>(&I)) 540 return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1, 541 SO->getName()+".cmp"); 542 if (FCmpInst *CI = dyn_cast<FCmpInst>(&I)) 543 return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1, 544 SO->getName()+".cmp"); 545 llvm_unreachable("Unknown binary instruction type!"); 546} 547 548// FoldOpIntoSelect - Given an instruction with a select as one operand and a 549// constant as the other operand, try to fold the binary operator into the 550// select arguments. This also works for Cast instructions, which obviously do 551// not have a second operand. 552static Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI, 553 InstCombiner *IC) { 554 // Don't modify shared select instructions 555 if (!SI->hasOneUse()) return 0; 556 Value *TV = SI->getOperand(1); 557 Value *FV = SI->getOperand(2); 558 559 if (isa<Constant>(TV) || isa<Constant>(FV)) { 560 // Bool selects with constant operands can be folded to logical ops. 
561 if (SI->getType() == Type::getInt1Ty(SI->getContext())) return 0; 562 563 Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, IC); 564 Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, IC); 565 566 return SelectInst::Create(SI->getCondition(), SelectTrueVal, 567 SelectFalseVal); 568 } 569 return 0; 570} 571 572 573/// FoldOpIntoPhi - Given a binary operator, cast instruction, or select which 574/// has a PHI node as operand #0, see if we can fold the instruction into the 575/// PHI (which is only possible if all operands to the PHI are constants). 576/// 577/// If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms 578/// that would normally be unprofitable because they strongly encourage jump 579/// threading. 580Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I, 581 bool AllowAggressive) { 582 AllowAggressive = false; 583 PHINode *PN = cast<PHINode>(I.getOperand(0)); 584 unsigned NumPHIValues = PN->getNumIncomingValues(); 585 if (NumPHIValues == 0 || 586 // We normally only transform phis with a single use, unless we're trying 587 // hard to make jump threading happen. 588 (!PN->hasOneUse() && !AllowAggressive)) 589 return 0; 590 591 592 // Check to see if all of the operands of the PHI are simple constants 593 // (constantint/constantfp/undef). If there is one non-constant value, 594 // remember the BB it is in. If there is more than one or if *it* is a PHI, 595 // bail out. We don't do arbitrary constant expressions here because moving 596 // their computation can be expensive without a cost model. 597 BasicBlock *NonConstBB = 0; 598 for (unsigned i = 0; i != NumPHIValues; ++i) 599 if (!isa<Constant>(PN->getIncomingValue(i)) || 600 isa<ConstantExpr>(PN->getIncomingValue(i))) { 601 if (NonConstBB) return 0; // More than one non-const value. 602 if (isa<PHINode>(PN->getIncomingValue(i))) return 0; // Itself a phi. 
603 NonConstBB = PN->getIncomingBlock(i); 604 605 // If the incoming non-constant value is in I's block, we have an infinite 606 // loop. 607 if (NonConstBB == I.getParent()) 608 return 0; 609 } 610 611 // If there is exactly one non-constant value, we can insert a copy of the 612 // operation in that block. However, if this is a critical edge, we would be 613 // inserting the computation one some other paths (e.g. inside a loop). Only 614 // do this if the pred block is unconditionally branching into the phi block. 615 if (NonConstBB != 0 && !AllowAggressive) { 616 BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator()); 617 if (!BI || !BI->isUnconditional()) return 0; 618 } 619 620 // Okay, we can do the transformation: create the new PHI node. 621 PHINode *NewPN = PHINode::Create(I.getType(), ""); 622 NewPN->reserveOperandSpace(PN->getNumOperands()/2); 623 InsertNewInstBefore(NewPN, *PN); 624 NewPN->takeName(PN); 625 626 // Next, add all of the operands to the PHI. 627 if (SelectInst *SI = dyn_cast<SelectInst>(&I)) { 628 // We only currently try to fold the condition of a select when it is a phi, 629 // not the true/false values. 630 Value *TrueV = SI->getTrueValue(); 631 Value *FalseV = SI->getFalseValue(); 632 BasicBlock *PhiTransBB = PN->getParent(); 633 for (unsigned i = 0; i != NumPHIValues; ++i) { 634 BasicBlock *ThisBB = PN->getIncomingBlock(i); 635 Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB); 636 Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB); 637 Value *InV = 0; 638 if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) { 639 InV = InC->isNullValue() ? 
FalseVInPred : TrueVInPred; 640 } else { 641 assert(PN->getIncomingBlock(i) == NonConstBB); 642 InV = SelectInst::Create(PN->getIncomingValue(i), TrueVInPred, 643 FalseVInPred, 644 "phitmp", NonConstBB->getTerminator()); 645 Worklist.Add(cast<Instruction>(InV)); 646 } 647 NewPN->addIncoming(InV, ThisBB); 648 } 649 } else if (I.getNumOperands() == 2) { 650 Constant *C = cast<Constant>(I.getOperand(1)); 651 for (unsigned i = 0; i != NumPHIValues; ++i) { 652 Value *InV = 0; 653 if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) { 654 if (CmpInst *CI = dyn_cast<CmpInst>(&I)) 655 InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C); 656 else 657 InV = ConstantExpr::get(I.getOpcode(), InC, C); 658 } else { 659 assert(PN->getIncomingBlock(i) == NonConstBB); 660 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I)) 661 InV = BinaryOperator::Create(BO->getOpcode(), 662 PN->getIncomingValue(i), C, "phitmp", 663 NonConstBB->getTerminator()); 664 else if (CmpInst *CI = dyn_cast<CmpInst>(&I)) 665 InV = CmpInst::Create(CI->getOpcode(), 666 CI->getPredicate(), 667 PN->getIncomingValue(i), C, "phitmp", 668 NonConstBB->getTerminator()); 669 else 670 llvm_unreachable("Unknown binop!"); 671 672 Worklist.Add(cast<Instruction>(InV)); 673 } 674 NewPN->addIncoming(InV, PN->getIncomingBlock(i)); 675 } 676 } else { 677 CastInst *CI = cast<CastInst>(&I); 678 const Type *RetTy = CI->getType(); 679 for (unsigned i = 0; i != NumPHIValues; ++i) { 680 Value *InV; 681 if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) { 682 InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy); 683 } else { 684 assert(PN->getIncomingBlock(i) == NonConstBB); 685 InV = CastInst::Create(CI->getOpcode(), PN->getIncomingValue(i), 686 I.getType(), "phitmp", 687 NonConstBB->getTerminator()); 688 Worklist.Add(cast<Instruction>(InV)); 689 } 690 NewPN->addIncoming(InV, PN->getIncomingBlock(i)); 691 } 692 } 693 return ReplaceInstUsesWith(I, NewPN); 694} 695 696 697/// 
WillNotOverflowSignedAdd - Return true if we can prove that: 698/// (sext (add LHS, RHS)) === (add (sext LHS), (sext RHS)) 699/// This basically requires proving that the add in the original type would not 700/// overflow to change the sign bit or have a carry out. 701bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) { 702 // There are different heuristics we can use for this. Here are some simple 703 // ones. 704 705 // Add has the property that adding any two 2's complement numbers can only 706 // have one carry bit which can change a sign. As such, if LHS and RHS each 707 // have at least two sign bits, we know that the addition of the two values 708 // will sign extend fine. 709 if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1) 710 return true; 711 712 713 // If one of the operands only has one non-zero bit, and if the other operand 714 // has a known-zero bit in a more significant place than it (not including the 715 // sign bit) the ripple may go up to and fill the zero, but won't change the 716 // sign. For example, (X & ~4) + 1. 717 718 // TODO: Implement. 719 720 return false; 721} 722 723 724Instruction *InstCombiner::visitAdd(BinaryOperator &I) { 725 bool Changed = SimplifyCommutative(I); 726 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); 727 728 if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), 729 I.hasNoUnsignedWrap(), TD)) 730 return ReplaceInstUsesWith(I, V); 731 732 733 if (Constant *RHSC = dyn_cast<Constant>(RHS)) { 734 if (ConstantInt *CI = dyn_cast<ConstantInt>(RHSC)) { 735 // X + (signbit) --> X ^ signbit 736 const APInt& Val = CI->getValue(); 737 uint32_t BitWidth = Val.getBitWidth(); 738 if (Val == APInt::getSignBit(BitWidth)) 739 return BinaryOperator::CreateXor(LHS, RHS); 740 741 // See if SimplifyDemandedBits can simplify this. This handles stuff like 742 // (X & 254)+1 -> (X&254)|1 743 if (SimplifyDemandedInstructionBits(I)) 744 return &I; 745 746 // zext(bool) + C -> bool ? 
C + 1 : C 747 if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS)) 748 if (ZI->getSrcTy() == Type::getInt1Ty(I.getContext())) 749 return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI); 750 } 751 752 if (isa<PHINode>(LHS)) 753 if (Instruction *NV = FoldOpIntoPhi(I)) 754 return NV; 755 756 ConstantInt *XorRHS = 0; 757 Value *XorLHS = 0; 758 if (isa<ConstantInt>(RHSC) && 759 match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) { 760 uint32_t TySizeBits = I.getType()->getScalarSizeInBits(); 761 const APInt& RHSVal = cast<ConstantInt>(RHSC)->getValue(); 762 763 uint32_t Size = TySizeBits / 2; 764 APInt C0080Val(APInt(TySizeBits, 1ULL).shl(Size - 1)); 765 APInt CFF80Val(-C0080Val); 766 do { 767 if (TySizeBits > Size) { 768 // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext. 769 // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext. 770 if ((RHSVal == CFF80Val && XorRHS->getValue() == C0080Val) || 771 (RHSVal == C0080Val && XorRHS->getValue() == CFF80Val)) { 772 // This is a sign extend if the top bits are known zero. 773 if (!MaskedValueIsZero(XorLHS, 774 APInt::getHighBitsSet(TySizeBits, TySizeBits - Size))) 775 Size = 0; // Not a sign ext, but can't be any others either. 776 break; 777 } 778 } 779 Size >>= 1; 780 C0080Val = APIntOps::lshr(C0080Val, Size); 781 CFF80Val = APIntOps::ashr(CFF80Val, Size); 782 } while (Size >= 1); 783 784 // FIXME: This shouldn't be necessary. When the backends can handle types 785 // with funny bit widths then this switch statement should be removed. It 786 // is just here to get the size of the "middle" type back up to something 787 // that the back ends can handle. 
788 const Type *MiddleType = 0; 789 switch (Size) { 790 default: break; 791 case 32: 792 case 16: 793 case 8: MiddleType = IntegerType::get(I.getContext(), Size); break; 794 } 795 if (MiddleType) { 796 Value *NewTrunc = Builder->CreateTrunc(XorLHS, MiddleType, "sext"); 797 return new SExtInst(NewTrunc, I.getType(), I.getName()); 798 } 799 } 800 } 801 802 if (I.getType() == Type::getInt1Ty(I.getContext())) 803 return BinaryOperator::CreateXor(LHS, RHS); 804 805 // X + X --> X << 1 806 if (I.getType()->isInteger()) { 807 if (Instruction *Result = AssociativeOpt(I, AddRHS(RHS))) 808 return Result; 809 810 if (Instruction *RHSI = dyn_cast<Instruction>(RHS)) { 811 if (RHSI->getOpcode() == Instruction::Sub) 812 if (LHS == RHSI->getOperand(1)) // A + (B - A) --> B 813 return ReplaceInstUsesWith(I, RHSI->getOperand(0)); 814 } 815 if (Instruction *LHSI = dyn_cast<Instruction>(LHS)) { 816 if (LHSI->getOpcode() == Instruction::Sub) 817 if (RHS == LHSI->getOperand(1)) // (B - A) + A --> B 818 return ReplaceInstUsesWith(I, LHSI->getOperand(0)); 819 } 820 } 821 822 // -A + B --> B - A 823 // -A + -B --> -(A + B) 824 if (Value *LHSV = dyn_castNegVal(LHS)) { 825 if (LHS->getType()->isIntOrIntVector()) { 826 if (Value *RHSV = dyn_castNegVal(RHS)) { 827 Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum"); 828 return BinaryOperator::CreateNeg(NewAdd); 829 } 830 } 831 832 return BinaryOperator::CreateSub(RHS, LHSV); 833 } 834 835 // A + -B --> A - B 836 if (!isa<Constant>(RHS)) 837 if (Value *V = dyn_castNegVal(RHS)) 838 return BinaryOperator::CreateSub(LHS, V); 839 840 841 ConstantInt *C2; 842 if (Value *X = dyn_castFoldableMul(LHS, C2)) { 843 if (X == RHS) // X*C + X --> X * (C+1) 844 return BinaryOperator::CreateMul(RHS, AddOne(C2)); 845 846 // X*C1 + X*C2 --> X * (C1+C2) 847 ConstantInt *C1; 848 if (X == dyn_castFoldableMul(RHS, C1)) 849 return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2)); 850 } 851 852 // X + X*C --> X * (C+1) 853 if (dyn_castFoldableMul(RHS, C2) 
== LHS) 854 return BinaryOperator::CreateMul(LHS, AddOne(C2)); 855 856 // X + ~X --> -1 since ~X = -X-1 857 if (dyn_castNotVal(LHS) == RHS || 858 dyn_castNotVal(RHS) == LHS) 859 return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); 860 861 862 // (A & C1)+(B & C2) --> (A & C1)|(B & C2) iff C1&C2 == 0 863 if (match(RHS, m_And(m_Value(), m_ConstantInt(C2)))) 864 if (Instruction *R = AssociativeOpt(I, AddMaskingAnd(C2))) 865 return R; 866 867 // A+B --> A|B iff A and B have no bits set in common. 868 if (const IntegerType *IT = dyn_cast<IntegerType>(I.getType())) { 869 APInt Mask = APInt::getAllOnesValue(IT->getBitWidth()); 870 APInt LHSKnownOne(IT->getBitWidth(), 0); 871 APInt LHSKnownZero(IT->getBitWidth(), 0); 872 ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne); 873 if (LHSKnownZero != 0) { 874 APInt RHSKnownOne(IT->getBitWidth(), 0); 875 APInt RHSKnownZero(IT->getBitWidth(), 0); 876 ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne); 877 878 // No bits in common -> bitwise or. 
879 if ((LHSKnownZero|RHSKnownZero).isAllOnesValue()) 880 return BinaryOperator::CreateOr(LHS, RHS); 881 } 882 } 883 884 // W*X + Y*Z --> W * (X+Z) iff W == Y 885 if (I.getType()->isIntOrIntVector()) { 886 Value *W, *X, *Y, *Z; 887 if (match(LHS, m_Mul(m_Value(W), m_Value(X))) && 888 match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) { 889 if (W != Y) { 890 if (W == Z) { 891 std::swap(Y, Z); 892 } else if (Y == X) { 893 std::swap(W, X); 894 } else if (X == Z) { 895 std::swap(Y, Z); 896 std::swap(W, X); 897 } 898 } 899 900 if (W == Y) { 901 Value *NewAdd = Builder->CreateAdd(X, Z, LHS->getName()); 902 return BinaryOperator::CreateMul(W, NewAdd); 903 } 904 } 905 } 906 907 if (ConstantInt *CRHS = dyn_cast<ConstantInt>(RHS)) { 908 Value *X = 0; 909 if (match(LHS, m_Not(m_Value(X)))) // ~X + C --> (C-1) - X 910 return BinaryOperator::CreateSub(SubOne(CRHS), X); 911 912 // (X & FF00) + xx00 -> (X+xx00) & FF00 913 if (LHS->hasOneUse() && 914 match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) { 915 Constant *Anded = ConstantExpr::getAnd(CRHS, C2); 916 if (Anded == CRHS) { 917 // See if all bits from the first bit set in the Add RHS up are included 918 // in the mask. First, get the rightmost bit. 919 const APInt& AddRHSV = CRHS->getValue(); 920 921 // Form a mask of all bits from the lowest bit added through the top. 922 APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1)); 923 924 // See if the and mask includes all of these bits. 925 APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue()); 926 927 if (AddRHSHighBits == AddRHSHighBitsAnd) { 928 // Okay, the xform is safe. Insert the new add pronto. 929 Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName()); 930 return BinaryOperator::CreateAnd(NewAdd, C2); 931 } 932 } 933 } 934 935 // Try to fold constant add into select arguments. 
936 if (SelectInst *SI = dyn_cast<SelectInst>(LHS)) 937 if (Instruction *R = FoldOpIntoSelect(I, SI, this)) 938 return R; 939 } 940 941 // add (select X 0 (sub n A)) A --> select X A n 942 { 943 SelectInst *SI = dyn_cast<SelectInst>(LHS); 944 Value *A = RHS; 945 if (!SI) { 946 SI = dyn_cast<SelectInst>(RHS); 947 A = LHS; 948 } 949 if (SI && SI->hasOneUse()) { 950 Value *TV = SI->getTrueValue(); 951 Value *FV = SI->getFalseValue(); 952 Value *N; 953 954 // Can we fold the add into the argument of the select? 955 // We check both true and false select arguments for a matching subtract. 956 if (match(FV, m_Zero()) && 957 match(TV, m_Sub(m_Value(N), m_Specific(A)))) 958 // Fold the add into the true select value. 959 return SelectInst::Create(SI->getCondition(), N, A); 960 if (match(TV, m_Zero()) && 961 match(FV, m_Sub(m_Value(N), m_Specific(A)))) 962 // Fold the add into the false select value. 963 return SelectInst::Create(SI->getCondition(), A, N); 964 } 965 } 966 967 // Check for (add (sext x), y), see if we can merge this into an 968 // integer add followed by a sext. 969 if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) { 970 // (add (sext x), cst) --> (sext (add x, cst')) 971 if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) { 972 Constant *CI = 973 ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType()); 974 if (LHSConv->hasOneUse() && 975 ConstantExpr::getSExt(CI, I.getType()) == RHSC && 976 WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) { 977 // Insert the new, smaller add. 978 Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), 979 CI, "addconv"); 980 return new SExtInst(NewAdd, I.getType()); 981 } 982 } 983 984 // (add (sext x), (sext y)) --> (sext (add int x, y)) 985 if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) { 986 // Only do this if x/y have the same type, if at last one of them has a 987 // single use (so we don't increase the number of sexts), and if the 988 // integer add will not overflow. 
989 if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&& 990 (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && 991 WillNotOverflowSignedAdd(LHSConv->getOperand(0), 992 RHSConv->getOperand(0))) { 993 // Insert the new integer add. 994 Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), 995 RHSConv->getOperand(0), "addconv"); 996 return new SExtInst(NewAdd, I.getType()); 997 } 998 } 999 } 1000 1001 return Changed ? &I : 0; 1002} 1003 1004Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { 1005 bool Changed = SimplifyCommutative(I); 1006 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); 1007 1008 if (Constant *RHSC = dyn_cast<Constant>(RHS)) { 1009 // X + 0 --> X 1010 if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) { 1011 if (CFP->isExactlyValue(ConstantFP::getNegativeZero 1012 (I.getType())->getValueAPF())) 1013 return ReplaceInstUsesWith(I, LHS); 1014 } 1015 1016 if (isa<PHINode>(LHS)) 1017 if (Instruction *NV = FoldOpIntoPhi(I)) 1018 return NV; 1019 } 1020 1021 // -A + B --> B - A 1022 // -A + -B --> -(A + B) 1023 if (Value *LHSV = dyn_castFNegVal(LHS)) 1024 return BinaryOperator::CreateFSub(RHS, LHSV); 1025 1026 // A + -B --> A - B 1027 if (!isa<Constant>(RHS)) 1028 if (Value *V = dyn_castFNegVal(RHS)) 1029 return BinaryOperator::CreateFSub(LHS, V); 1030 1031 // Check for X+0.0. Simplify it to X if we know X is not -0.0. 1032 if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) 1033 if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS)) 1034 return ReplaceInstUsesWith(I, LHS); 1035 1036 // Check for (add double (sitofp x), y), see if we can merge this into an 1037 // integer add followed by a promotion. 1038 if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) { 1039 // (add double (sitofp x), fpcst) --> (sitofp (add int x, intcst)) 1040 // ... if the constant fits in the integer value. 
This is useful for things 1041 // like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer 1042 // requires a constant pool load, and generally allows the add to be better 1043 // instcombined. 1044 if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) { 1045 Constant *CI = 1046 ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType()); 1047 if (LHSConv->hasOneUse() && 1048 ConstantExpr::getSIToFP(CI, I.getType()) == CFP && 1049 WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) { 1050 // Insert the new integer add. 1051 Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), 1052 CI, "addconv"); 1053 return new SIToFPInst(NewAdd, I.getType()); 1054 } 1055 } 1056 1057 // (add double (sitofp x), (sitofp y)) --> (sitofp (add int x, y)) 1058 if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) { 1059 // Only do this if x/y have the same type, if at last one of them has a 1060 // single use (so we don't increase the number of int->fp conversions), 1061 // and if the integer add will not overflow. 1062 if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&& 1063 (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && 1064 WillNotOverflowSignedAdd(LHSConv->getOperand(0), 1065 RHSConv->getOperand(0))) { 1066 // Insert the new integer add. 1067 Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), 1068 RHSConv->getOperand(0),"addconv"); 1069 return new SIToFPInst(NewAdd, I.getType()); 1070 } 1071 } 1072 } 1073 1074 return Changed ? &I : 0; 1075} 1076 1077 1078/// EmitGEPOffset - Given a getelementptr instruction/constantexpr, emit the 1079/// code necessary to compute the offset from the base pointer (without adding 1080/// in the base pointer). Return the result as a signed integer of intptr size. 
1081Value *InstCombiner::EmitGEPOffset(User *GEP) { 1082 TargetData &TD = *getTargetData(); 1083 gep_type_iterator GTI = gep_type_begin(GEP); 1084 const Type *IntPtrTy = TD.getIntPtrType(GEP->getContext()); 1085 Value *Result = Constant::getNullValue(IntPtrTy); 1086 1087 // Build a mask for high order bits. 1088 unsigned IntPtrWidth = TD.getPointerSizeInBits(); 1089 uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth); 1090 1091 for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e; 1092 ++i, ++GTI) { 1093 Value *Op = *i; 1094 uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()) & PtrSizeMask; 1095 if (ConstantInt *OpC = dyn_cast<ConstantInt>(Op)) { 1096 if (OpC->isZero()) continue; 1097 1098 // Handle a struct index, which adds its field offset to the pointer. 1099 if (const StructType *STy = dyn_cast<StructType>(*GTI)) { 1100 Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); 1101 1102 Result = Builder->CreateAdd(Result, 1103 ConstantInt::get(IntPtrTy, Size), 1104 GEP->getName()+".offs"); 1105 continue; 1106 } 1107 1108 Constant *Scale = ConstantInt::get(IntPtrTy, Size); 1109 Constant *OC = 1110 ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/); 1111 Scale = ConstantExpr::getMul(OC, Scale); 1112 // Emit an add instruction. 1113 Result = Builder->CreateAdd(Result, Scale, GEP->getName()+".offs"); 1114 continue; 1115 } 1116 // Convert to correct type. 1117 if (Op->getType() != IntPtrTy) 1118 Op = Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c"); 1119 if (Size != 1) { 1120 Constant *Scale = ConstantInt::get(IntPtrTy, Size); 1121 // We'll let instcombine(mul) convert this to a shl if possible. 1122 Op = Builder->CreateMul(Op, Scale, GEP->getName()+".idx"); 1123 } 1124 1125 // Emit an add instruction. 1126 Result = Builder->CreateAdd(Op, Result, GEP->getName()+".offs"); 1127 } 1128 return Result; 1129} 1130 1131 1132 1133 1134/// Optimize pointer differences into the same array into a size. 
Consider: 1135/// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer 1136/// operands to the ptrtoint instructions for the LHS/RHS of the subtract. 1137/// 1138Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, 1139 const Type *Ty) { 1140 assert(TD && "Must have target data info for this"); 1141 1142 // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize 1143 // this. 1144 bool Swapped; 1145 GetElementPtrInst *GEP = 0; 1146 ConstantExpr *CstGEP = 0; 1147 1148 // TODO: Could also optimize &A[i] - &A[j] -> "i-j", and "&A.foo[i] - &A.foo". 1149 // For now we require one side to be the base pointer "A" or a constant 1150 // expression derived from it. 1151 if (GetElementPtrInst *LHSGEP = dyn_cast<GetElementPtrInst>(LHS)) { 1152 // (gep X, ...) - X 1153 if (LHSGEP->getOperand(0) == RHS) { 1154 GEP = LHSGEP; 1155 Swapped = false; 1156 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(RHS)) { 1157 // (gep X, ...) - (ce_gep X, ...) 1158 if (CE->getOpcode() == Instruction::GetElementPtr && 1159 LHSGEP->getOperand(0) == CE->getOperand(0)) { 1160 CstGEP = CE; 1161 GEP = LHSGEP; 1162 Swapped = false; 1163 } 1164 } 1165 } 1166 1167 if (GetElementPtrInst *RHSGEP = dyn_cast<GetElementPtrInst>(RHS)) { 1168 // X - (gep X, ...) 1169 if (RHSGEP->getOperand(0) == LHS) { 1170 GEP = RHSGEP; 1171 Swapped = true; 1172 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(LHS)) { 1173 // (ce_gep X, ...) - (gep X, ...) 1174 if (CE->getOpcode() == Instruction::GetElementPtr && 1175 RHSGEP->getOperand(0) == CE->getOperand(0)) { 1176 CstGEP = CE; 1177 GEP = RHSGEP; 1178 Swapped = true; 1179 } 1180 } 1181 } 1182 1183 if (GEP == 0) 1184 return 0; 1185 1186 // Emit the offset of the GEP and an intptr_t. 1187 Value *Result = EmitGEPOffset(GEP); 1188 1189 // If we had a constant expression GEP on the other side offsetting the 1190 // pointer, subtract it from the offset we have. 
1191 if (CstGEP) { 1192 Value *CstOffset = EmitGEPOffset(CstGEP); 1193 Result = Builder->CreateSub(Result, CstOffset); 1194 } 1195 1196 1197 // If we have p - gep(p, ...) then we have to negate the result. 1198 if (Swapped) 1199 Result = Builder->CreateNeg(Result, "diff.neg"); 1200 1201 return Builder->CreateIntCast(Result, Ty, true); 1202} 1203 1204 1205Instruction *InstCombiner::visitSub(BinaryOperator &I) { 1206 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 1207 1208 if (Op0 == Op1) // sub X, X -> 0 1209 return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); 1210 1211 // If this is a 'B = x-(-A)', change to B = x+A. This preserves NSW/NUW. 1212 if (Value *V = dyn_castNegVal(Op1)) { 1213 BinaryOperator *Res = BinaryOperator::CreateAdd(Op0, V); 1214 Res->setHasNoSignedWrap(I.hasNoSignedWrap()); 1215 Res->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); 1216 return Res; 1217 } 1218 1219 if (isa<UndefValue>(Op0)) 1220 return ReplaceInstUsesWith(I, Op0); // undef - X -> undef 1221 if (isa<UndefValue>(Op1)) 1222 return ReplaceInstUsesWith(I, Op1); // X - undef -> undef 1223 if (I.getType() == Type::getInt1Ty(I.getContext())) 1224 return BinaryOperator::CreateXor(Op0, Op1); 1225 1226 if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) { 1227 // Replace (-1 - A) with (~A). 1228 if (C->isAllOnesValue()) 1229 return BinaryOperator::CreateNot(Op1); 1230 1231 // C - ~X == X + (1+C) 1232 Value *X = 0; 1233 if (match(Op1, m_Not(m_Value(X)))) 1234 return BinaryOperator::CreateAdd(X, AddOne(C)); 1235 1236 // -(X >>u 31) -> (X >>s 31) 1237 // -(X >>s 31) -> (X >>u 31) 1238 if (C->isZero()) { 1239 if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op1)) { 1240 if (SI->getOpcode() == Instruction::LShr) { 1241 if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) { 1242 // Check to see if we are shifting out everything but the sign bit. 
1243 if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) == 1244 SI->getType()->getPrimitiveSizeInBits()-1) { 1245 // Ok, the transformation is safe. Insert AShr. 1246 return BinaryOperator::Create(Instruction::AShr, 1247 SI->getOperand(0), CU, SI->getName()); 1248 } 1249 } 1250 } else if (SI->getOpcode() == Instruction::AShr) { 1251 if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) { 1252 // Check to see if we are shifting out everything but the sign bit. 1253 if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) == 1254 SI->getType()->getPrimitiveSizeInBits()-1) { 1255 // Ok, the transformation is safe. Insert LShr. 1256 return BinaryOperator::CreateLShr( 1257 SI->getOperand(0), CU, SI->getName()); 1258 } 1259 } 1260 } 1261 } 1262 } 1263 1264 // Try to fold constant sub into select arguments. 1265 if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) 1266 if (Instruction *R = FoldOpIntoSelect(I, SI, this)) 1267 return R; 1268 1269 // C - zext(bool) -> bool ? 
C - 1 : C 1270 if (ZExtInst *ZI = dyn_cast<ZExtInst>(Op1)) 1271 if (ZI->getSrcTy() == Type::getInt1Ty(I.getContext())) 1272 return SelectInst::Create(ZI->getOperand(0), SubOne(C), C); 1273 } 1274 1275 if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) { 1276 if (Op1I->getOpcode() == Instruction::Add) { 1277 if (Op1I->getOperand(0) == Op0) // X-(X+Y) == -Y 1278 return BinaryOperator::CreateNeg(Op1I->getOperand(1), 1279 I.getName()); 1280 else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y 1281 return BinaryOperator::CreateNeg(Op1I->getOperand(0), 1282 I.getName()); 1283 else if (ConstantInt *CI1 = dyn_cast<ConstantInt>(I.getOperand(0))) { 1284 if (ConstantInt *CI2 = dyn_cast<ConstantInt>(Op1I->getOperand(1))) 1285 // C1-(X+C2) --> (C1-C2)-X 1286 return BinaryOperator::CreateSub( 1287 ConstantExpr::getSub(CI1, CI2), Op1I->getOperand(0)); 1288 } 1289 } 1290 1291 if (Op1I->hasOneUse()) { 1292 // Replace (x - (y - z)) with (x + (z - y)) if the (y - z) subexpression 1293 // is not used by anyone else... 1294 // 1295 if (Op1I->getOpcode() == Instruction::Sub) { 1296 // Swap the two operands of the subexpr... 1297 Value *IIOp0 = Op1I->getOperand(0), *IIOp1 = Op1I->getOperand(1); 1298 Op1I->setOperand(0, IIOp1); 1299 Op1I->setOperand(1, IIOp0); 1300 1301 // Create the new top level add instruction... 1302 return BinaryOperator::CreateAdd(Op0, Op1); 1303 } 1304 1305 // Replace (A - (A & B)) with (A & ~B) if this is the only use of (A&B)... 
1306 // 1307 if (Op1I->getOpcode() == Instruction::And && 1308 (Op1I->getOperand(0) == Op0 || Op1I->getOperand(1) == Op0)) { 1309 Value *OtherOp = Op1I->getOperand(Op1I->getOperand(0) == Op0); 1310 1311 Value *NewNot = Builder->CreateNot(OtherOp, "B.not"); 1312 return BinaryOperator::CreateAnd(Op0, NewNot); 1313 } 1314 1315 // 0 - (X sdiv C) -> (X sdiv -C) 1316 if (Op1I->getOpcode() == Instruction::SDiv) 1317 if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0)) 1318 if (CSI->isZero()) 1319 if (Constant *DivRHS = dyn_cast<Constant>(Op1I->getOperand(1))) 1320 return BinaryOperator::CreateSDiv(Op1I->getOperand(0), 1321 ConstantExpr::getNeg(DivRHS)); 1322 1323 // X - X*C --> X * (1-C) 1324 ConstantInt *C2 = 0; 1325 if (dyn_castFoldableMul(Op1I, C2) == Op0) { 1326 Constant *CP1 = 1327 ConstantExpr::getSub(ConstantInt::get(I.getType(), 1), 1328 C2); 1329 return BinaryOperator::CreateMul(Op0, CP1); 1330 } 1331 } 1332 } 1333 1334 if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) { 1335 if (Op0I->getOpcode() == Instruction::Add) { 1336 if (Op0I->getOperand(0) == Op1) // (Y+X)-Y == X 1337 return ReplaceInstUsesWith(I, Op0I->getOperand(1)); 1338 else if (Op0I->getOperand(1) == Op1) // (X+Y)-Y == X 1339 return ReplaceInstUsesWith(I, Op0I->getOperand(0)); 1340 } else if (Op0I->getOpcode() == Instruction::Sub) { 1341 if (Op0I->getOperand(0) == Op1) // (X-Y)-X == -Y 1342 return BinaryOperator::CreateNeg(Op0I->getOperand(1), 1343 I.getName()); 1344 } 1345 } 1346 1347 ConstantInt *C1; 1348 if (Value *X = dyn_castFoldableMul(Op0, C1)) { 1349 if (X == Op1) // X*C - X --> X * (C-1) 1350 return BinaryOperator::CreateMul(Op1, SubOne(C1)); 1351 1352 ConstantInt *C2; // X*C1 - X*C2 -> X * (C1-C2) 1353 if (X == dyn_castFoldableMul(Op1, C2)) 1354 return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2)); 1355 } 1356 1357 // Optimize pointer differences into the same array into a size. Consider: 1358 // &A[10] - &A[0]: we should compile this to "10". 
1359 if (TD) { 1360 Value *LHSOp, *RHSOp; 1361 if (match(Op0, m_PtrToInt(m_Value(LHSOp))) && 1362 match(Op1, m_PtrToInt(m_Value(RHSOp)))) 1363 if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType())) 1364 return ReplaceInstUsesWith(I, Res); 1365 1366 // trunc(p)-trunc(q) -> trunc(p-q) 1367 if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) && 1368 match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp))))) 1369 if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType())) 1370 return ReplaceInstUsesWith(I, Res); 1371 } 1372 1373 return 0; 1374} 1375 1376Instruction *InstCombiner::visitFSub(BinaryOperator &I) { 1377 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 1378 1379 // If this is a 'B = x-(-A)', change to B = x+A... 1380 if (Value *V = dyn_castFNegVal(Op1)) 1381 return BinaryOperator::CreateFAdd(Op0, V); 1382 1383 if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) { 1384 if (Op1I->getOpcode() == Instruction::FAdd) { 1385 if (Op1I->getOperand(0) == Op0) // X-(X+Y) == -Y 1386 return BinaryOperator::CreateFNeg(Op1I->getOperand(1), 1387 I.getName()); 1388 else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y 1389 return BinaryOperator::CreateFNeg(Op1I->getOperand(0), 1390 I.getName()); 1391 } 1392 } 1393 1394 return 0; 1395} 1396 1397Instruction *InstCombiner::visitMul(BinaryOperator &I) { 1398 bool Changed = SimplifyCommutative(I); 1399 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 1400 1401 if (isa<UndefValue>(Op1)) // undef * X -> 0 1402 return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); 1403 1404 // Simplify mul instructions with a constant RHS. 
1405 if (Constant *Op1C = dyn_cast<Constant>(Op1)) { 1406 if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1C)) { 1407 1408 // ((X << C1)*C2) == (X * (C2 << C1)) 1409 if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0)) 1410 if (SI->getOpcode() == Instruction::Shl) 1411 if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1))) 1412 return BinaryOperator::CreateMul(SI->getOperand(0), 1413 ConstantExpr::getShl(CI, ShOp)); 1414 1415 if (CI->isZero()) 1416 return ReplaceInstUsesWith(I, Op1C); // X * 0 == 0 1417 if (CI->equalsInt(1)) // X * 1 == X 1418 return ReplaceInstUsesWith(I, Op0); 1419 if (CI->isAllOnesValue()) // X * -1 == 0 - X 1420 return BinaryOperator::CreateNeg(Op0, I.getName()); 1421 1422 const APInt& Val = cast<ConstantInt>(CI)->getValue(); 1423 if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C 1424 return BinaryOperator::CreateShl(Op0, 1425 ConstantInt::get(Op0->getType(), Val.logBase2())); 1426 } 1427 } else if (isa<VectorType>(Op1C->getType())) { 1428 if (Op1C->isNullValue()) 1429 return ReplaceInstUsesWith(I, Op1C); 1430 1431 if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) { 1432 if (Op1V->isAllOnesValue()) // X * -1 == 0 - X 1433 return BinaryOperator::CreateNeg(Op0, I.getName()); 1434 1435 // As above, vector X*splat(1.0) -> X in all defined cases. 1436 if (Constant *Splat = Op1V->getSplatValue()) { 1437 if (ConstantInt *CI = dyn_cast<ConstantInt>(Splat)) 1438 if (CI->equalsInt(1)) 1439 return ReplaceInstUsesWith(I, Op0); 1440 } 1441 } 1442 } 1443 1444 if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) 1445 if (Op0I->getOpcode() == Instruction::Add && Op0I->hasOneUse() && 1446 isa<ConstantInt>(Op0I->getOperand(1)) && isa<ConstantInt>(Op1C)) { 1447 // Canonicalize (X+C1)*C2 -> X*C2+C1*C2. 
1448 Value *Add = Builder->CreateMul(Op0I->getOperand(0), Op1C, "tmp"); 1449 Value *C1C2 = Builder->CreateMul(Op1C, Op0I->getOperand(1)); 1450 return BinaryOperator::CreateAdd(Add, C1C2); 1451 1452 } 1453 1454 // Try to fold constant mul into select arguments. 1455 if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) 1456 if (Instruction *R = FoldOpIntoSelect(I, SI, this)) 1457 return R; 1458 1459 if (isa<PHINode>(Op0)) 1460 if (Instruction *NV = FoldOpIntoPhi(I)) 1461 return NV; 1462 } 1463 1464 if (Value *Op0v = dyn_castNegVal(Op0)) // -X * -Y = X*Y 1465 if (Value *Op1v = dyn_castNegVal(Op1)) 1466 return BinaryOperator::CreateMul(Op0v, Op1v); 1467 1468 // (X / Y) * Y = X - (X % Y) 1469 // (X / Y) * -Y = (X % Y) - X 1470 { 1471 Value *Op1C = Op1; 1472 BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0); 1473 if (!BO || 1474 (BO->getOpcode() != Instruction::UDiv && 1475 BO->getOpcode() != Instruction::SDiv)) { 1476 Op1C = Op0; 1477 BO = dyn_cast<BinaryOperator>(Op1); 1478 } 1479 Value *Neg = dyn_castNegVal(Op1C); 1480 if (BO && BO->hasOneUse() && 1481 (BO->getOperand(1) == Op1C || BO->getOperand(1) == Neg) && 1482 (BO->getOpcode() == Instruction::UDiv || 1483 BO->getOpcode() == Instruction::SDiv)) { 1484 Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1); 1485 1486 // If the division is exact, X % Y is zero. 1487 if (SDivOperator *SDiv = dyn_cast<SDivOperator>(BO)) 1488 if (SDiv->isExact()) { 1489 if (Op1BO == Op1C) 1490 return ReplaceInstUsesWith(I, Op0BO); 1491 return BinaryOperator::CreateNeg(Op0BO); 1492 } 1493 1494 Value *Rem; 1495 if (BO->getOpcode() == Instruction::UDiv) 1496 Rem = Builder->CreateURem(Op0BO, Op1BO); 1497 else 1498 Rem = Builder->CreateSRem(Op0BO, Op1BO); 1499 Rem->takeName(BO); 1500 1501 if (Op1BO == Op1C) 1502 return BinaryOperator::CreateSub(Op0BO, Rem); 1503 return BinaryOperator::CreateSub(Rem, Op0BO); 1504 } 1505 } 1506 1507 /// i1 mul -> i1 and. 
1508 if (I.getType() == Type::getInt1Ty(I.getContext())) 1509 return BinaryOperator::CreateAnd(Op0, Op1); 1510 1511 // X*(1 << Y) --> X << Y 1512 // (1 << Y)*X --> X << Y 1513 { 1514 Value *Y; 1515 if (match(Op0, m_Shl(m_One(), m_Value(Y)))) 1516 return BinaryOperator::CreateShl(Op1, Y); 1517 if (match(Op1, m_Shl(m_One(), m_Value(Y)))) 1518 return BinaryOperator::CreateShl(Op0, Y); 1519 } 1520 1521 // If one of the operands of the multiply is a cast from a boolean value, then 1522 // we know the bool is either zero or one, so this is a 'masking' multiply. 1523 // X * Y (where Y is 0 or 1) -> X & (0-Y) 1524 if (!isa<VectorType>(I.getType())) { 1525 // -2 is "-1 << 1" so it is all bits set except the low one. 1526 APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true); 1527 1528 Value *BoolCast = 0, *OtherOp = 0; 1529 if (MaskedValueIsZero(Op0, Negative2)) 1530 BoolCast = Op0, OtherOp = Op1; 1531 else if (MaskedValueIsZero(Op1, Negative2)) 1532 BoolCast = Op1, OtherOp = Op0; 1533 1534 if (BoolCast) { 1535 Value *V = Builder->CreateSub(Constant::getNullValue(I.getType()), 1536 BoolCast, "tmp"); 1537 return BinaryOperator::CreateAnd(V, OtherOp); 1538 } 1539 } 1540 1541 return Changed ? &I : 0; 1542} 1543 1544Instruction *InstCombiner::visitFMul(BinaryOperator &I) { 1545 bool Changed = SimplifyCommutative(I); 1546 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 1547 1548 // Simplify mul instructions with a constant RHS... 1549 if (Constant *Op1C = dyn_cast<Constant>(Op1)) { 1550 if (ConstantFP *Op1F = dyn_cast<ConstantFP>(Op1C)) { 1551 // "In IEEE floating point, x*1 is not equivalent to x for nans. However, 1552 // ANSI says we can drop signals, so we can do this anyway." 
(from GCC) 1553 if (Op1F->isExactlyValue(1.0)) 1554 return ReplaceInstUsesWith(I, Op0); // Eliminate 'mul double %X, 1.0' 1555 } else if (isa<VectorType>(Op1C->getType())) { 1556 if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) { 1557 // As above, vector X*splat(1.0) -> X in all defined cases. 1558 if (Constant *Splat = Op1V->getSplatValue()) { 1559 if (ConstantFP *F = dyn_cast<ConstantFP>(Splat)) 1560 if (F->isExactlyValue(1.0)) 1561 return ReplaceInstUsesWith(I, Op0); 1562 } 1563 } 1564 } 1565 1566 // Try to fold constant mul into select arguments. 1567 if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) 1568 if (Instruction *R = FoldOpIntoSelect(I, SI, this)) 1569 return R; 1570 1571 if (isa<PHINode>(Op0)) 1572 if (Instruction *NV = FoldOpIntoPhi(I)) 1573 return NV; 1574 } 1575 1576 if (Value *Op0v = dyn_castFNegVal(Op0)) // -X * -Y = X*Y 1577 if (Value *Op1v = dyn_castFNegVal(Op1)) 1578 return BinaryOperator::CreateFMul(Op0v, Op1v); 1579 1580 return Changed ? &I : 0; 1581} 1582 1583/// SimplifyDivRemOfSelect - Try to fold a divide or remainder of a select 1584/// instruction. 1585bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) { 1586 SelectInst *SI = cast<SelectInst>(I.getOperand(1)); 1587 1588 // div/rem X, (Cond ? 0 : Y) -> div/rem X, Y 1589 int NonNullOperand = -1; 1590 if (Constant *ST = dyn_cast<Constant>(SI->getOperand(1))) 1591 if (ST->isNullValue()) 1592 NonNullOperand = 2; 1593 // div/rem X, (Cond ? Y : 0) -> div/rem X, Y 1594 if (Constant *ST = dyn_cast<Constant>(SI->getOperand(2))) 1595 if (ST->isNullValue()) 1596 NonNullOperand = 1; 1597 1598 if (NonNullOperand == -1) 1599 return false; 1600 1601 Value *SelectCond = SI->getOperand(0); 1602 1603 // Change the div/rem to use 'Y' instead of the select. 1604 I.setOperand(1, SI->getOperand(NonNullOperand)); 1605 1606 // Okay, we know we replace the operand of the div/rem with 'Y' with no 1607 // problem. 
However, the select, or the condition of the select may have 1608 // multiple uses. Based on our knowledge that the operand must be non-zero, 1609 // propagate the known value for the select into other uses of it, and 1610 // propagate a known value of the condition into its other users. 1611 1612 // If the select and condition only have a single use, don't bother with this, 1613 // early exit. 1614 if (SI->use_empty() && SelectCond->hasOneUse()) 1615 return true; 1616 1617 // Scan the current block backward, looking for other uses of SI. 1618 BasicBlock::iterator BBI = &I, BBFront = I.getParent()->begin(); 1619 1620 while (BBI != BBFront) { 1621 --BBI; 1622 // If we found a call to a function, we can't assume it will return, so 1623 // information from below it cannot be propagated above it. 1624 if (isa<CallInst>(BBI) && !isa<IntrinsicInst>(BBI)) 1625 break; 1626 1627 // Replace uses of the select or its condition with the known values. 1628 for (Instruction::op_iterator I = BBI->op_begin(), E = BBI->op_end(); 1629 I != E; ++I) { 1630 if (*I == SI) { 1631 *I = SI->getOperand(NonNullOperand); 1632 Worklist.Add(BBI); 1633 } else if (*I == SelectCond) { 1634 *I = NonNullOperand == 1 ? ConstantInt::getTrue(BBI->getContext()) : 1635 ConstantInt::getFalse(BBI->getContext()); 1636 Worklist.Add(BBI); 1637 } 1638 } 1639 1640 // If we past the instruction, quit looking for it. 1641 if (&*BBI == SI) 1642 SI = 0; 1643 if (&*BBI == SelectCond) 1644 SelectCond = 0; 1645 1646 // If we ran out of things to eliminate, break out of the loop. 1647 if (SelectCond == 0 && SI == 0) 1648 break; 1649 1650 } 1651 return true; 1652} 1653 1654 1655/// This function implements the transforms on div instructions that work 1656/// regardless of the kind of div instruction it is (udiv, sdiv, or fdiv). It is 1657/// used by the visitors to those instructions. 
1658/// @brief Transforms common to all three div instructions 1659Instruction *InstCombiner::commonDivTransforms(BinaryOperator &I) { 1660 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 1661 1662 // undef / X -> 0 for integer. 1663 // undef / X -> undef for FP (the undef could be a snan). 1664 if (isa<UndefValue>(Op0)) { 1665 if (Op0->getType()->isFPOrFPVector()) 1666 return ReplaceInstUsesWith(I, Op0); 1667 return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); 1668 } 1669 1670 // X / undef -> undef 1671 if (isa<UndefValue>(Op1)) 1672 return ReplaceInstUsesWith(I, Op1); 1673 1674 return 0; 1675} 1676 1677/// This function implements the transforms common to both integer division 1678/// instructions (udiv and sdiv). It is called by the visitors to those integer 1679/// division instructions. 1680/// @brief Common integer divide transforms 1681Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { 1682 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 1683 1684 // (sdiv X, X) --> 1 (udiv X, X) --> 1 1685 if (Op0 == Op1) { 1686 if (const VectorType *Ty = dyn_cast<VectorType>(I.getType())) { 1687 Constant *CI = ConstantInt::get(Ty->getElementType(), 1); 1688 std::vector<Constant*> Elts(Ty->getNumElements(), CI); 1689 return ReplaceInstUsesWith(I, ConstantVector::get(Elts)); 1690 } 1691 1692 Constant *CI = ConstantInt::get(I.getType(), 1); 1693 return ReplaceInstUsesWith(I, CI); 1694 } 1695 1696 if (Instruction *Common = commonDivTransforms(I)) 1697 return Common; 1698 1699 // Handle cases involving: [su]div X, (select Cond, Y, Z) 1700 // This does not apply for fdiv. 
1701 if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I)) 1702 return &I; 1703 1704 if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { 1705 // div X, 1 == X 1706 if (RHS->equalsInt(1)) 1707 return ReplaceInstUsesWith(I, Op0); 1708 1709 // (X / C1) / C2 -> X / (C1*C2) 1710 if (Instruction *LHS = dyn_cast<Instruction>(Op0)) 1711 if (Instruction::BinaryOps(LHS->getOpcode()) == I.getOpcode()) 1712 if (ConstantInt *LHSRHS = dyn_cast<ConstantInt>(LHS->getOperand(1))) { 1713 if (MultiplyOverflows(RHS, LHSRHS, 1714 I.getOpcode()==Instruction::SDiv)) 1715 return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); 1716 else 1717 return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0), 1718 ConstantExpr::getMul(RHS, LHSRHS)); 1719 } 1720 1721 if (!RHS->isZero()) { // avoid X udiv 0 1722 if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) 1723 if (Instruction *R = FoldOpIntoSelect(I, SI, this)) 1724 return R; 1725 if (isa<PHINode>(Op0)) 1726 if (Instruction *NV = FoldOpIntoPhi(I)) 1727 return NV; 1728 } 1729 } 1730 1731 // 0 / X == 0, we don't need to preserve faults! 1732 if (ConstantInt *LHS = dyn_cast<ConstantInt>(Op0)) 1733 if (LHS->equalsInt(0)) 1734 return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); 1735 1736 // It can't be division by zero, hence it must be division by one. 
1737 if (I.getType() == Type::getInt1Ty(I.getContext())) 1738 return ReplaceInstUsesWith(I, Op0); 1739 1740 if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) { 1741 if (ConstantInt *X = cast_or_null<ConstantInt>(Op1V->getSplatValue())) 1742 // div X, 1 == X 1743 if (X->isOne()) 1744 return ReplaceInstUsesWith(I, Op0); 1745 } 1746 1747 return 0; 1748} 1749 1750Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { 1751 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 1752 1753 // Handle the integer div common cases 1754 if (Instruction *Common = commonIDivTransforms(I)) 1755 return Common; 1756 1757 if (ConstantInt *C = dyn_cast<ConstantInt>(Op1)) { 1758 // X udiv C^2 -> X >> C 1759 // Check to see if this is an unsigned division with an exact power of 2, 1760 // if so, convert to a right shift. 1761 if (C->getValue().isPowerOf2()) // 0 not included in isPowerOf2 1762 return BinaryOperator::CreateLShr(Op0, 1763 ConstantInt::get(Op0->getType(), C->getValue().logBase2())); 1764 1765 // X udiv C, where C >= signbit 1766 if (C->getValue().isNegative()) { 1767 Value *IC = Builder->CreateICmpULT( Op0, C); 1768 return SelectInst::Create(IC, Constant::getNullValue(I.getType()), 1769 ConstantInt::get(I.getType(), 1)); 1770 } 1771 } 1772 1773 // X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2) 1774 if (BinaryOperator *RHSI = dyn_cast<BinaryOperator>(I.getOperand(1))) { 1775 if (RHSI->getOpcode() == Instruction::Shl && 1776 isa<ConstantInt>(RHSI->getOperand(0))) { 1777 const APInt& C1 = cast<ConstantInt>(RHSI->getOperand(0))->getValue(); 1778 if (C1.isPowerOf2()) { 1779 Value *N = RHSI->getOperand(1); 1780 const Type *NTy = N->getType(); 1781 if (uint32_t C2 = C1.logBase2()) 1782 N = Builder->CreateAdd(N, ConstantInt::get(NTy, C2), "tmp"); 1783 return BinaryOperator::CreateLShr(Op0, N); 1784 } 1785 } 1786 } 1787 1788 // udiv X, (Select Cond, C1, C2) --> Select Cond, (shr X, C1), (shr X, C2) 1789 // where C1&C2 are powers of two. 
1790 if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) 1791 if (ConstantInt *STO = dyn_cast<ConstantInt>(SI->getOperand(1))) 1792 if (ConstantInt *SFO = dyn_cast<ConstantInt>(SI->getOperand(2))) { 1793 const APInt &TVA = STO->getValue(), &FVA = SFO->getValue(); 1794 if (TVA.isPowerOf2() && FVA.isPowerOf2()) { 1795 // Compute the shift amounts 1796 uint32_t TSA = TVA.logBase2(), FSA = FVA.logBase2(); 1797 // Construct the "on true" case of the select 1798 Constant *TC = ConstantInt::get(Op0->getType(), TSA); 1799 Value *TSI = Builder->CreateLShr(Op0, TC, SI->getName()+".t"); 1800 1801 // Construct the "on false" case of the select 1802 Constant *FC = ConstantInt::get(Op0->getType(), FSA); 1803 Value *FSI = Builder->CreateLShr(Op0, FC, SI->getName()+".f"); 1804 1805 // construct the select instruction and return it. 1806 return SelectInst::Create(SI->getOperand(0), TSI, FSI, SI->getName()); 1807 } 1808 } 1809 return 0; 1810} 1811 1812Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { 1813 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 1814 1815 // Handle the integer div common cases 1816 if (Instruction *Common = commonIDivTransforms(I)) 1817 return Common; 1818 1819 if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { 1820 // sdiv X, -1 == -X 1821 if (RHS->isAllOnesValue()) 1822 return BinaryOperator::CreateNeg(Op0); 1823 1824 // sdiv X, C --> ashr X, log2(C) 1825 if (cast<SDivOperator>(&I)->isExact() && 1826 RHS->getValue().isNonNegative() && 1827 RHS->getValue().isPowerOf2()) { 1828 Value *ShAmt = llvm::ConstantInt::get(RHS->getType(), 1829 RHS->getValue().exactLogBase2()); 1830 return BinaryOperator::CreateAShr(Op0, ShAmt, I.getName()); 1831 } 1832 1833 // -X/C --> X/-C provided the negation doesn't overflow. 
1834 if (SubOperator *Sub = dyn_cast<SubOperator>(Op0)) 1835 if (isa<Constant>(Sub->getOperand(0)) && 1836 cast<Constant>(Sub->getOperand(0))->isNullValue() && 1837 Sub->hasNoSignedWrap()) 1838 return BinaryOperator::CreateSDiv(Sub->getOperand(1), 1839 ConstantExpr::getNeg(RHS)); 1840 } 1841 1842 // If the sign bits of both operands are zero (i.e. we can prove they are 1843 // unsigned inputs), turn this into a udiv. 1844 if (I.getType()->isInteger()) { 1845 APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); 1846 if (MaskedValueIsZero(Op0, Mask)) { 1847 if (MaskedValueIsZero(Op1, Mask)) { 1848 // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set 1849 return BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); 1850 } 1851 ConstantInt *ShiftedInt; 1852 if (match(Op1, m_Shl(m_ConstantInt(ShiftedInt), m_Value())) && 1853 ShiftedInt->getValue().isPowerOf2()) { 1854 // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y) 1855 // Safe because the only negative value (1 << Y) can take on is 1856 // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have 1857 // the sign bit set. 1858 return BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); 1859 } 1860 } 1861 } 1862 1863 return 0; 1864} 1865 1866Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { 1867 return commonDivTransforms(I); 1868} 1869 1870/// This function implements the transforms on rem instructions that work 1871/// regardless of the kind of rem instruction it is (urem, srem, or frem). It 1872/// is used by the visitors to those instructions. 
1873/// @brief Transforms common to all three rem instructions 1874Instruction *InstCombiner::commonRemTransforms(BinaryOperator &I) { 1875 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 1876 1877 if (isa<UndefValue>(Op0)) { // undef % X -> 0 1878 if (I.getType()->isFPOrFPVector()) 1879 return ReplaceInstUsesWith(I, Op0); // X % undef -> undef (could be SNaN) 1880 return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); 1881 } 1882 if (isa<UndefValue>(Op1)) 1883 return ReplaceInstUsesWith(I, Op1); // X % undef -> undef 1884 1885 // Handle cases involving: rem X, (select Cond, Y, Z) 1886 if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I)) 1887 return &I; 1888 1889 return 0; 1890} 1891 1892/// This function implements the transforms common to both integer remainder 1893/// instructions (urem and srem). It is called by the visitors to those integer 1894/// remainder instructions. 1895/// @brief Common integer remainder transforms 1896Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) { 1897 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 1898 1899 if (Instruction *common = commonRemTransforms(I)) 1900 return common; 1901 1902 // 0 % X == 0 for integer, we don't need to preserve faults! 1903 if (Constant *LHS = dyn_cast<Constant>(Op0)) 1904 if (LHS->isNullValue()) 1905 return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); 1906 1907 if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { 1908 // X % 0 == undef, we don't need to preserve faults! 
1909 if (RHS->equalsInt(0)) 1910 return ReplaceInstUsesWith(I, UndefValue::get(I.getType())); 1911 1912 if (RHS->equalsInt(1)) // X % 1 == 0 1913 return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); 1914 1915 if (Instruction *Op0I = dyn_cast<Instruction>(Op0)) { 1916 if (SelectInst *SI = dyn_cast<SelectInst>(Op0I)) { 1917 if (Instruction *R = FoldOpIntoSelect(I, SI, this)) 1918 return R; 1919 } else if (isa<PHINode>(Op0I)) { 1920 if (Instruction *NV = FoldOpIntoPhi(I)) 1921 return NV; 1922 } 1923 1924 // See if we can fold away this rem instruction. 1925 if (SimplifyDemandedInstructionBits(I)) 1926 return &I; 1927 } 1928 } 1929 1930 return 0; 1931} 1932 1933Instruction *InstCombiner::visitURem(BinaryOperator &I) { 1934 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 1935 1936 if (Instruction *common = commonIRemTransforms(I)) 1937 return common; 1938 1939 if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { 1940 // X urem C^2 -> X and C 1941 // Check to see if this is an unsigned remainder with an exact power of 2, 1942 // if so, convert to a bitwise and. 1943 if (ConstantInt *C = dyn_cast<ConstantInt>(RHS)) 1944 if (C->getValue().isPowerOf2()) 1945 return BinaryOperator::CreateAnd(Op0, SubOne(C)); 1946 } 1947 1948 if (Instruction *RHSI = dyn_cast<Instruction>(I.getOperand(1))) { 1949 // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1) 1950 if (RHSI->getOpcode() == Instruction::Shl && 1951 isa<ConstantInt>(RHSI->getOperand(0))) { 1952 if (cast<ConstantInt>(RHSI->getOperand(0))->getValue().isPowerOf2()) { 1953 Constant *N1 = Constant::getAllOnesValue(I.getType()); 1954 Value *Add = Builder->CreateAdd(RHSI, N1, "tmp"); 1955 return BinaryOperator::CreateAnd(Op0, Add); 1956 } 1957 } 1958 } 1959 1960 // urem X, (select Cond, 2^C1, 2^C2) --> select Cond, (and X, C1), (and X, C2) 1961 // where C1&C2 are powers of two. 
1962 if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) { 1963 if (ConstantInt *STO = dyn_cast<ConstantInt>(SI->getOperand(1))) 1964 if (ConstantInt *SFO = dyn_cast<ConstantInt>(SI->getOperand(2))) { 1965 // STO == 0 and SFO == 0 handled above. 1966 if ((STO->getValue().isPowerOf2()) && 1967 (SFO->getValue().isPowerOf2())) { 1968 Value *TrueAnd = Builder->CreateAnd(Op0, SubOne(STO), 1969 SI->getName()+".t"); 1970 Value *FalseAnd = Builder->CreateAnd(Op0, SubOne(SFO), 1971 SI->getName()+".f"); 1972 return SelectInst::Create(SI->getOperand(0), TrueAnd, FalseAnd); 1973 } 1974 } 1975 } 1976 1977 return 0; 1978} 1979 1980Instruction *InstCombiner::visitSRem(BinaryOperator &I) { 1981 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 1982 1983 // Handle the integer rem common cases 1984 if (Instruction *Common = commonIRemTransforms(I)) 1985 return Common; 1986 1987 if (Value *RHSNeg = dyn_castNegVal(Op1)) 1988 if (!isa<Constant>(RHSNeg) || 1989 (isa<ConstantInt>(RHSNeg) && 1990 cast<ConstantInt>(RHSNeg)->getValue().isStrictlyPositive())) { 1991 // X % -Y -> X % Y 1992 Worklist.AddValue(I.getOperand(1)); 1993 I.setOperand(1, RHSNeg); 1994 return &I; 1995 } 1996 1997 // If the sign bits of both operands are zero (i.e. we can prove they are 1998 // unsigned inputs), turn this into a urem. 1999 if (I.getType()->isInteger()) { 2000 APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); 2001 if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) { 2002 // X srem Y -> X urem Y, iff X and Y don't have sign bit set 2003 return BinaryOperator::CreateURem(Op0, Op1, I.getName()); 2004 } 2005 } 2006 2007 // If it's a constant vector, flip any negative values positive. 
2008 if (ConstantVector *RHSV = dyn_cast<ConstantVector>(Op1)) { 2009 unsigned VWidth = RHSV->getNumOperands(); 2010 2011 bool hasNegative = false; 2012 for (unsigned i = 0; !hasNegative && i != VWidth; ++i) 2013 if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i))) 2014 if (RHS->getValue().isNegative()) 2015 hasNegative = true; 2016 2017 if (hasNegative) { 2018 std::vector<Constant *> Elts(VWidth); 2019 for (unsigned i = 0; i != VWidth; ++i) { 2020 if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i))) { 2021 if (RHS->getValue().isNegative()) 2022 Elts[i] = cast<ConstantInt>(ConstantExpr::getNeg(RHS)); 2023 else 2024 Elts[i] = RHS; 2025 } 2026 } 2027 2028 Constant *NewRHSV = ConstantVector::get(Elts); 2029 if (NewRHSV != RHSV) { 2030 Worklist.AddValue(I.getOperand(1)); 2031 I.setOperand(1, NewRHSV); 2032 return &I; 2033 } 2034 } 2035 } 2036 2037 return 0; 2038} 2039 2040Instruction *InstCombiner::visitFRem(BinaryOperator &I) { 2041 return commonRemTransforms(I); 2042} 2043 2044// isOneBitSet - Return true if there is exactly one bit set in the specified 2045// constant. 2046static bool isOneBitSet(const ConstantInt *CI) { 2047 return CI->getValue().isPowerOf2(); 2048} 2049 2050/// getICmpCode - Encode a icmp predicate into a three bit mask. These bits 2051/// are carefully arranged to allow folding of expressions such as: 2052/// 2053/// (A < B) | (A > B) --> (A != B) 2054/// 2055/// Note that this is only valid if the first and second predicates have the 2056/// same sign. 
Is illegal to do: (A u< B) | (A s> B) 2057/// 2058/// Three bits are used to represent the condition, as follows: 2059/// 0 A > B 2060/// 1 A == B 2061/// 2 A < B 2062/// 2063/// <=> Value Definition 2064/// 000 0 Always false 2065/// 001 1 A > B 2066/// 010 2 A == B 2067/// 011 3 A >= B 2068/// 100 4 A < B 2069/// 101 5 A != B 2070/// 110 6 A <= B 2071/// 111 7 Always true 2072/// 2073static unsigned getICmpCode(const ICmpInst *ICI) { 2074 switch (ICI->getPredicate()) { 2075 // False -> 0 2076 case ICmpInst::ICMP_UGT: return 1; // 001 2077 case ICmpInst::ICMP_SGT: return 1; // 001 2078 case ICmpInst::ICMP_EQ: return 2; // 010 2079 case ICmpInst::ICMP_UGE: return 3; // 011 2080 case ICmpInst::ICMP_SGE: return 3; // 011 2081 case ICmpInst::ICMP_ULT: return 4; // 100 2082 case ICmpInst::ICMP_SLT: return 4; // 100 2083 case ICmpInst::ICMP_NE: return 5; // 101 2084 case ICmpInst::ICMP_ULE: return 6; // 110 2085 case ICmpInst::ICMP_SLE: return 6; // 110 2086 // True -> 7 2087 default: 2088 llvm_unreachable("Invalid ICmp predicate!"); 2089 return 0; 2090 } 2091} 2092 2093/// getFCmpCode - Similar to getICmpCode but for FCmpInst. This encodes a fcmp 2094/// predicate into a three bit mask. It also returns whether it is an ordered 2095/// predicate by reference. 
2096static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) { 2097 isOrdered = false; 2098 switch (CC) { 2099 case FCmpInst::FCMP_ORD: isOrdered = true; return 0; // 000 2100 case FCmpInst::FCMP_UNO: return 0; // 000 2101 case FCmpInst::FCMP_OGT: isOrdered = true; return 1; // 001 2102 case FCmpInst::FCMP_UGT: return 1; // 001 2103 case FCmpInst::FCMP_OEQ: isOrdered = true; return 2; // 010 2104 case FCmpInst::FCMP_UEQ: return 2; // 010 2105 case FCmpInst::FCMP_OGE: isOrdered = true; return 3; // 011 2106 case FCmpInst::FCMP_UGE: return 3; // 011 2107 case FCmpInst::FCMP_OLT: isOrdered = true; return 4; // 100 2108 case FCmpInst::FCMP_ULT: return 4; // 100 2109 case FCmpInst::FCMP_ONE: isOrdered = true; return 5; // 101 2110 case FCmpInst::FCMP_UNE: return 5; // 101 2111 case FCmpInst::FCMP_OLE: isOrdered = true; return 6; // 110 2112 case FCmpInst::FCMP_ULE: return 6; // 110 2113 // True -> 7 2114 default: 2115 // Not expecting FCMP_FALSE and FCMP_TRUE; 2116 llvm_unreachable("Unexpected FCmp predicate!"); 2117 return 0; 2118 } 2119} 2120 2121/// getICmpValue - This is the complement of getICmpCode, which turns an 2122/// opcode and two operands into either a constant true or false, or a brand 2123/// new ICmp instruction. The sign is passed in to determine which kind 2124/// of predicate to use in the new icmp instruction. 
2125static Value *getICmpValue(bool sign, unsigned code, Value *LHS, Value *RHS) { 2126 switch (code) { 2127 default: llvm_unreachable("Illegal ICmp code!"); 2128 case 0: return ConstantInt::getFalse(LHS->getContext()); 2129 case 1: 2130 if (sign) 2131 return new ICmpInst(ICmpInst::ICMP_SGT, LHS, RHS); 2132 else 2133 return new ICmpInst(ICmpInst::ICMP_UGT, LHS, RHS); 2134 case 2: return new ICmpInst(ICmpInst::ICMP_EQ, LHS, RHS); 2135 case 3: 2136 if (sign) 2137 return new ICmpInst(ICmpInst::ICMP_SGE, LHS, RHS); 2138 else 2139 return new ICmpInst(ICmpInst::ICMP_UGE, LHS, RHS); 2140 case 4: 2141 if (sign) 2142 return new ICmpInst(ICmpInst::ICMP_SLT, LHS, RHS); 2143 else 2144 return new ICmpInst(ICmpInst::ICMP_ULT, LHS, RHS); 2145 case 5: return new ICmpInst(ICmpInst::ICMP_NE, LHS, RHS); 2146 case 6: 2147 if (sign) 2148 return new ICmpInst(ICmpInst::ICMP_SLE, LHS, RHS); 2149 else 2150 return new ICmpInst(ICmpInst::ICMP_ULE, LHS, RHS); 2151 case 7: return ConstantInt::getTrue(LHS->getContext()); 2152 } 2153} 2154 2155/// getFCmpValue - This is the complement of getFCmpCode, which turns an 2156/// opcode and two operands into either a FCmp instruction. isordered is passed 2157/// in to determine which kind of predicate to use in the new fcmp instruction. 
2158static Value *getFCmpValue(bool isordered, unsigned code, 2159 Value *LHS, Value *RHS) { 2160 switch (code) { 2161 default: llvm_unreachable("Illegal FCmp code!"); 2162 case 0: 2163 if (isordered) 2164 return new FCmpInst(FCmpInst::FCMP_ORD, LHS, RHS); 2165 else 2166 return new FCmpInst(FCmpInst::FCMP_UNO, LHS, RHS); 2167 case 1: 2168 if (isordered) 2169 return new FCmpInst(FCmpInst::FCMP_OGT, LHS, RHS); 2170 else 2171 return new FCmpInst(FCmpInst::FCMP_UGT, LHS, RHS); 2172 case 2: 2173 if (isordered) 2174 return new FCmpInst(FCmpInst::FCMP_OEQ, LHS, RHS); 2175 else 2176 return new FCmpInst(FCmpInst::FCMP_UEQ, LHS, RHS); 2177 case 3: 2178 if (isordered) 2179 return new FCmpInst(FCmpInst::FCMP_OGE, LHS, RHS); 2180 else 2181 return new FCmpInst(FCmpInst::FCMP_UGE, LHS, RHS); 2182 case 4: 2183 if (isordered) 2184 return new FCmpInst(FCmpInst::FCMP_OLT, LHS, RHS); 2185 else 2186 return new FCmpInst(FCmpInst::FCMP_ULT, LHS, RHS); 2187 case 5: 2188 if (isordered) 2189 return new FCmpInst(FCmpInst::FCMP_ONE, LHS, RHS); 2190 else 2191 return new FCmpInst(FCmpInst::FCMP_UNE, LHS, RHS); 2192 case 6: 2193 if (isordered) 2194 return new FCmpInst(FCmpInst::FCMP_OLE, LHS, RHS); 2195 else 2196 return new FCmpInst(FCmpInst::FCMP_ULE, LHS, RHS); 2197 case 7: return ConstantInt::getTrue(LHS->getContext()); 2198 } 2199} 2200 2201/// PredicatesFoldable - Return true if both predicates match sign or if at 2202/// least one of them is an equality comparison (which is signless). 
2203static bool PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) { 2204 return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) || 2205 (CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) || 2206 (CmpInst::isSigned(p2) && ICmpInst::isEquality(p1)); 2207} 2208 2209namespace { 2210// FoldICmpLogical - Implements (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B) 2211struct FoldICmpLogical { 2212 InstCombiner &IC; 2213 Value *LHS, *RHS; 2214 ICmpInst::Predicate pred; 2215 FoldICmpLogical(InstCombiner &ic, ICmpInst *ICI) 2216 : IC(ic), LHS(ICI->getOperand(0)), RHS(ICI->getOperand(1)), 2217 pred(ICI->getPredicate()) {} 2218 bool shouldApply(Value *V) const { 2219 if (ICmpInst *ICI = dyn_cast<ICmpInst>(V)) 2220 if (PredicatesFoldable(pred, ICI->getPredicate())) 2221 return ((ICI->getOperand(0) == LHS && ICI->getOperand(1) == RHS) || 2222 (ICI->getOperand(0) == RHS && ICI->getOperand(1) == LHS)); 2223 return false; 2224 } 2225 Instruction *apply(Instruction &Log) const { 2226 ICmpInst *ICI = cast<ICmpInst>(Log.getOperand(0)); 2227 if (ICI->getOperand(0) != LHS) { 2228 assert(ICI->getOperand(1) == LHS); 2229 ICI->swapOperands(); // Swap the LHS and RHS of the ICmp 2230 } 2231 2232 ICmpInst *RHSICI = cast<ICmpInst>(Log.getOperand(1)); 2233 unsigned LHSCode = getICmpCode(ICI); 2234 unsigned RHSCode = getICmpCode(RHSICI); 2235 unsigned Code; 2236 switch (Log.getOpcode()) { 2237 case Instruction::And: Code = LHSCode & RHSCode; break; 2238 case Instruction::Or: Code = LHSCode | RHSCode; break; 2239 case Instruction::Xor: Code = LHSCode ^ RHSCode; break; 2240 default: llvm_unreachable("Illegal logical opcode!"); return 0; 2241 } 2242 2243 bool isSigned = RHSICI->isSigned() || ICI->isSigned(); 2244 Value *RV = getICmpValue(isSigned, Code, LHS, RHS); 2245 if (Instruction *I = dyn_cast<Instruction>(RV)) 2246 return I; 2247 // Otherwise, it's a constant boolean value... 
2248 return IC.ReplaceInstUsesWith(Log, RV); 2249 } 2250}; 2251} // end anonymous namespace 2252 2253// OptAndOp - This handles expressions of the form ((val OP C1) & C2). Where 2254// the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'. Op is 2255// guaranteed to be a binary operator. 2256Instruction *InstCombiner::OptAndOp(Instruction *Op, 2257 ConstantInt *OpRHS, 2258 ConstantInt *AndRHS, 2259 BinaryOperator &TheAnd) { 2260 Value *X = Op->getOperand(0); 2261 Constant *Together = 0; 2262 if (!Op->isShift()) 2263 Together = ConstantExpr::getAnd(AndRHS, OpRHS); 2264 2265 switch (Op->getOpcode()) { 2266 case Instruction::Xor: 2267 if (Op->hasOneUse()) { 2268 // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2) 2269 Value *And = Builder->CreateAnd(X, AndRHS); 2270 And->takeName(Op); 2271 return BinaryOperator::CreateXor(And, Together); 2272 } 2273 break; 2274 case Instruction::Or: 2275 if (Together == AndRHS) // (X | C) & C --> C 2276 return ReplaceInstUsesWith(TheAnd, AndRHS); 2277 2278 if (Op->hasOneUse() && Together != OpRHS) { 2279 // (X | C1) & C2 --> (X | (C1&C2)) & C2 2280 Value *Or = Builder->CreateOr(X, Together); 2281 Or->takeName(Op); 2282 return BinaryOperator::CreateAnd(Or, AndRHS); 2283 } 2284 break; 2285 case Instruction::Add: 2286 if (Op->hasOneUse()) { 2287 // Adding a one to a single bit bit-field should be turned into an XOR 2288 // of the bit. First thing to check is to see if this AND is with a 2289 // single bit constant. 2290 const APInt& AndRHSV = cast<ConstantInt>(AndRHS)->getValue(); 2291 2292 // If there is only one bit set... 2293 if (isOneBitSet(cast<ConstantInt>(AndRHS))) { 2294 // Ok, at this point, we know that we are masking the result of the 2295 // ADD down to exactly one bit. If the constant we are adding has 2296 // no bits set below this bit, then we can eliminate the ADD. 2297 const APInt& AddRHS = cast<ConstantInt>(OpRHS)->getValue(); 2298 2299 // Check to see if any bits below the one bit set in AndRHSV are set. 
2300 if ((AddRHS & (AndRHSV-1)) == 0) { 2301 // If not, the only thing that can effect the output of the AND is 2302 // the bit specified by AndRHSV. If that bit is set, the effect of 2303 // the XOR is to toggle the bit. If it is clear, then the ADD has 2304 // no effect. 2305 if ((AddRHS & AndRHSV) == 0) { // Bit is not set, noop 2306 TheAnd.setOperand(0, X); 2307 return &TheAnd; 2308 } else { 2309 // Pull the XOR out of the AND. 2310 Value *NewAnd = Builder->CreateAnd(X, AndRHS); 2311 NewAnd->takeName(Op); 2312 return BinaryOperator::CreateXor(NewAnd, AndRHS); 2313 } 2314 } 2315 } 2316 } 2317 break; 2318 2319 case Instruction::Shl: { 2320 // We know that the AND will not produce any of the bits shifted in, so if 2321 // the anded constant includes them, clear them now! 2322 // 2323 uint32_t BitWidth = AndRHS->getType()->getBitWidth(); 2324 uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth); 2325 APInt ShlMask(APInt::getHighBitsSet(BitWidth, BitWidth-OpRHSVal)); 2326 ConstantInt *CI = ConstantInt::get(AndRHS->getContext(), 2327 AndRHS->getValue() & ShlMask); 2328 2329 if (CI->getValue() == ShlMask) { 2330 // Masking out bits that the shift already masks 2331 return ReplaceInstUsesWith(TheAnd, Op); // No need for the and. 2332 } else if (CI != AndRHS) { // Reducing bits set in and. 2333 TheAnd.setOperand(1, CI); 2334 return &TheAnd; 2335 } 2336 break; 2337 } 2338 case Instruction::LShr: { 2339 // We know that the AND will not produce any of the bits shifted in, so if 2340 // the anded constant includes them, clear them now! This only applies to 2341 // unsigned shifts, because a signed shr may bring in set bits! 
2342 // 2343 uint32_t BitWidth = AndRHS->getType()->getBitWidth(); 2344 uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth); 2345 APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal)); 2346 ConstantInt *CI = ConstantInt::get(Op->getContext(), 2347 AndRHS->getValue() & ShrMask); 2348 2349 if (CI->getValue() == ShrMask) { 2350 // Masking out bits that the shift already masks. 2351 return ReplaceInstUsesWith(TheAnd, Op); 2352 } else if (CI != AndRHS) { 2353 TheAnd.setOperand(1, CI); // Reduce bits set in and cst. 2354 return &TheAnd; 2355 } 2356 break; 2357 } 2358 case Instruction::AShr: 2359 // Signed shr. 2360 // See if this is shifting in some sign extension, then masking it out 2361 // with an and. 2362 if (Op->hasOneUse()) { 2363 uint32_t BitWidth = AndRHS->getType()->getBitWidth(); 2364 uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth); 2365 APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal)); 2366 Constant *C = ConstantInt::get(Op->getContext(), 2367 AndRHS->getValue() & ShrMask); 2368 if (C == AndRHS) { // Masking out bits shifted in. 2369 // (Val ashr C1) & C2 -> (Val lshr C1) & C2 2370 // Make the argument unsigned. 2371 Value *ShVal = Op->getOperand(0); 2372 ShVal = Builder->CreateLShr(ShVal, OpRHS, Op->getName()); 2373 return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName()); 2374 } 2375 } 2376 break; 2377 } 2378 return 0; 2379} 2380 2381 2382/// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is 2383/// true, otherwise (V < Lo || V >= Hi). In pratice, we emit the more efficient 2384/// (V-Lo) <u Hi-Lo. This method expects that Lo <= Hi. isSigned indicates 2385/// whether to treat the V, Lo and HI as signed or not. IB is the location to 2386/// insert new instructions. 2387Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, 2388 bool isSigned, bool Inside, 2389 Instruction &IB) { 2390 assert(cast<ConstantInt>(ConstantExpr::getICmp((isSigned ? 
2391 ICmpInst::ICMP_SLE:ICmpInst::ICMP_ULE), Lo, Hi))->getZExtValue() && 2392 "Lo is not <= Hi in range emission code!"); 2393 2394 if (Inside) { 2395 if (Lo == Hi) // Trivially false. 2396 return new ICmpInst(ICmpInst::ICMP_NE, V, V); 2397 2398 // V >= Min && V < Hi --> V < Hi 2399 if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) { 2400 ICmpInst::Predicate pred = (isSigned ? 2401 ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT); 2402 return new ICmpInst(pred, V, Hi); 2403 } 2404 2405 // Emit V-Lo <u Hi-Lo 2406 Constant *NegLo = ConstantExpr::getNeg(Lo); 2407 Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off"); 2408 Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi); 2409 return new ICmpInst(ICmpInst::ICMP_ULT, Add, UpperBound); 2410 } 2411 2412 if (Lo == Hi) // Trivially true. 2413 return new ICmpInst(ICmpInst::ICMP_EQ, V, V); 2414 2415 // V < Min || V >= Hi -> V > Hi-1 2416 Hi = SubOne(cast<ConstantInt>(Hi)); 2417 if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) { 2418 ICmpInst::Predicate pred = (isSigned ? 2419 ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT); 2420 return new ICmpInst(pred, V, Hi); 2421 } 2422 2423 // Emit V-Lo >u Hi-1-Lo 2424 // Note that Hi has already had one subtracted from it, above. 2425 ConstantInt *NegLo = cast<ConstantInt>(ConstantExpr::getNeg(Lo)); 2426 Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off"); 2427 Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi); 2428 return new ICmpInst(ICmpInst::ICMP_UGT, Add, LowerBound); 2429} 2430 2431// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s with 2432// any number of 0s on either side. The 1s are allowed to wrap from LSB to 2433// MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is 2434// not, since all 1s are not contiguous. 
2435static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) { 2436 const APInt& V = Val->getValue(); 2437 uint32_t BitWidth = Val->getType()->getBitWidth(); 2438 if (!APIntOps::isShiftedMask(BitWidth, V)) return false; 2439 2440 // look for the first zero bit after the run of ones 2441 MB = BitWidth - ((V - 1) ^ V).countLeadingZeros(); 2442 // look for the first non-zero bit 2443 ME = V.getActiveBits(); 2444 return true; 2445} 2446 2447/// FoldLogicalPlusAnd - This is part of an expression (LHS +/- RHS) & Mask, 2448/// where isSub determines whether the operator is a sub. If we can fold one of 2449/// the following xforms: 2450/// 2451/// ((A & N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == Mask 2452/// ((A | N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0 2453/// ((A ^ N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0 2454/// 2455/// return (A +/- B). 2456/// 2457Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS, 2458 ConstantInt *Mask, bool isSub, 2459 Instruction &I) { 2460 Instruction *LHSI = dyn_cast<Instruction>(LHS); 2461 if (!LHSI || LHSI->getNumOperands() != 2 || 2462 !isa<ConstantInt>(LHSI->getOperand(1))) return 0; 2463 2464 ConstantInt *N = cast<ConstantInt>(LHSI->getOperand(1)); 2465 2466 switch (LHSI->getOpcode()) { 2467 default: return 0; 2468 case Instruction::And: 2469 if (ConstantExpr::getAnd(N, Mask) == Mask) { 2470 // If the AndRHS is a power of two minus one (0+1+), this is simple. 2471 if ((Mask->getValue().countLeadingZeros() + 2472 Mask->getValue().countPopulation()) == 2473 Mask->getValue().getBitWidth()) 2474 break; 2475 2476 // Otherwise, if Mask is 0+1+0+, and if B is known to have the low 0+ 2477 // part, we don't need any explicit masks to take them out of A. If that 2478 // is all N is, ignore it. 
2479 uint32_t MB = 0, ME = 0; 2480 if (isRunOfOnes(Mask, MB, ME)) { // begin/end bit of run, inclusive 2481 uint32_t BitWidth = cast<IntegerType>(RHS->getType())->getBitWidth(); 2482 APInt Mask(APInt::getLowBitsSet(BitWidth, MB-1)); 2483 if (MaskedValueIsZero(RHS, Mask)) 2484 break; 2485 } 2486 } 2487 return 0; 2488 case Instruction::Or: 2489 case Instruction::Xor: 2490 // If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0 2491 if ((Mask->getValue().countLeadingZeros() + 2492 Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth() 2493 && ConstantExpr::getAnd(N, Mask)->isNullValue()) 2494 break; 2495 return 0; 2496 } 2497 2498 if (isSub) 2499 return Builder->CreateSub(LHSI->getOperand(0), RHS, "fold"); 2500 return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold"); 2501} 2502 2503/// FoldAndOfICmps - Fold (icmp)&(icmp) if possible. 2504Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, 2505 ICmpInst *LHS, ICmpInst *RHS) { 2506 Value *Val, *Val2; 2507 ConstantInt *LHSCst, *RHSCst; 2508 ICmpInst::Predicate LHSCC, RHSCC; 2509 2510 // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2). 
2511 if (!match(LHS, m_ICmp(LHSCC, m_Value(Val), 2512 m_ConstantInt(LHSCst))) || 2513 !match(RHS, m_ICmp(RHSCC, m_Value(Val2), 2514 m_ConstantInt(RHSCst)))) 2515 return 0; 2516 2517 if (LHSCst == RHSCst && LHSCC == RHSCC) { 2518 // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C) 2519 // where C is a power of 2 2520 if (LHSCC == ICmpInst::ICMP_ULT && 2521 LHSCst->getValue().isPowerOf2()) { 2522 Value *NewOr = Builder->CreateOr(Val, Val2); 2523 return new ICmpInst(LHSCC, NewOr, LHSCst); 2524 } 2525 2526 // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0) 2527 if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) { 2528 Value *NewOr = Builder->CreateOr(Val, Val2); 2529 return new ICmpInst(LHSCC, NewOr, LHSCst); 2530 } 2531 } 2532 2533 // From here on, we only handle: 2534 // (icmp1 A, C1) & (icmp2 A, C2) --> something simpler. 2535 if (Val != Val2) return 0; 2536 2537 // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere. 2538 if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE || 2539 RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE || 2540 LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE || 2541 RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE) 2542 return 0; 2543 2544 // We can't fold (ugt x, C) & (sgt x, C2). 2545 if (!PredicatesFoldable(LHSCC, RHSCC)) 2546 return 0; 2547 2548 // Ensure that the larger constant is on the RHS. 2549 bool ShouldSwap; 2550 if (CmpInst::isSigned(LHSCC) || 2551 (ICmpInst::isEquality(LHSCC) && 2552 CmpInst::isSigned(RHSCC))) 2553 ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue()); 2554 else 2555 ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue()); 2556 2557 if (ShouldSwap) { 2558 std::swap(LHS, RHS); 2559 std::swap(LHSCst, RHSCst); 2560 std::swap(LHSCC, RHSCC); 2561 } 2562 2563 // At this point, we know we have have two icmp instructions 2564 // comparing a value against two constants and and'ing the result 2565 // together. 
Because of the above check, we know that we only have 2566 // icmp eq, icmp ne, icmp [su]lt, and icmp [SU]gt here. We also know 2567 // (from the FoldICmpLogical check above), that the two constants 2568 // are not equal and that the larger constant is on the RHS 2569 assert(LHSCst != RHSCst && "Compares not folded above?"); 2570 2571 switch (LHSCC) { 2572 default: llvm_unreachable("Unknown integer condition code!"); 2573 case ICmpInst::ICMP_EQ: 2574 switch (RHSCC) { 2575 default: llvm_unreachable("Unknown integer condition code!"); 2576 case ICmpInst::ICMP_EQ: // (X == 13 & X == 15) -> false 2577 case ICmpInst::ICMP_UGT: // (X == 13 & X > 15) -> false 2578 case ICmpInst::ICMP_SGT: // (X == 13 & X > 15) -> false 2579 return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); 2580 case ICmpInst::ICMP_NE: // (X == 13 & X != 15) -> X == 13 2581 case ICmpInst::ICMP_ULT: // (X == 13 & X < 15) -> X == 13 2582 case ICmpInst::ICMP_SLT: // (X == 13 & X < 15) -> X == 13 2583 return ReplaceInstUsesWith(I, LHS); 2584 } 2585 case ICmpInst::ICMP_NE: 2586 switch (RHSCC) { 2587 default: llvm_unreachable("Unknown integer condition code!"); 2588 case ICmpInst::ICMP_ULT: 2589 if (LHSCst == SubOne(RHSCst)) // (X != 13 & X u< 14) -> X < 13 2590 return new ICmpInst(ICmpInst::ICMP_ULT, Val, LHSCst); 2591 break; // (X != 13 & X u< 15) -> no change 2592 case ICmpInst::ICMP_SLT: 2593 if (LHSCst == SubOne(RHSCst)) // (X != 13 & X s< 14) -> X < 13 2594 return new ICmpInst(ICmpInst::ICMP_SLT, Val, LHSCst); 2595 break; // (X != 13 & X s< 15) -> no change 2596 case ICmpInst::ICMP_EQ: // (X != 13 & X == 15) -> X == 15 2597 case ICmpInst::ICMP_UGT: // (X != 13 & X u> 15) -> X u> 15 2598 case ICmpInst::ICMP_SGT: // (X != 13 & X s> 15) -> X s> 15 2599 return ReplaceInstUsesWith(I, RHS); 2600 case ICmpInst::ICMP_NE: 2601 if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1 2602 Constant *AddCST = ConstantExpr::getNeg(LHSCst); 2603 Value *Add = Builder->CreateAdd(Val, AddCST, 
Val->getName()+".off"); 2604 return new ICmpInst(ICmpInst::ICMP_UGT, Add, 2605 ConstantInt::get(Add->getType(), 1)); 2606 } 2607 break; // (X != 13 & X != 15) -> no change 2608 } 2609 break; 2610 case ICmpInst::ICMP_ULT: 2611 switch (RHSCC) { 2612 default: llvm_unreachable("Unknown integer condition code!"); 2613 case ICmpInst::ICMP_EQ: // (X u< 13 & X == 15) -> false 2614 case ICmpInst::ICMP_UGT: // (X u< 13 & X u> 15) -> false 2615 return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); 2616 case ICmpInst::ICMP_SGT: // (X u< 13 & X s> 15) -> no change 2617 break; 2618 case ICmpInst::ICMP_NE: // (X u< 13 & X != 15) -> X u< 13 2619 case ICmpInst::ICMP_ULT: // (X u< 13 & X u< 15) -> X u< 13 2620 return ReplaceInstUsesWith(I, LHS); 2621 case ICmpInst::ICMP_SLT: // (X u< 13 & X s< 15) -> no change 2622 break; 2623 } 2624 break; 2625 case ICmpInst::ICMP_SLT: 2626 switch (RHSCC) { 2627 default: llvm_unreachable("Unknown integer condition code!"); 2628 case ICmpInst::ICMP_EQ: // (X s< 13 & X == 15) -> false 2629 case ICmpInst::ICMP_SGT: // (X s< 13 & X s> 15) -> false 2630 return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); 2631 case ICmpInst::ICMP_UGT: // (X s< 13 & X u> 15) -> no change 2632 break; 2633 case ICmpInst::ICMP_NE: // (X s< 13 & X != 15) -> X < 13 2634 case ICmpInst::ICMP_SLT: // (X s< 13 & X s< 15) -> X < 13 2635 return ReplaceInstUsesWith(I, LHS); 2636 case ICmpInst::ICMP_ULT: // (X s< 13 & X u< 15) -> no change 2637 break; 2638 } 2639 break; 2640 case ICmpInst::ICMP_UGT: 2641 switch (RHSCC) { 2642 default: llvm_unreachable("Unknown integer condition code!"); 2643 case ICmpInst::ICMP_EQ: // (X u> 13 & X == 15) -> X == 15 2644 case ICmpInst::ICMP_UGT: // (X u> 13 & X u> 15) -> X u> 15 2645 return ReplaceInstUsesWith(I, RHS); 2646 case ICmpInst::ICMP_SGT: // (X u> 13 & X s> 15) -> no change 2647 break; 2648 case ICmpInst::ICMP_NE: 2649 if (RHSCst == AddOne(LHSCst)) // (X u> 13 & X != 14) -> X u> 14 2650 return new 
ICmpInst(LHSCC, Val, RHSCst); 2651 break; // (X u> 13 & X != 15) -> no change 2652 case ICmpInst::ICMP_ULT: // (X u> 13 & X u< 15) -> (X-14) <u 1 2653 return InsertRangeTest(Val, AddOne(LHSCst), 2654 RHSCst, false, true, I); 2655 case ICmpInst::ICMP_SLT: // (X u> 13 & X s< 15) -> no change 2656 break; 2657 } 2658 break; 2659 case ICmpInst::ICMP_SGT: 2660 switch (RHSCC) { 2661 default: llvm_unreachable("Unknown integer condition code!"); 2662 case ICmpInst::ICMP_EQ: // (X s> 13 & X == 15) -> X == 15 2663 case ICmpInst::ICMP_SGT: // (X s> 13 & X s> 15) -> X s> 15 2664 return ReplaceInstUsesWith(I, RHS); 2665 case ICmpInst::ICMP_UGT: // (X s> 13 & X u> 15) -> no change 2666 break; 2667 case ICmpInst::ICMP_NE: 2668 if (RHSCst == AddOne(LHSCst)) // (X s> 13 & X != 14) -> X s> 14 2669 return new ICmpInst(LHSCC, Val, RHSCst); 2670 break; // (X s> 13 & X != 15) -> no change 2671 case ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) s< 1 2672 return InsertRangeTest(Val, AddOne(LHSCst), 2673 RHSCst, true, true, I); 2674 case ICmpInst::ICMP_ULT: // (X s> 13 & X u< 15) -> no change 2675 break; 2676 } 2677 break; 2678 } 2679 2680 return 0; 2681} 2682 2683Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS, 2684 FCmpInst *RHS) { 2685 2686 if (LHS->getPredicate() == FCmpInst::FCMP_ORD && 2687 RHS->getPredicate() == FCmpInst::FCMP_ORD) { 2688 // (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y) 2689 if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1))) 2690 if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) { 2691 // If either of the constants are nans, then the whole thing returns 2692 // false. 2693 if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) 2694 return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); 2695 return new FCmpInst(FCmpInst::FCMP_ORD, 2696 LHS->getOperand(0), RHS->getOperand(0)); 2697 } 2698 2699 // Handle vector zeros. 
This occurs because the canonical form of 2700 // "fcmp ord x,x" is "fcmp ord x, 0". 2701 if (isa<ConstantAggregateZero>(LHS->getOperand(1)) && 2702 isa<ConstantAggregateZero>(RHS->getOperand(1))) 2703 return new FCmpInst(FCmpInst::FCMP_ORD, 2704 LHS->getOperand(0), RHS->getOperand(0)); 2705 return 0; 2706 } 2707 2708 Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1); 2709 Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1); 2710 FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate(); 2711 2712 2713 if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) { 2714 // Swap RHS operands to match LHS. 2715 Op1CC = FCmpInst::getSwappedPredicate(Op1CC); 2716 std::swap(Op1LHS, Op1RHS); 2717 } 2718 2719 if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) { 2720 // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y). 2721 if (Op0CC == Op1CC) 2722 return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS); 2723 2724 if (Op0CC == FCmpInst::FCMP_FALSE || Op1CC == FCmpInst::FCMP_FALSE) 2725 return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); 2726 if (Op0CC == FCmpInst::FCMP_TRUE) 2727 return ReplaceInstUsesWith(I, RHS); 2728 if (Op1CC == FCmpInst::FCMP_TRUE) 2729 return ReplaceInstUsesWith(I, LHS); 2730 2731 bool Op0Ordered; 2732 bool Op1Ordered; 2733 unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); 2734 unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered); 2735 if (Op1Pred == 0) { 2736 std::swap(LHS, RHS); 2737 std::swap(Op0Pred, Op1Pred); 2738 std::swap(Op0Ordered, Op1Ordered); 2739 } 2740 if (Op0Pred == 0) { 2741 // uno && ueq -> uno && (uno || eq) -> ueq 2742 // ord && olt -> ord && (ord && lt) -> olt 2743 if (Op0Ordered == Op1Ordered) 2744 return ReplaceInstUsesWith(I, RHS); 2745 2746 // uno && oeq -> uno && (ord && eq) -> false 2747 // uno && ord -> false 2748 if (!Op0Ordered) 2749 return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); 2750 // ord && ueq -> ord && (uno || eq) -> oeq 2751 return 
cast<Instruction>(getFCmpValue(true, Op1Pred, Op0LHS, Op0RHS)); 2752 } 2753 } 2754 2755 return 0; 2756} 2757 2758 2759Instruction *InstCombiner::visitAnd(BinaryOperator &I) { 2760 bool Changed = SimplifyCommutative(I); 2761 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 2762 2763 if (Value *V = SimplifyAndInst(Op0, Op1, TD)) 2764 return ReplaceInstUsesWith(I, V); 2765 2766 // See if we can simplify any instructions used by the instruction whose sole 2767 // purpose is to compute bits we don't care about. 2768 if (SimplifyDemandedInstructionBits(I)) 2769 return &I; 2770 2771 if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) { 2772 const APInt &AndRHSMask = AndRHS->getValue(); 2773 APInt NotAndRHS(~AndRHSMask); 2774 2775 // Optimize a variety of ((val OP C1) & C2) combinations... 2776 if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) { 2777 Value *Op0LHS = Op0I->getOperand(0); 2778 Value *Op0RHS = Op0I->getOperand(1); 2779 switch (Op0I->getOpcode()) { 2780 default: break; 2781 case Instruction::Xor: 2782 case Instruction::Or: 2783 // If the mask is only needed on one incoming arm, push it up. 2784 if (!Op0I->hasOneUse()) break; 2785 2786 if (MaskedValueIsZero(Op0LHS, NotAndRHS)) { 2787 // Not masking anything out for the LHS, move to RHS. 2788 Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS, 2789 Op0RHS->getName()+".masked"); 2790 return BinaryOperator::Create(Op0I->getOpcode(), Op0LHS, NewRHS); 2791 } 2792 if (!isa<Constant>(Op0RHS) && 2793 MaskedValueIsZero(Op0RHS, NotAndRHS)) { 2794 // Not masking anything out for the RHS, move to LHS. 2795 Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS, 2796 Op0LHS->getName()+".masked"); 2797 return BinaryOperator::Create(Op0I->getOpcode(), NewLHS, Op0RHS); 2798 } 2799 2800 break; 2801 case Instruction::Add: 2802 // ((A & N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == AndRHS. 
2803 // ((A | N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0 2804 // ((A ^ N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0 2805 if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, false, I)) 2806 return BinaryOperator::CreateAnd(V, AndRHS); 2807 if (Value *V = FoldLogicalPlusAnd(Op0RHS, Op0LHS, AndRHS, false, I)) 2808 return BinaryOperator::CreateAnd(V, AndRHS); // Add commutes 2809 break; 2810 2811 case Instruction::Sub: 2812 // ((A & N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == AndRHS. 2813 // ((A | N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0 2814 // ((A ^ N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0 2815 if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, true, I)) 2816 return BinaryOperator::CreateAnd(V, AndRHS); 2817 2818 // (A - N) & AndRHS -> -N & AndRHS iff A&AndRHS==0 and AndRHS 2819 // has 1's for all bits that the subtraction with A might affect. 2820 if (Op0I->hasOneUse()) { 2821 uint32_t BitWidth = AndRHSMask.getBitWidth(); 2822 uint32_t Zeros = AndRHSMask.countLeadingZeros(); 2823 APInt Mask = APInt::getLowBitsSet(BitWidth, BitWidth - Zeros); 2824 2825 ConstantInt *A = dyn_cast<ConstantInt>(Op0LHS); 2826 if (!(A && A->isZero()) && // avoid infinite recursion. 
2827 MaskedValueIsZero(Op0LHS, Mask)) { 2828 Value *NewNeg = Builder->CreateNeg(Op0RHS); 2829 return BinaryOperator::CreateAnd(NewNeg, AndRHS); 2830 } 2831 } 2832 break; 2833 2834 case Instruction::Shl: 2835 case Instruction::LShr: 2836 // (1 << x) & 1 --> zext(x == 0) 2837 // (1 >> x) & 1 --> zext(x == 0) 2838 if (AndRHSMask == 1 && Op0LHS == AndRHS) { 2839 Value *NewICmp = 2840 Builder->CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType())); 2841 return new ZExtInst(NewICmp, I.getType()); 2842 } 2843 break; 2844 } 2845 2846 if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) 2847 if (Instruction *Res = OptAndOp(Op0I, Op0CI, AndRHS, I)) 2848 return Res; 2849 } else if (CastInst *CI = dyn_cast<CastInst>(Op0)) { 2850 // If this is an integer truncation or change from signed-to-unsigned, and 2851 // if the source is an and/or with immediate, transform it. This 2852 // frequently occurs for bitfield accesses. 2853 if (Instruction *CastOp = dyn_cast<Instruction>(CI->getOperand(0))) { 2854 if ((isa<TruncInst>(CI) || isa<BitCastInst>(CI)) && 2855 CastOp->getNumOperands() == 2) 2856 if (ConstantInt *AndCI =dyn_cast<ConstantInt>(CastOp->getOperand(1))){ 2857 if (CastOp->getOpcode() == Instruction::And) { 2858 // Change: and (cast (and X, C1) to T), C2 2859 // into : and (cast X to T), trunc_or_bitcast(C1)&C2 2860 // This will fold the two constants together, which may allow 2861 // other simplifications. 
2862 Value *NewCast = Builder->CreateTruncOrBitCast( 2863 CastOp->getOperand(0), I.getType(), 2864 CastOp->getName()+".shrunk"); 2865 // trunc_or_bitcast(C1)&C2 2866 Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType()); 2867 C3 = ConstantExpr::getAnd(C3, AndRHS); 2868 return BinaryOperator::CreateAnd(NewCast, C3); 2869 } else if (CastOp->getOpcode() == Instruction::Or) { 2870 // Change: and (cast (or X, C1) to T), C2 2871 // into : trunc(C1)&C2 iff trunc(C1)&C2 == C2 2872 Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType()); 2873 if (ConstantExpr::getAnd(C3, AndRHS) == AndRHS) 2874 // trunc(C1)&C2 2875 return ReplaceInstUsesWith(I, AndRHS); 2876 } 2877 } 2878 } 2879 } 2880 2881 // Try to fold constant and into select arguments. 2882 if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) 2883 if (Instruction *R = FoldOpIntoSelect(I, SI, this)) 2884 return R; 2885 if (isa<PHINode>(Op0)) 2886 if (Instruction *NV = FoldOpIntoPhi(I)) 2887 return NV; 2888 } 2889 2890 2891 // (~A & ~B) == (~(A | B)) - De Morgan's Law 2892 if (Value *Op0NotVal = dyn_castNotVal(Op0)) 2893 if (Value *Op1NotVal = dyn_castNotVal(Op1)) 2894 if (Op0->hasOneUse() && Op1->hasOneUse()) { 2895 Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal, 2896 I.getName()+".demorgan"); 2897 return BinaryOperator::CreateNot(Or); 2898 } 2899 2900 { 2901 Value *A = 0, *B = 0, *C = 0, *D = 0; 2902 // (A|B) & ~(A&B) -> A^B 2903 if (match(Op0, m_Or(m_Value(A), m_Value(B))) && 2904 match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) && 2905 ((A == C && B == D) || (A == D && B == C))) 2906 return BinaryOperator::CreateXor(A, B); 2907 2908 // ~(A&B) & (A|B) -> A^B 2909 if (match(Op1, m_Or(m_Value(A), m_Value(B))) && 2910 match(Op0, m_Not(m_And(m_Value(C), m_Value(D)))) && 2911 ((A == C && B == D) || (A == D && B == C))) 2912 return BinaryOperator::CreateXor(A, B); 2913 2914 if (Op0->hasOneUse() && 2915 match(Op0, m_Xor(m_Value(A), m_Value(B)))) { 2916 if (A == Op1) { // (A^B)&A -> A&(A^B) 2917 
I.swapOperands(); // Simplify below 2918 std::swap(Op0, Op1); 2919 } else if (B == Op1) { // (A^B)&B -> B&(B^A) 2920 cast<BinaryOperator>(Op0)->swapOperands(); 2921 I.swapOperands(); // Simplify below 2922 std::swap(Op0, Op1); 2923 } 2924 } 2925 2926 if (Op1->hasOneUse() && 2927 match(Op1, m_Xor(m_Value(A), m_Value(B)))) { 2928 if (B == Op0) { // B&(A^B) -> B&(B^A) 2929 cast<BinaryOperator>(Op1)->swapOperands(); 2930 std::swap(A, B); 2931 } 2932 if (A == Op0) // A&(A^B) -> A & ~B 2933 return BinaryOperator::CreateAnd(A, Builder->CreateNot(B, "tmp")); 2934 } 2935 2936 // (A&((~A)|B)) -> A&B 2937 if (match(Op0, m_Or(m_Not(m_Specific(Op1)), m_Value(A))) || 2938 match(Op0, m_Or(m_Value(A), m_Not(m_Specific(Op1))))) 2939 return BinaryOperator::CreateAnd(A, Op1); 2940 if (match(Op1, m_Or(m_Not(m_Specific(Op0)), m_Value(A))) || 2941 match(Op1, m_Or(m_Value(A), m_Not(m_Specific(Op0))))) 2942 return BinaryOperator::CreateAnd(A, Op0); 2943 } 2944 2945 if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1)) { 2946 // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B) 2947 if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS))) 2948 return R; 2949 2950 if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0)) 2951 if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS)) 2952 return Res; 2953 } 2954 2955 // fold (and (cast A), (cast B)) -> (cast (and A, B)) 2956 if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) 2957 if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) 2958 if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind ? 2959 const Type *SrcTy = Op0C->getOperand(0)->getType(); 2960 if (SrcTy == Op1C->getOperand(0)->getType() && 2961 SrcTy->isIntOrIntVector() && 2962 // Only do this if the casts both really cause code to be generated. 
2963 ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), 2964 I.getType(), TD) && 2965 ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), 2966 I.getType(), TD)) { 2967 Value *NewOp = Builder->CreateAnd(Op0C->getOperand(0), 2968 Op1C->getOperand(0), I.getName()); 2969 return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); 2970 } 2971 } 2972 2973 // (X >> Z) & (Y >> Z) -> (X&Y) >> Z for all shifts. 2974 if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) { 2975 if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0)) 2976 if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() && 2977 SI0->getOperand(1) == SI1->getOperand(1) && 2978 (SI0->hasOneUse() || SI1->hasOneUse())) { 2979 Value *NewOp = 2980 Builder->CreateAnd(SI0->getOperand(0), SI1->getOperand(0), 2981 SI0->getName()); 2982 return BinaryOperator::Create(SI1->getOpcode(), NewOp, 2983 SI1->getOperand(1)); 2984 } 2985 } 2986 2987 // If and'ing two fcmp, try combine them into one. 2988 if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) { 2989 if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) 2990 if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS)) 2991 return Res; 2992 } 2993 2994 return Changed ? &I : 0; 2995} 2996 2997/// CollectBSwapParts - Analyze the specified subexpression and see if it is 2998/// capable of providing pieces of a bswap. The subexpression provides pieces 2999/// of a bswap if it is proven that each of the non-zero bytes in the output of 3000/// the expression came from the corresponding "byte swapped" byte in some other 3001/// value. For example, if the current subexpression is "(shl i32 %X, 24)" then 3002/// we know that the expression deposits the low byte of %X into the high byte 3003/// of the bswap result and that all other bytes are zero. This expression is 3004/// accepted, the high byte of ByteValues is set to X to indicate a correct 3005/// match. 3006/// 3007/// This function returns true if the match was unsuccessful and false if so. 
3008/// On entry to the function the "OverallLeftShift" is a signed integer value 3009/// indicating the number of bytes that the subexpression is later shifted. For 3010/// example, if the expression is later right shifted by 16 bits, the 3011/// OverallLeftShift value would be -2 on entry. This is used to specify which 3012/// byte of ByteValues is actually being set. 3013/// 3014/// Similarly, ByteMask is a bitmask where a bit is clear if its corresponding 3015/// byte is masked to zero by a user. For example, in (X & 255), X will be 3016/// processed with a bytemask of 1. Because bytemask is 32-bits, this limits 3017/// this function to working on up to 32-byte (256 bit) values. ByteMask is 3018/// always in the local (OverallLeftShift) coordinate space. 3019/// 3020static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask, 3021 SmallVector<Value*, 8> &ByteValues) { 3022 if (Instruction *I = dyn_cast<Instruction>(V)) { 3023 // If this is an or instruction, it may be an inner node of the bswap. 3024 if (I->getOpcode() == Instruction::Or) { 3025 return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask, 3026 ByteValues) || 3027 CollectBSwapParts(I->getOperand(1), OverallLeftShift, ByteMask, 3028 ByteValues); 3029 } 3030 3031 // If this is a logical shift by a constant multiple of 8, recurse with 3032 // OverallLeftShift and ByteMask adjusted. 3033 if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) { 3034 unsigned ShAmt = 3035 cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U); 3036 // Ensure the shift amount is defined and of a byte value. 
3037 if ((ShAmt & 7) || (ShAmt > 8*ByteValues.size())) 3038 return true; 3039 3040 unsigned ByteShift = ShAmt >> 3; 3041 if (I->getOpcode() == Instruction::Shl) { 3042 // X << 2 -> collect(X, +2) 3043 OverallLeftShift += ByteShift; 3044 ByteMask >>= ByteShift; 3045 } else { 3046 // X >>u 2 -> collect(X, -2) 3047 OverallLeftShift -= ByteShift; 3048 ByteMask <<= ByteShift; 3049 ByteMask &= (~0U >> (32-ByteValues.size())); 3050 } 3051 3052 if (OverallLeftShift >= (int)ByteValues.size()) return true; 3053 if (OverallLeftShift <= -(int)ByteValues.size()) return true; 3054 3055 return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask, 3056 ByteValues); 3057 } 3058 3059 // If this is a logical 'and' with a mask that clears bytes, clear the 3060 // corresponding bytes in ByteMask. 3061 if (I->getOpcode() == Instruction::And && 3062 isa<ConstantInt>(I->getOperand(1))) { 3063 // Scan every byte of the and mask, seeing if the byte is either 0 or 255. 3064 unsigned NumBytes = ByteValues.size(); 3065 APInt Byte(I->getType()->getPrimitiveSizeInBits(), 255); 3066 const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue(); 3067 3068 for (unsigned i = 0; i != NumBytes; ++i, Byte <<= 8) { 3069 // If this byte is masked out by a later operation, we don't care what 3070 // the and mask is. 3071 if ((ByteMask & (1 << i)) == 0) 3072 continue; 3073 3074 // If the AndMask is all zeros for this byte, clear the bit. 3075 APInt MaskB = AndMask & Byte; 3076 if (MaskB == 0) { 3077 ByteMask &= ~(1U << i); 3078 continue; 3079 } 3080 3081 // If the AndMask is not all ones for this byte, it's not a bytezap. 3082 if (MaskB != Byte) 3083 return true; 3084 3085 // Otherwise, this byte is kept. 3086 } 3087 3088 return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask, 3089 ByteValues); 3090 } 3091 } 3092 3093 // Okay, we got to something that isn't a shift, 'or' or 'and'. This must be 3094 // the input value to the bswap. 
Some observations: 1) if more than one byte 3095 // is demanded from this input, then it could not be successfully assembled 3096 // into a byteswap. At least one of the two bytes would not be aligned with 3097 // their ultimate destination. 3098 if (!isPowerOf2_32(ByteMask)) return true; 3099 unsigned InputByteNo = CountTrailingZeros_32(ByteMask); 3100 3101 // 2) The input and ultimate destinations must line up: if byte 3 of an i32 3102 // is demanded, it needs to go into byte 0 of the result. This means that the 3103 // byte needs to be shifted until it lands in the right byte bucket. The 3104 // shift amount depends on the position: if the byte is coming from the high 3105 // part of the value (e.g. byte 3) then it must be shifted right. If from the 3106 // low part, it must be shifted left. 3107 unsigned DestByteNo = InputByteNo + OverallLeftShift; 3108 if (InputByteNo < ByteValues.size()/2) { 3109 if (ByteValues.size()-1-DestByteNo != InputByteNo) 3110 return true; 3111 } else { 3112 if (ByteValues.size()-1-DestByteNo != InputByteNo) 3113 return true; 3114 } 3115 3116 // If the destination byte value is already defined, the values are or'd 3117 // together, which isn't a bswap (unless it's an or of the same bits). 3118 if (ByteValues[DestByteNo] && ByteValues[DestByteNo] != V) 3119 return true; 3120 ByteValues[DestByteNo] = V; 3121 return false; 3122} 3123 3124/// MatchBSwap - Given an OR instruction, check to see if this is a bswap idiom. 3125/// If so, insert the new bswap intrinsic and return it. 3126Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) { 3127 const IntegerType *ITy = dyn_cast<IntegerType>(I.getType()); 3128 if (!ITy || ITy->getBitWidth() % 16 || 3129 // ByteMask only allows up to 32-byte values. 3130 ITy->getBitWidth() > 32*8) 3131 return 0; // Can only bswap pairs of bytes. Can't do vectors. 3132 3133 /// ByteValues - For each byte of the result, we keep track of which value 3134 /// defines each byte. 
3135 SmallVector<Value*, 8> ByteValues; 3136 ByteValues.resize(ITy->getBitWidth()/8); 3137 3138 // Try to find all the pieces corresponding to the bswap. 3139 uint32_t ByteMask = ~0U >> (32-ByteValues.size()); 3140 if (CollectBSwapParts(&I, 0, ByteMask, ByteValues)) 3141 return 0; 3142 3143 // Check to see if all of the bytes come from the same value. 3144 Value *V = ByteValues[0]; 3145 if (V == 0) return 0; // Didn't find a byte? Must be zero. 3146 3147 // Check to make sure that all of the bytes come from the same value. 3148 for (unsigned i = 1, e = ByteValues.size(); i != e; ++i) 3149 if (ByteValues[i] != V) 3150 return 0; 3151 const Type *Tys[] = { ITy }; 3152 Module *M = I.getParent()->getParent()->getParent(); 3153 Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1); 3154 return CallInst::Create(F, V); 3155} 3156 3157/// MatchSelectFromAndOr - We have an expression of the form (A&C)|(B&D). Check 3158/// If A is (cond?-1:0) and either B or D is ~(cond?-1,0) or (cond?0,-1), then 3159/// we can simplify this expression to "cond ? C : D or B". 3160static Instruction *MatchSelectFromAndOr(Value *A, Value *B, 3161 Value *C, Value *D) { 3162 // If A is not a select of -1/0, this cannot match. 3163 Value *Cond = 0; 3164 if (!match(A, m_SelectCst<-1, 0>(m_Value(Cond)))) 3165 return 0; 3166 3167 // ((cond?-1:0)&C) | (B&(cond?0:-1)) -> cond ? C : B. 3168 if (match(D, m_SelectCst<0, -1>(m_Specific(Cond)))) 3169 return SelectInst::Create(Cond, C, B); 3170 if (match(D, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond))))) 3171 return SelectInst::Create(Cond, C, B); 3172 // ((cond?-1:0)&C) | ((cond?0:-1)&D) -> cond ? C : D. 3173 if (match(B, m_SelectCst<0, -1>(m_Specific(Cond)))) 3174 return SelectInst::Create(Cond, C, D); 3175 if (match(B, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond))))) 3176 return SelectInst::Create(Cond, C, D); 3177 return 0; 3178} 3179 3180/// FoldOrOfICmps - Fold (icmp)|(icmp) if possible. 
3181Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, 3182 ICmpInst *LHS, ICmpInst *RHS) { 3183 Value *Val, *Val2; 3184 ConstantInt *LHSCst, *RHSCst; 3185 ICmpInst::Predicate LHSCC, RHSCC; 3186 3187 // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2). 3188 if (!match(LHS, m_ICmp(LHSCC, m_Value(Val), m_ConstantInt(LHSCst))) || 3189 !match(RHS, m_ICmp(RHSCC, m_Value(Val2), m_ConstantInt(RHSCst)))) 3190 return 0; 3191 3192 3193 // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0) 3194 if (LHSCst == RHSCst && LHSCC == RHSCC && 3195 LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) { 3196 Value *NewOr = Builder->CreateOr(Val, Val2); 3197 return new ICmpInst(LHSCC, NewOr, LHSCst); 3198 } 3199 3200 // From here on, we only handle: 3201 // (icmp1 A, C1) | (icmp2 A, C2) --> something simpler. 3202 if (Val != Val2) return 0; 3203 3204 // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere. 3205 if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE || 3206 RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE || 3207 LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE || 3208 RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE) 3209 return 0; 3210 3211 // We can't fold (ugt x, C) | (sgt x, C2). 3212 if (!PredicatesFoldable(LHSCC, RHSCC)) 3213 return 0; 3214 3215 // Ensure that the larger constant is on the RHS. 3216 bool ShouldSwap; 3217 if (CmpInst::isSigned(LHSCC) || 3218 (ICmpInst::isEquality(LHSCC) && 3219 CmpInst::isSigned(RHSCC))) 3220 ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue()); 3221 else 3222 ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue()); 3223 3224 if (ShouldSwap) { 3225 std::swap(LHS, RHS); 3226 std::swap(LHSCst, RHSCst); 3227 std::swap(LHSCC, RHSCC); 3228 } 3229 3230 // At this point, we know we have have two icmp instructions 3231 // comparing a value against two constants and or'ing the result 3232 // together. 
Because of the above check, we know that we only have 3233 // ICMP_EQ, ICMP_NE, ICMP_LT, and ICMP_GT here. We also know (from the 3234 // FoldICmpLogical check above), that the two constants are not 3235 // equal. 3236 assert(LHSCst != RHSCst && "Compares not folded above?"); 3237 3238 switch (LHSCC) { 3239 default: llvm_unreachable("Unknown integer condition code!"); 3240 case ICmpInst::ICMP_EQ: 3241 switch (RHSCC) { 3242 default: llvm_unreachable("Unknown integer condition code!"); 3243 case ICmpInst::ICMP_EQ: 3244 if (LHSCst == SubOne(RHSCst)) { 3245 // (X == 13 | X == 14) -> X-13 <u 2 3246 Constant *AddCST = ConstantExpr::getNeg(LHSCst); 3247 Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off"); 3248 AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst); 3249 return new ICmpInst(ICmpInst::ICMP_ULT, Add, AddCST); 3250 } 3251 break; // (X == 13 | X == 15) -> no change 3252 case ICmpInst::ICMP_UGT: // (X == 13 | X u> 14) -> no change 3253 case ICmpInst::ICMP_SGT: // (X == 13 | X s> 14) -> no change 3254 break; 3255 case ICmpInst::ICMP_NE: // (X == 13 | X != 15) -> X != 15 3256 case ICmpInst::ICMP_ULT: // (X == 13 | X u< 15) -> X u< 15 3257 case ICmpInst::ICMP_SLT: // (X == 13 | X s< 15) -> X s< 15 3258 return ReplaceInstUsesWith(I, RHS); 3259 } 3260 break; 3261 case ICmpInst::ICMP_NE: 3262 switch (RHSCC) { 3263 default: llvm_unreachable("Unknown integer condition code!"); 3264 case ICmpInst::ICMP_EQ: // (X != 13 | X == 15) -> X != 13 3265 case ICmpInst::ICMP_UGT: // (X != 13 | X u> 15) -> X != 13 3266 case ICmpInst::ICMP_SGT: // (X != 13 | X s> 15) -> X != 13 3267 return ReplaceInstUsesWith(I, LHS); 3268 case ICmpInst::ICMP_NE: // (X != 13 | X != 15) -> true 3269 case ICmpInst::ICMP_ULT: // (X != 13 | X u< 15) -> true 3270 case ICmpInst::ICMP_SLT: // (X != 13 | X s< 15) -> true 3271 return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); 3272 } 3273 break; 3274 case ICmpInst::ICMP_ULT: 3275 switch (RHSCC) { 3276 default: 
llvm_unreachable("Unknown integer condition code!"); 3277 case ICmpInst::ICMP_EQ: // (X u< 13 | X == 14) -> no change 3278 break; 3279 case ICmpInst::ICMP_UGT: // (X u< 13 | X u> 15) -> (X-13) u> 2 3280 // If RHSCst is [us]MAXINT, it is always false. Not handling 3281 // this can cause overflow. 3282 if (RHSCst->isMaxValue(false)) 3283 return ReplaceInstUsesWith(I, LHS); 3284 return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), 3285 false, false, I); 3286 case ICmpInst::ICMP_SGT: // (X u< 13 | X s> 15) -> no change 3287 break; 3288 case ICmpInst::ICMP_NE: // (X u< 13 | X != 15) -> X != 15 3289 case ICmpInst::ICMP_ULT: // (X u< 13 | X u< 15) -> X u< 15 3290 return ReplaceInstUsesWith(I, RHS); 3291 case ICmpInst::ICMP_SLT: // (X u< 13 | X s< 15) -> no change 3292 break; 3293 } 3294 break; 3295 case ICmpInst::ICMP_SLT: 3296 switch (RHSCC) { 3297 default: llvm_unreachable("Unknown integer condition code!"); 3298 case ICmpInst::ICMP_EQ: // (X s< 13 | X == 14) -> no change 3299 break; 3300 case ICmpInst::ICMP_SGT: // (X s< 13 | X s> 15) -> (X-13) s> 2 3301 // If RHSCst is [us]MAXINT, it is always false. Not handling 3302 // this can cause overflow. 
3303 if (RHSCst->isMaxValue(true)) 3304 return ReplaceInstUsesWith(I, LHS); 3305 return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), 3306 true, false, I); 3307 case ICmpInst::ICMP_UGT: // (X s< 13 | X u> 15) -> no change 3308 break; 3309 case ICmpInst::ICMP_NE: // (X s< 13 | X != 15) -> X != 15 3310 case ICmpInst::ICMP_SLT: // (X s< 13 | X s< 15) -> X s< 15 3311 return ReplaceInstUsesWith(I, RHS); 3312 case ICmpInst::ICMP_ULT: // (X s< 13 | X u< 15) -> no change 3313 break; 3314 } 3315 break; 3316 case ICmpInst::ICMP_UGT: 3317 switch (RHSCC) { 3318 default: llvm_unreachable("Unknown integer condition code!"); 3319 case ICmpInst::ICMP_EQ: // (X u> 13 | X == 15) -> X u> 13 3320 case ICmpInst::ICMP_UGT: // (X u> 13 | X u> 15) -> X u> 13 3321 return ReplaceInstUsesWith(I, LHS); 3322 case ICmpInst::ICMP_SGT: // (X u> 13 | X s> 15) -> no change 3323 break; 3324 case ICmpInst::ICMP_NE: // (X u> 13 | X != 15) -> true 3325 case ICmpInst::ICMP_ULT: // (X u> 13 | X u< 15) -> true 3326 return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); 3327 case ICmpInst::ICMP_SLT: // (X u> 13 | X s< 15) -> no change 3328 break; 3329 } 3330 break; 3331 case ICmpInst::ICMP_SGT: 3332 switch (RHSCC) { 3333 default: llvm_unreachable("Unknown integer condition code!"); 3334 case ICmpInst::ICMP_EQ: // (X s> 13 | X == 15) -> X > 13 3335 case ICmpInst::ICMP_SGT: // (X s> 13 | X s> 15) -> X > 13 3336 return ReplaceInstUsesWith(I, LHS); 3337 case ICmpInst::ICMP_UGT: // (X s> 13 | X u> 15) -> no change 3338 break; 3339 case ICmpInst::ICMP_NE: // (X s> 13 | X != 15) -> true 3340 case ICmpInst::ICMP_SLT: // (X s> 13 | X s< 15) -> true 3341 return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); 3342 case ICmpInst::ICMP_ULT: // (X s> 13 | X u< 15) -> no change 3343 break; 3344 } 3345 break; 3346 } 3347 return 0; 3348} 3349 3350Instruction *InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, 3351 FCmpInst *RHS) { 3352 if (LHS->getPredicate() == FCmpInst::FCMP_UNO && 3353 
RHS->getPredicate() == FCmpInst::FCMP_UNO && 3354 LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) { 3355 if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1))) 3356 if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) { 3357 // If either of the constants are nans, then the whole thing returns 3358 // true. 3359 if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) 3360 return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); 3361 3362 // Otherwise, no need to compare the two constants, compare the 3363 // rest. 3364 return new FCmpInst(FCmpInst::FCMP_UNO, 3365 LHS->getOperand(0), RHS->getOperand(0)); 3366 } 3367 3368 // Handle vector zeros. This occurs because the canonical form of 3369 // "fcmp uno x,x" is "fcmp uno x, 0". 3370 if (isa<ConstantAggregateZero>(LHS->getOperand(1)) && 3371 isa<ConstantAggregateZero>(RHS->getOperand(1))) 3372 return new FCmpInst(FCmpInst::FCMP_UNO, 3373 LHS->getOperand(0), RHS->getOperand(0)); 3374 3375 return 0; 3376 } 3377 3378 Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1); 3379 Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1); 3380 FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate(); 3381 3382 if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) { 3383 // Swap RHS operands to match LHS. 3384 Op1CC = FCmpInst::getSwappedPredicate(Op1CC); 3385 std::swap(Op1LHS, Op1RHS); 3386 } 3387 if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) { 3388 // Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y). 
3389 if (Op0CC == Op1CC) 3390 return new FCmpInst((FCmpInst::Predicate)Op0CC, 3391 Op0LHS, Op0RHS); 3392 if (Op0CC == FCmpInst::FCMP_TRUE || Op1CC == FCmpInst::FCMP_TRUE) 3393 return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); 3394 if (Op0CC == FCmpInst::FCMP_FALSE) 3395 return ReplaceInstUsesWith(I, RHS); 3396 if (Op1CC == FCmpInst::FCMP_FALSE) 3397 return ReplaceInstUsesWith(I, LHS); 3398 bool Op0Ordered; 3399 bool Op1Ordered; 3400 unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); 3401 unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered); 3402 if (Op0Ordered == Op1Ordered) { 3403 // If both are ordered or unordered, return a new fcmp with 3404 // or'ed predicates. 3405 Value *RV = getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, Op0LHS, Op0RHS); 3406 if (Instruction *I = dyn_cast<Instruction>(RV)) 3407 return I; 3408 // Otherwise, it's a constant boolean value... 3409 return ReplaceInstUsesWith(I, RV); 3410 } 3411 } 3412 return 0; 3413} 3414 3415/// FoldOrWithConstants - This helper function folds: 3416/// 3417/// ((A | B) & C1) | (B & C2) 3418/// 3419/// into: 3420/// 3421/// (A & C1) | B 3422/// 3423/// when the XOR of the two constants is "all ones" (-1). 3424Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op, 3425 Value *A, Value *B, Value *C) { 3426 ConstantInt *CI1 = dyn_cast<ConstantInt>(C); 3427 if (!CI1) return 0; 3428 3429 Value *V1 = 0; 3430 ConstantInt *CI2 = 0; 3431 if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2)))) return 0; 3432 3433 APInt Xor = CI1->getValue() ^ CI2->getValue(); 3434 if (!Xor.isAllOnesValue()) return 0; 3435 3436 if (V1 == A || V1 == B) { 3437 Value *NewOp = Builder->CreateAnd((V1 == A) ? 
B : A, CI1); 3438 return BinaryOperator::CreateOr(NewOp, V1); 3439 } 3440 3441 return 0; 3442} 3443 3444Instruction *InstCombiner::visitOr(BinaryOperator &I) { 3445 bool Changed = SimplifyCommutative(I); 3446 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 3447 3448 if (Value *V = SimplifyOrInst(Op0, Op1, TD)) 3449 return ReplaceInstUsesWith(I, V); 3450 3451 3452 // See if we can simplify any instructions used by the instruction whose sole 3453 // purpose is to compute bits we don't care about. 3454 if (SimplifyDemandedInstructionBits(I)) 3455 return &I; 3456 3457 if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { 3458 ConstantInt *C1 = 0; Value *X = 0; 3459 // (X & C1) | C2 --> (X | C2) & (C1|C2) 3460 if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) && 3461 isOnlyUse(Op0)) { 3462 Value *Or = Builder->CreateOr(X, RHS); 3463 Or->takeName(Op0); 3464 return BinaryOperator::CreateAnd(Or, 3465 ConstantInt::get(I.getContext(), 3466 RHS->getValue() | C1->getValue())); 3467 } 3468 3469 // (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2) 3470 if (match(Op0, m_Xor(m_Value(X), m_ConstantInt(C1))) && 3471 isOnlyUse(Op0)) { 3472 Value *Or = Builder->CreateOr(X, RHS); 3473 Or->takeName(Op0); 3474 return BinaryOperator::CreateXor(Or, 3475 ConstantInt::get(I.getContext(), 3476 C1->getValue() & ~RHS->getValue())); 3477 } 3478 3479 // Try to fold constant and into select arguments. 3480 if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) 3481 if (Instruction *R = FoldOpIntoSelect(I, SI, this)) 3482 return R; 3483 if (isa<PHINode>(Op0)) 3484 if (Instruction *NV = FoldOpIntoPhi(I)) 3485 return NV; 3486 } 3487 3488 Value *A = 0, *B = 0; 3489 ConstantInt *C1 = 0, *C2 = 0; 3490 3491 // (A | B) | C and A | (B | C) -> bswap if possible. 3492 // (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible. 
3493 if (match(Op0, m_Or(m_Value(), m_Value())) || 3494 match(Op1, m_Or(m_Value(), m_Value())) || 3495 (match(Op0, m_Shift(m_Value(), m_Value())) && 3496 match(Op1, m_Shift(m_Value(), m_Value())))) { 3497 if (Instruction *BSwap = MatchBSwap(I)) 3498 return BSwap; 3499 } 3500 3501 // (X^C)|Y -> (X|Y)^C iff Y&C == 0 3502 if (Op0->hasOneUse() && 3503 match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) && 3504 MaskedValueIsZero(Op1, C1->getValue())) { 3505 Value *NOr = Builder->CreateOr(A, Op1); 3506 NOr->takeName(Op0); 3507 return BinaryOperator::CreateXor(NOr, C1); 3508 } 3509 3510 // Y|(X^C) -> (X|Y)^C iff Y&C == 0 3511 if (Op1->hasOneUse() && 3512 match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) && 3513 MaskedValueIsZero(Op0, C1->getValue())) { 3514 Value *NOr = Builder->CreateOr(A, Op0); 3515 NOr->takeName(Op0); 3516 return BinaryOperator::CreateXor(NOr, C1); 3517 } 3518 3519 // (A & C)|(B & D) 3520 Value *C = 0, *D = 0; 3521 if (match(Op0, m_And(m_Value(A), m_Value(C))) && 3522 match(Op1, m_And(m_Value(B), m_Value(D)))) { 3523 Value *V1 = 0, *V2 = 0, *V3 = 0; 3524 C1 = dyn_cast<ConstantInt>(C); 3525 C2 = dyn_cast<ConstantInt>(D); 3526 if (C1 && C2) { // (A & C1)|(B & C2) 3527 // If we have: ((V + N) & C1) | (V & C2) 3528 // .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0 3529 // replace with V+N. 3530 if (C1->getValue() == ~C2->getValue()) { 3531 if ((C2->getValue() & (C2->getValue()+1)) == 0 && // C2 == 0+1+ 3532 match(A, m_Add(m_Value(V1), m_Value(V2)))) { 3533 // Add commutes, try both ways. 3534 if (V1 == B && MaskedValueIsZero(V2, C2->getValue())) 3535 return ReplaceInstUsesWith(I, A); 3536 if (V2 == B && MaskedValueIsZero(V1, C2->getValue())) 3537 return ReplaceInstUsesWith(I, A); 3538 } 3539 // Or commutes, try both ways. 3540 if ((C1->getValue() & (C1->getValue()+1)) == 0 && 3541 match(B, m_Add(m_Value(V1), m_Value(V2)))) { 3542 // Add commutes, try both ways. 
3543 if (V1 == A && MaskedValueIsZero(V2, C1->getValue())) 3544 return ReplaceInstUsesWith(I, B); 3545 if (V2 == A && MaskedValueIsZero(V1, C1->getValue())) 3546 return ReplaceInstUsesWith(I, B); 3547 } 3548 } 3549 3550 // ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2) 3551 // iff (C1&C2) == 0 and (N&~C1) == 0 3552 if ((C1->getValue() & C2->getValue()) == 0) { 3553 if (match(A, m_Or(m_Value(V1), m_Value(V2))) && 3554 ((V1 == B && MaskedValueIsZero(V2, ~C1->getValue())) || // (V|N) 3555 (V2 == B && MaskedValueIsZero(V1, ~C1->getValue())))) // (N|V) 3556 return BinaryOperator::CreateAnd(A, 3557 ConstantInt::get(A->getContext(), 3558 C1->getValue()|C2->getValue())); 3559 // Or commutes, try both ways. 3560 if (match(B, m_Or(m_Value(V1), m_Value(V2))) && 3561 ((V1 == A && MaskedValueIsZero(V2, ~C2->getValue())) || // (V|N) 3562 (V2 == A && MaskedValueIsZero(V1, ~C2->getValue())))) // (N|V) 3563 return BinaryOperator::CreateAnd(B, 3564 ConstantInt::get(B->getContext(), 3565 C1->getValue()|C2->getValue())); 3566 } 3567 } 3568 3569 // Check to see if we have any common things being and'ed. If so, find the 3570 // terms for V1 & (V2|V3). 3571 if (isOnlyUse(Op0) || isOnlyUse(Op1)) { 3572 V1 = 0; 3573 if (A == B) // (A & C)|(A & D) == A & (C|D) 3574 V1 = A, V2 = C, V3 = D; 3575 else if (A == D) // (A & C)|(B & A) == A & (B|C) 3576 V1 = A, V2 = B, V3 = C; 3577 else if (C == B) // (A & C)|(C & D) == C & (A|D) 3578 V1 = C, V2 = A, V3 = D; 3579 else if (C == D) // (A & C)|(B & C) == C & (A|B) 3580 V1 = C, V2 = A, V3 = B; 3581 3582 if (V1) { 3583 Value *Or = Builder->CreateOr(V2, V3, "tmp"); 3584 return BinaryOperator::CreateAnd(V1, Or); 3585 } 3586 } 3587 3588 // (A & (C0?-1:0)) | (B & ~(C0?-1:0)) -> C0 ? 
A : B, and commuted variants 3589 if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D)) 3590 return Match; 3591 if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C)) 3592 return Match; 3593 if (Instruction *Match = MatchSelectFromAndOr(C, B, A, D)) 3594 return Match; 3595 if (Instruction *Match = MatchSelectFromAndOr(D, A, B, C)) 3596 return Match; 3597 3598 // ((A&~B)|(~A&B)) -> A^B 3599 if ((match(C, m_Not(m_Specific(D))) && 3600 match(B, m_Not(m_Specific(A))))) 3601 return BinaryOperator::CreateXor(A, D); 3602 // ((~B&A)|(~A&B)) -> A^B 3603 if ((match(A, m_Not(m_Specific(D))) && 3604 match(B, m_Not(m_Specific(C))))) 3605 return BinaryOperator::CreateXor(C, D); 3606 // ((A&~B)|(B&~A)) -> A^B 3607 if ((match(C, m_Not(m_Specific(B))) && 3608 match(D, m_Not(m_Specific(A))))) 3609 return BinaryOperator::CreateXor(A, B); 3610 // ((~B&A)|(B&~A)) -> A^B 3611 if ((match(A, m_Not(m_Specific(B))) && 3612 match(D, m_Not(m_Specific(C))))) 3613 return BinaryOperator::CreateXor(C, B); 3614 } 3615 3616 // (X >> Z) | (Y >> Z) -> (X|Y) >> Z for all shifts. 
3617 if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) { 3618 if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0)) 3619 if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() && 3620 SI0->getOperand(1) == SI1->getOperand(1) && 3621 (SI0->hasOneUse() || SI1->hasOneUse())) { 3622 Value *NewOp = Builder->CreateOr(SI0->getOperand(0), SI1->getOperand(0), 3623 SI0->getName()); 3624 return BinaryOperator::Create(SI1->getOpcode(), NewOp, 3625 SI1->getOperand(1)); 3626 } 3627 } 3628 3629 // ((A|B)&1)|(B&-2) -> (A&1) | B 3630 if (match(Op0, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) || 3631 match(Op0, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) { 3632 Instruction *Ret = FoldOrWithConstants(I, Op1, A, B, C); 3633 if (Ret) return Ret; 3634 } 3635 // (B&-2)|((A|B)&1) -> (A&1) | B 3636 if (match(Op1, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) || 3637 match(Op1, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) { 3638 Instruction *Ret = FoldOrWithConstants(I, Op0, A, B, C); 3639 if (Ret) return Ret; 3640 } 3641 3642 // (~A | ~B) == (~(A & B)) - De Morgan's Law 3643 if (Value *Op0NotVal = dyn_castNotVal(Op0)) 3644 if (Value *Op1NotVal = dyn_castNotVal(Op1)) 3645 if (Op0->hasOneUse() && Op1->hasOneUse()) { 3646 Value *And = Builder->CreateAnd(Op0NotVal, Op1NotVal, 3647 I.getName()+".demorgan"); 3648 return BinaryOperator::CreateNot(And); 3649 } 3650 3651 // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B) 3652 if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) { 3653 if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS))) 3654 return R; 3655 3656 if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0))) 3657 if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS)) 3658 return Res; 3659 } 3660 3661 // fold (or (cast A), (cast B)) -> (cast (or A, B)) 3662 if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) { 3663 if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) 3664 if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ? 
3665 if (!isa<ICmpInst>(Op0C->getOperand(0)) || 3666 !isa<ICmpInst>(Op1C->getOperand(0))) { 3667 const Type *SrcTy = Op0C->getOperand(0)->getType(); 3668 if (SrcTy == Op1C->getOperand(0)->getType() && 3669 SrcTy->isIntOrIntVector() && 3670 // Only do this if the casts both really cause code to be 3671 // generated. 3672 ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), 3673 I.getType(), TD) && 3674 ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), 3675 I.getType(), TD)) { 3676 Value *NewOp = Builder->CreateOr(Op0C->getOperand(0), 3677 Op1C->getOperand(0), I.getName()); 3678 return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); 3679 } 3680 } 3681 } 3682 } 3683 3684 3685 // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y) 3686 if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) { 3687 if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) 3688 if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS)) 3689 return Res; 3690 } 3691 3692 return Changed ? &I : 0; 3693} 3694 3695namespace { 3696 3697// XorSelf - Implements: X ^ X --> 0 3698struct XorSelf { 3699 Value *RHS; 3700 XorSelf(Value *rhs) : RHS(rhs) {} 3701 bool shouldApply(Value *LHS) const { return LHS == RHS; } 3702 Instruction *apply(BinaryOperator &Xor) const { 3703 return &Xor; 3704 } 3705}; 3706 3707} 3708 3709Instruction *InstCombiner::visitXor(BinaryOperator &I) { 3710 bool Changed = SimplifyCommutative(I); 3711 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 3712 3713 if (isa<UndefValue>(Op1)) { 3714 if (isa<UndefValue>(Op0)) 3715 // Handle undef ^ undef -> 0 special case. This is a common 3716 // idiom (misuse). 3717 return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); 3718 return ReplaceInstUsesWith(I, Op1); // X ^ undef -> undef 3719 } 3720 3721 // xor X, X = 0, even if X is nested in a sequence of Xor's. 
3722 if (Instruction *Result = AssociativeOpt(I, XorSelf(Op1))) { 3723 assert(Result == &I && "AssociativeOpt didn't work?"); Result=Result; 3724 return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); 3725 } 3726 3727 // See if we can simplify any instructions used by the instruction whose sole 3728 // purpose is to compute bits we don't care about. 3729 if (SimplifyDemandedInstructionBits(I)) 3730 return &I; 3731 if (isa<VectorType>(I.getType())) 3732 if (isa<ConstantAggregateZero>(Op1)) 3733 return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X 3734 3735 // Is this a ~ operation? 3736 if (Value *NotOp = dyn_castNotVal(&I)) { 3737 if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(NotOp)) { 3738 if (Op0I->getOpcode() == Instruction::And || 3739 Op0I->getOpcode() == Instruction::Or) { 3740 // ~(~X & Y) --> (X | ~Y) - De Morgan's Law 3741 // ~(~X | Y) === (X & ~Y) - De Morgan's Law 3742 if (dyn_castNotVal(Op0I->getOperand(1))) 3743 Op0I->swapOperands(); 3744 if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) { 3745 Value *NotY = 3746 Builder->CreateNot(Op0I->getOperand(1), 3747 Op0I->getOperand(1)->getName()+".not"); 3748 if (Op0I->getOpcode() == Instruction::And) 3749 return BinaryOperator::CreateOr(Op0NotVal, NotY); 3750 return BinaryOperator::CreateAnd(Op0NotVal, NotY); 3751 } 3752 3753 // ~(X & Y) --> (~X | ~Y) - De Morgan's Law 3754 // ~(X | Y) === (~X & ~Y) - De Morgan's Law 3755 if (isFreeToInvert(Op0I->getOperand(0)) && 3756 isFreeToInvert(Op0I->getOperand(1))) { 3757 Value *NotX = 3758 Builder->CreateNot(Op0I->getOperand(0), "notlhs"); 3759 Value *NotY = 3760 Builder->CreateNot(Op0I->getOperand(1), "notrhs"); 3761 if (Op0I->getOpcode() == Instruction::And) 3762 return BinaryOperator::CreateOr(NotX, NotY); 3763 return BinaryOperator::CreateAnd(NotX, NotY); 3764 } 3765 } 3766 } 3767 } 3768 3769 3770 if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { 3771 if (RHS->isOne() && Op0->hasOneUse()) { 3772 // xor (cmp A, B), true = not (cmp 
A, B) = !cmp A, B 3773 if (ICmpInst *ICI = dyn_cast<ICmpInst>(Op0)) 3774 return new ICmpInst(ICI->getInversePredicate(), 3775 ICI->getOperand(0), ICI->getOperand(1)); 3776 3777 if (FCmpInst *FCI = dyn_cast<FCmpInst>(Op0)) 3778 return new FCmpInst(FCI->getInversePredicate(), 3779 FCI->getOperand(0), FCI->getOperand(1)); 3780 } 3781 3782 // fold (xor(zext(cmp)), 1) and (xor(sext(cmp)), -1) to ext(!cmp). 3783 if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) { 3784 if (CmpInst *CI = dyn_cast<CmpInst>(Op0C->getOperand(0))) { 3785 if (CI->hasOneUse() && Op0C->hasOneUse()) { 3786 Instruction::CastOps Opcode = Op0C->getOpcode(); 3787 if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) && 3788 (RHS == ConstantExpr::getCast(Opcode, 3789 ConstantInt::getTrue(I.getContext()), 3790 Op0C->getDestTy()))) { 3791 CI->setPredicate(CI->getInversePredicate()); 3792 return CastInst::Create(Opcode, CI, Op0C->getType()); 3793 } 3794 } 3795 } 3796 } 3797 3798 if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) { 3799 // ~(c-X) == X-c-1 == X+(-c-1) 3800 if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue()) 3801 if (Constant *Op0I0C = dyn_cast<Constant>(Op0I->getOperand(0))) { 3802 Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C); 3803 Constant *ConstantRHS = ConstantExpr::getSub(NegOp0I0C, 3804 ConstantInt::get(I.getType(), 1)); 3805 return BinaryOperator::CreateAdd(Op0I->getOperand(1), ConstantRHS); 3806 } 3807 3808 if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) { 3809 if (Op0I->getOpcode() == Instruction::Add) { 3810 // ~(X-c) --> (-c-1)-X 3811 if (RHS->isAllOnesValue()) { 3812 Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI); 3813 return BinaryOperator::CreateSub( 3814 ConstantExpr::getSub(NegOp0CI, 3815 ConstantInt::get(I.getType(), 1)), 3816 Op0I->getOperand(0)); 3817 } else if (RHS->getValue().isSignBit()) { 3818 // (X + C) ^ signbit -> (X + C + signbit) 3819 Constant *C = ConstantInt::get(I.getContext(), 3820 RHS->getValue() + 
Op0CI->getValue()); 3821 return BinaryOperator::CreateAdd(Op0I->getOperand(0), C); 3822 3823 } 3824 } else if (Op0I->getOpcode() == Instruction::Or) { 3825 // (X|C1)^C2 -> X^(C1|C2) iff X&~C1 == 0 3826 if (MaskedValueIsZero(Op0I->getOperand(0), Op0CI->getValue())) { 3827 Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS); 3828 // Anything in both C1 and C2 is known to be zero, remove it from 3829 // NewRHS. 3830 Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS); 3831 NewRHS = ConstantExpr::getAnd(NewRHS, 3832 ConstantExpr::getNot(CommonBits)); 3833 Worklist.Add(Op0I); 3834 I.setOperand(0, Op0I->getOperand(0)); 3835 I.setOperand(1, NewRHS); 3836 return &I; 3837 } 3838 } 3839 } 3840 } 3841 3842 // Try to fold constant and into select arguments. 3843 if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) 3844 if (Instruction *R = FoldOpIntoSelect(I, SI, this)) 3845 return R; 3846 if (isa<PHINode>(Op0)) 3847 if (Instruction *NV = FoldOpIntoPhi(I)) 3848 return NV; 3849 } 3850 3851 if (Value *X = dyn_castNotVal(Op0)) // ~A ^ A == -1 3852 if (X == Op1) 3853 return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); 3854 3855 if (Value *X = dyn_castNotVal(Op1)) // A ^ ~A == -1 3856 if (X == Op0) 3857 return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); 3858 3859 3860 BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1); 3861 if (Op1I) { 3862 Value *A, *B; 3863 if (match(Op1I, m_Or(m_Value(A), m_Value(B)))) { 3864 if (A == Op0) { // B^(B|A) == (A|B)^B 3865 Op1I->swapOperands(); 3866 I.swapOperands(); 3867 std::swap(Op0, Op1); 3868 } else if (B == Op0) { // B^(A|B) == (A|B)^B 3869 I.swapOperands(); // Simplified below. 
3870 std::swap(Op0, Op1); 3871 } 3872 } else if (match(Op1I, m_Xor(m_Specific(Op0), m_Value(B)))) { 3873 return ReplaceInstUsesWith(I, B); // A^(A^B) == B 3874 } else if (match(Op1I, m_Xor(m_Value(A), m_Specific(Op0)))) { 3875 return ReplaceInstUsesWith(I, A); // A^(B^A) == B 3876 } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) && 3877 Op1I->hasOneUse()){ 3878 if (A == Op0) { // A^(A&B) -> A^(B&A) 3879 Op1I->swapOperands(); 3880 std::swap(A, B); 3881 } 3882 if (B == Op0) { // A^(B&A) -> (B&A)^A 3883 I.swapOperands(); // Simplified below. 3884 std::swap(Op0, Op1); 3885 } 3886 } 3887 } 3888 3889 BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0); 3890 if (Op0I) { 3891 Value *A, *B; 3892 if (match(Op0I, m_Or(m_Value(A), m_Value(B))) && 3893 Op0I->hasOneUse()) { 3894 if (A == Op1) // (B|A)^B == (A|B)^B 3895 std::swap(A, B); 3896 if (B == Op1) // (A|B)^B == A & ~B 3897 return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1, "tmp")); 3898 } else if (match(Op0I, m_Xor(m_Specific(Op1), m_Value(B)))) { 3899 return ReplaceInstUsesWith(I, B); // (A^B)^A == B 3900 } else if (match(Op0I, m_Xor(m_Value(A), m_Specific(Op1)))) { 3901 return ReplaceInstUsesWith(I, A); // (B^A)^A == B 3902 } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) && 3903 Op0I->hasOneUse()){ 3904 if (A == Op1) // (A&B)^A -> (B&A)^A 3905 std::swap(A, B); 3906 if (B == Op1 && // (B&A)^A == ~B & A 3907 !isa<ConstantInt>(Op1)) { // Canonical form is (B&C)^C 3908 return BinaryOperator::CreateAnd(Builder->CreateNot(A, "tmp"), Op1); 3909 } 3910 } 3911 } 3912 3913 // (X >> Z) ^ (Y >> Z) -> (X^Y) >> Z for all shifts. 
3914 if (Op0I && Op1I && Op0I->isShift() && 3915 Op0I->getOpcode() == Op1I->getOpcode() && 3916 Op0I->getOperand(1) == Op1I->getOperand(1) && 3917 (Op1I->hasOneUse() || Op1I->hasOneUse())) { 3918 Value *NewOp = 3919 Builder->CreateXor(Op0I->getOperand(0), Op1I->getOperand(0), 3920 Op0I->getName()); 3921 return BinaryOperator::Create(Op1I->getOpcode(), NewOp, 3922 Op1I->getOperand(1)); 3923 } 3924 3925 if (Op0I && Op1I) { 3926 Value *A, *B, *C, *D; 3927 // (A & B)^(A | B) -> A ^ B 3928 if (match(Op0I, m_And(m_Value(A), m_Value(B))) && 3929 match(Op1I, m_Or(m_Value(C), m_Value(D)))) { 3930 if ((A == C && B == D) || (A == D && B == C)) 3931 return BinaryOperator::CreateXor(A, B); 3932 } 3933 // (A | B)^(A & B) -> A ^ B 3934 if (match(Op0I, m_Or(m_Value(A), m_Value(B))) && 3935 match(Op1I, m_And(m_Value(C), m_Value(D)))) { 3936 if ((A == C && B == D) || (A == D && B == C)) 3937 return BinaryOperator::CreateXor(A, B); 3938 } 3939 3940 // (A & B)^(C & D) 3941 if ((Op0I->hasOneUse() || Op1I->hasOneUse()) && 3942 match(Op0I, m_And(m_Value(A), m_Value(B))) && 3943 match(Op1I, m_And(m_Value(C), m_Value(D)))) { 3944 // (X & Y)^(X & Y) -> (Y^Z) & X 3945 Value *X = 0, *Y = 0, *Z = 0; 3946 if (A == C) 3947 X = A, Y = B, Z = D; 3948 else if (A == D) 3949 X = A, Y = B, Z = C; 3950 else if (B == C) 3951 X = B, Y = A, Z = D; 3952 else if (B == D) 3953 X = B, Y = A, Z = C; 3954 3955 if (X) { 3956 Value *NewOp = Builder->CreateXor(Y, Z, Op0->getName()); 3957 return BinaryOperator::CreateAnd(NewOp, X); 3958 } 3959 } 3960 } 3961 3962 // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B) 3963 if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) 3964 if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS))) 3965 return R; 3966 3967 // fold (xor (cast A), (cast B)) -> (cast (xor A, B)) 3968 if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) { 3969 if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) 3970 if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind? 
3971 const Type *SrcTy = Op0C->getOperand(0)->getType(); 3972 if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() && 3973 // Only do this if the casts both really cause code to be generated. 3974 ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), 3975 I.getType(), TD) && 3976 ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), 3977 I.getType(), TD)) { 3978 Value *NewOp = Builder->CreateXor(Op0C->getOperand(0), 3979 Op1C->getOperand(0), I.getName()); 3980 return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); 3981 } 3982 } 3983 } 3984 3985 return Changed ? &I : 0; 3986} 3987 3988 3989Instruction *InstCombiner::visitShl(BinaryOperator &I) { 3990 return commonShiftTransforms(I); 3991} 3992 3993Instruction *InstCombiner::visitLShr(BinaryOperator &I) { 3994 return commonShiftTransforms(I); 3995} 3996 3997Instruction *InstCombiner::visitAShr(BinaryOperator &I) { 3998 if (Instruction *R = commonShiftTransforms(I)) 3999 return R; 4000 4001 Value *Op0 = I.getOperand(0); 4002 4003 // ashr int -1, X = -1 (for any arithmetic shift rights of ~0) 4004 if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0)) 4005 if (CSI->isAllOnesValue()) 4006 return ReplaceInstUsesWith(I, CSI); 4007 4008 // See if we can turn a signed shr into an unsigned shr. 4009 if (MaskedValueIsZero(Op0, 4010 APInt::getSignBit(I.getType()->getScalarSizeInBits()))) 4011 return BinaryOperator::CreateLShr(Op0, I.getOperand(1)); 4012 4013 // Arithmetic shifting an all-sign-bit value is a no-op. 
4014 unsigned NumSignBits = ComputeNumSignBits(Op0); 4015 if (NumSignBits == Op0->getType()->getScalarSizeInBits()) 4016 return ReplaceInstUsesWith(I, Op0); 4017 4018 return 0; 4019} 4020 4021Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { 4022 assert(I.getOperand(1)->getType() == I.getOperand(0)->getType()); 4023 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 4024 4025 // shl X, 0 == X and shr X, 0 == X 4026 // shl 0, X == 0 and shr 0, X == 0 4027 if (Op1 == Constant::getNullValue(Op1->getType()) || 4028 Op0 == Constant::getNullValue(Op0->getType())) 4029 return ReplaceInstUsesWith(I, Op0); 4030 4031 if (isa<UndefValue>(Op0)) { 4032 if (I.getOpcode() == Instruction::AShr) // undef >>s X -> undef 4033 return ReplaceInstUsesWith(I, Op0); 4034 else // undef << X -> 0, undef >>u X -> 0 4035 return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); 4036 } 4037 if (isa<UndefValue>(Op1)) { 4038 if (I.getOpcode() == Instruction::AShr) // X >>s undef -> X 4039 return ReplaceInstUsesWith(I, Op0); 4040 else // X << undef, X >>u undef -> 0 4041 return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); 4042 } 4043 4044 // See if we can fold away this shift. 4045 if (SimplifyDemandedInstructionBits(I)) 4046 return &I; 4047 4048 // Try to fold constant and into select arguments. 4049 if (isa<Constant>(Op0)) 4050 if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) 4051 if (Instruction *R = FoldOpIntoSelect(I, SI, this)) 4052 return R; 4053 4054 if (ConstantInt *CUI = dyn_cast<ConstantInt>(Op1)) 4055 if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I)) 4056 return Res; 4057 return 0; 4058} 4059 4060Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, 4061 BinaryOperator &I) { 4062 bool isLeftShift = I.getOpcode() == Instruction::Shl; 4063 4064 // See if we can simplify any instructions used by the instruction whose sole 4065 // purpose is to compute bits we don't care about. 
4066 uint32_t TypeBits = Op0->getType()->getScalarSizeInBits(); 4067 4068 // shl i32 X, 32 = 0 and srl i8 Y, 9 = 0, ... just don't eliminate 4069 // a signed shift. 4070 // 4071 if (Op1->uge(TypeBits)) { 4072 if (I.getOpcode() != Instruction::AShr) 4073 return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType())); 4074 else { 4075 I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1)); 4076 return &I; 4077 } 4078 } 4079 4080 // ((X*C1) << C2) == (X * (C1 << C2)) 4081 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0)) 4082 if (BO->getOpcode() == Instruction::Mul && isLeftShift) 4083 if (Constant *BOOp = dyn_cast<Constant>(BO->getOperand(1))) 4084 return BinaryOperator::CreateMul(BO->getOperand(0), 4085 ConstantExpr::getShl(BOOp, Op1)); 4086 4087 // Try to fold constant and into select arguments. 4088 if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) 4089 if (Instruction *R = FoldOpIntoSelect(I, SI, this)) 4090 return R; 4091 if (isa<PHINode>(Op0)) 4092 if (Instruction *NV = FoldOpIntoPhi(I)) 4093 return NV; 4094 4095 // Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2)) 4096 if (TruncInst *TI = dyn_cast<TruncInst>(Op0)) { 4097 Instruction *TrOp = dyn_cast<Instruction>(TI->getOperand(0)); 4098 // If 'shift2' is an ashr, we would have to get the sign bit into a funny 4099 // place. Don't try to do this transformation in this case. Also, we 4100 // require that the input operand is a shift-by-constant so that we have 4101 // confidence that the shifts will get folded together. We could do this 4102 // xform in more cases, but it is unlikely to be profitable. 4103 if (TrOp && I.isLogicalShift() && TrOp->isShift() && 4104 isa<ConstantInt>(TrOp->getOperand(1))) { 4105 // Okay, we'll do this xform. Make the shift of shift. 
4106 Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType()); 4107 // (shift2 (shift1 & 0x00FF), c2) 4108 Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,I.getName()); 4109 4110 // For logical shifts, the truncation has the effect of making the high 4111 // part of the register be zeros. Emulate this by inserting an AND to 4112 // clear the top bits as needed. This 'and' will usually be zapped by 4113 // other xforms later if dead. 4114 unsigned SrcSize = TrOp->getType()->getScalarSizeInBits(); 4115 unsigned DstSize = TI->getType()->getScalarSizeInBits(); 4116 APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize)); 4117 4118 // The mask we constructed says what the trunc would do if occurring 4119 // between the shifts. We want to know the effect *after* the second 4120 // shift. We know that it is a logical shift by a constant, so adjust the 4121 // mask as appropriate. 4122 if (I.getOpcode() == Instruction::Shl) 4123 MaskV <<= Op1->getZExtValue(); 4124 else { 4125 assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift"); 4126 MaskV = MaskV.lshr(Op1->getZExtValue()); 4127 } 4128 4129 // shift1 & 0x00FF 4130 Value *And = Builder->CreateAnd(NSh, 4131 ConstantInt::get(I.getContext(), MaskV), 4132 TI->getName()); 4133 4134 // Return the value truncated to the interesting size. 4135 return new TruncInst(And, I.getType()); 4136 } 4137 } 4138 4139 if (Op0->hasOneUse()) { 4140 if (BinaryOperator *Op0BO = dyn_cast<BinaryOperator>(Op0)) { 4141 // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C) 4142 Value *V1, *V2; 4143 ConstantInt *CC; 4144 switch (Op0BO->getOpcode()) { 4145 default: break; 4146 case Instruction::Add: 4147 case Instruction::And: 4148 case Instruction::Or: 4149 case Instruction::Xor: { 4150 // These operators commute. 
4151 // Turn (Y + (X >> C)) << C -> (X + (Y << C)) & (~0 << C) 4152 if (isLeftShift && Op0BO->getOperand(1)->hasOneUse() && 4153 match(Op0BO->getOperand(1), m_Shr(m_Value(V1), 4154 m_Specific(Op1)))) { 4155 Value *YS = // (Y << C) 4156 Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName()); 4157 // (X + (Y << C)) 4158 Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1, 4159 Op0BO->getOperand(1)->getName()); 4160 uint32_t Op1Val = Op1->getLimitedValue(TypeBits); 4161 return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(), 4162 APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val))); 4163 } 4164 4165 // Turn (Y + ((X >> C) & CC)) << C -> ((X & (CC << C)) + (Y << C)) 4166 Value *Op0BOOp1 = Op0BO->getOperand(1); 4167 if (isLeftShift && Op0BOOp1->hasOneUse() && 4168 match(Op0BOOp1, 4169 m_And(m_Shr(m_Value(V1), m_Specific(Op1)), 4170 m_ConstantInt(CC))) && 4171 cast<BinaryOperator>(Op0BOOp1)->getOperand(0)->hasOneUse()) { 4172 Value *YS = // (Y << C) 4173 Builder->CreateShl(Op0BO->getOperand(0), Op1, 4174 Op0BO->getName()); 4175 // X & (CC << C) 4176 Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1), 4177 V1->getName()+".mask"); 4178 return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM); 4179 } 4180 } 4181 4182 // FALL THROUGH. 
4183 case Instruction::Sub: { 4184 // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C) 4185 if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() && 4186 match(Op0BO->getOperand(0), m_Shr(m_Value(V1), 4187 m_Specific(Op1)))) { 4188 Value *YS = // (Y << C) 4189 Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); 4190 // (X + (Y << C)) 4191 Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS, 4192 Op0BO->getOperand(0)->getName()); 4193 uint32_t Op1Val = Op1->getLimitedValue(TypeBits); 4194 return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(), 4195 APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val))); 4196 } 4197 4198 // Turn (((X >> C)&CC) + Y) << C -> (X + (Y << C)) & (CC << C) 4199 if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() && 4200 match(Op0BO->getOperand(0), 4201 m_And(m_Shr(m_Value(V1), m_Value(V2)), 4202 m_ConstantInt(CC))) && V2 == Op1 && 4203 cast<BinaryOperator>(Op0BO->getOperand(0)) 4204 ->getOperand(0)->hasOneUse()) { 4205 Value *YS = // (Y << C) 4206 Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); 4207 // X & (CC << C) 4208 Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1), 4209 V1->getName()+".mask"); 4210 4211 return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS); 4212 } 4213 4214 break; 4215 } 4216 } 4217 4218 4219 // If the operand is an bitwise operator with a constant RHS, and the 4220 // shift is the only use, we can pull it out of the shift. 4221 if (ConstantInt *Op0C = dyn_cast<ConstantInt>(Op0BO->getOperand(1))) { 4222 bool isValid = true; // Valid only for And, Or, Xor 4223 bool highBitSet = false; // Transform if high bit of constant set? 4224 4225 switch (Op0BO->getOpcode()) { 4226 default: isValid = false; break; // Do not perform transform! 
4227 case Instruction::Add: 4228 isValid = isLeftShift; 4229 break; 4230 case Instruction::Or: 4231 case Instruction::Xor: 4232 highBitSet = false; 4233 break; 4234 case Instruction::And: 4235 highBitSet = true; 4236 break; 4237 } 4238 4239 // If this is a signed shift right, and the high bit is modified 4240 // by the logical operation, do not perform the transformation. 4241 // The highBitSet boolean indicates the value of the high bit of 4242 // the constant which would cause it to be modified for this 4243 // operation. 4244 // 4245 if (isValid && I.getOpcode() == Instruction::AShr) 4246 isValid = Op0C->getValue()[TypeBits-1] == highBitSet; 4247 4248 if (isValid) { 4249 Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1); 4250 4251 Value *NewShift = 4252 Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1); 4253 NewShift->takeName(Op0BO); 4254 4255 return BinaryOperator::Create(Op0BO->getOpcode(), NewShift, 4256 NewRHS); 4257 } 4258 } 4259 } 4260 } 4261 4262 // Find out if this is a shift of a shift by a constant. 4263 BinaryOperator *ShiftOp = dyn_cast<BinaryOperator>(Op0); 4264 if (ShiftOp && !ShiftOp->isShift()) 4265 ShiftOp = 0; 4266 4267 if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) { 4268 ConstantInt *ShiftAmt1C = cast<ConstantInt>(ShiftOp->getOperand(1)); 4269 uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits); 4270 uint32_t ShiftAmt2 = Op1->getLimitedValue(TypeBits); 4271 assert(ShiftAmt2 != 0 && "Should have been simplified earlier"); 4272 if (ShiftAmt1 == 0) return 0; // Will be simplified in the future. 4273 Value *X = ShiftOp->getOperand(0); 4274 4275 uint32_t AmtSum = ShiftAmt1+ShiftAmt2; // Fold into one big shift. 4276 4277 const IntegerType *Ty = cast<IntegerType>(I.getType()); 4278 4279 // Check for (X << c1) << c2 and (X >> c1) >> c2 4280 if (I.getOpcode() == ShiftOp->getOpcode()) { 4281 // If this is oversized composite shift, then unsigned shifts get 0, ashr 4282 // saturates. 
4283 if (AmtSum >= TypeBits) { 4284 if (I.getOpcode() != Instruction::AShr) 4285 return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); 4286 AmtSum = TypeBits-1; // Saturate to 31 for i32 ashr. 4287 } 4288 4289 return BinaryOperator::Create(I.getOpcode(), X, 4290 ConstantInt::get(Ty, AmtSum)); 4291 } 4292 4293 if (ShiftOp->getOpcode() == Instruction::LShr && 4294 I.getOpcode() == Instruction::AShr) { 4295 if (AmtSum >= TypeBits) 4296 return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); 4297 4298 // ((X >>u C1) >>s C2) -> (X >>u (C1+C2)) since C1 != 0. 4299 return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum)); 4300 } 4301 4302 if (ShiftOp->getOpcode() == Instruction::AShr && 4303 I.getOpcode() == Instruction::LShr) { 4304 // ((X >>s C1) >>u C2) -> ((X >>s (C1+C2)) & mask) since C1 != 0. 4305 if (AmtSum >= TypeBits) 4306 AmtSum = TypeBits-1; 4307 4308 Value *Shift = Builder->CreateAShr(X, ConstantInt::get(Ty, AmtSum)); 4309 4310 APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); 4311 return BinaryOperator::CreateAnd(Shift, 4312 ConstantInt::get(I.getContext(), Mask)); 4313 } 4314 4315 // Okay, if we get here, one shift must be left, and the other shift must be 4316 // right. See if the amounts are equal. 4317 if (ShiftAmt1 == ShiftAmt2) { 4318 // If we have ((X >>? C) << C), turn this into X & (-1 << C). 4319 if (I.getOpcode() == Instruction::Shl) { 4320 APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1)); 4321 return BinaryOperator::CreateAnd(X, 4322 ConstantInt::get(I.getContext(),Mask)); 4323 } 4324 // If we have ((X << C) >>u C), turn this into X & (-1 >>u C). 4325 if (I.getOpcode() == Instruction::LShr) { 4326 APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1)); 4327 return BinaryOperator::CreateAnd(X, 4328 ConstantInt::get(I.getContext(), Mask)); 4329 } 4330 // We can simplify ((X << C) >>s C) into a trunc + sext. 
4331 // NOTE: we could do this for any C, but that would make 'unusual' integer 4332 // types. For now, just stick to ones well-supported by the code 4333 // generators. 4334 const Type *SExtType = 0; 4335 switch (Ty->getBitWidth() - ShiftAmt1) { 4336 case 1 : 4337 case 8 : 4338 case 16 : 4339 case 32 : 4340 case 64 : 4341 case 128: 4342 SExtType = IntegerType::get(I.getContext(), 4343 Ty->getBitWidth() - ShiftAmt1); 4344 break; 4345 default: break; 4346 } 4347 if (SExtType) 4348 return new SExtInst(Builder->CreateTrunc(X, SExtType, "sext"), Ty); 4349 // Otherwise, we can't handle it yet. 4350 } else if (ShiftAmt1 < ShiftAmt2) { 4351 uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1; 4352 4353 // (X >>? C1) << C2 --> X << (C2-C1) & (-1 << C2) 4354 if (I.getOpcode() == Instruction::Shl) { 4355 assert(ShiftOp->getOpcode() == Instruction::LShr || 4356 ShiftOp->getOpcode() == Instruction::AShr); 4357 Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff)); 4358 4359 APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2)); 4360 return BinaryOperator::CreateAnd(Shift, 4361 ConstantInt::get(I.getContext(),Mask)); 4362 } 4363 4364 // (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2) 4365 if (I.getOpcode() == Instruction::LShr) { 4366 assert(ShiftOp->getOpcode() == Instruction::Shl); 4367 Value *Shift = Builder->CreateLShr(X, ConstantInt::get(Ty, ShiftDiff)); 4368 4369 APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); 4370 return BinaryOperator::CreateAnd(Shift, 4371 ConstantInt::get(I.getContext(),Mask)); 4372 } 4373 4374 // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. 4375 } else { 4376 assert(ShiftAmt2 < ShiftAmt1); 4377 uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2; 4378 4379 // (X >>? C1) << C2 --> X >>? 
(C1-C2) & (-1 << C2) 4380 if (I.getOpcode() == Instruction::Shl) { 4381 assert(ShiftOp->getOpcode() == Instruction::LShr || 4382 ShiftOp->getOpcode() == Instruction::AShr); 4383 Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), X, 4384 ConstantInt::get(Ty, ShiftDiff)); 4385 4386 APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2)); 4387 return BinaryOperator::CreateAnd(Shift, 4388 ConstantInt::get(I.getContext(),Mask)); 4389 } 4390 4391 // (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2) 4392 if (I.getOpcode() == Instruction::LShr) { 4393 assert(ShiftOp->getOpcode() == Instruction::Shl); 4394 Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff)); 4395 4396 APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); 4397 return BinaryOperator::CreateAnd(Shift, 4398 ConstantInt::get(I.getContext(),Mask)); 4399 } 4400 4401 // We can't handle (X << C1) >>a C2, it shifts arbitrary bits in. 4402 } 4403 } 4404 return 0; 4405} 4406 4407 4408/// DecomposeSimpleLinearExpr - Analyze 'Val', seeing if it is a simple linear 4409/// expression. If so, decompose it, returning some value X, such that Val is 4410/// X*Scale+Offset. 4411/// 4412static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, 4413 int &Offset) { 4414 assert(Val->getType() == Type::getInt32Ty(Val->getContext()) && 4415 "Unexpected allocation size type!"); 4416 if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) { 4417 Offset = CI->getZExtValue(); 4418 Scale = 0; 4419 return ConstantInt::get(Type::getInt32Ty(Val->getContext()), 0); 4420 } else if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) { 4421 if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) { 4422 if (I->getOpcode() == Instruction::Shl) { 4423 // This is a value scaled by '1 << the shift amt'. 4424 Scale = 1U << RHS->getZExtValue(); 4425 Offset = 0; 4426 return I->getOperand(0); 4427 } else if (I->getOpcode() == Instruction::Mul) { 4428 // This value is scaled by 'RHS'. 
4429 Scale = RHS->getZExtValue(); 4430 Offset = 0; 4431 return I->getOperand(0); 4432 } else if (I->getOpcode() == Instruction::Add) { 4433 // We have X+C. Check to see if we really have (X*C2)+C1, 4434 // where C1 is divisible by C2. 4435 unsigned SubScale; 4436 Value *SubVal = 4437 DecomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset); 4438 Offset += RHS->getZExtValue(); 4439 Scale = SubScale; 4440 return SubVal; 4441 } 4442 } 4443 } 4444 4445 // Otherwise, we can't look past this. 4446 Scale = 1; 4447 Offset = 0; 4448 return Val; 4449} 4450 4451 4452/// PromoteCastOfAllocation - If we find a cast of an allocation instruction, 4453/// try to eliminate the cast by moving the type information into the alloc. 4454Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, 4455 AllocaInst &AI) { 4456 const PointerType *PTy = cast<PointerType>(CI.getType()); 4457 4458 BuilderTy AllocaBuilder(*Builder); 4459 AllocaBuilder.SetInsertPoint(AI.getParent(), &AI); 4460 4461 // Remove any uses of AI that are dead. 4462 assert(!CI.use_empty() && "Dead instructions should be removed earlier!"); 4463 4464 for (Value::use_iterator UI = AI.use_begin(), E = AI.use_end(); UI != E; ) { 4465 Instruction *User = cast<Instruction>(*UI++); 4466 if (isInstructionTriviallyDead(User)) { 4467 while (UI != E && *UI == User) 4468 ++UI; // If this instruction uses AI more than once, don't break UI. 4469 4470 ++NumDeadInst; 4471 DEBUG(errs() << "IC: DCE: " << *User << '\n'); 4472 EraseInstFromFunction(*User); 4473 } 4474 } 4475 4476 // This requires TargetData to get the alloca alignment and size information. 4477 if (!TD) return 0; 4478 4479 // Get the type really allocated and the type casted to. 
4480 const Type *AllocElTy = AI.getAllocatedType(); 4481 const Type *CastElTy = PTy->getElementType(); 4482 if (!AllocElTy->isSized() || !CastElTy->isSized()) return 0; 4483 4484 unsigned AllocElTyAlign = TD->getABITypeAlignment(AllocElTy); 4485 unsigned CastElTyAlign = TD->getABITypeAlignment(CastElTy); 4486 if (CastElTyAlign < AllocElTyAlign) return 0; 4487 4488 // If the allocation has multiple uses, only promote it if we are strictly 4489 // increasing the alignment of the resultant allocation. If we keep it the 4490 // same, we open the door to infinite loops of various kinds. (A reference 4491 // from a dbg.declare doesn't count as a use for this purpose.) 4492 if (!AI.hasOneUse() && !hasOneUsePlusDeclare(&AI) && 4493 CastElTyAlign == AllocElTyAlign) return 0; 4494 4495 uint64_t AllocElTySize = TD->getTypeAllocSize(AllocElTy); 4496 uint64_t CastElTySize = TD->getTypeAllocSize(CastElTy); 4497 if (CastElTySize == 0 || AllocElTySize == 0) return 0; 4498 4499 // See if we can satisfy the modulus by pulling a scale out of the array 4500 // size argument. 4501 unsigned ArraySizeScale; 4502 int ArrayOffset; 4503 Value *NumElements = // See if the array size is a decomposable linear expr. 4504 DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset); 4505 4506 // If we can now satisfy the modulus, by using a non-1 scale, we really can 4507 // do the xform. 4508 if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 || 4509 (AllocElTySize*ArrayOffset ) % CastElTySize != 0) return 0; 4510 4511 unsigned Scale = (AllocElTySize*ArraySizeScale)/CastElTySize; 4512 Value *Amt = 0; 4513 if (Scale == 1) { 4514 Amt = NumElements; 4515 } else { 4516 Amt = ConstantInt::get(Type::getInt32Ty(CI.getContext()), Scale); 4517 // Insert before the alloca, not before the cast. 
4518 Amt = AllocaBuilder.CreateMul(Amt, NumElements, "tmp"); 4519 } 4520 4521 if (int Offset = (AllocElTySize*ArrayOffset)/CastElTySize) { 4522 Value *Off = ConstantInt::get(Type::getInt32Ty(CI.getContext()), 4523 Offset, true); 4524 Amt = AllocaBuilder.CreateAdd(Amt, Off, "tmp"); 4525 } 4526 4527 AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt); 4528 New->setAlignment(AI.getAlignment()); 4529 New->takeName(&AI); 4530 4531 // If the allocation has one real use plus a dbg.declare, just remove the 4532 // declare. 4533 if (DbgDeclareInst *DI = hasOneUsePlusDeclare(&AI)) { 4534 EraseInstFromFunction(*DI); 4535 } 4536 // If the allocation has multiple real uses, insert a cast and change all 4537 // things that used it to use the new cast. This will also hack on CI, but it 4538 // will die soon. 4539 else if (!AI.hasOneUse()) { 4540 // New is the allocation instruction, pointer typed. AI is the original 4541 // allocation instruction, also pointer typed. Thus, cast to use is BitCast. 4542 Value *NewCast = AllocaBuilder.CreateBitCast(New, AI.getType(), "tmpcast"); 4543 AI.replaceAllUsesWith(NewCast); 4544 } 4545 return ReplaceInstUsesWith(CI, New); 4546} 4547 4548/// CanEvaluateInDifferentType - Return true if we can take the specified value 4549/// and return it as type Ty without inserting any new casts and without 4550/// changing the computed value. This is used by code that tries to decide 4551/// whether promoting or shrinking integer operations to wider or smaller types 4552/// will allow us to eliminate a truncate or extend. 4553/// 4554/// This is a truncation operation if Ty is smaller than V->getType(), or an 4555/// extension operation if Ty is larger. 4556/// 4557/// If CastOpc is a truncation, then Ty will be a type smaller than V. We 4558/// should return true if trunc(V) can be computed by computing V in the smaller 4559/// type. 
If V is an instruction, then trunc(inst(x,y)) can be computed as 4560/// inst(trunc(x),trunc(y)), which only makes sense if x and y can be 4561/// efficiently truncated. 4562/// 4563/// If CastOpc is a sext or zext, we are asking if the low bits of the value can 4564/// bit computed in a larger type, which is then and'd or sext_in_reg'd to get 4565/// the final result. 4566bool InstCombiner::CanEvaluateInDifferentType(Value *V, const Type *Ty, 4567 unsigned CastOpc, 4568 int &NumCastsRemoved){ 4569 // We can always evaluate constants in another type. 4570 if (isa<Constant>(V)) 4571 return true; 4572 4573 Instruction *I = dyn_cast<Instruction>(V); 4574 if (!I) return false; 4575 4576 const Type *OrigTy = V->getType(); 4577 4578 // If this is an extension or truncate, we can often eliminate it. 4579 if (isa<TruncInst>(I) || isa<ZExtInst>(I) || isa<SExtInst>(I)) { 4580 // If this is a cast from the destination type, we can trivially eliminate 4581 // it, and this will remove a cast overall. 4582 if (I->getOperand(0)->getType() == Ty) { 4583 // If the first operand is itself a cast, and is eliminable, do not count 4584 // this as an eliminable cast. We would prefer to eliminate those two 4585 // casts first. 4586 if (!isa<CastInst>(I->getOperand(0)) && I->hasOneUse()) 4587 ++NumCastsRemoved; 4588 return true; 4589 } 4590 } 4591 4592 // We can't extend or shrink something that has multiple uses: doing so would 4593 // require duplicating the instruction in general, which isn't profitable. 4594 if (!I->hasOneUse()) return false; 4595 4596 unsigned Opc = I->getOpcode(); 4597 switch (Opc) { 4598 case Instruction::Add: 4599 case Instruction::Sub: 4600 case Instruction::Mul: 4601 case Instruction::And: 4602 case Instruction::Or: 4603 case Instruction::Xor: 4604 // These operators can all arbitrarily be extended or truncated. 
4605 return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc, 4606 NumCastsRemoved) && 4607 CanEvaluateInDifferentType(I->getOperand(1), Ty, CastOpc, 4608 NumCastsRemoved); 4609 4610 case Instruction::UDiv: 4611 case Instruction::URem: { 4612 // UDiv and URem can be truncated if all the truncated bits are zero. 4613 uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits(); 4614 uint32_t BitWidth = Ty->getScalarSizeInBits(); 4615 if (BitWidth < OrigBitWidth) { 4616 APInt Mask = APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth); 4617 if (MaskedValueIsZero(I->getOperand(0), Mask) && 4618 MaskedValueIsZero(I->getOperand(1), Mask)) { 4619 return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc, 4620 NumCastsRemoved) && 4621 CanEvaluateInDifferentType(I->getOperand(1), Ty, CastOpc, 4622 NumCastsRemoved); 4623 } 4624 } 4625 break; 4626 } 4627 case Instruction::Shl: 4628 // If we are truncating the result of this SHL, and if it's a shift of a 4629 // constant amount, we can always perform a SHL in a smaller type. 4630 if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { 4631 uint32_t BitWidth = Ty->getScalarSizeInBits(); 4632 if (BitWidth < OrigTy->getScalarSizeInBits() && 4633 CI->getLimitedValue(BitWidth) < BitWidth) 4634 return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc, 4635 NumCastsRemoved); 4636 } 4637 break; 4638 case Instruction::LShr: 4639 // If this is a truncate of a logical shr, we can truncate it to a smaller 4640 // lshr iff we know that the bits we would otherwise be shifting in are 4641 // already zeros. 
4642 if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { 4643 uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits(); 4644 uint32_t BitWidth = Ty->getScalarSizeInBits(); 4645 if (BitWidth < OrigBitWidth && 4646 MaskedValueIsZero(I->getOperand(0), 4647 APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) && 4648 CI->getLimitedValue(BitWidth) < BitWidth) { 4649 return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc, 4650 NumCastsRemoved); 4651 } 4652 } 4653 break; 4654 case Instruction::ZExt: 4655 case Instruction::SExt: 4656 case Instruction::Trunc: 4657 // If this is the same kind of case as our original (e.g. zext+zext), we 4658 // can safely replace it. Note that replacing it does not reduce the number 4659 // of casts in the input. 4660 if (Opc == CastOpc) 4661 return true; 4662 4663 // sext (zext ty1), ty2 -> zext ty2 4664 if (CastOpc == Instruction::SExt && Opc == Instruction::ZExt) 4665 return true; 4666 break; 4667 case Instruction::Select: { 4668 SelectInst *SI = cast<SelectInst>(I); 4669 return CanEvaluateInDifferentType(SI->getTrueValue(), Ty, CastOpc, 4670 NumCastsRemoved) && 4671 CanEvaluateInDifferentType(SI->getFalseValue(), Ty, CastOpc, 4672 NumCastsRemoved); 4673 } 4674 case Instruction::PHI: { 4675 // We can change a phi if we can change all operands. 4676 PHINode *PN = cast<PHINode>(I); 4677 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 4678 if (!CanEvaluateInDifferentType(PN->getIncomingValue(i), Ty, CastOpc, 4679 NumCastsRemoved)) 4680 return false; 4681 return true; 4682 } 4683 default: 4684 // TODO: Can handle more cases here. 4685 break; 4686 } 4687 4688 return false; 4689} 4690 4691/// EvaluateInDifferentType - Given an expression that 4692/// CanEvaluateInDifferentType returns true for, actually insert the code to 4693/// evaluate the expression. 
4694Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty, 4695 bool isSigned) { 4696 if (Constant *C = dyn_cast<Constant>(V)) 4697 return ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/); 4698 4699 // Otherwise, it must be an instruction. 4700 Instruction *I = cast<Instruction>(V); 4701 Instruction *Res = 0; 4702 unsigned Opc = I->getOpcode(); 4703 switch (Opc) { 4704 case Instruction::Add: 4705 case Instruction::Sub: 4706 case Instruction::Mul: 4707 case Instruction::And: 4708 case Instruction::Or: 4709 case Instruction::Xor: 4710 case Instruction::AShr: 4711 case Instruction::LShr: 4712 case Instruction::Shl: 4713 case Instruction::UDiv: 4714 case Instruction::URem: { 4715 Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned); 4716 Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned); 4717 Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS); 4718 break; 4719 } 4720 case Instruction::Trunc: 4721 case Instruction::ZExt: 4722 case Instruction::SExt: 4723 // If the source type of the cast is the type we're trying for then we can 4724 // just return the source. There's no need to insert it because it is not 4725 // new. 4726 if (I->getOperand(0)->getType() == Ty) 4727 return I->getOperand(0); 4728 4729 // Otherwise, must be the same type of cast, so just reinsert a new one. 
4730 Res = CastInst::Create(cast<CastInst>(I)->getOpcode(), I->getOperand(0),Ty); 4731 break; 4732 case Instruction::Select: { 4733 Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned); 4734 Value *False = EvaluateInDifferentType(I->getOperand(2), Ty, isSigned); 4735 Res = SelectInst::Create(I->getOperand(0), True, False); 4736 break; 4737 } 4738 case Instruction::PHI: { 4739 PHINode *OPN = cast<PHINode>(I); 4740 PHINode *NPN = PHINode::Create(Ty); 4741 for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) { 4742 Value *V =EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned); 4743 NPN->addIncoming(V, OPN->getIncomingBlock(i)); 4744 } 4745 Res = NPN; 4746 break; 4747 } 4748 default: 4749 // TODO: Can handle more cases here. 4750 llvm_unreachable("Unreachable!"); 4751 break; 4752 } 4753 4754 Res->takeName(I); 4755 return InsertNewInstBefore(Res, *I); 4756} 4757 4758/// @brief Implement the transforms common to all CastInst visitors. 4759Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { 4760 Value *Src = CI.getOperand(0); 4761 4762 // Many cases of "cast of a cast" are eliminable. If it's eliminable we just 4763 // eliminate it now. 4764 if (CastInst *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast 4765 if (Instruction::CastOps opc = 4766 isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), TD)) { 4767 // The first cast (CSrc) is eliminable so we need to fix up or replace 4768 // the second cast (CI). CSrc will then have a good chance of being dead. 
4769 return CastInst::Create(opc, CSrc->getOperand(0), CI.getType()); 4770 } 4771 } 4772 4773 // If we are casting a select then fold the cast into the select 4774 if (SelectInst *SI = dyn_cast<SelectInst>(Src)) 4775 if (Instruction *NV = FoldOpIntoSelect(CI, SI, this)) 4776 return NV; 4777 4778 // If we are casting a PHI then fold the cast into the PHI 4779 if (isa<PHINode>(Src)) { 4780 // We don't do this if this would create a PHI node with an illegal type if 4781 // it is currently legal. 4782 if (!isa<IntegerType>(Src->getType()) || 4783 !isa<IntegerType>(CI.getType()) || 4784 ShouldChangeType(CI.getType(), Src->getType(), TD)) 4785 if (Instruction *NV = FoldOpIntoPhi(CI)) 4786 return NV; 4787 } 4788 4789 return 0; 4790} 4791 4792/// FindElementAtOffset - Given a type and a constant offset, determine whether 4793/// or not there is a sequence of GEP indices into the type that will land us at 4794/// the specified offset. If so, fill them into NewIndices and return the 4795/// resultant element type, otherwise return null. 4796static const Type *FindElementAtOffset(const Type *Ty, int64_t Offset, 4797 SmallVectorImpl<Value*> &NewIndices, 4798 const TargetData *TD) { 4799 if (!TD) return 0; 4800 if (!Ty->isSized()) return 0; 4801 4802 // Start with the index over the outer type. Note that the type size 4803 // might be zero (even if the offset isn't zero) if the indexed type 4804 // is something like [0 x {int, int}] 4805 const Type *IntPtrTy = TD->getIntPtrType(Ty->getContext()); 4806 int64_t FirstIdx = 0; 4807 if (int64_t TySize = TD->getTypeAllocSize(Ty)) { 4808 FirstIdx = Offset/TySize; 4809 Offset -= FirstIdx*TySize; 4810 4811 // Handle hosts where % returns negative instead of values [0..TySize). 
4812 if (Offset < 0) { 4813 --FirstIdx; 4814 Offset += TySize; 4815 assert(Offset >= 0); 4816 } 4817 assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset"); 4818 } 4819 4820 NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx)); 4821 4822 // Index into the types. If we fail, set OrigBase to null. 4823 while (Offset) { 4824 // Indexing into tail padding between struct/array elements. 4825 if (uint64_t(Offset*8) >= TD->getTypeSizeInBits(Ty)) 4826 return 0; 4827 4828 if (const StructType *STy = dyn_cast<StructType>(Ty)) { 4829 const StructLayout *SL = TD->getStructLayout(STy); 4830 assert(Offset < (int64_t)SL->getSizeInBytes() && 4831 "Offset must stay within the indexed type"); 4832 4833 unsigned Elt = SL->getElementContainingOffset(Offset); 4834 NewIndices.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 4835 Elt)); 4836 4837 Offset -= SL->getElementOffset(Elt); 4838 Ty = STy->getElementType(Elt); 4839 } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) { 4840 uint64_t EltSize = TD->getTypeAllocSize(AT->getElementType()); 4841 assert(EltSize && "Cannot index into a zero-sized array"); 4842 NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize)); 4843 Offset %= EltSize; 4844 Ty = AT->getElementType(); 4845 } else { 4846 // Otherwise, we can't index into the middle of this atomic type, bail. 4847 return 0; 4848 } 4849 } 4850 4851 return Ty; 4852} 4853 4854/// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint) 4855Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { 4856 Value *Src = CI.getOperand(0); 4857 4858 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) { 4859 // If casting the result of a getelementptr instruction with no offset, turn 4860 // this into a cast of the original pointer! 
4861 if (GEP->hasAllZeroIndices()) { 4862 // Changing the cast operand is usually not a good idea but it is safe 4863 // here because the pointer operand is being replaced with another 4864 // pointer operand so the opcode doesn't need to change. 4865 Worklist.Add(GEP); 4866 CI.setOperand(0, GEP->getOperand(0)); 4867 return &CI; 4868 } 4869 4870 // If the GEP has a single use, and the base pointer is a bitcast, and the 4871 // GEP computes a constant offset, see if we can convert these three 4872 // instructions into fewer. This typically happens with unions and other 4873 // non-type-safe code. 4874 if (TD && GEP->hasOneUse() && isa<BitCastInst>(GEP->getOperand(0))) { 4875 if (GEP->hasAllConstantIndices()) { 4876 // We are guaranteed to get a constant from EmitGEPOffset. 4877 ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(GEP)); 4878 int64_t Offset = OffsetV->getSExtValue(); 4879 4880 // Get the base pointer input of the bitcast, and the type it points to. 4881 Value *OrigBase = cast<BitCastInst>(GEP->getOperand(0))->getOperand(0); 4882 const Type *GEPIdxTy = 4883 cast<PointerType>(OrigBase->getType())->getElementType(); 4884 SmallVector<Value*, 8> NewIndices; 4885 if (FindElementAtOffset(GEPIdxTy, Offset, NewIndices, TD)) { 4886 // If we were able to index down into an element, create the GEP 4887 // and bitcast the result. This eliminates one bitcast, potentially 4888 // two. 4889 Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() ? 
4890 Builder->CreateInBoundsGEP(OrigBase, 4891 NewIndices.begin(), NewIndices.end()) : 4892 Builder->CreateGEP(OrigBase, NewIndices.begin(), NewIndices.end()); 4893 NGEP->takeName(GEP); 4894 4895 if (isa<BitCastInst>(CI)) 4896 return new BitCastInst(NGEP, CI.getType()); 4897 assert(isa<PtrToIntInst>(CI)); 4898 return new PtrToIntInst(NGEP, CI.getType()); 4899 } 4900 } 4901 } 4902 } 4903 4904 return commonCastTransforms(CI); 4905} 4906 4907/// commonIntCastTransforms - This function implements the common transforms 4908/// for trunc, zext, and sext. 4909Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) { 4910 if (Instruction *Result = commonCastTransforms(CI)) 4911 return Result; 4912 4913 Value *Src = CI.getOperand(0); 4914 const Type *SrcTy = Src->getType(); 4915 const Type *DestTy = CI.getType(); 4916 uint32_t SrcBitSize = SrcTy->getScalarSizeInBits(); 4917 uint32_t DestBitSize = DestTy->getScalarSizeInBits(); 4918 4919 // See if we can simplify any instructions used by the LHS whose sole 4920 // purpose is to compute bits we don't care about. 4921 if (SimplifyDemandedInstructionBits(CI)) 4922 return &CI; 4923 4924 // If the source isn't an instruction or has more than one use then we 4925 // can't do anything more. 4926 Instruction *SrcI = dyn_cast<Instruction>(Src); 4927 if (!SrcI || !Src->hasOneUse()) 4928 return 0; 4929 4930 // Attempt to propagate the cast into the instruction for int->int casts. 4931 int NumCastsRemoved = 0; 4932 // Only do this if the dest type is a simple type, don't convert the 4933 // expression tree to something weird like i93 unless the source is also 4934 // strange. 4935 if ((isa<VectorType>(DestTy) || 4936 ShouldChangeType(SrcI->getType(), DestTy, TD)) && 4937 CanEvaluateInDifferentType(SrcI, DestTy, 4938 CI.getOpcode(), NumCastsRemoved)) { 4939 // If this cast is a truncate, evaluting in a different type always 4940 // eliminates the cast, so it is always a win. 
If this is a zero-extension, 4941 // we need to do an AND to maintain the clear top-part of the computation, 4942 // so we require that the input have eliminated at least one cast. If this 4943 // is a sign extension, we insert two new casts (to do the extension) so we 4944 // require that two casts have been eliminated. 4945 bool DoXForm = false; 4946 bool JustReplace = false; 4947 switch (CI.getOpcode()) { 4948 default: 4949 // All the others use floating point so we shouldn't actually 4950 // get here because of the check above. 4951 llvm_unreachable("Unknown cast type"); 4952 case Instruction::Trunc: 4953 DoXForm = true; 4954 break; 4955 case Instruction::ZExt: { 4956 DoXForm = NumCastsRemoved >= 1; 4957 4958 if (!DoXForm && 0) { 4959 // If it's unnecessary to issue an AND to clear the high bits, it's 4960 // always profitable to do this xform. 4961 Value *TryRes = EvaluateInDifferentType(SrcI, DestTy, false); 4962 APInt Mask(APInt::getBitsSet(DestBitSize, SrcBitSize, DestBitSize)); 4963 if (MaskedValueIsZero(TryRes, Mask)) 4964 return ReplaceInstUsesWith(CI, TryRes); 4965 4966 if (Instruction *TryI = dyn_cast<Instruction>(TryRes)) 4967 if (TryI->use_empty()) 4968 EraseInstFromFunction(*TryI); 4969 } 4970 break; 4971 } 4972 case Instruction::SExt: { 4973 DoXForm = NumCastsRemoved >= 2; 4974 if (!DoXForm && !isa<TruncInst>(SrcI) && 0) { 4975 // If we do not have to emit the truncate + sext pair, then it's always 4976 // profitable to do this xform. 4977 // 4978 // It's not safe to eliminate the trunc + sext pair if one of the 4979 // eliminated cast is a truncate. e.g. 
4980 // t2 = trunc i32 t1 to i16 4981 // t3 = sext i16 t2 to i32 4982 // != 4983 // i32 t1 4984 Value *TryRes = EvaluateInDifferentType(SrcI, DestTy, true); 4985 unsigned NumSignBits = ComputeNumSignBits(TryRes); 4986 if (NumSignBits > (DestBitSize - SrcBitSize)) 4987 return ReplaceInstUsesWith(CI, TryRes); 4988 4989 if (Instruction *TryI = dyn_cast<Instruction>(TryRes)) 4990 if (TryI->use_empty()) 4991 EraseInstFromFunction(*TryI); 4992 } 4993 break; 4994 } 4995 } 4996 4997 if (DoXForm) { 4998 DEBUG(errs() << "ICE: EvaluateInDifferentType converting expression type" 4999 " to avoid cast: " << CI); 5000 Value *Res = EvaluateInDifferentType(SrcI, DestTy, 5001 CI.getOpcode() == Instruction::SExt); 5002 if (JustReplace) 5003 // Just replace this cast with the result. 5004 return ReplaceInstUsesWith(CI, Res); 5005 5006 assert(Res->getType() == DestTy); 5007 switch (CI.getOpcode()) { 5008 default: llvm_unreachable("Unknown cast type!"); 5009 case Instruction::Trunc: 5010 // Just replace this cast with the result. 5011 return ReplaceInstUsesWith(CI, Res); 5012 case Instruction::ZExt: { 5013 assert(SrcBitSize < DestBitSize && "Not a zext?"); 5014 5015 // If the high bits are already zero, just replace this cast with the 5016 // result. 5017 APInt Mask(APInt::getBitsSet(DestBitSize, SrcBitSize, DestBitSize)); 5018 if (MaskedValueIsZero(Res, Mask)) 5019 return ReplaceInstUsesWith(CI, Res); 5020 5021 // We need to emit an AND to clear the high bits. 5022 Constant *C = ConstantInt::get(CI.getContext(), 5023 APInt::getLowBitsSet(DestBitSize, SrcBitSize)); 5024 return BinaryOperator::CreateAnd(Res, C); 5025 } 5026 case Instruction::SExt: { 5027 // If the high bits are already filled with sign bit, just replace this 5028 // cast with the result. 5029 unsigned NumSignBits = ComputeNumSignBits(Res); 5030 if (NumSignBits > (DestBitSize - SrcBitSize)) 5031 return ReplaceInstUsesWith(CI, Res); 5032 5033 // We need to emit a cast to truncate, then a cast to sext. 
5034 return new SExtInst(Builder->CreateTrunc(Res, Src->getType()), DestTy); 5035 } 5036 } 5037 } 5038 } 5039 5040 Value *Op0 = SrcI->getNumOperands() > 0 ? SrcI->getOperand(0) : 0; 5041 Value *Op1 = SrcI->getNumOperands() > 1 ? SrcI->getOperand(1) : 0; 5042 5043 switch (SrcI->getOpcode()) { 5044 case Instruction::Add: 5045 case Instruction::Mul: 5046 case Instruction::And: 5047 case Instruction::Or: 5048 case Instruction::Xor: 5049 // If we are discarding information, rewrite. 5050 if (DestBitSize < SrcBitSize && DestBitSize != 1) { 5051 // Don't insert two casts unless at least one can be eliminated. 5052 if (!ValueRequiresCast(CI.getOpcode(), Op1, DestTy, TD) || 5053 !ValueRequiresCast(CI.getOpcode(), Op0, DestTy, TD)) { 5054 Value *Op0c = Builder->CreateTrunc(Op0, DestTy, Op0->getName()); 5055 Value *Op1c = Builder->CreateTrunc(Op1, DestTy, Op1->getName()); 5056 return BinaryOperator::Create( 5057 cast<BinaryOperator>(SrcI)->getOpcode(), Op0c, Op1c); 5058 } 5059 } 5060 5061 // cast (xor bool X, true) to int --> xor (cast bool X to int), 1 5062 if (isa<ZExtInst>(CI) && SrcBitSize == 1 && 5063 SrcI->getOpcode() == Instruction::Xor && 5064 Op1 == ConstantInt::getTrue(CI.getContext()) && 5065 (!Op0->hasOneUse() || !isa<CmpInst>(Op0))) { 5066 Value *New = Builder->CreateZExt(Op0, DestTy, Op0->getName()); 5067 return BinaryOperator::CreateXor(New, 5068 ConstantInt::get(CI.getType(), 1)); 5069 } 5070 break; 5071 5072 case Instruction::Shl: { 5073 // Canonicalize trunc inside shl, if we can. 
5074 ConstantInt *CI = dyn_cast<ConstantInt>(Op1); 5075 if (CI && DestBitSize < SrcBitSize && 5076 CI->getLimitedValue(DestBitSize) < DestBitSize) { 5077 Value *Op0c = Builder->CreateTrunc(Op0, DestTy, Op0->getName()); 5078 Value *Op1c = Builder->CreateTrunc(Op1, DestTy, Op1->getName()); 5079 return BinaryOperator::CreateShl(Op0c, Op1c); 5080 } 5081 break; 5082 } 5083 } 5084 return 0; 5085} 5086 5087Instruction *InstCombiner::visitTrunc(TruncInst &CI) { 5088 if (Instruction *Result = commonIntCastTransforms(CI)) 5089 return Result; 5090 5091 Value *Src = CI.getOperand(0); 5092 const Type *Ty = CI.getType(); 5093 uint32_t DestBitWidth = Ty->getScalarSizeInBits(); 5094 uint32_t SrcBitWidth = Src->getType()->getScalarSizeInBits(); 5095 5096 // Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0) 5097 if (DestBitWidth == 1) { 5098 Constant *One = ConstantInt::get(Src->getType(), 1); 5099 Src = Builder->CreateAnd(Src, One, "tmp"); 5100 Value *Zero = Constant::getNullValue(Src->getType()); 5101 return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero); 5102 } 5103 5104 // Optimize trunc(lshr(), c) to pull the shift through the truncate. 5105 ConstantInt *ShAmtV = 0; 5106 Value *ShiftOp = 0; 5107 if (Src->hasOneUse() && 5108 match(Src, m_LShr(m_Value(ShiftOp), m_ConstantInt(ShAmtV)))) { 5109 uint32_t ShAmt = ShAmtV->getLimitedValue(SrcBitWidth); 5110 5111 // Get a mask for the bits shifting in. 5112 APInt Mask(APInt::getLowBitsSet(SrcBitWidth, ShAmt).shl(DestBitWidth)); 5113 if (MaskedValueIsZero(ShiftOp, Mask)) { 5114 if (ShAmt >= DestBitWidth) // All zeros. 5115 return ReplaceInstUsesWith(CI, Constant::getNullValue(Ty)); 5116 5117 // Okay, we can shrink this. Truncate the input, then return a new 5118 // shift. 
5119 Value *V1 = Builder->CreateTrunc(ShiftOp, Ty, ShiftOp->getName()); 5120 Value *V2 = ConstantExpr::getTrunc(ShAmtV, Ty); 5121 return BinaryOperator::CreateLShr(V1, V2); 5122 } 5123 } 5124 5125 return 0; 5126} 5127 5128/// transformZExtICmp - Transform (zext icmp) to bitwise / integer operations 5129/// in order to eliminate the icmp. 5130Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, 5131 bool DoXform) { 5132 // If we are just checking for a icmp eq of a single bit and zext'ing it 5133 // to an integer, then shift the bit to the appropriate place and then 5134 // cast to integer to avoid the comparison. 5135 if (ConstantInt *Op1C = dyn_cast<ConstantInt>(ICI->getOperand(1))) { 5136 const APInt &Op1CV = Op1C->getValue(); 5137 5138 // zext (x <s 0) to i32 --> x>>u31 true if signbit set. 5139 // zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear. 5140 if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) || 5141 (ICI->getPredicate() == ICmpInst::ICMP_SGT &&Op1CV.isAllOnesValue())) { 5142 if (!DoXform) return ICI; 5143 5144 Value *In = ICI->getOperand(0); 5145 Value *Sh = ConstantInt::get(In->getType(), 5146 In->getType()->getScalarSizeInBits()-1); 5147 In = Builder->CreateLShr(In, Sh, In->getName()+".lobit"); 5148 if (In->getType() != CI.getType()) 5149 In = Builder->CreateIntCast(In, CI.getType(), false/*ZExt*/, "tmp"); 5150 5151 if (ICI->getPredicate() == ICmpInst::ICMP_SGT) { 5152 Constant *One = ConstantInt::get(In->getType(), 1); 5153 In = Builder->CreateXor(In, One, In->getName()+".not"); 5154 } 5155 5156 return ReplaceInstUsesWith(CI, In); 5157 } 5158 5159 5160 5161 // zext (X == 0) to i32 --> X^1 iff X has only the low bit set. 5162 // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set. 5163 // zext (X == 1) to i32 --> X iff X has only the low bit set. 5164 // zext (X == 2) to i32 --> X>>1 iff X has only the 2nd bit set. 5165 // zext (X != 0) to i32 --> X iff X has only the low bit set. 
5166 // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set. 5167 // zext (X != 1) to i32 --> X^1 iff X has only the low bit set. 5168 // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set. 5169 if ((Op1CV == 0 || Op1CV.isPowerOf2()) && 5170 // This only works for EQ and NE 5171 ICI->isEquality()) { 5172 // If Op1C some other power of two, convert: 5173 uint32_t BitWidth = Op1C->getType()->getBitWidth(); 5174 APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); 5175 APInt TypeMask(APInt::getAllOnesValue(BitWidth)); 5176 ComputeMaskedBits(ICI->getOperand(0), TypeMask, KnownZero, KnownOne); 5177 5178 APInt KnownZeroMask(~KnownZero); 5179 if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1? 5180 if (!DoXform) return ICI; 5181 5182 bool isNE = ICI->getPredicate() == ICmpInst::ICMP_NE; 5183 if (Op1CV != 0 && (Op1CV != KnownZeroMask)) { 5184 // (X&4) == 2 --> false 5185 // (X&4) != 2 --> true 5186 Constant *Res = ConstantInt::get(Type::getInt1Ty(CI.getContext()), 5187 isNE); 5188 Res = ConstantExpr::getZExt(Res, CI.getType()); 5189 return ReplaceInstUsesWith(CI, Res); 5190 } 5191 5192 uint32_t ShiftAmt = KnownZeroMask.logBase2(); 5193 Value *In = ICI->getOperand(0); 5194 if (ShiftAmt) { 5195 // Perform a logical shr by shiftamt. 5196 // Insert the shift to put the result in the low bit. 5197 In = Builder->CreateLShr(In, ConstantInt::get(In->getType(),ShiftAmt), 5198 In->getName()+".lobit"); 5199 } 5200 5201 if ((Op1CV != 0) == isNE) { // Toggle the low bit. 5202 Constant *One = ConstantInt::get(In->getType(), 1); 5203 In = Builder->CreateXor(In, One, "tmp"); 5204 } 5205 5206 if (CI.getType() == In->getType()) 5207 return ReplaceInstUsesWith(CI, In); 5208 else 5209 return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/); 5210 } 5211 } 5212 } 5213 5214 // icmp ne A, B is equal to xor A, B when A and B only really have one bit. 
5215 // It is also profitable to transform icmp eq into not(xor(A, B)) because that 5216 // may lead to additional simplifications. 5217 if (ICI->isEquality() && CI.getType() == ICI->getOperand(0)->getType()) { 5218 if (const IntegerType *ITy = dyn_cast<IntegerType>(CI.getType())) { 5219 uint32_t BitWidth = ITy->getBitWidth(); 5220 Value *LHS = ICI->getOperand(0); 5221 Value *RHS = ICI->getOperand(1); 5222 5223 APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0); 5224 APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0); 5225 APInt TypeMask(APInt::getAllOnesValue(BitWidth)); 5226 ComputeMaskedBits(LHS, TypeMask, KnownZeroLHS, KnownOneLHS); 5227 ComputeMaskedBits(RHS, TypeMask, KnownZeroRHS, KnownOneRHS); 5228 5229 if (KnownZeroLHS == KnownZeroRHS && KnownOneLHS == KnownOneRHS) { 5230 APInt KnownBits = KnownZeroLHS | KnownOneLHS; 5231 APInt UnknownBit = ~KnownBits; 5232 if (UnknownBit.countPopulation() == 1) { 5233 if (!DoXform) return ICI; 5234 5235 Value *Result = Builder->CreateXor(LHS, RHS); 5236 5237 // Mask off any bits that are set and won't be shifted away. 5238 if (KnownOneLHS.uge(UnknownBit)) 5239 Result = Builder->CreateAnd(Result, 5240 ConstantInt::get(ITy, UnknownBit)); 5241 5242 // Shift the bit we're testing down to the lsb. 5243 Result = Builder->CreateLShr( 5244 Result, ConstantInt::get(ITy, UnknownBit.countTrailingZeros())); 5245 5246 if (ICI->getPredicate() == ICmpInst::ICMP_EQ) 5247 Result = Builder->CreateXor(Result, ConstantInt::get(ITy, 1)); 5248 Result->takeName(ICI); 5249 return ReplaceInstUsesWith(CI, Result); 5250 } 5251 } 5252 } 5253 } 5254 5255 return 0; 5256} 5257 5258Instruction *InstCombiner::visitZExt(ZExtInst &CI) { 5259 // If one of the common conversion will work, do it. 
5260 if (Instruction *Result = commonIntCastTransforms(CI)) 5261 return Result; 5262 5263 Value *Src = CI.getOperand(0); 5264 5265 // If this is a TRUNC followed by a ZEXT then we are dealing with integral 5266 // types and if the sizes are just right we can convert this into a logical 5267 // 'and' which will be much cheaper than the pair of casts. 5268 if (TruncInst *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast 5269 // Get the sizes of the types involved. We know that the intermediate type 5270 // will be smaller than A or C, but don't know the relation between A and C. 5271 Value *A = CSrc->getOperand(0); 5272 unsigned SrcSize = A->getType()->getScalarSizeInBits(); 5273 unsigned MidSize = CSrc->getType()->getScalarSizeInBits(); 5274 unsigned DstSize = CI.getType()->getScalarSizeInBits(); 5275 // If we're actually extending zero bits, then if 5276 // SrcSize < DstSize: zext(a & mask) 5277 // SrcSize == DstSize: a & mask 5278 // SrcSize > DstSize: trunc(a) & mask 5279 if (SrcSize < DstSize) { 5280 APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize)); 5281 Constant *AndConst = ConstantInt::get(A->getType(), AndValue); 5282 Value *And = Builder->CreateAnd(A, AndConst, CSrc->getName()+".mask"); 5283 return new ZExtInst(And, CI.getType()); 5284 } 5285 5286 if (SrcSize == DstSize) { 5287 APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize)); 5288 return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(), 5289 AndValue)); 5290 } 5291 if (SrcSize > DstSize) { 5292 Value *Trunc = Builder->CreateTrunc(A, CI.getType(), "tmp"); 5293 APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize)); 5294 return BinaryOperator::CreateAnd(Trunc, 5295 ConstantInt::get(Trunc->getType(), 5296 AndValue)); 5297 } 5298 } 5299 5300 if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src)) 5301 return transformZExtICmp(ICI, CI); 5302 5303 BinaryOperator *SrcI = dyn_cast<BinaryOperator>(Src); 5304 if (SrcI && SrcI->getOpcode() == Instruction::Or) { 5305 // zext (or icmp, icmp) --> or (zext 
icmp), (zext icmp) if at least one 5306 // of the (zext icmp) will be transformed. 5307 ICmpInst *LHS = dyn_cast<ICmpInst>(SrcI->getOperand(0)); 5308 ICmpInst *RHS = dyn_cast<ICmpInst>(SrcI->getOperand(1)); 5309 if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() && 5310 (transformZExtICmp(LHS, CI, false) || 5311 transformZExtICmp(RHS, CI, false))) { 5312 Value *LCast = Builder->CreateZExt(LHS, CI.getType(), LHS->getName()); 5313 Value *RCast = Builder->CreateZExt(RHS, CI.getType(), RHS->getName()); 5314 return BinaryOperator::Create(Instruction::Or, LCast, RCast); 5315 } 5316 } 5317 5318 // zext(trunc(t) & C) -> (t & zext(C)). 5319 if (SrcI && SrcI->getOpcode() == Instruction::And && SrcI->hasOneUse()) 5320 if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1))) 5321 if (TruncInst *TI = dyn_cast<TruncInst>(SrcI->getOperand(0))) { 5322 Value *TI0 = TI->getOperand(0); 5323 if (TI0->getType() == CI.getType()) 5324 return 5325 BinaryOperator::CreateAnd(TI0, 5326 ConstantExpr::getZExt(C, CI.getType())); 5327 } 5328 5329 // zext((trunc(t) & C) ^ C) -> ((t & zext(C)) ^ zext(C)). 
5330 if (SrcI && SrcI->getOpcode() == Instruction::Xor && SrcI->hasOneUse()) 5331 if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1))) 5332 if (BinaryOperator *And = dyn_cast<BinaryOperator>(SrcI->getOperand(0))) 5333 if (And->getOpcode() == Instruction::And && And->hasOneUse() && 5334 And->getOperand(1) == C) 5335 if (TruncInst *TI = dyn_cast<TruncInst>(And->getOperand(0))) { 5336 Value *TI0 = TI->getOperand(0); 5337 if (TI0->getType() == CI.getType()) { 5338 Constant *ZC = ConstantExpr::getZExt(C, CI.getType()); 5339 Value *NewAnd = Builder->CreateAnd(TI0, ZC, "tmp"); 5340 return BinaryOperator::CreateXor(NewAnd, ZC); 5341 } 5342 } 5343 5344 return 0; 5345} 5346 5347Instruction *InstCombiner::visitSExt(SExtInst &CI) { 5348 if (Instruction *I = commonIntCastTransforms(CI)) 5349 return I; 5350 5351 Value *Src = CI.getOperand(0); 5352 5353 // Canonicalize sign-extend from i1 to a select. 5354 if (Src->getType() == Type::getInt1Ty(CI.getContext())) 5355 return SelectInst::Create(Src, 5356 Constant::getAllOnesValue(CI.getType()), 5357 Constant::getNullValue(CI.getType())); 5358 5359 // See if the value being truncated is already sign extended. If so, just 5360 // eliminate the trunc/sext pair. 5361 if (Operator::getOpcode(Src) == Instruction::Trunc) { 5362 Value *Op = cast<User>(Src)->getOperand(0); 5363 unsigned OpBits = Op->getType()->getScalarSizeInBits(); 5364 unsigned MidBits = Src->getType()->getScalarSizeInBits(); 5365 unsigned DestBits = CI.getType()->getScalarSizeInBits(); 5366 unsigned NumSignBits = ComputeNumSignBits(Op); 5367 5368 if (OpBits == DestBits) { 5369 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign 5370 // bits, it is already ready. 5371 if (NumSignBits > DestBits-MidBits) 5372 return ReplaceInstUsesWith(CI, Op); 5373 } else if (OpBits < DestBits) { 5374 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign 5375 // bits, just sext from i32. 
5376 if (NumSignBits > OpBits-MidBits) 5377 return new SExtInst(Op, CI.getType(), "tmp"); 5378 } else { 5379 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign 5380 // bits, just truncate to i32. 5381 if (NumSignBits > OpBits-MidBits) 5382 return new TruncInst(Op, CI.getType(), "tmp"); 5383 } 5384 } 5385 5386 // If the input is a shl/ashr pair of a same constant, then this is a sign 5387 // extension from a smaller value. If we could trust arbitrary bitwidth 5388 // integers, we could turn this into a truncate to the smaller bit and then 5389 // use a sext for the whole extension. Since we don't, look deeper and check 5390 // for a truncate. If the source and dest are the same type, eliminate the 5391 // trunc and extend and just do shifts. For example, turn: 5392 // %a = trunc i32 %i to i8 5393 // %b = shl i8 %a, 6 5394 // %c = ashr i8 %b, 6 5395 // %d = sext i8 %c to i32 5396 // into: 5397 // %a = shl i32 %i, 30 5398 // %d = ashr i32 %a, 30 5399 Value *A = 0; 5400 ConstantInt *BA = 0, *CA = 0; 5401 if (match(Src, m_AShr(m_Shl(m_Value(A), m_ConstantInt(BA)), 5402 m_ConstantInt(CA))) && 5403 BA == CA && isa<TruncInst>(A)) { 5404 Value *I = cast<TruncInst>(A)->getOperand(0); 5405 if (I->getType() == CI.getType()) { 5406 unsigned MidSize = Src->getType()->getScalarSizeInBits(); 5407 unsigned SrcDstSize = CI.getType()->getScalarSizeInBits(); 5408 unsigned ShAmt = CA->getZExtValue()+SrcDstSize-MidSize; 5409 Constant *ShAmtV = ConstantInt::get(CI.getType(), ShAmt); 5410 I = Builder->CreateShl(I, ShAmtV, CI.getName()); 5411 return BinaryOperator::CreateAShr(I, ShAmtV); 5412 } 5413 } 5414 5415 return 0; 5416} 5417 5418/// FitsInFPType - Return a Constant* for the specified FP constant if it fits 5419/// in the specified FP type without changing its value. 
5420static Constant *FitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) { 5421 bool losesInfo; 5422 APFloat F = CFP->getValueAPF(); 5423 (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo); 5424 if (!losesInfo) 5425 return ConstantFP::get(CFP->getContext(), F); 5426 return 0; 5427} 5428 5429/// LookThroughFPExtensions - If this is an fp extension instruction, look 5430/// through it until we get the source value. 5431static Value *LookThroughFPExtensions(Value *V) { 5432 if (Instruction *I = dyn_cast<Instruction>(V)) 5433 if (I->getOpcode() == Instruction::FPExt) 5434 return LookThroughFPExtensions(I->getOperand(0)); 5435 5436 // If this value is a constant, return the constant in the smallest FP type 5437 // that can accurately represent it. This allows us to turn 5438 // (float)((double)X+2.0) into x+2.0f. 5439 if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) { 5440 if (CFP->getType() == Type::getPPC_FP128Ty(V->getContext())) 5441 return V; // No constant folding of this. 5442 // See if the value can be truncated to float and then reextended. 5443 if (Value *V = FitsInFPType(CFP, APFloat::IEEEsingle)) 5444 return V; 5445 if (CFP->getType() == Type::getDoubleTy(V->getContext())) 5446 return V; // Won't shrink. 5447 if (Value *V = FitsInFPType(CFP, APFloat::IEEEdouble)) 5448 return V; 5449 // Don't try to shrink to various long double types. 5450 } 5451 5452 return V; 5453} 5454 5455Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { 5456 if (Instruction *I = commonCastTransforms(CI)) 5457 return I; 5458 5459 // If we have fptrunc(fadd (fpextend x), (fpextend y)), where x and y are 5460 // smaller than the destination type, we can eliminate the truncate by doing 5461 // the add as the smaller type. This applies to fadd/fsub/fmul/fdiv as well as 5462 // many builtins (sqrt, etc). 
5463 BinaryOperator *OpI = dyn_cast<BinaryOperator>(CI.getOperand(0)); 5464 if (OpI && OpI->hasOneUse()) { 5465 switch (OpI->getOpcode()) { 5466 default: break; 5467 case Instruction::FAdd: 5468 case Instruction::FSub: 5469 case Instruction::FMul: 5470 case Instruction::FDiv: 5471 case Instruction::FRem: 5472 const Type *SrcTy = OpI->getType(); 5473 Value *LHSTrunc = LookThroughFPExtensions(OpI->getOperand(0)); 5474 Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1)); 5475 if (LHSTrunc->getType() != SrcTy && 5476 RHSTrunc->getType() != SrcTy) { 5477 unsigned DstSize = CI.getType()->getScalarSizeInBits(); 5478 // If the source types were both smaller than the destination type of 5479 // the cast, do this xform. 5480 if (LHSTrunc->getType()->getScalarSizeInBits() <= DstSize && 5481 RHSTrunc->getType()->getScalarSizeInBits() <= DstSize) { 5482 LHSTrunc = Builder->CreateFPExt(LHSTrunc, CI.getType()); 5483 RHSTrunc = Builder->CreateFPExt(RHSTrunc, CI.getType()); 5484 return BinaryOperator::Create(OpI->getOpcode(), LHSTrunc, RHSTrunc); 5485 } 5486 } 5487 break; 5488 } 5489 } 5490 return 0; 5491} 5492 5493Instruction *InstCombiner::visitFPExt(CastInst &CI) { 5494 return commonCastTransforms(CI); 5495} 5496 5497Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) { 5498 Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0)); 5499 if (OpI == 0) 5500 return commonCastTransforms(FI); 5501 5502 // fptoui(uitofp(X)) --> X 5503 // fptoui(sitofp(X)) --> X 5504 // This is safe if the intermediate type has enough bits in its mantissa to 5505 // accurately represent all values of X. For example, do not do this with 5506 // i64->float->i64. This is also safe for sitofp case, because any negative 5507 // 'X' value would cause an undefined result for the fptoui. 
5508 if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) && 5509 OpI->getOperand(0)->getType() == FI.getType() && 5510 (int)FI.getType()->getScalarSizeInBits() < /*extra bit for sign */ 5511 OpI->getType()->getFPMantissaWidth()) 5512 return ReplaceInstUsesWith(FI, OpI->getOperand(0)); 5513 5514 return commonCastTransforms(FI); 5515} 5516 5517Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) { 5518 Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0)); 5519 if (OpI == 0) 5520 return commonCastTransforms(FI); 5521 5522 // fptosi(sitofp(X)) --> X 5523 // fptosi(uitofp(X)) --> X 5524 // This is safe if the intermediate type has enough bits in its mantissa to 5525 // accurately represent all values of X. For example, do not do this with 5526 // i64->float->i64. This is also safe for sitofp case, because any negative 5527 // 'X' value would cause an undefined result for the fptoui. 5528 if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) && 5529 OpI->getOperand(0)->getType() == FI.getType() && 5530 (int)FI.getType()->getScalarSizeInBits() <= 5531 OpI->getType()->getFPMantissaWidth()) 5532 return ReplaceInstUsesWith(FI, OpI->getOperand(0)); 5533 5534 return commonCastTransforms(FI); 5535} 5536 5537Instruction *InstCombiner::visitUIToFP(CastInst &CI) { 5538 return commonCastTransforms(CI); 5539} 5540 5541Instruction *InstCombiner::visitSIToFP(CastInst &CI) { 5542 return commonCastTransforms(CI); 5543} 5544 5545Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) { 5546 // If the destination integer type is smaller than the intptr_t type for 5547 // this target, do a ptrtoint to intptr_t then do a trunc. This allows the 5548 // trunc to be exposed to other transforms. Don't do this for extending 5549 // ptrtoint's, because we don't know if the target sign or zero extends its 5550 // pointers. 
5551 if (TD && 5552 CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) { 5553 Value *P = Builder->CreatePtrToInt(CI.getOperand(0), 5554 TD->getIntPtrType(CI.getContext()), 5555 "tmp"); 5556 return new TruncInst(P, CI.getType()); 5557 } 5558 5559 return commonPointerCastTransforms(CI); 5560} 5561 5562Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { 5563 // If the source integer type is larger than the intptr_t type for 5564 // this target, do a trunc to the intptr_t type, then inttoptr of it. This 5565 // allows the trunc to be exposed to other transforms. Don't do this for 5566 // extending inttoptr's, because we don't know if the target sign or zero 5567 // extends to pointers. 5568 if (TD && CI.getOperand(0)->getType()->getScalarSizeInBits() > 5569 TD->getPointerSizeInBits()) { 5570 Value *P = Builder->CreateTrunc(CI.getOperand(0), 5571 TD->getIntPtrType(CI.getContext()), "tmp"); 5572 return new IntToPtrInst(P, CI.getType()); 5573 } 5574 5575 if (Instruction *I = commonCastTransforms(CI)) 5576 return I; 5577 5578 return 0; 5579} 5580 5581Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { 5582 // If the operands are integer typed then apply the integer transforms, 5583 // otherwise just apply the common ones. 5584 Value *Src = CI.getOperand(0); 5585 const Type *SrcTy = Src->getType(); 5586 const Type *DestTy = CI.getType(); 5587 5588 if (isa<PointerType>(SrcTy)) { 5589 if (Instruction *I = commonPointerCastTransforms(CI)) 5590 return I; 5591 } else { 5592 if (Instruction *Result = commonCastTransforms(CI)) 5593 return Result; 5594 } 5595 5596 5597 // Get rid of casts from one type to the same type. These are useless and can 5598 // be replaced by the operand. 
5599 if (DestTy == Src->getType()) 5600 return ReplaceInstUsesWith(CI, Src); 5601 5602 if (const PointerType *DstPTy = dyn_cast<PointerType>(DestTy)) { 5603 const PointerType *SrcPTy = cast<PointerType>(SrcTy); 5604 const Type *DstElTy = DstPTy->getElementType(); 5605 const Type *SrcElTy = SrcPTy->getElementType(); 5606 5607 // If the address spaces don't match, don't eliminate the bitcast, which is 5608 // required for changing types. 5609 if (SrcPTy->getAddressSpace() != DstPTy->getAddressSpace()) 5610 return 0; 5611 5612 // If we are casting a alloca to a pointer to a type of the same 5613 // size, rewrite the allocation instruction to allocate the "right" type. 5614 // There is no need to modify malloc calls because it is their bitcast that 5615 // needs to be cleaned up. 5616 if (AllocaInst *AI = dyn_cast<AllocaInst>(Src)) 5617 if (Instruction *V = PromoteCastOfAllocation(CI, *AI)) 5618 return V; 5619 5620 // If the source and destination are pointers, and this cast is equivalent 5621 // to a getelementptr X, 0, 0, 0... turn it into the appropriate gep. 5622 // This can enhance SROA and other transforms that want type-safe pointers. 5623 Constant *ZeroUInt = 5624 Constant::getNullValue(Type::getInt32Ty(CI.getContext())); 5625 unsigned NumZeros = 0; 5626 while (SrcElTy != DstElTy && 5627 isa<CompositeType>(SrcElTy) && !isa<PointerType>(SrcElTy) && 5628 SrcElTy->getNumContainedTypes() /* not "{}" */) { 5629 SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt); 5630 ++NumZeros; 5631 } 5632 5633 // If we found a path from the src to dest, create the getelementptr now. 
5634 if (SrcElTy == DstElTy) { 5635 SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt); 5636 return GetElementPtrInst::CreateInBounds(Src, Idxs.begin(), Idxs.end(),"", 5637 ((Instruction*) NULL)); 5638 } 5639 } 5640 5641 if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) { 5642 if (DestVTy->getNumElements() == 1) { 5643 if (!isa<VectorType>(SrcTy)) { 5644 Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType()); 5645 return InsertElementInst::Create(UndefValue::get(DestTy), Elem, 5646 Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); 5647 } 5648 // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast) 5649 } 5650 } 5651 5652 if (const VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) { 5653 if (SrcVTy->getNumElements() == 1) { 5654 if (!isa<VectorType>(DestTy)) { 5655 Value *Elem = 5656 Builder->CreateExtractElement(Src, 5657 Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); 5658 return CastInst::Create(Instruction::BitCast, Elem, DestTy); 5659 } 5660 } 5661 } 5662 5663 if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(Src)) { 5664 if (SVI->hasOneUse()) { 5665 // Okay, we have (bitconvert (shuffle ..)). Check to see if this is 5666 // a bitconvert to a vector with the same # elts. 5667 if (isa<VectorType>(DestTy) && 5668 cast<VectorType>(DestTy)->getNumElements() == 5669 SVI->getType()->getNumElements() && 5670 SVI->getType()->getNumElements() == 5671 cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements()) { 5672 CastInst *Tmp; 5673 // If either of the operands is a cast from CI.getType(), then 5674 // evaluating the shuffle in the casted destination's type will allow 5675 // us to eliminate at least one cast. 
5676 if (((Tmp = dyn_cast<CastInst>(SVI->getOperand(0))) && 5677 Tmp->getOperand(0)->getType() == DestTy) || 5678 ((Tmp = dyn_cast<CastInst>(SVI->getOperand(1))) && 5679 Tmp->getOperand(0)->getType() == DestTy)) { 5680 Value *LHS = Builder->CreateBitCast(SVI->getOperand(0), DestTy); 5681 Value *RHS = Builder->CreateBitCast(SVI->getOperand(1), DestTy); 5682 // Return a new shuffle vector. Use the same element ID's, as we 5683 // know the vector types match #elts. 5684 return new ShuffleVectorInst(LHS, RHS, SVI->getOperand(2)); 5685 } 5686 } 5687 } 5688 } 5689 return 0; 5690} 5691 5692/// GetSelectFoldableOperands - We want to turn code that looks like this: 5693/// %C = or %A, %B 5694/// %D = select %cond, %C, %A 5695/// into: 5696/// %C = select %cond, %B, 0 5697/// %D = or %A, %C 5698/// 5699/// Assuming that the specified instruction is an operand to the select, return 5700/// a bitmask indicating which operands of this instruction are foldable if they 5701/// equal the other incoming value of the select. 5702/// 5703static unsigned GetSelectFoldableOperands(Instruction *I) { 5704 switch (I->getOpcode()) { 5705 case Instruction::Add: 5706 case Instruction::Mul: 5707 case Instruction::And: 5708 case Instruction::Or: 5709 case Instruction::Xor: 5710 return 3; // Can fold through either operand. 5711 case Instruction::Sub: // Can only fold on the amount subtracted. 5712 case Instruction::Shl: // Can only fold on the shift amount. 5713 case Instruction::LShr: 5714 case Instruction::AShr: 5715 return 1; 5716 default: 5717 return 0; // Cannot fold 5718 } 5719} 5720 5721/// GetSelectFoldableConstant - For the same transformation as the previous 5722/// function, return the identity constant that goes into the select. 
5723static Constant *GetSelectFoldableConstant(Instruction *I) { 5724 switch (I->getOpcode()) { 5725 default: llvm_unreachable("This cannot happen!"); 5726 case Instruction::Add: 5727 case Instruction::Sub: 5728 case Instruction::Or: 5729 case Instruction::Xor: 5730 case Instruction::Shl: 5731 case Instruction::LShr: 5732 case Instruction::AShr: 5733 return Constant::getNullValue(I->getType()); 5734 case Instruction::And: 5735 return Constant::getAllOnesValue(I->getType()); 5736 case Instruction::Mul: 5737 return ConstantInt::get(I->getType(), 1); 5738 } 5739} 5740 5741/// FoldSelectOpOp - Here we have (select c, TI, FI), and we know that TI and FI 5742/// have the same opcode and only one use each. Try to simplify this. 5743Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI, 5744 Instruction *FI) { 5745 if (TI->getNumOperands() == 1) { 5746 // If this is a non-volatile load or a cast from the same type, 5747 // merge. 5748 if (TI->isCast()) { 5749 if (TI->getOperand(0)->getType() != FI->getOperand(0)->getType()) 5750 return 0; 5751 } else { 5752 return 0; // unknown unary op. 5753 } 5754 5755 // Fold this by inserting a select from the input values. 5756 SelectInst *NewSI = SelectInst::Create(SI.getCondition(), TI->getOperand(0), 5757 FI->getOperand(0), SI.getName()+".v"); 5758 InsertNewInstBefore(NewSI, SI); 5759 return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI, 5760 TI->getType()); 5761 } 5762 5763 // Only handle binary operators here. 5764 if (!isa<BinaryOperator>(TI)) 5765 return 0; 5766 5767 // Figure out if the operations have any operands in common. 
5768 Value *MatchOp, *OtherOpT, *OtherOpF; 5769 bool MatchIsOpZero; 5770 if (TI->getOperand(0) == FI->getOperand(0)) { 5771 MatchOp = TI->getOperand(0); 5772 OtherOpT = TI->getOperand(1); 5773 OtherOpF = FI->getOperand(1); 5774 MatchIsOpZero = true; 5775 } else if (TI->getOperand(1) == FI->getOperand(1)) { 5776 MatchOp = TI->getOperand(1); 5777 OtherOpT = TI->getOperand(0); 5778 OtherOpF = FI->getOperand(0); 5779 MatchIsOpZero = false; 5780 } else if (!TI->isCommutative()) { 5781 return 0; 5782 } else if (TI->getOperand(0) == FI->getOperand(1)) { 5783 MatchOp = TI->getOperand(0); 5784 OtherOpT = TI->getOperand(1); 5785 OtherOpF = FI->getOperand(0); 5786 MatchIsOpZero = true; 5787 } else if (TI->getOperand(1) == FI->getOperand(0)) { 5788 MatchOp = TI->getOperand(1); 5789 OtherOpT = TI->getOperand(0); 5790 OtherOpF = FI->getOperand(1); 5791 MatchIsOpZero = true; 5792 } else { 5793 return 0; 5794 } 5795 5796 // If we reach here, they do have operations in common. 5797 SelectInst *NewSI = SelectInst::Create(SI.getCondition(), OtherOpT, 5798 OtherOpF, SI.getName()+".v"); 5799 InsertNewInstBefore(NewSI, SI); 5800 5801 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TI)) { 5802 if (MatchIsOpZero) 5803 return BinaryOperator::Create(BO->getOpcode(), MatchOp, NewSI); 5804 else 5805 return BinaryOperator::Create(BO->getOpcode(), NewSI, MatchOp); 5806 } 5807 llvm_unreachable("Shouldn't get here"); 5808 return 0; 5809} 5810 5811static bool isSelect01(Constant *C1, Constant *C2) { 5812 ConstantInt *C1I = dyn_cast<ConstantInt>(C1); 5813 if (!C1I) 5814 return false; 5815 ConstantInt *C2I = dyn_cast<ConstantInt>(C2); 5816 if (!C2I) 5817 return false; 5818 return (C1I->isZero() || C1I->isOne()) && (C2I->isZero() || C2I->isOne()); 5819} 5820 5821/// FoldSelectIntoOp - Try fold the select into one of the operands to 5822/// facilitate further optimization. 
5823Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal, 5824 Value *FalseVal) { 5825 // See the comment above GetSelectFoldableOperands for a description of the 5826 // transformation we are doing here. 5827 if (Instruction *TVI = dyn_cast<Instruction>(TrueVal)) { 5828 if (TVI->hasOneUse() && TVI->getNumOperands() == 2 && 5829 !isa<Constant>(FalseVal)) { 5830 if (unsigned SFO = GetSelectFoldableOperands(TVI)) { 5831 unsigned OpToFold = 0; 5832 if ((SFO & 1) && FalseVal == TVI->getOperand(0)) { 5833 OpToFold = 1; 5834 } else if ((SFO & 2) && FalseVal == TVI->getOperand(1)) { 5835 OpToFold = 2; 5836 } 5837 5838 if (OpToFold) { 5839 Constant *C = GetSelectFoldableConstant(TVI); 5840 Value *OOp = TVI->getOperand(2-OpToFold); 5841 // Avoid creating select between 2 constants unless it's selecting 5842 // between 0 and 1. 5843 if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) { 5844 Instruction *NewSel = SelectInst::Create(SI.getCondition(), OOp, C); 5845 InsertNewInstBefore(NewSel, SI); 5846 NewSel->takeName(TVI); 5847 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TVI)) 5848 return BinaryOperator::Create(BO->getOpcode(), FalseVal, NewSel); 5849 llvm_unreachable("Unknown instruction!!"); 5850 } 5851 } 5852 } 5853 } 5854 } 5855 5856 if (Instruction *FVI = dyn_cast<Instruction>(FalseVal)) { 5857 if (FVI->hasOneUse() && FVI->getNumOperands() == 2 && 5858 !isa<Constant>(TrueVal)) { 5859 if (unsigned SFO = GetSelectFoldableOperands(FVI)) { 5860 unsigned OpToFold = 0; 5861 if ((SFO & 1) && TrueVal == FVI->getOperand(0)) { 5862 OpToFold = 1; 5863 } else if ((SFO & 2) && TrueVal == FVI->getOperand(1)) { 5864 OpToFold = 2; 5865 } 5866 5867 if (OpToFold) { 5868 Constant *C = GetSelectFoldableConstant(FVI); 5869 Value *OOp = FVI->getOperand(2-OpToFold); 5870 // Avoid creating select between 2 constants unless it's selecting 5871 // between 0 and 1. 
5872 if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) { 5873 Instruction *NewSel = SelectInst::Create(SI.getCondition(), C, OOp); 5874 InsertNewInstBefore(NewSel, SI); 5875 NewSel->takeName(FVI); 5876 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FVI)) 5877 return BinaryOperator::Create(BO->getOpcode(), TrueVal, NewSel); 5878 llvm_unreachable("Unknown instruction!!"); 5879 } 5880 } 5881 } 5882 } 5883 } 5884 5885 return 0; 5886} 5887 5888/// visitSelectInstWithICmp - Visit a SelectInst that has an 5889/// ICmpInst as its first operand. 5890/// 5891Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, 5892 ICmpInst *ICI) { 5893 bool Changed = false; 5894 ICmpInst::Predicate Pred = ICI->getPredicate(); 5895 Value *CmpLHS = ICI->getOperand(0); 5896 Value *CmpRHS = ICI->getOperand(1); 5897 Value *TrueVal = SI.getTrueValue(); 5898 Value *FalseVal = SI.getFalseValue(); 5899 5900 // Check cases where the comparison is with a constant that 5901 // can be adjusted to fit the min/max idiom. We may edit ICI in 5902 // place here, so make sure the select is the only user. 5903 if (ICI->hasOneUse()) 5904 if (ConstantInt *CI = dyn_cast<ConstantInt>(CmpRHS)) { 5905 switch (Pred) { 5906 default: break; 5907 case ICmpInst::ICMP_ULT: 5908 case ICmpInst::ICMP_SLT: { 5909 // X < MIN ? T : F --> F 5910 if (CI->isMinValue(Pred == ICmpInst::ICMP_SLT)) 5911 return ReplaceInstUsesWith(SI, FalseVal); 5912 // X < C ? X : C-1 --> X > C-1 ? C-1 : X 5913 Constant *AdjustedRHS = SubOne(CI); 5914 if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) || 5915 (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) { 5916 Pred = ICmpInst::getSwappedPredicate(Pred); 5917 CmpRHS = AdjustedRHS; 5918 std::swap(FalseVal, TrueVal); 5919 ICI->setPredicate(Pred); 5920 ICI->setOperand(1, CmpRHS); 5921 SI.setOperand(1, TrueVal); 5922 SI.setOperand(2, FalseVal); 5923 Changed = true; 5924 } 5925 break; 5926 } 5927 case ICmpInst::ICMP_UGT: 5928 case ICmpInst::ICMP_SGT: { 5929 // X > MAX ? 
T : F --> F 5930 if (CI->isMaxValue(Pred == ICmpInst::ICMP_SGT)) 5931 return ReplaceInstUsesWith(SI, FalseVal); 5932 // X > C ? X : C+1 --> X < C+1 ? C+1 : X 5933 Constant *AdjustedRHS = AddOne(CI); 5934 if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) || 5935 (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) { 5936 Pred = ICmpInst::getSwappedPredicate(Pred); 5937 CmpRHS = AdjustedRHS; 5938 std::swap(FalseVal, TrueVal); 5939 ICI->setPredicate(Pred); 5940 ICI->setOperand(1, CmpRHS); 5941 SI.setOperand(1, TrueVal); 5942 SI.setOperand(2, FalseVal); 5943 Changed = true; 5944 } 5945 break; 5946 } 5947 } 5948 5949 // (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if signed 5950 // (x >s -1) ? -1 : 0 -> ashr x, 31 -> all ones if not signed 5951 CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE; 5952 if (match(TrueVal, m_ConstantInt<-1>()) && 5953 match(FalseVal, m_ConstantInt<0>())) 5954 Pred = ICI->getPredicate(); 5955 else if (match(TrueVal, m_ConstantInt<0>()) && 5956 match(FalseVal, m_ConstantInt<-1>())) 5957 Pred = CmpInst::getInversePredicate(ICI->getPredicate()); 5958 5959 if (Pred != CmpInst::BAD_ICMP_PREDICATE) { 5960 // If we are just checking for a icmp eq of a single bit and zext'ing it 5961 // to an integer, then shift the bit to the appropriate place and then 5962 // cast to integer to avoid the comparison. 5963 const APInt &Op1CV = CI->getValue(); 5964 5965 // sext (x <s 0) to i32 --> x>>s31 true if signbit set. 5966 // sext (x >s -1) to i32 --> (x>>s31)^-1 true if signbit clear. 
5967 if ((Pred == ICmpInst::ICMP_SLT && Op1CV == 0) || 5968 (Pred == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) { 5969 Value *In = ICI->getOperand(0); 5970 Value *Sh = ConstantInt::get(In->getType(), 5971 In->getType()->getScalarSizeInBits()-1); 5972 In = InsertNewInstBefore(BinaryOperator::CreateAShr(In, Sh, 5973 In->getName()+".lobit"), 5974 *ICI); 5975 if (In->getType() != SI.getType()) 5976 In = CastInst::CreateIntegerCast(In, SI.getType(), 5977 true/*SExt*/, "tmp", ICI); 5978 5979 if (Pred == ICmpInst::ICMP_SGT) 5980 In = InsertNewInstBefore(BinaryOperator::CreateNot(In, 5981 In->getName()+".not"), *ICI); 5982 5983 return ReplaceInstUsesWith(SI, In); 5984 } 5985 } 5986 } 5987 5988 if (CmpLHS == TrueVal && CmpRHS == FalseVal) { 5989 // Transform (X == Y) ? X : Y -> Y 5990 if (Pred == ICmpInst::ICMP_EQ) 5991 return ReplaceInstUsesWith(SI, FalseVal); 5992 // Transform (X != Y) ? X : Y -> X 5993 if (Pred == ICmpInst::ICMP_NE) 5994 return ReplaceInstUsesWith(SI, TrueVal); 5995 /// NOTE: if we wanted to, this is where to detect integer MIN/MAX 5996 5997 } else if (CmpLHS == FalseVal && CmpRHS == TrueVal) { 5998 // Transform (X == Y) ? Y : X -> X 5999 if (Pred == ICmpInst::ICMP_EQ) 6000 return ReplaceInstUsesWith(SI, FalseVal); 6001 // Transform (X != Y) ? Y : X -> Y 6002 if (Pred == ICmpInst::ICMP_NE) 6003 return ReplaceInstUsesWith(SI, TrueVal); 6004 /// NOTE: if we wanted to, this is where to detect integer MIN/MAX 6005 } 6006 return Changed ? &SI : 0; 6007} 6008 6009 6010/// CanSelectOperandBeMappingIntoPredBlock - SI is a select whose condition is a 6011/// PHI node (but the two may be in different blocks). See if the true/false 6012/// values (V) are live in all of the predecessor blocks of the PHI. For 6013/// example, cases like this cannot be mapped: 6014/// 6015/// X = phi [ C1, BB1], [C2, BB2] 6016/// Y = add 6017/// Z = select X, Y, 0 6018/// 6019/// because Y is not live in BB1/BB2. 
6020/// 6021static bool CanSelectOperandBeMappingIntoPredBlock(const Value *V, 6022 const SelectInst &SI) { 6023 // If the value is a non-instruction value like a constant or argument, it 6024 // can always be mapped. 6025 const Instruction *I = dyn_cast<Instruction>(V); 6026 if (I == 0) return true; 6027 6028 // If V is a PHI node defined in the same block as the condition PHI, we can 6029 // map the arguments. 6030 const PHINode *CondPHI = cast<PHINode>(SI.getCondition()); 6031 6032 if (const PHINode *VP = dyn_cast<PHINode>(I)) 6033 if (VP->getParent() == CondPHI->getParent()) 6034 return true; 6035 6036 // Otherwise, if the PHI and select are defined in the same block and if V is 6037 // defined in a different block, then we can transform it. 6038 if (SI.getParent() == CondPHI->getParent() && 6039 I->getParent() != CondPHI->getParent()) 6040 return true; 6041 6042 // Otherwise we have a 'hard' case and we can't tell without doing more 6043 // detailed dominator based analysis, punt. 6044 return false; 6045} 6046 6047/// FoldSPFofSPF - We have an SPF (e.g. 
a min or max) of an SPF of the form: 6048/// SPF2(SPF1(A, B), C) 6049Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner, 6050 SelectPatternFlavor SPF1, 6051 Value *A, Value *B, 6052 Instruction &Outer, 6053 SelectPatternFlavor SPF2, Value *C) { 6054 if (C == A || C == B) { 6055 // MAX(MAX(A, B), B) -> MAX(A, B) 6056 // MIN(MIN(a, b), a) -> MIN(a, b) 6057 if (SPF1 == SPF2) 6058 return ReplaceInstUsesWith(Outer, Inner); 6059 6060 // MAX(MIN(a, b), a) -> a 6061 // MIN(MAX(a, b), a) -> a 6062 if ((SPF1 == SPF_SMIN && SPF2 == SPF_SMAX) || 6063 (SPF1 == SPF_SMAX && SPF2 == SPF_SMIN) || 6064 (SPF1 == SPF_UMIN && SPF2 == SPF_UMAX) || 6065 (SPF1 == SPF_UMAX && SPF2 == SPF_UMIN)) 6066 return ReplaceInstUsesWith(Outer, C); 6067 } 6068 6069 // TODO: MIN(MIN(A, 23), 97) 6070 return 0; 6071} 6072 6073 6074 6075 6076Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { 6077 Value *CondVal = SI.getCondition(); 6078 Value *TrueVal = SI.getTrueValue(); 6079 Value *FalseVal = SI.getFalseValue(); 6080 6081 // select true, X, Y -> X 6082 // select false, X, Y -> Y 6083 if (ConstantInt *C = dyn_cast<ConstantInt>(CondVal)) 6084 return ReplaceInstUsesWith(SI, C->getZExtValue() ? 
TrueVal : FalseVal); 6085 6086 // select C, X, X -> X 6087 if (TrueVal == FalseVal) 6088 return ReplaceInstUsesWith(SI, TrueVal); 6089 6090 if (isa<UndefValue>(TrueVal)) // select C, undef, X -> X 6091 return ReplaceInstUsesWith(SI, FalseVal); 6092 if (isa<UndefValue>(FalseVal)) // select C, X, undef -> X 6093 return ReplaceInstUsesWith(SI, TrueVal); 6094 if (isa<UndefValue>(CondVal)) { // select undef, X, Y -> X or Y 6095 if (isa<Constant>(TrueVal)) 6096 return ReplaceInstUsesWith(SI, TrueVal); 6097 else 6098 return ReplaceInstUsesWith(SI, FalseVal); 6099 } 6100 6101 if (SI.getType() == Type::getInt1Ty(SI.getContext())) { 6102 if (ConstantInt *C = dyn_cast<ConstantInt>(TrueVal)) { 6103 if (C->getZExtValue()) { 6104 // Change: A = select B, true, C --> A = or B, C 6105 return BinaryOperator::CreateOr(CondVal, FalseVal); 6106 } else { 6107 // Change: A = select B, false, C --> A = and !B, C 6108 Value *NotCond = 6109 InsertNewInstBefore(BinaryOperator::CreateNot(CondVal, 6110 "not."+CondVal->getName()), SI); 6111 return BinaryOperator::CreateAnd(NotCond, FalseVal); 6112 } 6113 } else if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) { 6114 if (C->getZExtValue() == false) { 6115 // Change: A = select B, C, false --> A = and B, C 6116 return BinaryOperator::CreateAnd(CondVal, TrueVal); 6117 } else { 6118 // Change: A = select B, C, true --> A = or !B, C 6119 Value *NotCond = 6120 InsertNewInstBefore(BinaryOperator::CreateNot(CondVal, 6121 "not."+CondVal->getName()), SI); 6122 return BinaryOperator::CreateOr(NotCond, TrueVal); 6123 } 6124 } 6125 6126 // select a, b, a -> a&b 6127 // select a, a, b -> a|b 6128 if (CondVal == TrueVal) 6129 return BinaryOperator::CreateOr(CondVal, FalseVal); 6130 else if (CondVal == FalseVal) 6131 return BinaryOperator::CreateAnd(CondVal, TrueVal); 6132 } 6133 6134 // Selecting between two integer constants? 
6135 if (ConstantInt *TrueValC = dyn_cast<ConstantInt>(TrueVal)) 6136 if (ConstantInt *FalseValC = dyn_cast<ConstantInt>(FalseVal)) { 6137 // select C, 1, 0 -> zext C to int 6138 if (FalseValC->isZero() && TrueValC->getValue() == 1) { 6139 return CastInst::Create(Instruction::ZExt, CondVal, SI.getType()); 6140 } else if (TrueValC->isZero() && FalseValC->getValue() == 1) { 6141 // select C, 0, 1 -> zext !C to int 6142 Value *NotCond = 6143 InsertNewInstBefore(BinaryOperator::CreateNot(CondVal, 6144 "not."+CondVal->getName()), SI); 6145 return CastInst::Create(Instruction::ZExt, NotCond, SI.getType()); 6146 } 6147 6148 if (ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition())) { 6149 // If one of the constants is zero (we know they can't both be) and we 6150 // have an icmp instruction with zero, and we have an 'and' with the 6151 // non-constant value, eliminate this whole mess. This corresponds to 6152 // cases like this: ((X & 27) ? 27 : 0) 6153 if (TrueValC->isZero() || FalseValC->isZero()) 6154 if (IC->isEquality() && isa<ConstantInt>(IC->getOperand(1)) && 6155 cast<Constant>(IC->getOperand(1))->isNullValue()) 6156 if (Instruction *ICA = dyn_cast<Instruction>(IC->getOperand(0))) 6157 if (ICA->getOpcode() == Instruction::And && 6158 isa<ConstantInt>(ICA->getOperand(1)) && 6159 (ICA->getOperand(1) == TrueValC || 6160 ICA->getOperand(1) == FalseValC) && 6161 isOneBitSet(cast<ConstantInt>(ICA->getOperand(1)))) { 6162 // Okay, now we know that everything is set up, we just don't 6163 // know whether we have a icmp_ne or icmp_eq and whether the 6164 // true or false val is the zero. 
6165 bool ShouldNotVal = !TrueValC->isZero(); 6166 ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE; 6167 Value *V = ICA; 6168 if (ShouldNotVal) 6169 V = InsertNewInstBefore(BinaryOperator::Create( 6170 Instruction::Xor, V, ICA->getOperand(1)), SI); 6171 return ReplaceInstUsesWith(SI, V); 6172 } 6173 } 6174 } 6175 6176 // See if we are selecting two values based on a comparison of the two values. 6177 if (FCmpInst *FCI = dyn_cast<FCmpInst>(CondVal)) { 6178 if (FCI->getOperand(0) == TrueVal && FCI->getOperand(1) == FalseVal) { 6179 // Transform (X == Y) ? X : Y -> Y 6180 if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) { 6181 // This is not safe in general for floating point: 6182 // consider X== -0, Y== +0. 6183 // It becomes safe if either operand is a nonzero constant. 6184 ConstantFP *CFPt, *CFPf; 6185 if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) && 6186 !CFPt->getValueAPF().isZero()) || 6187 ((CFPf = dyn_cast<ConstantFP>(FalseVal)) && 6188 !CFPf->getValueAPF().isZero())) 6189 return ReplaceInstUsesWith(SI, FalseVal); 6190 } 6191 // Transform (X != Y) ? X : Y -> X 6192 if (FCI->getPredicate() == FCmpInst::FCMP_ONE) 6193 return ReplaceInstUsesWith(SI, TrueVal); 6194 // NOTE: if we wanted to, this is where to detect MIN/MAX 6195 6196 } else if (FCI->getOperand(0) == FalseVal && FCI->getOperand(1) == TrueVal){ 6197 // Transform (X == Y) ? Y : X -> X 6198 if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) { 6199 // This is not safe in general for floating point: 6200 // consider X== -0, Y== +0. 6201 // It becomes safe if either operand is a nonzero constant. 6202 ConstantFP *CFPt, *CFPf; 6203 if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) && 6204 !CFPt->getValueAPF().isZero()) || 6205 ((CFPf = dyn_cast<ConstantFP>(FalseVal)) && 6206 !CFPf->getValueAPF().isZero())) 6207 return ReplaceInstUsesWith(SI, FalseVal); 6208 } 6209 // Transform (X != Y) ? 
Y : X -> Y 6210 if (FCI->getPredicate() == FCmpInst::FCMP_ONE) 6211 return ReplaceInstUsesWith(SI, TrueVal); 6212 // NOTE: if we wanted to, this is where to detect MIN/MAX 6213 } 6214 // NOTE: if we wanted to, this is where to detect ABS 6215 } 6216 6217 // See if we are selecting two values based on a comparison of the two values. 6218 if (ICmpInst *ICI = dyn_cast<ICmpInst>(CondVal)) 6219 if (Instruction *Result = visitSelectInstWithICmp(SI, ICI)) 6220 return Result; 6221 6222 if (Instruction *TI = dyn_cast<Instruction>(TrueVal)) 6223 if (Instruction *FI = dyn_cast<Instruction>(FalseVal)) 6224 if (TI->hasOneUse() && FI->hasOneUse()) { 6225 Instruction *AddOp = 0, *SubOp = 0; 6226 6227 // Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z)) 6228 if (TI->getOpcode() == FI->getOpcode()) 6229 if (Instruction *IV = FoldSelectOpOp(SI, TI, FI)) 6230 return IV; 6231 6232 // Turn select C, (X+Y), (X-Y) --> (X+(select C, Y, (-Y))). This is 6233 // even legal for FP. 6234 if ((TI->getOpcode() == Instruction::Sub && 6235 FI->getOpcode() == Instruction::Add) || 6236 (TI->getOpcode() == Instruction::FSub && 6237 FI->getOpcode() == Instruction::FAdd)) { 6238 AddOp = FI; SubOp = TI; 6239 } else if ((FI->getOpcode() == Instruction::Sub && 6240 TI->getOpcode() == Instruction::Add) || 6241 (FI->getOpcode() == Instruction::FSub && 6242 TI->getOpcode() == Instruction::FAdd)) { 6243 AddOp = TI; SubOp = FI; 6244 } 6245 6246 if (AddOp) { 6247 Value *OtherAddOp = 0; 6248 if (SubOp->getOperand(0) == AddOp->getOperand(0)) { 6249 OtherAddOp = AddOp->getOperand(1); 6250 } else if (SubOp->getOperand(0) == AddOp->getOperand(1)) { 6251 OtherAddOp = AddOp->getOperand(0); 6252 } 6253 6254 if (OtherAddOp) { 6255 // So at this point we know we have (Y -> OtherAddOp): 6256 // select C, (add X, Y), (sub X, Z) 6257 Value *NegVal; // Compute -Z 6258 if (Constant *C = dyn_cast<Constant>(SubOp->getOperand(1))) { 6259 NegVal = ConstantExpr::getNeg(C); 6260 } else { 6261 NegVal = 
InsertNewInstBefore( 6262 BinaryOperator::CreateNeg(SubOp->getOperand(1), 6263 "tmp"), SI); 6264 } 6265 6266 Value *NewTrueOp = OtherAddOp; 6267 Value *NewFalseOp = NegVal; 6268 if (AddOp != TI) 6269 std::swap(NewTrueOp, NewFalseOp); 6270 Instruction *NewSel = 6271 SelectInst::Create(CondVal, NewTrueOp, 6272 NewFalseOp, SI.getName() + ".p"); 6273 6274 NewSel = InsertNewInstBefore(NewSel, SI); 6275 return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel); 6276 } 6277 } 6278 } 6279 6280 // See if we can fold the select into one of our operands. 6281 if (SI.getType()->isInteger()) { 6282 if (Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal)) 6283 return FoldI; 6284 6285 // MAX(MAX(a, b), a) -> MAX(a, b) 6286 // MIN(MIN(a, b), a) -> MIN(a, b) 6287 // MAX(MIN(a, b), a) -> a 6288 // MIN(MAX(a, b), a) -> a 6289 Value *LHS, *RHS, *LHS2, *RHS2; 6290 if (SelectPatternFlavor SPF = MatchSelectPattern(&SI, LHS, RHS)) { 6291 if (SelectPatternFlavor SPF2 = MatchSelectPattern(LHS, LHS2, RHS2)) 6292 if (Instruction *R = FoldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2, 6293 SI, SPF, RHS)) 6294 return R; 6295 if (SelectPatternFlavor SPF2 = MatchSelectPattern(RHS, LHS2, RHS2)) 6296 if (Instruction *R = FoldSPFofSPF(cast<Instruction>(RHS),SPF2,LHS2,RHS2, 6297 SI, SPF, LHS)) 6298 return R; 6299 } 6300 6301 // TODO. 6302 // ABS(-X) -> ABS(X) 6303 // ABS(ABS(X)) -> ABS(X) 6304 } 6305 6306 // See if we can fold the select into a phi node if the condition is a select. 6307 if (isa<PHINode>(SI.getCondition())) 6308 // The true/false values have to be live in the PHI predecessor's blocks. 
6309 if (CanSelectOperandBeMappingIntoPredBlock(TrueVal, SI) && 6310 CanSelectOperandBeMappingIntoPredBlock(FalseVal, SI)) 6311 if (Instruction *NV = FoldOpIntoPhi(SI)) 6312 return NV; 6313 6314 if (BinaryOperator::isNot(CondVal)) { 6315 SI.setOperand(0, BinaryOperator::getNotArgument(CondVal)); 6316 SI.setOperand(1, FalseVal); 6317 SI.setOperand(2, TrueVal); 6318 return &SI; 6319 } 6320 6321 return 0; 6322} 6323 6324/// EnforceKnownAlignment - If the specified pointer points to an object that 6325/// we control, modify the object's alignment to PrefAlign. This isn't 6326/// often possible though. If alignment is important, a more reliable approach 6327/// is to simply align all global variables and allocation instructions to 6328/// their preferred alignment from the beginning. 6329/// 6330static unsigned EnforceKnownAlignment(Value *V, 6331 unsigned Align, unsigned PrefAlign) { 6332 6333 User *U = dyn_cast<User>(V); 6334 if (!U) return Align; 6335 6336 switch (Operator::getOpcode(U)) { 6337 default: break; 6338 case Instruction::BitCast: 6339 return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign); 6340 case Instruction::GetElementPtr: { 6341 // If all indexes are zero, it is just the alignment of the base pointer. 6342 bool AllZeroOperands = true; 6343 for (User::op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i) 6344 if (!isa<Constant>(*i) || 6345 !cast<Constant>(*i)->isNullValue()) { 6346 AllZeroOperands = false; 6347 break; 6348 } 6349 6350 if (AllZeroOperands) { 6351 // Treat this like a bitcast. 6352 return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign); 6353 } 6354 break; 6355 } 6356 } 6357 6358 if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { 6359 // If there is a large requested alignment and we can, bump up the alignment 6360 // of the global. 
6361 if (!GV->isDeclaration()) { 6362 if (GV->getAlignment() >= PrefAlign) 6363 Align = GV->getAlignment(); 6364 else { 6365 GV->setAlignment(PrefAlign); 6366 Align = PrefAlign; 6367 } 6368 } 6369 } else if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { 6370 // If there is a requested alignment and if this is an alloca, round up. 6371 if (AI->getAlignment() >= PrefAlign) 6372 Align = AI->getAlignment(); 6373 else { 6374 AI->setAlignment(PrefAlign); 6375 Align = PrefAlign; 6376 } 6377 } 6378 6379 return Align; 6380} 6381 6382/// GetOrEnforceKnownAlignment - If the specified pointer has an alignment that 6383/// we can determine, return it, otherwise return 0. If PrefAlign is specified, 6384/// and it is more than the alignment of the ultimate object, see if we can 6385/// increase the alignment of the ultimate object, making this check succeed. 6386unsigned InstCombiner::GetOrEnforceKnownAlignment(Value *V, 6387 unsigned PrefAlign) { 6388 unsigned BitWidth = TD ? TD->getTypeSizeInBits(V->getType()) : 6389 sizeof(PrefAlign) * CHAR_BIT; 6390 APInt Mask = APInt::getAllOnesValue(BitWidth); 6391 APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); 6392 ComputeMaskedBits(V, Mask, KnownZero, KnownOne); 6393 unsigned TrailZ = KnownZero.countTrailingOnes(); 6394 unsigned Align = 1u << std::min(BitWidth - 1, TrailZ); 6395 6396 if (PrefAlign > Align) 6397 Align = EnforceKnownAlignment(V, Align, PrefAlign); 6398 6399 // We don't need to make any adjustment. 
6400 return Align; 6401} 6402 6403Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { 6404 unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getOperand(1)); 6405 unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getOperand(2)); 6406 unsigned MinAlign = std::min(DstAlign, SrcAlign); 6407 unsigned CopyAlign = MI->getAlignment(); 6408 6409 if (CopyAlign < MinAlign) { 6410 MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), 6411 MinAlign, false)); 6412 return MI; 6413 } 6414 6415 // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with 6416 // load/store. 6417 ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getOperand(3)); 6418 if (MemOpLength == 0) return 0; 6419 6420 // Source and destination pointer types are always "i8*" for intrinsic. See 6421 // if the size is something we can handle with a single primitive load/store. 6422 // A single load+store correctly handles overlapping memory in the memmove 6423 // case. 6424 unsigned Size = MemOpLength->getZExtValue(); 6425 if (Size == 0) return MI; // Delete this mem transfer. 6426 6427 if (Size > 8 || (Size&(Size-1))) 6428 return 0; // If not 1/2/4/8 bytes, exit. 6429 6430 // Use an integer load+store unless we can find something better. 6431 Type *NewPtrTy = 6432 PointerType::getUnqual(IntegerType::get(MI->getContext(), Size<<3)); 6433 6434 // Memcpy forces the use of i8* for the source and destination. That means 6435 // that if you're using memcpy to move one double around, you'll get a cast 6436 // from double* to i8*. We'd much rather use a double load+store rather than 6437 // an i64 load+store, here because this improves the odds that the source or 6438 // dest address will be promotable. See if we can find a better type than the 6439 // integer datatype. 
6440 if (Value *Op = getBitCastOperand(MI->getOperand(1))) { 6441 const Type *SrcETy = cast<PointerType>(Op->getType())->getElementType(); 6442 if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) { 6443 // The SrcETy might be something like {{{double}}} or [1 x double]. Rip 6444 // down through these levels if so. 6445 while (!SrcETy->isSingleValueType()) { 6446 if (const StructType *STy = dyn_cast<StructType>(SrcETy)) { 6447 if (STy->getNumElements() == 1) 6448 SrcETy = STy->getElementType(0); 6449 else 6450 break; 6451 } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcETy)) { 6452 if (ATy->getNumElements() == 1) 6453 SrcETy = ATy->getElementType(); 6454 else 6455 break; 6456 } else 6457 break; 6458 } 6459 6460 if (SrcETy->isSingleValueType()) 6461 NewPtrTy = PointerType::getUnqual(SrcETy); 6462 } 6463 } 6464 6465 6466 // If the memcpy/memmove provides better alignment info than we can 6467 // infer, use it. 6468 SrcAlign = std::max(SrcAlign, CopyAlign); 6469 DstAlign = std::max(DstAlign, CopyAlign); 6470 6471 Value *Src = Builder->CreateBitCast(MI->getOperand(2), NewPtrTy); 6472 Value *Dest = Builder->CreateBitCast(MI->getOperand(1), NewPtrTy); 6473 Instruction *L = new LoadInst(Src, "tmp", false, SrcAlign); 6474 InsertNewInstBefore(L, *MI); 6475 InsertNewInstBefore(new StoreInst(L, Dest, false, DstAlign), *MI); 6476 6477 // Set the size of the copy to 0, it will be deleted on the next iteration. 6478 MI->setOperand(3, Constant::getNullValue(MemOpLength->getType())); 6479 return MI; 6480} 6481 6482Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { 6483 unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest()); 6484 if (MI->getAlignment() < Alignment) { 6485 MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), 6486 Alignment, false)); 6487 return MI; 6488 } 6489 6490 // Extract the length and alignment and fill if they are constant. 
6491 ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength()); 6492 ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue()); 6493 if (!LenC || !FillC || FillC->getType() != Type::getInt8Ty(MI->getContext())) 6494 return 0; 6495 uint64_t Len = LenC->getZExtValue(); 6496 Alignment = MI->getAlignment(); 6497 6498 // If the length is zero, this is a no-op 6499 if (Len == 0) return MI; // memset(d,c,0,a) -> noop 6500 6501 // memset(s,c,n) -> store s, c (for n=1,2,4,8) 6502 if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) { 6503 const Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8. 6504 6505 Value *Dest = MI->getDest(); 6506 Dest = Builder->CreateBitCast(Dest, PointerType::getUnqual(ITy)); 6507 6508 // Alignment 0 is identity for alignment 1 for memset, but not store. 6509 if (Alignment == 0) Alignment = 1; 6510 6511 // Extract the fill value and store. 6512 uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL; 6513 InsertNewInstBefore(new StoreInst(ConstantInt::get(ITy, Fill), 6514 Dest, false, Alignment), *MI); 6515 6516 // Set the size of the copy to 0, it will be deleted on the next iteration. 6517 MI->setLength(Constant::getNullValue(LenC->getType())); 6518 return MI; 6519 } 6520 6521 return 0; 6522} 6523 6524 6525/// visitCallInst - CallInst simplification. This mostly only handles folding 6526/// of intrinsic instructions. For normal calls, it allows visitCallSite to do 6527/// the heavy lifting. 6528/// 6529Instruction *InstCombiner::visitCallInst(CallInst &CI) { 6530 if (isFreeCall(&CI)) 6531 return visitFree(CI); 6532 6533 // If the caller function is nounwind, mark the call as nounwind, even if the 6534 // callee isn't. 
6535 if (CI.getParent()->getParent()->doesNotThrow() && 6536 !CI.doesNotThrow()) { 6537 CI.setDoesNotThrow(); 6538 return &CI; 6539 } 6540 6541 IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI); 6542 if (!II) return visitCallSite(&CI); 6543 6544 // Intrinsics cannot occur in an invoke, so handle them here instead of in 6545 // visitCallSite. 6546 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) { 6547 bool Changed = false; 6548 6549 // memmove/cpy/set of zero bytes is a noop. 6550 if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) { 6551 if (NumBytes->isNullValue()) return EraseInstFromFunction(CI); 6552 6553 if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes)) 6554 if (CI->getZExtValue() == 1) { 6555 // Replace the instruction with just byte operations. We would 6556 // transform other cases to loads/stores, but we don't know if 6557 // alignment is sufficient. 6558 } 6559 } 6560 6561 // If we have a memmove and the source operation is a constant global, 6562 // then the source and dest pointers can't alias, so we can change this 6563 // into a call to memcpy. 6564 if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) { 6565 if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource())) 6566 if (GVSrc->isConstant()) { 6567 Module *M = CI.getParent()->getParent()->getParent(); 6568 Intrinsic::ID MemCpyID = Intrinsic::memcpy; 6569 const Type *Tys[1]; 6570 Tys[0] = CI.getOperand(3)->getType(); 6571 CI.setOperand(0, 6572 Intrinsic::getDeclaration(M, MemCpyID, Tys, 1)); 6573 Changed = true; 6574 } 6575 } 6576 6577 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { 6578 // memmove(x,x,size) -> noop. 6579 if (MTI->getSource() == MTI->getDest()) 6580 return EraseInstFromFunction(CI); 6581 } 6582 6583 // If we can determine a pointer alignment that is bigger than currently 6584 // set, update the alignment. 
6585 if (isa<MemTransferInst>(MI)) { 6586 if (Instruction *I = SimplifyMemTransfer(MI)) 6587 return I; 6588 } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) { 6589 if (Instruction *I = SimplifyMemSet(MSI)) 6590 return I; 6591 } 6592 6593 if (Changed) return II; 6594 } 6595 6596 switch (II->getIntrinsicID()) { 6597 default: break; 6598 case Intrinsic::bswap: 6599 // bswap(bswap(x)) -> x 6600 if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getOperand(1))) 6601 if (Operand->getIntrinsicID() == Intrinsic::bswap) 6602 return ReplaceInstUsesWith(CI, Operand->getOperand(1)); 6603 6604 // bswap(trunc(bswap(x))) -> trunc(lshr(x, c)) 6605 if (TruncInst *TI = dyn_cast<TruncInst>(II->getOperand(1))) { 6606 if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(TI->getOperand(0))) 6607 if (Operand->getIntrinsicID() == Intrinsic::bswap) { 6608 unsigned C = Operand->getType()->getPrimitiveSizeInBits() - 6609 TI->getType()->getPrimitiveSizeInBits(); 6610 Value *CV = ConstantInt::get(Operand->getType(), C); 6611 Value *V = Builder->CreateLShr(Operand->getOperand(1), CV); 6612 return new TruncInst(V, TI->getType()); 6613 } 6614 } 6615 6616 break; 6617 case Intrinsic::powi: 6618 if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getOperand(2))) { 6619 // powi(x, 0) -> 1.0 6620 if (Power->isZero()) 6621 return ReplaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0)); 6622 // powi(x, 1) -> x 6623 if (Power->isOne()) 6624 return ReplaceInstUsesWith(CI, II->getOperand(1)); 6625 // powi(x, -1) -> 1/x 6626 if (Power->isAllOnesValue()) 6627 return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0), 6628 II->getOperand(1)); 6629 } 6630 break; 6631 6632 case Intrinsic::uadd_with_overflow: { 6633 Value *LHS = II->getOperand(1), *RHS = II->getOperand(2); 6634 const IntegerType *IT = cast<IntegerType>(II->getOperand(1)->getType()); 6635 uint32_t BitWidth = IT->getBitWidth(); 6636 APInt Mask = APInt::getSignBit(BitWidth); 6637 APInt LHSKnownZero(BitWidth, 0); 6638 APInt 
LHSKnownOne(BitWidth, 0); 6639 ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne); 6640 bool LHSKnownNegative = LHSKnownOne[BitWidth - 1]; 6641 bool LHSKnownPositive = LHSKnownZero[BitWidth - 1]; 6642 6643 if (LHSKnownNegative || LHSKnownPositive) { 6644 APInt RHSKnownZero(BitWidth, 0); 6645 APInt RHSKnownOne(BitWidth, 0); 6646 ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne); 6647 bool RHSKnownNegative = RHSKnownOne[BitWidth - 1]; 6648 bool RHSKnownPositive = RHSKnownZero[BitWidth - 1]; 6649 if (LHSKnownNegative && RHSKnownNegative) { 6650 // The sign bit is set in both cases: this MUST overflow. 6651 // Create a simple add instruction, and insert it into the struct. 6652 Instruction *Add = BinaryOperator::CreateAdd(LHS, RHS, "", &CI); 6653 Worklist.Add(Add); 6654 Constant *V[] = { 6655 UndefValue::get(LHS->getType()),ConstantInt::getTrue(II->getContext()) 6656 }; 6657 Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); 6658 return InsertValueInst::Create(Struct, Add, 0); 6659 } 6660 6661 if (LHSKnownPositive && RHSKnownPositive) { 6662 // The sign bit is clear in both cases: this CANNOT overflow. 6663 // Create a simple add instruction, and insert it into the struct. 6664 Instruction *Add = BinaryOperator::CreateNUWAdd(LHS, RHS, "", &CI); 6665 Worklist.Add(Add); 6666 Constant *V[] = { 6667 UndefValue::get(LHS->getType()), 6668 ConstantInt::getFalse(II->getContext()) 6669 }; 6670 Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); 6671 return InsertValueInst::Create(Struct, Add, 0); 6672 } 6673 } 6674 } 6675 // FALL THROUGH uadd into sadd 6676 case Intrinsic::sadd_with_overflow: 6677 // Canonicalize constants into the RHS. 
6678 if (isa<Constant>(II->getOperand(1)) && 6679 !isa<Constant>(II->getOperand(2))) { 6680 Value *LHS = II->getOperand(1); 6681 II->setOperand(1, II->getOperand(2)); 6682 II->setOperand(2, LHS); 6683 return II; 6684 } 6685 6686 // X + undef -> undef 6687 if (isa<UndefValue>(II->getOperand(2))) 6688 return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); 6689 6690 if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getOperand(2))) { 6691 // X + 0 -> {X, false} 6692 if (RHS->isZero()) { 6693 Constant *V[] = { 6694 UndefValue::get(II->getOperand(0)->getType()), 6695 ConstantInt::getFalse(II->getContext()) 6696 }; 6697 Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); 6698 return InsertValueInst::Create(Struct, II->getOperand(1), 0); 6699 } 6700 } 6701 break; 6702 case Intrinsic::usub_with_overflow: 6703 case Intrinsic::ssub_with_overflow: 6704 // undef - X -> undef 6705 // X - undef -> undef 6706 if (isa<UndefValue>(II->getOperand(1)) || 6707 isa<UndefValue>(II->getOperand(2))) 6708 return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); 6709 6710 if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getOperand(2))) { 6711 // X - 0 -> {X, false} 6712 if (RHS->isZero()) { 6713 Constant *V[] = { 6714 UndefValue::get(II->getOperand(1)->getType()), 6715 ConstantInt::getFalse(II->getContext()) 6716 }; 6717 Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); 6718 return InsertValueInst::Create(Struct, II->getOperand(1), 0); 6719 } 6720 } 6721 break; 6722 case Intrinsic::umul_with_overflow: 6723 case Intrinsic::smul_with_overflow: 6724 // Canonicalize constants into the RHS. 
6725 if (isa<Constant>(II->getOperand(1)) && 6726 !isa<Constant>(II->getOperand(2))) { 6727 Value *LHS = II->getOperand(1); 6728 II->setOperand(1, II->getOperand(2)); 6729 II->setOperand(2, LHS); 6730 return II; 6731 } 6732 6733 // X * undef -> undef 6734 if (isa<UndefValue>(II->getOperand(2))) 6735 return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); 6736 6737 if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getOperand(2))) { 6738 // X*0 -> {0, false} 6739 if (RHSI->isZero()) 6740 return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType())); 6741 6742 // X * 1 -> {X, false} 6743 if (RHSI->equalsInt(1)) { 6744 Constant *V[] = { 6745 UndefValue::get(II->getOperand(1)->getType()), 6746 ConstantInt::getFalse(II->getContext()) 6747 }; 6748 Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); 6749 return InsertValueInst::Create(Struct, II->getOperand(1), 0); 6750 } 6751 } 6752 break; 6753 case Intrinsic::ppc_altivec_lvx: 6754 case Intrinsic::ppc_altivec_lvxl: 6755 case Intrinsic::x86_sse_loadu_ps: 6756 case Intrinsic::x86_sse2_loadu_pd: 6757 case Intrinsic::x86_sse2_loadu_dq: 6758 // Turn PPC lvx -> load if the pointer is known aligned. 6759 // Turn X86 loadups -> load if the pointer is known aligned. 6760 if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) { 6761 Value *Ptr = Builder->CreateBitCast(II->getOperand(1), 6762 PointerType::getUnqual(II->getType())); 6763 return new LoadInst(Ptr); 6764 } 6765 break; 6766 case Intrinsic::ppc_altivec_stvx: 6767 case Intrinsic::ppc_altivec_stvxl: 6768 // Turn stvx -> store if the pointer is known aligned. 
6769 if (GetOrEnforceKnownAlignment(II->getOperand(2), 16) >= 16) { 6770 const Type *OpPtrTy = 6771 PointerType::getUnqual(II->getOperand(1)->getType()); 6772 Value *Ptr = Builder->CreateBitCast(II->getOperand(2), OpPtrTy); 6773 return new StoreInst(II->getOperand(1), Ptr); 6774 } 6775 break; 6776 case Intrinsic::x86_sse_storeu_ps: 6777 case Intrinsic::x86_sse2_storeu_pd: 6778 case Intrinsic::x86_sse2_storeu_dq: 6779 // Turn X86 storeu -> store if the pointer is known aligned. 6780 if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) { 6781 const Type *OpPtrTy = 6782 PointerType::getUnqual(II->getOperand(2)->getType()); 6783 Value *Ptr = Builder->CreateBitCast(II->getOperand(1), OpPtrTy); 6784 return new StoreInst(II->getOperand(2), Ptr); 6785 } 6786 break; 6787 6788 case Intrinsic::x86_sse_cvttss2si: { 6789 // These intrinsics only demands the 0th element of its input vector. If 6790 // we can simplify the input based on that, do so now. 6791 unsigned VWidth = 6792 cast<VectorType>(II->getOperand(1)->getType())->getNumElements(); 6793 APInt DemandedElts(VWidth, 1); 6794 APInt UndefElts(VWidth, 0); 6795 if (Value *V = SimplifyDemandedVectorElts(II->getOperand(1), DemandedElts, 6796 UndefElts)) { 6797 II->setOperand(1, V); 6798 return II; 6799 } 6800 break; 6801 } 6802 6803 case Intrinsic::ppc_altivec_vperm: 6804 // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant. 6805 if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getOperand(3))) { 6806 assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!"); 6807 6808 // Check that all of the elements are integer constants or undefs. 6809 bool AllEltsOk = true; 6810 for (unsigned i = 0; i != 16; ++i) { 6811 if (!isa<ConstantInt>(Mask->getOperand(i)) && 6812 !isa<UndefValue>(Mask->getOperand(i))) { 6813 AllEltsOk = false; 6814 break; 6815 } 6816 } 6817 6818 if (AllEltsOk) { 6819 // Cast the input vectors to byte vectors. 
6820 Value *Op0 = Builder->CreateBitCast(II->getOperand(1), Mask->getType()); 6821 Value *Op1 = Builder->CreateBitCast(II->getOperand(2), Mask->getType()); 6822 Value *Result = UndefValue::get(Op0->getType()); 6823 6824 // Only extract each element once. 6825 Value *ExtractedElts[32]; 6826 memset(ExtractedElts, 0, sizeof(ExtractedElts)); 6827 6828 for (unsigned i = 0; i != 16; ++i) { 6829 if (isa<UndefValue>(Mask->getOperand(i))) 6830 continue; 6831 unsigned Idx=cast<ConstantInt>(Mask->getOperand(i))->getZExtValue(); 6832 Idx &= 31; // Match the hardware behavior. 6833 6834 if (ExtractedElts[Idx] == 0) { 6835 ExtractedElts[Idx] = 6836 Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1, 6837 ConstantInt::get(Type::getInt32Ty(II->getContext()), 6838 Idx&15, false), "tmp"); 6839 } 6840 6841 // Insert this value into the result vector. 6842 Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx], 6843 ConstantInt::get(Type::getInt32Ty(II->getContext()), 6844 i, false), "tmp"); 6845 } 6846 return CastInst::Create(Instruction::BitCast, Result, CI.getType()); 6847 } 6848 } 6849 break; 6850 6851 case Intrinsic::stackrestore: { 6852 // If the save is right next to the restore, remove the restore. This can 6853 // happen when variable allocas are DCE'd. 6854 if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getOperand(1))) { 6855 if (SS->getIntrinsicID() == Intrinsic::stacksave) { 6856 BasicBlock::iterator BI = SS; 6857 if (&*++BI == II) 6858 return EraseInstFromFunction(CI); 6859 } 6860 } 6861 6862 // Scan down this block to see if there is another stack restore in the 6863 // same block without an intervening call/alloca. 
6864 BasicBlock::iterator BI = II; 6865 TerminatorInst *TI = II->getParent()->getTerminator(); 6866 bool CannotRemove = false; 6867 for (++BI; &*BI != TI; ++BI) { 6868 if (isa<AllocaInst>(BI) || isMalloc(BI)) { 6869 CannotRemove = true; 6870 break; 6871 } 6872 if (CallInst *BCI = dyn_cast<CallInst>(BI)) { 6873 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) { 6874 // If there is a stackrestore below this one, remove this one. 6875 if (II->getIntrinsicID() == Intrinsic::stackrestore) 6876 return EraseInstFromFunction(CI); 6877 // Otherwise, ignore the intrinsic. 6878 } else { 6879 // If we found a non-intrinsic call, we can't remove the stack 6880 // restore. 6881 CannotRemove = true; 6882 break; 6883 } 6884 } 6885 } 6886 6887 // If the stack restore is in a return/unwind block and if there are no 6888 // allocas or calls between the restore and the return, nuke the restore. 6889 if (!CannotRemove && (isa<ReturnInst>(TI) || isa<UnwindInst>(TI))) 6890 return EraseInstFromFunction(CI); 6891 break; 6892 } 6893 } 6894 6895 return visitCallSite(II); 6896} 6897 6898// InvokeInst simplification 6899// 6900Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) { 6901 return visitCallSite(&II); 6902} 6903 6904/// isSafeToEliminateVarargsCast - If this cast does not affect the value 6905/// passed through the varargs area, we can eliminate the use of the cast. 6906static bool isSafeToEliminateVarargsCast(const CallSite CS, 6907 const CastInst * const CI, 6908 const TargetData * const TD, 6909 const int ix) { 6910 if (!CI->isLosslessCast()) 6911 return false; 6912 6913 // The size of ByVal arguments is derived from the type, so we 6914 // can't change to a type with a different size. If the size were 6915 // passed explicitly we could avoid this check. 
6916 if (!CS.paramHasAttr(ix, Attribute::ByVal)) 6917 return true; 6918 6919 const Type* SrcTy = 6920 cast<PointerType>(CI->getOperand(0)->getType())->getElementType(); 6921 const Type* DstTy = cast<PointerType>(CI->getType())->getElementType(); 6922 if (!SrcTy->isSized() || !DstTy->isSized()) 6923 return false; 6924 if (!TD || TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy)) 6925 return false; 6926 return true; 6927} 6928 6929// visitCallSite - Improvements for call and invoke instructions. 6930// 6931Instruction *InstCombiner::visitCallSite(CallSite CS) { 6932 bool Changed = false; 6933 6934 // If the callee is a constexpr cast of a function, attempt to move the cast 6935 // to the arguments of the call/invoke. 6936 if (transformConstExprCastCall(CS)) return 0; 6937 6938 Value *Callee = CS.getCalledValue(); 6939 6940 if (Function *CalleeF = dyn_cast<Function>(Callee)) 6941 if (CalleeF->getCallingConv() != CS.getCallingConv()) { 6942 Instruction *OldCall = CS.getInstruction(); 6943 // If the call and callee calling conventions don't match, this call must 6944 // be unreachable, as the call is undefined. 6945 new StoreInst(ConstantInt::getTrue(Callee->getContext()), 6946 UndefValue::get(Type::getInt1PtrTy(Callee->getContext())), 6947 OldCall); 6948 // If OldCall dues not return void then replaceAllUsesWith undef. 6949 // This allows ValueHandlers and custom metadata to adjust itself. 6950 if (!OldCall->getType()->isVoidTy()) 6951 OldCall->replaceAllUsesWith(UndefValue::get(OldCall->getType())); 6952 if (isa<CallInst>(OldCall)) // Not worth removing an invoke here. 6953 return EraseInstFromFunction(*OldCall); 6954 return 0; 6955 } 6956 6957 if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) { 6958 // This instruction is not reachable, just remove it. We insert a store to 6959 // undef so that we know that this code is not reachable, despite the fact 6960 // that we can't modify the CFG here. 
6961 new StoreInst(ConstantInt::getTrue(Callee->getContext()), 6962 UndefValue::get(Type::getInt1PtrTy(Callee->getContext())), 6963 CS.getInstruction()); 6964 6965 // If CS dues not return void then replaceAllUsesWith undef. 6966 // This allows ValueHandlers and custom metadata to adjust itself. 6967 if (!CS.getInstruction()->getType()->isVoidTy()) 6968 CS.getInstruction()-> 6969 replaceAllUsesWith(UndefValue::get(CS.getInstruction()->getType())); 6970 6971 if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) { 6972 // Don't break the CFG, insert a dummy cond branch. 6973 BranchInst::Create(II->getNormalDest(), II->getUnwindDest(), 6974 ConstantInt::getTrue(Callee->getContext()), II); 6975 } 6976 return EraseInstFromFunction(*CS.getInstruction()); 6977 } 6978 6979 if (BitCastInst *BC = dyn_cast<BitCastInst>(Callee)) 6980 if (IntrinsicInst *In = dyn_cast<IntrinsicInst>(BC->getOperand(0))) 6981 if (In->getIntrinsicID() == Intrinsic::init_trampoline) 6982 return transformCallThroughTrampoline(CS); 6983 6984 const PointerType *PTy = cast<PointerType>(Callee->getType()); 6985 const FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); 6986 if (FTy->isVarArg()) { 6987 int ix = FTy->getNumParams() + (isa<InvokeInst>(Callee) ? 3 : 1); 6988 // See if we can optimize any arguments passed through the varargs area of 6989 // the call. 6990 for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(), 6991 E = CS.arg_end(); I != E; ++I, ++ix) { 6992 CastInst *CI = dyn_cast<CastInst>(*I); 6993 if (CI && isSafeToEliminateVarargsCast(CS, CI, TD, ix)) { 6994 *I = CI->getOperand(0); 6995 Changed = true; 6996 } 6997 } 6998 } 6999 7000 if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) { 7001 // Inline asm calls cannot throw - mark them 'nounwind'. 7002 CS.setDoesNotThrow(); 7003 Changed = true; 7004 } 7005 7006 return Changed ? 
CS.getInstruction() : 0; 7007} 7008 7009// transformConstExprCastCall - If the callee is a constexpr cast of a function, 7010// attempt to move the cast to the arguments of the call/invoke. 7011// 7012bool InstCombiner::transformConstExprCastCall(CallSite CS) { 7013 if (!isa<ConstantExpr>(CS.getCalledValue())) return false; 7014 ConstantExpr *CE = cast<ConstantExpr>(CS.getCalledValue()); 7015 if (CE->getOpcode() != Instruction::BitCast || 7016 !isa<Function>(CE->getOperand(0))) 7017 return false; 7018 Function *Callee = cast<Function>(CE->getOperand(0)); 7019 Instruction *Caller = CS.getInstruction(); 7020 const AttrListPtr &CallerPAL = CS.getAttributes(); 7021 7022 // Okay, this is a cast from a function to a different type. Unless doing so 7023 // would cause a type conversion of one of our arguments, change this call to 7024 // be a direct call with arguments casted to the appropriate types. 7025 // 7026 const FunctionType *FT = Callee->getFunctionType(); 7027 const Type *OldRetTy = Caller->getType(); 7028 const Type *NewRetTy = FT->getReturnType(); 7029 7030 if (isa<StructType>(NewRetTy)) 7031 return false; // TODO: Handle multiple return values. 7032 7033 // Check to see if we are changing the return type... 7034 if (OldRetTy != NewRetTy) { 7035 if (Callee->isDeclaration() && 7036 // Conversion is ok if changing from one pointer type to another or from 7037 // a pointer to an integer of the same size. 7038 !((isa<PointerType>(OldRetTy) || !TD || 7039 OldRetTy == TD->getIntPtrType(Caller->getContext())) && 7040 (isa<PointerType>(NewRetTy) || !TD || 7041 NewRetTy == TD->getIntPtrType(Caller->getContext())))) 7042 return false; // Cannot transform this return value. 7043 7044 if (!Caller->use_empty() && 7045 // void -> non-void is handled specially 7046 !NewRetTy->isVoidTy() && !CastInst::isCastable(NewRetTy, OldRetTy)) 7047 return false; // Cannot transform this return value. 
7048 7049 if (!CallerPAL.isEmpty() && !Caller->use_empty()) { 7050 Attributes RAttrs = CallerPAL.getRetAttributes(); 7051 if (RAttrs & Attribute::typeIncompatible(NewRetTy)) 7052 return false; // Attribute not compatible with transformed value. 7053 } 7054 7055 // If the callsite is an invoke instruction, and the return value is used by 7056 // a PHI node in a successor, we cannot change the return type of the call 7057 // because there is no place to put the cast instruction (without breaking 7058 // the critical edge). Bail out in this case. 7059 if (!Caller->use_empty()) 7060 if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) 7061 for (Value::use_iterator UI = II->use_begin(), E = II->use_end(); 7062 UI != E; ++UI) 7063 if (PHINode *PN = dyn_cast<PHINode>(*UI)) 7064 if (PN->getParent() == II->getNormalDest() || 7065 PN->getParent() == II->getUnwindDest()) 7066 return false; 7067 } 7068 7069 unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin()); 7070 unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs); 7071 7072 CallSite::arg_iterator AI = CS.arg_begin(); 7073 for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) { 7074 const Type *ParamTy = FT->getParamType(i); 7075 const Type *ActTy = (*AI)->getType(); 7076 7077 if (!CastInst::isCastable(ActTy, ParamTy)) 7078 return false; // Cannot transform this parameter value. 7079 7080 if (CallerPAL.getParamAttributes(i + 1) 7081 & Attribute::typeIncompatible(ParamTy)) 7082 return false; // Attribute not compatible with transformed value. 7083 7084 // Converting from one pointer type to another or between a pointer and an 7085 // integer of the same size is safe even if we do not have a body. 
7086 bool isConvertible = ActTy == ParamTy || 7087 (TD && ((isa<PointerType>(ParamTy) || 7088 ParamTy == TD->getIntPtrType(Caller->getContext())) && 7089 (isa<PointerType>(ActTy) || 7090 ActTy == TD->getIntPtrType(Caller->getContext())))); 7091 if (Callee->isDeclaration() && !isConvertible) return false; 7092 } 7093 7094 if (FT->getNumParams() < NumActualArgs && !FT->isVarArg() && 7095 Callee->isDeclaration()) 7096 return false; // Do not delete arguments unless we have a function body. 7097 7098 if (FT->getNumParams() < NumActualArgs && FT->isVarArg() && 7099 !CallerPAL.isEmpty()) 7100 // In this case we have more arguments than the new function type, but we 7101 // won't be dropping them. Check that these extra arguments have attributes 7102 // that are compatible with being a vararg call argument. 7103 for (unsigned i = CallerPAL.getNumSlots(); i; --i) { 7104 if (CallerPAL.getSlot(i - 1).Index <= FT->getNumParams()) 7105 break; 7106 Attributes PAttrs = CallerPAL.getSlot(i - 1).Attrs; 7107 if (PAttrs & Attribute::VarArgsIncompatible) 7108 return false; 7109 } 7110 7111 // Okay, we decided that this is a safe thing to do: go ahead and start 7112 // inserting cast instructions as necessary... 7113 std::vector<Value*> Args; 7114 Args.reserve(NumActualArgs); 7115 SmallVector<AttributeWithIndex, 8> attrVec; 7116 attrVec.reserve(NumCommonArgs); 7117 7118 // Get any return attributes. 7119 Attributes RAttrs = CallerPAL.getRetAttributes(); 7120 7121 // If the return value is not being used, the type may not be compatible 7122 // with the existing attributes. Wipe out any problematic attributes. 7123 RAttrs &= ~Attribute::typeIncompatible(NewRetTy); 7124 7125 // Add the new return attributes. 
7126 if (RAttrs) 7127 attrVec.push_back(AttributeWithIndex::get(0, RAttrs)); 7128 7129 AI = CS.arg_begin(); 7130 for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) { 7131 const Type *ParamTy = FT->getParamType(i); 7132 if ((*AI)->getType() == ParamTy) { 7133 Args.push_back(*AI); 7134 } else { 7135 Instruction::CastOps opcode = CastInst::getCastOpcode(*AI, 7136 false, ParamTy, false); 7137 Args.push_back(Builder->CreateCast(opcode, *AI, ParamTy, "tmp")); 7138 } 7139 7140 // Add any parameter attributes. 7141 if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1)) 7142 attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs)); 7143 } 7144 7145 // If the function takes more arguments than the call was taking, add them 7146 // now. 7147 for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) 7148 Args.push_back(Constant::getNullValue(FT->getParamType(i))); 7149 7150 // If we are removing arguments to the function, emit an obnoxious warning. 7151 if (FT->getNumParams() < NumActualArgs) { 7152 if (!FT->isVarArg()) { 7153 errs() << "WARNING: While resolving call to function '" 7154 << Callee->getName() << "' arguments were dropped!\n"; 7155 } else { 7156 // Add all of the arguments in their promoted form to the arg list. 7157 for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) { 7158 const Type *PTy = getPromotedType((*AI)->getType()); 7159 if (PTy != (*AI)->getType()) { 7160 // Must promote to pass through va_arg area! 7161 Instruction::CastOps opcode = 7162 CastInst::getCastOpcode(*AI, false, PTy, false); 7163 Args.push_back(Builder->CreateCast(opcode, *AI, PTy, "tmp")); 7164 } else { 7165 Args.push_back(*AI); 7166 } 7167 7168 // Add any parameter attributes. 
7169 if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1)) 7170 attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs)); 7171 } 7172 } 7173 } 7174 7175 if (Attributes FnAttrs = CallerPAL.getFnAttributes()) 7176 attrVec.push_back(AttributeWithIndex::get(~0, FnAttrs)); 7177 7178 if (NewRetTy->isVoidTy()) 7179 Caller->setName(""); // Void type should not have a name. 7180 7181 const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec.begin(), 7182 attrVec.end()); 7183 7184 Instruction *NC; 7185 if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { 7186 NC = InvokeInst::Create(Callee, II->getNormalDest(), II->getUnwindDest(), 7187 Args.begin(), Args.end(), 7188 Caller->getName(), Caller); 7189 cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv()); 7190 cast<InvokeInst>(NC)->setAttributes(NewCallerPAL); 7191 } else { 7192 NC = CallInst::Create(Callee, Args.begin(), Args.end(), 7193 Caller->getName(), Caller); 7194 CallInst *CI = cast<CallInst>(Caller); 7195 if (CI->isTailCall()) 7196 cast<CallInst>(NC)->setTailCall(); 7197 cast<CallInst>(NC)->setCallingConv(CI->getCallingConv()); 7198 cast<CallInst>(NC)->setAttributes(NewCallerPAL); 7199 } 7200 7201 // Insert a cast of the return type as necessary. 7202 Value *NV = NC; 7203 if (OldRetTy != NV->getType() && !Caller->use_empty()) { 7204 if (!NV->getType()->isVoidTy()) { 7205 Instruction::CastOps opcode = CastInst::getCastOpcode(NC, false, 7206 OldRetTy, false); 7207 NV = NC = CastInst::Create(opcode, NC, OldRetTy, "tmp"); 7208 7209 // If this is an invoke instruction, we should insert it after the first 7210 // non-phi, instruction in the normal successor block. 
7211 if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { 7212 BasicBlock::iterator I = II->getNormalDest()->getFirstNonPHI(); 7213 InsertNewInstBefore(NC, *I); 7214 } else { 7215 // Otherwise, it's a call, just insert cast right after the call instr 7216 InsertNewInstBefore(NC, *Caller); 7217 } 7218 Worklist.AddUsersToWorkList(*Caller); 7219 } else { 7220 NV = UndefValue::get(Caller->getType()); 7221 } 7222 } 7223 7224 7225 if (!Caller->use_empty()) 7226 Caller->replaceAllUsesWith(NV); 7227 7228 EraseInstFromFunction(*Caller); 7229 return true; 7230}

// transformCallThroughTrampoline - Turn a call to a function created by the
// init_trampoline intrinsic into a direct call to the underlying function.
//
// The call site's callee must be a bitcast of an init_trampoline intrinsic
// call (the caller, visitCallSite, checks this before dispatching here).
//
// Returns 0 when the call already carries 'nest', or when the call site was
// replaced by a new call/invoke that passes the chain value explicitly.
// Returns the (retargeted) call site instruction when the underlying
// function has no 'nest' parameter and only the callee was changed.
//
Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
  Value *Callee = CS.getCalledValue();
  const PointerType *PTy = cast<PointerType>(Callee->getType());
  const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
  const AttrListPtr &Attrs = CS.getAttributes();

  // If the call already has the 'nest' attribute somewhere then give up -
  // otherwise 'nest' would occur twice after splicing in the chain.
  if (Attrs.hasAttrSomewhere(Attribute::Nest))
    return 0;

  IntrinsicInst *Tramp =
    cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0));

  // Operand 2 of the intrinsic call is the underlying function, operand 3
  // (used below) the chain value to pass as its 'nest' argument.
  Function *NestF = cast<Function>(Tramp->getOperand(2)->stripPointerCasts());
  const PointerType *NestFPTy = cast<PointerType>(NestF->getType());
  const FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());

  const AttrListPtr &NestAttrs = NestF->getAttributes();
  if (!NestAttrs.isEmpty()) {
    unsigned NestIdx = 1;   // 1-based attribute index of the 'nest' parameter.
    const Type *NestTy = 0;
    Attributes NestAttr = Attribute::None;

    // Look for a parameter marked with the 'nest' attribute.
    for (FunctionType::param_iterator I = NestFTy->param_begin(),
           E = NestFTy->param_end(); I != E; ++NestIdx, ++I)
      if (NestAttrs.paramHasAttr(NestIdx, Attribute::Nest)) {
        // Record the parameter type and any other attributes.
        NestTy = *I;
        NestAttr = NestAttrs.getParamAttributes(NestIdx);
        break;
      }

    if (NestTy) {
      Instruction *Caller = CS.getInstruction();
      std::vector<Value*> NewArgs;
      NewArgs.reserve(unsigned(CS.arg_end()-CS.arg_begin())+1);

      SmallVector<AttributeWithIndex, 8> NewAttrs;
      NewAttrs.reserve(Attrs.getNumSlots() + 1);

      // Insert the nest argument into the call argument list, which may
      // mean appending it.  Likewise for attributes.

      // Add any result attributes.
      if (Attributes Attr = Attrs.getRetAttributes())
        NewAttrs.push_back(AttributeWithIndex::get(0, Attr));

      {
        unsigned Idx = 1;
        CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
        // The nest check comes before the end-of-arguments check so that a
        // chain parameter positioned right after the last argument is still
        // appended.
        do {
          if (Idx == NestIdx) {
            // Add the chain argument and attributes.
            Value *NestVal = Tramp->getOperand(3);
            if (NestVal->getType() != NestTy)
              NestVal = new BitCastInst(NestVal, NestTy, "nest", Caller);
            NewArgs.push_back(NestVal);
            NewAttrs.push_back(AttributeWithIndex::get(NestIdx, NestAttr));
          }

          if (I == E)
            break;

          // Add the original argument and attributes.  Arguments at or after
          // the chain's position shift up by one attribute index.
          NewArgs.push_back(*I);
          if (Attributes Attr = Attrs.getParamAttributes(Idx))
            NewAttrs.push_back
              (AttributeWithIndex::get(Idx + (Idx >= NestIdx), Attr));

          ++Idx, ++I;
        } while (1);
      }

      // Add any function attributes.
      if (Attributes Attr = Attrs.getFnAttributes())
        NewAttrs.push_back(AttributeWithIndex::get(~0, Attr));

      // The trampoline may have been bitcast to a bogus type (FTy).
      // Handle this by synthesizing a new function type, equal to FTy
      // with the chain parameter inserted.

      std::vector<const Type*> NewTypes;
      NewTypes.reserve(FTy->getNumParams()+1);

      // Insert the chain's type into the list of parameter types, which may
      // mean appending it.
      {
        unsigned Idx = 1;
        FunctionType::param_iterator I = FTy->param_begin(),
          E = FTy->param_end();

        do {
          if (Idx == NestIdx)
            // Add the chain's type.
            NewTypes.push_back(NestTy);

          if (I == E)
            break;

          // Add the original type.
          NewTypes.push_back(*I);

          ++Idx, ++I;
        } while (1);
      }

      // Replace the trampoline call with a direct call.  Let the generic
      // code sort out any function type mismatches.
      FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
                                               FTy->isVarArg());
      Constant *NewCallee =
        NestF->getType() == PointerType::getUnqual(NewFTy) ?
        NestF : ConstantExpr::getBitCast(NestF,
                                         PointerType::getUnqual(NewFTy));
      const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(),
                                                   NewAttrs.end());

      Instruction *NewCaller;
      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
        NewCaller = InvokeInst::Create(NewCallee,
                                       II->getNormalDest(), II->getUnwindDest(),
                                       NewArgs.begin(), NewArgs.end(),
                                       Caller->getName(), Caller);
        cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
        cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
      } else {
        NewCaller = CallInst::Create(NewCallee, NewArgs.begin(), NewArgs.end(),
                                     Caller->getName(), Caller);
        if (cast<CallInst>(Caller)->isTailCall())
          cast<CallInst>(NewCaller)->setTailCall();
        cast<CallInst>(NewCaller)->
          setCallingConv(cast<CallInst>(Caller)->getCallingConv());
        cast<CallInst>(NewCaller)->setAttributes(NewPAL);
      }
      if (!Caller->getType()->isVoidTy())
        Caller->replaceAllUsesWith(NewCaller);
      // Drop the old call from the worklist while the pointer is still
      // valid, and only then destroy it; the previous order passed a
      // dangling pointer to the worklist.
      Worklist.Remove(Caller);
      Caller->eraseFromParent();
      return 0;
    }
  }

  // Replace the trampoline call with a direct call.  Since there is no 'nest'
  // parameter, there is no need to adjust the argument list.  Let the generic
  // code sort out any function type mismatches.
  Constant *NewCallee =
    NestF->getType() == PTy ? NestF :
                              ConstantExpr::getBitCast(NestF, PTy);
  CS.setCalledFunction(NewCallee);
  return CS.getInstruction();
}

7389/// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(a,c)] 7390/// and if a/b/c and the add's all have a single use, turn this into a phi 7391/// and a single binop. 7392Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { 7393 Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0)); 7394 assert(isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)); 7395 unsigned Opc = FirstInst->getOpcode(); 7396 Value *LHSVal = FirstInst->getOperand(0); 7397 Value *RHSVal = FirstInst->getOperand(1); 7398 7399 const Type *LHSType = LHSVal->getType(); 7400 const Type *RHSType = RHSVal->getType(); 7401 7402 // Scan to see if all operands are the same opcode, and all have one use. 7403 for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { 7404 Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i)); 7405 if (!I || I->getOpcode() != Opc || !I->hasOneUse() || 7406 // Verify type of the LHS matches so we don't fold cmp's of different 7407 // types or GEP's with different index types. 7408 I->getOperand(0)->getType() != LHSType || 7409 I->getOperand(1)->getType() != RHSType) 7410 return 0; 7411 7412 // If they are CmpInst instructions, check their predicates 7413 if (Opc == Instruction::ICmp || Opc == Instruction::FCmp) 7414 if (cast<CmpInst>(I)->getPredicate() != 7415 cast<CmpInst>(FirstInst)->getPredicate()) 7416 return 0; 7417 7418 // Keep track of which operand needs a phi node. 
7419 if (I->getOperand(0) != LHSVal) LHSVal = 0; 7420 if (I->getOperand(1) != RHSVal) RHSVal = 0; 7421 } 7422 7423 // If both LHS and RHS would need a PHI, don't do this transformation, 7424 // because it would increase the number of PHIs entering the block, 7425 // which leads to higher register pressure. This is especially 7426 // bad when the PHIs are in the header of a loop. 7427 if (!LHSVal && !RHSVal) 7428 return 0; 7429 7430 // Otherwise, this is safe to transform! 7431 7432 Value *InLHS = FirstInst->getOperand(0); 7433 Value *InRHS = FirstInst->getOperand(1); 7434 PHINode *NewLHS = 0, *NewRHS = 0; 7435 if (LHSVal == 0) { 7436 NewLHS = PHINode::Create(LHSType, 7437 FirstInst->getOperand(0)->getName() + ".pn"); 7438 NewLHS->reserveOperandSpace(PN.getNumOperands()/2); 7439 NewLHS->addIncoming(InLHS, PN.getIncomingBlock(0)); 7440 InsertNewInstBefore(NewLHS, PN); 7441 LHSVal = NewLHS; 7442 } 7443 7444 if (RHSVal == 0) { 7445 NewRHS = PHINode::Create(RHSType, 7446 FirstInst->getOperand(1)->getName() + ".pn"); 7447 NewRHS->reserveOperandSpace(PN.getNumOperands()/2); 7448 NewRHS->addIncoming(InRHS, PN.getIncomingBlock(0)); 7449 InsertNewInstBefore(NewRHS, PN); 7450 RHSVal = NewRHS; 7451 } 7452 7453 // Add all operands to the new PHIs. 
7454 if (NewLHS || NewRHS) { 7455 for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { 7456 Instruction *InInst = cast<Instruction>(PN.getIncomingValue(i)); 7457 if (NewLHS) { 7458 Value *NewInLHS = InInst->getOperand(0); 7459 NewLHS->addIncoming(NewInLHS, PN.getIncomingBlock(i)); 7460 } 7461 if (NewRHS) { 7462 Value *NewInRHS = InInst->getOperand(1); 7463 NewRHS->addIncoming(NewInRHS, PN.getIncomingBlock(i)); 7464 } 7465 } 7466 } 7467 7468 if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst)) 7469 return BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal); 7470 CmpInst *CIOp = cast<CmpInst>(FirstInst); 7471 return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), 7472 LHSVal, RHSVal); 7473} 7474 7475Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { 7476 GetElementPtrInst *FirstInst =cast<GetElementPtrInst>(PN.getIncomingValue(0)); 7477 7478 SmallVector<Value*, 16> FixedOperands(FirstInst->op_begin(), 7479 FirstInst->op_end()); 7480 // This is true if all GEP bases are allocas and if all indices into them are 7481 // constants. 7482 bool AllBasePointersAreAllocas = true; 7483 7484 // We don't want to replace this phi if the replacement would require 7485 // more than one phi, which leads to higher register pressure. This is 7486 // especially bad when the PHIs are in the header of a loop. 7487 bool NeededPhi = false; 7488 7489 // Scan to see if all operands are the same opcode, and all have one use. 7490 for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { 7491 GetElementPtrInst *GEP= dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i)); 7492 if (!GEP || !GEP->hasOneUse() || GEP->getType() != FirstInst->getType() || 7493 GEP->getNumOperands() != FirstInst->getNumOperands()) 7494 return 0; 7495 7496 // Keep track of whether or not all GEPs are of alloca pointers. 
7497 if (AllBasePointersAreAllocas && 7498 (!isa<AllocaInst>(GEP->getOperand(0)) || 7499 !GEP->hasAllConstantIndices())) 7500 AllBasePointersAreAllocas = false; 7501 7502 // Compare the operand lists. 7503 for (unsigned op = 0, e = FirstInst->getNumOperands(); op != e; ++op) { 7504 if (FirstInst->getOperand(op) == GEP->getOperand(op)) 7505 continue; 7506 7507 // Don't merge two GEPs when two operands differ (introducing phi nodes) 7508 // if one of the PHIs has a constant for the index. The index may be 7509 // substantially cheaper to compute for the constants, so making it a 7510 // variable index could pessimize the path. This also handles the case 7511 // for struct indices, which must always be constant. 7512 if (isa<ConstantInt>(FirstInst->getOperand(op)) || 7513 isa<ConstantInt>(GEP->getOperand(op))) 7514 return 0; 7515 7516 if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType()) 7517 return 0; 7518 7519 // If we already needed a PHI for an earlier operand, and another operand 7520 // also requires a PHI, we'd be introducing more PHIs than we're 7521 // eliminating, which increases register pressure on entry to the PHI's 7522 // block. 7523 if (NeededPhi) 7524 return 0; 7525 7526 FixedOperands[op] = 0; // Needs a PHI. 7527 NeededPhi = true; 7528 } 7529 } 7530 7531 // If all of the base pointers of the PHI'd GEPs are from allocas, don't 7532 // bother doing this transformation. At best, this will just save a bit of 7533 // offset calculation, but all the predecessors will have to materialize the 7534 // stack address into a register anyway. We'd actually rather *clone* the 7535 // load up into the predecessors so that we have a load of a gep of an alloca, 7536 // which can usually all be folded into the load. 7537 if (AllBasePointersAreAllocas) 7538 return 0; 7539 7540 // Otherwise, this is safe to transform. Insert PHI nodes for each operand 7541 // that is variable. 
7542 SmallVector<PHINode*, 16> OperandPhis(FixedOperands.size()); 7543 7544 bool HasAnyPHIs = false; 7545 for (unsigned i = 0, e = FixedOperands.size(); i != e; ++i) { 7546 if (FixedOperands[i]) continue; // operand doesn't need a phi. 7547 Value *FirstOp = FirstInst->getOperand(i); 7548 PHINode *NewPN = PHINode::Create(FirstOp->getType(), 7549 FirstOp->getName()+".pn"); 7550 InsertNewInstBefore(NewPN, PN); 7551 7552 NewPN->reserveOperandSpace(e); 7553 NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0)); 7554 OperandPhis[i] = NewPN; 7555 FixedOperands[i] = NewPN; 7556 HasAnyPHIs = true; 7557 } 7558 7559 7560 // Add all operands to the new PHIs. 7561 if (HasAnyPHIs) { 7562 for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { 7563 GetElementPtrInst *InGEP =cast<GetElementPtrInst>(PN.getIncomingValue(i)); 7564 BasicBlock *InBB = PN.getIncomingBlock(i); 7565 7566 for (unsigned op = 0, e = OperandPhis.size(); op != e; ++op) 7567 if (PHINode *OpPhi = OperandPhis[op]) 7568 OpPhi->addIncoming(InGEP->getOperand(op), InBB); 7569 } 7570 } 7571 7572 Value *Base = FixedOperands[0]; 7573 return cast<GEPOperator>(FirstInst)->isInBounds() ? 7574 GetElementPtrInst::CreateInBounds(Base, FixedOperands.begin()+1, 7575 FixedOperands.end()) : 7576 GetElementPtrInst::Create(Base, FixedOperands.begin()+1, 7577 FixedOperands.end()); 7578} 7579 7580 7581/// isSafeAndProfitableToSinkLoad - Return true if we know that it is safe to 7582/// sink the load out of the block that defines it. This means that it must be 7583/// obvious the value of the load is not changed from the point of the load to 7584/// the end of the block it is in. 7585/// 7586/// Finally, it is safe, but not profitable, to sink a load targetting a 7587/// non-address-taken alloca. Doing so will cause us to not promote the alloca 7588/// to a register. 
7589static bool isSafeAndProfitableToSinkLoad(LoadInst *L) { 7590 BasicBlock::iterator BBI = L, E = L->getParent()->end(); 7591 7592 for (++BBI; BBI != E; ++BBI) 7593 if (BBI->mayWriteToMemory()) 7594 return false; 7595 7596 // Check for non-address taken alloca. If not address-taken already, it isn't 7597 // profitable to do this xform. 7598 if (AllocaInst *AI = dyn_cast<AllocaInst>(L->getOperand(0))) { 7599 bool isAddressTaken = false; 7600 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); 7601 UI != E; ++UI) { 7602 if (isa<LoadInst>(UI)) continue; 7603 if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) { 7604 // If storing TO the alloca, then the address isn't taken. 7605 if (SI->getOperand(1) == AI) continue; 7606 } 7607 isAddressTaken = true; 7608 break; 7609 } 7610 7611 if (!isAddressTaken && AI->isStaticAlloca()) 7612 return false; 7613 } 7614 7615 // If this load is a load from a GEP with a constant offset from an alloca, 7616 // then we don't want to sink it. In its present form, it will be 7617 // load [constant stack offset]. Sinking it will cause us to have to 7618 // materialize the stack addresses in each predecessor in a register only to 7619 // do a shared load from register in the successor. 7620 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(L->getOperand(0))) 7621 if (AllocaInst *AI = dyn_cast<AllocaInst>(GEP->getOperand(0))) 7622 if (AI->isStaticAlloca() && GEP->hasAllConstantIndices()) 7623 return false; 7624 7625 return true; 7626} 7627 7628Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { 7629 LoadInst *FirstLI = cast<LoadInst>(PN.getIncomingValue(0)); 7630 7631 // When processing loads, we need to propagate two bits of information to the 7632 // sunk load: whether it is volatile, and what its alignment is. We currently 7633 // don't sink loads when some have their alignment specified and some don't. 
7634 // visitLoadInst will propagate an alignment onto the load when TD is around, 7635 // and if TD isn't around, we can't handle the mixed case. 7636 bool isVolatile = FirstLI->isVolatile(); 7637 unsigned LoadAlignment = FirstLI->getAlignment(); 7638 7639 // We can't sink the load if the loaded value could be modified between the 7640 // load and the PHI. 7641 if (FirstLI->getParent() != PN.getIncomingBlock(0) || 7642 !isSafeAndProfitableToSinkLoad(FirstLI)) 7643 return 0; 7644 7645 // If the PHI is of volatile loads and the load block has multiple 7646 // successors, sinking it would remove a load of the volatile value from 7647 // the path through the other successor. 7648 if (isVolatile && 7649 FirstLI->getParent()->getTerminator()->getNumSuccessors() != 1) 7650 return 0; 7651 7652 // Check to see if all arguments are the same operation. 7653 for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { 7654 LoadInst *LI = dyn_cast<LoadInst>(PN.getIncomingValue(i)); 7655 if (!LI || !LI->hasOneUse()) 7656 return 0; 7657 7658 // We can't sink the load if the loaded value could be modified between 7659 // the load and the PHI. 7660 if (LI->isVolatile() != isVolatile || 7661 LI->getParent() != PN.getIncomingBlock(i) || 7662 !isSafeAndProfitableToSinkLoad(LI)) 7663 return 0; 7664 7665 // If some of the loads have an alignment specified but not all of them, 7666 // we can't do the transformation. 7667 if ((LoadAlignment != 0) != (LI->getAlignment() != 0)) 7668 return 0; 7669 7670 LoadAlignment = std::min(LoadAlignment, LI->getAlignment()); 7671 7672 // If the PHI is of volatile loads and the load block has multiple 7673 // successors, sinking it would remove a load of the volatile value from 7674 // the path through the other successor. 7675 if (isVolatile && 7676 LI->getParent()->getTerminator()->getNumSuccessors() != 1) 7677 return 0; 7678 } 7679 7680 // Okay, they are all the same operation. 
Create a new PHI node of the 7681 // correct type, and PHI together all of the LHS's of the instructions. 7682 PHINode *NewPN = PHINode::Create(FirstLI->getOperand(0)->getType(), 7683 PN.getName()+".in"); 7684 NewPN->reserveOperandSpace(PN.getNumOperands()/2); 7685 7686 Value *InVal = FirstLI->getOperand(0); 7687 NewPN->addIncoming(InVal, PN.getIncomingBlock(0)); 7688 7689 // Add all operands to the new PHI. 7690 for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { 7691 Value *NewInVal = cast<LoadInst>(PN.getIncomingValue(i))->getOperand(0); 7692 if (NewInVal != InVal) 7693 InVal = 0; 7694 NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i)); 7695 } 7696 7697 Value *PhiVal; 7698 if (InVal) { 7699 // The new PHI unions all of the same values together. This is really 7700 // common, so we handle it intelligently here for compile-time speed. 7701 PhiVal = InVal; 7702 delete NewPN; 7703 } else { 7704 InsertNewInstBefore(NewPN, PN); 7705 PhiVal = NewPN; 7706 } 7707 7708 // If this was a volatile load that we are merging, make sure to loop through 7709 // and mark all the input loads as non-volatile. If we don't do this, we will 7710 // insert a new volatile load and the old ones will not be deletable. 7711 if (isVolatile) 7712 for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) 7713 cast<LoadInst>(PN.getIncomingValue(i))->setVolatile(false); 7714 7715 return new LoadInst(PhiVal, "", isVolatile, LoadAlignment); 7716} 7717 7718 7719 7720/// FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary" 7721/// operator and they all are only used by the PHI, PHI together their 7722/// inputs, and do the operation once, to the result of the PHI. 
7723Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { 7724 Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0)); 7725 7726 if (isa<GetElementPtrInst>(FirstInst)) 7727 return FoldPHIArgGEPIntoPHI(PN); 7728 if (isa<LoadInst>(FirstInst)) 7729 return FoldPHIArgLoadIntoPHI(PN); 7730 7731 // Scan the instruction, looking for input operations that can be folded away. 7732 // If all input operands to the phi are the same instruction (e.g. a cast from 7733 // the same type or "+42") we can pull the operation through the PHI, reducing 7734 // code size and simplifying code. 7735 Constant *ConstantOp = 0; 7736 const Type *CastSrcTy = 0; 7737 7738 if (isa<CastInst>(FirstInst)) { 7739 CastSrcTy = FirstInst->getOperand(0)->getType(); 7740 7741 // Be careful about transforming integer PHIs. We don't want to pessimize 7742 // the code by turning an i32 into an i1293. 7743 if (isa<IntegerType>(PN.getType()) && isa<IntegerType>(CastSrcTy)) { 7744 if (!ShouldChangeType(PN.getType(), CastSrcTy, TD)) 7745 return 0; 7746 } 7747 } else if (isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)) { 7748 // Can fold binop, compare or shift here if the RHS is a constant, 7749 // otherwise call FoldPHIArgBinOpIntoPHI. 7750 ConstantOp = dyn_cast<Constant>(FirstInst->getOperand(1)); 7751 if (ConstantOp == 0) 7752 return FoldPHIArgBinOpIntoPHI(PN); 7753 } else { 7754 return 0; // Cannot fold this operation. 7755 } 7756 7757 // Check to see if all arguments are the same operation. 7758 for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { 7759 Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i)); 7760 if (I == 0 || !I->hasOneUse() || !I->isSameOperationAs(FirstInst)) 7761 return 0; 7762 if (CastSrcTy) { 7763 if (I->getOperand(0)->getType() != CastSrcTy) 7764 return 0; // Cast operation must match. 7765 } else if (I->getOperand(1) != ConstantOp) { 7766 return 0; 7767 } 7768 } 7769 7770 // Okay, they are all the same operation. 
Create a new PHI node of the 7771 // correct type, and PHI together all of the LHS's of the instructions. 7772 PHINode *NewPN = PHINode::Create(FirstInst->getOperand(0)->getType(), 7773 PN.getName()+".in"); 7774 NewPN->reserveOperandSpace(PN.getNumOperands()/2); 7775 7776 Value *InVal = FirstInst->getOperand(0); 7777 NewPN->addIncoming(InVal, PN.getIncomingBlock(0)); 7778 7779 // Add all operands to the new PHI. 7780 for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { 7781 Value *NewInVal = cast<Instruction>(PN.getIncomingValue(i))->getOperand(0); 7782 if (NewInVal != InVal) 7783 InVal = 0; 7784 NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i)); 7785 } 7786 7787 Value *PhiVal; 7788 if (InVal) { 7789 // The new PHI unions all of the same values together. This is really 7790 // common, so we handle it intelligently here for compile-time speed. 7791 PhiVal = InVal; 7792 delete NewPN; 7793 } else { 7794 InsertNewInstBefore(NewPN, PN); 7795 PhiVal = NewPN; 7796 } 7797 7798 // Insert and return the new operation. 7799 if (CastInst *FirstCI = dyn_cast<CastInst>(FirstInst)) 7800 return CastInst::Create(FirstCI->getOpcode(), PhiVal, PN.getType()); 7801 7802 if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst)) 7803 return BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp); 7804 7805 CmpInst *CIOp = cast<CmpInst>(FirstInst); 7806 return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), 7807 PhiVal, ConstantOp); 7808} 7809 7810/// DeadPHICycle - Return true if this PHI node is only used by a PHI node cycle 7811/// that is dead. 7812static bool DeadPHICycle(PHINode *PN, 7813 SmallPtrSet<PHINode*, 16> &PotentiallyDeadPHIs) { 7814 if (PN->use_empty()) return true; 7815 if (!PN->hasOneUse()) return false; 7816 7817 // Remember this node, and if we find the cycle, return. 7818 if (!PotentiallyDeadPHIs.insert(PN)) 7819 return true; 7820 7821 // Don't scan crazily complex things. 
7822 if (PotentiallyDeadPHIs.size() == 16) 7823 return false; 7824 7825 if (PHINode *PU = dyn_cast<PHINode>(PN->use_back())) 7826 return DeadPHICycle(PU, PotentiallyDeadPHIs); 7827 7828 return false; 7829} 7830 7831/// PHIsEqualValue - Return true if this phi node is always equal to 7832/// NonPhiInVal. This happens with mutually cyclic phi nodes like: 7833/// z = some value; x = phi (y, z); y = phi (x, z) 7834static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal, 7835 SmallPtrSet<PHINode*, 16> &ValueEqualPHIs) { 7836 // See if we already saw this PHI node. 7837 if (!ValueEqualPHIs.insert(PN)) 7838 return true; 7839 7840 // Don't scan crazily complex things. 7841 if (ValueEqualPHIs.size() == 16) 7842 return false; 7843 7844 // Scan the operands to see if they are either phi nodes or are equal to 7845 // the value. 7846 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { 7847 Value *Op = PN->getIncomingValue(i); 7848 if (PHINode *OpPN = dyn_cast<PHINode>(Op)) { 7849 if (!PHIsEqualValue(OpPN, NonPhiInVal, ValueEqualPHIs)) 7850 return false; 7851 } else if (Op != NonPhiInVal) 7852 return false; 7853 } 7854 7855 return true; 7856} 7857 7858 7859namespace { 7860struct PHIUsageRecord { 7861 unsigned PHIId; // The ID # of the PHI (something determinstic to sort on) 7862 unsigned Shift; // The amount shifted. 7863 Instruction *Inst; // The trunc instruction. 7864 7865 PHIUsageRecord(unsigned pn, unsigned Sh, Instruction *User) 7866 : PHIId(pn), Shift(Sh), Inst(User) {} 7867 7868 bool operator<(const PHIUsageRecord &RHS) const { 7869 if (PHIId < RHS.PHIId) return true; 7870 if (PHIId > RHS.PHIId) return false; 7871 if (Shift < RHS.Shift) return true; 7872 if (Shift > RHS.Shift) return false; 7873 return Inst->getType()->getPrimitiveSizeInBits() < 7874 RHS.Inst->getType()->getPrimitiveSizeInBits(); 7875 } 7876}; 7877 7878struct LoweredPHIRecord { 7879 PHINode *PN; // The PHI that was lowered. 7880 unsigned Shift; // The amount shifted. 
7881 unsigned Width; // The width extracted. 7882 7883 LoweredPHIRecord(PHINode *pn, unsigned Sh, const Type *Ty) 7884 : PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {} 7885 7886 // Ctor form used by DenseMap. 7887 LoweredPHIRecord(PHINode *pn, unsigned Sh) 7888 : PN(pn), Shift(Sh), Width(0) {} 7889}; 7890} 7891 7892namespace llvm { 7893 template<> 7894 struct DenseMapInfo<LoweredPHIRecord> { 7895 static inline LoweredPHIRecord getEmptyKey() { 7896 return LoweredPHIRecord(0, 0); 7897 } 7898 static inline LoweredPHIRecord getTombstoneKey() { 7899 return LoweredPHIRecord(0, 1); 7900 } 7901 static unsigned getHashValue(const LoweredPHIRecord &Val) { 7902 return DenseMapInfo<PHINode*>::getHashValue(Val.PN) ^ (Val.Shift>>3) ^ 7903 (Val.Width>>3); 7904 } 7905 static bool isEqual(const LoweredPHIRecord &LHS, 7906 const LoweredPHIRecord &RHS) { 7907 return LHS.PN == RHS.PN && LHS.Shift == RHS.Shift && 7908 LHS.Width == RHS.Width; 7909 } 7910 }; 7911 template <> 7912 struct isPodLike<LoweredPHIRecord> { static const bool value = true; }; 7913} 7914 7915 7916/// SliceUpIllegalIntegerPHI - This is an integer PHI and we know that it has an 7917/// illegal type: see if it is only used by trunc or trunc(lshr) operations. If 7918/// so, we split the PHI into the various pieces being extracted. This sort of 7919/// thing is introduced when SROA promotes an aggregate to large integer values. 7920/// 7921/// TODO: The user of the trunc may be an bitcast to float/double/vector or an 7922/// inttoptr. We should produce new PHIs in the right type. 7923/// 7924Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { 7925 // PHIUsers - Keep track of all of the truncated values extracted from a set 7926 // of PHIs, along with their offset. These are the things we want to rewrite. 7927 SmallVector<PHIUsageRecord, 16> PHIUsers; 7928 7929 // PHIs are often mutually cyclic, so we keep track of a whole set of PHI 7930 // nodes which are extracted from. 
PHIsToSlice is a set we use to avoid 7931 // revisiting PHIs, PHIsInspected is a ordered list of PHIs that we need to 7932 // check the uses of (to ensure they are all extracts). 7933 SmallVector<PHINode*, 8> PHIsToSlice; 7934 SmallPtrSet<PHINode*, 8> PHIsInspected; 7935 7936 PHIsToSlice.push_back(&FirstPhi); 7937 PHIsInspected.insert(&FirstPhi); 7938 7939 for (unsigned PHIId = 0; PHIId != PHIsToSlice.size(); ++PHIId) { 7940 PHINode *PN = PHIsToSlice[PHIId]; 7941 7942 // Scan the input list of the PHI. If any input is an invoke, and if the 7943 // input is defined in the predecessor, then we won't be split the critical 7944 // edge which is required to insert a truncate. Because of this, we have to 7945 // bail out. 7946 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { 7947 InvokeInst *II = dyn_cast<InvokeInst>(PN->getIncomingValue(i)); 7948 if (II == 0) continue; 7949 if (II->getParent() != PN->getIncomingBlock(i)) 7950 continue; 7951 7952 // If we have a phi, and if it's directly in the predecessor, then we have 7953 // a critical edge where we need to put the truncate. Since we can't 7954 // split the edge in instcombine, we have to bail out. 7955 return 0; 7956 } 7957 7958 7959 for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); 7960 UI != E; ++UI) { 7961 Instruction *User = cast<Instruction>(*UI); 7962 7963 // If the user is a PHI, inspect its uses recursively. 7964 if (PHINode *UserPN = dyn_cast<PHINode>(User)) { 7965 if (PHIsInspected.insert(UserPN)) 7966 PHIsToSlice.push_back(UserPN); 7967 continue; 7968 } 7969 7970 // Truncates are always ok. 7971 if (isa<TruncInst>(User)) { 7972 PHIUsers.push_back(PHIUsageRecord(PHIId, 0, User)); 7973 continue; 7974 } 7975 7976 // Otherwise it must be a lshr which can only be used by one trunc. 
7977 if (User->getOpcode() != Instruction::LShr || 7978 !User->hasOneUse() || !isa<TruncInst>(User->use_back()) || 7979 !isa<ConstantInt>(User->getOperand(1))) 7980 return 0; 7981 7982 unsigned Shift = cast<ConstantInt>(User->getOperand(1))->getZExtValue(); 7983 PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, User->use_back())); 7984 } 7985 } 7986 7987 // If we have no users, they must be all self uses, just nuke the PHI. 7988 if (PHIUsers.empty()) 7989 return ReplaceInstUsesWith(FirstPhi, UndefValue::get(FirstPhi.getType())); 7990 7991 // If this phi node is transformable, create new PHIs for all the pieces 7992 // extracted out of it. First, sort the users by their offset and size. 7993 array_pod_sort(PHIUsers.begin(), PHIUsers.end()); 7994 7995 DEBUG(errs() << "SLICING UP PHI: " << FirstPhi << '\n'; 7996 for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i) 7997 errs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] <<'\n'; 7998 ); 7999 8000 // PredValues - This is a temporary used when rewriting PHI nodes. It is 8001 // hoisted out here to avoid construction/destruction thrashing. 8002 DenseMap<BasicBlock*, Value*> PredValues; 8003 8004 // ExtractedVals - Each new PHI we introduce is saved here so we don't 8005 // introduce redundant PHIs. 8006 DenseMap<LoweredPHIRecord, PHINode*> ExtractedVals; 8007 8008 for (unsigned UserI = 0, UserE = PHIUsers.size(); UserI != UserE; ++UserI) { 8009 unsigned PHIId = PHIUsers[UserI].PHIId; 8010 PHINode *PN = PHIsToSlice[PHIId]; 8011 unsigned Offset = PHIUsers[UserI].Shift; 8012 const Type *Ty = PHIUsers[UserI].Inst->getType(); 8013 8014 PHINode *EltPHI; 8015 8016 // If we've already lowered a user like this, reuse the previously lowered 8017 // value. 8018 if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) { 8019 8020 // Otherwise, Create the new PHI node for this user. 
8021 EltPHI = PHINode::Create(Ty, PN->getName()+".off"+Twine(Offset), PN); 8022 assert(EltPHI->getType() != PN->getType() && 8023 "Truncate didn't shrink phi?"); 8024 8025 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { 8026 BasicBlock *Pred = PN->getIncomingBlock(i); 8027 Value *&PredVal = PredValues[Pred]; 8028 8029 // If we already have a value for this predecessor, reuse it. 8030 if (PredVal) { 8031 EltPHI->addIncoming(PredVal, Pred); 8032 continue; 8033 } 8034 8035 // Handle the PHI self-reuse case. 8036 Value *InVal = PN->getIncomingValue(i); 8037 if (InVal == PN) { 8038 PredVal = EltPHI; 8039 EltPHI->addIncoming(PredVal, Pred); 8040 continue; 8041 } 8042 8043 if (PHINode *InPHI = dyn_cast<PHINode>(PN)) { 8044 // If the incoming value was a PHI, and if it was one of the PHIs we 8045 // already rewrote it, just use the lowered value. 8046 if (Value *Res = ExtractedVals[LoweredPHIRecord(InPHI, Offset, Ty)]) { 8047 PredVal = Res; 8048 EltPHI->addIncoming(PredVal, Pred); 8049 continue; 8050 } 8051 } 8052 8053 // Otherwise, do an extract in the predecessor. 8054 Builder->SetInsertPoint(Pred, Pred->getTerminator()); 8055 Value *Res = InVal; 8056 if (Offset) 8057 Res = Builder->CreateLShr(Res, ConstantInt::get(InVal->getType(), 8058 Offset), "extract"); 8059 Res = Builder->CreateTrunc(Res, Ty, "extract.t"); 8060 PredVal = Res; 8061 EltPHI->addIncoming(Res, Pred); 8062 8063 // If the incoming value was a PHI, and if it was one of the PHIs we are 8064 // rewriting, we will ultimately delete the code we inserted. This 8065 // means we need to revisit that PHI to make sure we extract out the 8066 // needed piece. 
8067 if (PHINode *OldInVal = dyn_cast<PHINode>(PN->getIncomingValue(i))) 8068 if (PHIsInspected.count(OldInVal)) { 8069 unsigned RefPHIId = std::find(PHIsToSlice.begin(),PHIsToSlice.end(), 8070 OldInVal)-PHIsToSlice.begin(); 8071 PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset, 8072 cast<Instruction>(Res))); 8073 ++UserE; 8074 } 8075 } 8076 PredValues.clear(); 8077 8078 DEBUG(errs() << " Made element PHI for offset " << Offset << ": " 8079 << *EltPHI << '\n'); 8080 ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)] = EltPHI; 8081 } 8082 8083 // Replace the use of this piece with the PHI node. 8084 ReplaceInstUsesWith(*PHIUsers[UserI].Inst, EltPHI); 8085 } 8086 8087 // Replace all the remaining uses of the PHI nodes (self uses and the lshrs) 8088 // with undefs. 8089 Value *Undef = UndefValue::get(FirstPhi.getType()); 8090 for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i) 8091 ReplaceInstUsesWith(*PHIsToSlice[i], Undef); 8092 return ReplaceInstUsesWith(FirstPhi, Undef); 8093} 8094 8095// PHINode simplification 8096// 8097Instruction *InstCombiner::visitPHINode(PHINode &PN) { 8098 // If LCSSA is around, don't mess with Phi nodes 8099 if (MustPreserveLCSSA) return 0; 8100 8101 if (Value *V = PN.hasConstantValue()) 8102 return ReplaceInstUsesWith(PN, V); 8103 8104 // If all PHI operands are the same operation, pull them through the PHI, 8105 // reducing code size. 8106 if (isa<Instruction>(PN.getIncomingValue(0)) && 8107 isa<Instruction>(PN.getIncomingValue(1)) && 8108 cast<Instruction>(PN.getIncomingValue(0))->getOpcode() == 8109 cast<Instruction>(PN.getIncomingValue(1))->getOpcode() && 8110 // FIXME: The hasOneUse check will fail for PHIs that use the value more 8111 // than themselves more than once. 8112 PN.getIncomingValue(0)->hasOneUse()) 8113 if (Instruction *Result = FoldPHIArgOpIntoPHI(PN)) 8114 return Result; 8115 8116 // If this is a trivial cycle in the PHI node graph, remove it. 
Basically, if 8117 // this PHI only has a single use (a PHI), and if that PHI only has one use (a 8118 // PHI)... break the cycle. 8119 if (PN.hasOneUse()) { 8120 Instruction *PHIUser = cast<Instruction>(PN.use_back()); 8121 if (PHINode *PU = dyn_cast<PHINode>(PHIUser)) { 8122 SmallPtrSet<PHINode*, 16> PotentiallyDeadPHIs; 8123 PotentiallyDeadPHIs.insert(&PN); 8124 if (DeadPHICycle(PU, PotentiallyDeadPHIs)) 8125 return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType())); 8126 } 8127 8128 // If this phi has a single use, and if that use just computes a value for 8129 // the next iteration of a loop, delete the phi. This occurs with unused 8130 // induction variables, e.g. "for (int j = 0; ; ++j);". Detecting this 8131 // common case here is good because the only other things that catch this 8132 // are induction variable analysis (sometimes) and ADCE, which is only run 8133 // late. 8134 if (PHIUser->hasOneUse() && 8135 (isa<BinaryOperator>(PHIUser) || isa<GetElementPtrInst>(PHIUser)) && 8136 PHIUser->use_back() == &PN) { 8137 return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType())); 8138 } 8139 } 8140 8141 // We sometimes end up with phi cycles that non-obviously end up being the 8142 // same value, for example: 8143 // z = some value; x = phi (y, z); y = phi (x, z) 8144 // where the phi nodes don't necessarily need to be in the same block. Do a 8145 // quick check to see if the PHI node only contains a single non-phi value, if 8146 // so, scan to see if the phi cycle is actually equal to that value. 8147 { 8148 unsigned InValNo = 0, NumOperandVals = PN.getNumIncomingValues(); 8149 // Scan for the first non-phi operand. 8150 while (InValNo != NumOperandVals && 8151 isa<PHINode>(PN.getIncomingValue(InValNo))) 8152 ++InValNo; 8153 8154 if (InValNo != NumOperandVals) { 8155 Value *NonPhiInVal = PN.getOperand(InValNo); 8156 8157 // Scan the rest of the operands to see if there are any conflicts, if so 8158 // there is no need to recursively scan other phis. 
8159 for (++InValNo; InValNo != NumOperandVals; ++InValNo) { 8160 Value *OpVal = PN.getIncomingValue(InValNo); 8161 if (OpVal != NonPhiInVal && !isa<PHINode>(OpVal)) 8162 break; 8163 } 8164 8165 // If we scanned over all operands, then we have one unique value plus 8166 // phi values. Scan PHI nodes to see if they all merge in each other or 8167 // the value. 8168 if (InValNo == NumOperandVals) { 8169 SmallPtrSet<PHINode*, 16> ValueEqualPHIs; 8170 if (PHIsEqualValue(&PN, NonPhiInVal, ValueEqualPHIs)) 8171 return ReplaceInstUsesWith(PN, NonPhiInVal); 8172 } 8173 } 8174 } 8175 8176 // If there are multiple PHIs, sort their operands so that they all list 8177 // the blocks in the same order. This will help identical PHIs be eliminated 8178 // by other passes. Other passes shouldn't depend on this for correctness 8179 // however. 8180 PHINode *FirstPN = cast<PHINode>(PN.getParent()->begin()); 8181 if (&PN != FirstPN) 8182 for (unsigned i = 0, e = FirstPN->getNumIncomingValues(); i != e; ++i) { 8183 BasicBlock *BBA = PN.getIncomingBlock(i); 8184 BasicBlock *BBB = FirstPN->getIncomingBlock(i); 8185 if (BBA != BBB) { 8186 Value *VA = PN.getIncomingValue(i); 8187 unsigned j = PN.getBasicBlockIndex(BBB); 8188 Value *VB = PN.getIncomingValue(j); 8189 PN.setIncomingBlock(i, BBB); 8190 PN.setIncomingValue(i, VB); 8191 PN.setIncomingBlock(j, BBA); 8192 PN.setIncomingValue(j, VA); 8193 // NOTE: Instcombine normally would want us to "return &PN" if we 8194 // modified any of the operands of an instruction. However, since we 8195 // aren't adding or removing uses (just rearranging them) we don't do 8196 // this in this case. 8197 } 8198 } 8199 8200 // If this is an integer PHI and we know that it has an illegal type, see if 8201 // it is only used by trunc or trunc(lshr) operations. If so, we split the 8202 // PHI into the various pieces being extracted. This sort of thing is 8203 // introduced when SROA promotes an aggregate to a single large integer type. 
8204 if (isa<IntegerType>(PN.getType()) && TD && 8205 !TD->isLegalInteger(PN.getType()->getPrimitiveSizeInBits())) 8206 if (Instruction *Res = SliceUpIllegalIntegerPHI(PN)) 8207 return Res; 8208 8209 return 0; 8210} 8211 8212Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { 8213 SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end()); 8214 8215 if (Value *V = SimplifyGEPInst(&Ops[0], Ops.size(), TD)) 8216 return ReplaceInstUsesWith(GEP, V); 8217 8218 Value *PtrOp = GEP.getOperand(0); 8219 8220 if (isa<UndefValue>(GEP.getOperand(0))) 8221 return ReplaceInstUsesWith(GEP, UndefValue::get(GEP.getType())); 8222 8223 // Eliminate unneeded casts for indices. 8224 if (TD) { 8225 bool MadeChange = false; 8226 unsigned PtrSize = TD->getPointerSizeInBits(); 8227 8228 gep_type_iterator GTI = gep_type_begin(GEP); 8229 for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); 8230 I != E; ++I, ++GTI) { 8231 if (!isa<SequentialType>(*GTI)) continue; 8232 8233 // If we are using a wider index than needed for this platform, shrink it 8234 // to what we need. If narrower, sign-extend it to what we need. This 8235 // explicit cast can make subsequent optimizations more obvious. 8236 unsigned OpBits = cast<IntegerType>((*I)->getType())->getBitWidth(); 8237 if (OpBits == PtrSize) 8238 continue; 8239 8240 *I = Builder->CreateIntCast(*I, TD->getIntPtrType(GEP.getContext()),true); 8241 MadeChange = true; 8242 } 8243 if (MadeChange) return &GEP; 8244 } 8245 8246 // Combine Indices - If the source pointer to this getelementptr instruction 8247 // is a getelementptr instruction, combine the indices of the two 8248 // getelementptr instructions into a single instruction. 8249 // 8250 if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) { 8251 // Note that if our source is a gep chain itself that we wait for that 8252 // chain to be resolved before we perform this transformation. This 8253 // avoids us creating a TON of code in some cases. 
8254 // 8255 if (GetElementPtrInst *SrcGEP = 8256 dyn_cast<GetElementPtrInst>(Src->getOperand(0))) 8257 if (SrcGEP->getNumOperands() == 2) 8258 return 0; // Wait until our source is folded to completion. 8259 8260 SmallVector<Value*, 8> Indices; 8261 8262 // Find out whether the last index in the source GEP is a sequential idx. 8263 bool EndsWithSequential = false; 8264 for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src); 8265 I != E; ++I) 8266 EndsWithSequential = !isa<StructType>(*I); 8267 8268 // Can we combine the two pointer arithmetics offsets? 8269 if (EndsWithSequential) { 8270 // Replace: gep (gep %P, long B), long A, ... 8271 // With: T = long A+B; gep %P, T, ... 8272 // 8273 Value *Sum; 8274 Value *SO1 = Src->getOperand(Src->getNumOperands()-1); 8275 Value *GO1 = GEP.getOperand(1); 8276 if (SO1 == Constant::getNullValue(SO1->getType())) { 8277 Sum = GO1; 8278 } else if (GO1 == Constant::getNullValue(GO1->getType())) { 8279 Sum = SO1; 8280 } else { 8281 // If they aren't the same type, then the input hasn't been processed 8282 // by the loop above yet (which canonicalizes sequential index types to 8283 // intptr_t). Just avoid transforming this until the input has been 8284 // normalized. 8285 if (SO1->getType() != GO1->getType()) 8286 return 0; 8287 Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum"); 8288 } 8289 8290 // Update the GEP in place if possible. 
8291 if (Src->getNumOperands() == 2) { 8292 GEP.setOperand(0, Src->getOperand(0)); 8293 GEP.setOperand(1, Sum); 8294 return &GEP; 8295 } 8296 Indices.append(Src->op_begin()+1, Src->op_end()-1); 8297 Indices.push_back(Sum); 8298 Indices.append(GEP.op_begin()+2, GEP.op_end()); 8299 } else if (isa<Constant>(*GEP.idx_begin()) && 8300 cast<Constant>(*GEP.idx_begin())->isNullValue() && 8301 Src->getNumOperands() != 1) { 8302 // Otherwise we can do the fold if the first index of the GEP is a zero 8303 Indices.append(Src->op_begin()+1, Src->op_end()); 8304 Indices.append(GEP.idx_begin()+1, GEP.idx_end()); 8305 } 8306 8307 if (!Indices.empty()) 8308 return (cast<GEPOperator>(&GEP)->isInBounds() && 8309 Src->isInBounds()) ? 8310 GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices.begin(), 8311 Indices.end(), GEP.getName()) : 8312 GetElementPtrInst::Create(Src->getOperand(0), Indices.begin(), 8313 Indices.end(), GEP.getName()); 8314 } 8315 8316 // Handle gep(bitcast x) and gep(gep x, 0, 0, 0). 8317 if (Value *X = getBitCastOperand(PtrOp)) { 8318 assert(isa<PointerType>(X->getType()) && "Must be cast from pointer"); 8319 8320 // If the input bitcast is actually "bitcast(bitcast(x))", then we don't 8321 // want to change the gep until the bitcasts are eliminated. 8322 if (getBitCastOperand(X)) { 8323 Worklist.AddValue(PtrOp); 8324 return 0; 8325 } 8326 8327 bool HasZeroPointerIndex = false; 8328 if (ConstantInt *C = dyn_cast<ConstantInt>(GEP.getOperand(1))) 8329 HasZeroPointerIndex = C->isZero(); 8330 8331 // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... 8332 // into : GEP [10 x i8]* X, i32 0, ... 8333 // 8334 // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ... 8335 // into : GEP i8* X, ... 
8336 // 8337 // This occurs when the program declares an array extern like "int X[];" 8338 if (HasZeroPointerIndex) { 8339 const PointerType *CPTy = cast<PointerType>(PtrOp->getType()); 8340 const PointerType *XTy = cast<PointerType>(X->getType()); 8341 if (const ArrayType *CATy = 8342 dyn_cast<ArrayType>(CPTy->getElementType())) { 8343 // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ? 8344 if (CATy->getElementType() == XTy->getElementType()) { 8345 // -> GEP i8* X, ... 8346 SmallVector<Value*, 8> Indices(GEP.idx_begin()+1, GEP.idx_end()); 8347 return cast<GEPOperator>(&GEP)->isInBounds() ? 8348 GetElementPtrInst::CreateInBounds(X, Indices.begin(), Indices.end(), 8349 GEP.getName()) : 8350 GetElementPtrInst::Create(X, Indices.begin(), Indices.end(), 8351 GEP.getName()); 8352 } 8353 8354 if (const ArrayType *XATy = dyn_cast<ArrayType>(XTy->getElementType())){ 8355 // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ? 8356 if (CATy->getElementType() == XATy->getElementType()) { 8357 // -> GEP [10 x i8]* X, i32 0, ... 8358 // At this point, we know that the cast source type is a pointer 8359 // to an array of the same type as the destination pointer 8360 // array. Because the array type is never stepped over (there 8361 // is a leading zero) we can fold the cast into this GEP. 
8362 GEP.setOperand(0, X); 8363 return &GEP; 8364 } 8365 } 8366 } 8367 } else if (GEP.getNumOperands() == 2) { 8368 // Transform things like: 8369 // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V 8370 // into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast 8371 const Type *SrcElTy = cast<PointerType>(X->getType())->getElementType(); 8372 const Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType(); 8373 if (TD && isa<ArrayType>(SrcElTy) && 8374 TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) == 8375 TD->getTypeAllocSize(ResElTy)) { 8376 Value *Idx[2]; 8377 Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext())); 8378 Idx[1] = GEP.getOperand(1); 8379 Value *NewGEP = cast<GEPOperator>(&GEP)->isInBounds() ? 8380 Builder->CreateInBoundsGEP(X, Idx, Idx + 2, GEP.getName()) : 8381 Builder->CreateGEP(X, Idx, Idx + 2, GEP.getName()); 8382 // V and GEP are both pointer types --> BitCast 8383 return new BitCastInst(NewGEP, GEP.getType()); 8384 } 8385 8386 // Transform things like: 8387 // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp 8388 // (where tmp = 8*tmp2) into: 8389 // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast 8390 8391 if (TD && isa<ArrayType>(SrcElTy) && 8392 ResElTy == Type::getInt8Ty(GEP.getContext())) { 8393 uint64_t ArrayEltSize = 8394 TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()); 8395 8396 // Check to see if "tmp" is a scale by a multiple of ArrayEltSize. We 8397 // allow either a mul, shift, or constant here. 
8398 Value *NewIdx = 0; 8399 ConstantInt *Scale = 0; 8400 if (ArrayEltSize == 1) { 8401 NewIdx = GEP.getOperand(1); 8402 Scale = ConstantInt::get(cast<IntegerType>(NewIdx->getType()), 1); 8403 } else if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP.getOperand(1))) { 8404 NewIdx = ConstantInt::get(CI->getType(), 1); 8405 Scale = CI; 8406 } else if (Instruction *Inst =dyn_cast<Instruction>(GEP.getOperand(1))){ 8407 if (Inst->getOpcode() == Instruction::Shl && 8408 isa<ConstantInt>(Inst->getOperand(1))) { 8409 ConstantInt *ShAmt = cast<ConstantInt>(Inst->getOperand(1)); 8410 uint32_t ShAmtVal = ShAmt->getLimitedValue(64); 8411 Scale = ConstantInt::get(cast<IntegerType>(Inst->getType()), 8412 1ULL << ShAmtVal); 8413 NewIdx = Inst->getOperand(0); 8414 } else if (Inst->getOpcode() == Instruction::Mul && 8415 isa<ConstantInt>(Inst->getOperand(1))) { 8416 Scale = cast<ConstantInt>(Inst->getOperand(1)); 8417 NewIdx = Inst->getOperand(0); 8418 } 8419 } 8420 8421 // If the index will be to exactly the right offset with the scale taken 8422 // out, perform the transformation. Note, we don't know whether Scale is 8423 // signed or not. We'll use unsigned version of division/modulo 8424 // operation after making sure Scale doesn't have the sign bit set. 8425 if (ArrayEltSize && Scale && Scale->getSExtValue() >= 0LL && 8426 Scale->getZExtValue() % ArrayEltSize == 0) { 8427 Scale = ConstantInt::get(Scale->getType(), 8428 Scale->getZExtValue() / ArrayEltSize); 8429 if (Scale->getZExtValue() != 1) { 8430 Constant *C = ConstantExpr::getIntegerCast(Scale, NewIdx->getType(), 8431 false /*ZExt*/); 8432 NewIdx = Builder->CreateMul(NewIdx, C, "idxscale"); 8433 } 8434 8435 // Insert the new GEP instruction. 8436 Value *Idx[2]; 8437 Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext())); 8438 Idx[1] = NewIdx; 8439 Value *NewGEP = cast<GEPOperator>(&GEP)->isInBounds() ? 
8440 Builder->CreateInBoundsGEP(X, Idx, Idx + 2, GEP.getName()) : 8441 Builder->CreateGEP(X, Idx, Idx + 2, GEP.getName()); 8442 // The NewGEP must be pointer typed, so must the old one -> BitCast 8443 return new BitCastInst(NewGEP, GEP.getType()); 8444 } 8445 } 8446 } 8447 } 8448 8449 /// See if we can simplify: 8450 /// X = bitcast A* to B* 8451 /// Y = gep X, <...constant indices...> 8452 /// into a gep of the original struct. This is important for SROA and alias 8453 /// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged. 8454 if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) { 8455 if (TD && 8456 !isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices()) { 8457 // Determine how much the GEP moves the pointer. We are guaranteed to get 8458 // a constant back from EmitGEPOffset. 8459 ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(&GEP)); 8460 int64_t Offset = OffsetV->getSExtValue(); 8461 8462 // If this GEP instruction doesn't move the pointer, just replace the GEP 8463 // with a bitcast of the real input to the dest type. 8464 if (Offset == 0) { 8465 // If the bitcast is of an allocation, and the allocation will be 8466 // converted to match the type of the cast, don't touch this. 8467 if (isa<AllocaInst>(BCI->getOperand(0)) || 8468 isMalloc(BCI->getOperand(0))) { 8469 // See if the bitcast simplifies, if so, don't nuke this GEP yet. 8470 if (Instruction *I = visitBitCast(*BCI)) { 8471 if (I != BCI) { 8472 I->takeName(BCI); 8473 BCI->getParent()->getInstList().insert(BCI, I); 8474 ReplaceInstUsesWith(*BCI, I); 8475 } 8476 return &GEP; 8477 } 8478 } 8479 return new BitCastInst(BCI->getOperand(0), GEP.getType()); 8480 } 8481 8482 // Otherwise, if the offset is non-zero, we need to find out if there is a 8483 // field at Offset in 'A's type. If so, we can pull the cast through the 8484 // GEP. 
8485 SmallVector<Value*, 8> NewIndices; 8486 const Type *InTy = 8487 cast<PointerType>(BCI->getOperand(0)->getType())->getElementType(); 8488 if (FindElementAtOffset(InTy, Offset, NewIndices, TD)) { 8489 Value *NGEP = cast<GEPOperator>(&GEP)->isInBounds() ? 8490 Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices.begin(), 8491 NewIndices.end()) : 8492 Builder->CreateGEP(BCI->getOperand(0), NewIndices.begin(), 8493 NewIndices.end()); 8494 8495 if (NGEP->getType() == GEP.getType()) 8496 return ReplaceInstUsesWith(GEP, NGEP); 8497 NGEP->takeName(&GEP); 8498 return new BitCastInst(NGEP, GEP.getType()); 8499 } 8500 } 8501 } 8502 8503 return 0; 8504} 8505 8506Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { 8507 // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1 8508 if (AI.isArrayAllocation()) { // Check C != 1 8509 if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) { 8510 const Type *NewTy = 8511 ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); 8512 assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!"); 8513 AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName()); 8514 New->setAlignment(AI.getAlignment()); 8515 8516 // Scan to the end of the allocation instructions, to skip over a block of 8517 // allocas if possible...also skip interleaved debug info 8518 // 8519 BasicBlock::iterator It = New; 8520 while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It; 8521 8522 // Now that I is pointing to the first non-allocation-inst in the block, 8523 // insert our getelementptr instruction... 8524 // 8525 Value *NullIdx =Constant::getNullValue(Type::getInt32Ty(AI.getContext())); 8526 Value *Idx[2]; 8527 Idx[0] = NullIdx; 8528 Idx[1] = NullIdx; 8529 Value *V = GetElementPtrInst::CreateInBounds(New, Idx, Idx + 2, 8530 New->getName()+".sub", It); 8531 8532 // Now make everything use the getelementptr instead of the original 8533 // allocation. 
8534 return ReplaceInstUsesWith(AI, V); 8535 } else if (isa<UndefValue>(AI.getArraySize())) { 8536 return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType())); 8537 } 8538 } 8539 8540 if (TD && isa<AllocaInst>(AI) && AI.getAllocatedType()->isSized()) { 8541 // If alloca'ing a zero byte object, replace the alloca with a null pointer. 8542 // Note that we only do this for alloca's, because malloc should allocate 8543 // and return a unique pointer, even for a zero byte allocation. 8544 if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0) 8545 return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType())); 8546 8547 // If the alignment is 0 (unspecified), assign it the preferred alignment. 8548 if (AI.getAlignment() == 0) 8549 AI.setAlignment(TD->getPrefTypeAlignment(AI.getAllocatedType())); 8550 } 8551 8552 return 0; 8553} 8554 8555Instruction *InstCombiner::visitFree(Instruction &FI) { 8556 Value *Op = FI.getOperand(1); 8557 8558 // free undef -> unreachable. 8559 if (isa<UndefValue>(Op)) { 8560 // Insert a new store to null because we cannot modify the CFG here. 8561 new StoreInst(ConstantInt::getTrue(FI.getContext()), 8562 UndefValue::get(Type::getInt1PtrTy(FI.getContext())), &FI); 8563 return EraseInstFromFunction(FI); 8564 } 8565 8566 // If we have 'free null' delete the instruction. This can happen in stl code 8567 // when lots of inlining happens. 8568 if (isa<ConstantPointerNull>(Op)) 8569 return EraseInstFromFunction(FI); 8570 8571 // If we have a malloc call whose only use is a free call, delete both. 
8572 if (isMalloc(Op)) { 8573 if (CallInst* CI = extractMallocCallFromBitCast(Op)) { 8574 if (Op->hasOneUse() && CI->hasOneUse()) { 8575 EraseInstFromFunction(FI); 8576 EraseInstFromFunction(*CI); 8577 return EraseInstFromFunction(*cast<Instruction>(Op)); 8578 } 8579 } else { 8580 // Op is a call to malloc 8581 if (Op->hasOneUse()) { 8582 EraseInstFromFunction(FI); 8583 return EraseInstFromFunction(*cast<Instruction>(Op)); 8584 } 8585 } 8586 } 8587 8588 return 0; 8589} 8590 8591/// InstCombineLoadCast - Fold 'load (cast P)' -> cast (load P)' when possible. 8592static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, 8593 const TargetData *TD) { 8594 User *CI = cast<User>(LI.getOperand(0)); 8595 Value *CastOp = CI->getOperand(0); 8596 8597 const PointerType *DestTy = cast<PointerType>(CI->getType()); 8598 const Type *DestPTy = DestTy->getElementType(); 8599 if (const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) { 8600 8601 // If the address spaces don't match, don't eliminate the cast. 8602 if (DestTy->getAddressSpace() != SrcTy->getAddressSpace()) 8603 return 0; 8604 8605 const Type *SrcPTy = SrcTy->getElementType(); 8606 8607 if (DestPTy->isInteger() || isa<PointerType>(DestPTy) || 8608 isa<VectorType>(DestPTy)) { 8609 // If the source is an array, the code below will not succeed. Check to 8610 // see if a trivial 'gep P, 0, 0' will help matters. Only do this for 8611 // constants. 
8612 if (const ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy)) 8613 if (Constant *CSrc = dyn_cast<Constant>(CastOp)) 8614 if (ASrcTy->getNumElements() != 0) { 8615 Value *Idxs[2]; 8616 Idxs[0] = Constant::getNullValue(Type::getInt32Ty(LI.getContext())); 8617 Idxs[1] = Idxs[0]; 8618 CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs, 2); 8619 SrcTy = cast<PointerType>(CastOp->getType()); 8620 SrcPTy = SrcTy->getElementType(); 8621 } 8622 8623 if (IC.getTargetData() && 8624 (SrcPTy->isInteger() || isa<PointerType>(SrcPTy) || 8625 isa<VectorType>(SrcPTy)) && 8626 // Do not allow turning this into a load of an integer, which is then 8627 // casted to a pointer, this pessimizes pointer analysis a lot. 8628 (isa<PointerType>(SrcPTy) == isa<PointerType>(LI.getType())) && 8629 IC.getTargetData()->getTypeSizeInBits(SrcPTy) == 8630 IC.getTargetData()->getTypeSizeInBits(DestPTy)) { 8631 8632 // Okay, we are casting from one integer or pointer type to another of 8633 // the same size. Instead of casting the pointer before the load, cast 8634 // the result of the loaded value. 8635 Value *NewLoad = 8636 IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName()); 8637 // Now cast the result of the load. 8638 return new BitCastInst(NewLoad, LI.getType()); 8639 } 8640 } 8641 } 8642 return 0; 8643} 8644 8645Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { 8646 Value *Op = LI.getOperand(0); 8647 8648 // Attempt to improve the alignment. 8649 if (TD) { 8650 unsigned KnownAlign = 8651 GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType())); 8652 if (KnownAlign > 8653 (LI.getAlignment() == 0 ? TD->getABITypeAlignment(LI.getType()) : 8654 LI.getAlignment())) 8655 LI.setAlignment(KnownAlign); 8656 } 8657 8658 // load (cast X) --> cast (load X) iff safe. 8659 if (isa<CastInst>(Op)) 8660 if (Instruction *Res = InstCombineLoadCast(*this, LI, TD)) 8661 return Res; 8662 8663 // None of the following transforms are legal for volatile loads. 
8664 if (LI.isVolatile()) return 0; 8665 8666 // Do really simple store-to-load forwarding and load CSE, to catch cases 8667 // where there are several consequtive memory accesses to the same location, 8668 // separated by a few arithmetic operations. 8669 BasicBlock::iterator BBI = &LI; 8670 if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6)) 8671 return ReplaceInstUsesWith(LI, AvailableVal); 8672 8673 // load(gep null, ...) -> unreachable 8674 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) { 8675 const Value *GEPI0 = GEPI->getOperand(0); 8676 // TODO: Consider a target hook for valid address spaces for this xform. 8677 if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0){ 8678 // Insert a new store to null instruction before the load to indicate 8679 // that this code is not reachable. We do this instead of inserting 8680 // an unreachable instruction directly because we cannot modify the 8681 // CFG. 8682 new StoreInst(UndefValue::get(LI.getType()), 8683 Constant::getNullValue(Op->getType()), &LI); 8684 return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); 8685 } 8686 } 8687 8688 // load null/undef -> unreachable 8689 // TODO: Consider a target hook for valid address spaces for this xform. 8690 if (isa<UndefValue>(Op) || 8691 (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) { 8692 // Insert a new store to null instruction before the load to indicate that 8693 // this code is not reachable. We do this instead of inserting an 8694 // unreachable instruction directly because we cannot modify the CFG. 
8695 new StoreInst(UndefValue::get(LI.getType()), 8696 Constant::getNullValue(Op->getType()), &LI); 8697 return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); 8698 } 8699 8700 // Instcombine load (constantexpr_cast global) -> cast (load global) 8701 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op)) 8702 if (CE->isCast()) 8703 if (Instruction *Res = InstCombineLoadCast(*this, LI, TD)) 8704 return Res; 8705 8706 if (Op->hasOneUse()) { 8707 // Change select and PHI nodes to select values instead of addresses: this 8708 // helps alias analysis out a lot, allows many others simplifications, and 8709 // exposes redundancy in the code. 8710 // 8711 // Note that we cannot do the transformation unless we know that the 8712 // introduced loads cannot trap! Something like this is valid as long as 8713 // the condition is always false: load (select bool %C, int* null, int* %G), 8714 // but it would not be valid if we transformed it to load from null 8715 // unconditionally. 8716 // 8717 if (SelectInst *SI = dyn_cast<SelectInst>(Op)) { 8718 // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2). 
8719 if (isSafeToLoadUnconditionally(SI->getOperand(1), SI) && 8720 isSafeToLoadUnconditionally(SI->getOperand(2), SI)) { 8721 Value *V1 = Builder->CreateLoad(SI->getOperand(1), 8722 SI->getOperand(1)->getName()+".val"); 8723 Value *V2 = Builder->CreateLoad(SI->getOperand(2), 8724 SI->getOperand(2)->getName()+".val"); 8725 return SelectInst::Create(SI->getCondition(), V1, V2); 8726 } 8727 8728 // load (select (cond, null, P)) -> load P 8729 if (Constant *C = dyn_cast<Constant>(SI->getOperand(1))) 8730 if (C->isNullValue()) { 8731 LI.setOperand(0, SI->getOperand(2)); 8732 return &LI; 8733 } 8734 8735 // load (select (cond, P, null)) -> load P 8736 if (Constant *C = dyn_cast<Constant>(SI->getOperand(2))) 8737 if (C->isNullValue()) { 8738 LI.setOperand(0, SI->getOperand(1)); 8739 return &LI; 8740 } 8741 } 8742 } 8743 return 0; 8744} 8745 8746/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P 8747/// when possible. This makes it generally easy to do alias analysis and/or 8748/// SROA/mem2reg of the memory object. 8749static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { 8750 User *CI = cast<User>(SI.getOperand(1)); 8751 Value *CastOp = CI->getOperand(0); 8752 8753 const Type *DestPTy = cast<PointerType>(CI->getType())->getElementType(); 8754 const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType()); 8755 if (SrcTy == 0) return 0; 8756 8757 const Type *SrcPTy = SrcTy->getElementType(); 8758 8759 if (!DestPTy->isInteger() && !isa<PointerType>(DestPTy)) 8760 return 0; 8761 8762 /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep" 8763 /// to its first element. This allows us to handle things like: 8764 /// store i32 xxx, (bitcast {foo*, float}* %P to i32*) 8765 /// on 32-bit hosts. 8766 SmallVector<Value*, 4> NewGEPIndices; 8767 8768 // If the source is an array, the code below will not succeed. Check to 8769 // see if a trivial 'gep P, 0, 0' will help matters. 
Only do this for 8770 // constants. 8771 if (isa<ArrayType>(SrcPTy) || isa<StructType>(SrcPTy)) { 8772 // Index through pointer. 8773 Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext())); 8774 NewGEPIndices.push_back(Zero); 8775 8776 while (1) { 8777 if (const StructType *STy = dyn_cast<StructType>(SrcPTy)) { 8778 if (!STy->getNumElements()) /* Struct can be empty {} */ 8779 break; 8780 NewGEPIndices.push_back(Zero); 8781 SrcPTy = STy->getElementType(0); 8782 } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) { 8783 NewGEPIndices.push_back(Zero); 8784 SrcPTy = ATy->getElementType(); 8785 } else { 8786 break; 8787 } 8788 } 8789 8790 SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace()); 8791 } 8792 8793 if (!SrcPTy->isInteger() && !isa<PointerType>(SrcPTy)) 8794 return 0; 8795 8796 // If the pointers point into different address spaces or if they point to 8797 // values with different sizes, we can't do the transformation. 8798 if (!IC.getTargetData() || 8799 SrcTy->getAddressSpace() != 8800 cast<PointerType>(CI->getType())->getAddressSpace() || 8801 IC.getTargetData()->getTypeSizeInBits(SrcPTy) != 8802 IC.getTargetData()->getTypeSizeInBits(DestPTy)) 8803 return 0; 8804 8805 // Okay, we are casting from one integer or pointer type to another of 8806 // the same size. Instead of casting the pointer before 8807 // the store, cast the value to be stored. 8808 Value *NewCast; 8809 Value *SIOp0 = SI.getOperand(0); 8810 Instruction::CastOps opcode = Instruction::BitCast; 8811 const Type* CastSrcTy = SIOp0->getType(); 8812 const Type* CastDstTy = SrcPTy; 8813 if (isa<PointerType>(CastDstTy)) { 8814 if (CastSrcTy->isInteger()) 8815 opcode = Instruction::IntToPtr; 8816 } else if (isa<IntegerType>(CastDstTy)) { 8817 if (isa<PointerType>(SIOp0->getType())) 8818 opcode = Instruction::PtrToInt; 8819 } 8820 8821 // SIOp0 is a pointer to aggregate and this is a store to the first field, 8822 // emit a GEP to index into its first field. 
8823 if (!NewGEPIndices.empty()) 8824 CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices.begin(), 8825 NewGEPIndices.end()); 8826 8827 NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy, 8828 SIOp0->getName()+".c"); 8829 return new StoreInst(NewCast, CastOp); 8830} 8831 8832/// equivalentAddressValues - Test if A and B will obviously have the same 8833/// value. This includes recognizing that %t0 and %t1 will have the same 8834/// value in code like this: 8835/// %t0 = getelementptr \@a, 0, 3 8836/// store i32 0, i32* %t0 8837/// %t1 = getelementptr \@a, 0, 3 8838/// %t2 = load i32* %t1 8839/// 8840static bool equivalentAddressValues(Value *A, Value *B) { 8841 // Test if the values are trivially equivalent. 8842 if (A == B) return true; 8843 8844 // Test if the values come form identical arithmetic instructions. 8845 // This uses isIdenticalToWhenDefined instead of isIdenticalTo because 8846 // its only used to compare two uses within the same basic block, which 8847 // means that they'll always either have the same value or one of them 8848 // will have an undefined value. 8849 if (isa<BinaryOperator>(A) || 8850 isa<CastInst>(A) || 8851 isa<PHINode>(A) || 8852 isa<GetElementPtrInst>(A)) 8853 if (Instruction *BI = dyn_cast<Instruction>(B)) 8854 if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI)) 8855 return true; 8856 8857 // Otherwise they may not be equivalent. 8858 return false; 8859} 8860 8861// If this instruction has two uses, one of which is a llvm.dbg.declare, 8862// return the llvm.dbg.declare. 
8863DbgDeclareInst *InstCombiner::hasOneUsePlusDeclare(Value *V) { 8864 if (!V->hasNUses(2)) 8865 return 0; 8866 for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); 8867 UI != E; ++UI) { 8868 if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(UI)) 8869 return DI; 8870 if (isa<BitCastInst>(UI) && UI->hasOneUse()) { 8871 if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(UI->use_begin())) 8872 return DI; 8873 } 8874 } 8875 return 0; 8876} 8877 8878Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { 8879 Value *Val = SI.getOperand(0); 8880 Value *Ptr = SI.getOperand(1); 8881 8882 // If the RHS is an alloca with a single use, zapify the store, making the 8883 // alloca dead. 8884 // If the RHS is an alloca with a two uses, the other one being a 8885 // llvm.dbg.declare, zapify the store and the declare, making the 8886 // alloca dead. We must do this to prevent declare's from affecting 8887 // codegen. 8888 if (!SI.isVolatile()) { 8889 if (Ptr->hasOneUse()) { 8890 if (isa<AllocaInst>(Ptr)) { 8891 EraseInstFromFunction(SI); 8892 ++NumCombined; 8893 return 0; 8894 } 8895 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) { 8896 if (isa<AllocaInst>(GEP->getOperand(0))) { 8897 if (GEP->getOperand(0)->hasOneUse()) { 8898 EraseInstFromFunction(SI); 8899 ++NumCombined; 8900 return 0; 8901 } 8902 if (DbgDeclareInst *DI = hasOneUsePlusDeclare(GEP->getOperand(0))) { 8903 EraseInstFromFunction(*DI); 8904 EraseInstFromFunction(SI); 8905 ++NumCombined; 8906 return 0; 8907 } 8908 } 8909 } 8910 } 8911 if (DbgDeclareInst *DI = hasOneUsePlusDeclare(Ptr)) { 8912 EraseInstFromFunction(*DI); 8913 EraseInstFromFunction(SI); 8914 ++NumCombined; 8915 return 0; 8916 } 8917 } 8918 8919 // Attempt to improve the alignment. 8920 if (TD) { 8921 unsigned KnownAlign = 8922 GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType())); 8923 if (KnownAlign > 8924 (SI.getAlignment() == 0 ? 
TD->getABITypeAlignment(Val->getType()) : 8925 SI.getAlignment())) 8926 SI.setAlignment(KnownAlign); 8927 } 8928 8929 // Do really simple DSE, to catch cases where there are several consecutive 8930 // stores to the same location, separated by a few arithmetic operations. This 8931 // situation often occurs with bitfield accesses. 8932 BasicBlock::iterator BBI = &SI; 8933 for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts; 8934 --ScanInsts) { 8935 --BBI; 8936 // Don't count debug info directives, lest they affect codegen, 8937 // and we skip pointer-to-pointer bitcasts, which are NOPs. 8938 // It is necessary for correctness to skip those that feed into a 8939 // llvm.dbg.declare, as these are not present when debugging is off. 8940 if (isa<DbgInfoIntrinsic>(BBI) || 8941 (isa<BitCastInst>(BBI) && isa<PointerType>(BBI->getType()))) { 8942 ScanInsts++; 8943 continue; 8944 } 8945 8946 if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) { 8947 // Prev store isn't volatile, and stores to the same location? 8948 if (!PrevSI->isVolatile() &&equivalentAddressValues(PrevSI->getOperand(1), 8949 SI.getOperand(1))) { 8950 ++NumDeadStore; 8951 ++BBI; 8952 EraseInstFromFunction(*PrevSI); 8953 continue; 8954 } 8955 break; 8956 } 8957 8958 // If this is a load, we have to stop. However, if the loaded value is from 8959 // the pointer we're loading and is producing the pointer we're storing, 8960 // then *this* store is dead (X = load P; store X -> P). 8961 if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) { 8962 if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) && 8963 !SI.isVolatile()) { 8964 EraseInstFromFunction(SI); 8965 ++NumCombined; 8966 return 0; 8967 } 8968 // Otherwise, this is a load from some other location. Stores before it 8969 // may not be dead. 8970 break; 8971 } 8972 8973 // Don't skip over loads or things that can modify memory. 
8974 if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory()) 8975 break; 8976 } 8977 8978 8979 if (SI.isVolatile()) return 0; // Don't hack volatile stores. 8980 8981 // store X, null -> turns into 'unreachable' in SimplifyCFG 8982 if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) { 8983 if (!isa<UndefValue>(Val)) { 8984 SI.setOperand(0, UndefValue::get(Val->getType())); 8985 if (Instruction *U = dyn_cast<Instruction>(Val)) 8986 Worklist.Add(U); // Dropped a use. 8987 ++NumCombined; 8988 } 8989 return 0; // Do not modify these! 8990 } 8991 8992 // store undef, Ptr -> noop 8993 if (isa<UndefValue>(Val)) { 8994 EraseInstFromFunction(SI); 8995 ++NumCombined; 8996 return 0; 8997 } 8998 8999 // If the pointer destination is a cast, see if we can fold the cast into the 9000 // source instead. 9001 if (isa<CastInst>(Ptr)) 9002 if (Instruction *Res = InstCombineStoreToCast(*this, SI)) 9003 return Res; 9004 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) 9005 if (CE->isCast()) 9006 if (Instruction *Res = InstCombineStoreToCast(*this, SI)) 9007 return Res; 9008 9009 9010 // If this store is the last instruction in the basic block (possibly 9011 // excepting debug info instructions and the pointer bitcasts that feed 9012 // into them), and if the block ends with an unconditional branch, try 9013 // to move it to the successor block. 9014 BBI = &SI; 9015 do { 9016 ++BBI; 9017 } while (isa<DbgInfoIntrinsic>(BBI) || 9018 (isa<BitCastInst>(BBI) && isa<PointerType>(BBI->getType()))); 9019 if (BranchInst *BI = dyn_cast<BranchInst>(BBI)) 9020 if (BI->isUnconditional()) 9021 if (SimplifyStoreAtEndOfBlock(SI)) 9022 return 0; // xform done! 9023 9024 return 0; 9025} 9026 9027/// SimplifyStoreAtEndOfBlock - Turn things like: 9028/// if () { *P = v1; } else { *P = v2 } 9029/// into a phi node with a store in the successor. 9030/// 9031/// Simplify things like: 9032/// *P = v1; if () { *P = v2; } 9033/// into a phi node with a store in the successor. 
9034/// 9035bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { 9036 BasicBlock *StoreBB = SI.getParent(); 9037 9038 // Check to see if the successor block has exactly two incoming edges. If 9039 // so, see if the other predecessor contains a store to the same location. 9040 // if so, insert a PHI node (if needed) and move the stores down. 9041 BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0); 9042 9043 // Determine whether Dest has exactly two predecessors and, if so, compute 9044 // the other predecessor. 9045 pred_iterator PI = pred_begin(DestBB); 9046 BasicBlock *OtherBB = 0; 9047 if (*PI != StoreBB) 9048 OtherBB = *PI; 9049 ++PI; 9050 if (PI == pred_end(DestBB)) 9051 return false; 9052 9053 if (*PI != StoreBB) { 9054 if (OtherBB) 9055 return false; 9056 OtherBB = *PI; 9057 } 9058 if (++PI != pred_end(DestBB)) 9059 return false; 9060 9061 // Bail out if all the relevant blocks aren't distinct (this can happen, 9062 // for example, if SI is in an infinite loop) 9063 if (StoreBB == DestBB || OtherBB == DestBB) 9064 return false; 9065 9066 // Verify that the other block ends in a branch and is not otherwise empty. 9067 BasicBlock::iterator BBI = OtherBB->getTerminator(); 9068 BranchInst *OtherBr = dyn_cast<BranchInst>(BBI); 9069 if (!OtherBr || BBI == OtherBB->begin()) 9070 return false; 9071 9072 // If the other block ends in an unconditional branch, check for the 'if then 9073 // else' case. there is an instruction before the branch. 9074 StoreInst *OtherStore = 0; 9075 if (OtherBr->isUnconditional()) { 9076 --BBI; 9077 // Skip over debugging info. 9078 while (isa<DbgInfoIntrinsic>(BBI) || 9079 (isa<BitCastInst>(BBI) && isa<PointerType>(BBI->getType()))) { 9080 if (BBI==OtherBB->begin()) 9081 return false; 9082 --BBI; 9083 } 9084 // If this isn't a store, isn't a store to the same location, or if the 9085 // alignments differ, bail out. 
9086 OtherStore = dyn_cast<StoreInst>(BBI); 9087 if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) || 9088 OtherStore->getAlignment() != SI.getAlignment()) 9089 return false; 9090 } else { 9091 // Otherwise, the other block ended with a conditional branch. If one of the 9092 // destinations is StoreBB, then we have the if/then case. 9093 if (OtherBr->getSuccessor(0) != StoreBB && 9094 OtherBr->getSuccessor(1) != StoreBB) 9095 return false; 9096 9097 // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an 9098 // if/then triangle. See if there is a store to the same ptr as SI that 9099 // lives in OtherBB. 9100 for (;; --BBI) { 9101 // Check to see if we find the matching store. 9102 if ((OtherStore = dyn_cast<StoreInst>(BBI))) { 9103 if (OtherStore->getOperand(1) != SI.getOperand(1) || 9104 OtherStore->getAlignment() != SI.getAlignment()) 9105 return false; 9106 break; 9107 } 9108 // If we find something that may be using or overwriting the stored 9109 // value, or if we run out of instructions, we can't do the xform. 9110 if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() || 9111 BBI == OtherBB->begin()) 9112 return false; 9113 } 9114 9115 // In order to eliminate the store in OtherBr, we have to 9116 // make sure nothing reads or overwrites the stored value in 9117 // StoreBB. 9118 for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) { 9119 // FIXME: This should really be AA driven. 9120 if (I->mayReadFromMemory() || I->mayWriteToMemory()) 9121 return false; 9122 } 9123 } 9124 9125 // Insert a PHI node now if we need it. 
Value *MergedVal = OtherStore->getOperand(0);
  // If the two stores write different values, merge them with a PHI node
  // inserted at the front of DestBB, with one incoming value per store.
  if (MergedVal != SI.getOperand(0)) {
    PHINode *PN = PHINode::Create(MergedVal->getType(), "storemerge");
    PN->reserveOperandSpace(2);
    PN->addIncoming(SI.getOperand(0), SI.getParent());
    PN->addIncoming(OtherStore->getOperand(0), OtherBB);
    MergedVal = InsertNewInstBefore(PN, DestBB->front());
  }

  // Advance to a place where it is safe to insert the new store and
  // insert it.
  BBI = DestBB->getFirstNonPHI();
  InsertNewInstBefore(new StoreInst(MergedVal, SI.getOperand(1),
                                    OtherStore->isVolatile(),
                                    SI.getAlignment()), *BBI);

  // Nuke the old stores.
  EraseInstFromFunction(SI);
  EraseInstFromFunction(*OtherStore);
  ++NumCombined;
  return true;
}


/// visitBranchInst - Canonicalize a conditional branch.  Three rewrites are
/// attempted in order; each swaps the two successors and returns &BI:
///   1. br (not X), T, F  ->  br X, F, T            (X must not be a constant)
///   2. br (fcmp one/ole/oge X, Y), T, F  ->  inverse predicate, F, T
///   3. br (icmp ne/ule/sle/uge/sge X, Y), T, F  ->  inverse predicate, F, T
/// Rewrites 2 and 3 modify the compare in place, so they only fire when the
/// branch is the compare's single user.  Returns null if nothing matched.
Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
  // Change br (not X), label True, label False to: br X, label False, True
  Value *X = 0;
  BasicBlock *TrueDest;
  BasicBlock *FalseDest;
  if (match(&BI, m_Br(m_Not(m_Value(X)), TrueDest, FalseDest)) &&
      !isa<Constant>(X)) {
    // Swap Destinations and condition...
    BI.setCondition(X);
    BI.setSuccessor(0, FalseDest);
    BI.setSuccessor(1, TrueDest);
    return &BI;
  }

  // Canonicalize a branch on fcmp one/ole/oge: replace the predicate with its
  // inverse (as computed by getInversePredicate) and swap the destinations.
  FCmpInst::Predicate FPred; Value *Y;
  if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)),
                      TrueDest, FalseDest)) &&
      BI.getCondition()->hasOneUse())
    if (FPred == FCmpInst::FCMP_ONE || FPred == FCmpInst::FCMP_OLE ||
        FPred == FCmpInst::FCMP_OGE) {
      FCmpInst *Cond = cast<FCmpInst>(BI.getCondition());
      Cond->setPredicate(FCmpInst::getInversePredicate(FPred));

      // Swap Destinations and condition.
      BI.setSuccessor(0, FalseDest);
      BI.setSuccessor(1, TrueDest);
      // The compare was mutated in place; queue it so it gets revisited.
      Worklist.Add(Cond);
      return &BI;
    }

  // Canonicalize a branch on icmp ne/ule/sle/uge/sge the same way: invert the
  // predicate (e.g. icmp_ne -> icmp_eq) and swap the destinations.
  ICmpInst::Predicate IPred;
  if (match(&BI, m_Br(m_ICmp(IPred, m_Value(X), m_Value(Y)),
                      TrueDest, FalseDest)) &&
      BI.getCondition()->hasOneUse())
    if (IPred == ICmpInst::ICMP_NE  || IPred == ICmpInst::ICMP_ULE ||
        IPred == ICmpInst::ICMP_SLE || IPred == ICmpInst::ICMP_UGE ||
        IPred == ICmpInst::ICMP_SGE) {
      ICmpInst *Cond = cast<ICmpInst>(BI.getCondition());
      Cond->setPredicate(ICmpInst::getInversePredicate(IPred));
      // Swap Destinations and condition.
      BI.setSuccessor(0, FalseDest);
      BI.setSuccessor(1, TrueDest);
      // The compare was mutated in place; queue it so it gets revisited.
      Worklist.Add(Cond);
      return &BI;
    }

  return 0;
}

/// visitSwitchInst - Fold an 'add X, C' feeding the switch condition into the
/// case values: the switch is rewritten to test X directly and C is subtracted
/// from every case value.  Returns &SI when rewritten, null otherwise.
Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
  Value *Cond = SI.getCondition();
  if (Instruction *I = dyn_cast<Instruction>(Cond)) {
    if (I->getOpcode() == Instruction::Add)
      if (ConstantInt *AddRHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
        // change 'switch (X+4) case 1:' into 'switch (X) case -3'
        // The loop rewrites every second operand starting at index 2; operand
        // 0 (the condition) is replaced separately below.
        for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2)
          SI.setOperand(i,
                   ConstantExpr::getSub(cast<Constant>(SI.getOperand(i)),
                                                AddRHS));
        SI.setOperand(0, I->getOperand(0));
        // The add lost a use; queue it so it can be revisited.
        Worklist.Add(I);
        return &SI;
      }
  }
  return 0;
}

/// visitExtractValueInst - Simplify an extractvalue whose aggregate operand is
/// a constant, an insertvalue, or a single-use arithmetic-with-overflow
/// intrinsic.  Returns a replacement instruction (or the result of
/// ReplaceInstUsesWith) on success and null if nothing could be simplified.
Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
  Value *Agg = EV.getAggregateOperand();

  // An extract with no indices is just the aggregate itself.
  if (!EV.hasIndices())
    return ReplaceInstUsesWith(EV, Agg);

  if (Constant *C = dyn_cast<Constant>(Agg)) {
    if (isa<UndefValue>(C))
      return ReplaceInstUsesWith(EV, UndefValue::get(EV.getType()));

    if (isa<ConstantAggregateZero>(C))
      return ReplaceInstUsesWith(EV, Constant::getNullValue(EV.getType()));

    if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) {
      // Extract the element indexed by the first index out of the constant
      Value *V = C->getOperand(*EV.idx_begin());
      if (EV.getNumIndices() > 1)
        // Extract the remaining indices out of the constant indexed by the
        // first index
        return ExtractValueInst::Create(V, EV.idx_begin() + 1, EV.idx_end());
      else
        return ReplaceInstUsesWith(EV, V);
    }
    return 0; // Can't handle other constants
  }
  if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
    // We're extracting from an insertvalue instruction, compare the indices
    const unsigned *exti, *exte, *insi, *inse;
    // Walk both index lists in lock step until one runs out or they diverge.
    for (exti = EV.idx_begin(), insi = IV->idx_begin(),
         exte = EV.idx_end(), inse = IV->idx_end();
         exti != exte && insi != inse;
         ++exti, ++insi) {
      if (*insi != *exti)
        // The insert and extract both reference distinctly different elements.
        // This means the extract is not influenced by the insert, and we can
        // replace the aggregate operand of the extract with the aggregate
        // operand of the insert. i.e., replace
        // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
        // %E = extractvalue { i32, { i32 } } %I, 0
        // with
        // %E = extractvalue { i32, { i32 } } %A, 0
        return ExtractValueInst::Create(IV->getAggregateOperand(),
                                        EV.idx_begin(), EV.idx_end());
    }
    if (exti == exte && insi == inse)
      // Both iterators are at the end: Index lists are identical. Replace
      // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
      // %C = extractvalue { i32, { i32 } } %B, 1, 0
      // with "i32 42"
      return ReplaceInstUsesWith(EV, IV->getInsertedValueOperand());
    if (exti == exte) {
      // The extract list is a prefix of the insert list. i.e. replace
      // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
      // %E = extractvalue { i32, { i32 } } %I, 1
      // with
      // %X = extractvalue { i32, { i32 } } %A, 1
      // %E = insertvalue { i32 } %X, i32 42, 0
      // by switching the order of the insert and extract (though the
      // insertvalue should be left in, since it may have other uses).
      Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(),
                                                 EV.idx_begin(), EV.idx_end());
      return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
                                     insi, inse);
    }
    if (insi == inse)
      // The insert list is a prefix of the extract list
      // We can simply remove the common indices from the extract and make it
      // operate on the inserted value instead of the insertvalue result.
      // i.e., replace
      // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
      // %E = extractvalue { i32, { i32 } } %I, 1, 0
      // with
      // %E = extractvalue { i32 } { i32 42 }, 0
      return ExtractValueInst::Create(IV->getInsertedValueOperand(),
                                      exti, exte);
  }
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) {
    // We're extracting from an intrinsic, see if we're the only user, which
    // allows us to simplify multiple result intrinsics to simpler things that
    // just get one value..
    if (II->hasOneUse()) {
      // Check if we're grabbing the overflow bit or the result of a 'with
      // overflow' intrinsic.  If it's the latter we can remove the intrinsic
      // and replace it with a traditional binary instruction.
      // Only index 0 is rewritten; an extract of any other index breaks out
      // of the switch and is left untouched.
      switch (II->getIntrinsicID()) {
      case Intrinsic::uadd_with_overflow:
      case Intrinsic::sadd_with_overflow:
        if (*EV.idx_begin() == 0) {  // Normal result.
          // Grab the operands first: the intrinsic is erased before the
          // replacement add is created.
          Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
          II->replaceAllUsesWith(UndefValue::get(II->getType()));
          EraseInstFromFunction(*II);
          return BinaryOperator::CreateAdd(LHS, RHS);
        }
        break;
      case Intrinsic::usub_with_overflow:
      case Intrinsic::ssub_with_overflow:
        if (*EV.idx_begin() == 0) {  // Normal result.
          Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
          II->replaceAllUsesWith(UndefValue::get(II->getType()));
          EraseInstFromFunction(*II);
          return BinaryOperator::CreateSub(LHS, RHS);
        }
        break;
      case Intrinsic::umul_with_overflow:
      case Intrinsic::smul_with_overflow:
        if (*EV.idx_begin() == 0) {  // Normal result.
          Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
          II->replaceAllUsesWith(UndefValue::get(II->getType()));
          EraseInstFromFunction(*II);
          return BinaryOperator::CreateMul(LHS, RHS);
        }
        break;
      default:
        break;
      }
    }
  }
  // Can't simplify extracts from other values. Note that nested extracts are
  // already simplified implicitly by the above (extract ( extract (insert) )
  // will be translated into extract ( insert ( extract ) ) first and then just
  // the value inserted, if appropriate).
  return 0;
}

/// CheapToScalarize - Return true if the value is cheaper to scalarize than it
/// is to leave as a vector operation.
///
/// \param V          the vector value being considered.
/// \param isConstant true when the extract index is a constant; this enables
///                   the ConstantVector and insertelement cases below.
static bool CheapToScalarize(Value *V, bool isConstant) {
  if (isa<ConstantAggregateZero>(V))
    return true;
  if (ConstantVector *C = dyn_cast<ConstantVector>(V)) {
    if (isConstant) return true;
    // If all elts are the same, we can extract.
    Constant *Op0 = C->getOperand(0);
    for (unsigned i = 1; i < C->getNumOperands(); ++i)
      if (C->getOperand(i) != Op0)
        return false;
    return true;
  }
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) return false;

  // Insert element gets simplified to the inserted element or is deleted if
  // this is constant idx extract element and its a constant idx insertelt.
  if (I->getOpcode() == Instruction::InsertElement && isConstant &&
      isa<ConstantInt>(I->getOperand(2)))
    return true;
  if (I->getOpcode() == Instruction::Load && I->hasOneUse())
    return true;
  // A single-use binary operator or compare qualifies as soon as either of
  // its operands does.
  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I))
    if (BO->hasOneUse() &&
        (CheapToScalarize(BO->getOperand(0), isConstant) ||
         CheapToScalarize(BO->getOperand(1), isConstant)))
      return true;
  if (CmpInst *CI = dyn_cast<CmpInst>(I))
    if (CI->hasOneUse() &&
        (CheapToScalarize(CI->getOperand(0), isConstant) ||
         CheapToScalarize(CI->getOperand(1), isConstant)))
      return true;

  return false;
}

/// Read and decode a shufflevector mask.
///
/// It turns undef elements into values that are larger than the number of
/// elements in the input.
9385static std::vector<unsigned> getShuffleMask(const ShuffleVectorInst *SVI) { 9386 unsigned NElts = SVI->getType()->getNumElements(); 9387 if (isa<ConstantAggregateZero>(SVI->getOperand(2))) 9388 return std::vector<unsigned>(NElts, 0); 9389 if (isa<UndefValue>(SVI->getOperand(2))) 9390 return std::vector<unsigned>(NElts, 2*NElts); 9391 9392 std::vector<unsigned> Result; 9393 const ConstantVector *CP = cast<ConstantVector>(SVI->getOperand(2)); 9394 for (User::const_op_iterator i = CP->op_begin(), e = CP->op_end(); i!=e; ++i) 9395 if (isa<UndefValue>(*i)) 9396 Result.push_back(NElts*2); // undef -> 8 9397 else 9398 Result.push_back(cast<ConstantInt>(*i)->getZExtValue()); 9399 return Result; 9400} 9401 9402/// FindScalarElement - Given a vector and an element number, see if the scalar 9403/// value is already around as a register, for example if it were inserted then 9404/// extracted from the vector. 9405static Value *FindScalarElement(Value *V, unsigned EltNo) { 9406 assert(isa<VectorType>(V->getType()) && "Not looking at a vector?"); 9407 const VectorType *PTy = cast<VectorType>(V->getType()); 9408 unsigned Width = PTy->getNumElements(); 9409 if (EltNo >= Width) // Out of range access. 9410 return UndefValue::get(PTy->getElementType()); 9411 9412 if (isa<UndefValue>(V)) 9413 return UndefValue::get(PTy->getElementType()); 9414 else if (isa<ConstantAggregateZero>(V)) 9415 return Constant::getNullValue(PTy->getElementType()); 9416 else if (ConstantVector *CP = dyn_cast<ConstantVector>(V)) 9417 return CP->getOperand(EltNo); 9418 else if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) { 9419 // If this is an insert to a variable element, we don't know what it is. 9420 if (!isa<ConstantInt>(III->getOperand(2))) 9421 return 0; 9422 unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue(); 9423 9424 // If this is an insert to the element we are looking for, return the 9425 // inserted value. 
9426 if (EltNo == IIElt) 9427 return III->getOperand(1); 9428 9429 // Otherwise, the insertelement doesn't modify the value, recurse on its 9430 // vector input. 9431 return FindScalarElement(III->getOperand(0), EltNo); 9432 } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) { 9433 unsigned LHSWidth = 9434 cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements(); 9435 unsigned InEl = getShuffleMask(SVI)[EltNo]; 9436 if (InEl < LHSWidth) 9437 return FindScalarElement(SVI->getOperand(0), InEl); 9438 else if (InEl < LHSWidth*2) 9439 return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth); 9440 else 9441 return UndefValue::get(PTy->getElementType()); 9442 } 9443 9444 // Otherwise, we don't know. 9445 return 0; 9446} 9447 9448Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { 9449 // If vector val is undef, replace extract with scalar undef. 9450 if (isa<UndefValue>(EI.getOperand(0))) 9451 return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); 9452 9453 // If vector val is constant 0, replace extract with scalar 0. 9454 if (isa<ConstantAggregateZero>(EI.getOperand(0))) 9455 return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType())); 9456 9457 if (ConstantVector *C = dyn_cast<ConstantVector>(EI.getOperand(0))) { 9458 // If vector val is constant with all elements the same, replace EI with 9459 // that element. When the elements are not identical, we cannot replace yet 9460 // (we do that below, but only when the index is constant). 9461 Constant *op0 = C->getOperand(0); 9462 for (unsigned i = 1; i != C->getNumOperands(); ++i) 9463 if (C->getOperand(i) != op0) { 9464 op0 = 0; 9465 break; 9466 } 9467 if (op0) 9468 return ReplaceInstUsesWith(EI, op0); 9469 } 9470 9471 // If extracting a specified index from the vector, see if we can recursively 9472 // find a previously computed scalar that was inserted into the vector. 
9473 if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) { 9474 unsigned IndexVal = IdxC->getZExtValue(); 9475 unsigned VectorWidth = EI.getVectorOperandType()->getNumElements(); 9476 9477 // If this is extracting an invalid index, turn this into undef, to avoid 9478 // crashing the code below. 9479 if (IndexVal >= VectorWidth) 9480 return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); 9481 9482 // This instruction only demands the single element from the input vector. 9483 // If the input vector has a single use, simplify it based on this use 9484 // property. 9485 if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) { 9486 APInt UndefElts(VectorWidth, 0); 9487 APInt DemandedMask(VectorWidth, 1 << IndexVal); 9488 if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0), 9489 DemandedMask, UndefElts)) { 9490 EI.setOperand(0, V); 9491 return &EI; 9492 } 9493 } 9494 9495 if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal)) 9496 return ReplaceInstUsesWith(EI, Elt); 9497 9498 // If the this extractelement is directly using a bitcast from a vector of 9499 // the same number of elements, see if we can find the source element from 9500 // it. In this case, we will end up needing to bitcast the scalars. 
9501 if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) { 9502 if (const VectorType *VT = 9503 dyn_cast<VectorType>(BCI->getOperand(0)->getType())) 9504 if (VT->getNumElements() == VectorWidth) 9505 if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal)) 9506 return new BitCastInst(Elt, EI.getType()); 9507 } 9508 } 9509 9510 if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) { 9511 // Push extractelement into predecessor operation if legal and 9512 // profitable to do so 9513 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) { 9514 if (I->hasOneUse() && 9515 CheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) { 9516 Value *newEI0 = 9517 Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1), 9518 EI.getName()+".lhs"); 9519 Value *newEI1 = 9520 Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1), 9521 EI.getName()+".rhs"); 9522 return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1); 9523 } 9524 } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) { 9525 // Extracting the inserted element? 9526 if (IE->getOperand(2) == EI.getOperand(1)) 9527 return ReplaceInstUsesWith(EI, IE->getOperand(1)); 9528 // If the inserted and extracted elements are constants, they must not 9529 // be the same value, extract from the pre-inserted value instead. 9530 if (isa<Constant>(IE->getOperand(2)) && isa<Constant>(EI.getOperand(1))) { 9531 Worklist.AddValue(EI.getOperand(0)); 9532 EI.setOperand(0, IE->getOperand(0)); 9533 return &EI; 9534 } 9535 } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) { 9536 // If this is extracting an element from a shufflevector, figure out where 9537 // it came from and extract from the appropriate input element instead. 
9538 if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) { 9539 unsigned SrcIdx = getShuffleMask(SVI)[Elt->getZExtValue()]; 9540 Value *Src; 9541 unsigned LHSWidth = 9542 cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements(); 9543 9544 if (SrcIdx < LHSWidth) 9545 Src = SVI->getOperand(0); 9546 else if (SrcIdx < LHSWidth*2) { 9547 SrcIdx -= LHSWidth; 9548 Src = SVI->getOperand(1); 9549 } else { 9550 return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); 9551 } 9552 return ExtractElementInst::Create(Src, 9553 ConstantInt::get(Type::getInt32Ty(EI.getContext()), 9554 SrcIdx, false)); 9555 } 9556 } 9557 // FIXME: Canonicalize extractelement(bitcast) -> bitcast(extractelement) 9558 } 9559 return 0; 9560} 9561 9562/// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns 9563/// elements from either LHS or RHS, return the shuffle mask and true. 9564/// Otherwise, return false. 9565static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, 9566 std::vector<Constant*> &Mask) { 9567 assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() && 9568 "Invalid CollectSingleShuffleElements"); 9569 unsigned NumElts = cast<VectorType>(V->getType())->getNumElements(); 9570 9571 if (isa<UndefValue>(V)) { 9572 Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext()))); 9573 return true; 9574 } 9575 9576 if (V == LHS) { 9577 for (unsigned i = 0; i != NumElts; ++i) 9578 Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i)); 9579 return true; 9580 } 9581 9582 if (V == RHS) { 9583 for (unsigned i = 0; i != NumElts; ++i) 9584 Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), 9585 i+NumElts)); 9586 return true; 9587 } 9588 9589 if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) { 9590 // If this is an insert of an extract from some other vector, include it. 
9591 Value *VecOp = IEI->getOperand(0); 9592 Value *ScalarOp = IEI->getOperand(1); 9593 Value *IdxOp = IEI->getOperand(2); 9594 9595 if (!isa<ConstantInt>(IdxOp)) 9596 return false; 9597 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); 9598 9599 if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector. 9600 // Okay, we can handle this if the vector we are insertinting into is 9601 // transitively ok. 9602 if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { 9603 // If so, update the mask to reflect the inserted undef. 9604 Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext())); 9605 return true; 9606 } 9607 } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){ 9608 if (isa<ConstantInt>(EI->getOperand(1)) && 9609 EI->getOperand(0)->getType() == V->getType()) { 9610 unsigned ExtractedIdx = 9611 cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); 9612 9613 // This must be extracting from either LHS or RHS. 9614 if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) { 9615 // Okay, we can handle this if the vector we are insertinting into is 9616 // transitively ok. 9617 if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { 9618 // If so, update the mask to reflect the inserted value. 9619 if (EI->getOperand(0) == LHS) { 9620 Mask[InsertedIdx % NumElts] = 9621 ConstantInt::get(Type::getInt32Ty(V->getContext()), 9622 ExtractedIdx); 9623 } else { 9624 assert(EI->getOperand(0) == RHS); 9625 Mask[InsertedIdx % NumElts] = 9626 ConstantInt::get(Type::getInt32Ty(V->getContext()), 9627 ExtractedIdx+NumElts); 9628 9629 } 9630 return true; 9631 } 9632 } 9633 } 9634 } 9635 } 9636 // TODO: Handle shufflevector here! 9637 9638 return false; 9639} 9640 9641/// CollectShuffleElements - We are building a shuffle of V, using RHS as the 9642/// RHS of the shuffle instruction, if it is not null. Return a shuffle mask 9643/// that computes V and the LHS value of the shuffle. 
9644static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask, 9645 Value *&RHS) { 9646 assert(isa<VectorType>(V->getType()) && 9647 (RHS == 0 || V->getType() == RHS->getType()) && 9648 "Invalid shuffle!"); 9649 unsigned NumElts = cast<VectorType>(V->getType())->getNumElements(); 9650 9651 if (isa<UndefValue>(V)) { 9652 Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext()))); 9653 return V; 9654 } else if (isa<ConstantAggregateZero>(V)) { 9655 Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0)); 9656 return V; 9657 } else if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) { 9658 // If this is an insert of an extract from some other vector, include it. 9659 Value *VecOp = IEI->getOperand(0); 9660 Value *ScalarOp = IEI->getOperand(1); 9661 Value *IdxOp = IEI->getOperand(2); 9662 9663 if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) { 9664 if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) && 9665 EI->getOperand(0)->getType() == V->getType()) { 9666 unsigned ExtractedIdx = 9667 cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); 9668 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); 9669 9670 // Either the extracted from or inserted into vector must be RHSVec, 9671 // otherwise we'd end up with a shuffle of three inputs. 9672 if (EI->getOperand(0) == RHS || RHS == 0) { 9673 RHS = EI->getOperand(0); 9674 Value *V = CollectShuffleElements(VecOp, Mask, RHS); 9675 Mask[InsertedIdx % NumElts] = 9676 ConstantInt::get(Type::getInt32Ty(V->getContext()), 9677 NumElts+ExtractedIdx); 9678 return V; 9679 } 9680 9681 if (VecOp == RHS) { 9682 Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS); 9683 // Everything but the extracted element is replaced with the RHS. 
9684 for (unsigned i = 0; i != NumElts; ++i) { 9685 if (i != InsertedIdx) 9686 Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()), 9687 NumElts+i); 9688 } 9689 return V; 9690 } 9691 9692 // If this insertelement is a chain that comes from exactly these two 9693 // vectors, return the vector and the effective shuffle. 9694 if (CollectSingleShuffleElements(IEI, EI->getOperand(0), RHS, Mask)) 9695 return EI->getOperand(0); 9696 } 9697 } 9698 } 9699 // TODO: Handle shufflevector here! 9700 9701 // Otherwise, can't do anything fancy. Return an identity vector. 9702 for (unsigned i = 0; i != NumElts; ++i) 9703 Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i)); 9704 return V; 9705} 9706 9707Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { 9708 Value *VecOp = IE.getOperand(0); 9709 Value *ScalarOp = IE.getOperand(1); 9710 Value *IdxOp = IE.getOperand(2); 9711 9712 // Inserting an undef or into an undefined place, remove this. 9713 if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp)) 9714 ReplaceInstUsesWith(IE, VecOp); 9715 9716 // If the inserted element was extracted from some other vector, and if the 9717 // indexes are constant, try to turn this into a shufflevector operation. 9718 if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) { 9719 if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) && 9720 EI->getOperand(0)->getType() == IE.getType()) { 9721 unsigned NumVectorElts = IE.getType()->getNumElements(); 9722 unsigned ExtractedIdx = 9723 cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); 9724 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); 9725 9726 if (ExtractedIdx >= NumVectorElts) // Out of range extract. 9727 return ReplaceInstUsesWith(IE, VecOp); 9728 9729 if (InsertedIdx >= NumVectorElts) // Out of range insert. 
9730 return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType())); 9731 9732 // If we are extracting a value from a vector, then inserting it right 9733 // back into the same place, just use the input vector. 9734 if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx) 9735 return ReplaceInstUsesWith(IE, VecOp); 9736 9737 // If this insertelement isn't used by some other insertelement, turn it 9738 // (and any insertelements it points to), into one big shuffle. 9739 if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) { 9740 std::vector<Constant*> Mask; 9741 Value *RHS = 0; 9742 Value *LHS = CollectShuffleElements(&IE, Mask, RHS); 9743 if (RHS == 0) RHS = UndefValue::get(LHS->getType()); 9744 // We now have a shuffle of LHS, RHS, Mask. 9745 return new ShuffleVectorInst(LHS, RHS, 9746 ConstantVector::get(Mask)); 9747 } 9748 } 9749 } 9750 9751 unsigned VWidth = cast<VectorType>(VecOp->getType())->getNumElements(); 9752 APInt UndefElts(VWidth, 0); 9753 APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); 9754 if (SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) 9755 return &IE; 9756 9757 return 0; 9758} 9759 9760 9761Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { 9762 Value *LHS = SVI.getOperand(0); 9763 Value *RHS = SVI.getOperand(1); 9764 std::vector<unsigned> Mask = getShuffleMask(&SVI); 9765 9766 bool MadeChange = false; 9767 9768 // Undefined shuffle mask -> undefined value. 
9769 if (isa<UndefValue>(SVI.getOperand(2))) 9770 return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType())); 9771 9772 unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements(); 9773 9774 if (VWidth != cast<VectorType>(LHS->getType())->getNumElements()) 9775 return 0; 9776 9777 APInt UndefElts(VWidth, 0); 9778 APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); 9779 if (SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) { 9780 LHS = SVI.getOperand(0); 9781 RHS = SVI.getOperand(1); 9782 MadeChange = true; 9783 } 9784 9785 // Canonicalize shuffle(x ,x,mask) -> shuffle(x, undef,mask') 9786 // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask'). 9787 if (LHS == RHS || isa<UndefValue>(LHS)) { 9788 if (isa<UndefValue>(LHS) && LHS == RHS) { 9789 // shuffle(undef,undef,mask) -> undef. 9790 return ReplaceInstUsesWith(SVI, LHS); 9791 } 9792 9793 // Remap any references to RHS to use LHS. 9794 std::vector<Constant*> Elts; 9795 for (unsigned i = 0, e = Mask.size(); i != e; ++i) { 9796 if (Mask[i] >= 2*e) 9797 Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext()))); 9798 else { 9799 if ((Mask[i] >= e && isa<UndefValue>(RHS)) || 9800 (Mask[i] < e && isa<UndefValue>(LHS))) { 9801 Mask[i] = 2*e; // Turn into undef. 9802 Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext()))); 9803 } else { 9804 Mask[i] = Mask[i] % e; // Force to LHS. 9805 Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()), 9806 Mask[i])); 9807 } 9808 } 9809 } 9810 SVI.setOperand(0, SVI.getOperand(1)); 9811 SVI.setOperand(1, UndefValue::get(RHS->getType())); 9812 SVI.setOperand(2, ConstantVector::get(Elts)); 9813 LHS = SVI.getOperand(0); 9814 RHS = SVI.getOperand(1); 9815 MadeChange = true; 9816 } 9817 9818 // Analyze the shuffle, are the LHS or RHS and identity shuffles? 9819 bool isLHSID = true, isRHSID = true; 9820 9821 for (unsigned i = 0, e = Mask.size(); i != e; ++i) { 9822 if (Mask[i] >= e*2) continue; // Ignore undef values. 
9823 // Is this an identity shuffle of the LHS value? 9824 isLHSID &= (Mask[i] == i); 9825 9826 // Is this an identity shuffle of the RHS value? 9827 isRHSID &= (Mask[i]-e == i); 9828 } 9829 9830 // Eliminate identity shuffles. 9831 if (isLHSID) return ReplaceInstUsesWith(SVI, LHS); 9832 if (isRHSID) return ReplaceInstUsesWith(SVI, RHS); 9833 9834 // If the LHS is a shufflevector itself, see if we can combine it with this 9835 // one without producing an unusual shuffle. Here we are really conservative: 9836 // we are absolutely afraid of producing a shuffle mask not in the input 9837 // program, because the code gen may not be smart enough to turn a merged 9838 // shuffle into two specific shuffles: it may produce worse code. As such, 9839 // we only merge two shuffles if the result is one of the two input shuffle 9840 // masks. In this case, merging the shuffles just removes one instruction, 9841 // which we know is safe. This is good for things like turning: 9842 // (splat(splat)) -> splat. 9843 if (ShuffleVectorInst *LHSSVI = dyn_cast<ShuffleVectorInst>(LHS)) { 9844 if (isa<UndefValue>(RHS)) { 9845 std::vector<unsigned> LHSMask = getShuffleMask(LHSSVI); 9846 9847 if (LHSMask.size() == Mask.size()) { 9848 std::vector<unsigned> NewMask; 9849 for (unsigned i = 0, e = Mask.size(); i != e; ++i) 9850 if (Mask[i] >= e) 9851 NewMask.push_back(2*e); 9852 else 9853 NewMask.push_back(LHSMask[Mask[i]]); 9854 9855 // If the result mask is equal to the src shuffle or this 9856 // shuffle mask, do the replacement. 
9857 if (NewMask == LHSMask || NewMask == Mask) { 9858 unsigned LHSInNElts = 9859 cast<VectorType>(LHSSVI->getOperand(0)->getType())-> 9860 getNumElements(); 9861 std::vector<Constant*> Elts; 9862 for (unsigned i = 0, e = NewMask.size(); i != e; ++i) { 9863 if (NewMask[i] >= LHSInNElts*2) { 9864 Elts.push_back(UndefValue::get( 9865 Type::getInt32Ty(SVI.getContext()))); 9866 } else { 9867 Elts.push_back(ConstantInt::get( 9868 Type::getInt32Ty(SVI.getContext()), 9869 NewMask[i])); 9870 } 9871 } 9872 return new ShuffleVectorInst(LHSSVI->getOperand(0), 9873 LHSSVI->getOperand(1), 9874 ConstantVector::get(Elts)); 9875 } 9876 } 9877 } 9878 } 9879 9880 return MadeChange ? &SVI : 0; 9881} 9882 9883 9884 9885 9886/// TryToSinkInstruction - Try to move the specified instruction from its 9887/// current block into the beginning of DestBlock, which can only happen if it's 9888/// safe to move the instruction past all of the instructions between it and the 9889/// end of its block. 9890static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { 9891 assert(I->hasOneUse() && "Invariants didn't hold!"); 9892 9893 // Cannot move control-flow-involving, volatile loads, vaarg, etc. 9894 if (isa<PHINode>(I) || I->mayHaveSideEffects() || isa<TerminatorInst>(I)) 9895 return false; 9896 9897 // Do not sink alloca instructions out of the entry block. 9898 if (isa<AllocaInst>(I) && I->getParent() == 9899 &DestBlock->getParent()->getEntryBlock()) 9900 return false; 9901 9902 // We can only sink load instructions if there is nothing between the load and 9903 // the end of block that could change the value. 
9904 if (I->mayReadFromMemory()) { 9905 for (BasicBlock::iterator Scan = I, E = I->getParent()->end(); 9906 Scan != E; ++Scan) 9907 if (Scan->mayWriteToMemory()) 9908 return false; 9909 } 9910 9911 BasicBlock::iterator InsertPos = DestBlock->getFirstNonPHI(); 9912 9913 CopyPrecedingStopPoint(I, InsertPos); 9914 I->moveBefore(InsertPos); 9915 ++NumSunkInst; 9916 return true; 9917} 9918 9919 9920/// AddReachableCodeToWorklist - Walk the function in depth-first order, adding 9921/// all reachable code to the worklist. 9922/// 9923/// This has a couple of tricks to make the code faster and more powerful. In 9924/// particular, we constant fold and DCE instructions as we go, to avoid adding 9925/// them to the worklist (this significantly speeds up instcombine on code where 9926/// many instructions are dead or constant). Additionally, if we find a branch 9927/// whose condition is a known constant, we only visit the reachable successors. 9928/// 9929static bool AddReachableCodeToWorklist(BasicBlock *BB, 9930 SmallPtrSet<BasicBlock*, 64> &Visited, 9931 InstCombiner &IC, 9932 const TargetData *TD) { 9933 bool MadeIRChange = false; 9934 SmallVector<BasicBlock*, 256> Worklist; 9935 Worklist.push_back(BB); 9936 9937 std::vector<Instruction*> InstrsForInstCombineWorklist; 9938 InstrsForInstCombineWorklist.reserve(128); 9939 9940 SmallPtrSet<ConstantExpr*, 64> FoldedConstants; 9941 9942 while (!Worklist.empty()) { 9943 BB = Worklist.back(); 9944 Worklist.pop_back(); 9945 9946 // We have now visited this block! If we've already been here, ignore it. 9947 if (!Visited.insert(BB)) continue; 9948 9949 for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) { 9950 Instruction *Inst = BBI++; 9951 9952 // DCE instruction if trivially dead. 9953 if (isInstructionTriviallyDead(Inst)) { 9954 ++NumDeadInst; 9955 DEBUG(errs() << "IC: DCE: " << *Inst << '\n'); 9956 Inst->eraseFromParent(); 9957 continue; 9958 } 9959 9960 // ConstantProp instruction if trivially constant. 
9961 if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0))) 9962 if (Constant *C = ConstantFoldInstruction(Inst, TD)) { 9963 DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " 9964 << *Inst << '\n'); 9965 Inst->replaceAllUsesWith(C); 9966 ++NumConstProp; 9967 Inst->eraseFromParent(); 9968 continue; 9969 } 9970 9971 9972 9973 if (TD) { 9974 // See if we can constant fold its operands. 9975 for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end(); 9976 i != e; ++i) { 9977 ConstantExpr *CE = dyn_cast<ConstantExpr>(i); 9978 if (CE == 0) continue; 9979 9980 // If we already folded this constant, don't try again. 9981 if (!FoldedConstants.insert(CE)) 9982 continue; 9983 9984 Constant *NewC = ConstantFoldConstantExpression(CE, TD); 9985 if (NewC && NewC != CE) { 9986 *i = NewC; 9987 MadeIRChange = true; 9988 } 9989 } 9990 } 9991 9992 9993 InstrsForInstCombineWorklist.push_back(Inst); 9994 } 9995 9996 // Recursively visit successors. If this is a branch or switch on a 9997 // constant, only visit the reachable successor. 9998 TerminatorInst *TI = BB->getTerminator(); 9999 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { 10000 if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) { 10001 bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue(); 10002 BasicBlock *ReachableBB = BI->getSuccessor(!CondVal); 10003 Worklist.push_back(ReachableBB); 10004 continue; 10005 } 10006 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { 10007 if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) { 10008 // See if this is an explicit destination. 10009 for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) 10010 if (SI->getCaseValue(i) == Cond) { 10011 BasicBlock *ReachableBB = SI->getSuccessor(i); 10012 Worklist.push_back(ReachableBB); 10013 continue; 10014 } 10015 10016 // Otherwise it is the default destination. 
10017 Worklist.push_back(SI->getSuccessor(0)); 10018 continue; 10019 } 10020 } 10021 10022 for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) 10023 Worklist.push_back(TI->getSuccessor(i)); 10024 } 10025 10026 // Once we've found all of the instructions to add to instcombine's worklist, 10027 // add them in reverse order. This way instcombine will visit from the top 10028 // of the function down. This jives well with the way that it adds all uses 10029 // of instructions to the worklist after doing a transformation, thus avoiding 10030 // some N^2 behavior in pathological cases. 10031 IC.Worklist.AddInitialGroup(&InstrsForInstCombineWorklist[0], 10032 InstrsForInstCombineWorklist.size()); 10033 10034 return MadeIRChange; 10035} 10036 10037bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { 10038 MadeIRChange = false; 10039 10040 DEBUG(errs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on " 10041 << F.getNameStr() << "\n"); 10042 10043 { 10044 // Do a depth-first traversal of the function, populate the worklist with 10045 // the reachable instructions. Ignore blocks that are not reachable. Keep 10046 // track of which blocks we visit. 10047 SmallPtrSet<BasicBlock*, 64> Visited; 10048 MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD); 10049 10050 // Do a quick scan over the function. If we find any blocks that are 10051 // unreachable, remove any instructions inside of them. This prevents 10052 // the instcombine code from having to deal with some bad special cases. 10053 for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) 10054 if (!Visited.count(BB)) { 10055 Instruction *Term = BB->getTerminator(); 10056 while (Term != BB->begin()) { // Remove instrs bottom-up 10057 BasicBlock::iterator I = Term; --I; 10058 10059 DEBUG(errs() << "IC: DCE: " << *I << '\n'); 10060 // A debug intrinsic shouldn't force another iteration if we weren't 10061 // going to do one without it. 
10062 if (!isa<DbgInfoIntrinsic>(I)) { 10063 ++NumDeadInst; 10064 MadeIRChange = true; 10065 } 10066 10067 // If I is not void type then replaceAllUsesWith undef. 10068 // This allows ValueHandlers and custom metadata to adjust itself. 10069 if (!I->getType()->isVoidTy()) 10070 I->replaceAllUsesWith(UndefValue::get(I->getType())); 10071 I->eraseFromParent(); 10072 } 10073 } 10074 } 10075 10076 while (!Worklist.isEmpty()) { 10077 Instruction *I = Worklist.RemoveOne(); 10078 if (I == 0) continue; // skip null values. 10079 10080 // Check to see if we can DCE the instruction. 10081 if (isInstructionTriviallyDead(I)) { 10082 DEBUG(errs() << "IC: DCE: " << *I << '\n'); 10083 EraseInstFromFunction(*I); 10084 ++NumDeadInst; 10085 MadeIRChange = true; 10086 continue; 10087 } 10088 10089 // Instruction isn't dead, see if we can constant propagate it. 10090 if (!I->use_empty() && isa<Constant>(I->getOperand(0))) 10091 if (Constant *C = ConstantFoldInstruction(I, TD)) { 10092 DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n'); 10093 10094 // Add operands to the worklist. 10095 ReplaceInstUsesWith(*I, C); 10096 ++NumConstProp; 10097 EraseInstFromFunction(*I); 10098 MadeIRChange = true; 10099 continue; 10100 } 10101 10102 // See if we can trivially sink this instruction to a successor basic block. 10103 if (I->hasOneUse()) { 10104 BasicBlock *BB = I->getParent(); 10105 Instruction *UserInst = cast<Instruction>(I->use_back()); 10106 BasicBlock *UserParent; 10107 10108 // Get the block the use occurs in. 10109 if (PHINode *PN = dyn_cast<PHINode>(UserInst)) 10110 UserParent = PN->getIncomingBlock(I->use_begin().getUse()); 10111 else 10112 UserParent = UserInst->getParent(); 10113 10114 if (UserParent != BB) { 10115 bool UserIsSuccessor = false; 10116 // See if the user is one of our successors. 
10117 for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) 10118 if (*SI == UserParent) { 10119 UserIsSuccessor = true; 10120 break; 10121 } 10122 10123 // If the user is one of our immediate successors, and if that successor 10124 // only has us as a predecessors (we'd have to split the critical edge 10125 // otherwise), we can keep going. 10126 if (UserIsSuccessor && UserParent->getSinglePredecessor()) 10127 // Okay, the CFG is simple enough, try to sink this instruction. 10128 MadeIRChange |= TryToSinkInstruction(I, UserParent); 10129 } 10130 } 10131 10132 // Now that we have an instruction, try combining it to simplify it. 10133 Builder->SetInsertPoint(I->getParent(), I); 10134 10135#ifndef NDEBUG 10136 std::string OrigI; 10137#endif 10138 DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str();); 10139 DEBUG(errs() << "IC: Visiting: " << OrigI << '\n'); 10140 10141 if (Instruction *Result = visit(*I)) { 10142 ++NumCombined; 10143 // Should we replace the old instruction with a new one? 10144 if (Result != I) { 10145 DEBUG(errs() << "IC: Old = " << *I << '\n' 10146 << " New = " << *Result << '\n'); 10147 10148 // Everything uses the new instruction now. 10149 I->replaceAllUsesWith(Result); 10150 10151 // Push the new instruction and any users onto the worklist. 10152 Worklist.Add(Result); 10153 Worklist.AddUsersToWorkList(*Result); 10154 10155 // Move the name to the new instruction first. 10156 Result->takeName(I); 10157 10158 // Insert the new instruction into the basic block... 10159 BasicBlock *InstParent = I->getParent(); 10160 BasicBlock::iterator InsertPos = I; 10161 10162 if (!isa<PHINode>(Result)) // If combining a PHI, don't insert 10163 while (isa<PHINode>(InsertPos)) // middle of a block of PHIs. 
10164 ++InsertPos; 10165 10166 InstParent->getInstList().insert(InsertPos, Result); 10167 10168 EraseInstFromFunction(*I); 10169 } else { 10170#ifndef NDEBUG 10171 DEBUG(errs() << "IC: Mod = " << OrigI << '\n' 10172 << " New = " << *I << '\n'); 10173#endif 10174 10175 // If the instruction was modified, it's possible that it is now dead. 10176 // if so, remove it. 10177 if (isInstructionTriviallyDead(I)) { 10178 EraseInstFromFunction(*I); 10179 } else { 10180 Worklist.Add(I); 10181 Worklist.AddUsersToWorkList(*I); 10182 } 10183 } 10184 MadeIRChange = true; 10185 } 10186 } 10187 10188 Worklist.Zap(); 10189 return MadeIRChange; 10190} 10191 10192 10193bool InstCombiner::runOnFunction(Function &F) { 10194 MustPreserveLCSSA = mustPreserveAnalysisID(LCSSAID); 10195 TD = getAnalysisIfAvailable<TargetData>(); 10196 10197 10198 /// Builder - This is an IRBuilder that automatically inserts new 10199 /// instructions into the worklist when they are created. 10200 IRBuilder<true, TargetFolder, InstCombineIRInserter> 10201 TheBuilder(F.getContext(), TargetFolder(TD), 10202 InstCombineIRInserter(Worklist)); 10203 Builder = &TheBuilder; 10204 10205 bool EverMadeChange = false; 10206 10207 // Iterate while there is work to do. 10208 unsigned Iteration = 0; 10209 while (DoOneIteration(F, Iteration++)) 10210 EverMadeChange = true; 10211 10212 Builder = 0; 10213 return EverMadeChange; 10214} 10215 10216FunctionPass *llvm::createInstructionCombiningPass() { 10217 return new InstCombiner(); 10218} 10219