InstructionCombining.cpp revision 32c0cf5af933031f4e842bf44d08e96d76a70386
1//===- InstructionCombining.cpp - Combine multiple instructions -----------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// InstructionCombining - Combine instructions to form fewer, simple 11// instructions. This pass does not modify the CFG. This pass is where 12// algebraic simplification happens. 13// 14// This pass combines things like: 15// %Y = add i32 %X, 1 16// %Z = add i32 %Y, 1 17// into: 18// %Z = add i32 %X, 2 19// 20// This is a simple worklist driven algorithm. 21// 22// This pass guarantees that the following canonicalizations are performed on 23// the program: 24// 1. If a binary operator has a constant operand, it is moved to the RHS 25// 2. Bitwise operators with constant operands are always grouped so that 26// shifts are performed first, then or's, then and's, then xor's. 27// 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible 28// 4. All cmp instructions on boolean values are replaced with logical ops 29// 5. add X, X is represented as (X*2) => (X << 1) 30// 6. Multiplies with a power-of-two constant argument are transformed into 31// shifts. 32// ... etc. 
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "instcombine"
#include "llvm/Transforms/Scalar.h"
#include "InstCombine.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Operator.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/PatternMatch.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include <algorithm>
#include <climits>
using namespace llvm;
using namespace llvm::PatternMatch;

// Pass-wide statistics, reported when LLVM is run with -stats.
STATISTIC(NumCombined , "Number of insts combined");
STATISTIC(NumConstProp, "Number of constant folds");
STATISTIC(NumDeadInst , "Number of dead inst eliminated");
STATISTIC(NumSunkInst , "Number of instructions sunk");


// Pass identification and registration under the name "instcombine".
char InstCombiner::ID = 0;
static RegisterPass<InstCombiner>
X("instcombine", "Combine redundant instructions");

// getAnalysisUsage - This pass never modifies the CFG and keeps LCSSA form
// valid, so declare both preserved to avoid invalidating other analyses.
void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addPreservedID(LCSSAID);
  AU.setPreservesCFG();
}


// isOnlyUse - Return true if this instruction will be deleted if we stop using
// it. Constants are treated the same way: they are never "kept alive" by a
// use, so folding through them is always free.
static bool isOnlyUse(Value *V) {
  return V->hasOneUse() || isa<Constant>(V);
}
88static const Type *getPromotedType(const Type *Ty) { 89 if (const IntegerType* ITy = dyn_cast<IntegerType>(Ty)) { 90 if (ITy->getBitWidth() < 32) 91 return Type::getInt32Ty(Ty->getContext()); 92 } 93 return Ty; 94} 95 96/// ShouldChangeType - Return true if it is desirable to convert a computation 97/// from 'From' to 'To'. We don't want to convert from a legal to an illegal 98/// type for example, or from a smaller to a larger illegal type. 99bool InstCombiner::ShouldChangeType(const Type *From, const Type *To) const { 100 assert(isa<IntegerType>(From) && isa<IntegerType>(To)); 101 102 // If we don't have TD, we don't know if the source/dest are legal. 103 if (!TD) return false; 104 105 unsigned FromWidth = From->getPrimitiveSizeInBits(); 106 unsigned ToWidth = To->getPrimitiveSizeInBits(); 107 bool FromLegal = TD->isLegalInteger(FromWidth); 108 bool ToLegal = TD->isLegalInteger(ToWidth); 109 110 // If this is a legal integer from type, and the result would be an illegal 111 // type, don't do the transformation. 112 if (FromLegal && !ToLegal) 113 return false; 114 115 // Otherwise, if both are illegal, do not increase the size of the result. We 116 // do allow things like i160 -> i64, but not i64 -> i160. 117 if (!FromLegal && !ToLegal && ToWidth > FromWidth) 118 return false; 119 120 return true; 121} 122 123/// getBitCastOperand - If the specified operand is a CastInst, a constant 124/// expression bitcast, or a GetElementPtrInst with all zero indices, return the 125/// operand value, otherwise return null. 126static Value *getBitCastOperand(Value *V) { 127 if (Operator *O = dyn_cast<Operator>(V)) { 128 if (O->getOpcode() == Instruction::BitCast) 129 return O->getOperand(0); 130 if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) 131 if (GEP->hasAllZeroIndices()) 132 return GEP->getPointerOperand(); 133 } 134 return 0; 135} 136 137 138 139// SimplifyCommutative - This performs a few simplifications for commutative 140// operators: 141// 142// 1. 
Order operands such that they are listed from right (least complex) to 143// left (most complex). This puts constants before unary operators before 144// binary operators. 145// 146// 2. Transform: (op (op V, C1), C2) ==> (op V, (op C1, C2)) 147// 3. Transform: (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2)) 148// 149bool InstCombiner::SimplifyCommutative(BinaryOperator &I) { 150 bool Changed = false; 151 if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) 152 Changed = !I.swapOperands(); 153 154 if (!I.isAssociative()) return Changed; 155 Instruction::BinaryOps Opcode = I.getOpcode(); 156 if (BinaryOperator *Op = dyn_cast<BinaryOperator>(I.getOperand(0))) 157 if (Op->getOpcode() == Opcode && isa<Constant>(Op->getOperand(1))) { 158 if (isa<Constant>(I.getOperand(1))) { 159 Constant *Folded = ConstantExpr::get(I.getOpcode(), 160 cast<Constant>(I.getOperand(1)), 161 cast<Constant>(Op->getOperand(1))); 162 I.setOperand(0, Op->getOperand(0)); 163 I.setOperand(1, Folded); 164 return true; 165 } else if (BinaryOperator *Op1=dyn_cast<BinaryOperator>(I.getOperand(1))) 166 if (Op1->getOpcode() == Opcode && isa<Constant>(Op1->getOperand(1)) && 167 isOnlyUse(Op) && isOnlyUse(Op1)) { 168 Constant *C1 = cast<Constant>(Op->getOperand(1)); 169 Constant *C2 = cast<Constant>(Op1->getOperand(1)); 170 171 // Fold (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2)) 172 Constant *Folded = ConstantExpr::get(I.getOpcode(), C1, C2); 173 Instruction *New = BinaryOperator::Create(Opcode, Op->getOperand(0), 174 Op1->getOperand(0), 175 Op1->getName(), &I); 176 Worklist.Add(New); 177 I.setOperand(0, New); 178 I.setOperand(1, Folded); 179 return true; 180 } 181 } 182 return Changed; 183} 184 185// dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction 186// if the LHS is a constant zero (which is the 'negate' form). 
187// 188Value *InstCombiner::dyn_castNegVal(Value *V) const { 189 if (BinaryOperator::isNeg(V)) 190 return BinaryOperator::getNegArgument(V); 191 192 // Constants can be considered to be negated values if they can be folded. 193 if (ConstantInt *C = dyn_cast<ConstantInt>(V)) 194 return ConstantExpr::getNeg(C); 195 196 if (ConstantVector *C = dyn_cast<ConstantVector>(V)) 197 if (C->getType()->getElementType()->isInteger()) 198 return ConstantExpr::getNeg(C); 199 200 return 0; 201} 202 203// dyn_castFNegVal - Given a 'fsub' instruction, return the RHS of the 204// instruction if the LHS is a constant negative zero (which is the 'negate' 205// form). 206// 207Value *InstCombiner::dyn_castFNegVal(Value *V) const { 208 if (BinaryOperator::isFNeg(V)) 209 return BinaryOperator::getFNegArgument(V); 210 211 // Constants can be considered to be negated values if they can be folded. 212 if (ConstantFP *C = dyn_cast<ConstantFP>(V)) 213 return ConstantExpr::getFNeg(C); 214 215 if (ConstantVector *C = dyn_cast<ConstantVector>(V)) 216 if (C->getType()->getElementType()->isFloatingPoint()) 217 return ConstantExpr::getFNeg(C); 218 219 return 0; 220} 221 222/// isFreeToInvert - Return true if the specified value is free to invert (apply 223/// ~ to). This happens in cases where the ~ can be eliminated. 224static inline bool isFreeToInvert(Value *V) { 225 // ~(~(X)) -> X. 226 if (BinaryOperator::isNot(V)) 227 return true; 228 229 // Constants can be considered to be not'ed values. 230 if (isa<ConstantInt>(V)) 231 return true; 232 233 // Compares can be inverted if they have a single use. 234 if (CmpInst *CI = dyn_cast<CmpInst>(V)) 235 return CI->hasOneUse(); 236 237 return false; 238} 239 240static inline Value *dyn_castNotVal(Value *V) { 241 // If this is not(not(x)) don't return that this is a not: we want the two 242 // not's to be folded first. 
243 if (BinaryOperator::isNot(V)) { 244 Value *Operand = BinaryOperator::getNotArgument(V); 245 if (!isFreeToInvert(Operand)) 246 return Operand; 247 } 248 249 // Constants can be considered to be not'ed values... 250 if (ConstantInt *C = dyn_cast<ConstantInt>(V)) 251 return ConstantInt::get(C->getType(), ~C->getValue()); 252 return 0; 253} 254 255 256 257// dyn_castFoldableMul - If this value is a multiply that can be folded into 258// other computations (because it has a constant operand), return the 259// non-constant operand of the multiply, and set CST to point to the multiplier. 260// Otherwise, return null. 261// 262static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) { 263 if (V->hasOneUse() && V->getType()->isInteger()) 264 if (Instruction *I = dyn_cast<Instruction>(V)) { 265 if (I->getOpcode() == Instruction::Mul) 266 if ((CST = dyn_cast<ConstantInt>(I->getOperand(1)))) 267 return I->getOperand(0); 268 if (I->getOpcode() == Instruction::Shl) 269 if ((CST = dyn_cast<ConstantInt>(I->getOperand(1)))) { 270 // The multiplier is really 1 << CST. 271 uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth(); 272 uint32_t CSTVal = CST->getLimitedValue(BitWidth); 273 CST = ConstantInt::get(V->getType()->getContext(), 274 APInt(BitWidth, 1).shl(CSTVal)); 275 return I->getOperand(0); 276 } 277 } 278 return 0; 279} 280 281/// AddOne - Add one to a ConstantInt. 282static Constant *AddOne(Constant *C) { 283 return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1)); 284} 285/// SubOne - Subtract one from a ConstantInt. 286static Constant *SubOne(ConstantInt *C) { 287 return ConstantInt::get(C->getContext(), C->getValue()-1); 288} 289 290 291/// AssociativeOpt - Perform an optimization on an associative operator. This 292/// function is designed to check a chain of associative operators for a 293/// potential to apply a certain optimization. 
Since the optimization may be 294/// applicable if the expression was reassociated, this checks the chain, then 295/// reassociates the expression as necessary to expose the optimization 296/// opportunity. This makes use of a special Functor, which must define 297/// 'shouldApply' and 'apply' methods. 298/// 299template<typename Functor> 300static Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F) { 301 // Quick check, see if the immediate LHS matches... 302 if (F.shouldApply(Root.getOperand(0))) 303 return F.apply(Root); 304 305 return 0; 306} 307 308static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO, 309 InstCombiner *IC) { 310 if (CastInst *CI = dyn_cast<CastInst>(&I)) 311 return IC->Builder->CreateCast(CI->getOpcode(), SO, I.getType()); 312 313 // Figure out if the constant is the left or the right argument. 314 bool ConstIsRHS = isa<Constant>(I.getOperand(1)); 315 Constant *ConstOperand = cast<Constant>(I.getOperand(ConstIsRHS)); 316 317 if (Constant *SOC = dyn_cast<Constant>(SO)) { 318 if (ConstIsRHS) 319 return ConstantExpr::get(I.getOpcode(), SOC, ConstOperand); 320 return ConstantExpr::get(I.getOpcode(), ConstOperand, SOC); 321 } 322 323 Value *Op0 = SO, *Op1 = ConstOperand; 324 if (!ConstIsRHS) 325 std::swap(Op0, Op1); 326 327 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I)) 328 return IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1, 329 SO->getName()+".op"); 330 if (ICmpInst *CI = dyn_cast<ICmpInst>(&I)) 331 return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1, 332 SO->getName()+".cmp"); 333 if (FCmpInst *CI = dyn_cast<FCmpInst>(&I)) 334 return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1, 335 SO->getName()+".cmp"); 336 llvm_unreachable("Unknown binary instruction type!"); 337} 338 339// FoldOpIntoSelect - Given an instruction with a select as one operand and a 340// constant as the other operand, try to fold the binary operator into the 341// select arguments. 
// FoldOpIntoSelect - Given an instruction with a select as one operand and a
// constant as the other operand, try to fold the binary operator into the
// select arguments.  This also works for Cast instructions, which obviously do
// not have a second operand.
Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
  // Don't modify shared select instructions.
  if (!SI->hasOneUse()) return 0;
  Value *TV = SI->getOperand(1);
  Value *FV = SI->getOperand(2);

  if (isa<Constant>(TV) || isa<Constant>(FV)) {
    // Bool selects with constant operands can be folded to logical ops
    // elsewhere; don't interfere with that here.
    if (SI->getType() == Type::getInt1Ty(SI->getContext())) return 0;

    // Push the operation into both arms of the select.
    Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, this);
    Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, this);

    return SelectInst::Create(SI->getCondition(), SelectTrueVal,
                              SelectFalseVal);
  }
  return 0;
}


/// FoldOpIntoPhi - Given a binary operator, cast instruction, or select which
/// has a PHI node as operand #0, see if we can fold the instruction into the
/// PHI (which is only possible if all operands to the PHI are constants).
///
/// If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms
/// that would normally be unprofitable because they strongly encourage jump
/// threading.
Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I,
                                         bool AllowAggressive) {
  // NOTE: aggressive mode is unconditionally disabled here, overriding the
  // parameter; the code paths it guards below are kept but currently dormant.
  AllowAggressive = false;
  PHINode *PN = cast<PHINode>(I.getOperand(0));
  unsigned NumPHIValues = PN->getNumIncomingValues();
  if (NumPHIValues == 0 ||
      // We normally only transform phis with a single use, unless we're trying
      // hard to make jump threading happen.
      (!PN->hasOneUse() && !AllowAggressive))
    return 0;


  // Check to see if all of the operands of the PHI are simple constants
  // (constantint/constantfp/undef).  If there is one non-constant value,
  // remember the BB it is in.  If there is more than one or if *it* is a PHI,
  // bail out.  We don't do arbitrary constant expressions here because moving
  // their computation can be expensive without a cost model.
  BasicBlock *NonConstBB = 0;
  for (unsigned i = 0; i != NumPHIValues; ++i)
    if (!isa<Constant>(PN->getIncomingValue(i)) ||
        isa<ConstantExpr>(PN->getIncomingValue(i))) {
      if (NonConstBB) return 0;  // More than one non-const value.
      if (isa<PHINode>(PN->getIncomingValue(i))) return 0;  // Itself a phi.
      NonConstBB = PN->getIncomingBlock(i);

      // If the incoming non-constant value is in I's block, we have an infinite
      // loop.
      if (NonConstBB == I.getParent())
        return 0;
    }

  // If there is exactly one non-constant value, we can insert a copy of the
  // operation in that block.  However, if this is a critical edge, we would be
  // inserting the computation one some other paths (e.g. inside a loop).  Only
  // do this if the pred block is unconditionally branching into the phi block.
  if (NonConstBB != 0 && !AllowAggressive) {
    BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator());
    if (!BI || !BI->isUnconditional()) return 0;
  }

  // Okay, we can do the transformation: create the new PHI node.
  PHINode *NewPN = PHINode::Create(I.getType(), "");
  NewPN->reserveOperandSpace(PN->getNumOperands()/2);
  InsertNewInstBefore(NewPN, *PN);
  NewPN->takeName(PN);

  // Next, add all of the operands to the PHI.
  if (SelectInst *SI = dyn_cast<SelectInst>(&I)) {
    // We only currently try to fold the condition of a select when it is a phi,
    // not the true/false values.
    Value *TrueV = SI->getTrueValue();
    Value *FalseV = SI->getFalseValue();
    BasicBlock *PhiTransBB = PN->getParent();
    for (unsigned i = 0; i != NumPHIValues; ++i) {
      BasicBlock *ThisBB = PN->getIncomingBlock(i);
      // Translate the select arms into each predecessor block.
      Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB);
      Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB);
      Value *InV = 0;
      if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
        // Constant condition: statically pick the matching arm.
        InV = InC->isNullValue() ? FalseVInPred : TrueVInPred;
      } else {
        assert(PN->getIncomingBlock(i) == NonConstBB);
        InV = SelectInst::Create(PN->getIncomingValue(i), TrueVInPred,
                                 FalseVInPred,
                                 "phitmp", NonConstBB->getTerminator());
        Worklist.Add(cast<Instruction>(InV));
      }
      NewPN->addIncoming(InV, ThisBB);
    }
  } else if (I.getNumOperands() == 2) {
    // Binary operator or compare against a constant RHS.
    Constant *C = cast<Constant>(I.getOperand(1));
    for (unsigned i = 0; i != NumPHIValues; ++i) {
      Value *InV = 0;
      if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
        // Constant incoming value: fold the whole operation to a constant.
        if (CmpInst *CI = dyn_cast<CmpInst>(&I))
          InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);
        else
          InV = ConstantExpr::get(I.getOpcode(), InC, C);
      } else {
        assert(PN->getIncomingBlock(i) == NonConstBB);
        // Re-emit the operation in the single non-constant predecessor.
        if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I))
          InV = BinaryOperator::Create(BO->getOpcode(),
                                       PN->getIncomingValue(i), C, "phitmp",
                                       NonConstBB->getTerminator());
        else if (CmpInst *CI = dyn_cast<CmpInst>(&I))
          InV = CmpInst::Create(CI->getOpcode(),
                                CI->getPredicate(),
                                PN->getIncomingValue(i), C, "phitmp",
                                NonConstBB->getTerminator());
        else
          llvm_unreachable("Unknown binop!");

        Worklist.Add(cast<Instruction>(InV));
      }
      NewPN->addIncoming(InV, PN->getIncomingBlock(i));
    }
  } else {
    // Cast instruction: single operand.
    CastInst *CI = cast<CastInst>(&I);
    const Type *RetTy = CI->getType();
    for (unsigned i = 0; i != NumPHIValues; ++i) {
      Value *InV;
      if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
        InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy);
      } else {
        assert(PN->getIncomingBlock(i) == NonConstBB);
        InV = CastInst::Create(CI->getOpcode(), PN->getIncomingValue(i),
                               I.getType(), "phitmp",
                               NonConstBB->getTerminator());
        Worklist.Add(cast<Instruction>(InV));
      }
      NewPN->addIncoming(InV, PN->getIncomingBlock(i));
    }
  }
  return ReplaceInstUsesWith(I, NewPN);
}


/// WillNotOverflowSignedAdd - Return true if we can prove that:
///    (sext (add LHS, RHS))  === (add (sext LHS), (sext RHS))
/// This basically requires proving that the add in the original type would not
/// overflow to change the sign bit or have a carry out.
bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) {
  // There are different heuristics we can use for this.  Here are some simple
  // ones.

  // Add has the property that adding any two 2's complement numbers can only
  // have one carry bit which can change a sign.  As such, if LHS and RHS each
  // have at least two sign bits, we know that the addition of the two values
  // will sign extend fine.
  if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1)
    return true;


  // If one of the operands only has one non-zero bit, and if the other operand
  // has a known-zero bit in a more significant place than it (not including the
  // sign bit) the ripple may go up to and fill the zero, but won't change the
  // sign.  For example, (X & ~4) + 1.

  // TODO: Implement.

  return false;
}


/// visitAdd - Simplify integer 'add' instructions.  Tries generic
/// simplification first, then a sequence of pattern-based transforms;
/// returns the replacement instruction, &I if only operands changed, or
/// null if nothing was done.
Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
  bool Changed = SimplifyCommutative(I);
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);

  // Generic add simplification (identities, undef, etc.).
  if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
                                 I.hasNoUnsignedWrap(), TD))
    return ReplaceInstUsesWith(I, V);


  if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
    if (ConstantInt *CI = dyn_cast<ConstantInt>(RHSC)) {
      // X + (signbit) --> X ^ signbit
      const APInt& Val = CI->getValue();
      uint32_t BitWidth = Val.getBitWidth();
      if (Val == APInt::getSignBit(BitWidth))
        return BinaryOperator::CreateXor(LHS, RHS);

      // See if SimplifyDemandedBits can simplify this.  This handles stuff like
      // (X & 254)+1 -> (X&254)|1
      if (SimplifyDemandedInstructionBits(I))
        return &I;

      // zext(bool) + C -> bool ? C + 1 : C
      if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS))
        if (ZI->getSrcTy() == Type::getInt1Ty(I.getContext()))
          return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI);
    }

    if (isa<PHINode>(LHS))
      if (Instruction *NV = FoldOpIntoPhi(I))
        return NV;

    // Recognize (X ^ C1) + C2 patterns that are really sign extensions.
    ConstantInt *XorRHS = 0;
    Value *XorLHS = 0;
    if (isa<ConstantInt>(RHSC) &&
        match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) {
      uint32_t TySizeBits = I.getType()->getScalarSizeInBits();
      const APInt& RHSVal = cast<ConstantInt>(RHSC)->getValue();

      // Probe candidate "inner" widths from TySizeBits/2 downward.
      uint32_t Size = TySizeBits / 2;
      APInt C0080Val(APInt(TySizeBits, 1ULL).shl(Size - 1));
      APInt CFF80Val(-C0080Val);
      do {
        if (TySizeBits > Size) {
          // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext.
          // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext.
          if ((RHSVal == CFF80Val && XorRHS->getValue() == C0080Val) ||
              (RHSVal == C0080Val && XorRHS->getValue() == CFF80Val)) {
            // This is a sign extend if the top bits are known zero.
            if (!MaskedValueIsZero(XorLHS,
                   APInt::getHighBitsSet(TySizeBits, TySizeBits - Size)))
              Size = 0;  // Not a sign ext, but can't be any others either.
            break;
          }
        }
        Size >>= 1;
        C0080Val = APIntOps::lshr(C0080Val, Size);
        CFF80Val = APIntOps::ashr(CFF80Val, Size);
      } while (Size >= 1);

      // FIXME: This shouldn't be necessary. When the backends can handle types
      // with funny bit widths then this switch statement should be removed. It
      // is just here to get the size of the "middle" type back up to something
      // that the back ends can handle.
      const Type *MiddleType = 0;
      switch (Size) {
      default: break;
      case 32:
      case 16:
      case 8:  MiddleType = IntegerType::get(I.getContext(), Size); break;
      }
      if (MiddleType) {
        Value *NewTrunc = Builder->CreateTrunc(XorLHS, MiddleType, "sext");
        return new SExtInst(NewTrunc, I.getType(), I.getName());
      }
    }
  }

  // On i1, add is just xor (mod-2 arithmetic).
  if (I.getType() == Type::getInt1Ty(I.getContext()))
    return BinaryOperator::CreateXor(LHS, RHS);

  if (I.getType()->isInteger()) {
    // X + X --> X << 1
    if (LHS == RHS)
      return BinaryOperator::CreateShl(LHS, ConstantInt::get(I.getType(), 1));

    if (Instruction *RHSI = dyn_cast<Instruction>(RHS)) {
      if (RHSI->getOpcode() == Instruction::Sub)
        if (LHS == RHSI->getOperand(1))                   // A + (B - A) --> B
          return ReplaceInstUsesWith(I, RHSI->getOperand(0));
    }
    if (Instruction *LHSI = dyn_cast<Instruction>(LHS)) {
      if (LHSI->getOpcode() == Instruction::Sub)
        if (RHS == LHSI->getOperand(1))                   // (B - A) + A --> B
          return ReplaceInstUsesWith(I, LHSI->getOperand(0));
    }
  }

  // -A + B  -->  B - A
  // -A + -B  -->  -(A + B)
  if (Value *LHSV = dyn_castNegVal(LHS)) {
    if (LHS->getType()->isIntOrIntVector()) {
      if (Value *RHSV = dyn_castNegVal(RHS)) {
        Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum");
        return BinaryOperator::CreateNeg(NewAdd);
      }
    }

    return BinaryOperator::CreateSub(RHS, LHSV);
  }

  // A + -B  -->  A - B
  if (!isa<Constant>(RHS))
    if (Value *V = dyn_castNegVal(RHS))
      return BinaryOperator::CreateSub(LHS, V);


  ConstantInt *C2;
  if (Value *X = dyn_castFoldableMul(LHS, C2)) {
    if (X == RHS)   // X*C + X --> X * (C+1)
      return BinaryOperator::CreateMul(RHS, AddOne(C2));

    // X*C1 + X*C2 --> X * (C1+C2)
    ConstantInt *C1;
    if (X == dyn_castFoldableMul(RHS, C1))
      return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2));
  }

  // X + X*C --> X * (C+1)
  if (dyn_castFoldableMul(RHS, C2) == LHS)
    return BinaryOperator::CreateMul(LHS, AddOne(C2));

  // X + ~X --> -1   since   ~X = -X-1
  if (dyn_castNotVal(LHS) == RHS ||
      dyn_castNotVal(RHS) == LHS)
    return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));


  // A+B --> A|B iff A and B have no bits set in common.
  if (const IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
    APInt Mask = APInt::getAllOnesValue(IT->getBitWidth());
    APInt LHSKnownOne(IT->getBitWidth(), 0);
    APInt LHSKnownZero(IT->getBitWidth(), 0);
    ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
    if (LHSKnownZero != 0) {
      APInt RHSKnownOne(IT->getBitWidth(), 0);
      APInt RHSKnownZero(IT->getBitWidth(), 0);
      ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);

      // No bits in common -> bitwise or.
      if ((LHSKnownZero|RHSKnownZero).isAllOnesValue())
        return BinaryOperator::CreateOr(LHS, RHS);
    }
  }

  // W*X + Y*Z --> W * (X+Z)  iff W == Y
  if (I.getType()->isIntOrIntVector()) {
    Value *W, *X, *Y, *Z;
    if (match(LHS, m_Mul(m_Value(W), m_Value(X))) &&
        match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) {
      // Canonicalize so the shared factor (if any) lands in W and Y.
      if (W != Y) {
        if (W == Z) {
          std::swap(Y, Z);
        } else if (Y == X) {
          std::swap(W, X);
        } else if (X == Z) {
          std::swap(Y, Z);
          std::swap(W, X);
        }
      }

      if (W == Y) {
        Value *NewAdd = Builder->CreateAdd(X, Z, LHS->getName());
        return BinaryOperator::CreateMul(W, NewAdd);
      }
    }
  }

  if (ConstantInt *CRHS = dyn_cast<ConstantInt>(RHS)) {
    Value *X = 0;
    if (match(LHS, m_Not(m_Value(X))))    // ~X + C --> (C-1) - X
      return BinaryOperator::CreateSub(SubOne(CRHS), X);

    // (X & FF00) + xx00  -> (X+xx00) & FF00
    if (LHS->hasOneUse() &&
        match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) {
      Constant *Anded = ConstantExpr::getAnd(CRHS, C2);
      if (Anded == CRHS) {
        // See if all bits from the first bit set in the Add RHS up are included
        // in the mask.  First, get the rightmost bit.
        const APInt &AddRHSV = CRHS->getValue();

        // Form a mask of all bits from the lowest bit added through the top.
        APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1));

        // See if the and mask includes all of these bits.
        APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue());

        if (AddRHSHighBits == AddRHSHighBitsAnd) {
          // Okay, the xform is safe.  Insert the new add pronto.
          Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName());
          return BinaryOperator::CreateAnd(NewAdd, C2);
        }
      }
    }

    // Try to fold constant add into select arguments.
    if (SelectInst *SI = dyn_cast<SelectInst>(LHS))
      if (Instruction *R = FoldOpIntoSelect(I, SI))
        return R;
  }

  // add (select X 0 (sub n A)) A  -->  select X A n
  {
    SelectInst *SI = dyn_cast<SelectInst>(LHS);
    Value *A = RHS;
    if (!SI) {
      SI = dyn_cast<SelectInst>(RHS);
      A = LHS;
    }
    if (SI && SI->hasOneUse()) {
      Value *TV = SI->getTrueValue();
      Value *FV = SI->getFalseValue();
      Value *N;

      // Can we fold the add into the argument of the select?
      // We check both true and false select arguments for a matching subtract.
      if (match(FV, m_Zero()) &&
          match(TV, m_Sub(m_Value(N), m_Specific(A))))
        // Fold the add into the true select value.
        return SelectInst::Create(SI->getCondition(), N, A);
      if (match(TV, m_Zero()) &&
          match(FV, m_Sub(m_Value(N), m_Specific(A))))
        // Fold the add into the false select value.
        return SelectInst::Create(SI->getCondition(), A, N);
    }
  }

  // Check for (add (sext x), y), see if we can merge this into an
  // integer add followed by a sext.
  if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) {
    // (add (sext x), cst) --> (sext (add x, cst'))
    if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
      Constant *CI =
        ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
      if (LHSConv->hasOneUse() &&
          ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
          WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
        // Insert the new, smaller add.
        Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
                                              CI, "addconv");
        return new SExtInst(NewAdd, I.getType());
      }
    }

    // (add (sext x), (sext y)) --> (sext (add int x, y))
    if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) {
      // Only do this if x/y have the same type, if at last one of them has a
      // single use (so we don't increase the number of sexts), and if the
      // integer add will not overflow.
      if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
          (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
          WillNotOverflowSignedAdd(LHSConv->getOperand(0),
                                   RHSConv->getOperand(0))) {
        // Insert the new integer add.
        Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
                                              RHSConv->getOperand(0), "addconv");
        return new SExtInst(NewAdd, I.getType());
      }
    }
  }

  return Changed ? &I : 0;
}

/// visitFAdd - Simplify floating-point 'fadd' instructions; the FP analogue
/// of visitAdd, restricted to transforms that are exact for FP semantics.
Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
  bool Changed = SimplifyCommutative(I);
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);

  if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
    // X + -0.0 --> X  (exact FP identity; the +0.0 case needs the
    // CannotBeNegativeZero check below).
    if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
      if (CFP->isExactlyValue(ConstantFP::getNegativeZero
                              (I.getType())->getValueAPF()))
        return ReplaceInstUsesWith(I, LHS);
    }

    if (isa<PHINode>(LHS))
      if (Instruction *NV = FoldOpIntoPhi(I))
        return NV;
  }

  // -A + B  -->  B - A
  // -A + -B  -->  -(A + B)
  if (Value *LHSV = dyn_castFNegVal(LHS))
    return BinaryOperator::CreateFSub(RHS, LHSV);

  // A + -B  -->  A - B
  if (!isa<Constant>(RHS))
    if (Value *V = dyn_castFNegVal(RHS))
      return BinaryOperator::CreateFSub(LHS, V);

  // Check for X+0.0.  Simplify it to X if we know X is not -0.0.
  if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS))
    if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS))
      return ReplaceInstUsesWith(I, LHS);

  // Check for (add double (sitofp x), y), see if we can merge this into an
  // integer add followed by a promotion.
  if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) {
    // (add double (sitofp x), fpcst) --> (sitofp (add int x, intcst))
    // ... if the constant fits in the integer value.  This is useful for things
    // like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer
    // requires a constant pool load, and generally allows the add to be better
    // instcombined.
    if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) {
      Constant *CI =
        ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType());
      if (LHSConv->hasOneUse() &&
          ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
          WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
        // Insert the new integer add.
        Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
                                              CI, "addconv");
        return new SIToFPInst(NewAdd, I.getType());
      }
    }

    // (add double (sitofp x), (sitofp y)) --> (sitofp (add int x, y))
    if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) {
      // Only do this if x/y have the same type, if at last one of them has a
      // single use (so we don't increase the number of int->fp conversions),
      // and if the integer add will not overflow.
      if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
          (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
          WillNotOverflowSignedAdd(LHSConv->getOperand(0),
                                   RHSConv->getOperand(0))) {
        // Insert the new integer add.
        Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
                                              RHSConv->getOperand(0),"addconv");
        return new SIToFPInst(NewAdd, I.getType());
      }
    }
  }

  return Changed ? &I : 0;
}


/// EmitGEPOffset - Given a getelementptr instruction/constantexpr, emit the
/// code necessary to compute the offset from the base pointer (without adding
/// in the base pointer).  Return the result as a signed integer of intptr size.
Value *InstCombiner::EmitGEPOffset(User *GEP) {
  TargetData &TD = *getTargetData();
  gep_type_iterator GTI = gep_type_begin(GEP);
  const Type *IntPtrTy = TD.getIntPtrType(GEP->getContext());
  // Running total of the byte offset; starts at 0 and accumulates one term
  // per GEP index.
  Value *Result = Constant::getNullValue(IntPtrTy);

  // Build a mask for high order bits.
  unsigned IntPtrWidth = TD.getPointerSizeInBits();
  uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);

  // Walk the indices (operand 0 is the base pointer, so start at operand 1);
  // GTI tracks the type being indexed at each step.
  for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e;
       ++i, ++GTI) {
    Value *Op = *i;
    uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()) & PtrSizeMask;
    if (ConstantInt *OpC = dyn_cast<ConstantInt>(Op)) {
      if (OpC->isZero()) continue;

      // Handle a struct index, which adds its field offset to the pointer.
      if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
        Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());

        Result = Builder->CreateAdd(Result,
                                    ConstantInt::get(IntPtrTy, Size),
                                    GEP->getName()+".offs");
        continue;
      }

      // Constant array/vector index: fold index * element-size at compile
      // time.  GEP indices are sign-extended to pointer width.
      Constant *Scale = ConstantInt::get(IntPtrTy, Size);
      Constant *OC =
              ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/);
      Scale = ConstantExpr::getMul(OC, Scale);
      // Emit an add instruction.
      Result = Builder->CreateAdd(Result, Scale, GEP->getName()+".offs");
      continue;
    }
    // Variable index: convert to correct type.
    if (Op->getType() != IntPtrTy)
      Op = Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c");
    if (Size != 1) {
      Constant *Scale = ConstantInt::get(IntPtrTy, Size);
      // We'll let instcombine(mul) convert this to a shl if possible.
      Op = Builder->CreateMul(Op, Scale, GEP->getName()+".idx");
    }

    // Emit an add instruction.
    Result = Builder->CreateAdd(Op, Result, GEP->getName()+".offs");
  }
  return Result;
}




/// Optimize pointer differences into the same array into a size.  Consider:
///  &A[10] - &A[0]: we should compile this to "10".
/// LHS/RHS are the pointer
/// operands to the ptrtoint instructions for the LHS/RHS of the subtract.
///
Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
                                               const Type *Ty) {
  assert(TD && "Must have target data info for this");

  // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize
  // this.
  bool Swapped = false;
  GetElementPtrInst *GEP = 0;
  ConstantExpr *CstGEP = 0;

  // TODO: Could also optimize &A[i] - &A[j] -> "i-j", and "&A.foo[i] - &A.foo".
  // For now we require one side to be the base pointer "A" or a constant
  // expression derived from it.
  if (GetElementPtrInst *LHSGEP = dyn_cast<GetElementPtrInst>(LHS)) {
    // (gep X, ...) - X
    if (LHSGEP->getOperand(0) == RHS) {
      GEP = LHSGEP;
      Swapped = false;
    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(RHS)) {
      // (gep X, ...) - (ce_gep X, ...)
      if (CE->getOpcode() == Instruction::GetElementPtr &&
          LHSGEP->getOperand(0) == CE->getOperand(0)) {
        CstGEP = CE;
        GEP = LHSGEP;
        Swapped = false;
      }
    }
  }

  if (GetElementPtrInst *RHSGEP = dyn_cast<GetElementPtrInst>(RHS)) {
    // X - (gep X, ...)
    if (RHSGEP->getOperand(0) == LHS) {
      GEP = RHSGEP;
      Swapped = true;
    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(LHS)) {
      // (ce_gep X, ...) - (gep X, ...)
      if (CE->getOpcode() == Instruction::GetElementPtr &&
          RHSGEP->getOperand(0) == CE->getOperand(0)) {
        CstGEP = CE;
        GEP = RHSGEP;
        Swapped = true;
      }
    }
  }

  // Neither side matched a recognized pattern; give up.
  if (GEP == 0)
    return 0;

  // Emit the offset of the GEP and an intptr_t.
  Value *Result = EmitGEPOffset(GEP);

  // If we had a constant expression GEP on the other side offsetting the
  // pointer, subtract it from the offset we have.
  if (CstGEP) {
    Value *CstOffset = EmitGEPOffset(CstGEP);
    Result = Builder->CreateSub(Result, CstOffset);
  }


  // If we have p - gep(p, ...)
  // then we have to negate the result.
  if (Swapped)
    Result = Builder->CreateNeg(Result, "diff.neg");

  // Truncate/extend the byte difference to the type of the original subtract.
  return Builder->CreateIntCast(Result, Ty, true);
}


/// visitSub - Simplify integer subtraction.  Returns a replacement
/// instruction or null if no transform applied.
Instruction *InstCombiner::visitSub(BinaryOperator &I) {
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);

  if (Op0 == Op1)                        // sub X, X  -> 0
    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));

  // If this is a 'B = x-(-A)', change to B = x+A.  This preserves NSW/NUW.
  if (Value *V = dyn_castNegVal(Op1)) {
    BinaryOperator *Res = BinaryOperator::CreateAdd(Op0, V);
    Res->setHasNoSignedWrap(I.hasNoSignedWrap());
    Res->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
    return Res;
  }

  if (isa<UndefValue>(Op0))
    return ReplaceInstUsesWith(I, Op0);    // undef - X -> undef
  if (isa<UndefValue>(Op1))
    return ReplaceInstUsesWith(I, Op1);    // X - undef -> undef
  // On i1, sub and xor compute the same function.
  if (I.getType() == Type::getInt1Ty(I.getContext()))
    return BinaryOperator::CreateXor(Op0, Op1);

  if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) {
    // Replace (-1 - A) with (~A).
    if (C->isAllOnesValue())
      return BinaryOperator::CreateNot(Op1);

    // C - ~X == X + (1+C)
    Value *X = 0;
    if (match(Op1, m_Not(m_Value(X))))
      return BinaryOperator::CreateAdd(X, AddOne(C));

    // -(X >>u 31) -> (X >>s 31)
    // -(X >>s 31) -> (X >>u 31)
    if (C->isZero()) {
      if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op1)) {
        if (SI->getOpcode() == Instruction::LShr) {
          if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) {
            // Check to see if we are shifting out everything but the sign bit.
            if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) ==
                SI->getType()->getPrimitiveSizeInBits()-1) {
              // Ok, the transformation is safe.  Insert AShr.
              return BinaryOperator::Create(Instruction::AShr,
                                          SI->getOperand(0), CU, SI->getName());
            }
          }
        } else if (SI->getOpcode() == Instruction::AShr) {
          if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) {
            // Check to see if we are shifting out everything but the sign bit.
            if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) ==
                SI->getType()->getPrimitiveSizeInBits()-1) {
              // Ok, the transformation is safe.  Insert LShr.
              return BinaryOperator::CreateLShr(
                                          SI->getOperand(0), CU, SI->getName());
            }
          }
        }
      }
    }

    // Try to fold constant sub into select arguments.
    if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
      if (Instruction *R = FoldOpIntoSelect(I, SI))
        return R;

    // C - zext(bool) -> bool ? C - 1 : C
    if (ZExtInst *ZI = dyn_cast<ZExtInst>(Op1))
      if (ZI->getSrcTy() == Type::getInt1Ty(I.getContext()))
        return SelectInst::Create(ZI->getOperand(0), SubOne(C), C);
  }

  if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
    if (Op1I->getOpcode() == Instruction::Add) {
      if (Op1I->getOperand(0) == Op0)              // X-(X+Y) == -Y
        return BinaryOperator::CreateNeg(Op1I->getOperand(1),
                                         I.getName());
      else if (Op1I->getOperand(1) == Op0)         // X-(Y+X) == -Y
        return BinaryOperator::CreateNeg(Op1I->getOperand(0),
                                         I.getName());
      else if (ConstantInt *CI1 = dyn_cast<ConstantInt>(I.getOperand(0))) {
        if (ConstantInt *CI2 = dyn_cast<ConstantInt>(Op1I->getOperand(1)))
          // C1-(X+C2) --> (C1-C2)-X
          return BinaryOperator::CreateSub(
            ConstantExpr::getSub(CI1, CI2), Op1I->getOperand(0));
      }
    }

    if (Op1I->hasOneUse()) {
      // Replace (x - (y - z)) with (x + (z - y)) if the (y - z) subexpression
      // is not used by anyone else...
      //
      if (Op1I->getOpcode() == Instruction::Sub) {
        // Swap the two operands of the subexpr...
        // Note this mutates the existing sub in place (safe: we are its only
        // user), then builds a new add on top of it.
        Value *IIOp0 = Op1I->getOperand(0), *IIOp1 = Op1I->getOperand(1);
        Op1I->setOperand(0, IIOp1);
        Op1I->setOperand(1, IIOp0);

        // Create the new top level add instruction...
        return BinaryOperator::CreateAdd(Op0, Op1);
      }

      // Replace (A - (A & B)) with (A & ~B) if this is the only use of (A&B)...
      //
      if (Op1I->getOpcode() == Instruction::And &&
          (Op1I->getOperand(0) == Op0 || Op1I->getOperand(1) == Op0)) {
        // Select whichever and-operand is NOT Op0 (bool index trick).
        Value *OtherOp = Op1I->getOperand(Op1I->getOperand(0) == Op0);

        Value *NewNot = Builder->CreateNot(OtherOp, "B.not");
        return BinaryOperator::CreateAnd(Op0, NewNot);
      }

      // 0 - (X sdiv C)  -> (X sdiv -C)
      if (Op1I->getOpcode() == Instruction::SDiv)
        if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
          if (CSI->isZero())
            if (Constant *DivRHS = dyn_cast<Constant>(Op1I->getOperand(1)))
              return BinaryOperator::CreateSDiv(Op1I->getOperand(0),
                                               ConstantExpr::getNeg(DivRHS));

      // X - X*C --> X * (1-C)
      ConstantInt *C2 = 0;
      if (dyn_castFoldableMul(Op1I, C2) == Op0) {
        Constant *CP1 =
          ConstantExpr::getSub(ConstantInt::get(I.getType(), 1),
                               C2);
        return BinaryOperator::CreateMul(Op0, CP1);
      }
    }
  }

  if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
    if (Op0I->getOpcode() == Instruction::Add) {
      if (Op0I->getOperand(0) == Op1)             // (Y+X)-Y == X
        return ReplaceInstUsesWith(I, Op0I->getOperand(1));
      else if (Op0I->getOperand(1) == Op1)        // (X+Y)-Y == X
        return ReplaceInstUsesWith(I, Op0I->getOperand(0));
    } else if (Op0I->getOpcode() == Instruction::Sub) {
      if (Op0I->getOperand(0) == Op1)             // (X-Y)-X == -Y
        return BinaryOperator::CreateNeg(Op0I->getOperand(1),
                                         I.getName());
    }
  }

  ConstantInt *C1;
  if (Value *X = dyn_castFoldableMul(Op0, C1)) {
    if (X == Op1)  // X*C - X --> X * (C-1)
      return BinaryOperator::CreateMul(Op1, SubOne(C1));

    ConstantInt *C2;   // X*C1 - X*C2 -> X * (C1-C2)
    if (X == dyn_castFoldableMul(Op1, C2))
      return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2));
  }

  // Optimize pointer differences into the same array into a size.  Consider:
  //  &A[10] - &A[0]: we should compile this to "10".  (Requires TargetData to
  //  know element sizes.)
  if (TD) {
    Value *LHSOp, *RHSOp;
    if (match(Op0, m_PtrToInt(m_Value(LHSOp))) &&
        match(Op1, m_PtrToInt(m_Value(RHSOp))))
      if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
        return ReplaceInstUsesWith(I, Res);

    // trunc(p)-trunc(q) -> trunc(p-q)
    if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) &&
        match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp)))))
      if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
        return ReplaceInstUsesWith(I, Res);
  }

  return 0;
}

/// visitFSub - Simplify floating-point subtraction.
Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);

  // If this is a 'B = x-(-A)', change to B = x+A...
  if (Value *V = dyn_castFNegVal(Op1))
    return BinaryOperator::CreateFAdd(Op0, V);

  if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
    if (Op1I->getOpcode() == Instruction::FAdd) {
      if (Op1I->getOperand(0) == Op0)              // X-(X+Y) == -Y
        return BinaryOperator::CreateFNeg(Op1I->getOperand(1),
                                          I.getName());
      else if (Op1I->getOperand(1) == Op0)         // X-(Y+X) == -Y
        return BinaryOperator::CreateFNeg(Op1I->getOperand(0),
                                          I.getName());
    }
  }

  return 0;
}

/// getICmpCode - Encode a icmp predicate into a three bit mask.  These bits
/// are carefully arranged to allow folding of expressions such as:
///
///      (A < B) | (A > B) --> (A != B)
///
/// Note that this is only valid if the first and second predicates have the
/// same sign.
Is illegal to do: (A u< B) | (A s> B) 1189/// 1190/// Three bits are used to represent the condition, as follows: 1191/// 0 A > B 1192/// 1 A == B 1193/// 2 A < B 1194/// 1195/// <=> Value Definition 1196/// 000 0 Always false 1197/// 001 1 A > B 1198/// 010 2 A == B 1199/// 011 3 A >= B 1200/// 100 4 A < B 1201/// 101 5 A != B 1202/// 110 6 A <= B 1203/// 111 7 Always true 1204/// 1205static unsigned getICmpCode(const ICmpInst *ICI) { 1206 switch (ICI->getPredicate()) { 1207 // False -> 0 1208 case ICmpInst::ICMP_UGT: return 1; // 001 1209 case ICmpInst::ICMP_SGT: return 1; // 001 1210 case ICmpInst::ICMP_EQ: return 2; // 010 1211 case ICmpInst::ICMP_UGE: return 3; // 011 1212 case ICmpInst::ICMP_SGE: return 3; // 011 1213 case ICmpInst::ICMP_ULT: return 4; // 100 1214 case ICmpInst::ICMP_SLT: return 4; // 100 1215 case ICmpInst::ICMP_NE: return 5; // 101 1216 case ICmpInst::ICMP_ULE: return 6; // 110 1217 case ICmpInst::ICMP_SLE: return 6; // 110 1218 // True -> 7 1219 default: 1220 llvm_unreachable("Invalid ICmp predicate!"); 1221 return 0; 1222 } 1223} 1224 1225/// getFCmpCode - Similar to getICmpCode but for FCmpInst. This encodes a fcmp 1226/// predicate into a three bit mask. It also returns whether it is an ordered 1227/// predicate by reference. 
1228static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) { 1229 isOrdered = false; 1230 switch (CC) { 1231 case FCmpInst::FCMP_ORD: isOrdered = true; return 0; // 000 1232 case FCmpInst::FCMP_UNO: return 0; // 000 1233 case FCmpInst::FCMP_OGT: isOrdered = true; return 1; // 001 1234 case FCmpInst::FCMP_UGT: return 1; // 001 1235 case FCmpInst::FCMP_OEQ: isOrdered = true; return 2; // 010 1236 case FCmpInst::FCMP_UEQ: return 2; // 010 1237 case FCmpInst::FCMP_OGE: isOrdered = true; return 3; // 011 1238 case FCmpInst::FCMP_UGE: return 3; // 011 1239 case FCmpInst::FCMP_OLT: isOrdered = true; return 4; // 100 1240 case FCmpInst::FCMP_ULT: return 4; // 100 1241 case FCmpInst::FCMP_ONE: isOrdered = true; return 5; // 101 1242 case FCmpInst::FCMP_UNE: return 5; // 101 1243 case FCmpInst::FCMP_OLE: isOrdered = true; return 6; // 110 1244 case FCmpInst::FCMP_ULE: return 6; // 110 1245 // True -> 7 1246 default: 1247 // Not expecting FCMP_FALSE and FCMP_TRUE; 1248 llvm_unreachable("Unexpected FCmp predicate!"); 1249 return 0; 1250 } 1251} 1252 1253/// getICmpValue - This is the complement of getICmpCode, which turns an 1254/// opcode and two operands into either a constant true or false, or a brand 1255/// new ICmp instruction. The sign is passed in to determine which kind 1256/// of predicate to use in the new icmp instruction. 
1257static Value *getICmpValue(bool sign, unsigned code, Value *LHS, Value *RHS) { 1258 switch (code) { 1259 default: llvm_unreachable("Illegal ICmp code!"); 1260 case 0: return ConstantInt::getFalse(LHS->getContext()); 1261 case 1: 1262 if (sign) 1263 return new ICmpInst(ICmpInst::ICMP_SGT, LHS, RHS); 1264 else 1265 return new ICmpInst(ICmpInst::ICMP_UGT, LHS, RHS); 1266 case 2: return new ICmpInst(ICmpInst::ICMP_EQ, LHS, RHS); 1267 case 3: 1268 if (sign) 1269 return new ICmpInst(ICmpInst::ICMP_SGE, LHS, RHS); 1270 else 1271 return new ICmpInst(ICmpInst::ICMP_UGE, LHS, RHS); 1272 case 4: 1273 if (sign) 1274 return new ICmpInst(ICmpInst::ICMP_SLT, LHS, RHS); 1275 else 1276 return new ICmpInst(ICmpInst::ICMP_ULT, LHS, RHS); 1277 case 5: return new ICmpInst(ICmpInst::ICMP_NE, LHS, RHS); 1278 case 6: 1279 if (sign) 1280 return new ICmpInst(ICmpInst::ICMP_SLE, LHS, RHS); 1281 else 1282 return new ICmpInst(ICmpInst::ICMP_ULE, LHS, RHS); 1283 case 7: return ConstantInt::getTrue(LHS->getContext()); 1284 } 1285} 1286 1287/// getFCmpValue - This is the complement of getFCmpCode, which turns an 1288/// opcode and two operands into either a FCmp instruction. isordered is passed 1289/// in to determine which kind of predicate to use in the new fcmp instruction. 
1290static Value *getFCmpValue(bool isordered, unsigned code, 1291 Value *LHS, Value *RHS) { 1292 switch (code) { 1293 default: llvm_unreachable("Illegal FCmp code!"); 1294 case 0: 1295 if (isordered) 1296 return new FCmpInst(FCmpInst::FCMP_ORD, LHS, RHS); 1297 else 1298 return new FCmpInst(FCmpInst::FCMP_UNO, LHS, RHS); 1299 case 1: 1300 if (isordered) 1301 return new FCmpInst(FCmpInst::FCMP_OGT, LHS, RHS); 1302 else 1303 return new FCmpInst(FCmpInst::FCMP_UGT, LHS, RHS); 1304 case 2: 1305 if (isordered) 1306 return new FCmpInst(FCmpInst::FCMP_OEQ, LHS, RHS); 1307 else 1308 return new FCmpInst(FCmpInst::FCMP_UEQ, LHS, RHS); 1309 case 3: 1310 if (isordered) 1311 return new FCmpInst(FCmpInst::FCMP_OGE, LHS, RHS); 1312 else 1313 return new FCmpInst(FCmpInst::FCMP_UGE, LHS, RHS); 1314 case 4: 1315 if (isordered) 1316 return new FCmpInst(FCmpInst::FCMP_OLT, LHS, RHS); 1317 else 1318 return new FCmpInst(FCmpInst::FCMP_ULT, LHS, RHS); 1319 case 5: 1320 if (isordered) 1321 return new FCmpInst(FCmpInst::FCMP_ONE, LHS, RHS); 1322 else 1323 return new FCmpInst(FCmpInst::FCMP_UNE, LHS, RHS); 1324 case 6: 1325 if (isordered) 1326 return new FCmpInst(FCmpInst::FCMP_OLE, LHS, RHS); 1327 else 1328 return new FCmpInst(FCmpInst::FCMP_ULE, LHS, RHS); 1329 case 7: return ConstantInt::getTrue(LHS->getContext()); 1330 } 1331} 1332 1333/// PredicatesFoldable - Return true if both predicates match sign or if at 1334/// least one of them is an equality comparison (which is signless). 
static bool PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) {
  return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) ||
         (CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) ||
         (CmpInst::isSigned(p2) && ICmpInst::isEquality(p1));
}

namespace {
// FoldICmpLogical - Implements (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B)
// Helper functor used by the logical-operator visitors: shouldApply tests
// whether a value is a compatible icmp of the same operands, and apply
// merges the two three-bit codes with the logical opcode.
struct FoldICmpLogical {
  InstCombiner &IC;
  Value *LHS, *RHS;           // Canonical operand order of the first icmp.
  ICmpInst::Predicate pred;
  FoldICmpLogical(InstCombiner &ic, ICmpInst *ICI)
    : IC(ic), LHS(ICI->getOperand(0)), RHS(ICI->getOperand(1)),
      pred(ICI->getPredicate()) {}
  bool shouldApply(Value *V) const {
    if (ICmpInst *ICI = dyn_cast<ICmpInst>(V))
      if (PredicatesFoldable(pred, ICI->getPredicate()))
        return ((ICI->getOperand(0) == LHS && ICI->getOperand(1) == RHS) ||
                (ICI->getOperand(0) == RHS && ICI->getOperand(1) == LHS));
    return false;
  }
  Instruction *apply(Instruction &Log) const {
    ICmpInst *ICI = cast<ICmpInst>(Log.getOperand(0));
    if (ICI->getOperand(0) != LHS) {
      assert(ICI->getOperand(1) == LHS);
      ICI->swapOperands();  // Swap the LHS and RHS of the ICmp
    }

    ICmpInst *RHSICI = cast<ICmpInst>(Log.getOperand(1));
    unsigned LHSCode = getICmpCode(ICI);
    unsigned RHSCode = getICmpCode(RHSICI);
    unsigned Code;
    // The three-bit codes compose under the corresponding bitwise operator.
    switch (Log.getOpcode()) {
    case Instruction::And: Code = LHSCode & RHSCode; break;
    case Instruction::Or:  Code = LHSCode | RHSCode; break;
    case Instruction::Xor: Code = LHSCode ^ RHSCode; break;
    default: llvm_unreachable("Illegal logical opcode!"); return 0;
    }

    bool isSigned = RHSICI->isSigned() || ICI->isSigned();
    Value *RV = getICmpValue(isSigned, Code, LHS, RHS);
    if (Instruction *I = dyn_cast<Instruction>(RV))
      return I;
    // Otherwise, it's a constant boolean value...
    return IC.ReplaceInstUsesWith(Log, RV);
  }
};
} // end anonymous namespace

// OptAndOp - This handles expressions of the form ((val OP C1) & C2).  Where
// the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'.  Op is
// guaranteed to be a binary operator.
Instruction *InstCombiner::OptAndOp(Instruction *Op,
                                    ConstantInt *OpRHS,
                                    ConstantInt *AndRHS,
                                    BinaryOperator &TheAnd) {
  Value *X = Op->getOperand(0);
  Constant *Together = 0;
  // C1 & C2, meaningless for shifts (where OpRHS is a shift amount).
  if (!Op->isShift())
    Together = ConstantExpr::getAnd(AndRHS, OpRHS);

  switch (Op->getOpcode()) {
  case Instruction::Xor:
    if (Op->hasOneUse()) {
      // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2)
      Value *And = Builder->CreateAnd(X, AndRHS);
      And->takeName(Op);
      return BinaryOperator::CreateXor(And, Together);
    }
    break;
  case Instruction::Or:
    if (Together == AndRHS) // (X | C) & C --> C
      return ReplaceInstUsesWith(TheAnd, AndRHS);

    if (Op->hasOneUse() && Together != OpRHS) {
      // (X | C1) & C2 --> (X | (C1&C2)) & C2
      Value *Or = Builder->CreateOr(X, Together);
      Or->takeName(Op);
      return BinaryOperator::CreateAnd(Or, AndRHS);
    }
    break;
  case Instruction::Add:
    if (Op->hasOneUse()) {
      // Adding a one to a single bit bit-field should be turned into an XOR
      // of the bit.  First thing to check is to see if this AND is with a
      // single bit constant.
      const APInt &AndRHSV = cast<ConstantInt>(AndRHS)->getValue();

      // If there is only one bit set.
      if (AndRHSV.isPowerOf2()) {
        // Ok, at this point, we know that we are masking the result of the
        // ADD down to exactly one bit.  If the constant we are adding has
        // no bits set below this bit, then we can eliminate the ADD.
        const APInt& AddRHS = cast<ConstantInt>(OpRHS)->getValue();

        // Check to see if any bits below the one bit set in AndRHSV are set.
        if ((AddRHS & (AndRHSV-1)) == 0) {
          // If not, the only thing that can effect the output of the AND is
          // the bit specified by AndRHSV.  If that bit is set, the effect of
          // the XOR is to toggle the bit.  If it is clear, then the ADD has
          // no effect.
          if ((AddRHS & AndRHSV) == 0) { // Bit is not set, noop
            TheAnd.setOperand(0, X);
            return &TheAnd;
          } else {
            // Pull the XOR out of the AND.
            Value *NewAnd = Builder->CreateAnd(X, AndRHS);
            NewAnd->takeName(Op);
            return BinaryOperator::CreateXor(NewAnd, AndRHS);
          }
        }
      }
    }
    break;

  case Instruction::Shl: {
    // We know that the AND will not produce any of the bits shifted in, so if
    // the anded constant includes them, clear them now!
    //
    uint32_t BitWidth = AndRHS->getType()->getBitWidth();
    uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
    APInt ShlMask(APInt::getHighBitsSet(BitWidth, BitWidth-OpRHSVal));
    ConstantInt *CI = ConstantInt::get(AndRHS->getContext(),
                                       AndRHS->getValue() & ShlMask);

    if (CI->getValue() == ShlMask) {
      // Masking out bits that the shift already masks.
      return ReplaceInstUsesWith(TheAnd, Op);   // No need for the and.
    } else if (CI != AndRHS) {                  // Reducing bits set in and.
      TheAnd.setOperand(1, CI);
      return &TheAnd;
    }
    break;
  }
  case Instruction::LShr: {
    // We know that the AND will not produce any of the bits shifted in, so if
    // the anded constant includes them, clear them now!  This only applies to
    // unsigned shifts, because a signed shr may bring in set bits!
    //
    uint32_t BitWidth = AndRHS->getType()->getBitWidth();
    uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
    APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
    ConstantInt *CI = ConstantInt::get(Op->getContext(),
                                       AndRHS->getValue() & ShrMask);

    if (CI->getValue() == ShrMask) {
      // Masking out bits that the shift already masks.
      return ReplaceInstUsesWith(TheAnd, Op);
    } else if (CI != AndRHS) {
      TheAnd.setOperand(1, CI);  // Reduce bits set in and cst.
      return &TheAnd;
    }
    break;
  }
  case Instruction::AShr:
    // Signed shr.
    // See if this is shifting in some sign extension, then masking it out
    // with an and.
    if (Op->hasOneUse()) {
      uint32_t BitWidth = AndRHS->getType()->getBitWidth();
      uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
      APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
      Constant *C = ConstantInt::get(Op->getContext(),
                                     AndRHS->getValue() & ShrMask);
      if (C == AndRHS) {          // Masking out bits shifted in.
        // (Val ashr C1) & C2 -> (Val lshr C1) & C2
        // Make the argument unsigned.
        Value *ShVal = Op->getOperand(0);
        ShVal = Builder->CreateLShr(ShVal, OpRHS, Op->getName());
        return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName());
      }
    }
    break;
  }
  return 0;
}


/// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is
/// true, otherwise (V < Lo || V >= Hi).  In practice, we emit the more
/// efficient (V-Lo) <u Hi-Lo.  This method expects that Lo <= Hi.  isSigned
/// indicates whether to treat the V, Lo and HI as signed or not.  IB is the
/// location to insert new instructions.
Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
                                           bool isSigned, bool Inside,
                                           Instruction &IB) {
  assert(cast<ConstantInt>(ConstantExpr::getICmp((isSigned ?
            ICmpInst::ICMP_SLE:ICmpInst::ICMP_ULE), Lo, Hi))->getZExtValue() &&
         "Lo is not <= Hi in range emission code!");

  if (Inside) {
    if (Lo == Hi)  // Trivially false.
      return new ICmpInst(ICmpInst::ICMP_NE, V, V);

    // V >= Min && V < Hi --> V < Hi
    if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
      ICmpInst::Predicate pred = (isSigned ?
        ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT);
      return new ICmpInst(pred, V, Hi);
    }

    // Emit V-Lo <u Hi-Lo
    Constant *NegLo = ConstantExpr::getNeg(Lo);
    Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off");
    Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi);
    return new ICmpInst(ICmpInst::ICMP_ULT, Add, UpperBound);
  }

  if (Lo == Hi)  // Trivially true.
    return new ICmpInst(ICmpInst::ICMP_EQ, V, V);

  // V < Min || V >= Hi -> V > Hi-1
  Hi = SubOne(cast<ConstantInt>(Hi));
  if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
    ICmpInst::Predicate pred = (isSigned ?
        ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT);
    return new ICmpInst(pred, V, Hi);
  }

  // Emit V-Lo >u Hi-1-Lo
  // Note that Hi has already had one subtracted from it, above.
  ConstantInt *NegLo = cast<ConstantInt>(ConstantExpr::getNeg(Lo));
  Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off");
  Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi);
  return new ICmpInst(ICmpInst::ICMP_UGT, Add, LowerBound);
}

// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s with
// any number of 0s on either side.  The 1s are allowed to wrap from LSB to
// MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs.  0x0F0F0000 is
// not, since all 1s are not contiguous.
static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) {
  const APInt& V = Val->getValue();
  uint32_t BitWidth = Val->getType()->getBitWidth();
  if (!APIntOps::isShiftedMask(BitWidth, V)) return false;

  // look for the first zero bit after the run of ones
  MB = BitWidth - ((V - 1) ^ V).countLeadingZeros();
  // look for the first non-zero bit
  ME = V.getActiveBits();
  return true;
}

/// FoldLogicalPlusAnd - This is part of an expression (LHS +/- RHS) & Mask,
/// where isSub determines whether the operator is a sub.  If we can fold one of
/// the following xforms:
///
/// ((A & N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == Mask
/// ((A | N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
/// ((A ^ N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
///
/// return (A +/- B).
///
Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
                                        ConstantInt *Mask, bool isSub,
                                        Instruction &I) {
  // Only handle binary instructions whose second operand is a constant int.
  Instruction *LHSI = dyn_cast<Instruction>(LHS);
  if (!LHSI || LHSI->getNumOperands() != 2 ||
      !isa<ConstantInt>(LHSI->getOperand(1))) return 0;

  ConstantInt *N = cast<ConstantInt>(LHSI->getOperand(1));

  switch (LHSI->getOpcode()) {
  default: return 0;
  case Instruction::And:
    if (ConstantExpr::getAnd(N, Mask) == Mask) {
      // If the AndRHS is a power of two minus one (0+1+), this is simple.
      // (leading-zeros + popcount == bitwidth means the set bits form one
      // low-order run.)
      if ((Mask->getValue().countLeadingZeros() +
           Mask->getValue().countPopulation()) ==
          Mask->getValue().getBitWidth())
        break;

      // Otherwise, if Mask is 0+1+0+, and if B is known to have the low 0+
      // part, we don't need any explicit masks to take them out of A.  If that
      // is all N is, ignore it.
      uint32_t MB = 0, ME = 0;
      if (isRunOfOnes(Mask, MB, ME)) {  // begin/end bit of run, inclusive
        uint32_t BitWidth = cast<IntegerType>(RHS->getType())->getBitWidth();
        // NOTE(review): this local APInt 'Mask' (the bits below the run)
        // shadows the ConstantInt* 'Mask' parameter — intentional here, but
        // easy to misread.
        APInt Mask(APInt::getLowBitsSet(BitWidth, MB-1));
        if (MaskedValueIsZero(RHS, Mask))
          break;
      }
    }
    return 0;
  case Instruction::Or:
  case Instruction::Xor:
    // If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0
    if ((Mask->getValue().countLeadingZeros() +
         Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth()
        && ConstantExpr::getAnd(N, Mask)->isNullValue())
      break;
    return 0;
  }

  // The logical op is redundant under this mask; emit A +/- B directly.
  if (isSub)
    return Builder->CreateSub(LHSI->getOperand(0), RHS, "fold");
  return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold");
}

/// FoldAndOfICmps - Fold (icmp)&(icmp) if possible.
Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
                                          ICmpInst *LHS, ICmpInst *RHS) {
  Value *Val, *Val2;
  ConstantInt *LHSCst, *RHSCst;
  ICmpInst::Predicate LHSCC, RHSCC;

  // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
  if (!match(LHS, m_ICmp(LHSCC, m_Value(Val),
                         m_ConstantInt(LHSCst))) ||
      !match(RHS, m_ICmp(RHSCC, m_Value(Val2),
                         m_ConstantInt(RHSCst))))
    return 0;

  // Folds that work even when the two compared values differ.
  if (LHSCst == RHSCst && LHSCC == RHSCC) {
    // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C)
    // where C is a power of 2
    if (LHSCC == ICmpInst::ICMP_ULT &&
        LHSCst->getValue().isPowerOf2()) {
      Value *NewOr = Builder->CreateOr(Val, Val2);
      return new ICmpInst(LHSCC, NewOr, LHSCst);
    }

    // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0)
    if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) {
      Value *NewOr = Builder->CreateOr(Val, Val2);
      return new ICmpInst(LHSCC, NewOr, LHSCst);
    }
  }

  // From here on, we only handle:
  //    (icmp1 A, C1) & (icmp2 A, C2) --> something simpler.
  if (Val != Val2) return 0;

  // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
  if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE ||
      RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE ||
      LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
      RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
    return 0;

  // We can't fold (ugt x, C) & (sgt x, C2).
  if (!PredicatesFoldable(LHSCC, RHSCC))
    return 0;

  // Ensure that the larger constant is on the RHS.
  bool ShouldSwap;
  if (CmpInst::isSigned(LHSCC) ||
      (ICmpInst::isEquality(LHSCC) &&
       CmpInst::isSigned(RHSCC)))
    ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
  else
    ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());

  if (ShouldSwap) {
    std::swap(LHS, RHS);
    std::swap(LHSCst, RHSCst);
    std::swap(LHSCC, RHSCC);
  }

  // At this point, we know we have two icmp instructions
  // comparing a value against two constants and and'ing the result
  // together.  Because of the above check, we know that we only have
  // icmp eq, icmp ne, icmp [su]lt, and icmp [SU]gt here.  We also know
  // (from the FoldICmpLogical check above), that the two constants
  // are not equal and that the larger constant is on the RHS
  assert(LHSCst != RHSCst && "Compares not folded above?");

  switch (LHSCC) {
  default: llvm_unreachable("Unknown integer condition code!");
  case ICmpInst::ICMP_EQ:
    switch (RHSCC) {
    default: llvm_unreachable("Unknown integer condition code!");
    case ICmpInst::ICMP_EQ:         // (X == 13 & X == 15) -> false
    case ICmpInst::ICMP_UGT:        // (X == 13 & X >  15) -> false
    case ICmpInst::ICMP_SGT:        // (X == 13 & X >  15) -> false
      return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
    case ICmpInst::ICMP_NE:         // (X == 13 & X != 15) -> X == 13
    case ICmpInst::ICMP_ULT:        // (X == 13 & X <  15) -> X == 13
    case ICmpInst::ICMP_SLT:        // (X == 13 & X <  15) -> X == 13
      return ReplaceInstUsesWith(I, LHS);
    }
  case ICmpInst::ICMP_NE:
    switch (RHSCC) {
    default: llvm_unreachable("Unknown integer condition code!");
    case ICmpInst::ICMP_ULT:
      if (LHSCst == SubOne(RHSCst)) // (X != 13 & X u< 14) -> X < 13
        return new ICmpInst(ICmpInst::ICMP_ULT, Val, LHSCst);
      break;                        // (X != 13 & X u< 15) -> no change
    case ICmpInst::ICMP_SLT:
      if (LHSCst == SubOne(RHSCst)) // (X != 13 & X s< 14) -> X < 13
        return new ICmpInst(ICmpInst::ICMP_SLT, Val, LHSCst);
      break;                        // (X != 13 & X s< 15) -> no change
    case ICmpInst::ICMP_EQ:         // (X != 13 & X == 15) -> X == 15
    case ICmpInst::ICMP_UGT:        // (X != 13 & X u> 15) -> X u> 15
    case ICmpInst::ICMP_SGT:        // (X != 13 & X s> 15) -> X s> 15
      return ReplaceInstUsesWith(I, RHS);
    case ICmpInst::ICMP_NE:
      if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1
        Constant *AddCST = ConstantExpr::getNeg(LHSCst);
        Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
        return new ICmpInst(ICmpInst::ICMP_UGT, Add,
                            ConstantInt::get(Add->getType(), 1));
      }
      break;                        // (X != 13 & X != 15) -> no change
    }
    break;
  case ICmpInst::ICMP_ULT:
    switch (RHSCC) {
    default: llvm_unreachable("Unknown integer condition code!");
    case ICmpInst::ICMP_EQ:         // (X u< 13 & X == 15) -> false
    case ICmpInst::ICMP_UGT:        // (X u< 13 & X u> 15) -> false
      return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
    case ICmpInst::ICMP_SGT:        // (X u< 13 & X s> 15) -> no change
      break;
    case ICmpInst::ICMP_NE:         // (X u< 13 & X != 15) -> X u< 13
    case ICmpInst::ICMP_ULT:        // (X u< 13 & X u< 15) -> X u< 13
      return ReplaceInstUsesWith(I, LHS);
    case ICmpInst::ICMP_SLT:        // (X u< 13 & X s< 15) -> no change
      break;
    }
    break;
  case ICmpInst::ICMP_SLT:
    switch (RHSCC) {
    default: llvm_unreachable("Unknown integer condition code!");
    case ICmpInst::ICMP_EQ:         // (X s< 13 & X == 15) -> false
    case ICmpInst::ICMP_SGT:        // (X s< 13 & X s> 15) -> false
      return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
    case ICmpInst::ICMP_UGT:        // (X s< 13 & X u> 15) -> no change
      break;
    case ICmpInst::ICMP_NE:         // (X s< 13 & X != 15) -> X < 13
    case ICmpInst::ICMP_SLT:        // (X s< 13 & X s< 15) -> X < 13
      return ReplaceInstUsesWith(I, LHS);
    case ICmpInst::ICMP_ULT:        // (X s< 13 & X u< 15) -> no change
      break;
    }
    break;
  case ICmpInst::ICMP_UGT:
    switch (RHSCC) {
    default: llvm_unreachable("Unknown integer condition code!");
    case ICmpInst::ICMP_EQ:         // (X u> 13 & X == 15) -> X == 15
    case ICmpInst::ICMP_UGT:        // (X u> 13 & X u> 15) -> X u> 15
      return ReplaceInstUsesWith(I, RHS);
    case ICmpInst::ICMP_SGT:        // (X u> 13 & X s> 15) -> no change
      break;
    case ICmpInst::ICMP_NE:
      if (RHSCst == AddOne(LHSCst)) // (X u> 13 & X != 14) -> X u> 14
        return new ICmpInst(LHSCC, Val, RHSCst);
      break;                        // (X u> 13 & X != 15) -> no change
    case ICmpInst::ICMP_ULT:        // (X u> 13 & X u< 15) -> (X-14) <u 1
      return InsertRangeTest(Val, AddOne(LHSCst),
                             RHSCst, false, true, I);
    case ICmpInst::ICMP_SLT:        // (X u> 13 & X s< 15) -> no change
      break;
    }
    break;
  case ICmpInst::ICMP_SGT:
    switch (RHSCC) {
    default: llvm_unreachable("Unknown integer condition code!");
    case ICmpInst::ICMP_EQ:         // (X s> 13 & X == 15) -> X == 15
    case ICmpInst::ICMP_SGT:        // (X s> 13 & X s> 15) -> X s> 15
      return ReplaceInstUsesWith(I, RHS);
    case ICmpInst::ICMP_UGT:        // (X s> 13 & X u> 15) -> no change
      break;
    case ICmpInst::ICMP_NE:
      if (RHSCst == AddOne(LHSCst)) // (X s> 13 & X != 14) -> X s> 14
        return new ICmpInst(LHSCC, Val, RHSCst);
      break;                        // (X s> 13 & X != 15) -> no change
    case ICmpInst::ICMP_SLT:        // (X s> 13 & X s< 15) -> (X-14) s< 1
      return InsertRangeTest(Val, AddOne(LHSCst),
                             RHSCst, true, true, I);
    case ICmpInst::ICMP_ULT:        // (X s> 13 & X u< 15) -> no change
      break;
    }
    break;
  }

  return 0;
}

/// FoldAndOfFCmps - Fold (fcmp)&(fcmp) if possible.
Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS,
                                          FCmpInst *RHS) {

  if (LHS->getPredicate() == FCmpInst::FCMP_ORD &&
      RHS->getPredicate() == FCmpInst::FCMP_ORD) {
    // (fcmp ord x, c) & (fcmp ord y, c)  -> (fcmp ord x, y)
    if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
      if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
        // If either of the constants are nans, then the whole thing returns
        // false.
        if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
          return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
        return new FCmpInst(FCmpInst::FCMP_ORD,
                            LHS->getOperand(0), RHS->getOperand(0));
      }

    // Handle vector zeros.
This occurs because the canonical form of 1832 // "fcmp ord x,x" is "fcmp ord x, 0". 1833 if (isa<ConstantAggregateZero>(LHS->getOperand(1)) && 1834 isa<ConstantAggregateZero>(RHS->getOperand(1))) 1835 return new FCmpInst(FCmpInst::FCMP_ORD, 1836 LHS->getOperand(0), RHS->getOperand(0)); 1837 return 0; 1838 } 1839 1840 Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1); 1841 Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1); 1842 FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate(); 1843 1844 1845 if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) { 1846 // Swap RHS operands to match LHS. 1847 Op1CC = FCmpInst::getSwappedPredicate(Op1CC); 1848 std::swap(Op1LHS, Op1RHS); 1849 } 1850 1851 if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) { 1852 // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y). 1853 if (Op0CC == Op1CC) 1854 return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS); 1855 1856 if (Op0CC == FCmpInst::FCMP_FALSE || Op1CC == FCmpInst::FCMP_FALSE) 1857 return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); 1858 if (Op0CC == FCmpInst::FCMP_TRUE) 1859 return ReplaceInstUsesWith(I, RHS); 1860 if (Op1CC == FCmpInst::FCMP_TRUE) 1861 return ReplaceInstUsesWith(I, LHS); 1862 1863 bool Op0Ordered; 1864 bool Op1Ordered; 1865 unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); 1866 unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered); 1867 if (Op1Pred == 0) { 1868 std::swap(LHS, RHS); 1869 std::swap(Op0Pred, Op1Pred); 1870 std::swap(Op0Ordered, Op1Ordered); 1871 } 1872 if (Op0Pred == 0) { 1873 // uno && ueq -> uno && (uno || eq) -> ueq 1874 // ord && olt -> ord && (ord && lt) -> olt 1875 if (Op0Ordered == Op1Ordered) 1876 return ReplaceInstUsesWith(I, RHS); 1877 1878 // uno && oeq -> uno && (ord && eq) -> false 1879 // uno && ord -> false 1880 if (!Op0Ordered) 1881 return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); 1882 // ord && ueq -> ord && (uno || eq) -> oeq 1883 return 
          cast<Instruction>(getFCmpValue(true, Op1Pred, Op0LHS, Op0RHS));
    }
  }

  return 0;
}


// visitAnd - Simplify 'and' instructions.  Returns a replacement
// instruction, the (modified) original instruction, or null if nothing
// changed.
Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
  bool Changed = SimplifyCommutative(I);
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);

  // Let the generic simplifier handle trivial identities and constant
  // folding first.
  if (Value *V = SimplifyAndInst(Op0, Op1, TD))
    return ReplaceInstUsesWith(I, V);

  // See if we can simplify any instructions used by the instruction whose sole
  // purpose is to compute bits we don't care about.
  if (SimplifyDemandedInstructionBits(I))
    return &I;

  if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) {
    const APInt &AndRHSMask = AndRHS->getValue();
    // NotAndRHS is the set of bits that the mask clears.
    APInt NotAndRHS(~AndRHSMask);

    // Optimize a variety of ((val OP C1) & C2) combinations...
    if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
      Value *Op0LHS = Op0I->getOperand(0);
      Value *Op0RHS = Op0I->getOperand(1);
      switch (Op0I->getOpcode()) {
      default: break;
      case Instruction::Xor:
      case Instruction::Or:
        // If the mask is only needed on one incoming arm, push it up.
        if (!Op0I->hasOneUse()) break;

        if (MaskedValueIsZero(Op0LHS, NotAndRHS)) {
          // Not masking anything out for the LHS, move to RHS.
          Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS,
                                             Op0RHS->getName()+".masked");
          return BinaryOperator::Create(Op0I->getOpcode(), Op0LHS, NewRHS);
        }
        if (!isa<Constant>(Op0RHS) &&
            MaskedValueIsZero(Op0RHS, NotAndRHS)) {
          // Not masking anything out for the RHS, move to LHS.
          Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS,
                                             Op0LHS->getName()+".masked");
          return BinaryOperator::Create(Op0I->getOpcode(), NewLHS, Op0RHS);
        }

        break;
      case Instruction::Add:
        // ((A & N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == AndRHS.
        // ((A | N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0
        // ((A ^ N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0
        if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, false, I))
          return BinaryOperator::CreateAnd(V, AndRHS);
        if (Value *V = FoldLogicalPlusAnd(Op0RHS, Op0LHS, AndRHS, false, I))
          return BinaryOperator::CreateAnd(V, AndRHS);  // Add commutes
        break;

      case Instruction::Sub:
        // ((A & N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == AndRHS.
        // ((A | N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0
        // ((A ^ N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0
        if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, true, I))
          return BinaryOperator::CreateAnd(V, AndRHS);

        // (A - N) & AndRHS -> -N & AndRHS iff A&AndRHS==0 and AndRHS
        // has 1's for all bits that the subtraction with A might affect.
        if (Op0I->hasOneUse()) {
          uint32_t BitWidth = AndRHSMask.getBitWidth();
          uint32_t Zeros = AndRHSMask.countLeadingZeros();
          // Mask covers every bit position up to and including the mask's
          // highest set bit, i.e. all bits the subtraction could influence.
          APInt Mask = APInt::getLowBitsSet(BitWidth, BitWidth - Zeros);

          ConstantInt *A = dyn_cast<ConstantInt>(Op0LHS);
          if (!(A && A->isZero()) &&  // avoid infinite recursion.
              MaskedValueIsZero(Op0LHS, Mask)) {
            Value *NewNeg = Builder->CreateNeg(Op0RHS);
            return BinaryOperator::CreateAnd(NewNeg, AndRHS);
          }
        }
        break;

      case Instruction::Shl:
      case Instruction::LShr:
        // (1 << x) & 1 --> zext(x == 0)
        // (1 >> x) & 1 --> zext(x == 0)
        if (AndRHSMask == 1 && Op0LHS == AndRHS) {
          Value *NewICmp =
            Builder->CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType()));
          return new ZExtInst(NewICmp, I.getType());
        }
        break;
      }

      // ((val OP C1) & C2): try further opcode-specific folds with the two
      // constants.
      if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1)))
        if (Instruction *Res = OptAndOp(Op0I, Op0CI, AndRHS, I))
          return Res;
    } else if (CastInst *CI = dyn_cast<CastInst>(Op0)) {
      // If this is an integer truncation or change from signed-to-unsigned, and
      // if the source is an and/or with immediate, transform it.  This
      // frequently occurs for bitfield accesses.
      if (Instruction *CastOp = dyn_cast<Instruction>(CI->getOperand(0))) {
        if ((isa<TruncInst>(CI) || isa<BitCastInst>(CI)) &&
            CastOp->getNumOperands() == 2)
          if (ConstantInt *AndCI =dyn_cast<ConstantInt>(CastOp->getOperand(1))){
            if (CastOp->getOpcode() == Instruction::And) {
              // Change: and (cast (and X, C1) to T), C2
              // into  : and (cast X to T), trunc_or_bitcast(C1)&C2
              // This will fold the two constants together, which may allow
              // other simplifications.
              Value *NewCast = Builder->CreateTruncOrBitCast(
                CastOp->getOperand(0), I.getType(),
                CastOp->getName()+".shrunk");
              // trunc_or_bitcast(C1)&C2
              Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType());
              C3 = ConstantExpr::getAnd(C3, AndRHS);
              return BinaryOperator::CreateAnd(NewCast, C3);
            } else if (CastOp->getOpcode() == Instruction::Or) {
              // Change: and (cast (or X, C1) to T), C2
              // into  : trunc(C1)&C2 iff trunc(C1)&C2 == C2
              // (the 'or' forces every bit the mask keeps, so the result is
              // the mask itself).
              Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType());
              if (ConstantExpr::getAnd(C3, AndRHS) == AndRHS)
                // trunc(C1)&C2
                return ReplaceInstUsesWith(I, AndRHS);
            }
          }
      }
    }

    // Try to fold constant and into select arguments.
    if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
      if (Instruction *R = FoldOpIntoSelect(I, SI))
        return R;
    if (isa<PHINode>(Op0))
      if (Instruction *NV = FoldOpIntoPhi(I))
        return NV;
  }


  // (~A & ~B) == (~(A | B)) - De Morgan's Law
  if (Value *Op0NotVal = dyn_castNotVal(Op0))
    if (Value *Op1NotVal = dyn_castNotVal(Op1))
      if (Op0->hasOneUse() && Op1->hasOneUse()) {
        Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal,
                                      I.getName()+".demorgan");
        return BinaryOperator::CreateNot(Or);
      }

  {
    Value *A = 0, *B = 0, *C = 0, *D = 0;
    // (A|B) & ~(A&B) -> A^B
    if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
        match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) &&
        ((A == C && B == D) || (A == D && B == C)))
      return BinaryOperator::CreateXor(A, B);

    // ~(A&B) & (A|B) -> A^B
    if (match(Op1, m_Or(m_Value(A), m_Value(B))) &&
        match(Op0, m_Not(m_And(m_Value(C), m_Value(D)))) &&
        ((A == C && B == D) || (A == D && B == C)))
      return BinaryOperator::CreateXor(A, B);

    // Canonicalize (A^B)&A into A&(A^B) so the folds below only need to
    // inspect Op1.
    if (Op0->hasOneUse() &&
        match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
      if (A == Op1) {                                // (A^B)&A -> A&(A^B)
        I.swapOperands();     // Simplify below
        std::swap(Op0, Op1);
      } else if (B == Op1) {                         // (A^B)&B -> B&(B^A)
        cast<BinaryOperator>(Op0)->swapOperands();
        I.swapOperands();     // Simplify below
        std::swap(Op0, Op1);
      }
    }

    if (Op1->hasOneUse() &&
        match(Op1, m_Xor(m_Value(A), m_Value(B)))) {
      if (B == Op0) {                                // B&(A^B) -> B&(B^A)
        cast<BinaryOperator>(Op1)->swapOperands();
        std::swap(A, B);
      }
      if (A == Op0)                                // A&(A^B) -> A & ~B
        return BinaryOperator::CreateAnd(A, Builder->CreateNot(B, "tmp"));
    }

    // (A&((~A)|B)) -> A&B
    if (match(Op0, m_Or(m_Not(m_Specific(Op1)), m_Value(A))) ||
        match(Op0, m_Or(m_Value(A), m_Not(m_Specific(Op1)))))
      return BinaryOperator::CreateAnd(A, Op1);
    if (match(Op1, m_Or(m_Not(m_Specific(Op0)), m_Value(A))) ||
        match(Op1, m_Or(m_Value(A), m_Not(m_Specific(Op0)))))
      return BinaryOperator::CreateAnd(A, Op0);
  }

  if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1)) {
    // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B)
    if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS)))
      return R;

    if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0))
      if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS))
        return Res;
  }

  // fold (and (cast A), (cast B)) -> (cast (and A, B))
  if (CastInst *Op0C = dyn_cast<CastInst>(Op0))
    if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
      if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind ?
        const Type *SrcTy = Op0C->getOperand(0)->getType();
        if (SrcTy == Op1C->getOperand(0)->getType() &&
            SrcTy->isIntOrIntVector() &&
            // Only do this if the casts both really cause code to be generated.
2095 ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), 2096 I.getType()) && 2097 ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), 2098 I.getType())) { 2099 Value *NewOp = Builder->CreateAnd(Op0C->getOperand(0), 2100 Op1C->getOperand(0), I.getName()); 2101 return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); 2102 } 2103 } 2104 2105 // (X >> Z) & (Y >> Z) -> (X&Y) >> Z for all shifts. 2106 if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) { 2107 if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0)) 2108 if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() && 2109 SI0->getOperand(1) == SI1->getOperand(1) && 2110 (SI0->hasOneUse() || SI1->hasOneUse())) { 2111 Value *NewOp = 2112 Builder->CreateAnd(SI0->getOperand(0), SI1->getOperand(0), 2113 SI0->getName()); 2114 return BinaryOperator::Create(SI1->getOpcode(), NewOp, 2115 SI1->getOperand(1)); 2116 } 2117 } 2118 2119 // If and'ing two fcmp, try combine them into one. 2120 if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) { 2121 if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) 2122 if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS)) 2123 return Res; 2124 } 2125 2126 return Changed ? &I : 0; 2127} 2128 2129/// CollectBSwapParts - Analyze the specified subexpression and see if it is 2130/// capable of providing pieces of a bswap. The subexpression provides pieces 2131/// of a bswap if it is proven that each of the non-zero bytes in the output of 2132/// the expression came from the corresponding "byte swapped" byte in some other 2133/// value. For example, if the current subexpression is "(shl i32 %X, 24)" then 2134/// we know that the expression deposits the low byte of %X into the high byte 2135/// of the bswap result and that all other bytes are zero. This expression is 2136/// accepted, the high byte of ByteValues is set to X to indicate a correct 2137/// match. 2138/// 2139/// This function returns true if the match was unsuccessful and false if so. 
2140/// On entry to the function the "OverallLeftShift" is a signed integer value 2141/// indicating the number of bytes that the subexpression is later shifted. For 2142/// example, if the expression is later right shifted by 16 bits, the 2143/// OverallLeftShift value would be -2 on entry. This is used to specify which 2144/// byte of ByteValues is actually being set. 2145/// 2146/// Similarly, ByteMask is a bitmask where a bit is clear if its corresponding 2147/// byte is masked to zero by a user. For example, in (X & 255), X will be 2148/// processed with a bytemask of 1. Because bytemask is 32-bits, this limits 2149/// this function to working on up to 32-byte (256 bit) values. ByteMask is 2150/// always in the local (OverallLeftShift) coordinate space. 2151/// 2152static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask, 2153 SmallVector<Value*, 8> &ByteValues) { 2154 if (Instruction *I = dyn_cast<Instruction>(V)) { 2155 // If this is an or instruction, it may be an inner node of the bswap. 2156 if (I->getOpcode() == Instruction::Or) { 2157 return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask, 2158 ByteValues) || 2159 CollectBSwapParts(I->getOperand(1), OverallLeftShift, ByteMask, 2160 ByteValues); 2161 } 2162 2163 // If this is a logical shift by a constant multiple of 8, recurse with 2164 // OverallLeftShift and ByteMask adjusted. 2165 if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) { 2166 unsigned ShAmt = 2167 cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U); 2168 // Ensure the shift amount is defined and of a byte value. 
      if ((ShAmt & 7) || (ShAmt > 8*ByteValues.size()))
        return true;

      unsigned ByteShift = ShAmt >> 3;
      if (I->getOpcode() == Instruction::Shl) {
        // X << 2 -> collect(X, +2)
        OverallLeftShift += ByteShift;
        ByteMask >>= ByteShift;
      } else {
        // X >>u 2 -> collect(X, -2)
        OverallLeftShift -= ByteShift;
        ByteMask <<= ByteShift;
        // Keep ByteMask restricted to the bytes of the value being matched.
        ByteMask &= (~0U >> (32-ByteValues.size()));
      }

      // If the cumulative shift moves everything out of range, no byte of
      // this subexpression can reach the result.
      if (OverallLeftShift >= (int)ByteValues.size()) return true;
      if (OverallLeftShift <= -(int)ByteValues.size()) return true;

      return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
                               ByteValues);
    }

    // If this is a logical 'and' with a mask that clears bytes, clear the
    // corresponding bytes in ByteMask.
    if (I->getOpcode() == Instruction::And &&
        isa<ConstantInt>(I->getOperand(1))) {
      // Scan every byte of the and mask, seeing if the byte is either 0 or 255.
      unsigned NumBytes = ByteValues.size();
      APInt Byte(I->getType()->getPrimitiveSizeInBits(), 255);
      const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue();

      for (unsigned i = 0; i != NumBytes; ++i, Byte <<= 8) {
        // If this byte is masked out by a later operation, we don't care what
        // the and mask is.
        if ((ByteMask & (1 << i)) == 0)
          continue;

        // If the AndMask is all zeros for this byte, clear the bit.
        APInt MaskB = AndMask & Byte;
        if (MaskB == 0) {
          ByteMask &= ~(1U << i);
          continue;
        }

        // If the AndMask is not all ones for this byte, it's not a bytezap.
        if (MaskB != Byte)
          return true;

        // Otherwise, this byte is kept.
      }

      return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
                               ByteValues);
    }
  }

  // Okay, we got to something that isn't a shift, 'or' or 'and'.  This must be
  // the input value to the bswap.  Some observations: 1) if more than one byte
  // is demanded from this input, then it could not be successfully assembled
  // into a byteswap.  At least one of the two bytes would not be aligned with
  // their ultimate destination.
  if (!isPowerOf2_32(ByteMask)) return true;
  unsigned InputByteNo = CountTrailingZeros_32(ByteMask);

  // 2) The input and ultimate destinations must line up: if byte 3 of an i32
  // is demanded, it needs to go into byte 0 of the result.  This means that the
  // byte needs to be shifted until it lands in the right byte bucket.  The
  // shift amount depends on the position: if the byte is coming from the high
  // part of the value (e.g. byte 3) then it must be shifted right.  If from the
  // low part, it must be shifted left.
  unsigned DestByteNo = InputByteNo + OverallLeftShift;
  // NOTE(review): both arms of this if/else perform the identical check, so
  // the high-half/low-half split is currently redundant -- confirm whether
  // one of the comparisons was intended to differ.
  if (InputByteNo < ByteValues.size()/2) {
    if (ByteValues.size()-1-DestByteNo != InputByteNo)
      return true;
  } else {
    if (ByteValues.size()-1-DestByteNo != InputByteNo)
      return true;
  }

  // If the destination byte value is already defined, the values are or'd
  // together, which isn't a bswap (unless it's an or of the same bits).
  if (ByteValues[DestByteNo] && ByteValues[DestByteNo] != V)
    return true;
  ByteValues[DestByteNo] = V;
  return false;
}

/// MatchBSwap - Given an OR instruction, check to see if this is a bswap idiom.
/// If so, insert the new bswap intrinsic and return it.
Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
  const IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
  if (!ITy || ITy->getBitWidth() % 16 ||
      // ByteMask only allows up to 32-byte values.
      ITy->getBitWidth() > 32*8)
    return 0;  // Can only bswap pairs of bytes.  Can't do vectors.

  /// ByteValues - For each byte of the result, we keep track of which value
  /// defines each byte.
  SmallVector<Value*, 8> ByteValues;
  ByteValues.resize(ITy->getBitWidth()/8);

  // Try to find all the pieces corresponding to the bswap.
  uint32_t ByteMask = ~0U >> (32-ByteValues.size());
  if (CollectBSwapParts(&I, 0, ByteMask, ByteValues))
    return 0;

  // Check to see if all of the bytes come from the same value.
  Value *V = ByteValues[0];
  if (V == 0) return 0;  // Didn't find a byte?  Must be zero.

  // Check to make sure that all of the bytes come from the same value.
  for (unsigned i = 1, e = ByteValues.size(); i != e; ++i)
    if (ByteValues[i] != V)
      return 0;
  // Every byte of V lands byte-swapped in the result: emit llvm.bswap on V.
  const Type *Tys[] = { ITy };
  Module *M = I.getParent()->getParent()->getParent();
  Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
  return CallInst::Create(F, V);
}

/// MatchSelectFromAndOr - We have an expression of the form (A&C)|(B&D).  Check
/// If A is (cond?-1:0) and either B or D is ~(cond?-1,0) or (cond?0,-1), then
/// we can simplify this expression to "cond ? C : D or B".
static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
                                         Value *C, Value *D) {
  // If A is not a select of -1/0, this cannot match.
  Value *Cond = 0;
  if (!match(A, m_SelectCst<-1, 0>(m_Value(Cond))))
    return 0;

  // ((cond?-1:0)&C) | (B&(cond?0:-1)) -> cond ? C : B.
  if (match(D, m_SelectCst<0, -1>(m_Specific(Cond))))
    return SelectInst::Create(Cond, C, B);
  // Same fold with B's mask written as ~(cond?-1:0).
  if (match(D, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond)))))
    return SelectInst::Create(Cond, C, B);
  // ((cond?-1:0)&C) | ((cond?0:-1)&D) -> cond ? C : D.
  if (match(B, m_SelectCst<0, -1>(m_Specific(Cond))))
    return SelectInst::Create(Cond, C, D);
  if (match(B, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond)))))
    return SelectInst::Create(Cond, C, D);
  return 0;
}

/// FoldOrOfICmps - Fold (icmp)|(icmp) if possible.
2313Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, 2314 ICmpInst *LHS, ICmpInst *RHS) { 2315 Value *Val, *Val2; 2316 ConstantInt *LHSCst, *RHSCst; 2317 ICmpInst::Predicate LHSCC, RHSCC; 2318 2319 // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2). 2320 if (!match(LHS, m_ICmp(LHSCC, m_Value(Val), m_ConstantInt(LHSCst))) || 2321 !match(RHS, m_ICmp(RHSCC, m_Value(Val2), m_ConstantInt(RHSCst)))) 2322 return 0; 2323 2324 2325 // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0) 2326 if (LHSCst == RHSCst && LHSCC == RHSCC && 2327 LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) { 2328 Value *NewOr = Builder->CreateOr(Val, Val2); 2329 return new ICmpInst(LHSCC, NewOr, LHSCst); 2330 } 2331 2332 // From here on, we only handle: 2333 // (icmp1 A, C1) | (icmp2 A, C2) --> something simpler. 2334 if (Val != Val2) return 0; 2335 2336 // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere. 2337 if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE || 2338 RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE || 2339 LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE || 2340 RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE) 2341 return 0; 2342 2343 // We can't fold (ugt x, C) | (sgt x, C2). 2344 if (!PredicatesFoldable(LHSCC, RHSCC)) 2345 return 0; 2346 2347 // Ensure that the larger constant is on the RHS. 2348 bool ShouldSwap; 2349 if (CmpInst::isSigned(LHSCC) || 2350 (ICmpInst::isEquality(LHSCC) && 2351 CmpInst::isSigned(RHSCC))) 2352 ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue()); 2353 else 2354 ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue()); 2355 2356 if (ShouldSwap) { 2357 std::swap(LHS, RHS); 2358 std::swap(LHSCst, RHSCst); 2359 std::swap(LHSCC, RHSCC); 2360 } 2361 2362 // At this point, we know we have have two icmp instructions 2363 // comparing a value against two constants and or'ing the result 2364 // together. 
Because of the above check, we know that we only have 2365 // ICMP_EQ, ICMP_NE, ICMP_LT, and ICMP_GT here. We also know (from the 2366 // FoldICmpLogical check above), that the two constants are not 2367 // equal. 2368 assert(LHSCst != RHSCst && "Compares not folded above?"); 2369 2370 switch (LHSCC) { 2371 default: llvm_unreachable("Unknown integer condition code!"); 2372 case ICmpInst::ICMP_EQ: 2373 switch (RHSCC) { 2374 default: llvm_unreachable("Unknown integer condition code!"); 2375 case ICmpInst::ICMP_EQ: 2376 if (LHSCst == SubOne(RHSCst)) { 2377 // (X == 13 | X == 14) -> X-13 <u 2 2378 Constant *AddCST = ConstantExpr::getNeg(LHSCst); 2379 Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off"); 2380 AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst); 2381 return new ICmpInst(ICmpInst::ICMP_ULT, Add, AddCST); 2382 } 2383 break; // (X == 13 | X == 15) -> no change 2384 case ICmpInst::ICMP_UGT: // (X == 13 | X u> 14) -> no change 2385 case ICmpInst::ICMP_SGT: // (X == 13 | X s> 14) -> no change 2386 break; 2387 case ICmpInst::ICMP_NE: // (X == 13 | X != 15) -> X != 15 2388 case ICmpInst::ICMP_ULT: // (X == 13 | X u< 15) -> X u< 15 2389 case ICmpInst::ICMP_SLT: // (X == 13 | X s< 15) -> X s< 15 2390 return ReplaceInstUsesWith(I, RHS); 2391 } 2392 break; 2393 case ICmpInst::ICMP_NE: 2394 switch (RHSCC) { 2395 default: llvm_unreachable("Unknown integer condition code!"); 2396 case ICmpInst::ICMP_EQ: // (X != 13 | X == 15) -> X != 13 2397 case ICmpInst::ICMP_UGT: // (X != 13 | X u> 15) -> X != 13 2398 case ICmpInst::ICMP_SGT: // (X != 13 | X s> 15) -> X != 13 2399 return ReplaceInstUsesWith(I, LHS); 2400 case ICmpInst::ICMP_NE: // (X != 13 | X != 15) -> true 2401 case ICmpInst::ICMP_ULT: // (X != 13 | X u< 15) -> true 2402 case ICmpInst::ICMP_SLT: // (X != 13 | X s< 15) -> true 2403 return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); 2404 } 2405 break; 2406 case ICmpInst::ICMP_ULT: 2407 switch (RHSCC) { 2408 default: 
             llvm_unreachable("Unknown integer condition code!");
    case ICmpInst::ICMP_EQ:         // (X u< 13 | X == 14) -> no change
      break;
    case ICmpInst::ICMP_UGT:        // (X u< 13 | X u> 15) -> (X-13) u> 2
      // If RHSCst is [us]MAXINT, it is always false.  Not handling
      // this can cause overflow.
      if (RHSCst->isMaxValue(false))   // unsigned max: X u> UMAX never holds
        return ReplaceInstUsesWith(I, LHS);
      return InsertRangeTest(Val, LHSCst, AddOne(RHSCst),
                             false, false, I);
    case ICmpInst::ICMP_SGT:        // (X u< 13 | X s> 15) -> no change
      break;
    case ICmpInst::ICMP_NE:         // (X u< 13 | X != 15) -> X != 15
    case ICmpInst::ICMP_ULT:        // (X u< 13 | X u< 15) -> X u< 15
      return ReplaceInstUsesWith(I, RHS);
    case ICmpInst::ICMP_SLT:        // (X u< 13 | X s< 15) -> no change
      break;
    }
    break;
  case ICmpInst::ICMP_SLT:
    switch (RHSCC) {
    default: llvm_unreachable("Unknown integer condition code!");
    case ICmpInst::ICMP_EQ:         // (X s< 13 | X == 14) -> no change
      break;
    case ICmpInst::ICMP_SGT:        // (X s< 13 | X s> 15) -> (X-13) s> 2
      // If RHSCst is [us]MAXINT, it is always false.  Not handling
      // this can cause overflow.
2435 if (RHSCst->isMaxValue(true)) 2436 return ReplaceInstUsesWith(I, LHS); 2437 return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), 2438 true, false, I); 2439 case ICmpInst::ICMP_UGT: // (X s< 13 | X u> 15) -> no change 2440 break; 2441 case ICmpInst::ICMP_NE: // (X s< 13 | X != 15) -> X != 15 2442 case ICmpInst::ICMP_SLT: // (X s< 13 | X s< 15) -> X s< 15 2443 return ReplaceInstUsesWith(I, RHS); 2444 case ICmpInst::ICMP_ULT: // (X s< 13 | X u< 15) -> no change 2445 break; 2446 } 2447 break; 2448 case ICmpInst::ICMP_UGT: 2449 switch (RHSCC) { 2450 default: llvm_unreachable("Unknown integer condition code!"); 2451 case ICmpInst::ICMP_EQ: // (X u> 13 | X == 15) -> X u> 13 2452 case ICmpInst::ICMP_UGT: // (X u> 13 | X u> 15) -> X u> 13 2453 return ReplaceInstUsesWith(I, LHS); 2454 case ICmpInst::ICMP_SGT: // (X u> 13 | X s> 15) -> no change 2455 break; 2456 case ICmpInst::ICMP_NE: // (X u> 13 | X != 15) -> true 2457 case ICmpInst::ICMP_ULT: // (X u> 13 | X u< 15) -> true 2458 return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); 2459 case ICmpInst::ICMP_SLT: // (X u> 13 | X s< 15) -> no change 2460 break; 2461 } 2462 break; 2463 case ICmpInst::ICMP_SGT: 2464 switch (RHSCC) { 2465 default: llvm_unreachable("Unknown integer condition code!"); 2466 case ICmpInst::ICMP_EQ: // (X s> 13 | X == 15) -> X > 13 2467 case ICmpInst::ICMP_SGT: // (X s> 13 | X s> 15) -> X > 13 2468 return ReplaceInstUsesWith(I, LHS); 2469 case ICmpInst::ICMP_UGT: // (X s> 13 | X u> 15) -> no change 2470 break; 2471 case ICmpInst::ICMP_NE: // (X s> 13 | X != 15) -> true 2472 case ICmpInst::ICMP_SLT: // (X s> 13 | X s< 15) -> true 2473 return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); 2474 case ICmpInst::ICMP_ULT: // (X s> 13 | X u< 15) -> no change 2475 break; 2476 } 2477 break; 2478 } 2479 return 0; 2480} 2481 2482Instruction *InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, 2483 FCmpInst *RHS) { 2484 if (LHS->getPredicate() == FCmpInst::FCMP_UNO && 2485 
      RHS->getPredicate() == FCmpInst::FCMP_UNO &&
      LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) {
    if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
      if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
        // If either of the constants are nans, then the whole thing returns
        // true.
        if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
          return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));

        // Otherwise, no need to compare the two constants, compare the
        // rest:  (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y).
        return new FCmpInst(FCmpInst::FCMP_UNO,
                            LHS->getOperand(0), RHS->getOperand(0));
      }

    // Handle vector zeros.  This occurs because the canonical form of
    // "fcmp uno x,x" is "fcmp uno x, 0".
    if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
        isa<ConstantAggregateZero>(RHS->getOperand(1)))
      return new FCmpInst(FCmpInst::FCMP_UNO,
                          LHS->getOperand(0), RHS->getOperand(0));

    return 0;
  }

  Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
  Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
  FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();

  if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
    // Swap RHS operands to match LHS.
    Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
    std::swap(Op1LHS, Op1RHS);
  }
  if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
    // Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y).
2521 if (Op0CC == Op1CC) 2522 return new FCmpInst((FCmpInst::Predicate)Op0CC, 2523 Op0LHS, Op0RHS); 2524 if (Op0CC == FCmpInst::FCMP_TRUE || Op1CC == FCmpInst::FCMP_TRUE) 2525 return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); 2526 if (Op0CC == FCmpInst::FCMP_FALSE) 2527 return ReplaceInstUsesWith(I, RHS); 2528 if (Op1CC == FCmpInst::FCMP_FALSE) 2529 return ReplaceInstUsesWith(I, LHS); 2530 bool Op0Ordered; 2531 bool Op1Ordered; 2532 unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); 2533 unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered); 2534 if (Op0Ordered == Op1Ordered) { 2535 // If both are ordered or unordered, return a new fcmp with 2536 // or'ed predicates. 2537 Value *RV = getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, Op0LHS, Op0RHS); 2538 if (Instruction *I = dyn_cast<Instruction>(RV)) 2539 return I; 2540 // Otherwise, it's a constant boolean value... 2541 return ReplaceInstUsesWith(I, RV); 2542 } 2543 } 2544 return 0; 2545} 2546 2547/// FoldOrWithConstants - This helper function folds: 2548/// 2549/// ((A | B) & C1) | (B & C2) 2550/// 2551/// into: 2552/// 2553/// (A & C1) | B 2554/// 2555/// when the XOR of the two constants is "all ones" (-1). 2556Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op, 2557 Value *A, Value *B, Value *C) { 2558 ConstantInt *CI1 = dyn_cast<ConstantInt>(C); 2559 if (!CI1) return 0; 2560 2561 Value *V1 = 0; 2562 ConstantInt *CI2 = 0; 2563 if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2)))) return 0; 2564 2565 APInt Xor = CI1->getValue() ^ CI2->getValue(); 2566 if (!Xor.isAllOnesValue()) return 0; 2567 2568 if (V1 == A || V1 == B) { 2569 Value *NewOp = Builder->CreateAnd((V1 == A) ? 
B : A, CI1); 2570 return BinaryOperator::CreateOr(NewOp, V1); 2571 } 2572 2573 return 0; 2574} 2575 2576Instruction *InstCombiner::visitOr(BinaryOperator &I) { 2577 bool Changed = SimplifyCommutative(I); 2578 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 2579 2580 if (Value *V = SimplifyOrInst(Op0, Op1, TD)) 2581 return ReplaceInstUsesWith(I, V); 2582 2583 2584 // See if we can simplify any instructions used by the instruction whose sole 2585 // purpose is to compute bits we don't care about. 2586 if (SimplifyDemandedInstructionBits(I)) 2587 return &I; 2588 2589 if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { 2590 ConstantInt *C1 = 0; Value *X = 0; 2591 // (X & C1) | C2 --> (X | C2) & (C1|C2) 2592 if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) && 2593 isOnlyUse(Op0)) { 2594 Value *Or = Builder->CreateOr(X, RHS); 2595 Or->takeName(Op0); 2596 return BinaryOperator::CreateAnd(Or, 2597 ConstantInt::get(I.getContext(), 2598 RHS->getValue() | C1->getValue())); 2599 } 2600 2601 // (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2) 2602 if (match(Op0, m_Xor(m_Value(X), m_ConstantInt(C1))) && 2603 isOnlyUse(Op0)) { 2604 Value *Or = Builder->CreateOr(X, RHS); 2605 Or->takeName(Op0); 2606 return BinaryOperator::CreateXor(Or, 2607 ConstantInt::get(I.getContext(), 2608 C1->getValue() & ~RHS->getValue())); 2609 } 2610 2611 // Try to fold constant and into select arguments. 2612 if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) 2613 if (Instruction *R = FoldOpIntoSelect(I, SI)) 2614 return R; 2615 if (isa<PHINode>(Op0)) 2616 if (Instruction *NV = FoldOpIntoPhi(I)) 2617 return NV; 2618 } 2619 2620 Value *A = 0, *B = 0; 2621 ConstantInt *C1 = 0, *C2 = 0; 2622 2623 // (A | B) | C and A | (B | C) -> bswap if possible. 2624 // (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible. 
2625 if (match(Op0, m_Or(m_Value(), m_Value())) || 2626 match(Op1, m_Or(m_Value(), m_Value())) || 2627 (match(Op0, m_Shift(m_Value(), m_Value())) && 2628 match(Op1, m_Shift(m_Value(), m_Value())))) { 2629 if (Instruction *BSwap = MatchBSwap(I)) 2630 return BSwap; 2631 } 2632 2633 // (X^C)|Y -> (X|Y)^C iff Y&C == 0 2634 if (Op0->hasOneUse() && 2635 match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) && 2636 MaskedValueIsZero(Op1, C1->getValue())) { 2637 Value *NOr = Builder->CreateOr(A, Op1); 2638 NOr->takeName(Op0); 2639 return BinaryOperator::CreateXor(NOr, C1); 2640 } 2641 2642 // Y|(X^C) -> (X|Y)^C iff Y&C == 0 2643 if (Op1->hasOneUse() && 2644 match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) && 2645 MaskedValueIsZero(Op0, C1->getValue())) { 2646 Value *NOr = Builder->CreateOr(A, Op0); 2647 NOr->takeName(Op0); 2648 return BinaryOperator::CreateXor(NOr, C1); 2649 } 2650 2651 // (A & C)|(B & D) 2652 Value *C = 0, *D = 0; 2653 if (match(Op0, m_And(m_Value(A), m_Value(C))) && 2654 match(Op1, m_And(m_Value(B), m_Value(D)))) { 2655 Value *V1 = 0, *V2 = 0, *V3 = 0; 2656 C1 = dyn_cast<ConstantInt>(C); 2657 C2 = dyn_cast<ConstantInt>(D); 2658 if (C1 && C2) { // (A & C1)|(B & C2) 2659 // If we have: ((V + N) & C1) | (V & C2) 2660 // .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0 2661 // replace with V+N. 2662 if (C1->getValue() == ~C2->getValue()) { 2663 if ((C2->getValue() & (C2->getValue()+1)) == 0 && // C2 == 0+1+ 2664 match(A, m_Add(m_Value(V1), m_Value(V2)))) { 2665 // Add commutes, try both ways. 2666 if (V1 == B && MaskedValueIsZero(V2, C2->getValue())) 2667 return ReplaceInstUsesWith(I, A); 2668 if (V2 == B && MaskedValueIsZero(V1, C2->getValue())) 2669 return ReplaceInstUsesWith(I, A); 2670 } 2671 // Or commutes, try both ways. 2672 if ((C1->getValue() & (C1->getValue()+1)) == 0 && 2673 match(B, m_Add(m_Value(V1), m_Value(V2)))) { 2674 // Add commutes, try both ways. 
2675 if (V1 == A && MaskedValueIsZero(V2, C1->getValue())) 2676 return ReplaceInstUsesWith(I, B); 2677 if (V2 == A && MaskedValueIsZero(V1, C1->getValue())) 2678 return ReplaceInstUsesWith(I, B); 2679 } 2680 } 2681 2682 // ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2) 2683 // iff (C1&C2) == 0 and (N&~C1) == 0 2684 if ((C1->getValue() & C2->getValue()) == 0) { 2685 if (match(A, m_Or(m_Value(V1), m_Value(V2))) && 2686 ((V1 == B && MaskedValueIsZero(V2, ~C1->getValue())) || // (V|N) 2687 (V2 == B && MaskedValueIsZero(V1, ~C1->getValue())))) // (N|V) 2688 return BinaryOperator::CreateAnd(A, 2689 ConstantInt::get(A->getContext(), 2690 C1->getValue()|C2->getValue())); 2691 // Or commutes, try both ways. 2692 if (match(B, m_Or(m_Value(V1), m_Value(V2))) && 2693 ((V1 == A && MaskedValueIsZero(V2, ~C2->getValue())) || // (V|N) 2694 (V2 == A && MaskedValueIsZero(V1, ~C2->getValue())))) // (N|V) 2695 return BinaryOperator::CreateAnd(B, 2696 ConstantInt::get(B->getContext(), 2697 C1->getValue()|C2->getValue())); 2698 } 2699 } 2700 2701 // Check to see if we have any common things being and'ed. If so, find the 2702 // terms for V1 & (V2|V3). 2703 if (isOnlyUse(Op0) || isOnlyUse(Op1)) { 2704 V1 = 0; 2705 if (A == B) // (A & C)|(A & D) == A & (C|D) 2706 V1 = A, V2 = C, V3 = D; 2707 else if (A == D) // (A & C)|(B & A) == A & (B|C) 2708 V1 = A, V2 = B, V3 = C; 2709 else if (C == B) // (A & C)|(C & D) == C & (A|D) 2710 V1 = C, V2 = A, V3 = D; 2711 else if (C == D) // (A & C)|(B & C) == C & (A|B) 2712 V1 = C, V2 = A, V3 = B; 2713 2714 if (V1) { 2715 Value *Or = Builder->CreateOr(V2, V3, "tmp"); 2716 return BinaryOperator::CreateAnd(V1, Or); 2717 } 2718 } 2719 2720 // (A & (C0?-1:0)) | (B & ~(C0?-1:0)) -> C0 ? 
A : B, and commuted variants 2721 if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D)) 2722 return Match; 2723 if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C)) 2724 return Match; 2725 if (Instruction *Match = MatchSelectFromAndOr(C, B, A, D)) 2726 return Match; 2727 if (Instruction *Match = MatchSelectFromAndOr(D, A, B, C)) 2728 return Match; 2729 2730 // ((A&~B)|(~A&B)) -> A^B 2731 if ((match(C, m_Not(m_Specific(D))) && 2732 match(B, m_Not(m_Specific(A))))) 2733 return BinaryOperator::CreateXor(A, D); 2734 // ((~B&A)|(~A&B)) -> A^B 2735 if ((match(A, m_Not(m_Specific(D))) && 2736 match(B, m_Not(m_Specific(C))))) 2737 return BinaryOperator::CreateXor(C, D); 2738 // ((A&~B)|(B&~A)) -> A^B 2739 if ((match(C, m_Not(m_Specific(B))) && 2740 match(D, m_Not(m_Specific(A))))) 2741 return BinaryOperator::CreateXor(A, B); 2742 // ((~B&A)|(B&~A)) -> A^B 2743 if ((match(A, m_Not(m_Specific(B))) && 2744 match(D, m_Not(m_Specific(C))))) 2745 return BinaryOperator::CreateXor(C, B); 2746 } 2747 2748 // (X >> Z) | (Y >> Z) -> (X|Y) >> Z for all shifts. 
2749 if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) { 2750 if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0)) 2751 if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() && 2752 SI0->getOperand(1) == SI1->getOperand(1) && 2753 (SI0->hasOneUse() || SI1->hasOneUse())) { 2754 Value *NewOp = Builder->CreateOr(SI0->getOperand(0), SI1->getOperand(0), 2755 SI0->getName()); 2756 return BinaryOperator::Create(SI1->getOpcode(), NewOp, 2757 SI1->getOperand(1)); 2758 } 2759 } 2760 2761 // ((A|B)&1)|(B&-2) -> (A&1) | B 2762 if (match(Op0, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) || 2763 match(Op0, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) { 2764 Instruction *Ret = FoldOrWithConstants(I, Op1, A, B, C); 2765 if (Ret) return Ret; 2766 } 2767 // (B&-2)|((A|B)&1) -> (A&1) | B 2768 if (match(Op1, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) || 2769 match(Op1, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) { 2770 Instruction *Ret = FoldOrWithConstants(I, Op0, A, B, C); 2771 if (Ret) return Ret; 2772 } 2773 2774 // (~A | ~B) == (~(A & B)) - De Morgan's Law 2775 if (Value *Op0NotVal = dyn_castNotVal(Op0)) 2776 if (Value *Op1NotVal = dyn_castNotVal(Op1)) 2777 if (Op0->hasOneUse() && Op1->hasOneUse()) { 2778 Value *And = Builder->CreateAnd(Op0NotVal, Op1NotVal, 2779 I.getName()+".demorgan"); 2780 return BinaryOperator::CreateNot(And); 2781 } 2782 2783 // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B) 2784 if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) { 2785 if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS))) 2786 return R; 2787 2788 if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0))) 2789 if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS)) 2790 return Res; 2791 } 2792 2793 // fold (or (cast A), (cast B)) -> (cast (or A, B)) 2794 if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) { 2795 if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) 2796 if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ? 
2797 if (!isa<ICmpInst>(Op0C->getOperand(0)) || 2798 !isa<ICmpInst>(Op1C->getOperand(0))) { 2799 const Type *SrcTy = Op0C->getOperand(0)->getType(); 2800 if (SrcTy == Op1C->getOperand(0)->getType() && 2801 SrcTy->isIntOrIntVector() && 2802 // Only do this if the casts both really cause code to be 2803 // generated. 2804 ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), 2805 I.getType()) && 2806 ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), 2807 I.getType())) { 2808 Value *NewOp = Builder->CreateOr(Op0C->getOperand(0), 2809 Op1C->getOperand(0), I.getName()); 2810 return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); 2811 } 2812 } 2813 } 2814 } 2815 2816 2817 // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y) 2818 if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) { 2819 if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) 2820 if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS)) 2821 return Res; 2822 } 2823 2824 return Changed ? &I : 0; 2825} 2826 2827namespace { 2828 2829// XorSelf - Implements: X ^ X --> 0 2830struct XorSelf { 2831 Value *RHS; 2832 XorSelf(Value *rhs) : RHS(rhs) {} 2833 bool shouldApply(Value *LHS) const { return LHS == RHS; } 2834 Instruction *apply(BinaryOperator &Xor) const { 2835 return &Xor; 2836 } 2837}; 2838 2839} 2840 2841Instruction *InstCombiner::visitXor(BinaryOperator &I) { 2842 bool Changed = SimplifyCommutative(I); 2843 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 2844 2845 if (isa<UndefValue>(Op1)) { 2846 if (isa<UndefValue>(Op0)) 2847 // Handle undef ^ undef -> 0 special case. This is a common 2848 // idiom (misuse). 2849 return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); 2850 return ReplaceInstUsesWith(I, Op1); // X ^ undef -> undef 2851 } 2852 2853 // xor X, X = 0, even if X is nested in a sequence of Xor's. 
2854 if (Instruction *Result = AssociativeOpt(I, XorSelf(Op1))) { 2855 assert(Result == &I && "AssociativeOpt didn't work?"); Result=Result; 2856 return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); 2857 } 2858 2859 // See if we can simplify any instructions used by the instruction whose sole 2860 // purpose is to compute bits we don't care about. 2861 if (SimplifyDemandedInstructionBits(I)) 2862 return &I; 2863 if (isa<VectorType>(I.getType())) 2864 if (isa<ConstantAggregateZero>(Op1)) 2865 return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X 2866 2867 // Is this a ~ operation? 2868 if (Value *NotOp = dyn_castNotVal(&I)) { 2869 if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(NotOp)) { 2870 if (Op0I->getOpcode() == Instruction::And || 2871 Op0I->getOpcode() == Instruction::Or) { 2872 // ~(~X & Y) --> (X | ~Y) - De Morgan's Law 2873 // ~(~X | Y) === (X & ~Y) - De Morgan's Law 2874 if (dyn_castNotVal(Op0I->getOperand(1))) 2875 Op0I->swapOperands(); 2876 if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) { 2877 Value *NotY = 2878 Builder->CreateNot(Op0I->getOperand(1), 2879 Op0I->getOperand(1)->getName()+".not"); 2880 if (Op0I->getOpcode() == Instruction::And) 2881 return BinaryOperator::CreateOr(Op0NotVal, NotY); 2882 return BinaryOperator::CreateAnd(Op0NotVal, NotY); 2883 } 2884 2885 // ~(X & Y) --> (~X | ~Y) - De Morgan's Law 2886 // ~(X | Y) === (~X & ~Y) - De Morgan's Law 2887 if (isFreeToInvert(Op0I->getOperand(0)) && 2888 isFreeToInvert(Op0I->getOperand(1))) { 2889 Value *NotX = 2890 Builder->CreateNot(Op0I->getOperand(0), "notlhs"); 2891 Value *NotY = 2892 Builder->CreateNot(Op0I->getOperand(1), "notrhs"); 2893 if (Op0I->getOpcode() == Instruction::And) 2894 return BinaryOperator::CreateOr(NotX, NotY); 2895 return BinaryOperator::CreateAnd(NotX, NotY); 2896 } 2897 } 2898 } 2899 } 2900 2901 2902 if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { 2903 if (RHS->isOne() && Op0->hasOneUse()) { 2904 // xor (cmp A, B), true = not (cmp 
A, B) = !cmp A, B 2905 if (ICmpInst *ICI = dyn_cast<ICmpInst>(Op0)) 2906 return new ICmpInst(ICI->getInversePredicate(), 2907 ICI->getOperand(0), ICI->getOperand(1)); 2908 2909 if (FCmpInst *FCI = dyn_cast<FCmpInst>(Op0)) 2910 return new FCmpInst(FCI->getInversePredicate(), 2911 FCI->getOperand(0), FCI->getOperand(1)); 2912 } 2913 2914 // fold (xor(zext(cmp)), 1) and (xor(sext(cmp)), -1) to ext(!cmp). 2915 if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) { 2916 if (CmpInst *CI = dyn_cast<CmpInst>(Op0C->getOperand(0))) { 2917 if (CI->hasOneUse() && Op0C->hasOneUse()) { 2918 Instruction::CastOps Opcode = Op0C->getOpcode(); 2919 if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) && 2920 (RHS == ConstantExpr::getCast(Opcode, 2921 ConstantInt::getTrue(I.getContext()), 2922 Op0C->getDestTy()))) { 2923 CI->setPredicate(CI->getInversePredicate()); 2924 return CastInst::Create(Opcode, CI, Op0C->getType()); 2925 } 2926 } 2927 } 2928 } 2929 2930 if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) { 2931 // ~(c-X) == X-c-1 == X+(-c-1) 2932 if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue()) 2933 if (Constant *Op0I0C = dyn_cast<Constant>(Op0I->getOperand(0))) { 2934 Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C); 2935 Constant *ConstantRHS = ConstantExpr::getSub(NegOp0I0C, 2936 ConstantInt::get(I.getType(), 1)); 2937 return BinaryOperator::CreateAdd(Op0I->getOperand(1), ConstantRHS); 2938 } 2939 2940 if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) { 2941 if (Op0I->getOpcode() == Instruction::Add) { 2942 // ~(X-c) --> (-c-1)-X 2943 if (RHS->isAllOnesValue()) { 2944 Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI); 2945 return BinaryOperator::CreateSub( 2946 ConstantExpr::getSub(NegOp0CI, 2947 ConstantInt::get(I.getType(), 1)), 2948 Op0I->getOperand(0)); 2949 } else if (RHS->getValue().isSignBit()) { 2950 // (X + C) ^ signbit -> (X + C + signbit) 2951 Constant *C = ConstantInt::get(I.getContext(), 2952 RHS->getValue() + 
Op0CI->getValue()); 2953 return BinaryOperator::CreateAdd(Op0I->getOperand(0), C); 2954 2955 } 2956 } else if (Op0I->getOpcode() == Instruction::Or) { 2957 // (X|C1)^C2 -> X^(C1|C2) iff X&~C1 == 0 2958 if (MaskedValueIsZero(Op0I->getOperand(0), Op0CI->getValue())) { 2959 Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS); 2960 // Anything in both C1 and C2 is known to be zero, remove it from 2961 // NewRHS. 2962 Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS); 2963 NewRHS = ConstantExpr::getAnd(NewRHS, 2964 ConstantExpr::getNot(CommonBits)); 2965 Worklist.Add(Op0I); 2966 I.setOperand(0, Op0I->getOperand(0)); 2967 I.setOperand(1, NewRHS); 2968 return &I; 2969 } 2970 } 2971 } 2972 } 2973 2974 // Try to fold constant and into select arguments. 2975 if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) 2976 if (Instruction *R = FoldOpIntoSelect(I, SI)) 2977 return R; 2978 if (isa<PHINode>(Op0)) 2979 if (Instruction *NV = FoldOpIntoPhi(I)) 2980 return NV; 2981 } 2982 2983 if (Value *X = dyn_castNotVal(Op0)) // ~A ^ A == -1 2984 if (X == Op1) 2985 return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); 2986 2987 if (Value *X = dyn_castNotVal(Op1)) // A ^ ~A == -1 2988 if (X == Op0) 2989 return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); 2990 2991 2992 BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1); 2993 if (Op1I) { 2994 Value *A, *B; 2995 if (match(Op1I, m_Or(m_Value(A), m_Value(B)))) { 2996 if (A == Op0) { // B^(B|A) == (A|B)^B 2997 Op1I->swapOperands(); 2998 I.swapOperands(); 2999 std::swap(Op0, Op1); 3000 } else if (B == Op0) { // B^(A|B) == (A|B)^B 3001 I.swapOperands(); // Simplified below. 
3002 std::swap(Op0, Op1); 3003 } 3004 } else if (match(Op1I, m_Xor(m_Specific(Op0), m_Value(B)))) { 3005 return ReplaceInstUsesWith(I, B); // A^(A^B) == B 3006 } else if (match(Op1I, m_Xor(m_Value(A), m_Specific(Op0)))) { 3007 return ReplaceInstUsesWith(I, A); // A^(B^A) == B 3008 } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) && 3009 Op1I->hasOneUse()){ 3010 if (A == Op0) { // A^(A&B) -> A^(B&A) 3011 Op1I->swapOperands(); 3012 std::swap(A, B); 3013 } 3014 if (B == Op0) { // A^(B&A) -> (B&A)^A 3015 I.swapOperands(); // Simplified below. 3016 std::swap(Op0, Op1); 3017 } 3018 } 3019 } 3020 3021 BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0); 3022 if (Op0I) { 3023 Value *A, *B; 3024 if (match(Op0I, m_Or(m_Value(A), m_Value(B))) && 3025 Op0I->hasOneUse()) { 3026 if (A == Op1) // (B|A)^B == (A|B)^B 3027 std::swap(A, B); 3028 if (B == Op1) // (A|B)^B == A & ~B 3029 return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1, "tmp")); 3030 } else if (match(Op0I, m_Xor(m_Specific(Op1), m_Value(B)))) { 3031 return ReplaceInstUsesWith(I, B); // (A^B)^A == B 3032 } else if (match(Op0I, m_Xor(m_Value(A), m_Specific(Op1)))) { 3033 return ReplaceInstUsesWith(I, A); // (B^A)^A == B 3034 } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) && 3035 Op0I->hasOneUse()){ 3036 if (A == Op1) // (A&B)^A -> (B&A)^A 3037 std::swap(A, B); 3038 if (B == Op1 && // (B&A)^A == ~B & A 3039 !isa<ConstantInt>(Op1)) { // Canonical form is (B&C)^C 3040 return BinaryOperator::CreateAnd(Builder->CreateNot(A, "tmp"), Op1); 3041 } 3042 } 3043 } 3044 3045 // (X >> Z) ^ (Y >> Z) -> (X^Y) >> Z for all shifts. 
3046 if (Op0I && Op1I && Op0I->isShift() && 3047 Op0I->getOpcode() == Op1I->getOpcode() && 3048 Op0I->getOperand(1) == Op1I->getOperand(1) && 3049 (Op1I->hasOneUse() || Op1I->hasOneUse())) { 3050 Value *NewOp = 3051 Builder->CreateXor(Op0I->getOperand(0), Op1I->getOperand(0), 3052 Op0I->getName()); 3053 return BinaryOperator::Create(Op1I->getOpcode(), NewOp, 3054 Op1I->getOperand(1)); 3055 } 3056 3057 if (Op0I && Op1I) { 3058 Value *A, *B, *C, *D; 3059 // (A & B)^(A | B) -> A ^ B 3060 if (match(Op0I, m_And(m_Value(A), m_Value(B))) && 3061 match(Op1I, m_Or(m_Value(C), m_Value(D)))) { 3062 if ((A == C && B == D) || (A == D && B == C)) 3063 return BinaryOperator::CreateXor(A, B); 3064 } 3065 // (A | B)^(A & B) -> A ^ B 3066 if (match(Op0I, m_Or(m_Value(A), m_Value(B))) && 3067 match(Op1I, m_And(m_Value(C), m_Value(D)))) { 3068 if ((A == C && B == D) || (A == D && B == C)) 3069 return BinaryOperator::CreateXor(A, B); 3070 } 3071 3072 // (A & B)^(C & D) 3073 if ((Op0I->hasOneUse() || Op1I->hasOneUse()) && 3074 match(Op0I, m_And(m_Value(A), m_Value(B))) && 3075 match(Op1I, m_And(m_Value(C), m_Value(D)))) { 3076 // (X & Y)^(X & Y) -> (Y^Z) & X 3077 Value *X = 0, *Y = 0, *Z = 0; 3078 if (A == C) 3079 X = A, Y = B, Z = D; 3080 else if (A == D) 3081 X = A, Y = B, Z = C; 3082 else if (B == C) 3083 X = B, Y = A, Z = D; 3084 else if (B == D) 3085 X = B, Y = A, Z = C; 3086 3087 if (X) { 3088 Value *NewOp = Builder->CreateXor(Y, Z, Op0->getName()); 3089 return BinaryOperator::CreateAnd(NewOp, X); 3090 } 3091 } 3092 } 3093 3094 // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B) 3095 if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) 3096 if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS))) 3097 return R; 3098 3099 // fold (xor (cast A), (cast B)) -> (cast (xor A, B)) 3100 if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) { 3101 if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) 3102 if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind? 
3103 const Type *SrcTy = Op0C->getOperand(0)->getType(); 3104 if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() && 3105 // Only do this if the casts both really cause code to be generated. 3106 ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), 3107 I.getType()) && 3108 ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), 3109 I.getType())) { 3110 Value *NewOp = Builder->CreateXor(Op0C->getOperand(0), 3111 Op1C->getOperand(0), I.getName()); 3112 return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); 3113 } 3114 } 3115 } 3116 3117 return Changed ? &I : 0; 3118} 3119 3120 3121Instruction *InstCombiner::visitShl(BinaryOperator &I) { 3122 return commonShiftTransforms(I); 3123} 3124 3125Instruction *InstCombiner::visitLShr(BinaryOperator &I) { 3126 return commonShiftTransforms(I); 3127} 3128 3129Instruction *InstCombiner::visitAShr(BinaryOperator &I) { 3130 if (Instruction *R = commonShiftTransforms(I)) 3131 return R; 3132 3133 Value *Op0 = I.getOperand(0); 3134 3135 // ashr int -1, X = -1 (for any arithmetic shift rights of ~0) 3136 if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0)) 3137 if (CSI->isAllOnesValue()) 3138 return ReplaceInstUsesWith(I, CSI); 3139 3140 // See if we can turn a signed shr into an unsigned shr. 3141 if (MaskedValueIsZero(Op0, 3142 APInt::getSignBit(I.getType()->getScalarSizeInBits()))) 3143 return BinaryOperator::CreateLShr(Op0, I.getOperand(1)); 3144 3145 // Arithmetic shifting an all-sign-bit value is a no-op. 
3146 unsigned NumSignBits = ComputeNumSignBits(Op0); 3147 if (NumSignBits == Op0->getType()->getScalarSizeInBits()) 3148 return ReplaceInstUsesWith(I, Op0); 3149 3150 return 0; 3151} 3152 3153Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { 3154 assert(I.getOperand(1)->getType() == I.getOperand(0)->getType()); 3155 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 3156 3157 // shl X, 0 == X and shr X, 0 == X 3158 // shl 0, X == 0 and shr 0, X == 0 3159 if (Op1 == Constant::getNullValue(Op1->getType()) || 3160 Op0 == Constant::getNullValue(Op0->getType())) 3161 return ReplaceInstUsesWith(I, Op0); 3162 3163 if (isa<UndefValue>(Op0)) { 3164 if (I.getOpcode() == Instruction::AShr) // undef >>s X -> undef 3165 return ReplaceInstUsesWith(I, Op0); 3166 else // undef << X -> 0, undef >>u X -> 0 3167 return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); 3168 } 3169 if (isa<UndefValue>(Op1)) { 3170 if (I.getOpcode() == Instruction::AShr) // X >>s undef -> X 3171 return ReplaceInstUsesWith(I, Op0); 3172 else // X << undef, X >>u undef -> 0 3173 return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); 3174 } 3175 3176 // See if we can fold away this shift. 3177 if (SimplifyDemandedInstructionBits(I)) 3178 return &I; 3179 3180 // Try to fold constant and into select arguments. 3181 if (isa<Constant>(Op0)) 3182 if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) 3183 if (Instruction *R = FoldOpIntoSelect(I, SI)) 3184 return R; 3185 3186 if (ConstantInt *CUI = dyn_cast<ConstantInt>(Op1)) 3187 if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I)) 3188 return Res; 3189 return 0; 3190} 3191 3192Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, 3193 BinaryOperator &I) { 3194 bool isLeftShift = I.getOpcode() == Instruction::Shl; 3195 3196 // See if we can simplify any instructions used by the instruction whose sole 3197 // purpose is to compute bits we don't care about. 
3198 uint32_t TypeBits = Op0->getType()->getScalarSizeInBits(); 3199 3200 // shl i32 X, 32 = 0 and srl i8 Y, 9 = 0, ... just don't eliminate 3201 // a signed shift. 3202 // 3203 if (Op1->uge(TypeBits)) { 3204 if (I.getOpcode() != Instruction::AShr) 3205 return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType())); 3206 else { 3207 I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1)); 3208 return &I; 3209 } 3210 } 3211 3212 // ((X*C1) << C2) == (X * (C1 << C2)) 3213 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0)) 3214 if (BO->getOpcode() == Instruction::Mul && isLeftShift) 3215 if (Constant *BOOp = dyn_cast<Constant>(BO->getOperand(1))) 3216 return BinaryOperator::CreateMul(BO->getOperand(0), 3217 ConstantExpr::getShl(BOOp, Op1)); 3218 3219 // Try to fold constant and into select arguments. 3220 if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) 3221 if (Instruction *R = FoldOpIntoSelect(I, SI)) 3222 return R; 3223 if (isa<PHINode>(Op0)) 3224 if (Instruction *NV = FoldOpIntoPhi(I)) 3225 return NV; 3226 3227 // Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2)) 3228 if (TruncInst *TI = dyn_cast<TruncInst>(Op0)) { 3229 Instruction *TrOp = dyn_cast<Instruction>(TI->getOperand(0)); 3230 // If 'shift2' is an ashr, we would have to get the sign bit into a funny 3231 // place. Don't try to do this transformation in this case. Also, we 3232 // require that the input operand is a shift-by-constant so that we have 3233 // confidence that the shifts will get folded together. We could do this 3234 // xform in more cases, but it is unlikely to be profitable. 3235 if (TrOp && I.isLogicalShift() && TrOp->isShift() && 3236 isa<ConstantInt>(TrOp->getOperand(1))) { 3237 // Okay, we'll do this xform. Make the shift of shift. 
3238 Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType()); 3239 // (shift2 (shift1 & 0x00FF), c2) 3240 Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,I.getName()); 3241 3242 // For logical shifts, the truncation has the effect of making the high 3243 // part of the register be zeros. Emulate this by inserting an AND to 3244 // clear the top bits as needed. This 'and' will usually be zapped by 3245 // other xforms later if dead. 3246 unsigned SrcSize = TrOp->getType()->getScalarSizeInBits(); 3247 unsigned DstSize = TI->getType()->getScalarSizeInBits(); 3248 APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize)); 3249 3250 // The mask we constructed says what the trunc would do if occurring 3251 // between the shifts. We want to know the effect *after* the second 3252 // shift. We know that it is a logical shift by a constant, so adjust the 3253 // mask as appropriate. 3254 if (I.getOpcode() == Instruction::Shl) 3255 MaskV <<= Op1->getZExtValue(); 3256 else { 3257 assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift"); 3258 MaskV = MaskV.lshr(Op1->getZExtValue()); 3259 } 3260 3261 // shift1 & 0x00FF 3262 Value *And = Builder->CreateAnd(NSh, 3263 ConstantInt::get(I.getContext(), MaskV), 3264 TI->getName()); 3265 3266 // Return the value truncated to the interesting size. 3267 return new TruncInst(And, I.getType()); 3268 } 3269 } 3270 3271 if (Op0->hasOneUse()) { 3272 if (BinaryOperator *Op0BO = dyn_cast<BinaryOperator>(Op0)) { 3273 // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C) 3274 Value *V1, *V2; 3275 ConstantInt *CC; 3276 switch (Op0BO->getOpcode()) { 3277 default: break; 3278 case Instruction::Add: 3279 case Instruction::And: 3280 case Instruction::Or: 3281 case Instruction::Xor: { 3282 // These operators commute. 
3283 // Turn (Y + (X >> C)) << C -> (X + (Y << C)) & (~0 << C) 3284 if (isLeftShift && Op0BO->getOperand(1)->hasOneUse() && 3285 match(Op0BO->getOperand(1), m_Shr(m_Value(V1), 3286 m_Specific(Op1)))) { 3287 Value *YS = // (Y << C) 3288 Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName()); 3289 // (X + (Y << C)) 3290 Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1, 3291 Op0BO->getOperand(1)->getName()); 3292 uint32_t Op1Val = Op1->getLimitedValue(TypeBits); 3293 return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(), 3294 APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val))); 3295 } 3296 3297 // Turn (Y + ((X >> C) & CC)) << C -> ((X & (CC << C)) + (Y << C)) 3298 Value *Op0BOOp1 = Op0BO->getOperand(1); 3299 if (isLeftShift && Op0BOOp1->hasOneUse() && 3300 match(Op0BOOp1, 3301 m_And(m_Shr(m_Value(V1), m_Specific(Op1)), 3302 m_ConstantInt(CC))) && 3303 cast<BinaryOperator>(Op0BOOp1)->getOperand(0)->hasOneUse()) { 3304 Value *YS = // (Y << C) 3305 Builder->CreateShl(Op0BO->getOperand(0), Op1, 3306 Op0BO->getName()); 3307 // X & (CC << C) 3308 Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1), 3309 V1->getName()+".mask"); 3310 return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM); 3311 } 3312 } 3313 3314 // FALL THROUGH. 
3315 case Instruction::Sub: { 3316 // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C) 3317 if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() && 3318 match(Op0BO->getOperand(0), m_Shr(m_Value(V1), 3319 m_Specific(Op1)))) { 3320 Value *YS = // (Y << C) 3321 Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); 3322 // (X + (Y << C)) 3323 Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS, 3324 Op0BO->getOperand(0)->getName()); 3325 uint32_t Op1Val = Op1->getLimitedValue(TypeBits); 3326 return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(), 3327 APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val))); 3328 } 3329 3330 // Turn (((X >> C)&CC) + Y) << C -> (X + (Y << C)) & (CC << C) 3331 if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() && 3332 match(Op0BO->getOperand(0), 3333 m_And(m_Shr(m_Value(V1), m_Value(V2)), 3334 m_ConstantInt(CC))) && V2 == Op1 && 3335 cast<BinaryOperator>(Op0BO->getOperand(0)) 3336 ->getOperand(0)->hasOneUse()) { 3337 Value *YS = // (Y << C) 3338 Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); 3339 // X & (CC << C) 3340 Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1), 3341 V1->getName()+".mask"); 3342 3343 return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS); 3344 } 3345 3346 break; 3347 } 3348 } 3349 3350 3351 // If the operand is an bitwise operator with a constant RHS, and the 3352 // shift is the only use, we can pull it out of the shift. 3353 if (ConstantInt *Op0C = dyn_cast<ConstantInt>(Op0BO->getOperand(1))) { 3354 bool isValid = true; // Valid only for And, Or, Xor 3355 bool highBitSet = false; // Transform if high bit of constant set? 3356 3357 switch (Op0BO->getOpcode()) { 3358 default: isValid = false; break; // Do not perform transform! 
3359 case Instruction::Add: 3360 isValid = isLeftShift; 3361 break; 3362 case Instruction::Or: 3363 case Instruction::Xor: 3364 highBitSet = false; 3365 break; 3366 case Instruction::And: 3367 highBitSet = true; 3368 break; 3369 } 3370 3371 // If this is a signed shift right, and the high bit is modified 3372 // by the logical operation, do not perform the transformation. 3373 // The highBitSet boolean indicates the value of the high bit of 3374 // the constant which would cause it to be modified for this 3375 // operation. 3376 // 3377 if (isValid && I.getOpcode() == Instruction::AShr) 3378 isValid = Op0C->getValue()[TypeBits-1] == highBitSet; 3379 3380 if (isValid) { 3381 Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1); 3382 3383 Value *NewShift = 3384 Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1); 3385 NewShift->takeName(Op0BO); 3386 3387 return BinaryOperator::Create(Op0BO->getOpcode(), NewShift, 3388 NewRHS); 3389 } 3390 } 3391 } 3392 } 3393 3394 // Find out if this is a shift of a shift by a constant. 3395 BinaryOperator *ShiftOp = dyn_cast<BinaryOperator>(Op0); 3396 if (ShiftOp && !ShiftOp->isShift()) 3397 ShiftOp = 0; 3398 3399 if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) { 3400 ConstantInt *ShiftAmt1C = cast<ConstantInt>(ShiftOp->getOperand(1)); 3401 uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits); 3402 uint32_t ShiftAmt2 = Op1->getLimitedValue(TypeBits); 3403 assert(ShiftAmt2 != 0 && "Should have been simplified earlier"); 3404 if (ShiftAmt1 == 0) return 0; // Will be simplified in the future. 3405 Value *X = ShiftOp->getOperand(0); 3406 3407 uint32_t AmtSum = ShiftAmt1+ShiftAmt2; // Fold into one big shift. 3408 3409 const IntegerType *Ty = cast<IntegerType>(I.getType()); 3410 3411 // Check for (X << c1) << c2 and (X >> c1) >> c2 3412 if (I.getOpcode() == ShiftOp->getOpcode()) { 3413 // If this is oversized composite shift, then unsigned shifts get 0, ashr 3414 // saturates. 
3415 if (AmtSum >= TypeBits) { 3416 if (I.getOpcode() != Instruction::AShr) 3417 return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); 3418 AmtSum = TypeBits-1; // Saturate to 31 for i32 ashr. 3419 } 3420 3421 return BinaryOperator::Create(I.getOpcode(), X, 3422 ConstantInt::get(Ty, AmtSum)); 3423 } 3424 3425 if (ShiftOp->getOpcode() == Instruction::LShr && 3426 I.getOpcode() == Instruction::AShr) { 3427 if (AmtSum >= TypeBits) 3428 return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); 3429 3430 // ((X >>u C1) >>s C2) -> (X >>u (C1+C2)) since C1 != 0. 3431 return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum)); 3432 } 3433 3434 if (ShiftOp->getOpcode() == Instruction::AShr && 3435 I.getOpcode() == Instruction::LShr) { 3436 // ((X >>s C1) >>u C2) -> ((X >>s (C1+C2)) & mask) since C1 != 0. 3437 if (AmtSum >= TypeBits) 3438 AmtSum = TypeBits-1; 3439 3440 Value *Shift = Builder->CreateAShr(X, ConstantInt::get(Ty, AmtSum)); 3441 3442 APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); 3443 return BinaryOperator::CreateAnd(Shift, 3444 ConstantInt::get(I.getContext(), Mask)); 3445 } 3446 3447 // Okay, if we get here, one shift must be left, and the other shift must be 3448 // right. See if the amounts are equal. 3449 if (ShiftAmt1 == ShiftAmt2) { 3450 // If we have ((X >>? C) << C), turn this into X & (-1 << C). 3451 if (I.getOpcode() == Instruction::Shl) { 3452 APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1)); 3453 return BinaryOperator::CreateAnd(X, 3454 ConstantInt::get(I.getContext(),Mask)); 3455 } 3456 // If we have ((X << C) >>u C), turn this into X & (-1 >>u C). 3457 if (I.getOpcode() == Instruction::LShr) { 3458 APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1)); 3459 return BinaryOperator::CreateAnd(X, 3460 ConstantInt::get(I.getContext(), Mask)); 3461 } 3462 // We can simplify ((X << C) >>s C) into a trunc + sext. 
      // NOTE: we could do this for any C, but that would make 'unusual' integer
      // types.  For now, just stick to ones well-supported by the code
      // generators.
      const Type *SExtType = 0;
      switch (Ty->getBitWidth() - ShiftAmt1) {
      case 1  :
      case 8  :
      case 16 :
      case 32 :
      case 64 :
      case 128:
        // Only materialize a sign-extend from a width backends support well.
        SExtType = IntegerType::get(I.getContext(),
                                    Ty->getBitWidth() - ShiftAmt1);
        break;
      default: break;
      }
      if (SExtType)
        return new SExtInst(Builder->CreateTrunc(X, SExtType, "sext"), Ty);
      // Otherwise, we can't handle it yet.
    } else if (ShiftAmt1 < ShiftAmt2) {
      uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1;

      // (X >>? C1) << C2 --> X << (C2-C1) & (-1 << C2)
      if (I.getOpcode() == Instruction::Shl) {
        assert(ShiftOp->getOpcode() == Instruction::LShr ||
               ShiftOp->getOpcode() == Instruction::AShr);
        Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));

        // Mask off the low bits the original right shift would have cleared.
        APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
        return BinaryOperator::CreateAnd(Shift,
                                         ConstantInt::get(I.getContext(),Mask));
      }

      // (X << C1) >>u C2  --> X >>u (C2-C1) & (-1 >> C2)
      if (I.getOpcode() == Instruction::LShr) {
        assert(ShiftOp->getOpcode() == Instruction::Shl);
        Value *Shift = Builder->CreateLShr(X, ConstantInt::get(Ty, ShiftDiff));

        // Mask off the high bits the original left shift would have cleared.
        APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
        return BinaryOperator::CreateAnd(Shift,
                                         ConstantInt::get(I.getContext(),Mask));
      }

      // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in.
    } else {
      assert(ShiftAmt2 < ShiftAmt1);
      uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2;

      // (X >>? C1) << C2 --> X >>? (C1-C2) & (-1 << C2)
      if (I.getOpcode() == Instruction::Shl) {
        assert(ShiftOp->getOpcode() == Instruction::LShr ||
               ShiftOp->getOpcode() == Instruction::AShr);
        // Preserve the kind of the original right shift (lshr vs. ashr).
        Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), X,
                                            ConstantInt::get(Ty, ShiftDiff));

        APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
        return BinaryOperator::CreateAnd(Shift,
                                         ConstantInt::get(I.getContext(),Mask));
      }

      // (X << C1) >>u C2  --> X << (C1-C2) & (-1 >> C2)
      if (I.getOpcode() == Instruction::LShr) {
        assert(ShiftOp->getOpcode() == Instruction::Shl);
        Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));

        APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
        return BinaryOperator::CreateAnd(Shift,
                                         ConstantInt::get(I.getContext(),Mask));
      }

      // We can't handle (X << C1) >>a C2, it shifts arbitrary bits in.
    }
  }
  return 0;
}



/// FindElementAtOffset - Given a type and a constant offset, determine whether
/// or not there is a sequence of GEP indices into the type that will land us at
/// the specified offset.  If so, fill them into NewIndices and return the
/// resultant element type, otherwise return null.
const Type *InstCombiner::FindElementAtOffset(const Type *Ty, int64_t Offset,
                                          SmallVectorImpl<Value*> &NewIndices) {
  // Requires TargetData for size/layout queries, and a sized type to index.
  if (!TD) return 0;
  if (!Ty->isSized()) return 0;

  // Start with the index over the outer type.  Note that the type size
  // might be zero (even if the offset isn't zero) if the indexed type
  // is something like [0 x {int, int}]
  const Type *IntPtrTy = TD->getIntPtrType(Ty->getContext());
  int64_t FirstIdx = 0;
  if (int64_t TySize = TD->getTypeAllocSize(Ty)) {
    FirstIdx = Offset/TySize;
    Offset -= FirstIdx*TySize;

    // Handle hosts where % returns negative instead of values [0..TySize).
    if (Offset < 0) {
      --FirstIdx;
      Offset += TySize;
      assert(Offset >= 0);
    }
    assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset");
  }

  NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx));

  // Index into the types.  If we fail, set OrigBase to null.
  while (Offset) {
    // Indexing into tail padding between struct/array elements.
    if (uint64_t(Offset*8) >= TD->getTypeSizeInBits(Ty))
      return 0;

    if (const StructType *STy = dyn_cast<StructType>(Ty)) {
      // Descend into the struct field that contains Offset.
      const StructLayout *SL = TD->getStructLayout(STy);
      assert(Offset < (int64_t)SL->getSizeInBytes() &&
             "Offset must stay within the indexed type");

      unsigned Elt = SL->getElementContainingOffset(Offset);
      NewIndices.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()),
                                            Elt));

      Offset -= SL->getElementOffset(Elt);
      Ty = STy->getElementType(Elt);
    } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
      // Descend into the array element that contains Offset.
      uint64_t EltSize = TD->getTypeAllocSize(AT->getElementType());
      assert(EltSize && "Cannot index into a zero-sized array");
      NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize));
      Offset %= EltSize;
      Ty = AT->getElementType();
    } else {
      // Otherwise, we can't index into the middle of this atomic type, bail.
      return 0;
    }
  }

  return Ty;
}


/// EnforceKnownAlignment - If the specified pointer points to an object that
/// we control, modify the object's alignment to PrefAlign. This isn't
/// often possible though. If alignment is important, a more reliable approach
/// is to simply align all global variables and allocation instructions to
/// their preferred alignment from the beginning.
3608/// 3609static unsigned EnforceKnownAlignment(Value *V, 3610 unsigned Align, unsigned PrefAlign) { 3611 3612 User *U = dyn_cast<User>(V); 3613 if (!U) return Align; 3614 3615 switch (Operator::getOpcode(U)) { 3616 default: break; 3617 case Instruction::BitCast: 3618 return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign); 3619 case Instruction::GetElementPtr: { 3620 // If all indexes are zero, it is just the alignment of the base pointer. 3621 bool AllZeroOperands = true; 3622 for (User::op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i) 3623 if (!isa<Constant>(*i) || 3624 !cast<Constant>(*i)->isNullValue()) { 3625 AllZeroOperands = false; 3626 break; 3627 } 3628 3629 if (AllZeroOperands) { 3630 // Treat this like a bitcast. 3631 return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign); 3632 } 3633 break; 3634 } 3635 } 3636 3637 if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { 3638 // If there is a large requested alignment and we can, bump up the alignment 3639 // of the global. 3640 if (!GV->isDeclaration()) { 3641 if (GV->getAlignment() >= PrefAlign) 3642 Align = GV->getAlignment(); 3643 else { 3644 GV->setAlignment(PrefAlign); 3645 Align = PrefAlign; 3646 } 3647 } 3648 } else if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { 3649 // If there is a requested alignment and if this is an alloca, round up. 3650 if (AI->getAlignment() >= PrefAlign) 3651 Align = AI->getAlignment(); 3652 else { 3653 AI->setAlignment(PrefAlign); 3654 Align = PrefAlign; 3655 } 3656 } 3657 3658 return Align; 3659} 3660 3661/// GetOrEnforceKnownAlignment - If the specified pointer has an alignment that 3662/// we can determine, return it, otherwise return 0. If PrefAlign is specified, 3663/// and it is more than the alignment of the ultimate object, see if we can 3664/// increase the alignment of the ultimate object, making this check succeed. 
unsigned InstCombiner::GetOrEnforceKnownAlignment(Value *V,
                                                  unsigned PrefAlign) {
  // Without TargetData, fall back to the bit width of 'unsigned' on the host.
  unsigned BitWidth = TD ? TD->getTypeSizeInBits(V->getType()) :
                           sizeof(PrefAlign) * CHAR_BIT;
  APInt Mask = APInt::getAllOnesValue(BitWidth);
  APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
  ComputeMaskedBits(V, Mask, KnownZero, KnownOne);
  // Known-zero trailing bits of the pointer value imply alignment.
  unsigned TrailZ = KnownZero.countTrailingOnes();
  unsigned Align = 1u << std::min(BitWidth - 1, TrailZ);

  if (PrefAlign > Align)
    Align = EnforceKnownAlignment(V, Align, PrefAlign);

    // We don't need to make any adjustment.
  return Align;
}

/// SimplifyMemTransfer - Simplify a memcpy/memmove intrinsic: raise its
/// alignment when we can prove more, and lower small constant-length copies
/// (1/2/4/8 bytes) to a single load+store pair.  Returns the replacement
/// instruction, or null if no change was made.
/// NOTE(review): operand 1/2/3 are assumed to be dest/src/length for these
/// intrinsics in this LLVM version -- matches the getDest()/getSource()
/// accessors used elsewhere in this file.
Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
  unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getOperand(1));
  unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getOperand(2));
  unsigned MinAlign = std::min(DstAlign, SrcAlign);
  unsigned CopyAlign = MI->getAlignment();

  // If we proved a larger alignment than the intrinsic records, update it.
  if (CopyAlign < MinAlign) {
    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
                                      MinAlign, false));
    return MI;
  }

  // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
  // load/store.
  ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getOperand(3));
  if (MemOpLength == 0) return 0;

  // Source and destination pointer types are always "i8*" for intrinsic.  See
  // if the size is something we can handle with a single primitive load/store.
  // A single load+store correctly handles overlapping memory in the memmove
  // case.
  unsigned Size = MemOpLength->getZExtValue();
  if (Size == 0) return MI;  // Delete this mem transfer.

  if (Size > 8 || (Size&(Size-1)))
    return 0;  // If not 1/2/4/8 bytes, exit.

  // Use an integer load+store unless we can find something better.
  Type *NewPtrTy =
            PointerType::getUnqual(IntegerType::get(MI->getContext(), Size<<3));

  // Memcpy forces the use of i8* for the source and destination.  That means
  // that if you're using memcpy to move one double around, you'll get a cast
  // from double* to i8*.  We'd much rather use a double load+store rather than
  // an i64 load+store, here because this improves the odds that the source or
  // dest address will be promotable.  See if we can find a better type than the
  // integer datatype.
  if (Value *Op = getBitCastOperand(MI->getOperand(1))) {
    const Type *SrcETy = cast<PointerType>(Op->getType())->getElementType();
    if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) {
      // The SrcETy might be something like {{{double}}} or [1 x double].  Rip
      // down through these levels if so.
      while (!SrcETy->isSingleValueType()) {
        if (const StructType *STy = dyn_cast<StructType>(SrcETy)) {
          if (STy->getNumElements() == 1)
            SrcETy = STy->getElementType(0);
          else
            break;
        } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcETy)) {
          if (ATy->getNumElements() == 1)
            SrcETy = ATy->getElementType();
          else
            break;
        } else
          break;
      }

      if (SrcETy->isSingleValueType())
        NewPtrTy = PointerType::getUnqual(SrcETy);
    }
  }


  // If the memcpy/memmove provides better alignment info than we can
  // infer, use it.
  SrcAlign = std::max(SrcAlign, CopyAlign);
  DstAlign = std::max(DstAlign, CopyAlign);

  Value *Src = Builder->CreateBitCast(MI->getOperand(2), NewPtrTy);
  Value *Dest = Builder->CreateBitCast(MI->getOperand(1), NewPtrTy);
  Instruction *L = new LoadInst(Src, "tmp", false, SrcAlign);
  InsertNewInstBefore(L, *MI);
  InsertNewInstBefore(new StoreInst(L, Dest, false, DstAlign), *MI);

  // Set the size of the copy to 0, it will be deleted on the next iteration.
  MI->setOperand(3, Constant::getNullValue(MemOpLength->getType()));
  return MI;
}

/// SimplifyMemSet - Simplify a memset intrinsic: raise its alignment when we
/// can prove more, and lower small constant-length fills (1/2/4/8 bytes with a
/// constant i8 value) to a single store of a splatted integer.  Returns the
/// replacement instruction, or null if no change was made.
Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
  unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest());
  if (MI->getAlignment() < Alignment) {
    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
                                      Alignment, false));
    return MI;
  }

  // Extract the length and alignment and fill if they are constant.
  ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
  ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
  if (!LenC || !FillC || FillC->getType() != Type::getInt8Ty(MI->getContext()))
    return 0;
  uint64_t Len = LenC->getZExtValue();
  Alignment = MI->getAlignment();

  // If the length is zero, this is a no-op
  if (Len == 0) return MI; // memset(d,c,0,a) -> noop

  // memset(s,c,n) -> store s, c (for n=1,2,4,8)
  if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
    const Type *ITy = IntegerType::get(MI->getContext(), Len*8);  // n=1 -> i8.

    Value *Dest = MI->getDest();
    Dest = Builder->CreateBitCast(Dest, PointerType::getUnqual(ITy));

    // Alignment 0 is identity for alignment 1 for memset, but not store.
    if (Alignment == 0) Alignment = 1;

    // Extract the fill value and store.  Multiplying by 0x0101... splats the
    // byte across all byte positions of the wider integer.
    uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
    InsertNewInstBefore(new StoreInst(ConstantInt::get(ITy, Fill),
                                      Dest, false, Alignment), *MI);

    // Set the size of the copy to 0, it will be deleted on the next iteration.
    MI->setLength(Constant::getNullValue(LenC->getType()));
    return MI;
  }

  return 0;
}


/// visitCallInst - CallInst simplification.  This mostly only handles folding
/// of intrinsic instructions.  For normal calls, it allows visitCallSite to do
/// the heavy lifting.
3807/// 3808Instruction *InstCombiner::visitCallInst(CallInst &CI) { 3809 if (isFreeCall(&CI)) 3810 return visitFree(CI); 3811 3812 // If the caller function is nounwind, mark the call as nounwind, even if the 3813 // callee isn't. 3814 if (CI.getParent()->getParent()->doesNotThrow() && 3815 !CI.doesNotThrow()) { 3816 CI.setDoesNotThrow(); 3817 return &CI; 3818 } 3819 3820 IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI); 3821 if (!II) return visitCallSite(&CI); 3822 3823 // Intrinsics cannot occur in an invoke, so handle them here instead of in 3824 // visitCallSite. 3825 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) { 3826 bool Changed = false; 3827 3828 // memmove/cpy/set of zero bytes is a noop. 3829 if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) { 3830 if (NumBytes->isNullValue()) return EraseInstFromFunction(CI); 3831 3832 if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes)) 3833 if (CI->getZExtValue() == 1) { 3834 // Replace the instruction with just byte operations. We would 3835 // transform other cases to loads/stores, but we don't know if 3836 // alignment is sufficient. 3837 } 3838 } 3839 3840 // If we have a memmove and the source operation is a constant global, 3841 // then the source and dest pointers can't alias, so we can change this 3842 // into a call to memcpy. 3843 if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) { 3844 if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource())) 3845 if (GVSrc->isConstant()) { 3846 Module *M = CI.getParent()->getParent()->getParent(); 3847 Intrinsic::ID MemCpyID = Intrinsic::memcpy; 3848 const Type *Tys[1]; 3849 Tys[0] = CI.getOperand(3)->getType(); 3850 CI.setOperand(0, 3851 Intrinsic::getDeclaration(M, MemCpyID, Tys, 1)); 3852 Changed = true; 3853 } 3854 } 3855 3856 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { 3857 // memmove(x,x,size) -> noop. 
3858 if (MTI->getSource() == MTI->getDest()) 3859 return EraseInstFromFunction(CI); 3860 } 3861 3862 // If we can determine a pointer alignment that is bigger than currently 3863 // set, update the alignment. 3864 if (isa<MemTransferInst>(MI)) { 3865 if (Instruction *I = SimplifyMemTransfer(MI)) 3866 return I; 3867 } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) { 3868 if (Instruction *I = SimplifyMemSet(MSI)) 3869 return I; 3870 } 3871 3872 if (Changed) return II; 3873 } 3874 3875 switch (II->getIntrinsicID()) { 3876 default: break; 3877 case Intrinsic::bswap: 3878 // bswap(bswap(x)) -> x 3879 if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getOperand(1))) 3880 if (Operand->getIntrinsicID() == Intrinsic::bswap) 3881 return ReplaceInstUsesWith(CI, Operand->getOperand(1)); 3882 3883 // bswap(trunc(bswap(x))) -> trunc(lshr(x, c)) 3884 if (TruncInst *TI = dyn_cast<TruncInst>(II->getOperand(1))) { 3885 if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(TI->getOperand(0))) 3886 if (Operand->getIntrinsicID() == Intrinsic::bswap) { 3887 unsigned C = Operand->getType()->getPrimitiveSizeInBits() - 3888 TI->getType()->getPrimitiveSizeInBits(); 3889 Value *CV = ConstantInt::get(Operand->getType(), C); 3890 Value *V = Builder->CreateLShr(Operand->getOperand(1), CV); 3891 return new TruncInst(V, TI->getType()); 3892 } 3893 } 3894 3895 break; 3896 case Intrinsic::powi: 3897 if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getOperand(2))) { 3898 // powi(x, 0) -> 1.0 3899 if (Power->isZero()) 3900 return ReplaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0)); 3901 // powi(x, 1) -> x 3902 if (Power->isOne()) 3903 return ReplaceInstUsesWith(CI, II->getOperand(1)); 3904 // powi(x, -1) -> 1/x 3905 if (Power->isAllOnesValue()) 3906 return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0), 3907 II->getOperand(1)); 3908 } 3909 break; 3910 3911 case Intrinsic::uadd_with_overflow: { 3912 Value *LHS = II->getOperand(1), *RHS = II->getOperand(2); 3913 
const IntegerType *IT = cast<IntegerType>(II->getOperand(1)->getType()); 3914 uint32_t BitWidth = IT->getBitWidth(); 3915 APInt Mask = APInt::getSignBit(BitWidth); 3916 APInt LHSKnownZero(BitWidth, 0); 3917 APInt LHSKnownOne(BitWidth, 0); 3918 ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne); 3919 bool LHSKnownNegative = LHSKnownOne[BitWidth - 1]; 3920 bool LHSKnownPositive = LHSKnownZero[BitWidth - 1]; 3921 3922 if (LHSKnownNegative || LHSKnownPositive) { 3923 APInt RHSKnownZero(BitWidth, 0); 3924 APInt RHSKnownOne(BitWidth, 0); 3925 ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne); 3926 bool RHSKnownNegative = RHSKnownOne[BitWidth - 1]; 3927 bool RHSKnownPositive = RHSKnownZero[BitWidth - 1]; 3928 if (LHSKnownNegative && RHSKnownNegative) { 3929 // The sign bit is set in both cases: this MUST overflow. 3930 // Create a simple add instruction, and insert it into the struct. 3931 Instruction *Add = BinaryOperator::CreateAdd(LHS, RHS, "", &CI); 3932 Worklist.Add(Add); 3933 Constant *V[] = { 3934 UndefValue::get(LHS->getType()),ConstantInt::getTrue(II->getContext()) 3935 }; 3936 Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); 3937 return InsertValueInst::Create(Struct, Add, 0); 3938 } 3939 3940 if (LHSKnownPositive && RHSKnownPositive) { 3941 // The sign bit is clear in both cases: this CANNOT overflow. 3942 // Create a simple add instruction, and insert it into the struct. 3943 Instruction *Add = BinaryOperator::CreateNUWAdd(LHS, RHS, "", &CI); 3944 Worklist.Add(Add); 3945 Constant *V[] = { 3946 UndefValue::get(LHS->getType()), 3947 ConstantInt::getFalse(II->getContext()) 3948 }; 3949 Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); 3950 return InsertValueInst::Create(Struct, Add, 0); 3951 } 3952 } 3953 } 3954 // FALL THROUGH uadd into sadd 3955 case Intrinsic::sadd_with_overflow: 3956 // Canonicalize constants into the RHS. 
3957 if (isa<Constant>(II->getOperand(1)) && 3958 !isa<Constant>(II->getOperand(2))) { 3959 Value *LHS = II->getOperand(1); 3960 II->setOperand(1, II->getOperand(2)); 3961 II->setOperand(2, LHS); 3962 return II; 3963 } 3964 3965 // X + undef -> undef 3966 if (isa<UndefValue>(II->getOperand(2))) 3967 return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); 3968 3969 if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getOperand(2))) { 3970 // X + 0 -> {X, false} 3971 if (RHS->isZero()) { 3972 Constant *V[] = { 3973 UndefValue::get(II->getOperand(0)->getType()), 3974 ConstantInt::getFalse(II->getContext()) 3975 }; 3976 Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); 3977 return InsertValueInst::Create(Struct, II->getOperand(1), 0); 3978 } 3979 } 3980 break; 3981 case Intrinsic::usub_with_overflow: 3982 case Intrinsic::ssub_with_overflow: 3983 // undef - X -> undef 3984 // X - undef -> undef 3985 if (isa<UndefValue>(II->getOperand(1)) || 3986 isa<UndefValue>(II->getOperand(2))) 3987 return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); 3988 3989 if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getOperand(2))) { 3990 // X - 0 -> {X, false} 3991 if (RHS->isZero()) { 3992 Constant *V[] = { 3993 UndefValue::get(II->getOperand(1)->getType()), 3994 ConstantInt::getFalse(II->getContext()) 3995 }; 3996 Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); 3997 return InsertValueInst::Create(Struct, II->getOperand(1), 0); 3998 } 3999 } 4000 break; 4001 case Intrinsic::umul_with_overflow: 4002 case Intrinsic::smul_with_overflow: 4003 // Canonicalize constants into the RHS. 
4004 if (isa<Constant>(II->getOperand(1)) && 4005 !isa<Constant>(II->getOperand(2))) { 4006 Value *LHS = II->getOperand(1); 4007 II->setOperand(1, II->getOperand(2)); 4008 II->setOperand(2, LHS); 4009 return II; 4010 } 4011 4012 // X * undef -> undef 4013 if (isa<UndefValue>(II->getOperand(2))) 4014 return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); 4015 4016 if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getOperand(2))) { 4017 // X*0 -> {0, false} 4018 if (RHSI->isZero()) 4019 return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType())); 4020 4021 // X * 1 -> {X, false} 4022 if (RHSI->equalsInt(1)) { 4023 Constant *V[] = { 4024 UndefValue::get(II->getOperand(1)->getType()), 4025 ConstantInt::getFalse(II->getContext()) 4026 }; 4027 Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); 4028 return InsertValueInst::Create(Struct, II->getOperand(1), 0); 4029 } 4030 } 4031 break; 4032 case Intrinsic::ppc_altivec_lvx: 4033 case Intrinsic::ppc_altivec_lvxl: 4034 case Intrinsic::x86_sse_loadu_ps: 4035 case Intrinsic::x86_sse2_loadu_pd: 4036 case Intrinsic::x86_sse2_loadu_dq: 4037 // Turn PPC lvx -> load if the pointer is known aligned. 4038 // Turn X86 loadups -> load if the pointer is known aligned. 4039 if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) { 4040 Value *Ptr = Builder->CreateBitCast(II->getOperand(1), 4041 PointerType::getUnqual(II->getType())); 4042 return new LoadInst(Ptr); 4043 } 4044 break; 4045 case Intrinsic::ppc_altivec_stvx: 4046 case Intrinsic::ppc_altivec_stvxl: 4047 // Turn stvx -> store if the pointer is known aligned. 
4048 if (GetOrEnforceKnownAlignment(II->getOperand(2), 16) >= 16) { 4049 const Type *OpPtrTy = 4050 PointerType::getUnqual(II->getOperand(1)->getType()); 4051 Value *Ptr = Builder->CreateBitCast(II->getOperand(2), OpPtrTy); 4052 return new StoreInst(II->getOperand(1), Ptr); 4053 } 4054 break; 4055 case Intrinsic::x86_sse_storeu_ps: 4056 case Intrinsic::x86_sse2_storeu_pd: 4057 case Intrinsic::x86_sse2_storeu_dq: 4058 // Turn X86 storeu -> store if the pointer is known aligned. 4059 if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) { 4060 const Type *OpPtrTy = 4061 PointerType::getUnqual(II->getOperand(2)->getType()); 4062 Value *Ptr = Builder->CreateBitCast(II->getOperand(1), OpPtrTy); 4063 return new StoreInst(II->getOperand(2), Ptr); 4064 } 4065 break; 4066 4067 case Intrinsic::x86_sse_cvttss2si: { 4068 // These intrinsics only demands the 0th element of its input vector. If 4069 // we can simplify the input based on that, do so now. 4070 unsigned VWidth = 4071 cast<VectorType>(II->getOperand(1)->getType())->getNumElements(); 4072 APInt DemandedElts(VWidth, 1); 4073 APInt UndefElts(VWidth, 0); 4074 if (Value *V = SimplifyDemandedVectorElts(II->getOperand(1), DemandedElts, 4075 UndefElts)) { 4076 II->setOperand(1, V); 4077 return II; 4078 } 4079 break; 4080 } 4081 4082 case Intrinsic::ppc_altivec_vperm: 4083 // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant. 4084 if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getOperand(3))) { 4085 assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!"); 4086 4087 // Check that all of the elements are integer constants or undefs. 4088 bool AllEltsOk = true; 4089 for (unsigned i = 0; i != 16; ++i) { 4090 if (!isa<ConstantInt>(Mask->getOperand(i)) && 4091 !isa<UndefValue>(Mask->getOperand(i))) { 4092 AllEltsOk = false; 4093 break; 4094 } 4095 } 4096 4097 if (AllEltsOk) { 4098 // Cast the input vectors to byte vectors. 
4099 Value *Op0 = Builder->CreateBitCast(II->getOperand(1), Mask->getType()); 4100 Value *Op1 = Builder->CreateBitCast(II->getOperand(2), Mask->getType()); 4101 Value *Result = UndefValue::get(Op0->getType()); 4102 4103 // Only extract each element once. 4104 Value *ExtractedElts[32]; 4105 memset(ExtractedElts, 0, sizeof(ExtractedElts)); 4106 4107 for (unsigned i = 0; i != 16; ++i) { 4108 if (isa<UndefValue>(Mask->getOperand(i))) 4109 continue; 4110 unsigned Idx=cast<ConstantInt>(Mask->getOperand(i))->getZExtValue(); 4111 Idx &= 31; // Match the hardware behavior. 4112 4113 if (ExtractedElts[Idx] == 0) { 4114 ExtractedElts[Idx] = 4115 Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1, 4116 ConstantInt::get(Type::getInt32Ty(II->getContext()), 4117 Idx&15, false), "tmp"); 4118 } 4119 4120 // Insert this value into the result vector. 4121 Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx], 4122 ConstantInt::get(Type::getInt32Ty(II->getContext()), 4123 i, false), "tmp"); 4124 } 4125 return CastInst::Create(Instruction::BitCast, Result, CI.getType()); 4126 } 4127 } 4128 break; 4129 4130 case Intrinsic::stackrestore: { 4131 // If the save is right next to the restore, remove the restore. This can 4132 // happen when variable allocas are DCE'd. 4133 if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getOperand(1))) { 4134 if (SS->getIntrinsicID() == Intrinsic::stacksave) { 4135 BasicBlock::iterator BI = SS; 4136 if (&*++BI == II) 4137 return EraseInstFromFunction(CI); 4138 } 4139 } 4140 4141 // Scan down this block to see if there is another stack restore in the 4142 // same block without an intervening call/alloca. 
4143 BasicBlock::iterator BI = II; 4144 TerminatorInst *TI = II->getParent()->getTerminator(); 4145 bool CannotRemove = false; 4146 for (++BI; &*BI != TI; ++BI) { 4147 if (isa<AllocaInst>(BI) || isMalloc(BI)) { 4148 CannotRemove = true; 4149 break; 4150 } 4151 if (CallInst *BCI = dyn_cast<CallInst>(BI)) { 4152 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) { 4153 // If there is a stackrestore below this one, remove this one. 4154 if (II->getIntrinsicID() == Intrinsic::stackrestore) 4155 return EraseInstFromFunction(CI); 4156 // Otherwise, ignore the intrinsic. 4157 } else { 4158 // If we found a non-intrinsic call, we can't remove the stack 4159 // restore. 4160 CannotRemove = true; 4161 break; 4162 } 4163 } 4164 } 4165 4166 // If the stack restore is in a return/unwind block and if there are no 4167 // allocas or calls between the restore and the return, nuke the restore. 4168 if (!CannotRemove && (isa<ReturnInst>(TI) || isa<UnwindInst>(TI))) 4169 return EraseInstFromFunction(CI); 4170 break; 4171 } 4172 } 4173 4174 return visitCallSite(II); 4175} 4176 4177// InvokeInst simplification 4178// 4179Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) { 4180 return visitCallSite(&II); 4181} 4182 4183/// isSafeToEliminateVarargsCast - If this cast does not affect the value 4184/// passed through the varargs area, we can eliminate the use of the cast. 4185static bool isSafeToEliminateVarargsCast(const CallSite CS, 4186 const CastInst * const CI, 4187 const TargetData * const TD, 4188 const int ix) { 4189 if (!CI->isLosslessCast()) 4190 return false; 4191 4192 // The size of ByVal arguments is derived from the type, so we 4193 // can't change to a type with a different size. If the size were 4194 // passed explicitly we could avoid this check. 
  // Non-byval varargs arguments: any lossless cast is fine to strip.
  if (!CS.paramHasAttr(ix, Attribute::ByVal))
    return true;

  const Type* SrcTy =
            cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
  const Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
  if (!SrcTy->isSized() || !DstTy->isSized())
    return false;
  if (!TD || TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy))
    return false;
  return true;
}

// visitCallSite - Improvements for call and invoke instructions.
//
Instruction *InstCombiner::visitCallSite(CallSite CS) {
  bool Changed = false;

  // If the callee is a constexpr cast of a function, attempt to move the cast
  // to the arguments of the call/invoke.
  if (transformConstExprCastCall(CS)) return 0;

  Value *Callee = CS.getCalledValue();

  if (Function *CalleeF = dyn_cast<Function>(Callee))
    if (CalleeF->getCallingConv() != CS.getCallingConv()) {
      Instruction *OldCall = CS.getInstruction();
      // If the call and callee calling conventions don't match, this call must
      // be unreachable, as the call is undefined.  The store to undef below
      // marks the spot as unreachable without modifying the CFG.
      new StoreInst(ConstantInt::getTrue(Callee->getContext()),
                UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
                                  OldCall);
      // If OldCall does not return void then replaceAllUsesWith undef.
      // This allows ValueHandlers and custom metadata to adjust itself.
      if (!OldCall->getType()->isVoidTy())
        OldCall->replaceAllUsesWith(UndefValue::get(OldCall->getType()));
      if (isa<CallInst>(OldCall))   // Not worth removing an invoke here.
        return EraseInstFromFunction(*OldCall);
      return 0;
    }

  if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
    // This instruction is not reachable, just remove it.  We insert a store to
    // undef so that we know that this code is not reachable, despite the fact
    // that we can't modify the CFG here.
    new StoreInst(ConstantInt::getTrue(Callee->getContext()),
               UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
                  CS.getInstruction());

    // If CS does not return void then replaceAllUsesWith undef.
    // This allows ValueHandlers and custom metadata to adjust itself.
    if (!CS.getInstruction()->getType()->isVoidTy())
      CS.getInstruction()->
        replaceAllUsesWith(UndefValue::get(CS.getInstruction()->getType()));

    if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
      // Don't break the CFG, insert a dummy cond branch.
      BranchInst::Create(II->getNormalDest(), II->getUnwindDest(),
                         ConstantInt::getTrue(Callee->getContext()), II);
    }
    return EraseInstFromFunction(*CS.getInstruction());
  }

  // Calls through a bitcast of llvm.init.trampoline's result get special
  // handling.
  if (BitCastInst *BC = dyn_cast<BitCastInst>(Callee))
    if (IntrinsicInst *In = dyn_cast<IntrinsicInst>(BC->getOperand(0)))
      if (In->getIntrinsicID() == Intrinsic::init_trampoline)
        return transformCallThroughTrampoline(CS);

  const PointerType *PTy = cast<PointerType>(Callee->getType());
  const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
  if (FTy->isVarArg()) {
    // NOTE(review): this tests whether the *called value* is an InvokeInst,
    // not whether CS itself is an invoke (CS.getInstruction()); looks
    // suspicious for the invoke case -- confirm the intended attribute index.
    int ix = FTy->getNumParams() + (isa<InvokeInst>(Callee) ? 3 : 1);
    // See if we can optimize any arguments passed through the varargs area of
    // the call.
    for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(),
           E = CS.arg_end(); I != E; ++I, ++ix) {
      CastInst *CI = dyn_cast<CastInst>(*I);
      if (CI && isSafeToEliminateVarargsCast(CS, CI, TD, ix)) {
        *I = CI->getOperand(0);
        Changed = true;
      }
    }
  }

  if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
    // Inline asm calls cannot throw - mark them 'nounwind'.
    CS.setDoesNotThrow();
    Changed = true;
  }

  return Changed ? CS.getInstruction() : 0;
}

// transformConstExprCastCall - If the callee is a constexpr cast of a function,
// attempt to move the cast to the arguments of the call/invoke.
//
bool InstCombiner::transformConstExprCastCall(CallSite CS) {
  if (!isa<ConstantExpr>(CS.getCalledValue())) return false;
  ConstantExpr *CE = cast<ConstantExpr>(CS.getCalledValue());
  if (CE->getOpcode() != Instruction::BitCast ||
      !isa<Function>(CE->getOperand(0)))
    return false;
  Function *Callee = cast<Function>(CE->getOperand(0));
  Instruction *Caller = CS.getInstruction();
  const AttrListPtr &CallerPAL = CS.getAttributes();

  // Okay, this is a cast from a function to a different type.  Unless doing so
  // would cause a type conversion of one of our arguments, change this call to
  // be a direct call with arguments casted to the appropriate types.
  //
  const FunctionType *FT = Callee->getFunctionType();
  const Type *OldRetTy = Caller->getType();
  const Type *NewRetTy = FT->getReturnType();

  if (isa<StructType>(NewRetTy))
    return false; // TODO: Handle multiple return values.

  // Check to see if we are changing the return type...
  if (OldRetTy != NewRetTy) {
    if (Callee->isDeclaration() &&
        // Conversion is ok if changing from one pointer type to another or from
        // a pointer to an integer of the same size.
        !((isa<PointerType>(OldRetTy) || !TD ||
           OldRetTy == TD->getIntPtrType(Caller->getContext())) &&
          (isa<PointerType>(NewRetTy) || !TD ||
           NewRetTy == TD->getIntPtrType(Caller->getContext()))))
      return false;   // Cannot transform this return value.

    if (!Caller->use_empty() &&
        // void -> non-void is handled specially
        !NewRetTy->isVoidTy() && !CastInst::isCastable(NewRetTy, OldRetTy))
      return false;   // Cannot transform this return value.

    if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
      Attributes RAttrs = CallerPAL.getRetAttributes();
      if (RAttrs & Attribute::typeIncompatible(NewRetTy))
        return false;   // Attribute not compatible with transformed value.
    }

    // If the callsite is an invoke instruction, and the return value is used by
    // a PHI node in a successor, we cannot change the return type of the call
    // because there is no place to put the cast instruction (without breaking
    // the critical edge).  Bail out in this case.
    if (!Caller->use_empty())
      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
        for (Value::use_iterator UI = II->use_begin(), E = II->use_end();
             UI != E; ++UI)
          if (PHINode *PN = dyn_cast<PHINode>(*UI))
            if (PN->getParent() == II->getNormalDest() ||
                PN->getParent() == II->getUnwindDest())
              return false;
  }

  unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin());
  unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);

  // Verify that every argument shared between the old and new signatures can
  // legally be converted to the new parameter type.
  CallSite::arg_iterator AI = CS.arg_begin();
  for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
    const Type *ParamTy = FT->getParamType(i);
    const Type *ActTy = (*AI)->getType();

    if (!CastInst::isCastable(ActTy, ParamTy))
      return false;   // Cannot transform this parameter value.

    if (CallerPAL.getParamAttributes(i + 1)
        & Attribute::typeIncompatible(ParamTy))
      return false;   // Attribute not compatible with transformed value.

    // Converting from one pointer type to another or between a pointer and an
    // integer of the same size is safe even if we do not have a body.
    bool isConvertible = ActTy == ParamTy ||
      (TD && ((isa<PointerType>(ParamTy) ||
      ParamTy == TD->getIntPtrType(Caller->getContext())) &&
              (isa<PointerType>(ActTy) ||
              ActTy == TD->getIntPtrType(Caller->getContext()))));
    if (Callee->isDeclaration() && !isConvertible) return false;
  }

  if (FT->getNumParams() < NumActualArgs && !FT->isVarArg() &&
      Callee->isDeclaration())
    return false;   // Do not delete arguments unless we have a function body.

  if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
      !CallerPAL.isEmpty())
    // In this case we have more arguments than the new function type, but we
    // won't be dropping them.  Check that these extra arguments have attributes
    // that are compatible with being a vararg call argument.
    for (unsigned i = CallerPAL.getNumSlots(); i; --i) {
      if (CallerPAL.getSlot(i - 1).Index <= FT->getNumParams())
        break;
      Attributes PAttrs = CallerPAL.getSlot(i - 1).Attrs;
      if (PAttrs & Attribute::VarArgsIncompatible)
        return false;
    }

  // Okay, we decided that this is a safe thing to do: go ahead and start
  // inserting cast instructions as necessary...
  std::vector<Value*> Args;
  Args.reserve(NumActualArgs);
  SmallVector<AttributeWithIndex, 8> attrVec;
  attrVec.reserve(NumCommonArgs);

  // Get any return attributes.
  Attributes RAttrs = CallerPAL.getRetAttributes();

  // If the return value is not being used, the type may not be compatible
  // with the existing attributes.  Wipe out any problematic attributes.
  RAttrs &= ~Attribute::typeIncompatible(NewRetTy);

  // Add the new return attributes.
  if (RAttrs)
    attrVec.push_back(AttributeWithIndex::get(0, RAttrs));

  // Rebuild the argument list, casting each common argument to the new
  // parameter type where necessary (attribute index 0 is the return value,
  // so parameter attributes use i + 1).
  AI = CS.arg_begin();
  for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
    const Type *ParamTy = FT->getParamType(i);
    if ((*AI)->getType() == ParamTy) {
      Args.push_back(*AI);
    } else {
      Instruction::CastOps opcode = CastInst::getCastOpcode(*AI,
          false, ParamTy, false);
      Args.push_back(Builder->CreateCast(opcode, *AI, ParamTy, "tmp"));
    }

    // Add any parameter attributes.
    if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
      attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
  }

  // If the function takes more arguments than the call was taking, add them
  // now.
  for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i)
    Args.push_back(Constant::getNullValue(FT->getParamType(i)));

  // If we are removing arguments to the function, emit an obnoxious warning.
  if (FT->getNumParams() < NumActualArgs) {
    if (!FT->isVarArg()) {
      errs() << "WARNING: While resolving call to function '"
             << Callee->getName() << "' arguments were dropped!\n";
    } else {
      // Add all of the arguments in their promoted form to the arg list.
      for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
        const Type *PTy = getPromotedType((*AI)->getType());
        if (PTy != (*AI)->getType()) {
          // Must promote to pass through va_arg area!
          Instruction::CastOps opcode =
            CastInst::getCastOpcode(*AI, false, PTy, false);
          Args.push_back(Builder->CreateCast(opcode, *AI, PTy, "tmp"));
        } else {
          Args.push_back(*AI);
        }

        // Add any parameter attributes.
        if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
          attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
      }
    }
  }

  // Function-level attributes use the sentinel index ~0.
  if (Attributes FnAttrs = CallerPAL.getFnAttributes())
    attrVec.push_back(AttributeWithIndex::get(~0, FnAttrs));

  if (NewRetTy->isVoidTy())
    Caller->setName("");   // Void type should not have a name.

  const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec.begin(),
                                                     attrVec.end());

  // Create the replacement call/invoke, preserving calling convention,
  // attributes, and (for calls) the tail-call flag.
  Instruction *NC;
  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
    NC = InvokeInst::Create(Callee, II->getNormalDest(), II->getUnwindDest(),
                            Args.begin(), Args.end(),
                            Caller->getName(), Caller);
    cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
    cast<InvokeInst>(NC)->setAttributes(NewCallerPAL);
  } else {
    NC = CallInst::Create(Callee, Args.begin(), Args.end(),
                          Caller->getName(), Caller);
    CallInst *CI = cast<CallInst>(Caller);
    if (CI->isTailCall())
      cast<CallInst>(NC)->setTailCall();
    cast<CallInst>(NC)->setCallingConv(CI->getCallingConv());
    cast<CallInst>(NC)->setAttributes(NewCallerPAL);
  }

  // Insert a cast of the return type as necessary.
  Value *NV = NC;
  if (OldRetTy != NV->getType() && !Caller->use_empty()) {
    if (!NV->getType()->isVoidTy()) {
      Instruction::CastOps opcode = CastInst::getCastOpcode(NC, false,
                                                            OldRetTy, false);
      NV = NC = CastInst::Create(opcode, NC, OldRetTy, "tmp");

      // If this is an invoke instruction, we should insert it after the first
      // non-phi instruction in the normal successor block.
      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
        BasicBlock::iterator I = II->getNormalDest()->getFirstNonPHI();
        InsertNewInstBefore(NC, *I);
      } else {
        // Otherwise, it's a call, just insert cast right after the call instr
        InsertNewInstBefore(NC, *Caller);
      }
      Worklist.AddUsersToWorkList(*Caller);
    } else {
      // New call returns void: give the old result's users undef instead.
      NV = UndefValue::get(Caller->getType());
    }
  }


  if (!Caller->use_empty())
    Caller->replaceAllUsesWith(NV);

  EraseInstFromFunction(*Caller);
  return true;
}

// transformCallThroughTrampoline - Turn a call to a function created by the
// init_trampoline intrinsic into a direct call to the underlying function.
//
Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
  Value *Callee = CS.getCalledValue();
  const PointerType *PTy = cast<PointerType>(Callee->getType());
  const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
  const AttrListPtr &Attrs = CS.getAttributes();

  // If the call already has the 'nest' attribute somewhere then give up -
  // otherwise 'nest' would occur twice after splicing in the chain.
  if (Attrs.hasAttrSomewhere(Attribute::Nest))
    return 0;

  IntrinsicInst *Tramp =
    cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0));

  Function *NestF = cast<Function>(Tramp->getOperand(2)->stripPointerCasts());
  const PointerType *NestFPTy = cast<PointerType>(NestF->getType());
  const FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());

  const AttrListPtr &NestAttrs = NestF->getAttributes();
  if (!NestAttrs.isEmpty()) {
    unsigned NestIdx = 1;
    const Type *NestTy = 0;
    Attributes NestAttr = Attribute::None;

    // Look for a parameter marked with the 'nest' attribute.
    for (FunctionType::param_iterator I = NestFTy->param_begin(),
         E = NestFTy->param_end(); I != E; ++NestIdx, ++I)
      if (NestAttrs.paramHasAttr(NestIdx, Attribute::Nest)) {
        // Record the parameter type and any other attributes.
        NestTy = *I;
        NestAttr = NestAttrs.getParamAttributes(NestIdx);
        break;
      }

    if (NestTy) {
      Instruction *Caller = CS.getInstruction();
      std::vector<Value*> NewArgs;
      NewArgs.reserve(unsigned(CS.arg_end()-CS.arg_begin())+1);

      SmallVector<AttributeWithIndex, 8> NewAttrs;
      NewAttrs.reserve(Attrs.getNumSlots() + 1);

      // Insert the nest argument into the call argument list, which may
      // mean appending it.  Likewise for attributes.

      // Add any result attributes.
      if (Attributes Attr = Attrs.getRetAttributes())
        NewAttrs.push_back(AttributeWithIndex::get(0, Attr));

      {
        unsigned Idx = 1;
        CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
        do {
          if (Idx == NestIdx) {
            // Add the chain argument and attributes.
            Value *NestVal = Tramp->getOperand(3);
            if (NestVal->getType() != NestTy)
              NestVal = new BitCastInst(NestVal, NestTy, "nest", Caller);
            NewArgs.push_back(NestVal);
            NewAttrs.push_back(AttributeWithIndex::get(NestIdx, NestAttr));
          }

          if (I == E)
            break;

          // Add the original argument and attributes.  Arguments at or after
          // the nest parameter shift up by one attribute index.
          NewArgs.push_back(*I);
          if (Attributes Attr = Attrs.getParamAttributes(Idx))
            NewAttrs.push_back
              (AttributeWithIndex::get(Idx + (Idx >= NestIdx), Attr));

          ++Idx, ++I;
        } while (1);
      }

      // Add any function attributes.
      if (Attributes Attr = Attrs.getFnAttributes())
        NewAttrs.push_back(AttributeWithIndex::get(~0, Attr));

      // The trampoline may have been bitcast to a bogus type (FTy).
      // Handle this by synthesizing a new function type, equal to FTy
      // with the chain parameter inserted.

      std::vector<const Type*> NewTypes;
      NewTypes.reserve(FTy->getNumParams()+1);

      // Insert the chain's type into the list of parameter types, which may
      // mean appending it.
      {
        unsigned Idx = 1;
        FunctionType::param_iterator I = FTy->param_begin(),
          E = FTy->param_end();

        do {
          if (Idx == NestIdx)
            // Add the chain's type.
            NewTypes.push_back(NestTy);

          if (I == E)
            break;

          // Add the original type.
          NewTypes.push_back(*I);

          ++Idx, ++I;
        } while (1);
      }

      // Replace the trampoline call with a direct call.  Let the generic
      // code sort out any function type mismatches.
      FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
                                                FTy->isVarArg());
      Constant *NewCallee =
        NestF->getType() == PointerType::getUnqual(NewFTy) ?
        NestF : ConstantExpr::getBitCast(NestF,
                                         PointerType::getUnqual(NewFTy));
      const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(),
                                                   NewAttrs.end());

      Instruction *NewCaller;
      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
        NewCaller = InvokeInst::Create(NewCallee,
                                       II->getNormalDest(), II->getUnwindDest(),
                                       NewArgs.begin(), NewArgs.end(),
                                       Caller->getName(), Caller);
        cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
        cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
      } else {
        NewCaller = CallInst::Create(NewCallee, NewArgs.begin(), NewArgs.end(),
                                     Caller->getName(), Caller);
        if (cast<CallInst>(Caller)->isTailCall())
          cast<CallInst>(NewCaller)->setTailCall();
        cast<CallInst>(NewCaller)->
          setCallingConv(cast<CallInst>(Caller)->getCallingConv());
        cast<CallInst>(NewCaller)->setAttributes(NewPAL);
      }
      if (!Caller->getType()->isVoidTy())
        Caller->replaceAllUsesWith(NewCaller);
      Caller->eraseFromParent();
      Worklist.Remove(Caller);
      return 0;
    }
  }

  // Replace the trampoline call with a direct call.  Since there is no 'nest'
  // parameter, there is no need to adjust the argument list.  Let the generic
  // code sort out any function type mismatches.
  Constant *NewCallee =
    NestF->getType() == PTy ? NestF :
                              ConstantExpr::getBitCast(NestF, PTy);
  CS.setCalledFunction(NewCallee);
  return CS.getInstruction();
}



// visitGetElementPtrInst - Simplify getelementptr instructions: fold constant
// indices, merge chained GEPs, and peer through bitcasts of the base pointer.
Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
  SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());

  if (Value *V = SimplifyGEPInst(&Ops[0], Ops.size(), TD))
    return ReplaceInstUsesWith(GEP, V);

  Value *PtrOp = GEP.getOperand(0);

  // gep undef, ... -> undef.
  if (isa<UndefValue>(GEP.getOperand(0)))
    return ReplaceInstUsesWith(GEP, UndefValue::get(GEP.getType()));

  // Eliminate unneeded casts for indices.
  if (TD) {
    bool MadeChange = false;
    unsigned PtrSize = TD->getPointerSizeInBits();

    gep_type_iterator GTI = gep_type_begin(GEP);
    for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end();
         I != E; ++I, ++GTI) {
      if (!isa<SequentialType>(*GTI)) continue;

      // If we are using a wider index than needed for this platform, shrink it
      // to what we need.  If narrower, sign-extend it to what we need.  This
      // explicit cast can make subsequent optimizations more obvious.
      unsigned OpBits = cast<IntegerType>((*I)->getType())->getBitWidth();
      if (OpBits == PtrSize)
        continue;

      *I = Builder->CreateIntCast(*I, TD->getIntPtrType(GEP.getContext()),true);
      MadeChange = true;
    }
    if (MadeChange) return &GEP;
  }

  // Combine Indices - If the source pointer to this getelementptr instruction
  // is a getelementptr instruction, combine the indices of the two
  // getelementptr instructions into a single instruction.
  //
  if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) {
    // Note that if our source is a gep chain itself that we wait for that
    // chain to be resolved before we perform this transformation.  This
    // avoids us creating a TON of code in some cases.
    //
    if (GetElementPtrInst *SrcGEP =
          dyn_cast<GetElementPtrInst>(Src->getOperand(0)))
      if (SrcGEP->getNumOperands() == 2)
        return 0;   // Wait until our source is folded to completion.

    SmallVector<Value*, 8> Indices;

    // Find out whether the last index in the source GEP is a sequential idx.
    bool EndsWithSequential = false;
    for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
         I != E; ++I)
      EndsWithSequential = !isa<StructType>(*I);

    // Can we combine the two pointer arithmetics offsets?
    if (EndsWithSequential) {
      // Replace: gep (gep %P, long B), long A, ...
      // With:    T = long A+B; gep %P, T, ...
      //
      Value *Sum;
      Value *SO1 = Src->getOperand(Src->getNumOperands()-1);
      Value *GO1 = GEP.getOperand(1);
      // If either index is zero, the sum is just the other index.
      if (SO1 == Constant::getNullValue(SO1->getType())) {
        Sum = GO1;
      } else if (GO1 == Constant::getNullValue(GO1->getType())) {
        Sum = SO1;
      } else {
        // If they aren't the same type, then the input hasn't been processed
        // by the loop above yet (which canonicalizes sequential index types to
        // intptr_t).  Just avoid transforming this until the input has been
        // normalized.
        if (SO1->getType() != GO1->getType())
          return 0;
        Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum");
      }

      // Update the GEP in place if possible.
      if (Src->getNumOperands() == 2) {
        GEP.setOperand(0, Src->getOperand(0));
        GEP.setOperand(1, Sum);
        return &GEP;
      }
      Indices.append(Src->op_begin()+1, Src->op_end()-1);
      Indices.push_back(Sum);
      Indices.append(GEP.op_begin()+2, GEP.op_end());
    } else if (isa<Constant>(*GEP.idx_begin()) &&
               cast<Constant>(*GEP.idx_begin())->isNullValue() &&
               Src->getNumOperands() != 1) {
      // Otherwise we can do the fold if the first index of the GEP is a zero
      Indices.append(Src->op_begin()+1, Src->op_end());
      Indices.append(GEP.idx_begin()+1, GEP.idx_end());
    }

    // The merged GEP is only 'inbounds' when both originals were.
    if (!Indices.empty())
      return (cast<GEPOperator>(&GEP)->isInBounds() &&
              Src->isInBounds()) ?
        GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices.begin(),
                                          Indices.end(), GEP.getName()) :
        GetElementPtrInst::Create(Src->getOperand(0), Indices.begin(),
                                  Indices.end(), GEP.getName());
  }

  // Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
  if (Value *X = getBitCastOperand(PtrOp)) {
    assert(isa<PointerType>(X->getType()) && "Must be cast from pointer");

    // If the input bitcast is actually "bitcast(bitcast(x))", then we don't
    // want to change the gep until the bitcasts are eliminated.
    if (getBitCastOperand(X)) {
      Worklist.AddValue(PtrOp);
      return 0;
    }

    bool HasZeroPointerIndex = false;
    if (ConstantInt *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))
      HasZeroPointerIndex = C->isZero();

    // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ...
    // into     : GEP [10 x i8]* X, i32 0, ...
    //
    // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ...
    //           into     : GEP i8* X, ...
    //
    // This occurs when the program declares an array extern like "int X[];"
    if (HasZeroPointerIndex) {
      const PointerType *CPTy = cast<PointerType>(PtrOp->getType());
      const PointerType *XTy = cast<PointerType>(X->getType());
      if (const ArrayType *CATy =
          dyn_cast<ArrayType>(CPTy->getElementType())) {
        // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ?
        if (CATy->getElementType() == XTy->getElementType()) {
          // -> GEP i8* X, ...
          SmallVector<Value*, 8> Indices(GEP.idx_begin()+1, GEP.idx_end());
          return cast<GEPOperator>(&GEP)->isInBounds() ?
            GetElementPtrInst::CreateInBounds(X, Indices.begin(), Indices.end(),
                                              GEP.getName()) :
            GetElementPtrInst::Create(X, Indices.begin(), Indices.end(),
                                      GEP.getName());
        }

        if (const ArrayType *XATy = dyn_cast<ArrayType>(XTy->getElementType())){
          // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ?
          if (CATy->getElementType() == XATy->getElementType()) {
            // -> GEP [10 x i8]* X, i32 0, ...
            // At this point, we know that the cast source type is a pointer
            // to an array of the same type as the destination pointer
            // array.  Because the array type is never stepped over (there
            // is a leading zero) we can fold the cast into this GEP.
            GEP.setOperand(0, X);
            return &GEP;
          }
        }
      }
    } else if (GEP.getNumOperands() == 2) {
      // Transform things like:
      // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V
      // into:  %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
      const Type *SrcElTy = cast<PointerType>(X->getType())->getElementType();
      const Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType();
      if (TD && isa<ArrayType>(SrcElTy) &&
          TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) ==
          TD->getTypeAllocSize(ResElTy)) {
        Value *Idx[2];
        Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
        Idx[1] = GEP.getOperand(1);
        Value *NewGEP = cast<GEPOperator>(&GEP)->isInBounds() ?
          Builder->CreateInBoundsGEP(X, Idx, Idx + 2, GEP.getName()) :
          Builder->CreateGEP(X, Idx, Idx + 2, GEP.getName());
        // V and GEP are both pointer types --> BitCast
        return new BitCastInst(NewGEP, GEP.getType());
      }

      // Transform things like:
      // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
      //   (where tmp = 8*tmp2) into:
      // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast

      if (TD && isa<ArrayType>(SrcElTy) &&
          ResElTy == Type::getInt8Ty(GEP.getContext())) {
        uint64_t ArrayEltSize =
            TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType());

        // Check to see if "tmp" is a scale by a multiple of ArrayEltSize.  We
        // allow either a mul, shift, or constant here.
        Value *NewIdx = 0;
        ConstantInt *Scale = 0;
        if (ArrayEltSize == 1) {
          NewIdx = GEP.getOperand(1);
          Scale = ConstantInt::get(cast<IntegerType>(NewIdx->getType()), 1);
        } else if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP.getOperand(1))) {
          NewIdx = ConstantInt::get(CI->getType(), 1);
          Scale = CI;
        } else if (Instruction *Inst =dyn_cast<Instruction>(GEP.getOperand(1))){
          if (Inst->getOpcode() == Instruction::Shl &&
              isa<ConstantInt>(Inst->getOperand(1))) {
            // x << C scales by 2^C.
            ConstantInt *ShAmt = cast<ConstantInt>(Inst->getOperand(1));
            uint32_t ShAmtVal = ShAmt->getLimitedValue(64);
            Scale = ConstantInt::get(cast<IntegerType>(Inst->getType()),
                                     1ULL << ShAmtVal);
            NewIdx = Inst->getOperand(0);
          } else if (Inst->getOpcode() == Instruction::Mul &&
                     isa<ConstantInt>(Inst->getOperand(1))) {
            Scale = cast<ConstantInt>(Inst->getOperand(1));
            NewIdx = Inst->getOperand(0);
          }
        }

        // If the index will be to exactly the right offset with the scale taken
        // out, perform the transformation. Note, we don't know whether Scale is
        // signed or not. We'll use unsigned version of division/modulo
        // operation after making sure Scale doesn't have the sign bit set.
        if (ArrayEltSize && Scale && Scale->getSExtValue() >= 0LL &&
            Scale->getZExtValue() % ArrayEltSize == 0) {
          Scale = ConstantInt::get(Scale->getType(),
                                   Scale->getZExtValue() / ArrayEltSize);
          if (Scale->getZExtValue() != 1) {
            Constant *C = ConstantExpr::getIntegerCast(Scale, NewIdx->getType(),
                                                       false /*ZExt*/);
            NewIdx = Builder->CreateMul(NewIdx, C, "idxscale");
          }

          // Insert the new GEP instruction.
          Value *Idx[2];
          Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
          Idx[1] = NewIdx;
          Value *NewGEP = cast<GEPOperator>(&GEP)->isInBounds() ?
            Builder->CreateInBoundsGEP(X, Idx, Idx + 2, GEP.getName()) :
            Builder->CreateGEP(X, Idx, Idx + 2, GEP.getName());
          // The NewGEP must be pointer typed, so must the old one -> BitCast
          return new BitCastInst(NewGEP, GEP.getType());
        }
      }
    }
  }

  /// See if we can simplify:
  ///   X = bitcast A* to B*
  ///   Y = gep X, <...constant indices...>
  /// into a gep of the original struct.  This is important for SROA and alias
  /// analysis of unions.  If "A" is also a bitcast, wait for A/X to be merged.
  if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) {
    if (TD &&
        !isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices()) {
      // Determine how much the GEP moves the pointer.  We are guaranteed to get
      // a constant back from EmitGEPOffset.
      ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(&GEP));
      int64_t Offset = OffsetV->getSExtValue();

      // If this GEP instruction doesn't move the pointer, just replace the GEP
      // with a bitcast of the real input to the dest type.
      if (Offset == 0) {
        // If the bitcast is of an allocation, and the allocation will be
        // converted to match the type of the cast, don't touch this.
        if (isa<AllocaInst>(BCI->getOperand(0)) ||
            isMalloc(BCI->getOperand(0))) {
          // See if the bitcast simplifies, if so, don't nuke this GEP yet.
          if (Instruction *I = visitBitCast(*BCI)) {
            if (I != BCI) {
              I->takeName(BCI);
              BCI->getParent()->getInstList().insert(BCI, I);
              ReplaceInstUsesWith(*BCI, I);
            }
            return &GEP;
          }
        }
        return new BitCastInst(BCI->getOperand(0), GEP.getType());
      }

      // Otherwise, if the offset is non-zero, we need to find out if there is a
      // field at Offset in 'A's type.  If so, we can pull the cast through the
      // GEP.
      SmallVector<Value*, 8> NewIndices;
      const Type *InTy =
        cast<PointerType>(BCI->getOperand(0)->getType())->getElementType();
      if (FindElementAtOffset(InTy, Offset, NewIndices)) {
        Value *NGEP = cast<GEPOperator>(&GEP)->isInBounds() ?
          Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices.begin(),
                                     NewIndices.end()) :
          Builder->CreateGEP(BCI->getOperand(0), NewIndices.begin(),
                             NewIndices.end());

        if (NGEP->getType() == GEP.getType())
          return ReplaceInstUsesWith(GEP, NGEP);
        NGEP->takeName(&GEP);
        return new BitCastInst(NGEP, GEP.getType());
      }
    }
  }

  return 0;
}

// visitFree - Simplify calls to free(): remove frees of undef/null, and
// delete matching malloc/free pairs whose result is otherwise unused.
Instruction *InstCombiner::visitFree(Instruction &FI) {
  // Operand 1 is the pointer being freed.
  Value *Op = FI.getOperand(1);

  // free undef -> unreachable.
  if (isa<UndefValue>(Op)) {
    // Insert a new store to null because we cannot modify the CFG here.
    new StoreInst(ConstantInt::getTrue(FI.getContext()),
                  UndefValue::get(Type::getInt1PtrTy(FI.getContext())), &FI);
    return EraseInstFromFunction(FI);
  }

  // If we have 'free null' delete the instruction.  This can happen in stl code
  // when lots of inlining happens.
  if (isa<ConstantPointerNull>(Op))
    return EraseInstFromFunction(FI);

  // If we have a malloc call whose only use is a free call, delete both.
  if (isMalloc(Op)) {
    if (CallInst* CI = extractMallocCallFromBitCast(Op)) {
      // free(bitcast(malloc)): both the bitcast and the malloc must be
      // otherwise unused before all three instructions can go.
      if (Op->hasOneUse() && CI->hasOneUse()) {
        EraseInstFromFunction(FI);
        EraseInstFromFunction(*CI);
        return EraseInstFromFunction(*cast<Instruction>(Op));
      }
    } else {
      // Op is a call to malloc
      if (Op->hasOneUse()) {
        EraseInstFromFunction(FI);
        return EraseInstFromFunction(*cast<Instruction>(Op));
      }
    }
  }

  return 0;
}



// visitBranchInst - Canonicalize conditional branches: strip 'not' from the
// condition and invert inequality compares, swapping the successors to match.
Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
  // Change br (not X), label True, label False to: br X, label False, True
  Value *X = 0;
  BasicBlock *TrueDest;
  BasicBlock *FalseDest;
  if (match(&BI, m_Br(m_Not(m_Value(X)), TrueDest, FalseDest)) &&
      !isa<Constant>(X)) {
    // Swap Destinations and condition...
    BI.setCondition(X);
    BI.setSuccessor(0, FalseDest);
    BI.setSuccessor(1, TrueDest);
    return &BI;
  }

  // Canonicalize fcmp_one -> fcmp_oeq
  FCmpInst::Predicate FPred; Value *Y;
  if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)),
                             TrueDest, FalseDest)) &&
      BI.getCondition()->hasOneUse())
    if (FPred == FCmpInst::FCMP_ONE || FPred == FCmpInst::FCMP_OLE ||
        FPred == FCmpInst::FCMP_OGE) {
      FCmpInst *Cond = cast<FCmpInst>(BI.getCondition());
      Cond->setPredicate(FCmpInst::getInversePredicate(FPred));

      // Swap Destinations and condition.
      BI.setSuccessor(0, FalseDest);
      BI.setSuccessor(1, TrueDest);
      Worklist.Add(Cond);
      return &BI;
    }

  // Canonicalize icmp_ne -> icmp_eq
  ICmpInst::Predicate IPred;
  if (match(&BI, m_Br(m_ICmp(IPred, m_Value(X), m_Value(Y)),
                      TrueDest, FalseDest)) &&
      BI.getCondition()->hasOneUse())
    if (IPred == ICmpInst::ICMP_NE  || IPred == ICmpInst::ICMP_ULE ||
        IPred == ICmpInst::ICMP_SLE || IPred == ICmpInst::ICMP_UGE ||
        IPred == ICmpInst::ICMP_SGE) {
      ICmpInst *Cond = cast<ICmpInst>(BI.getCondition());
      Cond->setPredicate(ICmpInst::getInversePredicate(IPred));
      // Swap Destinations and condition.
      BI.setSuccessor(0, FalseDest);
      BI.setSuccessor(1, TrueDest);
      Worklist.Add(Cond);
      return &BI;
    }

  return 0;
}

// visitSwitchInst - Fold an add of a constant into the switch condition by
// rewriting every case value.
Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
  Value *Cond = SI.getCondition();
  if (Instruction *I = dyn_cast<Instruction>(Cond)) {
    if (I->getOpcode() == Instruction::Add)
      if (ConstantInt *AddRHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
        // change 'switch (X+4) case 1:' into 'switch (X) case -3'
        // Operands 2, 4, 6, ... are the case values.
        for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2)
          SI.setOperand(i,
                   ConstantExpr::getSub(cast<Constant>(SI.getOperand(i)),
                                        AddRHS));
        SI.setOperand(0, I->getOperand(0));
        Worklist.Add(I);
        return &SI;
      }
  }
  return 0;
}

// visitExtractValueInst - Simplify extractvalue of constants, of insertvalue
// chains, and of single-use 'with overflow' intrinsic results.
Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
  Value *Agg = EV.getAggregateOperand();

  if (!EV.hasIndices())
    return ReplaceInstUsesWith(EV, Agg);

  if (Constant *C = dyn_cast<Constant>(Agg)) {
    if (isa<UndefValue>(C))
      return ReplaceInstUsesWith(EV, UndefValue::get(EV.getType()));

    if (isa<ConstantAggregateZero>(C))
      return ReplaceInstUsesWith(EV, Constant::getNullValue(EV.getType()));

    if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) {
      // Extract the element indexed by the first index out of the constant
      Value *V = C->getOperand(*EV.idx_begin());
      if (EV.getNumIndices() > 1)
        // Extract the remaining indices out of the constant indexed by the
        // first index
        return ExtractValueInst::Create(V, EV.idx_begin() + 1, EV.idx_end());
      else
        return ReplaceInstUsesWith(EV, V);
    }
    return 0; // Can't handle other constants
  }
  if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
    // We're extracting from an insertvalue instruction, compare the indices
    const unsigned *exti, *exte, *insi, *inse;
    for (exti = EV.idx_begin(), insi = IV->idx_begin(),
         exte = EV.idx_end(), inse = IV->idx_end();
         exti != exte && insi != inse;
         ++exti, ++insi) {
      if (*insi != *exti)
        // The insert and extract both reference distinctly different elements.
        // This means the extract is not influenced by the insert, and we can
        // replace the aggregate operand of the extract with the aggregate
        // operand of the insert. i.e., replace
        // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
        // %E = extractvalue { i32, { i32 } } %I, 0
        // with
        // %E = extractvalue { i32, { i32 } } %A, 0
        return ExtractValueInst::Create(IV->getAggregateOperand(),
                                        EV.idx_begin(), EV.idx_end());
    }
    if (exti == exte && insi == inse)
      // Both iterators are at the end: Index lists are identical. Replace
      // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
      // %C = extractvalue { i32, { i32 } } %B, 1, 0
      // with "i32 42"
      return ReplaceInstUsesWith(EV, IV->getInsertedValueOperand());
    if (exti == exte) {
      // The extract list is a prefix of the insert list. i.e. replace
      // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
      // %E = extractvalue { i32, { i32 } } %I, 1
      // with
      // %X = extractvalue { i32, { i32 } } %A, 1
      // %E = insertvalue { i32 } %X, i32 42, 0
      // by switching the order of the insert and extract (though the
      // insertvalue should be left in, since it may have other uses).
      Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(),
                                                 EV.idx_begin(), EV.idx_end());
      return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
                                     insi, inse);
    }
    if (insi == inse)
      // The insert list is a prefix of the extract list
      // We can simply remove the common indices from the extract and make it
      // operate on the inserted value instead of the insertvalue result.
      // i.e., replace
      // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
      // %E = extractvalue { i32, { i32 } } %I, 1, 0
      // with
      // %E extractvalue { i32 } { i32 42 }, 0
      return ExtractValueInst::Create(IV->getInsertedValueOperand(),
                                      exti, exte);
  }
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) {
    // We're extracting from an intrinsic, see if we're the only user, which
    // allows us to simplify multiple result intrinsics to simpler things that
    // just get one value..
    if (II->hasOneUse()) {
      // Check if we're grabbing the overflow bit or the result of a 'with
      // overflow' intrinsic. If it's the latter we can remove the intrinsic
      // and replace it with a traditional binary instruction.
      switch (II->getIntrinsicID()) {
      case Intrinsic::uadd_with_overflow:
      case Intrinsic::sadd_with_overflow:
        if (*EV.idx_begin() == 0) {  // Normal result.
          Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
          II->replaceAllUsesWith(UndefValue::get(II->getType()));
          EraseInstFromFunction(*II);
          return BinaryOperator::CreateAdd(LHS, RHS);
        }
        break;
      case Intrinsic::usub_with_overflow:
      case Intrinsic::ssub_with_overflow:
        if (*EV.idx_begin() == 0) {  // Normal result.
          Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
          II->replaceAllUsesWith(UndefValue::get(II->getType()));
          EraseInstFromFunction(*II);
          return BinaryOperator::CreateSub(LHS, RHS);
        }
        break;
      case Intrinsic::umul_with_overflow:
      case Intrinsic::smul_with_overflow:
        if (*EV.idx_begin() == 0) {  // Normal result.
          Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
          II->replaceAllUsesWith(UndefValue::get(II->getType()));
          EraseInstFromFunction(*II);
          return BinaryOperator::CreateMul(LHS, RHS);
        }
        break;
      default:
        break;
      }
    }
  }
  // Can't simplify extracts from other values. Note that nested extracts are
  // already simplified implicitly by the above (extract ( extract (insert) )
  // will be translated into extract ( insert ( extract ) ) first and then just
  // the value inserted, if appropriate).
  return 0;
}




/// TryToSinkInstruction - Try to move the specified instruction from its
/// current block into the beginning of DestBlock, which can only happen if it's
/// safe to move the instruction past all of the instructions between it and the
/// end of its block.
static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
  assert(I->hasOneUse() && "Invariants didn't hold!");

  // Cannot move control-flow-involving, volatile loads, vaarg, etc.
  if (isa<PHINode>(I) || I->mayHaveSideEffects() || isa<TerminatorInst>(I))
    return false;

  // Do not sink alloca instructions out of the entry block.
5210 if (isa<AllocaInst>(I) && I->getParent() == 5211 &DestBlock->getParent()->getEntryBlock()) 5212 return false; 5213 5214 // We can only sink load instructions if there is nothing between the load and 5215 // the end of block that could change the value. 5216 if (I->mayReadFromMemory()) { 5217 for (BasicBlock::iterator Scan = I, E = I->getParent()->end(); 5218 Scan != E; ++Scan) 5219 if (Scan->mayWriteToMemory()) 5220 return false; 5221 } 5222 5223 BasicBlock::iterator InsertPos = DestBlock->getFirstNonPHI(); 5224 5225 I->moveBefore(InsertPos); 5226 ++NumSunkInst; 5227 return true; 5228} 5229 5230 5231/// AddReachableCodeToWorklist - Walk the function in depth-first order, adding 5232/// all reachable code to the worklist. 5233/// 5234/// This has a couple of tricks to make the code faster and more powerful. In 5235/// particular, we constant fold and DCE instructions as we go, to avoid adding 5236/// them to the worklist (this significantly speeds up instcombine on code where 5237/// many instructions are dead or constant). Additionally, if we find a branch 5238/// whose condition is a known constant, we only visit the reachable successors. 5239/// 5240static bool AddReachableCodeToWorklist(BasicBlock *BB, 5241 SmallPtrSet<BasicBlock*, 64> &Visited, 5242 InstCombiner &IC, 5243 const TargetData *TD) { 5244 bool MadeIRChange = false; 5245 SmallVector<BasicBlock*, 256> Worklist; 5246 Worklist.push_back(BB); 5247 5248 std::vector<Instruction*> InstrsForInstCombineWorklist; 5249 InstrsForInstCombineWorklist.reserve(128); 5250 5251 SmallPtrSet<ConstantExpr*, 64> FoldedConstants; 5252 5253 while (!Worklist.empty()) { 5254 BB = Worklist.back(); 5255 Worklist.pop_back(); 5256 5257 // We have now visited this block! If we've already been here, ignore it. 5258 if (!Visited.insert(BB)) continue; 5259 5260 for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) { 5261 Instruction *Inst = BBI++; 5262 5263 // DCE instruction if trivially dead. 
5264 if (isInstructionTriviallyDead(Inst)) { 5265 ++NumDeadInst; 5266 DEBUG(errs() << "IC: DCE: " << *Inst << '\n'); 5267 Inst->eraseFromParent(); 5268 continue; 5269 } 5270 5271 // ConstantProp instruction if trivially constant. 5272 if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0))) 5273 if (Constant *C = ConstantFoldInstruction(Inst, TD)) { 5274 DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " 5275 << *Inst << '\n'); 5276 Inst->replaceAllUsesWith(C); 5277 ++NumConstProp; 5278 Inst->eraseFromParent(); 5279 continue; 5280 } 5281 5282 5283 5284 if (TD) { 5285 // See if we can constant fold its operands. 5286 for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end(); 5287 i != e; ++i) { 5288 ConstantExpr *CE = dyn_cast<ConstantExpr>(i); 5289 if (CE == 0) continue; 5290 5291 // If we already folded this constant, don't try again. 5292 if (!FoldedConstants.insert(CE)) 5293 continue; 5294 5295 Constant *NewC = ConstantFoldConstantExpression(CE, TD); 5296 if (NewC && NewC != CE) { 5297 *i = NewC; 5298 MadeIRChange = true; 5299 } 5300 } 5301 } 5302 5303 5304 InstrsForInstCombineWorklist.push_back(Inst); 5305 } 5306 5307 // Recursively visit successors. If this is a branch or switch on a 5308 // constant, only visit the reachable successor. 5309 TerminatorInst *TI = BB->getTerminator(); 5310 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { 5311 if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) { 5312 bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue(); 5313 BasicBlock *ReachableBB = BI->getSuccessor(!CondVal); 5314 Worklist.push_back(ReachableBB); 5315 continue; 5316 } 5317 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { 5318 if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) { 5319 // See if this is an explicit destination. 
5320 for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) 5321 if (SI->getCaseValue(i) == Cond) { 5322 BasicBlock *ReachableBB = SI->getSuccessor(i); 5323 Worklist.push_back(ReachableBB); 5324 continue; 5325 } 5326 5327 // Otherwise it is the default destination. 5328 Worklist.push_back(SI->getSuccessor(0)); 5329 continue; 5330 } 5331 } 5332 5333 for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) 5334 Worklist.push_back(TI->getSuccessor(i)); 5335 } 5336 5337 // Once we've found all of the instructions to add to instcombine's worklist, 5338 // add them in reverse order. This way instcombine will visit from the top 5339 // of the function down. This jives well with the way that it adds all uses 5340 // of instructions to the worklist after doing a transformation, thus avoiding 5341 // some N^2 behavior in pathological cases. 5342 IC.Worklist.AddInitialGroup(&InstrsForInstCombineWorklist[0], 5343 InstrsForInstCombineWorklist.size()); 5344 5345 return MadeIRChange; 5346} 5347 5348bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { 5349 MadeIRChange = false; 5350 5351 DEBUG(errs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on " 5352 << F.getNameStr() << "\n"); 5353 5354 { 5355 // Do a depth-first traversal of the function, populate the worklist with 5356 // the reachable instructions. Ignore blocks that are not reachable. Keep 5357 // track of which blocks we visit. 5358 SmallPtrSet<BasicBlock*, 64> Visited; 5359 MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD); 5360 5361 // Do a quick scan over the function. If we find any blocks that are 5362 // unreachable, remove any instructions inside of them. This prevents 5363 // the instcombine code from having to deal with some bad special cases. 
    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
      if (!Visited.count(BB)) {
        Instruction *Term = BB->getTerminator();
        while (Term != BB->begin()) {   // Remove instrs bottom-up
          BasicBlock::iterator I = Term; --I;

          DEBUG(errs() << "IC: DCE: " << *I << '\n');
          // A debug intrinsic shouldn't force another iteration if we weren't
          // going to do one without it.
          if (!isa<DbgInfoIntrinsic>(I)) {
            ++NumDeadInst;
            MadeIRChange = true;
          }

          // If I is not void type then replaceAllUsesWith undef.
          // This allows ValueHandlers and custom metadata to adjust itself.
          if (!I->getType()->isVoidTy())
            I->replaceAllUsesWith(UndefValue::get(I->getType()));
          I->eraseFromParent();
        }
      }
  }

  // Main combining loop: pull instructions off the worklist and try to
  // DCE, constant-fold, sink, or combine each one.
  while (!Worklist.isEmpty()) {
    Instruction *I = Worklist.RemoveOne();
    if (I == 0) continue;  // skip null values.

    // Check to see if we can DCE the instruction.
    if (isInstructionTriviallyDead(I)) {
      DEBUG(errs() << "IC: DCE: " << *I << '\n');
      EraseInstFromFunction(*I);
      ++NumDeadInst;
      MadeIRChange = true;
      continue;
    }

    // Instruction isn't dead, see if we can constant propagate it.
    // (Only attempted when operand 0 is already a constant — a cheap filter
    // before calling the folder.)
    if (!I->use_empty() && isa<Constant>(I->getOperand(0)))
      if (Constant *C = ConstantFoldInstruction(I, TD)) {
        DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');

        // Add operands to the worklist.
        ReplaceInstUsesWith(*I, C);
        ++NumConstProp;
        EraseInstFromFunction(*I);
        MadeIRChange = true;
        continue;
      }

    // See if we can trivially sink this instruction to a successor basic block.
    if (I->hasOneUse()) {
      BasicBlock *BB = I->getParent();
      Instruction *UserInst = cast<Instruction>(I->use_back());
      BasicBlock *UserParent;

      // Get the block the use occurs in.  For a PHI the use "occurs" in the
      // corresponding incoming block, not in the PHI's own block.
      if (PHINode *PN = dyn_cast<PHINode>(UserInst))
        UserParent = PN->getIncomingBlock(I->use_begin().getUse());
      else
        UserParent = UserInst->getParent();

      if (UserParent != BB) {
        bool UserIsSuccessor = false;
        // See if the user is one of our successors.
        for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI)
          if (*SI == UserParent) {
            UserIsSuccessor = true;
            break;
          }

        // If the user is one of our immediate successors, and if that successor
        // only has us as a predecessors (we'd have to split the critical edge
        // otherwise), we can keep going.
        if (UserIsSuccessor && UserParent->getSinglePredecessor())
          // Okay, the CFG is simple enough, try to sink this instruction.
          MadeIRChange |= TryToSinkInstruction(I, UserParent);
      }
    }

    // Now that we have an instruction, try combining it to simplify it.
    Builder->SetInsertPoint(I->getParent(), I);

#ifndef NDEBUG
    std::string OrigI;
#endif
    // OrigI only exists in !NDEBUG builds; the DEBUG macro that references it
    // compiles away entirely in NDEBUG builds, so this is safe.
    DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
    DEBUG(errs() << "IC: Visiting: " << OrigI << '\n');

    if (Instruction *Result = visit(*I)) {
      ++NumCombined;
      // Should we replace the old instruction with a new one?
      if (Result != I) {
        DEBUG(errs() << "IC: Old = " << *I << '\n'
                     << "    New = " << *Result << '\n');

        // Everything uses the new instruction now.
        I->replaceAllUsesWith(Result);

        // Push the new instruction and any users onto the worklist.
        Worklist.Add(Result);
        Worklist.AddUsersToWorkList(*Result);

        // Move the name to the new instruction first.
        Result->takeName(I);

        // Insert the new instruction into the basic block...
        BasicBlock *InstParent = I->getParent();
        BasicBlock::iterator InsertPos = I;

        if (!isa<PHINode>(Result))        // If combining a PHI, don't insert
          while (isa<PHINode>(InsertPos)) // middle of a block of PHIs.
            ++InsertPos;

        InstParent->getInstList().insert(InsertPos, Result);

        EraseInstFromFunction(*I);
      } else {
#ifndef NDEBUG
        DEBUG(errs() << "IC: Mod = " << OrigI << '\n'
                     << "    New = " << *I << '\n');
#endif

        // If the instruction was modified, it's possible that it is now dead.
        // if so, remove it.
        if (isInstructionTriviallyDead(I)) {
          EraseInstFromFunction(*I);
        } else {
          Worklist.Add(I);
          Worklist.AddUsersToWorkList(*I);
        }
      }
      MadeIRChange = true;
    }
  }

  Worklist.Zap();
  return MadeIRChange;
}


// runOnFunction - Entry point for the pass: set up the builder/target data
// and run DoOneIteration to a fixpoint.
bool InstCombiner::runOnFunction(Function &F) {
  MustPreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
  TD = getAnalysisIfAvailable<TargetData>();


  /// Builder - This is an IRBuilder that automatically inserts new
  /// instructions into the worklist when they are created.
  IRBuilder<true, TargetFolder, InstCombineIRInserter>
    TheBuilder(F.getContext(), TargetFolder(TD),
               InstCombineIRInserter(Worklist));
  Builder = &TheBuilder;

  bool EverMadeChange = false;

  // Iterate while there is work to do.
  unsigned Iteration = 0;
  while (DoOneIteration(F, Iteration++))
    EverMadeChange = true;

  // Clear the dangling pointer to the stack-allocated builder.
  Builder = 0;
  return EverMadeChange;
}

FunctionPass *llvm::createInstructionCombiningPass() {
  return new InstCombiner();
}