InstructionCombining.cpp revision 3df5c6fff17296620a79ce80aa8c5939a85f9597
//===- InstructionCombining.cpp - Combine multiple instructions ----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// InstructionCombining - Combine instructions to form fewer, simpler
// instructions.  This pass does not modify the CFG.  It is where algebraic
// simplification happens.
//
// This pass combines things like:
//    %Y = add i32 %X, 1
//    %Z = add i32 %Y, 1
// into:
//    %Z = add i32 %X, 2
//
// This is a simple worklist-driven algorithm.
//
// This pass guarantees that the following canonicalizations are performed on
// the program:
//    1. If a binary operator has a constant operand, it is moved to the RHS.
//    2. Bitwise operators with constant operands are always grouped so that
//       shifts are performed first, then or's, then and's, then xor's.
//    3. Compare instructions are converted from <, >, <=, >= to ==, != if
//       possible.
//    4. All cmp instructions on boolean values are replaced with logical ops.
//    5. add X, X is represented as (X*2) => (X << 1).
//    6. Multiplies with a power-of-two constant argument are transformed into
//       shifts.
//   ... etc.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "instcombine"
#include "llvm/Transforms/Scalar.h"
#include "InstCombineWorklist.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Pass.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Operator.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/InstVisitor.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/PatternMatch.h"
#include "llvm/Support/TargetFolder.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include <algorithm>
#include <climits>
using namespace llvm;
using namespace llvm::PatternMatch;

STATISTIC(NumCombined , "Number of insts combined");
STATISTIC(NumConstProp, "Number of constant folds");
STATISTIC(NumDeadInst , "Number of dead inst eliminated");
STATISTIC(NumDeadStore, "Number of dead stores eliminated");
STATISTIC(NumSunkInst , "Number of instructions sunk");

/// SelectPatternFlavor - We can match a variety of different patterns for
/// select operations.
enum SelectPatternFlavor {
  SPF_UNKNOWN = 0,
  SPF_SMIN, SPF_UMIN,
  SPF_SMAX, SPF_UMAX
  //SPF_ABS - TODO.
};


namespace {
  /// InstCombineIRInserter - This is an IRBuilder insertion helper that works
  /// just like the normal insertion helper, but also adds any new instructions
  /// to the instcombine worklist.
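  /// For example, an 'and' created via Builder->CreateAnd(X, Y) inside a
  /// visit routine is inserted into the function and immediately queued for
  /// further combining.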
93 class InstCombineIRInserter : public IRBuilderDefaultInserter<true> { 94 InstCombineWorklist &Worklist; 95 public: 96 InstCombineIRInserter(InstCombineWorklist &WL) : Worklist(WL) {} 97 98 void InsertHelper(Instruction *I, const Twine &Name, 99 BasicBlock *BB, BasicBlock::iterator InsertPt) const { 100 IRBuilderDefaultInserter<true>::InsertHelper(I, Name, BB, InsertPt); 101 Worklist.Add(I); 102 } 103 }; 104} // end anonymous namespace 105 106 107namespace { 108 class InstCombiner : public FunctionPass, 109 public InstVisitor<InstCombiner, Instruction*> { 110 TargetData *TD; 111 bool MustPreserveLCSSA; 112 bool MadeIRChange; 113 public: 114 /// Worklist - All of the instructions that need to be simplified. 115 InstCombineWorklist Worklist; 116 117 /// Builder - This is an IRBuilder that automatically inserts new 118 /// instructions into the worklist when they are created. 119 typedef IRBuilder<true, TargetFolder, InstCombineIRInserter> BuilderTy; 120 BuilderTy *Builder; 121 122 static char ID; // Pass identification, replacement for typeid 123 InstCombiner() : FunctionPass(&ID), TD(0), Builder(0) {} 124 125 LLVMContext *Context; 126 LLVMContext *getContext() const { return Context; } 127 128 public: 129 virtual bool runOnFunction(Function &F); 130 131 bool DoOneIteration(Function &F, unsigned ItNum); 132 133 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 134 AU.addPreservedID(LCSSAID); 135 AU.setPreservesCFG(); 136 } 137 138 TargetData *getTargetData() const { return TD; } 139 140 // Visitation implementation - Implement instruction combining for different 141 // instruction types. The semantics are as follows: 142 // Return Value: 143 // null - No change was made 144 // I - Change was made, I is still valid, I may be dead though 145 // otherwise - Change was made, replace I with returned instruction 146 // 147 Instruction *visitAdd(BinaryOperator &I); 148 Instruction *visitFAdd(BinaryOperator &I); 149 Value *OptimizePointerDifference(Value *LHS, Value *RHS, const Type *Ty); 150 Instruction *visitSub(BinaryOperator &I); 151 Instruction *visitFSub(BinaryOperator &I); 152 Instruction *visitMul(BinaryOperator &I); 153 Instruction *visitFMul(BinaryOperator &I); 154 Instruction *visitURem(BinaryOperator &I); 155 Instruction *visitSRem(BinaryOperator &I); 156 Instruction *visitFRem(BinaryOperator &I); 157 bool SimplifyDivRemOfSelect(BinaryOperator &I); 158 Instruction *commonRemTransforms(BinaryOperator &I); 159 Instruction *commonIRemTransforms(BinaryOperator &I); 160 Instruction *commonDivTransforms(BinaryOperator &I); 161 Instruction *commonIDivTransforms(BinaryOperator &I); 162 Instruction *visitUDiv(BinaryOperator &I); 163 Instruction *visitSDiv(BinaryOperator &I); 164 Instruction *visitFDiv(BinaryOperator &I); 165 Instruction *FoldAndOfICmps(Instruction &I, ICmpInst *LHS, ICmpInst *RHS); 166 Instruction *FoldAndOfFCmps(Instruction &I, FCmpInst *LHS, FCmpInst *RHS); 167 Instruction *visitAnd(BinaryOperator &I); 168 Instruction *FoldOrOfICmps(Instruction &I, ICmpInst *LHS, ICmpInst *RHS); 169 Instruction *FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, FCmpInst *RHS); 170 Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, 171 Value *A, Value *B, Value *C); 172 Instruction *visitOr (BinaryOperator &I); 173 Instruction *visitXor(BinaryOperator &I); 174 Instruction *visitShl(BinaryOperator &I); 175 Instruction *visitAShr(BinaryOperator &I); 176 Instruction *visitLShr(BinaryOperator &I); 177 Instruction *commonShiftTransforms(BinaryOperator &I); 178 Instruction 
*FoldFCmp_IntToFP_Cst(FCmpInst &I, Instruction *LHSI, 179 Constant *RHSC); 180 Instruction *FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, 181 GlobalVariable *GV, CmpInst &ICI, 182 ConstantInt *AndCst = 0); 183 Instruction *visitFCmpInst(FCmpInst &I); 184 Instruction *visitICmpInst(ICmpInst &I); 185 Instruction *visitICmpInstWithCastAndCast(ICmpInst &ICI); 186 Instruction *visitICmpInstWithInstAndIntCst(ICmpInst &ICI, 187 Instruction *LHS, 188 ConstantInt *RHS); 189 Instruction *FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, 190 ConstantInt *DivRHS); 191 Instruction *FoldICmpAddOpCst(ICmpInst &ICI, Value *X, ConstantInt *CI, 192 ICmpInst::Predicate Pred, Value *TheAdd); 193 Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, 194 ICmpInst::Predicate Cond, Instruction &I); 195 Instruction *FoldShiftByConstant(Value *Op0, ConstantInt *Op1, 196 BinaryOperator &I); 197 Instruction *commonCastTransforms(CastInst &CI); 198 Instruction *commonIntCastTransforms(CastInst &CI); 199 Instruction *commonPointerCastTransforms(CastInst &CI); 200 Instruction *visitTrunc(TruncInst &CI); 201 Instruction *visitZExt(ZExtInst &CI); 202 Instruction *visitSExt(SExtInst &CI); 203 Instruction *visitFPTrunc(FPTruncInst &CI); 204 Instruction *visitFPExt(CastInst &CI); 205 Instruction *visitFPToUI(FPToUIInst &FI); 206 Instruction *visitFPToSI(FPToSIInst &FI); 207 Instruction *visitUIToFP(CastInst &CI); 208 Instruction *visitSIToFP(CastInst &CI); 209 Instruction *visitPtrToInt(PtrToIntInst &CI); 210 Instruction *visitIntToPtr(IntToPtrInst &CI); 211 Instruction *visitBitCast(BitCastInst &CI); 212 Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI, 213 Instruction *FI); 214 Instruction *FoldSelectIntoOp(SelectInst &SI, Value*, Value*); 215 Instruction *FoldSPFofSPF(Instruction *Inner, SelectPatternFlavor SPF1, 216 Value *A, Value *B, Instruction &Outer, 217 SelectPatternFlavor SPF2, Value *C); 218 Instruction *visitSelectInst(SelectInst &SI); 219 Instruction *visitSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI); 220 Instruction *visitCallInst(CallInst &CI); 221 Instruction *visitInvokeInst(InvokeInst &II); 222 223 Instruction *SliceUpIllegalIntegerPHI(PHINode &PN); 224 Instruction *visitPHINode(PHINode &PN); 225 Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP); 226 Instruction *visitAllocaInst(AllocaInst &AI); 227 Instruction *visitFree(Instruction &FI); 228 Instruction *visitLoadInst(LoadInst &LI); 229 Instruction *visitStoreInst(StoreInst &SI); 230 Instruction *visitBranchInst(BranchInst &BI); 231 Instruction *visitSwitchInst(SwitchInst &SI); 232 Instruction *visitInsertElementInst(InsertElementInst &IE); 233 Instruction *visitExtractElementInst(ExtractElementInst &EI); 234 Instruction *visitShuffleVectorInst(ShuffleVectorInst &SVI); 235 Instruction *visitExtractValueInst(ExtractValueInst &EV); 236 237 // visitInstruction - Specify what to return for unhandled instructions... 238 Instruction *visitInstruction(Instruction &I) { return 0; } 239 240 private: 241 Instruction *visitCallSite(CallSite CS); 242 bool transformConstExprCastCall(CallSite CS); 243 Instruction *transformCallThroughTrampoline(CallSite CS); 244 Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI, 245 bool DoXform = true); 246 bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS); 247 DbgDeclareInst *hasOneUsePlusDeclare(Value *V); 248 249 250 public: 251 // InsertNewInstBefore - insert an instruction New before instruction Old 252 // in the program. Add the new instruction to the worklist. 
253 // 254 Instruction *InsertNewInstBefore(Instruction *New, Instruction &Old) { 255 assert(New && New->getParent() == 0 && 256 "New instruction already inserted into a basic block!"); 257 BasicBlock *BB = Old.getParent(); 258 BB->getInstList().insert(&Old, New); // Insert inst 259 Worklist.Add(New); 260 return New; 261 } 262 263 // ReplaceInstUsesWith - This method is to be used when an instruction is 264 // found to be dead, replacable with another preexisting expression. Here 265 // we add all uses of I to the worklist, replace all uses of I with the new 266 // value, then return I, so that the inst combiner will know that I was 267 // modified. 268 // 269 Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) { 270 Worklist.AddUsersToWorkList(I); // Add all modified instrs to worklist. 271 272 // If we are replacing the instruction with itself, this must be in a 273 // segment of unreachable code, so just clobber the instruction. 274 if (&I == V) 275 V = UndefValue::get(I.getType()); 276 277 I.replaceAllUsesWith(V); 278 return &I; 279 } 280 281 // EraseInstFromFunction - When dealing with an instruction that has side 282 // effects or produces a void value, we can't rely on DCE to delete the 283 // instruction. Instead, visit methods should return the value returned by 284 // this function. 285 Instruction *EraseInstFromFunction(Instruction &I) { 286 DEBUG(errs() << "IC: ERASE " << I << '\n'); 287 288 assert(I.use_empty() && "Cannot erase instruction that is used!"); 289 // Make sure that we reprocess all operands now that we reduced their 290 // use counts. 291 if (I.getNumOperands() < 8) { 292 for (User::op_iterator i = I.op_begin(), e = I.op_end(); i != e; ++i) 293 if (Instruction *Op = dyn_cast<Instruction>(*i)) 294 Worklist.Add(Op); 295 } 296 Worklist.Remove(&I); 297 I.eraseFromParent(); 298 MadeIRChange = true; 299 return 0; // Don't do anything with FI 300 } 301 302 void ComputeMaskedBits(Value *V, const APInt &Mask, APInt &KnownZero, 303 APInt &KnownOne, unsigned Depth = 0) const { 304 return llvm::ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth); 305 } 306 307 bool MaskedValueIsZero(Value *V, const APInt &Mask, 308 unsigned Depth = 0) const { 309 return llvm::MaskedValueIsZero(V, Mask, TD, Depth); 310 } 311 unsigned ComputeNumSignBits(Value *Op, unsigned Depth = 0) const { 312 return llvm::ComputeNumSignBits(Op, TD, Depth); 313 } 314 315 private: 316 317 /// SimplifyCommutative - This performs a few simplifications for 318 /// commutative operators. 319 bool SimplifyCommutative(BinaryOperator &I); 320 321 /// SimplifyDemandedUseBits - Attempts to replace V with a simpler value 322 /// based on the demanded bits. 323 Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, 324 APInt& KnownZero, APInt& KnownOne, 325 unsigned Depth); 326 bool SimplifyDemandedBits(Use &U, APInt DemandedMask, 327 APInt& KnownZero, APInt& KnownOne, 328 unsigned Depth=0); 329 330 /// SimplifyDemandedInstructionBits - Inst is an integer instruction that 331 /// SimplifyDemandedBits knows about. See if the instruction has any 332 /// properties that allow us to simplify its operands. 
333 bool SimplifyDemandedInstructionBits(Instruction &Inst); 334 335 Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, 336 APInt& UndefElts, unsigned Depth = 0); 337 338 // FoldOpIntoPhi - Given a binary operator, cast instruction, or select 339 // which has a PHI node as operand #0, see if we can fold the instruction 340 // into the PHI (which is only possible if all operands to the PHI are 341 // constants). 342 // 343 // If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms 344 // that would normally be unprofitable because they strongly encourage jump 345 // threading. 346 Instruction *FoldOpIntoPhi(Instruction &I, bool AllowAggressive = false); 347 348 // FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary" 349 // operator and they all are only used by the PHI, PHI together their 350 // inputs, and do the operation once, to the result of the PHI. 351 Instruction *FoldPHIArgOpIntoPHI(PHINode &PN); 352 Instruction *FoldPHIArgBinOpIntoPHI(PHINode &PN); 353 Instruction *FoldPHIArgGEPIntoPHI(PHINode &PN); 354 Instruction *FoldPHIArgLoadIntoPHI(PHINode &PN); 355 356 357 Instruction *OptAndOp(Instruction *Op, ConstantInt *OpRHS, 358 ConstantInt *AndRHS, BinaryOperator &TheAnd); 359 360 Value *FoldLogicalPlusAnd(Value *LHS, Value *RHS, ConstantInt *Mask, 361 bool isSub, Instruction &I); 362 Instruction *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, 363 bool isSigned, bool Inside, Instruction &IB); 364 Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI); 365 Instruction *MatchBSwap(BinaryOperator &I); 366 bool SimplifyStoreAtEndOfBlock(StoreInst &SI); 367 Instruction *SimplifyMemTransfer(MemIntrinsic *MI); 368 Instruction *SimplifyMemSet(MemSetInst *MI); 369 370 371 Value *EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned); 372 373 bool CanEvaluateInDifferentType(Value *V, const Type *Ty, 374 unsigned CastOpc, int &NumCastsRemoved); 375 unsigned GetOrEnforceKnownAlignment(Value *V, 376 unsigned PrefAlign = 0); 377 378 }; 379} // end anonymous namespace 380 381char InstCombiner::ID = 0; 382static RegisterPass<InstCombiner> 383X("instcombine", "Combine redundant instructions"); 384 385// getComplexity: Assign a complexity or rank value to LLVM Values... 386// 0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg, 3 -> Unary, 4 -> OtherInst 387static unsigned getComplexity(Value *V) { 388 if (isa<Instruction>(V)) { 389 if (BinaryOperator::isNeg(V) || 390 BinaryOperator::isFNeg(V) || 391 BinaryOperator::isNot(V)) 392 return 3; 393 return 4; 394 } 395 if (isa<Argument>(V)) return 3; 396 return isa<Constant>(V) ? (isa<UndefValue>(V) ? 0 : 1) : 2; 397} 398 399// isOnlyUse - Return true if this instruction will be deleted if we stop using 400// it. 401static bool isOnlyUse(Value *V) { 402 return V->hasOneUse() || isa<Constant>(V); 403} 404 405// getPromotedType - Return the specified type promoted as it would be to pass 406// though a va_arg area... 407static const Type *getPromotedType(const Type *Ty) { 408 if (const IntegerType* ITy = dyn_cast<IntegerType>(Ty)) { 409 if (ITy->getBitWidth() < 32) 410 return Type::getInt32Ty(Ty->getContext()); 411 } 412 return Ty; 413} 414 415/// ShouldChangeType - Return true if it is desirable to convert a computation 416/// from 'From' to 'To'. We don't want to convert from a legal to an illegal 417/// type for example, or from a smaller to a larger illegal type. 
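/// For example, on a target whose TargetData marks i32 and i64 as legal
/// integer types, shrinking an i160 computation down to i64 is considered
/// desirable, but widening i64 to i160 is not.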
418static bool ShouldChangeType(const Type *From, const Type *To, 419 const TargetData *TD) { 420 assert(isa<IntegerType>(From) && isa<IntegerType>(To)); 421 422 // If we don't have TD, we don't know if the source/dest are legal. 423 if (!TD) return false; 424 425 unsigned FromWidth = From->getPrimitiveSizeInBits(); 426 unsigned ToWidth = To->getPrimitiveSizeInBits(); 427 bool FromLegal = TD->isLegalInteger(FromWidth); 428 bool ToLegal = TD->isLegalInteger(ToWidth); 429 430 // If this is a legal integer from type, and the result would be an illegal 431 // type, don't do the transformation. 432 if (FromLegal && !ToLegal) 433 return false; 434 435 // Otherwise, if both are illegal, do not increase the size of the result. We 436 // do allow things like i160 -> i64, but not i64 -> i160. 437 if (!FromLegal && !ToLegal && ToWidth > FromWidth) 438 return false; 439 440 return true; 441} 442 443/// getBitCastOperand - If the specified operand is a CastInst, a constant 444/// expression bitcast, or a GetElementPtrInst with all zero indices, return the 445/// operand value, otherwise return null. 446static Value *getBitCastOperand(Value *V) { 447 if (Operator *O = dyn_cast<Operator>(V)) { 448 if (O->getOpcode() == Instruction::BitCast) 449 return O->getOperand(0); 450 if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) 451 if (GEP->hasAllZeroIndices()) 452 return GEP->getPointerOperand(); 453 } 454 return 0; 455} 456 457/// This function is a wrapper around CastInst::isEliminableCastPair. It 458/// simply extracts arguments and returns what that function returns. 459static Instruction::CastOps 460isEliminableCastPair( 461 const CastInst *CI, ///< The first cast instruction 462 unsigned opcode, ///< The opcode of the second cast instruction 463 const Type *DstTy, ///< The target type for the second cast instruction 464 TargetData *TD ///< The target data for pointer size 465) { 466 467 const Type *SrcTy = CI->getOperand(0)->getType(); // A from above 468 const Type *MidTy = CI->getType(); // B from above 469 470 // Get the opcodes of the two Cast instructions 471 Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode()); 472 Instruction::CastOps secondOp = Instruction::CastOps(opcode); 473 474 unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy, 475 DstTy, 476 TD ? TD->getIntPtrType(CI->getContext()) : 0); 477 478 // We don't want to form an inttoptr or ptrtoint that converts to an integer 479 // type that differs from the pointer size. 480 if ((Res == Instruction::IntToPtr && 481 (!TD || SrcTy != TD->getIntPtrType(CI->getContext()))) || 482 (Res == Instruction::PtrToInt && 483 (!TD || DstTy != TD->getIntPtrType(CI->getContext())))) 484 Res = 0; 485 486 return Instruction::CastOps(Res); 487} 488 489/// ValueRequiresCast - Return true if the cast from "V to Ty" actually results 490/// in any code being generated. It does not require codegen if V is simple 491/// enough or if the cast can be folded into other casts. 492static bool ValueRequiresCast(Instruction::CastOps opcode, const Value *V, 493 const Type *Ty, TargetData *TD) { 494 if (V->getType() == Ty || isa<Constant>(V)) return false; 495 496 // If this is another cast that can be eliminated, it isn't codegen either. 497 if (const CastInst *CI = dyn_cast<CastInst>(V)) 498 if (isEliminableCastPair(CI, opcode, Ty, TD)) 499 return false; 500 return true; 501} 502 503// SimplifyCommutative - This performs a few simplifications for commutative 504// operators: 505// 506// 1. 
Order operands such that they are listed from right (least complex) to 507// left (most complex). This puts constants before unary operators before 508// binary operators. 509// 510// 2. Transform: (op (op V, C1), C2) ==> (op V, (op C1, C2)) 511// 3. Transform: (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2)) 512// 513bool InstCombiner::SimplifyCommutative(BinaryOperator &I) { 514 bool Changed = false; 515 if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) 516 Changed = !I.swapOperands(); 517 518 if (!I.isAssociative()) return Changed; 519 Instruction::BinaryOps Opcode = I.getOpcode(); 520 if (BinaryOperator *Op = dyn_cast<BinaryOperator>(I.getOperand(0))) 521 if (Op->getOpcode() == Opcode && isa<Constant>(Op->getOperand(1))) { 522 if (isa<Constant>(I.getOperand(1))) { 523 Constant *Folded = ConstantExpr::get(I.getOpcode(), 524 cast<Constant>(I.getOperand(1)), 525 cast<Constant>(Op->getOperand(1))); 526 I.setOperand(0, Op->getOperand(0)); 527 I.setOperand(1, Folded); 528 return true; 529 } else if (BinaryOperator *Op1=dyn_cast<BinaryOperator>(I.getOperand(1))) 530 if (Op1->getOpcode() == Opcode && isa<Constant>(Op1->getOperand(1)) && 531 isOnlyUse(Op) && isOnlyUse(Op1)) { 532 Constant *C1 = cast<Constant>(Op->getOperand(1)); 533 Constant *C2 = cast<Constant>(Op1->getOperand(1)); 534 535 // Fold (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2)) 536 Constant *Folded = ConstantExpr::get(I.getOpcode(), C1, C2); 537 Instruction *New = BinaryOperator::Create(Opcode, Op->getOperand(0), 538 Op1->getOperand(0), 539 Op1->getName(), &I); 540 Worklist.Add(New); 541 I.setOperand(0, New); 542 I.setOperand(1, Folded); 543 return true; 544 } 545 } 546 return Changed; 547} 548 549// dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction 550// if the LHS is a constant zero (which is the 'negate' form). 551// 552static inline Value *dyn_castNegVal(Value *V) { 553 if (BinaryOperator::isNeg(V)) 554 return BinaryOperator::getNegArgument(V); 555 556 // Constants can be considered to be negated values if they can be folded. 557 if (ConstantInt *C = dyn_cast<ConstantInt>(V)) 558 return ConstantExpr::getNeg(C); 559 560 if (ConstantVector *C = dyn_cast<ConstantVector>(V)) 561 if (C->getType()->getElementType()->isInteger()) 562 return ConstantExpr::getNeg(C); 563 564 return 0; 565} 566 567// dyn_castFNegVal - Given a 'fsub' instruction, return the RHS of the 568// instruction if the LHS is a constant negative zero (which is the 'negate' 569// form). 570// 571static inline Value *dyn_castFNegVal(Value *V) { 572 if (BinaryOperator::isFNeg(V)) 573 return BinaryOperator::getFNegArgument(V); 574 575 // Constants can be considered to be negated values if they can be folded. 576 if (ConstantFP *C = dyn_cast<ConstantFP>(V)) 577 return ConstantExpr::getFNeg(C); 578 579 if (ConstantVector *C = dyn_cast<ConstantVector>(V)) 580 if (C->getType()->getElementType()->isFloatingPoint()) 581 return ConstantExpr::getFNeg(C); 582 583 return 0; 584} 585 586/// MatchSelectPattern - Pattern match integer [SU]MIN, [SU]MAX, and ABS idioms, 587/// returning the kind and providing the out parameter results if we 588/// successfully match. 
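/// For example, "(icmp slt i32 %a, %b) ? %a : %b" matches as SPF_SMIN, while
/// the swapped form "(icmp ugt i32 %a, %b) ? %b : %a" matches as SPF_UMIN.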
589static SelectPatternFlavor 590MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) { 591 SelectInst *SI = dyn_cast<SelectInst>(V); 592 if (SI == 0) return SPF_UNKNOWN; 593 594 ICmpInst *ICI = dyn_cast<ICmpInst>(SI->getCondition()); 595 if (ICI == 0) return SPF_UNKNOWN; 596 597 LHS = ICI->getOperand(0); 598 RHS = ICI->getOperand(1); 599 600 // (icmp X, Y) ? X : Y 601 if (SI->getTrueValue() == ICI->getOperand(0) && 602 SI->getFalseValue() == ICI->getOperand(1)) { 603 switch (ICI->getPredicate()) { 604 default: return SPF_UNKNOWN; // Equality. 605 case ICmpInst::ICMP_UGT: 606 case ICmpInst::ICMP_UGE: return SPF_UMAX; 607 case ICmpInst::ICMP_SGT: 608 case ICmpInst::ICMP_SGE: return SPF_SMAX; 609 case ICmpInst::ICMP_ULT: 610 case ICmpInst::ICMP_ULE: return SPF_UMIN; 611 case ICmpInst::ICMP_SLT: 612 case ICmpInst::ICMP_SLE: return SPF_SMIN; 613 } 614 } 615 616 // (icmp X, Y) ? Y : X 617 if (SI->getTrueValue() == ICI->getOperand(1) && 618 SI->getFalseValue() == ICI->getOperand(0)) { 619 switch (ICI->getPredicate()) { 620 default: return SPF_UNKNOWN; // Equality. 621 case ICmpInst::ICMP_UGT: 622 case ICmpInst::ICMP_UGE: return SPF_UMIN; 623 case ICmpInst::ICMP_SGT: 624 case ICmpInst::ICMP_SGE: return SPF_SMIN; 625 case ICmpInst::ICMP_ULT: 626 case ICmpInst::ICMP_ULE: return SPF_UMAX; 627 case ICmpInst::ICMP_SLT: 628 case ICmpInst::ICMP_SLE: return SPF_SMAX; 629 } 630 } 631 632 // TODO: (X > 4) ? X : 5 --> (X >= 5) ? X : 5 --> MAX(X, 5) 633 634 return SPF_UNKNOWN; 635} 636 637/// isFreeToInvert - Return true if the specified value is free to invert (apply 638/// ~ to). This happens in cases where the ~ can be eliminated. 639static inline bool isFreeToInvert(Value *V) { 640 // ~(~(X)) -> X. 641 if (BinaryOperator::isNot(V)) 642 return true; 643 644 // Constants can be considered to be not'ed values. 645 if (isa<ConstantInt>(V)) 646 return true; 647 648 // Compares can be inverted if they have a single use. 649 if (CmpInst *CI = dyn_cast<CmpInst>(V)) 650 return CI->hasOneUse(); 651 652 return false; 653} 654 655static inline Value *dyn_castNotVal(Value *V) { 656 // If this is not(not(x)) don't return that this is a not: we want the two 657 // not's to be folded first. 658 if (BinaryOperator::isNot(V)) { 659 Value *Operand = BinaryOperator::getNotArgument(V); 660 if (!isFreeToInvert(Operand)) 661 return Operand; 662 } 663 664 // Constants can be considered to be not'ed values... 665 if (ConstantInt *C = dyn_cast<ConstantInt>(V)) 666 return ConstantInt::get(C->getType(), ~C->getValue()); 667 return 0; 668} 669 670 671 672// dyn_castFoldableMul - If this value is a multiply that can be folded into 673// other computations (because it has a constant operand), return the 674// non-constant operand of the multiply, and set CST to point to the multiplier. 675// Otherwise, return null. 676// 677static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) { 678 if (V->hasOneUse() && V->getType()->isInteger()) 679 if (Instruction *I = dyn_cast<Instruction>(V)) { 680 if (I->getOpcode() == Instruction::Mul) 681 if ((CST = dyn_cast<ConstantInt>(I->getOperand(1)))) 682 return I->getOperand(0); 683 if (I->getOpcode() == Instruction::Shl) 684 if ((CST = dyn_cast<ConstantInt>(I->getOperand(1)))) { 685 // The multiplier is really 1 << CST. 
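        // For example, "shl i32 %X, 3" behaves as a multiply of %X by 8, so
        // CST is rewritten to 8 and %X is returned.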
686 uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth(); 687 uint32_t CSTVal = CST->getLimitedValue(BitWidth); 688 CST = ConstantInt::get(V->getType()->getContext(), 689 APInt(BitWidth, 1).shl(CSTVal)); 690 return I->getOperand(0); 691 } 692 } 693 return 0; 694} 695 696/// AddOne - Add one to a ConstantInt 697static Constant *AddOne(Constant *C) { 698 return ConstantExpr::getAdd(C, 699 ConstantInt::get(C->getType(), 1)); 700} 701/// SubOne - Subtract one from a ConstantInt 702static Constant *SubOne(ConstantInt *C) { 703 return ConstantExpr::getSub(C, 704 ConstantInt::get(C->getType(), 1)); 705} 706/// MultiplyOverflows - True if the multiply can not be expressed in an int 707/// this size. 708static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) { 709 uint32_t W = C1->getBitWidth(); 710 APInt LHSExt = C1->getValue(), RHSExt = C2->getValue(); 711 if (sign) { 712 LHSExt.sext(W * 2); 713 RHSExt.sext(W * 2); 714 } else { 715 LHSExt.zext(W * 2); 716 RHSExt.zext(W * 2); 717 } 718 719 APInt MulExt = LHSExt * RHSExt; 720 721 if (!sign) 722 return MulExt.ugt(APInt::getLowBitsSet(W * 2, W)); 723 724 APInt Min = APInt::getSignedMinValue(W).sext(W * 2); 725 APInt Max = APInt::getSignedMaxValue(W).sext(W * 2); 726 return MulExt.slt(Min) || MulExt.sgt(Max); 727} 728 729 730/// ShrinkDemandedConstant - Check to see if the specified operand of the 731/// specified instruction is a constant integer. If so, check to see if there 732/// are any bits set in the constant that are not demanded. If so, shrink the 733/// constant and return true. 734static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, 735 APInt Demanded) { 736 assert(I && "No instruction?"); 737 assert(OpNo < I->getNumOperands() && "Operand index too large"); 738 739 // If the operand is not a constant integer, nothing to do. 740 ConstantInt *OpC = dyn_cast<ConstantInt>(I->getOperand(OpNo)); 741 if (!OpC) return false; 742 743 // If there are no bits set that aren't demanded, nothing to do. 744 Demanded.zextOrTrunc(OpC->getValue().getBitWidth()); 745 if ((~Demanded & OpC->getValue()) == 0) 746 return false; 747 748 // This instruction is producing bits that are not demanded. Shrink the RHS. 749 Demanded &= OpC->getValue(); 750 I->setOperand(OpNo, ConstantInt::get(OpC->getType(), Demanded)); 751 return true; 752} 753 754// ComputeSignedMinMaxValuesFromKnownBits - Given a signed integer type and a 755// set of known zero and one bits, compute the maximum and minimum values that 756// could have the specified known zero and known one bits, returning them in 757// min/max. 758static void ComputeSignedMinMaxValuesFromKnownBits(const APInt& KnownZero, 759 const APInt& KnownOne, 760 APInt& Min, APInt& Max) { 761 assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() && 762 KnownZero.getBitWidth() == Min.getBitWidth() && 763 KnownZero.getBitWidth() == Max.getBitWidth() && 764 "KnownZero, KnownOne and Min, Max must have equal bitwidth."); 765 APInt UnknownBits = ~(KnownZero|KnownOne); 766 767 // The minimum value is when all unknown bits are zeros, EXCEPT for the sign 768 // bit if it is unknown. 
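  // For example, with 4-bit values where KnownOne = 0001 and KnownZero = 0100,
  // the unknown bits are 1010: the minimum is 1001 (sign bit set, other
  // unknown bits clear) and the maximum is 0011 (sign bit clear, bit 1 set).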
  Min = KnownOne;
  Max = KnownOne|UnknownBits;

  if (UnknownBits.isNegative()) { // Sign bit is unknown
    Min.set(Min.getBitWidth()-1);
    Max.clear(Max.getBitWidth()-1);
  }
}

// ComputeUnsignedMinMaxValuesFromKnownBits - Given an unsigned integer type and
// a set of known zero and one bits, compute the maximum and minimum values that
// could have the specified known zero and known one bits, returning them in
// min/max.
static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
                                                     const APInt &KnownOne,
                                                     APInt &Min, APInt &Max) {
  assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() &&
         KnownZero.getBitWidth() == Min.getBitWidth() &&
         KnownZero.getBitWidth() == Max.getBitWidth() &&
         "Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth.");
  APInt UnknownBits = ~(KnownZero|KnownOne);

  // The minimum value is when the unknown bits are all zeros.
  Min = KnownOne;
  // The maximum value is when the unknown bits are all ones.
  Max = KnownOne|UnknownBits;
}

/// SimplifyDemandedInstructionBits - Inst is an integer instruction that
/// SimplifyDemandedBits knows about.  See if the instruction has any
/// properties that allow us to simplify its operands.
bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
  unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
  APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
  APInt DemandedMask(APInt::getAllOnesValue(BitWidth));

  Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask,
                                     KnownZero, KnownOne, 0);
  if (V == 0) return false;
  if (V == &Inst) return true;
  ReplaceInstUsesWith(Inst, V);
  return true;
}

/// SimplifyDemandedBits - This form of SimplifyDemandedBits simplifies the
/// specified instruction operand if possible, updating it in place.  It
/// returns true if it made any change and false otherwise.
bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
                                        APInt &KnownZero, APInt &KnownOne,
                                        unsigned Depth) {
  Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask,
                                          KnownZero, KnownOne, Depth);
  if (NewVal == 0) return false;
  U = NewVal;
  return true;
}


/// SimplifyDemandedUseBits - This function attempts to replace V with a
/// simpler value based on the demanded bits.  When this function is called,
/// it is known that only the bits set in DemandedMask of the result of V are
/// ever used downstream.  Consequently, depending on the mask and V, it may be
/// possible to replace V with a constant or one of its operands.  In such
/// cases, this function does the replacement and returns true.  In all other
/// cases, it returns false after analyzing the expression and setting KnownOne
/// to the bits that are known to be one in the expression and KnownZero to the
/// bits that are known to be zero.  These are provided to potentially allow
/// the caller (which might recursively be SimplifyDemandedBits itself) to
/// simplify the expression.  KnownOne and KnownZero always follow the
/// invariant that KnownOne & KnownZero == 0.  That is, a bit can't be both
/// 1 and 0.  Note that the bits in KnownOne and KnownZero may only be accurate
/// for those bits set in DemandedMask.  Note also that the bitwidth of V,
/// DemandedMask, KnownZero and KnownOne must all be the same.
842/// 843/// This returns null if it did not change anything and it permits no 844/// simplification. This returns V itself if it did some simplification of V's 845/// operands based on the information about what bits are demanded. This returns 846/// some other non-null value if it found out that V is equal to another value 847/// in the context where the specified bits are demanded, but not for all users. 848Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, 849 APInt &KnownZero, APInt &KnownOne, 850 unsigned Depth) { 851 assert(V != 0 && "Null pointer of Value???"); 852 assert(Depth <= 6 && "Limit Search Depth"); 853 uint32_t BitWidth = DemandedMask.getBitWidth(); 854 const Type *VTy = V->getType(); 855 assert((TD || !isa<PointerType>(VTy)) && 856 "SimplifyDemandedBits needs to know bit widths!"); 857 assert((!TD || TD->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) && 858 (!VTy->isIntOrIntVector() || 859 VTy->getScalarSizeInBits() == BitWidth) && 860 KnownZero.getBitWidth() == BitWidth && 861 KnownOne.getBitWidth() == BitWidth && 862 "Value *V, DemandedMask, KnownZero and KnownOne " 863 "must have same BitWidth"); 864 if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { 865 // We know all of the bits for a constant! 866 KnownOne = CI->getValue() & DemandedMask; 867 KnownZero = ~KnownOne & DemandedMask; 868 return 0; 869 } 870 if (isa<ConstantPointerNull>(V)) { 871 // We know all of the bits for a constant! 872 KnownOne.clear(); 873 KnownZero = DemandedMask; 874 return 0; 875 } 876 877 KnownZero.clear(); 878 KnownOne.clear(); 879 if (DemandedMask == 0) { // Not demanding any bits from V. 880 if (isa<UndefValue>(V)) 881 return 0; 882 return UndefValue::get(VTy); 883 } 884 885 if (Depth == 6) // Limit search depth. 886 return 0; 887 888 APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); 889 APInt &RHSKnownZero = KnownZero, &RHSKnownOne = KnownOne; 890 891 Instruction *I = dyn_cast<Instruction>(V); 892 if (!I) { 893 ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth); 894 return 0; // Only analyze instructions. 895 } 896 897 // If there are multiple uses of this value and we aren't at the root, then 898 // we can't do any simplifications of the operands, because DemandedMask 899 // only reflects the bits demanded by *one* of the users. 900 if (Depth != 0 && !I->hasOneUse()) { 901 // Despite the fact that we can't simplify this instruction in all User's 902 // context, we can at least compute the knownzero/knownone bits, and we can 903 // do simplifications that apply to *just* the one user if we know that 904 // this instruction has a simpler value in that context. 905 if (I->getOpcode() == Instruction::And) { 906 // If either the LHS or the RHS are Zero, the result is zero. 907 ComputeMaskedBits(I->getOperand(1), DemandedMask, 908 RHSKnownZero, RHSKnownOne, Depth+1); 909 ComputeMaskedBits(I->getOperand(0), DemandedMask & ~RHSKnownZero, 910 LHSKnownZero, LHSKnownOne, Depth+1); 911 912 // If all of the demanded bits are known 1 on one side, return the other. 913 // These bits cannot contribute to the result of the 'and' in this 914 // context. 915 if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) == 916 (DemandedMask & ~LHSKnownZero)) 917 return I->getOperand(0); 918 if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) == 919 (DemandedMask & ~RHSKnownZero)) 920 return I->getOperand(1); 921 922 // If all of the demanded bits in the inputs are known zeros, return zero. 
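      // For example, "and i32 %X, 65280" (0xFF00) simplifies to 0 in this
      // context when only the low eight bits are demanded.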
923 if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask) 924 return Constant::getNullValue(VTy); 925 926 } else if (I->getOpcode() == Instruction::Or) { 927 // We can simplify (X|Y) -> X or Y in the user's context if we know that 928 // only bits from X or Y are demanded. 929 930 // If either the LHS or the RHS are One, the result is One. 931 ComputeMaskedBits(I->getOperand(1), DemandedMask, 932 RHSKnownZero, RHSKnownOne, Depth+1); 933 ComputeMaskedBits(I->getOperand(0), DemandedMask & ~RHSKnownOne, 934 LHSKnownZero, LHSKnownOne, Depth+1); 935 936 // If all of the demanded bits are known zero on one side, return the 937 // other. These bits cannot contribute to the result of the 'or' in this 938 // context. 939 if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) == 940 (DemandedMask & ~LHSKnownOne)) 941 return I->getOperand(0); 942 if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) == 943 (DemandedMask & ~RHSKnownOne)) 944 return I->getOperand(1); 945 946 // If all of the potentially set bits on one side are known to be set on 947 // the other side, just use the 'other' side. 948 if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) == 949 (DemandedMask & (~RHSKnownZero))) 950 return I->getOperand(0); 951 if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) == 952 (DemandedMask & (~LHSKnownZero))) 953 return I->getOperand(1); 954 } 955 956 // Compute the KnownZero/KnownOne bits to simplify things downstream. 957 ComputeMaskedBits(I, DemandedMask, KnownZero, KnownOne, Depth); 958 return 0; 959 } 960 961 // If this is the root being simplified, allow it to have multiple uses, 962 // just set the DemandedMask to all bits so that we can try to simplify the 963 // operands. This allows visitTruncInst (for example) to simplify the 964 // operand of a trunc without duplicating all the logic below. 965 if (Depth == 0 && !V->hasOneUse()) 966 DemandedMask = APInt::getAllOnesValue(BitWidth); 967 968 switch (I->getOpcode()) { 969 default: 970 ComputeMaskedBits(I, DemandedMask, RHSKnownZero, RHSKnownOne, Depth); 971 break; 972 case Instruction::And: 973 // If either the LHS or the RHS are Zero, the result is zero. 974 if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, 975 RHSKnownZero, RHSKnownOne, Depth+1) || 976 SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero, 977 LHSKnownZero, LHSKnownOne, Depth+1)) 978 return I; 979 assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); 980 assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); 981 982 // If all of the demanded bits are known 1 on one side, return the other. 983 // These bits cannot contribute to the result of the 'and'. 984 if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) == 985 (DemandedMask & ~LHSKnownZero)) 986 return I->getOperand(0); 987 if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) == 988 (DemandedMask & ~RHSKnownZero)) 989 return I->getOperand(1); 990 991 // If all of the demanded bits in the inputs are known zeros, return zero. 992 if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask) 993 return Constant::getNullValue(VTy); 994 995 // If the RHS is a constant, see if we can simplify it. 996 if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero)) 997 return I; 998 999 // Output known-1 bits are only known if set in both the LHS & RHS. 1000 RHSKnownOne &= LHSKnownOne; 1001 // Output known-0 are known to be clear if zero in either the LHS | RHS. 
1002 RHSKnownZero |= LHSKnownZero; 1003 break; 1004 case Instruction::Or: 1005 // If either the LHS or the RHS are One, the result is One. 1006 if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, 1007 RHSKnownZero, RHSKnownOne, Depth+1) || 1008 SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne, 1009 LHSKnownZero, LHSKnownOne, Depth+1)) 1010 return I; 1011 assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); 1012 assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); 1013 1014 // If all of the demanded bits are known zero on one side, return the other. 1015 // These bits cannot contribute to the result of the 'or'. 1016 if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) == 1017 (DemandedMask & ~LHSKnownOne)) 1018 return I->getOperand(0); 1019 if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) == 1020 (DemandedMask & ~RHSKnownOne)) 1021 return I->getOperand(1); 1022 1023 // If all of the potentially set bits on one side are known to be set on 1024 // the other side, just use the 'other' side. 1025 if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) == 1026 (DemandedMask & (~RHSKnownZero))) 1027 return I->getOperand(0); 1028 if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) == 1029 (DemandedMask & (~LHSKnownZero))) 1030 return I->getOperand(1); 1031 1032 // If the RHS is a constant, see if we can simplify it. 1033 if (ShrinkDemandedConstant(I, 1, DemandedMask)) 1034 return I; 1035 1036 // Output known-0 bits are only known if clear in both the LHS & RHS. 1037 RHSKnownZero &= LHSKnownZero; 1038 // Output known-1 are known to be set if set in either the LHS | RHS. 1039 RHSKnownOne |= LHSKnownOne; 1040 break; 1041 case Instruction::Xor: { 1042 if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, 1043 RHSKnownZero, RHSKnownOne, Depth+1) || 1044 SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, 1045 LHSKnownZero, LHSKnownOne, Depth+1)) 1046 return I; 1047 assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); 1048 assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); 1049 1050 // If all of the demanded bits are known zero on one side, return the other. 1051 // These bits cannot contribute to the result of the 'xor'. 1052 if ((DemandedMask & RHSKnownZero) == DemandedMask) 1053 return I->getOperand(0); 1054 if ((DemandedMask & LHSKnownZero) == DemandedMask) 1055 return I->getOperand(1); 1056 1057 // Output known-0 bits are known if clear or set in both the LHS & RHS. 1058 APInt KnownZeroOut = (RHSKnownZero & LHSKnownZero) | 1059 (RHSKnownOne & LHSKnownOne); 1060 // Output known-1 are known to be set if set in only one of the LHS, RHS. 1061 APInt KnownOneOut = (RHSKnownZero & LHSKnownOne) | 1062 (RHSKnownOne & LHSKnownZero); 1063 1064 // If all of the demanded bits are known to be zero on one side or the 1065 // other, turn this into an *inclusive* or. 1066 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 1067 if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) { 1068 Instruction *Or = 1069 BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1), 1070 I->getName()); 1071 return InsertNewInstBefore(Or, *I); 1072 } 1073 1074 // If all of the demanded bits on one side are known, and all of the set 1075 // bits on that side are also known to be set on the other side, turn this 1076 // into an AND, as we know the bits will be cleared. 1077 // e.g. 
(X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 1078 if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) { 1079 // all known 1080 if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) { 1081 Constant *AndC = Constant::getIntegerValue(VTy, 1082 ~RHSKnownOne & DemandedMask); 1083 Instruction *And = 1084 BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp"); 1085 return InsertNewInstBefore(And, *I); 1086 } 1087 } 1088 1089 // If the RHS is a constant, see if we can simplify it. 1090 // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1. 1091 if (ShrinkDemandedConstant(I, 1, DemandedMask)) 1092 return I; 1093 1094 // If our LHS is an 'and' and if it has one use, and if any of the bits we 1095 // are flipping are known to be set, then the xor is just resetting those 1096 // bits to zero. We can just knock out bits from the 'and' and the 'xor', 1097 // simplifying both of them. 1098 if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0))) 1099 if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() && 1100 isa<ConstantInt>(I->getOperand(1)) && 1101 isa<ConstantInt>(LHSInst->getOperand(1)) && 1102 (LHSKnownOne & RHSKnownOne & DemandedMask) != 0) { 1103 ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1)); 1104 ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1)); 1105 APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask); 1106 1107 Constant *AndC = 1108 ConstantInt::get(I->getType(), NewMask & AndRHS->getValue()); 1109 Instruction *NewAnd = 1110 BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp"); 1111 InsertNewInstBefore(NewAnd, *I); 1112 1113 Constant *XorC = 1114 ConstantInt::get(I->getType(), NewMask & XorRHS->getValue()); 1115 Instruction *NewXor = 1116 BinaryOperator::CreateXor(NewAnd, XorC, "tmp"); 1117 return InsertNewInstBefore(NewXor, *I); 1118 } 1119 1120 1121 RHSKnownZero = KnownZeroOut; 1122 RHSKnownOne = KnownOneOut; 1123 break; 1124 } 1125 case Instruction::Select: 1126 if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask, 1127 RHSKnownZero, RHSKnownOne, Depth+1) || 1128 SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, 1129 LHSKnownZero, LHSKnownOne, Depth+1)) 1130 return I; 1131 assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); 1132 assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); 1133 1134 // If the operands are constants, see if we can simplify them. 1135 if (ShrinkDemandedConstant(I, 1, DemandedMask) || 1136 ShrinkDemandedConstant(I, 2, DemandedMask)) 1137 return I; 1138 1139 // Only known if known in both the LHS and RHS. 1140 RHSKnownOne &= LHSKnownOne; 1141 RHSKnownZero &= LHSKnownZero; 1142 break; 1143 case Instruction::Trunc: { 1144 unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits(); 1145 DemandedMask.zext(truncBf); 1146 RHSKnownZero.zext(truncBf); 1147 RHSKnownOne.zext(truncBf); 1148 if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, 1149 RHSKnownZero, RHSKnownOne, Depth+1)) 1150 return I; 1151 DemandedMask.trunc(BitWidth); 1152 RHSKnownZero.trunc(BitWidth); 1153 RHSKnownOne.trunc(BitWidth); 1154 assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); 1155 break; 1156 } 1157 case Instruction::BitCast: 1158 if (!I->getOperand(0)->getType()->isIntOrIntVector()) 1159 return false; // vector->int or fp->int? 
1160 1161 if (const VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) { 1162 if (const VectorType *SrcVTy = 1163 dyn_cast<VectorType>(I->getOperand(0)->getType())) { 1164 if (DstVTy->getNumElements() != SrcVTy->getNumElements()) 1165 // Don't touch a bitcast between vectors of different element counts. 1166 return false; 1167 } else 1168 // Don't touch a scalar-to-vector bitcast. 1169 return false; 1170 } else if (isa<VectorType>(I->getOperand(0)->getType())) 1171 // Don't touch a vector-to-scalar bitcast. 1172 return false; 1173 1174 if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, 1175 RHSKnownZero, RHSKnownOne, Depth+1)) 1176 return I; 1177 assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); 1178 break; 1179 case Instruction::ZExt: { 1180 // Compute the bits in the result that are not present in the input. 1181 unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits(); 1182 1183 DemandedMask.trunc(SrcBitWidth); 1184 RHSKnownZero.trunc(SrcBitWidth); 1185 RHSKnownOne.trunc(SrcBitWidth); 1186 if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, 1187 RHSKnownZero, RHSKnownOne, Depth+1)) 1188 return I; 1189 DemandedMask.zext(BitWidth); 1190 RHSKnownZero.zext(BitWidth); 1191 RHSKnownOne.zext(BitWidth); 1192 assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); 1193 // The top bits are known to be zero. 1194 RHSKnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); 1195 break; 1196 } 1197 case Instruction::SExt: { 1198 // Compute the bits in the result that are not present in the input. 1199 unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits(); 1200 1201 APInt InputDemandedBits = DemandedMask & 1202 APInt::getLowBitsSet(BitWidth, SrcBitWidth); 1203 1204 APInt NewBits(APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth)); 1205 // If any of the sign extended bits are demanded, we know that the sign 1206 // bit is demanded. 1207 if ((NewBits & DemandedMask) != 0) 1208 InputDemandedBits.set(SrcBitWidth-1); 1209 1210 InputDemandedBits.trunc(SrcBitWidth); 1211 RHSKnownZero.trunc(SrcBitWidth); 1212 RHSKnownOne.trunc(SrcBitWidth); 1213 if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits, 1214 RHSKnownZero, RHSKnownOne, Depth+1)) 1215 return I; 1216 InputDemandedBits.zext(BitWidth); 1217 RHSKnownZero.zext(BitWidth); 1218 RHSKnownOne.zext(BitWidth); 1219 assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); 1220 1221 // If the sign bit of the input is known set or clear, then we know the 1222 // top bits of the result. 1223 1224 // If the input sign bit is known zero, or if the NewBits are not demanded 1225 // convert this into a zero extension. 1226 if (RHSKnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) { 1227 // Convert to ZExt cast 1228 CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName()); 1229 return InsertNewInstBefore(NewCast, *I); 1230 } else if (RHSKnownOne[SrcBitWidth-1]) { // Input sign bit known set 1231 RHSKnownOne |= NewBits; 1232 } 1233 break; 1234 } 1235 case Instruction::Add: { 1236 // Figure out what the input bits are. If the top bits of the and result 1237 // are not demanded, then the add doesn't demand them from its input 1238 // either. 1239 unsigned NLZ = DemandedMask.countLeadingZeros(); 1240 1241 // If there is a constant on the RHS, there are a variety of xformations 1242 // we can do. 
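    // For example, if %X is known to have its low bit clear, "add i32 %X, 1"
    // can be turned into an 'or' by the transform below.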
1243 if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) { 1244 // If null, this should be simplified elsewhere. Some of the xforms here 1245 // won't work if the RHS is zero. 1246 if (RHS->isZero()) 1247 break; 1248 1249 // If the top bit of the output is demanded, demand everything from the 1250 // input. Otherwise, we demand all the input bits except NLZ top bits. 1251 APInt InDemandedBits(APInt::getLowBitsSet(BitWidth, BitWidth - NLZ)); 1252 1253 // Find information about known zero/one bits in the input. 1254 if (SimplifyDemandedBits(I->getOperandUse(0), InDemandedBits, 1255 LHSKnownZero, LHSKnownOne, Depth+1)) 1256 return I; 1257 1258 // If the RHS of the add has bits set that can't affect the input, reduce 1259 // the constant. 1260 if (ShrinkDemandedConstant(I, 1, InDemandedBits)) 1261 return I; 1262 1263 // Avoid excess work. 1264 if (LHSKnownZero == 0 && LHSKnownOne == 0) 1265 break; 1266 1267 // Turn it into OR if input bits are zero. 1268 if ((LHSKnownZero & RHS->getValue()) == RHS->getValue()) { 1269 Instruction *Or = 1270 BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1), 1271 I->getName()); 1272 return InsertNewInstBefore(Or, *I); 1273 } 1274 1275 // We can say something about the output known-zero and known-one bits, 1276 // depending on potential carries from the input constant and the 1277 // unknowns. For example if the LHS is known to have at most the 0x0F0F0 1278 // bits set and the RHS constant is 0x01001, then we know we have a known 1279 // one mask of 0x00001 and a known zero mask of 0xE0F0E. 1280 1281 // To compute this, we first compute the potential carry bits. These are 1282 // the bits which may be modified. I'm not aware of a better way to do 1283 // this scan. 1284 const APInt &RHSVal = RHS->getValue(); 1285 APInt CarryBits((~LHSKnownZero + RHSVal) ^ (~LHSKnownZero ^ RHSVal)); 1286 1287 // Now that we know which bits have carries, compute the known-1/0 sets. 1288 1289 // Bits are known one if they are known zero in one operand and one in the 1290 // other, and there is no input carry. 1291 RHSKnownOne = ((LHSKnownZero & RHSVal) | 1292 (LHSKnownOne & ~RHSVal)) & ~CarryBits; 1293 1294 // Bits are known zero if they are known zero in both operands and there 1295 // is no input carry. 1296 RHSKnownZero = LHSKnownZero & ~RHSVal & ~CarryBits; 1297 } else { 1298 // If the high-bits of this ADD are not demanded, then it does not demand 1299 // the high bits of its LHS or RHS. 1300 if (DemandedMask[BitWidth-1] == 0) { 1301 // Right fill the mask of bits for this ADD to demand the most 1302 // significant bit and all those below it. 1303 APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ)); 1304 if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps, 1305 LHSKnownZero, LHSKnownOne, Depth+1) || 1306 SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps, 1307 LHSKnownZero, LHSKnownOne, Depth+1)) 1308 return I; 1309 } 1310 } 1311 break; 1312 } 1313 case Instruction::Sub: 1314 // If the high-bits of this SUB are not demanded, then it does not demand 1315 // the high bits of its LHS or RHS. 1316 if (DemandedMask[BitWidth-1] == 0) { 1317 // Right fill the mask of bits for this SUB to demand the most 1318 // significant bit and all those below it. 
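      // For example, if only the low 16 bits of an i32 subtraction are
      // demanded (NLZ == 16), borrows only propagate upward, so only the low
      // 16 bits of each operand are demanded.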
1319 uint32_t NLZ = DemandedMask.countLeadingZeros(); 1320 APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ)); 1321 if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps, 1322 LHSKnownZero, LHSKnownOne, Depth+1) || 1323 SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps, 1324 LHSKnownZero, LHSKnownOne, Depth+1)) 1325 return I; 1326 } 1327 // Otherwise just hand the sub off to ComputeMaskedBits to fill in 1328 // the known zeros and ones. 1329 ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth); 1330 break; 1331 case Instruction::Shl: 1332 if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { 1333 uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); 1334 APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt)); 1335 if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, 1336 RHSKnownZero, RHSKnownOne, Depth+1)) 1337 return I; 1338 assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); 1339 RHSKnownZero <<= ShiftAmt; 1340 RHSKnownOne <<= ShiftAmt; 1341 // low bits known zero. 1342 if (ShiftAmt) 1343 RHSKnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); 1344 } 1345 break; 1346 case Instruction::LShr: 1347 // For a logical shift right 1348 if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { 1349 uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); 1350 1351 // Unsigned shift right. 1352 APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt)); 1353 if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, 1354 RHSKnownZero, RHSKnownOne, Depth+1)) 1355 return I; 1356 assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); 1357 RHSKnownZero = APIntOps::lshr(RHSKnownZero, ShiftAmt); 1358 RHSKnownOne = APIntOps::lshr(RHSKnownOne, ShiftAmt); 1359 if (ShiftAmt) { 1360 // Compute the new bits that are at the top now. 1361 APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); 1362 RHSKnownZero |= HighBits; // high bits known zero. 1363 } 1364 } 1365 break; 1366 case Instruction::AShr: 1367 // If this is an arithmetic shift right and only the low-bit is set, we can 1368 // always convert this into a logical shr, even if the shift amount is 1369 // variable. The low bit of the shift cannot be an input sign bit unless 1370 // the shift amount is >= the size of the datatype, which is undefined. 1371 if (DemandedMask == 1) { 1372 // Perform the logical shift right. 1373 Instruction *NewVal = BinaryOperator::CreateLShr( 1374 I->getOperand(0), I->getOperand(1), I->getName()); 1375 return InsertNewInstBefore(NewVal, *I); 1376 } 1377 1378 // If the sign bit is the only bit demanded by this ashr, then there is no 1379 // need to do it, the shift doesn't change the high bit. 1380 if (DemandedMask.isSignBit()) 1381 return I->getOperand(0); 1382 1383 if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { 1384 uint32_t ShiftAmt = SA->getLimitedValue(BitWidth); 1385 1386 // Signed shift right. 1387 APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt)); 1388 // If any of the "high bits" are demanded, we should set the sign bit as 1389 // demanded. 1390 if (DemandedMask.countLeadingZeros() <= ShiftAmt) 1391 DemandedMaskIn.set(BitWidth-1); 1392 if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, 1393 RHSKnownZero, RHSKnownOne, Depth+1)) 1394 return I; 1395 assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); 1396 // Compute the new bits that are at the top now. 
      APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
      RHSKnownZero = APIntOps::lshr(RHSKnownZero, ShiftAmt);
      RHSKnownOne  = APIntOps::lshr(RHSKnownOne, ShiftAmt);

      // Handle the sign bits.
      APInt SignBit(APInt::getSignBit(BitWidth));
      // Adjust to where it is now in the mask.
      SignBit = APIntOps::lshr(SignBit, ShiftAmt);

      // If the input sign bit is known to be zero, or if none of the top bits
      // are demanded, turn this into an unsigned shift right.
      if (BitWidth <= ShiftAmt || RHSKnownZero[BitWidth-ShiftAmt-1] ||
          (HighBits & ~DemandedMask) == HighBits) {
        // Perform the logical shift right.
        Instruction *NewVal = BinaryOperator::CreateLShr(
                          I->getOperand(0), SA, I->getName());
        return InsertNewInstBefore(NewVal, *I);
      } else if ((RHSKnownOne & SignBit) != 0) { // New bits are known one.
        RHSKnownOne |= HighBits;
      }
    }
    break;
  case Instruction::SRem:
    if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
      APInt RA = Rem->getValue().abs();
      if (RA.isPowerOf2()) {
        if (DemandedMask.ult(RA))    // srem won't affect demanded bits
          return I->getOperand(0);

        APInt LowBits = RA - 1;
        APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
        if (SimplifyDemandedBits(I->getOperandUse(0), Mask2,
                                 LHSKnownZero, LHSKnownOne, Depth+1))
          return I;

        if (LHSKnownZero[BitWidth-1] || ((LHSKnownZero & LowBits) == LowBits))
          LHSKnownZero |= ~LowBits;

        KnownZero |= LHSKnownZero & DemandedMask;

        assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
      }
    }
    break;
  case Instruction::URem: {
    APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
    APInt AllOnes = APInt::getAllOnesValue(BitWidth);
    if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes,
                             KnownZero2, KnownOne2, Depth+1) ||
        SimplifyDemandedBits(I->getOperandUse(1), AllOnes,
                             KnownZero2, KnownOne2, Depth+1))
      return I;

    // KnownZero2 now describes the divisor; an unsigned remainder has at
    // least as many leading zero bits as its divisor does.
    unsigned Leaders = KnownZero2.countLeadingOnes();
    KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
    break;
  }
  case Instruction::Call:
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      switch (II->getIntrinsicID()) {
      default: break;
      case Intrinsic::bswap: {
        // If the only bits demanded come from one byte of the bswap result,
        // just shift the input byte into position to eliminate the bswap.
        unsigned NLZ = DemandedMask.countLeadingZeros();
        unsigned NTZ = DemandedMask.countTrailingZeros();

        // Round NTZ down to the next byte.  If we have 11 trailing zeros, then
        // we need all the bits down to bit 8.  Likewise, round NLZ.  If we
        // have 14 leading zeros, round to 8.
        NLZ &= ~7;
        NTZ &= ~7;
        // If we need exactly one byte, we can do this transformation.
        if (BitWidth-NLZ-NTZ == 8) {
          unsigned ResultBit = NTZ;
          unsigned InputBit = BitWidth-NTZ-8;

          // Replace this with either a left or right shift to get the byte into
          // the right place.
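          // E.g. (illustrative): with BitWidth == 32, NLZ == 0 and NTZ == 24,
          // only the top result byte is demanded, and it comes from the low
          // input byte, so
          //     %t = call i32 @llvm.bswap.i32(i32 %x)
          // can be replaced by
          //     %t = shl i32 %x, 24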
          Instruction *NewVal;
          if (InputBit > ResultBit)
            NewVal = BinaryOperator::CreateLShr(I->getOperand(1),
                    ConstantInt::get(I->getType(), InputBit-ResultBit));
          else
            NewVal = BinaryOperator::CreateShl(I->getOperand(1),
                    ConstantInt::get(I->getType(), ResultBit-InputBit));
          NewVal->takeName(I);
          return InsertNewInstBefore(NewVal, *I);
        }

        // TODO: Could compute known zero/one bits based on the input.
        break;
      }
      }
    }
    ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth);
    break;
  }

  // If the client is only demanding bits that we know, return the known
  // constant.
  if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask)
    return Constant::getIntegerValue(VTy, RHSKnownOne);
  return 0;
}


/// SimplifyDemandedVectorElts - The specified value produces a vector with
/// any number of elements. DemandedElts contains the set of elements that are
/// actually used by the caller.  This method analyzes which elements of the
/// operand are undef and returns that information in UndefElts.
///
/// If the information about demanded elements can be used to simplify the
/// operation, the operation is simplified and the resultant value is
/// returned.  This returns null if no change was made.
Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
                                                APInt& UndefElts,
                                                unsigned Depth) {
  unsigned VWidth = cast<VectorType>(V->getType())->getNumElements();
  APInt EltMask(APInt::getAllOnesValue(VWidth));
  assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");

  if (isa<UndefValue>(V)) {
    // If the entire vector is undefined, just return this info.
    UndefElts = EltMask;
    return 0;
  } else if (DemandedElts == 0) { // If nothing is demanded, provide undef.
    UndefElts = EltMask;
    return UndefValue::get(V->getType());
  }

  UndefElts = 0;
  if (ConstantVector *CP = dyn_cast<ConstantVector>(V)) {
    const Type *EltTy = cast<VectorType>(V->getType())->getElementType();
    Constant *Undef = UndefValue::get(EltTy);

    std::vector<Constant*> Elts;
    for (unsigned i = 0; i != VWidth; ++i)
      if (!DemandedElts[i]) {   // If not demanded, set to undef.
        Elts.push_back(Undef);
        UndefElts.set(i);
      } else if (isa<UndefValue>(CP->getOperand(i))) {   // Already undef.
        Elts.push_back(Undef);
        UndefElts.set(i);
      } else {                               // Otherwise, defined.
        Elts.push_back(CP->getOperand(i));
      }

    // If we changed the constant, return it.
    Constant *NewCP = ConstantVector::get(Elts);
    return NewCP != CP ? NewCP : 0;
  } else if (isa<ConstantAggregateZero>(V)) {
    // Simplify the CAZ to a ConstantVector where the non-demanded elements are
    // set to undef.

    // Check if this is identity.  If so, return 0 since we are not simplifying
    // anything.  (EltMask is all ones over VWidth bits, which stays correct
    // even for vectors of 64 or more elements.)
    if (DemandedElts == EltMask)
      return 0;

    const Type *EltTy = cast<VectorType>(V->getType())->getElementType();
    Constant *Zero = Constant::getNullValue(EltTy);
    Constant *Undef = UndefValue::get(EltTy);
    std::vector<Constant*> Elts;
    for (unsigned i = 0; i != VWidth; ++i) {
      Constant *Elt = DemandedElts[i] ? Zero : Undef;
      Elts.push_back(Elt);
    }
    UndefElts = DemandedElts ^ EltMask;
    return ConstantVector::get(Elts);
  }

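  // E.g. (illustrative): if the caller demands only element 0 of the
  // constant <4 x i32> <i32 1, i32 2, i32 3, i32 4>, the ConstantVector
  // case above rewrites it to <i32 1, i32 undef, i32 undef, i32 undef>
  // and reports elements 1-3 as undef in UndefElts.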
  // Limit search depth.
  if (Depth == 10)
    return 0;

  // If multiple users are using the root value, proceed with
  // simplification conservatively assuming that all elements
  // are needed.
  if (!V->hasOneUse()) {
    // Quit if we find multiple users of a non-root value though.
    // They'll be handled when it's their turn to be visited by
    // the main instcombine process.
    if (Depth != 0)
      // TODO: Just compute the UndefElts information recursively.
      return 0;

    // Conservatively assume that all elements are needed.
    DemandedElts = EltMask;
  }

  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) return 0;        // Only analyze instructions.

  bool MadeChange = false;
  APInt UndefElts2(VWidth, 0);
  Value *TmpV;
  switch (I->getOpcode()) {
  default: break;

  case Instruction::InsertElement: {
    // If this is a variable index, we don't know which element it overwrites,
    // so demand exactly the same input as we produce.
    ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2));
    if (Idx == 0) {
      // Note that we can't propagate undef elt info, because we don't know
      // which elt is getting updated.
      TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
                                        UndefElts2, Depth+1);
      if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
      break;
    }

    // If this is inserting an element that isn't demanded, remove this
    // insertelement.
    unsigned IdxNo = Idx->getZExtValue();
    if (IdxNo >= VWidth || !DemandedElts[IdxNo]) {
      Worklist.Add(I);
      return I->getOperand(0);
    }

    // Otherwise, the element inserted overwrites whatever was there, so the
    // input demanded set is simpler than the output set.
    APInt DemandedElts2 = DemandedElts;
    DemandedElts2.clear(IdxNo);
    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2,
                                      UndefElts, Depth+1);
    if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
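    // E.g. (illustrative): if lane 2 of "insertelement <4 x i32> %v, i32 %s,
    // i32 2" is not demanded, the insert is dropped and %v used directly;
    // if lane 2 is demanded, the recursion above stops demanding lane 2 of
    // %v, since it is about to be overwritten.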
    // The inserted element is defined.
    UndefElts.clear(IdxNo);
    break;
  }
  case Instruction::ShuffleVector: {
    ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
    uint64_t LHSVWidth =
      cast<VectorType>(Shuffle->getOperand(0)->getType())->getNumElements();
    APInt LeftDemanded(LHSVWidth, 0), RightDemanded(LHSVWidth, 0);
    for (unsigned i = 0; i < VWidth; i++) {
      if (DemandedElts[i]) {
        unsigned MaskVal = Shuffle->getMaskValue(i);
        if (MaskVal != -1u) {
          assert(MaskVal < LHSVWidth * 2 &&
                 "shufflevector mask index out of range!");
          if (MaskVal < LHSVWidth)
            LeftDemanded.set(MaskVal);
          else
            RightDemanded.set(MaskVal - LHSVWidth);
        }
      }
    }

    APInt UndefElts4(LHSVWidth, 0);
    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded,
                                      UndefElts4, Depth+1);
    if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }

    APInt UndefElts3(LHSVWidth, 0);
    TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded,
                                      UndefElts3, Depth+1);
    if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }

    bool NewUndefElts = false;
    for (unsigned i = 0; i < VWidth; i++) {
      unsigned MaskVal = Shuffle->getMaskValue(i);
      if (MaskVal == -1u) {
        UndefElts.set(i);
      } else if (MaskVal < LHSVWidth) {
        if (UndefElts4[MaskVal]) {
          NewUndefElts = true;
          UndefElts.set(i);
        }
      } else {
        if (UndefElts3[MaskVal - LHSVWidth]) {
          NewUndefElts = true;
          UndefElts.set(i);
        }
      }
    }

    if (NewUndefElts) {
      // Add additional discovered undefs.
      std::vector<Constant*> Elts;
      for (unsigned i = 0; i < VWidth; ++i) {
        if (UndefElts[i])
          Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context)));
        else
          Elts.push_back(ConstantInt::get(Type::getInt32Ty(*Context),
                                          Shuffle->getMaskValue(i)));
      }
      I->setOperand(2, ConstantVector::get(Elts));
      MadeChange = true;
    }
    break;
  }
  case Instruction::BitCast: {
    // Vector->vector casts only.
    const VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
    if (!VTy) break;
    unsigned InVWidth = VTy->getNumElements();
    APInt InputDemandedElts(InVWidth, 0);
    unsigned Ratio;

    if (VWidth == InVWidth) {
      // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
      // elements as are demanded of us.
      Ratio = 1;
      InputDemandedElts = DemandedElts;
    } else if (VWidth > InVWidth) {
      // Untested so far.
      break;

      // If there are more elements in the result than there are in the source,
      // then an input element is live if any of the corresponding output
      // elements are live.
      Ratio = VWidth/InVWidth;
      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
        if (DemandedElts[OutIdx])
          InputDemandedElts.set(OutIdx/Ratio);
      }
    } else {
      // Untested so far.
      break;

      // If there are more elements in the source than there are in the result,
      // then an input element is live if the corresponding output element is
      // live.
      Ratio = InVWidth/VWidth;
      for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
        if (DemandedElts[InIdx/Ratio])
          InputDemandedElts.set(InIdx);
    }
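    // Illustration for the same-width case above: demanding element 1 of
    //     %f = bitcast <4 x i32> %v to <4 x float>
    // demands exactly element 1 of %v.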
    // Simplify the input based on the demanded elements.
    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
                                      UndefElts2, Depth+1);
    if (TmpV) {
      I->setOperand(0, TmpV);
      MadeChange = true;
    }

    UndefElts = UndefElts2;
    if (VWidth > InVWidth) {
      llvm_unreachable("Unimp");
      // If there are more elements in the result than there are in the source,
      // then an output element is undef if the corresponding input element is
      // undef.
      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
        if (UndefElts2[OutIdx/Ratio])
          UndefElts.set(OutIdx);
    } else if (VWidth < InVWidth) {
      llvm_unreachable("Unimp");
      // If there are more elements in the source than there are in the result,
      // then a result element is undef if all of the corresponding input
      // elements are undef.
      UndefElts = ~0ULL >> (64-VWidth);  // Start out all undef.
      for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
        if (!UndefElts2[InIdx])            // Not undef?
          UndefElts.clear(InIdx/Ratio);    // Clear undef bit.
    }
    break;
  }
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
    // Note that div/rem are deliberately absent from this list: they demand
    // all of their input elements, because turning an undemanded lane into
    // undef could introduce a divide by zero.
    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
                                      UndefElts, Depth+1);
    if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
    TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts,
                                      UndefElts2, Depth+1);
    if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }

    // Output elements are undefined only if they are undefined in both
    // inputs; consider undef & 0, whose result is known zero, not undef.
    UndefElts &= UndefElts2;
    break;

  case Instruction::Call: {
    IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
    if (!II) break;
    switch (II->getIntrinsicID()) {
    default: break;

    // Binary vector operations that work column-wise.  A dest element is a
    // function of the corresponding input elements from the two inputs.
    case Intrinsic::x86_sse_sub_ss:
    case Intrinsic::x86_sse_mul_ss:
    case Intrinsic::x86_sse_min_ss:
    case Intrinsic::x86_sse_max_ss:
    case Intrinsic::x86_sse2_sub_sd:
    case Intrinsic::x86_sse2_mul_sd:
    case Intrinsic::x86_sse2_min_sd:
    case Intrinsic::x86_sse2_max_sd:
      TmpV = SimplifyDemandedVectorElts(II->getOperand(1), DemandedElts,
                                        UndefElts, Depth+1);
      if (TmpV) { II->setOperand(1, TmpV); MadeChange = true; }
      TmpV = SimplifyDemandedVectorElts(II->getOperand(2), DemandedElts,
                                        UndefElts2, Depth+1);
      if (TmpV) { II->setOperand(2, TmpV); MadeChange = true; }

      // If only the low elt is demanded and this is a scalarizable intrinsic,
      // scalarize it now.
      if (DemandedElts == 1) {
        switch (II->getIntrinsicID()) {
        default: break;
        case Intrinsic::x86_sse_sub_ss:
        case Intrinsic::x86_sse_mul_ss:
        case Intrinsic::x86_sse2_sub_sd:
        case Intrinsic::x86_sse2_mul_sd:
          // TODO: Lower MIN/MAX/ABS/etc
          Value *LHS = II->getOperand(1);
          Value *RHS = II->getOperand(2);
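          // E.g. (illustrative): when only lane 0 of
          //     %r = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a,
          //                                                <4 x float> %b)
          // is demanded, the call is equivalent to an fsub of the two lane-0
          // scalars inserted back into an undef vector, which is what the
          // code below builds.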
          // Extract the element as scalars.
          LHS = InsertNewInstBefore(ExtractElementInst::Create(LHS,
            ConstantInt::get(Type::getInt32Ty(*Context), 0U, false), "tmp"), *II);
          RHS = InsertNewInstBefore(ExtractElementInst::Create(RHS,
            ConstantInt::get(Type::getInt32Ty(*Context), 0U, false), "tmp"), *II);

          switch (II->getIntrinsicID()) {
          default: llvm_unreachable("Case stmts out of sync!");
          case Intrinsic::x86_sse_sub_ss:
          case Intrinsic::x86_sse2_sub_sd:
            TmpV = InsertNewInstBefore(BinaryOperator::CreateFSub(LHS, RHS,
                                                        II->getName()), *II);
            break;
          case Intrinsic::x86_sse_mul_ss:
          case Intrinsic::x86_sse2_mul_sd:
            TmpV = InsertNewInstBefore(BinaryOperator::CreateFMul(LHS, RHS,
                                                         II->getName()), *II);
            break;
          }

          Instruction *New =
            InsertElementInst::Create(
              UndefValue::get(II->getType()), TmpV,
              ConstantInt::get(Type::getInt32Ty(*Context), 0U, false),
              II->getName());
          InsertNewInstBefore(New, *II);
          return New;
        }
      }

      // Output elements are undefined only if they are undefined in both
      // inputs; consider undef & 0, whose result is known zero, not undef.
      UndefElts &= UndefElts2;
      break;
    }
    break;
  }
  }
  return MadeChange ? I : 0;
}


/// AssociativeOpt - Perform an optimization on an associative operator.  This
/// function is designed to check a chain of associative operators for a
/// potential to apply a certain optimization.  Since the optimization may be
/// applicable if the expression was reassociated, this checks the chain, then
/// reassociates the expression as necessary to expose the optimization
/// opportunity.  This makes use of a special Functor, which must define
/// 'shouldApply' and 'apply' methods.
///
template<typename Functor>
static Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F) {
  unsigned Opcode = Root.getOpcode();
  Value *LHS = Root.getOperand(0);

  // Quick check, see if the immediate LHS matches...
  if (F.shouldApply(LHS))
    return F.apply(Root);

  // Otherwise, if the LHS is not of the same opcode as the root, return.
  Instruction *LHSI = dyn_cast<Instruction>(LHS);
  while (LHSI && LHSI->getOpcode() == Opcode && LHSI->hasOneUse()) {
    // Should we apply this transform to the RHS?
    bool ShouldApply = F.shouldApply(LHSI->getOperand(1));

    // If not to the RHS, check to see if we should apply to the LHS...
    if (!ShouldApply && F.shouldApply(LHSI->getOperand(0))) {
      cast<BinaryOperator>(LHSI)->swapOperands();   // Make the LHS the RHS
      ShouldApply = true;
    }

    // If the functor wants to apply the optimization to the RHS of LHSI,
    // reassociate the expression from ((? op A) op B) to (? op (A op B))
    if (ShouldApply) {
      // Now all of the instructions are in the current basic block, go ahead
      // and perform the reassociation.
      Instruction *TmpLHSI = cast<Instruction>(Root.getOperand(0));

      // First move the selected RHS to the LHS of the root...
      Root.setOperand(0, LHSI->getOperand(1));
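      // Illustration of the rewrite below (schematic, single-level case):
      //     %t    = add i32 %q, %A        ; LHSI
      //     %root = add i32 %t, %B        ; Root
      // becomes
      //     %root = add i32 %A, %B
      //     %t    = add i32 %q, %root
      // with users of the old %root now reading %t, so the operand the
      // functor matched feeds the root directly.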
      // Make what used to be the LHS of the root be the user of the root...
      Value *ExtraOperand = TmpLHSI->getOperand(1);
      if (&Root == TmpLHSI) {
        Root.replaceAllUsesWith(Constant::getNullValue(TmpLHSI->getType()));
        return 0;
      }
      Root.replaceAllUsesWith(TmpLHSI);          // Users now use TmpLHSI
      TmpLHSI->setOperand(1, &Root);             // TmpLHSI now uses the root
      BasicBlock::iterator ARI = &Root; ++ARI;
      TmpLHSI->moveBefore(ARI);                  // Move TmpLHSI to after Root
      ARI = Root;

      // Now propagate the ExtraOperand down the chain of instructions until we
      // get to LHSI.
      while (TmpLHSI != LHSI) {
        Instruction *NextLHSI = cast<Instruction>(TmpLHSI->getOperand(0));
        // Move the instruction to immediately before the chain we are
        // constructing to avoid breaking dominance properties.
        NextLHSI->moveBefore(ARI);
        ARI = NextLHSI;

        Value *NextOp = NextLHSI->getOperand(1);
        NextLHSI->setOperand(1, ExtraOperand);
        TmpLHSI = NextLHSI;
        ExtraOperand = NextOp;
      }

      // Now that the instructions are reassociated, have the functor perform
      // the transformation...
      return F.apply(Root);
    }

    LHSI = dyn_cast<Instruction>(LHSI->getOperand(0));
  }
  return 0;
}

namespace {

// AddRHS - Implements: X + X --> X << 1
struct AddRHS {
  Value *RHS;
  explicit AddRHS(Value *rhs) : RHS(rhs) {}
  bool shouldApply(Value *LHS) const { return LHS == RHS; }
  Instruction *apply(BinaryOperator &Add) const {
    return BinaryOperator::CreateShl(Add.getOperand(0),
                                     ConstantInt::get(Add.getType(), 1));
  }
};

// AddMaskingAnd - Implements (A & C1)+(B & C2) --> (A & C1)|(B & C2)
//                 iff C1&C2 == 0
struct AddMaskingAnd {
  Constant *C2;
  explicit AddMaskingAnd(Constant *c) : C2(c) {}
  bool shouldApply(Value *LHS) const {
    ConstantInt *C1;
    return match(LHS, m_And(m_Value(), m_ConstantInt(C1))) &&
           ConstantExpr::getAnd(C1, C2)->isNullValue();
  }
  Instruction *apply(BinaryOperator &Add) const {
    return BinaryOperator::CreateOr(Add.getOperand(0), Add.getOperand(1));
  }
};

}

static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
                                             InstCombiner *IC) {
  if (CastInst *CI = dyn_cast<CastInst>(&I))
    return IC->Builder->CreateCast(CI->getOpcode(), SO, I.getType());
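  // E.g. (illustrative): folding "add i32 %sel, 7" across
  //     %sel = select i1 %c, i32 0, i32 %x
  // evaluates the true arm to the constant 7 and emits
  //     %x.op = add i32 %x, 7
  // for the false arm.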
  // Figure out if the constant is the left or the right argument.
  bool ConstIsRHS = isa<Constant>(I.getOperand(1));
  Constant *ConstOperand = cast<Constant>(I.getOperand(ConstIsRHS));

  if (Constant *SOC = dyn_cast<Constant>(SO)) {
    if (ConstIsRHS)
      return ConstantExpr::get(I.getOpcode(), SOC, ConstOperand);
    return ConstantExpr::get(I.getOpcode(), ConstOperand, SOC);
  }

  Value *Op0 = SO, *Op1 = ConstOperand;
  if (!ConstIsRHS)
    std::swap(Op0, Op1);

  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I))
    return IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1,
                                    SO->getName()+".op");
  if (ICmpInst *CI = dyn_cast<ICmpInst>(&I))
    return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1,
                                   SO->getName()+".cmp");
  if (FCmpInst *CI = dyn_cast<FCmpInst>(&I))
    return IC->Builder->CreateFCmp(CI->getPredicate(), Op0, Op1,
                                   SO->getName()+".cmp");
  llvm_unreachable("Unknown binary instruction type!");
}

// FoldOpIntoSelect - Given an instruction with a select as one operand and a
// constant as the other operand, try to fold the binary operator into the
// select arguments.  This also works for Cast instructions, which obviously do
// not have a second operand.
static Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
                                     InstCombiner *IC) {
  // Don't modify shared select instructions.
  if (!SI->hasOneUse()) return 0;
  Value *TV = SI->getOperand(1);
  Value *FV = SI->getOperand(2);

  if (isa<Constant>(TV) || isa<Constant>(FV)) {
    // Bool selects with constant operands can be folded to logical ops
    // elsewhere, so don't touch them here.
    if (SI->getType() == Type::getInt1Ty(*IC->getContext())) return 0;

    Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, IC);
    Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, IC);

    return SelectInst::Create(SI->getCondition(), SelectTrueVal,
                              SelectFalseVal);
  }
  return 0;
}


/// FoldOpIntoPhi - Given a binary operator, cast instruction, or select which
/// has a PHI node as operand #0, see if we can fold the instruction into the
/// PHI (which is only possible if all operands to the PHI are constants).
///
/// If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms
/// that would normally be unprofitable because they strongly encourage jump
/// threading.
Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I,
                                         bool AllowAggressive) {
  // FIXME: The aggressive transforms are currently disabled unconditionally.
  AllowAggressive = false;
  PHINode *PN = cast<PHINode>(I.getOperand(0));
  unsigned NumPHIValues = PN->getNumIncomingValues();
  if (NumPHIValues == 0 ||
      // We normally only transform phis with a single use, unless we're trying
      // hard to make jump threading happen.
      (!PN->hasOneUse() && !AllowAggressive))
    return 0;


  // Check to see if all of the operands of the PHI are simple constants
  // (constantint/constantfp/undef).  If there is one non-constant value,
  // remember the BB it is in.  If there is more than one or if *it* is a PHI,
  // bail out.  We don't do arbitrary constant expressions here because moving
  // their computation can be expensive without a cost model.
  BasicBlock *NonConstBB = 0;
  for (unsigned i = 0; i != NumPHIValues; ++i)
    if (!isa<Constant>(PN->getIncomingValue(i)) ||
        isa<ConstantExpr>(PN->getIncomingValue(i))) {
      if (NonConstBB) return 0;   // More than one non-const value.
      if (isa<PHINode>(PN->getIncomingValue(i))) return 0;  // Itself a phi.
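      // Record the block of the lone non-constant incoming value; a copy of
      // the operation will be inserted at the end of that block.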
      NonConstBB = PN->getIncomingBlock(i);

      // If the incoming non-constant value is in I's block, we have an infinite
      // loop.
      if (NonConstBB == I.getParent())
        return 0;
    }

  // If there is exactly one non-constant value, we can insert a copy of the
  // operation in that block.  However, if this is a critical edge, we would be
  // inserting the computation on some other paths (e.g. inside a loop).  Only
  // do this if the pred block is unconditionally branching into the phi block.
  if (NonConstBB != 0 && !AllowAggressive) {
    BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator());
    if (!BI || !BI->isUnconditional()) return 0;
  }

  // Okay, we can do the transformation: create the new PHI node.
  PHINode *NewPN = PHINode::Create(I.getType(), "");
  NewPN->reserveOperandSpace(PN->getNumOperands()/2);
  InsertNewInstBefore(NewPN, *PN);
  NewPN->takeName(PN);

  // Next, add all of the operands to the PHI.
  if (SelectInst *SI = dyn_cast<SelectInst>(&I)) {
    // We only currently try to fold the condition of a select when it is a phi,
    // not the true/false values.
    Value *TrueV = SI->getTrueValue();
    Value *FalseV = SI->getFalseValue();
    BasicBlock *PhiTransBB = PN->getParent();
    for (unsigned i = 0; i != NumPHIValues; ++i) {
      BasicBlock *ThisBB = PN->getIncomingBlock(i);
      Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB);
      Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB);
      Value *InV = 0;
      if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
        InV = InC->isNullValue() ? FalseVInPred : TrueVInPred;
      } else {
        assert(PN->getIncomingBlock(i) == NonConstBB);
        InV = SelectInst::Create(PN->getIncomingValue(i), TrueVInPred,
                                 FalseVInPred,
                                 "phitmp", NonConstBB->getTerminator());
        Worklist.Add(cast<Instruction>(InV));
      }
      NewPN->addIncoming(InV, ThisBB);
    }
  } else if (I.getNumOperands() == 2) {
    Constant *C = cast<Constant>(I.getOperand(1));
    for (unsigned i = 0; i != NumPHIValues; ++i) {
      Value *InV = 0;
      if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
        if (CmpInst *CI = dyn_cast<CmpInst>(&I))
          InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);
        else
          InV = ConstantExpr::get(I.getOpcode(), InC, C);
      } else {
        assert(PN->getIncomingBlock(i) == NonConstBB);
        if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I))
          InV = BinaryOperator::Create(BO->getOpcode(),
                                       PN->getIncomingValue(i), C, "phitmp",
                                       NonConstBB->getTerminator());
        else if (CmpInst *CI = dyn_cast<CmpInst>(&I))
          InV = CmpInst::Create(CI->getOpcode(),
                                CI->getPredicate(),
                                PN->getIncomingValue(i), C, "phitmp",
                                NonConstBB->getTerminator());
        else
          llvm_unreachable("Unknown binop!");

        Worklist.Add(cast<Instruction>(InV));
      }
      NewPN->addIncoming(InV, PN->getIncomingBlock(i));
    }
  } else {
    CastInst *CI = cast<CastInst>(&I);
    const Type *RetTy = CI->getType();
    for (unsigned i = 0; i != NumPHIValues; ++i) {
      Value *InV;
      if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
        InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy);
      } else {
        assert(PN->getIncomingBlock(i) == NonConstBB);
        InV = CastInst::Create(CI->getOpcode(), PN->getIncomingValue(i),
                               I.getType(), "phitmp",
                               NonConstBB->getTerminator());
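        // Queue the newly created cast for revisiting, as the binary
        // operator and compare arms above do.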
        Worklist.Add(cast<Instruction>(InV));
      }
      NewPN->addIncoming(InV, PN->getIncomingBlock(i));
    }
  }
  return ReplaceInstUsesWith(I, NewPN);
}


/// WillNotOverflowSignedAdd - Return true if we can prove that:
///    (sext (add LHS, RHS))  === (add (sext LHS), (sext RHS))
/// This basically requires proving that the add in the original type would not
/// overflow to change the sign bit or have a carry out.
bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) {
  // There are different heuristics we can use for this.  Here are some simple
  // ones.

  // Add has the property that adding any two 2's complement numbers can only
  // have one carry bit which can change a sign.  As such, if LHS and RHS each
  // have at least two sign bits, we know that the addition of the two values
  // will sign extend fine.
  if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1)
    return true;


  // If one of the operands only has one non-zero bit, and if the other operand
  // has a known-zero bit in a more significant place than it (not including the
  // sign bit) the ripple may go up to and fill the zero, but won't change the
  // sign.  For example, (X & ~4) + 1.

  // TODO: Implement.

  return false;
}


Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
  bool Changed = SimplifyCommutative(I);
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);

  if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
                                 I.hasNoUnsignedWrap(), TD))
    return ReplaceInstUsesWith(I, V);


  if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
    if (ConstantInt *CI = dyn_cast<ConstantInt>(RHSC)) {
      // X + (signbit) --> X ^ signbit
      const APInt& Val = CI->getValue();
      uint32_t BitWidth = Val.getBitWidth();
      if (Val == APInt::getSignBit(BitWidth))
        return BinaryOperator::CreateXor(LHS, RHS);

      // See if SimplifyDemandedBits can simplify this.  This handles stuff like
      // (X & 254)+1 -> (X&254)|1
      if (SimplifyDemandedInstructionBits(I))
        return &I;

      // zext(bool) + C -> bool ? C + 1 : C
      if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS))
        if (ZI->getSrcTy() == Type::getInt1Ty(*Context))
          return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI);
    }

    if (isa<PHINode>(LHS))
      if (Instruction *NV = FoldOpIntoPhi(I))
        return NV;

    ConstantInt *XorRHS = 0;
    Value *XorLHS = 0;
    if (isa<ConstantInt>(RHSC) &&
        match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) {
      uint32_t TySizeBits = I.getType()->getScalarSizeInBits();
      const APInt& RHSVal = cast<ConstantInt>(RHSC)->getValue();

      uint32_t Size = TySizeBits / 2;
      APInt C0080Val(APInt(TySizeBits, 1ULL).shl(Size - 1));
      APInt CFF80Val(-C0080Val);
      do {
        if (TySizeBits > Size) {
          // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext.
          // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext.
          if ((RHSVal == CFF80Val && XorRHS->getValue() == C0080Val) ||
              (RHSVal == C0080Val && XorRHS->getValue() == CFF80Val)) {
            // This is a sign extend if the top bits are known zero.
            if (!MaskedValueIsZero(XorLHS,
                   APInt::getHighBitsSet(TySizeBits, TySizeBits - Size)))
              Size = 0;  // Not a sign ext, but can't be any others either.
            break;
          }
        }
        Size >>= 1;
        C0080Val = APIntOps::lshr(C0080Val, Size);
        CFF80Val = APIntOps::ashr(CFF80Val, Size);
      } while (Size >= 1);

      // FIXME: This shouldn't be necessary. When the backends can handle types
      // with funny bit widths then this switch statement should be removed. It
      // is just here to get the size of the "middle" type back up to something
      // that the back ends can handle.
      const Type *MiddleType = 0;
      switch (Size) {
      default: break;
      case 32: MiddleType = Type::getInt32Ty(*Context); break;
      case 16: MiddleType = Type::getInt16Ty(*Context); break;
      case  8: MiddleType = Type::getInt8Ty(*Context); break;
      }
      if (MiddleType) {
        Value *NewTrunc = Builder->CreateTrunc(XorLHS, MiddleType, "sext");
        return new SExtInst(NewTrunc, I.getType(), I.getName());
      }
    }
  }

  if (I.getType() == Type::getInt1Ty(*Context))
    return BinaryOperator::CreateXor(LHS, RHS);

  // X + X --> X << 1
  if (I.getType()->isInteger()) {
    if (Instruction *Result = AssociativeOpt(I, AddRHS(RHS)))
      return Result;

    if (Instruction *RHSI = dyn_cast<Instruction>(RHS)) {
      if (RHSI->getOpcode() == Instruction::Sub)
        if (LHS == RHSI->getOperand(1))                   // A + (B - A) --> B
          return ReplaceInstUsesWith(I, RHSI->getOperand(0));
    }
    if (Instruction *LHSI = dyn_cast<Instruction>(LHS)) {
      if (LHSI->getOpcode() == Instruction::Sub)
        if (RHS == LHSI->getOperand(1))                   // (B - A) + A --> B
          return ReplaceInstUsesWith(I, LHSI->getOperand(0));
    }
  }

  // -A + B  -->  B - A
  // -A + -B  -->  -(A + B)
  if (Value *LHSV = dyn_castNegVal(LHS)) {
    if (LHS->getType()->isIntOrIntVector()) {
      if (Value *RHSV = dyn_castNegVal(RHS)) {
        Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum");
        return BinaryOperator::CreateNeg(NewAdd);
      }
    }

    return BinaryOperator::CreateSub(RHS, LHSV);
  }

  // A + -B  -->  A - B
  if (!isa<Constant>(RHS))
    if (Value *V = dyn_castNegVal(RHS))
      return BinaryOperator::CreateSub(LHS, V);


  ConstantInt *C2;
  if (Value *X = dyn_castFoldableMul(LHS, C2)) {
    if (X == RHS)   // X*C + X --> X * (C+1)
      return BinaryOperator::CreateMul(RHS, AddOne(C2));

    // X*C1 + X*C2 --> X * (C1+C2)
    ConstantInt *C1;
    if (X == dyn_castFoldableMul(RHS, C1))
      return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2));
  }

  // X + X*C --> X * (C+1)
  if (dyn_castFoldableMul(RHS, C2) == LHS)
    return BinaryOperator::CreateMul(LHS, AddOne(C2));

  // X + ~X --> -1   since   ~X = -X-1
  if (dyn_castNotVal(LHS) == RHS ||
      dyn_castNotVal(RHS) == LHS)
    return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));


  // (A & C1)+(B & C2) --> (A & C1)|(B & C2) iff C1&C2 == 0
  if (match(RHS, m_And(m_Value(), m_ConstantInt(C2))))
    if (Instruction *R = AssociativeOpt(I, AddMaskingAnd(C2)))
      return R;

  // A+B --> A|B iff A and B have no bits set in common.
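  // E.g. (illustrative): if %a is known to have only bits 0-3 possibly set
  // and %b only bits 4-7, no carry can occur and "add i32 %a, %b" becomes
  // "or i32 %a, %b".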
  if (const IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
    APInt Mask = APInt::getAllOnesValue(IT->getBitWidth());
    APInt LHSKnownOne(IT->getBitWidth(), 0);
    APInt LHSKnownZero(IT->getBitWidth(), 0);
    ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
    if (LHSKnownZero != 0) {
      APInt RHSKnownOne(IT->getBitWidth(), 0);
      APInt RHSKnownZero(IT->getBitWidth(), 0);
      ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);

      // No bits in common -> bitwise or.
      if ((LHSKnownZero|RHSKnownZero).isAllOnesValue())
        return BinaryOperator::CreateOr(LHS, RHS);
    }
  }

  // W*X + Y*Z --> W * (X+Z)  iff W == Y
  if (I.getType()->isIntOrIntVector()) {
    Value *W, *X, *Y, *Z;
    if (match(LHS, m_Mul(m_Value(W), m_Value(X))) &&
        match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) {
      if (W != Y) {
        if (W == Z) {
          std::swap(Y, Z);
        } else if (Y == X) {
          std::swap(W, X);
        } else if (X == Z) {
          std::swap(Y, Z);
          std::swap(W, X);
        }
      }

      if (W == Y) {
        Value *NewAdd = Builder->CreateAdd(X, Z, LHS->getName());
        return BinaryOperator::CreateMul(W, NewAdd);
      }
    }
  }

  if (ConstantInt *CRHS = dyn_cast<ConstantInt>(RHS)) {
    Value *X = 0;
    if (match(LHS, m_Not(m_Value(X))))    // ~X + C --> (C-1) - X
      return BinaryOperator::CreateSub(SubOne(CRHS), X);

    // (X & FF00) + xx00  -> (X+xx00) & FF00
    if (LHS->hasOneUse() &&
        match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) {
      Constant *Anded = ConstantExpr::getAnd(CRHS, C2);
      if (Anded == CRHS) {
        // See if all bits from the first bit set in the Add RHS up are included
        // in the mask.  First, get the rightmost bit.
        const APInt& AddRHSV = CRHS->getValue();

        // Form a mask of all bits from the lowest bit added through the top.
        APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1));

        // See if the and mask includes all of these bits.
        APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue());

        if (AddRHSHighBits == AddRHSHighBitsAnd) {
          // Okay, the xform is safe.  Insert the new add pronto.
          Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName());
          return BinaryOperator::CreateAnd(NewAdd, C2);
        }
      }
    }

    // Try to fold constant add into select arguments.
    if (SelectInst *SI = dyn_cast<SelectInst>(LHS))
      if (Instruction *R = FoldOpIntoSelect(I, SI, this))
        return R;
  }

  // add (select X 0 (sub n A)) A  -->  select X A n
  {
    SelectInst *SI = dyn_cast<SelectInst>(LHS);
    Value *A = RHS;
    if (!SI) {
      SI = dyn_cast<SelectInst>(RHS);
      A = LHS;
    }
    if (SI && SI->hasOneUse()) {
      Value *TV = SI->getTrueValue();
      Value *FV = SI->getFalseValue();
      Value *N;

      // Can we fold the add into the argument of the select?
      // We check both true and false select arguments for a matching subtract.
      if (match(FV, m_Zero()) &&
          match(TV, m_Sub(m_Value(N), m_Specific(A))))
        // Fold the add into the true select value.
        return SelectInst::Create(SI->getCondition(), N, A);
      if (match(TV, m_Zero()) &&
          match(FV, m_Sub(m_Value(N), m_Specific(A))))
        // Fold the add into the false select value.
        return SelectInst::Create(SI->getCondition(), A, N);
    }
  }

  // Check for (add (sext x), y), see if we can merge this into an
  // integer add followed by a sext.
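  // E.g. (illustrative):
  //     %x32 = sext i8 %x to i32
  //     %r   = add i32 %x32, 5
  // becomes, when the narrow add provably cannot overflow,
  //     %t = add nsw i8 %x, 5
  //     %r = sext i8 %t to i32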
  if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) {
    // (add (sext x), cst) --> (sext (add x, cst'))
    if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
      Constant *CI =
        ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
      if (LHSConv->hasOneUse() &&
          ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
          WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
        // Insert the new, smaller add.
        Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
                                              CI, "addconv");
        return new SExtInst(NewAdd, I.getType());
      }
    }

    // (add (sext x), (sext y)) --> (sext (add int x, y))
    if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) {
      // Only do this if x/y have the same type, if at least one of them has a
      // single use (so we don't increase the number of sexts), and if the
      // integer add will not overflow.
      if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
          (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
          WillNotOverflowSignedAdd(LHSConv->getOperand(0),
                                   RHSConv->getOperand(0))) {
        // Insert the new integer add.
        Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
                                              RHSConv->getOperand(0),
                                              "addconv");
        return new SExtInst(NewAdd, I.getType());
      }
    }
  }

  return Changed ? &I : 0;
}

Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
  bool Changed = SimplifyCommutative(I);
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);

  if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
    // X + -0.0 --> X  (-0.0 is fadd's identity; +0.0 is not, since
    // -0.0 + +0.0 is +0.0).
    if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
      if (CFP->isExactlyValue(ConstantFP::getNegativeZero
                              (I.getType())->getValueAPF()))
        return ReplaceInstUsesWith(I, LHS);
    }

    if (isa<PHINode>(LHS))
      if (Instruction *NV = FoldOpIntoPhi(I))
        return NV;
  }

  // -A + B  -->  B - A
  // -A + -B  -->  -(A + B)
  if (Value *LHSV = dyn_castFNegVal(LHS))
    return BinaryOperator::CreateFSub(RHS, LHSV);

  // A + -B  -->  A - B
  if (!isa<Constant>(RHS))
    if (Value *V = dyn_castFNegVal(RHS))
      return BinaryOperator::CreateFSub(LHS, V);

  // Check for X+0.0.  Simplify it to X if we know X is not -0.0.
  if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS))
    if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS))
      return ReplaceInstUsesWith(I, LHS);

  // Check for (add double (sitofp x), y), see if we can merge this into an
  // integer add followed by a promotion.
  if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) {
    // (add double (sitofp x), fpcst) --> (sitofp (add int x, intcst))
    // ... if the constant fits in the integer value.  This is useful for things
    // like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer
    // requires a constant pool load, and generally allows the add to be better
    // instcombined.
    if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) {
      Constant *CI =
        ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType());
      if (LHSConv->hasOneUse() &&
          ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
          WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
        // Insert the new integer add.
        Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
                                              CI, "addconv");
        return new SIToFPInst(NewAdd, I.getType());
      }
    }

    // (add double (sitofp x), (sitofp y)) --> (sitofp (add int x, y))
    if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) {
      // Only do this if x/y have the same type, if at least one of them has a
      // single use (so we don't increase the number of int->fp conversions),
      // and if the integer add will not overflow.
      if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
          (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
          WillNotOverflowSignedAdd(LHSConv->getOperand(0),
                                   RHSConv->getOperand(0))) {
        // Insert the new integer add.
        Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
                                              RHSConv->getOperand(0),
                                              "addconv");
        return new SIToFPInst(NewAdd, I.getType());
      }
    }
  }

  return Changed ? &I : 0;
}


/// EmitGEPOffset - Given a getelementptr instruction/constantexpr, emit the
/// code necessary to compute the offset from the base pointer (without adding
/// in the base pointer).  Return the result as a signed integer of intptr size.
static Value *EmitGEPOffset(User *GEP, InstCombiner &IC) {
  TargetData &TD = *IC.getTargetData();
  gep_type_iterator GTI = gep_type_begin(GEP);
  const Type *IntPtrTy = TD.getIntPtrType(GEP->getContext());
  Value *Result = Constant::getNullValue(IntPtrTy);

  // Build a mask for high order bits.
  unsigned IntPtrWidth = TD.getPointerSizeInBits();
  uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);

  for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e;
       ++i, ++GTI) {
    Value *Op = *i;
    uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()) & PtrSizeMask;
    if (ConstantInt *OpC = dyn_cast<ConstantInt>(Op)) {
      if (OpC->isZero()) continue;

      // Handle a struct index, which adds its field offset to the pointer.
      if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
        Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());

        Result = IC.Builder->CreateAdd(Result,
                                       ConstantInt::get(IntPtrTy, Size),
                                       GEP->getName()+".offs");
        continue;
      }

      Constant *Scale = ConstantInt::get(IntPtrTy, Size);
      Constant *OC =
        ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/);
      Scale = ConstantExpr::getMul(OC, Scale);
      // Emit an add instruction.
      Result = IC.Builder->CreateAdd(Result, Scale, GEP->getName()+".offs");
      continue;
    }
    // Convert to correct type.
    if (Op->getType() != IntPtrTy)
      Op = IC.Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c");
    if (Size != 1) {
      Constant *Scale = ConstantInt::get(IntPtrTy, Size);
      // We'll let instcombine(mul) convert this to a shl if possible.
      Op = IC.Builder->CreateMul(Op, Scale, GEP->getName()+".idx");
    }

    // Emit an add instruction.
    Result = IC.Builder->CreateAdd(Op, Result, GEP->getName()+".offs");
  }
  return Result;
}
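// E.g. (illustrative): for "getelementptr {i32, [10 x i16]}* %p, i32 0,
// i32 1, i32 %i" on a 64-bit target, EmitGEPOffset builds roughly
//     %idx  = mul i64 %i.c, 2              ; 2 bytes per i16 element
//     %offs = add i64 %idx, 4              ; field 1 starts at offset 4
// leaving the base pointer itself out of the computation.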

/// EvaluateGEPOffsetExpression - Return a value that can be used to compare
/// the *offset* implied by a GEP to zero.  For example, if we have &A[i], we
/// want to return 'i' for "icmp ne i, 0".  Note that, in general, indices can
/// be complex, and scales are involved.  The above expression would also be
/// legal to codegen as "icmp ne (i*4), 0" (assuming A is a pointer to i32).
/// This latter form is less amenable to optimization though, and we are
/// allowed to generate the first by knowing that pointer arithmetic doesn't
/// overflow.
///
/// If we can't emit an optimized form for this expression, this returns null.
///
static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I,
                                          InstCombiner &IC) {
  TargetData &TD = *IC.getTargetData();
  gep_type_iterator GTI = gep_type_begin(GEP);

  // Check to see if this gep only has a single variable index.  If so, and if
  // any constant indices are a multiple of its scale, then we can compute this
  // in terms of the scale of the variable index.  For example, if the GEP
  // implies an offset of "12 + i*4", then we can codegen this as "3 + i",
  // because the expression will cross zero at the same point.
  unsigned i, e = GEP->getNumOperands();
  int64_t Offset = 0;
  for (i = 1; i != e; ++i, ++GTI) {
    if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
      // Compute the aggregate offset of constant indices.
      if (CI->isZero()) continue;

      // Handle a struct index, which adds its field offset to the pointer.
      if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
        Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
      } else {
        uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
        Offset += Size*CI->getSExtValue();
      }
    } else {
      // Found our variable index.
      break;
    }
  }

  // If there are no variable indices, we must have a constant offset, just
  // evaluate it the general way.
  if (i == e) return 0;

  Value *VariableIdx = GEP->getOperand(i);
  // Determine the scale factor of the variable element.  For example, this is
  // 4 if the variable index is into an array of i32.
  uint64_t VariableScale = TD.getTypeAllocSize(GTI.getIndexedType());

  // Verify that there are no other variable indices.  If so, emit the hard way.
  for (++i, ++GTI; i != e; ++i, ++GTI) {
    ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i));
    if (!CI) return 0;

    // Compute the aggregate offset of constant indices.
    if (CI->isZero()) continue;

    // Handle a struct index, which adds its field offset to the pointer.
    if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
      Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
    } else {
      uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
      Offset += Size*CI->getSExtValue();
    }
  }

  // Okay, we know we have a single variable index, which must be a
  // pointer/array/vector index.  If there is no offset, life is simple, return
  // the index.
  unsigned IntPtrWidth = TD.getPointerSizeInBits();
  if (Offset == 0) {
    // Cast to intptrty in case a truncation occurs.  If an extension is needed,
    // we don't need to bother extending: the extension won't affect where the
    // computation crosses zero.
    if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth)
      VariableIdx = new TruncInst(VariableIdx,
                                  TD.getIntPtrType(VariableIdx->getContext()),
                                  VariableIdx->getName(), &I);
    return VariableIdx;
  }

  // Otherwise, there is an index.  The computation we will do will be modulo
  // the pointer size, so get it.
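  // E.g. (illustrative): for a GEP implying the offset "12 + 4*i", the code
  // below emits "%offset = add i64 %i, 3" and the caller compares that
  // against zero instead.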
  uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);

  Offset &= PtrSizeMask;
  VariableScale &= PtrSizeMask;

  // To do this transformation, any constant index must be a multiple of the
  // variable scale factor.  For example, we can evaluate "12 + 4*i" as "3 + i",
  // but we can't evaluate "10 + 3*i" in terms of i.  Check that the offset is a
  // multiple of the variable scale.
  int64_t NewOffs = Offset / (int64_t)VariableScale;
  if (Offset != NewOffs*(int64_t)VariableScale)
    return 0;

  // Okay, we can do this evaluation.  Start by converting the index to intptr.
  const Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext());
  if (VariableIdx->getType() != IntPtrTy)
    VariableIdx = CastInst::CreateIntegerCast(VariableIdx, IntPtrTy,
                                              true /*SExt*/,
                                              VariableIdx->getName(), &I);
  Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs);
  return BinaryOperator::CreateAdd(VariableIdx, OffsetVal, "offset", &I);
}


/// Optimize differences of pointers into the same array into a size.  Consider:
///  &A[10] - &A[0]: we should compile this to "10".  LHS/RHS are the pointer
/// operands to the ptrtoint instructions for the LHS/RHS of the subtract.
///
Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
                                               const Type *Ty) {
  assert(TD && "Must have target data info for this");

  // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize
  // this.
  bool Swapped;
  GetElementPtrInst *GEP = 0;
  ConstantExpr *CstGEP = 0;

  // TODO: Could also optimize &A[i] - &A[j] -> "i-j", and "&A.foo[i] - &A.foo".
  // For now we require one side to be the base pointer "A" or a constant
  // expression derived from it.
  if (GetElementPtrInst *LHSGEP = dyn_cast<GetElementPtrInst>(LHS)) {
    // (gep X, ...) - X
    if (LHSGEP->getOperand(0) == RHS) {
      GEP = LHSGEP;
      Swapped = false;
    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(RHS)) {
      // (gep X, ...) - (ce_gep X, ...)
      if (CE->getOpcode() == Instruction::GetElementPtr &&
          LHSGEP->getOperand(0) == CE->getOperand(0)) {
        CstGEP = CE;
        GEP = LHSGEP;
        Swapped = false;
      }
    }
  }

  if (GetElementPtrInst *RHSGEP = dyn_cast<GetElementPtrInst>(RHS)) {
    // X - (gep X, ...)
    if (RHSGEP->getOperand(0) == LHS) {
      GEP = RHSGEP;
      Swapped = true;
    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(LHS)) {
      // (ce_gep X, ...) - (gep X, ...)
      if (CE->getOpcode() == Instruction::GetElementPtr &&
          RHSGEP->getOperand(0) == CE->getOperand(0)) {
        CstGEP = CE;
        GEP = RHSGEP;
        Swapped = true;
      }
    }
  }

  if (GEP == 0)
    return 0;

  // Emit the offset of the GEP as an intptr_t.
  Value *Result = EmitGEPOffset(GEP, *this);

  // If we had a constant expression GEP on the other side offsetting the
  // pointer, subtract it from the offset we have.
  if (CstGEP) {
    Value *CstOffset = EmitGEPOffset(CstGEP, *this);
    Result = Builder->CreateSub(Result, CstOffset);
  }


  // If we have p - gep(p, ...) then we have to negate the result.
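  // E.g. (illustrative): "%p - (gep %p, i64 3)" over i32 elements computes
  // the offset 12 and, because the GEP was the subtrahend, negates it to
  // yield -12.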
  if (Swapped)
    Result = Builder->CreateNeg(Result, "diff.neg");

  return Builder->CreateIntCast(Result, Ty, true);
}


Instruction *InstCombiner::visitSub(BinaryOperator &I) {
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);

  if (Op0 == Op1)                        // sub X, X  -> 0
    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));

  // If this is a 'B = x-(-A)', change to B = x+A.  This preserves NSW/NUW.
  if (Value *V = dyn_castNegVal(Op1)) {
    BinaryOperator *Res = BinaryOperator::CreateAdd(Op0, V);
    Res->setHasNoSignedWrap(I.hasNoSignedWrap());
    Res->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
    return Res;
  }

  if (isa<UndefValue>(Op0))
    return ReplaceInstUsesWith(I, Op0);    // undef - X -> undef
  if (isa<UndefValue>(Op1))
    return ReplaceInstUsesWith(I, Op1);    // X - undef -> undef
  if (I.getType() == Type::getInt1Ty(*Context))
    return BinaryOperator::CreateXor(Op0, Op1);

  if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) {
    // Replace (-1 - A) with (~A).
    if (C->isAllOnesValue())
      return BinaryOperator::CreateNot(Op1);

    // C - ~X == X + (1+C)
    Value *X = 0;
    if (match(Op1, m_Not(m_Value(X))))
      return BinaryOperator::CreateAdd(X, AddOne(C));

    // -(X >>u 31) -> (X >>s 31)
    // -(X >>s 31) -> (X >>u 31)
    if (C->isZero()) {
      if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op1)) {
        if (SI->getOpcode() == Instruction::LShr) {
          if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) {
            // Check to see if we are shifting out everything but the sign bit.
            if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) ==
                SI->getType()->getPrimitiveSizeInBits()-1) {
              // Ok, the transformation is safe.  Insert AShr.
              return BinaryOperator::Create(Instruction::AShr,
                                          SI->getOperand(0), CU, SI->getName());
            }
          }
        } else if (SI->getOpcode() == Instruction::AShr) {
          if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) {
            // Check to see if we are shifting out everything but the sign bit.
            if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) ==
                SI->getType()->getPrimitiveSizeInBits()-1) {
              // Ok, the transformation is safe.  Insert LShr.
              return BinaryOperator::CreateLShr(
                                          SI->getOperand(0), CU, SI->getName());
            }
          }
        }
      }
    }

    // Try to fold constant sub into select arguments.
    if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
      if (Instruction *R = FoldOpIntoSelect(I, SI, this))
        return R;

    // C - zext(bool) -> bool ? C - 1 : C
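    // E.g. (illustrative): "sub i32 7, (zext i1 %b to i32)" becomes
    // "select i1 %b, i32 6, i32 7".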
    if (ZExtInst *ZI = dyn_cast<ZExtInst>(Op1))
      if (ZI->getSrcTy() == Type::getInt1Ty(*Context))
        return SelectInst::Create(ZI->getOperand(0), SubOne(C), C);
  }

  if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
    if (Op1I->getOpcode() == Instruction::Add) {
      if (Op1I->getOperand(0) == Op0)              // X-(X+Y) == -Y
        return BinaryOperator::CreateNeg(Op1I->getOperand(1),
                                         I.getName());
      else if (Op1I->getOperand(1) == Op0)         // X-(Y+X) == -Y
        return BinaryOperator::CreateNeg(Op1I->getOperand(0),
                                         I.getName());
      else if (ConstantInt *CI1 = dyn_cast<ConstantInt>(I.getOperand(0))) {
        if (ConstantInt *CI2 = dyn_cast<ConstantInt>(Op1I->getOperand(1)))
          // C1-(X+C2) --> (C1-C2)-X
          return BinaryOperator::CreateSub(
            ConstantExpr::getSub(CI1, CI2), Op1I->getOperand(0));
      }
    }

    if (Op1I->hasOneUse()) {
      // Replace (x - (y - z)) with (x + (z - y)) if the (y - z) subexpression
      // is not used by anyone else...
      //
      if (Op1I->getOpcode() == Instruction::Sub) {
        // Swap the two operands of the subexpr...
        Value *IIOp0 = Op1I->getOperand(0), *IIOp1 = Op1I->getOperand(1);
        Op1I->setOperand(0, IIOp1);
        Op1I->setOperand(1, IIOp0);

        // Create the new top level add instruction...
        return BinaryOperator::CreateAdd(Op0, Op1);
      }

      // Replace (A - (A & B)) with (A & ~B) if this is the only use of (A&B)...
      //
      if (Op1I->getOpcode() == Instruction::And &&
          (Op1I->getOperand(0) == Op0 || Op1I->getOperand(1) == Op0)) {
        Value *OtherOp = Op1I->getOperand(Op1I->getOperand(0) == Op0);

        Value *NewNot = Builder->CreateNot(OtherOp, "B.not");
        return BinaryOperator::CreateAnd(Op0, NewNot);
      }

      // 0 - (X sdiv C)  -> (X sdiv -C)
      if (Op1I->getOpcode() == Instruction::SDiv)
        if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
          if (CSI->isZero())
            if (Constant *DivRHS = dyn_cast<Constant>(Op1I->getOperand(1)))
              return BinaryOperator::CreateSDiv(Op1I->getOperand(0),
                                                ConstantExpr::getNeg(DivRHS));

      // X - X*C --> X * (1-C)
      ConstantInt *C2 = 0;
      if (dyn_castFoldableMul(Op1I, C2) == Op0) {
        Constant *CP1 =
          ConstantExpr::getSub(ConstantInt::get(I.getType(), 1),
                               C2);
        return BinaryOperator::CreateMul(Op0, CP1);
      }
    }
  }

  if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
    if (Op0I->getOpcode() == Instruction::Add) {
      if (Op0I->getOperand(0) == Op1)             // (Y+X)-Y == X
        return ReplaceInstUsesWith(I, Op0I->getOperand(1));
      else if (Op0I->getOperand(1) == Op1)        // (X+Y)-Y == X
        return ReplaceInstUsesWith(I, Op0I->getOperand(0));
    } else if (Op0I->getOpcode() == Instruction::Sub) {
      if (Op0I->getOperand(0) == Op1)             // (X-Y)-X == -Y
        return BinaryOperator::CreateNeg(Op0I->getOperand(1),
                                         I.getName());
    }
  }

  ConstantInt *C1;
  if (Value *X = dyn_castFoldableMul(Op0, C1)) {
    if (X == Op1)  // X*C - X --> X * (C-1)
      return BinaryOperator::CreateMul(Op1, SubOne(C1));

    ConstantInt *C2;   // X*C1 - X*C2 -> X * (C1-C2)
    if (X == dyn_castFoldableMul(Op1, C2))
      return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2));
  }

  // Optimize differences of pointers into the same array into a size.
  // Consider: &A[10] - &A[0]: we should compile this to "10".
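  // E.g. (illustrative): for i32 elements the sub of the two ptrtoints
  // below folds to the constant byte offset 40, and the element-size
  // division that frontends emit alongside it then folds to 10.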
2909 if (TD) { 2910 Value *LHSOp, *RHSOp; 2911 if (match(Op0, m_PtrToInt(m_Value(LHSOp))) && 2912 match(Op1, m_PtrToInt(m_Value(RHSOp)))) 2913 if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType())) 2914 return ReplaceInstUsesWith(I, Res); 2915 2916 // trunc(p)-trunc(q) -> trunc(p-q) 2917 if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) && 2918 match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp))))) 2919 if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType())) 2920 return ReplaceInstUsesWith(I, Res); 2921 } 2922 2923 return 0; 2924} 2925 2926Instruction *InstCombiner::visitFSub(BinaryOperator &I) { 2927 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 2928 2929 // If this is a 'B = x-(-A)', change to B = x+A... 2930 if (Value *V = dyn_castFNegVal(Op1)) 2931 return BinaryOperator::CreateFAdd(Op0, V); 2932 2933 if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) { 2934 if (Op1I->getOpcode() == Instruction::FAdd) { 2935 if (Op1I->getOperand(0) == Op0) // X-(X+Y) == -Y 2936 return BinaryOperator::CreateFNeg(Op1I->getOperand(1), 2937 I.getName()); 2938 else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y 2939 return BinaryOperator::CreateFNeg(Op1I->getOperand(0), 2940 I.getName()); 2941 } 2942 } 2943 2944 return 0; 2945} 2946 2947/// isSignBitCheck - Given an exploded icmp instruction, return true if the 2948/// comparison only checks the sign bit. If it only checks the sign bit, set 2949/// TrueIfSigned if the result of the comparison is true when the input value is 2950/// signed. 2951static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS, 2952 bool &TrueIfSigned) { 2953 switch (pred) { 2954 case ICmpInst::ICMP_SLT: // True if LHS s< 0 2955 TrueIfSigned = true; 2956 return RHS->isZero(); 2957 case ICmpInst::ICMP_SLE: // True if LHS s<= RHS and RHS == -1 2958 TrueIfSigned = true; 2959 return RHS->isAllOnesValue(); 2960 case ICmpInst::ICMP_SGT: // True if LHS s> -1 2961 TrueIfSigned = false; 2962 return RHS->isAllOnesValue(); 2963 case ICmpInst::ICMP_UGT: 2964 // True if LHS u> RHS and RHS == high-bit-mask - 1 2965 TrueIfSigned = true; 2966 return RHS->getValue() == 2967 APInt::getSignedMaxValue(RHS->getType()->getPrimitiveSizeInBits()); 2968 case ICmpInst::ICMP_UGE: 2969 // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc) 2970 TrueIfSigned = true; 2971 return RHS->getValue().isSignBit(); 2972 default: 2973 return false; 2974 } 2975} 2976 2977Instruction *InstCombiner::visitMul(BinaryOperator &I) { 2978 bool Changed = SimplifyCommutative(I); 2979 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 2980 2981 if (isa<UndefValue>(Op1)) // undef * X -> 0 2982 return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); 2983 2984 // Simplify mul instructions with a constant RHS. 
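  // For example, the constant-RHS folds below include:
  //   mul i32 %x, 8   -->  shl i32 %x, 3     (power-of-two multiplier)
  //   mul i32 %x, 1   -->  %x
  //   mul i32 %x, -1  -->  sub i32 0, %x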
2985 if (Constant *Op1C = dyn_cast<Constant>(Op1)) { 2986 if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1C)) { 2987 2988 // ((X << C1)*C2) == (X * (C2 << C1)) 2989 if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0)) 2990 if (SI->getOpcode() == Instruction::Shl) 2991 if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1))) 2992 return BinaryOperator::CreateMul(SI->getOperand(0), 2993 ConstantExpr::getShl(CI, ShOp)); 2994 2995 if (CI->isZero()) 2996 return ReplaceInstUsesWith(I, Op1C); // X * 0 == 0 2997 if (CI->equalsInt(1)) // X * 1 == X 2998 return ReplaceInstUsesWith(I, Op0); 2999 if (CI->isAllOnesValue()) // X * -1 == 0 - X 3000 return BinaryOperator::CreateNeg(Op0, I.getName()); 3001 3002 const APInt& Val = cast<ConstantInt>(CI)->getValue(); 3003 if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C 3004 return BinaryOperator::CreateShl(Op0, 3005 ConstantInt::get(Op0->getType(), Val.logBase2())); 3006 } 3007 } else if (isa<VectorType>(Op1C->getType())) { 3008 if (Op1C->isNullValue()) 3009 return ReplaceInstUsesWith(I, Op1C); 3010 3011 if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) { 3012 if (Op1V->isAllOnesValue()) // X * -1 == 0 - X 3013 return BinaryOperator::CreateNeg(Op0, I.getName()); 3014 3015 // As above, vector X*splat(1.0) -> X in all defined cases. 3016 if (Constant *Splat = Op1V->getSplatValue()) { 3017 if (ConstantInt *CI = dyn_cast<ConstantInt>(Splat)) 3018 if (CI->equalsInt(1)) 3019 return ReplaceInstUsesWith(I, Op0); 3020 } 3021 } 3022 } 3023 3024 if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) 3025 if (Op0I->getOpcode() == Instruction::Add && Op0I->hasOneUse() && 3026 isa<ConstantInt>(Op0I->getOperand(1)) && isa<ConstantInt>(Op1C)) { 3027 // Canonicalize (X+C1)*C2 -> X*C2+C1*C2. 3028 Value *Add = Builder->CreateMul(Op0I->getOperand(0), Op1C, "tmp"); 3029 Value *C1C2 = Builder->CreateMul(Op1C, Op0I->getOperand(1)); 3030 return BinaryOperator::CreateAdd(Add, C1C2); 3031 3032 } 3033 3034 // Try to fold constant mul into select arguments. 3035 if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) 3036 if (Instruction *R = FoldOpIntoSelect(I, SI, this)) 3037 return R; 3038 3039 if (isa<PHINode>(Op0)) 3040 if (Instruction *NV = FoldOpIntoPhi(I)) 3041 return NV; 3042 } 3043 3044 if (Value *Op0v = dyn_castNegVal(Op0)) // -X * -Y = X*Y 3045 if (Value *Op1v = dyn_castNegVal(Op1)) 3046 return BinaryOperator::CreateMul(Op0v, Op1v); 3047 3048 // (X / Y) * Y = X - (X % Y) 3049 // (X / Y) * -Y = (X % Y) - X 3050 { 3051 Value *Op1C = Op1; 3052 BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0); 3053 if (!BO || 3054 (BO->getOpcode() != Instruction::UDiv && 3055 BO->getOpcode() != Instruction::SDiv)) { 3056 Op1C = Op0; 3057 BO = dyn_cast<BinaryOperator>(Op1); 3058 } 3059 Value *Neg = dyn_castNegVal(Op1C); 3060 if (BO && BO->hasOneUse() && 3061 (BO->getOperand(1) == Op1C || BO->getOperand(1) == Neg) && 3062 (BO->getOpcode() == Instruction::UDiv || 3063 BO->getOpcode() == Instruction::SDiv)) { 3064 Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1); 3065 3066 // If the division is exact, X % Y is zero. 
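      // For example, when the division cannot have a remainder:
      //   %q = sdiv exact i32 %x, %y
      //   %p = mul i32 %q, %y        -->  %x
      // and with the negated divisor the product folds to the negation of %x.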
3067 if (SDivOperator *SDiv = dyn_cast<SDivOperator>(BO)) 3068 if (SDiv->isExact()) { 3069 if (Op1BO == Op1C) 3070 return ReplaceInstUsesWith(I, Op0BO); 3071 return BinaryOperator::CreateNeg(Op0BO); 3072 } 3073 3074 Value *Rem; 3075 if (BO->getOpcode() == Instruction::UDiv) 3076 Rem = Builder->CreateURem(Op0BO, Op1BO); 3077 else 3078 Rem = Builder->CreateSRem(Op0BO, Op1BO); 3079 Rem->takeName(BO); 3080 3081 if (Op1BO == Op1C) 3082 return BinaryOperator::CreateSub(Op0BO, Rem); 3083 return BinaryOperator::CreateSub(Rem, Op0BO); 3084 } 3085 } 3086 3087 /// i1 mul -> i1 and. 3088 if (I.getType() == Type::getInt1Ty(*Context)) 3089 return BinaryOperator::CreateAnd(Op0, Op1); 3090 3091 // X*(1 << Y) --> X << Y 3092 // (1 << Y)*X --> X << Y 3093 { 3094 Value *Y; 3095 if (match(Op0, m_Shl(m_One(), m_Value(Y)))) 3096 return BinaryOperator::CreateShl(Op1, Y); 3097 if (match(Op1, m_Shl(m_One(), m_Value(Y)))) 3098 return BinaryOperator::CreateShl(Op0, Y); 3099 } 3100 3101 // If one of the operands of the multiply is a cast from a boolean value, then 3102 // we know the bool is either zero or one, so this is a 'masking' multiply. 3103 // X * Y (where Y is 0 or 1) -> X & (0-Y) 3104 if (!isa<VectorType>(I.getType())) { 3105 // -2 is "-1 << 1" so it is all bits set except the low one. 3106 APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true); 3107 3108 Value *BoolCast = 0, *OtherOp = 0; 3109 if (MaskedValueIsZero(Op0, Negative2)) 3110 BoolCast = Op0, OtherOp = Op1; 3111 else if (MaskedValueIsZero(Op1, Negative2)) 3112 BoolCast = Op1, OtherOp = Op0; 3113 3114 if (BoolCast) { 3115 Value *V = Builder->CreateSub(Constant::getNullValue(I.getType()), 3116 BoolCast, "tmp"); 3117 return BinaryOperator::CreateAnd(V, OtherOp); 3118 } 3119 } 3120 3121 return Changed ? &I : 0; 3122} 3123 3124Instruction *InstCombiner::visitFMul(BinaryOperator &I) { 3125 bool Changed = SimplifyCommutative(I); 3126 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 3127 3128 // Simplify mul instructions with a constant RHS... 3129 if (Constant *Op1C = dyn_cast<Constant>(Op1)) { 3130 if (ConstantFP *Op1F = dyn_cast<ConstantFP>(Op1C)) { 3131 // "In IEEE floating point, x*1 is not equivalent to x for nans. However, 3132 // ANSI says we can drop signals, so we can do this anyway." (from GCC) 3133 if (Op1F->isExactlyValue(1.0)) 3134 return ReplaceInstUsesWith(I, Op0); // Eliminate 'mul double %X, 1.0' 3135 } else if (isa<VectorType>(Op1C->getType())) { 3136 if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) { 3137 // As above, vector X*splat(1.0) -> X in all defined cases. 3138 if (Constant *Splat = Op1V->getSplatValue()) { 3139 if (ConstantFP *F = dyn_cast<ConstantFP>(Splat)) 3140 if (F->isExactlyValue(1.0)) 3141 return ReplaceInstUsesWith(I, Op0); 3142 } 3143 } 3144 } 3145 3146 // Try to fold constant mul into select arguments. 3147 if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) 3148 if (Instruction *R = FoldOpIntoSelect(I, SI, this)) 3149 return R; 3150 3151 if (isa<PHINode>(Op0)) 3152 if (Instruction *NV = FoldOpIntoPhi(I)) 3153 return NV; 3154 } 3155 3156 if (Value *Op0v = dyn_castFNegVal(Op0)) // -X * -Y = X*Y 3157 if (Value *Op1v = dyn_castFNegVal(Op1)) 3158 return BinaryOperator::CreateFMul(Op0v, Op1v); 3159 3160 return Changed ? &I : 0; 3161} 3162 3163/// SimplifyDivRemOfSelect - Try to fold a divide or remainder of a select 3164/// instruction. 3165bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) { 3166 SelectInst *SI = cast<SelectInst>(I.getOperand(1)); 3167 3168 // div/rem X, (Cond ? 
0 : Y) -> div/rem X, Y 3169 int NonNullOperand = -1; 3170 if (Constant *ST = dyn_cast<Constant>(SI->getOperand(1))) 3171 if (ST->isNullValue()) 3172 NonNullOperand = 2; 3173 // div/rem X, (Cond ? Y : 0) -> div/rem X, Y 3174 if (Constant *ST = dyn_cast<Constant>(SI->getOperand(2))) 3175 if (ST->isNullValue()) 3176 NonNullOperand = 1; 3177 3178 if (NonNullOperand == -1) 3179 return false; 3180 3181 Value *SelectCond = SI->getOperand(0); 3182 3183 // Change the div/rem to use 'Y' instead of the select. 3184 I.setOperand(1, SI->getOperand(NonNullOperand)); 3185 3186 // Okay, we know we replace the operand of the div/rem with 'Y' with no 3187 // problem. However, the select, or the condition of the select may have 3188 // multiple uses. Based on our knowledge that the operand must be non-zero, 3189 // propagate the known value for the select into other uses of it, and 3190 // propagate a known value of the condition into its other users. 3191 3192 // If the select and condition only have a single use, don't bother with this, 3193 // early exit. 3194 if (SI->use_empty() && SelectCond->hasOneUse()) 3195 return true; 3196 3197 // Scan the current block backward, looking for other uses of SI. 3198 BasicBlock::iterator BBI = &I, BBFront = I.getParent()->begin(); 3199 3200 while (BBI != BBFront) { 3201 --BBI; 3202 // If we found a call to a function, we can't assume it will return, so 3203 // information from below it cannot be propagated above it. 3204 if (isa<CallInst>(BBI) && !isa<IntrinsicInst>(BBI)) 3205 break; 3206 3207 // Replace uses of the select or its condition with the known values. 3208 for (Instruction::op_iterator I = BBI->op_begin(), E = BBI->op_end(); 3209 I != E; ++I) { 3210 if (*I == SI) { 3211 *I = SI->getOperand(NonNullOperand); 3212 Worklist.Add(BBI); 3213 } else if (*I == SelectCond) { 3214 *I = NonNullOperand == 1 ? ConstantInt::getTrue(*Context) : 3215 ConstantInt::getFalse(*Context); 3216 Worklist.Add(BBI); 3217 } 3218 } 3219 3220 // If we past the instruction, quit looking for it. 3221 if (&*BBI == SI) 3222 SI = 0; 3223 if (&*BBI == SelectCond) 3224 SelectCond = 0; 3225 3226 // If we ran out of things to eliminate, break out of the loop. 3227 if (SelectCond == 0 && SI == 0) 3228 break; 3229 3230 } 3231 return true; 3232} 3233 3234 3235/// This function implements the transforms on div instructions that work 3236/// regardless of the kind of div instruction it is (udiv, sdiv, or fdiv). It is 3237/// used by the visitors to those instructions. 3238/// @brief Transforms common to all three div instructions 3239Instruction *InstCombiner::commonDivTransforms(BinaryOperator &I) { 3240 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 3241 3242 // undef / X -> 0 for integer. 3243 // undef / X -> undef for FP (the undef could be a snan). 3244 if (isa<UndefValue>(Op0)) { 3245 if (Op0->getType()->isFPOrFPVector()) 3246 return ReplaceInstUsesWith(I, Op0); 3247 return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); 3248 } 3249 3250 // X / undef -> undef 3251 if (isa<UndefValue>(Op1)) 3252 return ReplaceInstUsesWith(I, Op1); 3253 3254 return 0; 3255} 3256 3257/// This function implements the transforms common to both integer division 3258/// instructions (udiv and sdiv). It is called by the visitors to those integer 3259/// division instructions. 
3260/// @brief Common integer divide transforms 3261Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { 3262 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 3263 3264 // (sdiv X, X) --> 1 (udiv X, X) --> 1 3265 if (Op0 == Op1) { 3266 if (const VectorType *Ty = dyn_cast<VectorType>(I.getType())) { 3267 Constant *CI = ConstantInt::get(Ty->getElementType(), 1); 3268 std::vector<Constant*> Elts(Ty->getNumElements(), CI); 3269 return ReplaceInstUsesWith(I, ConstantVector::get(Elts)); 3270 } 3271 3272 Constant *CI = ConstantInt::get(I.getType(), 1); 3273 return ReplaceInstUsesWith(I, CI); 3274 } 3275 3276 if (Instruction *Common = commonDivTransforms(I)) 3277 return Common; 3278 3279 // Handle cases involving: [su]div X, (select Cond, Y, Z) 3280 // This does not apply for fdiv. 3281 if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I)) 3282 return &I; 3283 3284 if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { 3285 // div X, 1 == X 3286 if (RHS->equalsInt(1)) 3287 return ReplaceInstUsesWith(I, Op0); 3288 3289 // (X / C1) / C2 -> X / (C1*C2) 3290 if (Instruction *LHS = dyn_cast<Instruction>(Op0)) 3291 if (Instruction::BinaryOps(LHS->getOpcode()) == I.getOpcode()) 3292 if (ConstantInt *LHSRHS = dyn_cast<ConstantInt>(LHS->getOperand(1))) { 3293 if (MultiplyOverflows(RHS, LHSRHS, 3294 I.getOpcode()==Instruction::SDiv)) 3295 return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); 3296 else 3297 return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0), 3298 ConstantExpr::getMul(RHS, LHSRHS)); 3299 } 3300 3301 if (!RHS->isZero()) { // avoid X udiv 0 3302 if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) 3303 if (Instruction *R = FoldOpIntoSelect(I, SI, this)) 3304 return R; 3305 if (isa<PHINode>(Op0)) 3306 if (Instruction *NV = FoldOpIntoPhi(I)) 3307 return NV; 3308 } 3309 } 3310 3311 // 0 / X == 0, we don't need to preserve faults! 3312 if (ConstantInt *LHS = dyn_cast<ConstantInt>(Op0)) 3313 if (LHS->equalsInt(0)) 3314 return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); 3315 3316 // It can't be division by zero, hence it must be division by one. 3317 if (I.getType() == Type::getInt1Ty(*Context)) 3318 return ReplaceInstUsesWith(I, Op0); 3319 3320 if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) { 3321 if (ConstantInt *X = cast_or_null<ConstantInt>(Op1V->getSplatValue())) 3322 // div X, 1 == X 3323 if (X->isOne()) 3324 return ReplaceInstUsesWith(I, Op0); 3325 } 3326 3327 return 0; 3328} 3329 3330Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { 3331 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 3332 3333 // Handle the integer div common cases 3334 if (Instruction *Common = commonIDivTransforms(I)) 3335 return Common; 3336 3337 if (ConstantInt *C = dyn_cast<ConstantInt>(Op1)) { 3338 // X udiv C^2 -> X >> C 3339 // Check to see if this is an unsigned division with an exact power of 2, 3340 // if so, convert to a right shift. 
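    // For example:
    //   udiv i32 %x, 16  -->  lshr i32 %x, 4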
3341 if (C->getValue().isPowerOf2()) // 0 not included in isPowerOf2 3342 return BinaryOperator::CreateLShr(Op0, 3343 ConstantInt::get(Op0->getType(), C->getValue().logBase2())); 3344 3345 // X udiv C, where C >= signbit 3346 if (C->getValue().isNegative()) { 3347 Value *IC = Builder->CreateICmpULT( Op0, C); 3348 return SelectInst::Create(IC, Constant::getNullValue(I.getType()), 3349 ConstantInt::get(I.getType(), 1)); 3350 } 3351 } 3352 3353 // X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2) 3354 if (BinaryOperator *RHSI = dyn_cast<BinaryOperator>(I.getOperand(1))) { 3355 if (RHSI->getOpcode() == Instruction::Shl && 3356 isa<ConstantInt>(RHSI->getOperand(0))) { 3357 const APInt& C1 = cast<ConstantInt>(RHSI->getOperand(0))->getValue(); 3358 if (C1.isPowerOf2()) { 3359 Value *N = RHSI->getOperand(1); 3360 const Type *NTy = N->getType(); 3361 if (uint32_t C2 = C1.logBase2()) 3362 N = Builder->CreateAdd(N, ConstantInt::get(NTy, C2), "tmp"); 3363 return BinaryOperator::CreateLShr(Op0, N); 3364 } 3365 } 3366 } 3367 3368 // udiv X, (Select Cond, C1, C2) --> Select Cond, (shr X, C1), (shr X, C2) 3369 // where C1&C2 are powers of two. 3370 if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) 3371 if (ConstantInt *STO = dyn_cast<ConstantInt>(SI->getOperand(1))) 3372 if (ConstantInt *SFO = dyn_cast<ConstantInt>(SI->getOperand(2))) { 3373 const APInt &TVA = STO->getValue(), &FVA = SFO->getValue(); 3374 if (TVA.isPowerOf2() && FVA.isPowerOf2()) { 3375 // Compute the shift amounts 3376 uint32_t TSA = TVA.logBase2(), FSA = FVA.logBase2(); 3377 // Construct the "on true" case of the select 3378 Constant *TC = ConstantInt::get(Op0->getType(), TSA); 3379 Value *TSI = Builder->CreateLShr(Op0, TC, SI->getName()+".t"); 3380 3381 // Construct the "on false" case of the select 3382 Constant *FC = ConstantInt::get(Op0->getType(), FSA); 3383 Value *FSI = Builder->CreateLShr(Op0, FC, SI->getName()+".f"); 3384 3385 // construct the select instruction and return it. 3386 return SelectInst::Create(SI->getOperand(0), TSI, FSI, SI->getName()); 3387 } 3388 } 3389 return 0; 3390} 3391 3392Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { 3393 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 3394 3395 // Handle the integer div common cases 3396 if (Instruction *Common = commonIDivTransforms(I)) 3397 return Common; 3398 3399 if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { 3400 // sdiv X, -1 == -X 3401 if (RHS->isAllOnesValue()) 3402 return BinaryOperator::CreateNeg(Op0); 3403 3404 // sdiv X, C --> ashr X, log2(C) 3405 if (cast<SDivOperator>(&I)->isExact() && 3406 RHS->getValue().isNonNegative() && 3407 RHS->getValue().isPowerOf2()) { 3408 Value *ShAmt = llvm::ConstantInt::get(RHS->getType(), 3409 RHS->getValue().exactLogBase2()); 3410 return BinaryOperator::CreateAShr(Op0, ShAmt, I.getName()); 3411 } 3412 3413 // -X/C --> X/-C provided the negation doesn't overflow. 3414 if (SubOperator *Sub = dyn_cast<SubOperator>(Op0)) 3415 if (isa<Constant>(Sub->getOperand(0)) && 3416 cast<Constant>(Sub->getOperand(0))->isNullValue() && 3417 Sub->hasNoSignedWrap()) 3418 return BinaryOperator::CreateSDiv(Sub->getOperand(1), 3419 ConstantExpr::getNeg(RHS)); 3420 } 3421 3422 // If the sign bits of both operands are zero (i.e. we can prove they are 3423 // unsigned inputs), turn this into a udiv. 
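  // For example, if MaskedValueIsZero proves %x and %y are both non-negative:
  //   sdiv i32 %x, %y  -->  udiv i32 %x, %y
  // which also exposes the unsigned-division folds in visitUDiv above.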
3424 if (I.getType()->isInteger()) { 3425 APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); 3426 if (MaskedValueIsZero(Op0, Mask)) { 3427 if (MaskedValueIsZero(Op1, Mask)) { 3428 // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set 3429 return BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); 3430 } 3431 ConstantInt *ShiftedInt; 3432 if (match(Op1, m_Shl(m_ConstantInt(ShiftedInt), m_Value())) && 3433 ShiftedInt->getValue().isPowerOf2()) { 3434 // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y) 3435 // Safe because the only negative value (1 << Y) can take on is 3436 // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have 3437 // the sign bit set. 3438 return BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); 3439 } 3440 } 3441 } 3442 3443 return 0; 3444} 3445 3446Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { 3447 return commonDivTransforms(I); 3448} 3449 3450/// This function implements the transforms on rem instructions that work 3451/// regardless of the kind of rem instruction it is (urem, srem, or frem). It 3452/// is used by the visitors to those instructions. 3453/// @brief Transforms common to all three rem instructions 3454Instruction *InstCombiner::commonRemTransforms(BinaryOperator &I) { 3455 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 3456 3457 if (isa<UndefValue>(Op0)) { // undef % X -> 0 3458 if (I.getType()->isFPOrFPVector()) 3459 return ReplaceInstUsesWith(I, Op0); // X % undef -> undef (could be SNaN) 3460 return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); 3461 } 3462 if (isa<UndefValue>(Op1)) 3463 return ReplaceInstUsesWith(I, Op1); // X % undef -> undef 3464 3465 // Handle cases involving: rem X, (select Cond, Y, Z) 3466 if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I)) 3467 return &I; 3468 3469 return 0; 3470} 3471 3472/// This function implements the transforms common to both integer remainder 3473/// instructions (urem and srem). It is called by the visitors to those integer 3474/// remainder instructions. 3475/// @brief Common integer remainder transforms 3476Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) { 3477 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 3478 3479 if (Instruction *common = commonRemTransforms(I)) 3480 return common; 3481 3482 // 0 % X == 0 for integer, we don't need to preserve faults! 3483 if (Constant *LHS = dyn_cast<Constant>(Op0)) 3484 if (LHS->isNullValue()) 3485 return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); 3486 3487 if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { 3488 // X % 0 == undef, we don't need to preserve faults! 3489 if (RHS->equalsInt(0)) 3490 return ReplaceInstUsesWith(I, UndefValue::get(I.getType())); 3491 3492 if (RHS->equalsInt(1)) // X % 1 == 0 3493 return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); 3494 3495 if (Instruction *Op0I = dyn_cast<Instruction>(Op0)) { 3496 if (SelectInst *SI = dyn_cast<SelectInst>(Op0I)) { 3497 if (Instruction *R = FoldOpIntoSelect(I, SI, this)) 3498 return R; 3499 } else if (isa<PHINode>(Op0I)) { 3500 if (Instruction *NV = FoldOpIntoPhi(I)) 3501 return NV; 3502 } 3503 3504 // See if we can fold away this rem instruction. 
3505 if (SimplifyDemandedInstructionBits(I)) 3506 return &I; 3507 } 3508 } 3509 3510 return 0; 3511} 3512 3513Instruction *InstCombiner::visitURem(BinaryOperator &I) { 3514 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 3515 3516 if (Instruction *common = commonIRemTransforms(I)) 3517 return common; 3518 3519 if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { 3520 // X urem C^2 -> X and C 3521 // Check to see if this is an unsigned remainder with an exact power of 2, 3522 // if so, convert to a bitwise and. 3523 if (ConstantInt *C = dyn_cast<ConstantInt>(RHS)) 3524 if (C->getValue().isPowerOf2()) 3525 return BinaryOperator::CreateAnd(Op0, SubOne(C)); 3526 } 3527 3528 if (Instruction *RHSI = dyn_cast<Instruction>(I.getOperand(1))) { 3529 // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1) 3530 if (RHSI->getOpcode() == Instruction::Shl && 3531 isa<ConstantInt>(RHSI->getOperand(0))) { 3532 if (cast<ConstantInt>(RHSI->getOperand(0))->getValue().isPowerOf2()) { 3533 Constant *N1 = Constant::getAllOnesValue(I.getType()); 3534 Value *Add = Builder->CreateAdd(RHSI, N1, "tmp"); 3535 return BinaryOperator::CreateAnd(Op0, Add); 3536 } 3537 } 3538 } 3539 3540 // urem X, (select Cond, 2^C1, 2^C2) --> select Cond, (and X, C1), (and X, C2) 3541 // where C1&C2 are powers of two. 3542 if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) { 3543 if (ConstantInt *STO = dyn_cast<ConstantInt>(SI->getOperand(1))) 3544 if (ConstantInt *SFO = dyn_cast<ConstantInt>(SI->getOperand(2))) { 3545 // STO == 0 and SFO == 0 handled above. 3546 if ((STO->getValue().isPowerOf2()) && 3547 (SFO->getValue().isPowerOf2())) { 3548 Value *TrueAnd = Builder->CreateAnd(Op0, SubOne(STO), 3549 SI->getName()+".t"); 3550 Value *FalseAnd = Builder->CreateAnd(Op0, SubOne(SFO), 3551 SI->getName()+".f"); 3552 return SelectInst::Create(SI->getOperand(0), TrueAnd, FalseAnd); 3553 } 3554 } 3555 } 3556 3557 return 0; 3558} 3559 3560Instruction *InstCombiner::visitSRem(BinaryOperator &I) { 3561 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 3562 3563 // Handle the integer rem common cases 3564 if (Instruction *Common = commonIRemTransforms(I)) 3565 return Common; 3566 3567 if (Value *RHSNeg = dyn_castNegVal(Op1)) 3568 if (!isa<Constant>(RHSNeg) || 3569 (isa<ConstantInt>(RHSNeg) && 3570 cast<ConstantInt>(RHSNeg)->getValue().isStrictlyPositive())) { 3571 // X % -Y -> X % Y 3572 Worklist.AddValue(I.getOperand(1)); 3573 I.setOperand(1, RHSNeg); 3574 return &I; 3575 } 3576 3577 // If the sign bits of both operands are zero (i.e. we can prove they are 3578 // unsigned inputs), turn this into a urem. 3579 if (I.getType()->isInteger()) { 3580 APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); 3581 if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) { 3582 // X srem Y -> X urem Y, iff X and Y don't have sign bit set 3583 return BinaryOperator::CreateURem(Op0, Op1, I.getName()); 3584 } 3585 } 3586 3587 // If it's a constant vector, flip any negative values positive. 
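  // For example (element-wise use of "X srem -Y == X srem Y"):
  //   srem <2 x i32> %x, <i32 -3, i32 5>  -->  srem <2 x i32> %x, <i32 3, i32 5>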
3588 if (ConstantVector *RHSV = dyn_cast<ConstantVector>(Op1)) { 3589 unsigned VWidth = RHSV->getNumOperands(); 3590 3591 bool hasNegative = false; 3592 for (unsigned i = 0; !hasNegative && i != VWidth; ++i) 3593 if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i))) 3594 if (RHS->getValue().isNegative()) 3595 hasNegative = true; 3596 3597 if (hasNegative) { 3598 std::vector<Constant *> Elts(VWidth); 3599 for (unsigned i = 0; i != VWidth; ++i) { 3600 if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i))) { 3601 if (RHS->getValue().isNegative()) 3602 Elts[i] = cast<ConstantInt>(ConstantExpr::getNeg(RHS)); 3603 else 3604 Elts[i] = RHS; 3605 } 3606 } 3607 3608 Constant *NewRHSV = ConstantVector::get(Elts); 3609 if (NewRHSV != RHSV) { 3610 Worklist.AddValue(I.getOperand(1)); 3611 I.setOperand(1, NewRHSV); 3612 return &I; 3613 } 3614 } 3615 } 3616 3617 return 0; 3618} 3619 3620Instruction *InstCombiner::visitFRem(BinaryOperator &I) { 3621 return commonRemTransforms(I); 3622} 3623 3624// isOneBitSet - Return true if there is exactly one bit set in the specified 3625// constant. 3626static bool isOneBitSet(const ConstantInt *CI) { 3627 return CI->getValue().isPowerOf2(); 3628} 3629 3630// isHighOnes - Return true if the constant is of the form 1+0+. 3631// This is the same as lowones(~X). 3632static bool isHighOnes(const ConstantInt *CI) { 3633 return (~CI->getValue() + 1).isPowerOf2(); 3634} 3635 3636/// getICmpCode - Encode a icmp predicate into a three bit mask. These bits 3637/// are carefully arranged to allow folding of expressions such as: 3638/// 3639/// (A < B) | (A > B) --> (A != B) 3640/// 3641/// Note that this is only valid if the first and second predicates have the 3642/// same sign. Is illegal to do: (A u< B) | (A s> B) 3643/// 3644/// Three bits are used to represent the condition, as follows: 3645/// 0 A > B 3646/// 1 A == B 3647/// 2 A < B 3648/// 3649/// <=> Value Definition 3650/// 000 0 Always false 3651/// 001 1 A > B 3652/// 010 2 A == B 3653/// 011 3 A >= B 3654/// 100 4 A < B 3655/// 101 5 A != B 3656/// 110 6 A <= B 3657/// 111 7 Always true 3658/// 3659static unsigned getICmpCode(const ICmpInst *ICI) { 3660 switch (ICI->getPredicate()) { 3661 // False -> 0 3662 case ICmpInst::ICMP_UGT: return 1; // 001 3663 case ICmpInst::ICMP_SGT: return 1; // 001 3664 case ICmpInst::ICMP_EQ: return 2; // 010 3665 case ICmpInst::ICMP_UGE: return 3; // 011 3666 case ICmpInst::ICMP_SGE: return 3; // 011 3667 case ICmpInst::ICMP_ULT: return 4; // 100 3668 case ICmpInst::ICMP_SLT: return 4; // 100 3669 case ICmpInst::ICMP_NE: return 5; // 101 3670 case ICmpInst::ICMP_ULE: return 6; // 110 3671 case ICmpInst::ICMP_SLE: return 6; // 110 3672 // True -> 7 3673 default: 3674 llvm_unreachable("Invalid ICmp predicate!"); 3675 return 0; 3676 } 3677} 3678 3679/// getFCmpCode - Similar to getICmpCode but for FCmpInst. This encodes a fcmp 3680/// predicate into a three bit mask. It also returns whether it is an ordered 3681/// predicate by reference. 
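/// For example, FCMP_OLT encodes as 4 (100) with isOrdered set to true, while
/// FCMP_ULT also encodes as 4 but leaves isOrdered false.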
3682 static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) {
3683   isOrdered = false;
3684   switch (CC) {
3685   case FCmpInst::FCMP_ORD: isOrdered = true; return 0;  // 000
3686   case FCmpInst::FCMP_UNO:                   return 0;  // 000
3687   case FCmpInst::FCMP_OGT: isOrdered = true; return 1;  // 001
3688   case FCmpInst::FCMP_UGT:                   return 1;  // 001
3689   case FCmpInst::FCMP_OEQ: isOrdered = true; return 2;  // 010
3690   case FCmpInst::FCMP_UEQ:                   return 2;  // 010
3691   case FCmpInst::FCMP_OGE: isOrdered = true; return 3;  // 011
3692   case FCmpInst::FCMP_UGE:                   return 3;  // 011
3693   case FCmpInst::FCMP_OLT: isOrdered = true; return 4;  // 100
3694   case FCmpInst::FCMP_ULT:                   return 4;  // 100
3695   case FCmpInst::FCMP_ONE: isOrdered = true; return 5;  // 101
3696   case FCmpInst::FCMP_UNE:                   return 5;  // 101
3697   case FCmpInst::FCMP_OLE: isOrdered = true; return 6;  // 110
3698   case FCmpInst::FCMP_ULE:                   return 6;  // 110
3699     // True -> 7
3700   default:
3701     // Not expecting FCMP_FALSE and FCMP_TRUE;
3702     llvm_unreachable("Unexpected FCmp predicate!");
3703     return 0;
3704   }
3705 }
3706 
3707 /// getICmpValue - This is the complement of getICmpCode, which turns an
3708 /// opcode and two operands into either a constant true or false, or a brand
3709 /// new ICmp instruction. The sign is passed in to determine which kind
3710 /// of predicate to use in the new icmp instruction.
3711 static Value *getICmpValue(bool sign, unsigned code, Value *LHS, Value *RHS,
3712                            LLVMContext *Context) {
3713   switch (code) {
3714   default: llvm_unreachable("Illegal ICmp code!");
3715   case 0: return ConstantInt::getFalse(*Context);
3716   case 1:
3717     if (sign)
3718       return new ICmpInst(ICmpInst::ICMP_SGT, LHS, RHS);
3719     else
3720       return new ICmpInst(ICmpInst::ICMP_UGT, LHS, RHS);
3721   case 2: return new ICmpInst(ICmpInst::ICMP_EQ, LHS, RHS);
3722   case 3:
3723     if (sign)
3724       return new ICmpInst(ICmpInst::ICMP_SGE, LHS, RHS);
3725     else
3726       return new ICmpInst(ICmpInst::ICMP_UGE, LHS, RHS);
3727   case 4:
3728     if (sign)
3729       return new ICmpInst(ICmpInst::ICMP_SLT, LHS, RHS);
3730     else
3731       return new ICmpInst(ICmpInst::ICMP_ULT, LHS, RHS);
3732   case 5: return new ICmpInst(ICmpInst::ICMP_NE, LHS, RHS);
3733   case 6:
3734     if (sign)
3735       return new ICmpInst(ICmpInst::ICMP_SLE, LHS, RHS);
3736     else
3737       return new ICmpInst(ICmpInst::ICMP_ULE, LHS, RHS);
3738   case 7: return ConstantInt::getTrue(*Context);
3739   }
3740 }
3741 
3742 /// getFCmpValue - This is the complement of getFCmpCode, which turns an
3743 /// opcode and two operands into either a new FCmp instruction or a constant
3744 /// true value. isordered determines which kind of predicate to use.
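/// For example, code 3 with isordered true yields 'fcmp oge LHS, RHS'; the
/// same code with isordered false yields 'fcmp uge LHS, RHS'.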
3745static Value *getFCmpValue(bool isordered, unsigned code, 3746 Value *LHS, Value *RHS, LLVMContext *Context) { 3747 switch (code) { 3748 default: llvm_unreachable("Illegal FCmp code!"); 3749 case 0: 3750 if (isordered) 3751 return new FCmpInst(FCmpInst::FCMP_ORD, LHS, RHS); 3752 else 3753 return new FCmpInst(FCmpInst::FCMP_UNO, LHS, RHS); 3754 case 1: 3755 if (isordered) 3756 return new FCmpInst(FCmpInst::FCMP_OGT, LHS, RHS); 3757 else 3758 return new FCmpInst(FCmpInst::FCMP_UGT, LHS, RHS); 3759 case 2: 3760 if (isordered) 3761 return new FCmpInst(FCmpInst::FCMP_OEQ, LHS, RHS); 3762 else 3763 return new FCmpInst(FCmpInst::FCMP_UEQ, LHS, RHS); 3764 case 3: 3765 if (isordered) 3766 return new FCmpInst(FCmpInst::FCMP_OGE, LHS, RHS); 3767 else 3768 return new FCmpInst(FCmpInst::FCMP_UGE, LHS, RHS); 3769 case 4: 3770 if (isordered) 3771 return new FCmpInst(FCmpInst::FCMP_OLT, LHS, RHS); 3772 else 3773 return new FCmpInst(FCmpInst::FCMP_ULT, LHS, RHS); 3774 case 5: 3775 if (isordered) 3776 return new FCmpInst(FCmpInst::FCMP_ONE, LHS, RHS); 3777 else 3778 return new FCmpInst(FCmpInst::FCMP_UNE, LHS, RHS); 3779 case 6: 3780 if (isordered) 3781 return new FCmpInst(FCmpInst::FCMP_OLE, LHS, RHS); 3782 else 3783 return new FCmpInst(FCmpInst::FCMP_ULE, LHS, RHS); 3784 case 7: return ConstantInt::getTrue(*Context); 3785 } 3786} 3787 3788/// PredicatesFoldable - Return true if both predicates match sign or if at 3789/// least one of them is an equality comparison (which is signless). 3790static bool PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) { 3791 return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) || 3792 (CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) || 3793 (CmpInst::isSigned(p2) && ICmpInst::isEquality(p1)); 3794} 3795 3796namespace { 3797// FoldICmpLogical - Implements (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B) 3798struct FoldICmpLogical { 3799 InstCombiner &IC; 3800 Value *LHS, *RHS; 3801 ICmpInst::Predicate pred; 3802 FoldICmpLogical(InstCombiner &ic, ICmpInst *ICI) 3803 : IC(ic), LHS(ICI->getOperand(0)), RHS(ICI->getOperand(1)), 3804 pred(ICI->getPredicate()) {} 3805 bool shouldApply(Value *V) const { 3806 if (ICmpInst *ICI = dyn_cast<ICmpInst>(V)) 3807 if (PredicatesFoldable(pred, ICI->getPredicate())) 3808 return ((ICI->getOperand(0) == LHS && ICI->getOperand(1) == RHS) || 3809 (ICI->getOperand(0) == RHS && ICI->getOperand(1) == LHS)); 3810 return false; 3811 } 3812 Instruction *apply(Instruction &Log) const { 3813 ICmpInst *ICI = cast<ICmpInst>(Log.getOperand(0)); 3814 if (ICI->getOperand(0) != LHS) { 3815 assert(ICI->getOperand(1) == LHS); 3816 ICI->swapOperands(); // Swap the LHS and RHS of the ICmp 3817 } 3818 3819 ICmpInst *RHSICI = cast<ICmpInst>(Log.getOperand(1)); 3820 unsigned LHSCode = getICmpCode(ICI); 3821 unsigned RHSCode = getICmpCode(RHSICI); 3822 unsigned Code; 3823 switch (Log.getOpcode()) { 3824 case Instruction::And: Code = LHSCode & RHSCode; break; 3825 case Instruction::Or: Code = LHSCode | RHSCode; break; 3826 case Instruction::Xor: Code = LHSCode ^ RHSCode; break; 3827 default: llvm_unreachable("Illegal logical opcode!"); return 0; 3828 } 3829 3830 bool isSigned = RHSICI->isSigned() || ICI->isSigned(); 3831 Value *RV = getICmpValue(isSigned, Code, LHS, RHS, IC.getContext()); 3832 if (Instruction *I = dyn_cast<Instruction>(RV)) 3833 return I; 3834 // Otherwise, it's a constant boolean value... 
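    // For example, (A s< B) & (A s> B) combines codes 100 & 001 == 000, which
    // getICmpValue materializes as the constant 'false'.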
3835 return IC.ReplaceInstUsesWith(Log, RV); 3836 } 3837}; 3838} // end anonymous namespace 3839 3840// OptAndOp - This handles expressions of the form ((val OP C1) & C2). Where 3841// the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'. Op is 3842// guaranteed to be a binary operator. 3843Instruction *InstCombiner::OptAndOp(Instruction *Op, 3844 ConstantInt *OpRHS, 3845 ConstantInt *AndRHS, 3846 BinaryOperator &TheAnd) { 3847 Value *X = Op->getOperand(0); 3848 Constant *Together = 0; 3849 if (!Op->isShift()) 3850 Together = ConstantExpr::getAnd(AndRHS, OpRHS); 3851 3852 switch (Op->getOpcode()) { 3853 case Instruction::Xor: 3854 if (Op->hasOneUse()) { 3855 // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2) 3856 Value *And = Builder->CreateAnd(X, AndRHS); 3857 And->takeName(Op); 3858 return BinaryOperator::CreateXor(And, Together); 3859 } 3860 break; 3861 case Instruction::Or: 3862 if (Together == AndRHS) // (X | C) & C --> C 3863 return ReplaceInstUsesWith(TheAnd, AndRHS); 3864 3865 if (Op->hasOneUse() && Together != OpRHS) { 3866 // (X | C1) & C2 --> (X | (C1&C2)) & C2 3867 Value *Or = Builder->CreateOr(X, Together); 3868 Or->takeName(Op); 3869 return BinaryOperator::CreateAnd(Or, AndRHS); 3870 } 3871 break; 3872 case Instruction::Add: 3873 if (Op->hasOneUse()) { 3874 // Adding a one to a single bit bit-field should be turned into an XOR 3875 // of the bit. First thing to check is to see if this AND is with a 3876 // single bit constant. 3877 const APInt& AndRHSV = cast<ConstantInt>(AndRHS)->getValue(); 3878 3879 // If there is only one bit set... 3880 if (isOneBitSet(cast<ConstantInt>(AndRHS))) { 3881 // Ok, at this point, we know that we are masking the result of the 3882 // ADD down to exactly one bit. If the constant we are adding has 3883 // no bits set below this bit, then we can eliminate the ADD. 3884 const APInt& AddRHS = cast<ConstantInt>(OpRHS)->getValue(); 3885 3886 // Check to see if any bits below the one bit set in AndRHSV are set. 3887 if ((AddRHS & (AndRHSV-1)) == 0) { 3888 // If not, the only thing that can effect the output of the AND is 3889 // the bit specified by AndRHSV. If that bit is set, the effect of 3890 // the XOR is to toggle the bit. If it is clear, then the ADD has 3891 // no effect. 3892 if ((AddRHS & AndRHSV) == 0) { // Bit is not set, noop 3893 TheAnd.setOperand(0, X); 3894 return &TheAnd; 3895 } else { 3896 // Pull the XOR out of the AND. 3897 Value *NewAnd = Builder->CreateAnd(X, AndRHS); 3898 NewAnd->takeName(Op); 3899 return BinaryOperator::CreateXor(NewAnd, AndRHS); 3900 } 3901 } 3902 } 3903 } 3904 break; 3905 3906 case Instruction::Shl: { 3907 // We know that the AND will not produce any of the bits shifted in, so if 3908 // the anded constant includes them, clear them now! 3909 // 3910 uint32_t BitWidth = AndRHS->getType()->getBitWidth(); 3911 uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth); 3912 APInt ShlMask(APInt::getHighBitsSet(BitWidth, BitWidth-OpRHSVal)); 3913 ConstantInt *CI = ConstantInt::get(*Context, AndRHS->getValue() & ShlMask); 3914 3915 if (CI->getValue() == ShlMask) { 3916 // Masking out bits that the shift already masks 3917 return ReplaceInstUsesWith(TheAnd, Op); // No need for the and. 3918 } else if (CI != AndRHS) { // Reducing bits set in and. 3919 TheAnd.setOperand(1, CI); 3920 return &TheAnd; 3921 } 3922 break; 3923 } 3924 case Instruction::LShr: 3925 { 3926 // We know that the AND will not produce any of the bits shifted in, so if 3927 // the anded constant includes them, clear them now! 
This only applies to
3928     // unsigned shifts, because a signed shr may bring in set bits!
3929     //
3930     uint32_t BitWidth = AndRHS->getType()->getBitWidth();
3931     uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
3932     APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
3933     ConstantInt *CI = ConstantInt::get(*Context, AndRHS->getValue() & ShrMask);
3934 
3935     if (CI->getValue() == ShrMask) {
3936       // Masking out bits that the shift already masks.
3937       return ReplaceInstUsesWith(TheAnd, Op);
3938     } else if (CI != AndRHS) {
3939       TheAnd.setOperand(1, CI);  // Reduce bits set in and cst.
3940       return &TheAnd;
3941     }
3942     break;
3943   }
3944   case Instruction::AShr:
3945     // Signed shr.
3946     // See if this is shifting in some sign extension, then masking it out
3947     // with an and.
3948     if (Op->hasOneUse()) {
3949       uint32_t BitWidth = AndRHS->getType()->getBitWidth();
3950       uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
3951       APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
3952       Constant *C = ConstantInt::get(*Context, AndRHS->getValue() & ShrMask);
3953       if (C == AndRHS) {          // Masking out bits shifted in.
3954         // (Val ashr C1) & C2 -> (Val lshr C1) & C2
3955         // Make the argument unsigned.
3956         Value *ShVal = Op->getOperand(0);
3957         ShVal = Builder->CreateLShr(ShVal, OpRHS, Op->getName());
3958         return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName());
3959       }
3960     }
3961     break;
3962   }
3963   return 0;
3964 }
3965 
3966 
3967 /// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is
3968 /// true, otherwise (V < Lo || V >= Hi).  In practice, we emit the more
3969 /// efficient (V-Lo) <u Hi-Lo.  This method expects that Lo <= Hi.  isSigned
3970 /// indicates whether to treat V, Lo, and Hi as signed or not.  IB is the
3971 /// location to insert new instructions.
3972 Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
3973                                            bool isSigned, bool Inside,
3974                                            Instruction &IB) {
3975   assert(cast<ConstantInt>(ConstantExpr::getICmp((isSigned ?
3976             ICmpInst::ICMP_SLE:ICmpInst::ICMP_ULE), Lo, Hi))->getZExtValue() &&
3977          "Lo is not <= Hi in range emission code!");
3978 
3979   if (Inside) {
3980     if (Lo == Hi)  // Trivially false.
3981       return new ICmpInst(ICmpInst::ICMP_NE, V, V);
3982 
3983     // V >= Min && V < Hi --> V < Hi
3984     if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
3985       ICmpInst::Predicate pred = (isSigned ?
3986         ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT);
3987       return new ICmpInst(pred, V, Hi);
3988     }
3989 
3990     // Emit V-Lo <u Hi-Lo
3991     Constant *NegLo = ConstantExpr::getNeg(Lo);
3992     Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off");
3993     Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi);
3994     return new ICmpInst(ICmpInst::ICMP_ULT, Add, UpperBound);
3995   }
3996 
3997   if (Lo == Hi)  // Trivially true.
3998     return new ICmpInst(ICmpInst::ICMP_EQ, V, V);
3999 
4000   // V < Min || V >= Hi -> V > Hi-1
4001   Hi = SubOne(cast<ConstantInt>(Hi));
4002   if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
4003     ICmpInst::Predicate pred = (isSigned ?
4004       ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT);
4005     return new ICmpInst(pred, V, Hi);
4006   }
4007 
4008   // Emit V-Lo >u Hi-1-Lo
4009   // Note that Hi has already had one subtracted from it, above.
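  // For example, the "outside" test  V s< 5 || V s>= 10  becomes
  //   (V - 5) u> 4
  // which is false exactly for V in [5, 9].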
4010 ConstantInt *NegLo = cast<ConstantInt>(ConstantExpr::getNeg(Lo)); 4011 Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off"); 4012 Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi); 4013 return new ICmpInst(ICmpInst::ICMP_UGT, Add, LowerBound); 4014} 4015 4016// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s with 4017// any number of 0s on either side. The 1s are allowed to wrap from LSB to 4018// MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is 4019// not, since all 1s are not contiguous. 4020static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) { 4021 const APInt& V = Val->getValue(); 4022 uint32_t BitWidth = Val->getType()->getBitWidth(); 4023 if (!APIntOps::isShiftedMask(BitWidth, V)) return false; 4024 4025 // look for the first zero bit after the run of ones 4026 MB = BitWidth - ((V - 1) ^ V).countLeadingZeros(); 4027 // look for the first non-zero bit 4028 ME = V.getActiveBits(); 4029 return true; 4030} 4031 4032/// FoldLogicalPlusAnd - This is part of an expression (LHS +/- RHS) & Mask, 4033/// where isSub determines whether the operator is a sub. If we can fold one of 4034/// the following xforms: 4035/// 4036/// ((A & N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == Mask 4037/// ((A | N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0 4038/// ((A ^ N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0 4039/// 4040/// return (A +/- B). 4041/// 4042Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS, 4043 ConstantInt *Mask, bool isSub, 4044 Instruction &I) { 4045 Instruction *LHSI = dyn_cast<Instruction>(LHS); 4046 if (!LHSI || LHSI->getNumOperands() != 2 || 4047 !isa<ConstantInt>(LHSI->getOperand(1))) return 0; 4048 4049 ConstantInt *N = cast<ConstantInt>(LHSI->getOperand(1)); 4050 4051 switch (LHSI->getOpcode()) { 4052 default: return 0; 4053 case Instruction::And: 4054 if (ConstantExpr::getAnd(N, Mask) == Mask) { 4055 // If the AndRHS is a power of two minus one (0+1+), this is simple. 4056 if ((Mask->getValue().countLeadingZeros() + 4057 Mask->getValue().countPopulation()) == 4058 Mask->getValue().getBitWidth()) 4059 break; 4060 4061 // Otherwise, if Mask is 0+1+0+, and if B is known to have the low 0+ 4062 // part, we don't need any explicit masks to take them out of A. If that 4063 // is all N is, ignore it. 4064 uint32_t MB = 0, ME = 0; 4065 if (isRunOfOnes(Mask, MB, ME)) { // begin/end bit of run, inclusive 4066 uint32_t BitWidth = cast<IntegerType>(RHS->getType())->getBitWidth(); 4067 APInt Mask(APInt::getLowBitsSet(BitWidth, MB-1)); 4068 if (MaskedValueIsZero(RHS, Mask)) 4069 break; 4070 } 4071 } 4072 return 0; 4073 case Instruction::Or: 4074 case Instruction::Xor: 4075 // If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0 4076 if ((Mask->getValue().countLeadingZeros() + 4077 Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth() 4078 && ConstantExpr::getAnd(N, Mask)->isNullValue()) 4079 break; 4080 return 0; 4081 } 4082 4083 if (isSub) 4084 return Builder->CreateSub(LHSI->getOperand(0), RHS, "fold"); 4085 return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold"); 4086} 4087 4088/// FoldAndOfICmps - Fold (icmp)&(icmp) if possible. 4089Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, 4090 ICmpInst *LHS, ICmpInst *RHS) { 4091 Value *Val, *Val2; 4092 ConstantInt *LHSCst, *RHSCst; 4093 ICmpInst::Predicate LHSCC, RHSCC; 4094 4095 // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2). 
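  // For example, two compares of different values against the same
  // power-of-two constant are merged below:
  //   (icmp ult %a, 8) & (icmp ult %b, 8)  -->  icmp ult (or %a, %b), 8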
4096   if (!match(LHS, m_ICmp(LHSCC, m_Value(Val),
4097                           m_ConstantInt(LHSCst))) ||
4098       !match(RHS, m_ICmp(RHSCC, m_Value(Val2),
4099                          m_ConstantInt(RHSCst))))
4100     return 0;
4101 
4102   if (LHSCst == RHSCst && LHSCC == RHSCC) {
4103     // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C)
4104     // where C is a power of 2
4105     if (LHSCC == ICmpInst::ICMP_ULT &&
4106         LHSCst->getValue().isPowerOf2()) {
4107       Value *NewOr = Builder->CreateOr(Val, Val2);
4108       return new ICmpInst(LHSCC, NewOr, LHSCst);
4109     }
4110 
4111     // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0)
4112     if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) {
4113       Value *NewOr = Builder->CreateOr(Val, Val2);
4114       return new ICmpInst(LHSCC, NewOr, LHSCst);
4115     }
4116   }
4117 
4118   // From here on, we only handle:
4119   // (icmp1 A, C1) & (icmp2 A, C2) --> something simpler.
4120   if (Val != Val2) return 0;
4121 
4122   // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
4123   if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE ||
4124       RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE ||
4125       LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
4126       RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
4127     return 0;
4128 
4129   // We can't fold (ugt x, C) & (sgt x, C2).
4130   if (!PredicatesFoldable(LHSCC, RHSCC))
4131     return 0;
4132 
4133   // Ensure that the larger constant is on the RHS.
4134   bool ShouldSwap;
4135   if (CmpInst::isSigned(LHSCC) ||
4136       (ICmpInst::isEquality(LHSCC) &&
4137        CmpInst::isSigned(RHSCC)))
4138     ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
4139   else
4140     ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());
4141 
4142   if (ShouldSwap) {
4143     std::swap(LHS, RHS);
4144     std::swap(LHSCst, RHSCst);
4145     std::swap(LHSCC, RHSCC);
4146   }
4147 
4148   // At this point, we know we have two icmp instructions
4149   // comparing a value against two constants and AND'ing the results
4150   // together.  Because of the above check, we know that we only have
4151   // icmp eq, icmp ne, icmp [su]lt, and icmp [su]gt here.
We also know 4152 // (from the FoldICmpLogical check above), that the two constants 4153 // are not equal and that the larger constant is on the RHS 4154 assert(LHSCst != RHSCst && "Compares not folded above?"); 4155 4156 switch (LHSCC) { 4157 default: llvm_unreachable("Unknown integer condition code!"); 4158 case ICmpInst::ICMP_EQ: 4159 switch (RHSCC) { 4160 default: llvm_unreachable("Unknown integer condition code!"); 4161 case ICmpInst::ICMP_EQ: // (X == 13 & X == 15) -> false 4162 case ICmpInst::ICMP_UGT: // (X == 13 & X > 15) -> false 4163 case ICmpInst::ICMP_SGT: // (X == 13 & X > 15) -> false 4164 return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); 4165 case ICmpInst::ICMP_NE: // (X == 13 & X != 15) -> X == 13 4166 case ICmpInst::ICMP_ULT: // (X == 13 & X < 15) -> X == 13 4167 case ICmpInst::ICMP_SLT: // (X == 13 & X < 15) -> X == 13 4168 return ReplaceInstUsesWith(I, LHS); 4169 } 4170 case ICmpInst::ICMP_NE: 4171 switch (RHSCC) { 4172 default: llvm_unreachable("Unknown integer condition code!"); 4173 case ICmpInst::ICMP_ULT: 4174 if (LHSCst == SubOne(RHSCst)) // (X != 13 & X u< 14) -> X < 13 4175 return new ICmpInst(ICmpInst::ICMP_ULT, Val, LHSCst); 4176 break; // (X != 13 & X u< 15) -> no change 4177 case ICmpInst::ICMP_SLT: 4178 if (LHSCst == SubOne(RHSCst)) // (X != 13 & X s< 14) -> X < 13 4179 return new ICmpInst(ICmpInst::ICMP_SLT, Val, LHSCst); 4180 break; // (X != 13 & X s< 15) -> no change 4181 case ICmpInst::ICMP_EQ: // (X != 13 & X == 15) -> X == 15 4182 case ICmpInst::ICMP_UGT: // (X != 13 & X u> 15) -> X u> 15 4183 case ICmpInst::ICMP_SGT: // (X != 13 & X s> 15) -> X s> 15 4184 return ReplaceInstUsesWith(I, RHS); 4185 case ICmpInst::ICMP_NE: 4186 if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1 4187 Constant *AddCST = ConstantExpr::getNeg(LHSCst); 4188 Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off"); 4189 return new ICmpInst(ICmpInst::ICMP_UGT, Add, 4190 ConstantInt::get(Add->getType(), 1)); 4191 } 4192 break; // (X != 13 & X != 15) -> no change 4193 } 4194 break; 4195 case ICmpInst::ICMP_ULT: 4196 switch (RHSCC) { 4197 default: llvm_unreachable("Unknown integer condition code!"); 4198 case ICmpInst::ICMP_EQ: // (X u< 13 & X == 15) -> false 4199 case ICmpInst::ICMP_UGT: // (X u< 13 & X u> 15) -> false 4200 return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); 4201 case ICmpInst::ICMP_SGT: // (X u< 13 & X s> 15) -> no change 4202 break; 4203 case ICmpInst::ICMP_NE: // (X u< 13 & X != 15) -> X u< 13 4204 case ICmpInst::ICMP_ULT: // (X u< 13 & X u< 15) -> X u< 13 4205 return ReplaceInstUsesWith(I, LHS); 4206 case ICmpInst::ICMP_SLT: // (X u< 13 & X s< 15) -> no change 4207 break; 4208 } 4209 break; 4210 case ICmpInst::ICMP_SLT: 4211 switch (RHSCC) { 4212 default: llvm_unreachable("Unknown integer condition code!"); 4213 case ICmpInst::ICMP_EQ: // (X s< 13 & X == 15) -> false 4214 case ICmpInst::ICMP_SGT: // (X s< 13 & X s> 15) -> false 4215 return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); 4216 case ICmpInst::ICMP_UGT: // (X s< 13 & X u> 15) -> no change 4217 break; 4218 case ICmpInst::ICMP_NE: // (X s< 13 & X != 15) -> X < 13 4219 case ICmpInst::ICMP_SLT: // (X s< 13 & X s< 15) -> X < 13 4220 return ReplaceInstUsesWith(I, LHS); 4221 case ICmpInst::ICMP_ULT: // (X s< 13 & X u< 15) -> no change 4222 break; 4223 } 4224 break; 4225 case ICmpInst::ICMP_UGT: 4226 switch (RHSCC) { 4227 default: llvm_unreachable("Unknown integer condition code!"); 4228 case ICmpInst::ICMP_EQ: // (X u> 13 & X == 15) -> X == 
15 4229 case ICmpInst::ICMP_UGT: // (X u> 13 & X u> 15) -> X u> 15 4230 return ReplaceInstUsesWith(I, RHS); 4231 case ICmpInst::ICMP_SGT: // (X u> 13 & X s> 15) -> no change 4232 break; 4233 case ICmpInst::ICMP_NE: 4234 if (RHSCst == AddOne(LHSCst)) // (X u> 13 & X != 14) -> X u> 14 4235 return new ICmpInst(LHSCC, Val, RHSCst); 4236 break; // (X u> 13 & X != 15) -> no change 4237 case ICmpInst::ICMP_ULT: // (X u> 13 & X u< 15) -> (X-14) <u 1 4238 return InsertRangeTest(Val, AddOne(LHSCst), 4239 RHSCst, false, true, I); 4240 case ICmpInst::ICMP_SLT: // (X u> 13 & X s< 15) -> no change 4241 break; 4242 } 4243 break; 4244 case ICmpInst::ICMP_SGT: 4245 switch (RHSCC) { 4246 default: llvm_unreachable("Unknown integer condition code!"); 4247 case ICmpInst::ICMP_EQ: // (X s> 13 & X == 15) -> X == 15 4248 case ICmpInst::ICMP_SGT: // (X s> 13 & X s> 15) -> X s> 15 4249 return ReplaceInstUsesWith(I, RHS); 4250 case ICmpInst::ICMP_UGT: // (X s> 13 & X u> 15) -> no change 4251 break; 4252 case ICmpInst::ICMP_NE: 4253 if (RHSCst == AddOne(LHSCst)) // (X s> 13 & X != 14) -> X s> 14 4254 return new ICmpInst(LHSCC, Val, RHSCst); 4255 break; // (X s> 13 & X != 15) -> no change 4256 case ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) s< 1 4257 return InsertRangeTest(Val, AddOne(LHSCst), 4258 RHSCst, true, true, I); 4259 case ICmpInst::ICMP_ULT: // (X s> 13 & X u< 15) -> no change 4260 break; 4261 } 4262 break; 4263 } 4264 4265 return 0; 4266} 4267 4268Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS, 4269 FCmpInst *RHS) { 4270 4271 if (LHS->getPredicate() == FCmpInst::FCMP_ORD && 4272 RHS->getPredicate() == FCmpInst::FCMP_ORD) { 4273 // (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y) 4274 if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1))) 4275 if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) { 4276 // If either of the constants are nans, then the whole thing returns 4277 // false. 4278 if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) 4279 return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); 4280 return new FCmpInst(FCmpInst::FCMP_ORD, 4281 LHS->getOperand(0), RHS->getOperand(0)); 4282 } 4283 4284 // Handle vector zeros. This occurs because the canonical form of 4285 // "fcmp ord x,x" is "fcmp ord x, 0". 4286 if (isa<ConstantAggregateZero>(LHS->getOperand(1)) && 4287 isa<ConstantAggregateZero>(RHS->getOperand(1))) 4288 return new FCmpInst(FCmpInst::FCMP_ORD, 4289 LHS->getOperand(0), RHS->getOperand(0)); 4290 return 0; 4291 } 4292 4293 Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1); 4294 Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1); 4295 FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate(); 4296 4297 4298 if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) { 4299 // Swap RHS operands to match LHS. 4300 Op1CC = FCmpInst::getSwappedPredicate(Op1CC); 4301 std::swap(Op1LHS, Op1RHS); 4302 } 4303 4304 if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) { 4305 // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y). 
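    // For example:
    //   (fcmp ord x, y)   & (fcmp olt x, y)  -->  fcmp olt x, y
    //   (fcmp false x, y) & (fcmp ugt x, y)  -->  false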
4306 if (Op0CC == Op1CC) 4307 return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS); 4308 4309 if (Op0CC == FCmpInst::FCMP_FALSE || Op1CC == FCmpInst::FCMP_FALSE) 4310 return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); 4311 if (Op0CC == FCmpInst::FCMP_TRUE) 4312 return ReplaceInstUsesWith(I, RHS); 4313 if (Op1CC == FCmpInst::FCMP_TRUE) 4314 return ReplaceInstUsesWith(I, LHS); 4315 4316 bool Op0Ordered; 4317 bool Op1Ordered; 4318 unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); 4319 unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered); 4320 if (Op1Pred == 0) { 4321 std::swap(LHS, RHS); 4322 std::swap(Op0Pred, Op1Pred); 4323 std::swap(Op0Ordered, Op1Ordered); 4324 } 4325 if (Op0Pred == 0) { 4326 // uno && ueq -> uno && (uno || eq) -> ueq 4327 // ord && olt -> ord && (ord && lt) -> olt 4328 if (Op0Ordered == Op1Ordered) 4329 return ReplaceInstUsesWith(I, RHS); 4330 4331 // uno && oeq -> uno && (ord && eq) -> false 4332 // uno && ord -> false 4333 if (!Op0Ordered) 4334 return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); 4335 // ord && ueq -> ord && (uno || eq) -> oeq 4336 return cast<Instruction>(getFCmpValue(true, Op1Pred, 4337 Op0LHS, Op0RHS, Context)); 4338 } 4339 } 4340 4341 return 0; 4342} 4343 4344 4345Instruction *InstCombiner::visitAnd(BinaryOperator &I) { 4346 bool Changed = SimplifyCommutative(I); 4347 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 4348 4349 if (Value *V = SimplifyAndInst(Op0, Op1, TD)) 4350 return ReplaceInstUsesWith(I, V); 4351 4352 // See if we can simplify any instructions used by the instruction whose sole 4353 // purpose is to compute bits we don't care about. 4354 if (SimplifyDemandedInstructionBits(I)) 4355 return &I; 4356 4357 if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) { 4358 const APInt &AndRHSMask = AndRHS->getValue(); 4359 APInt NotAndRHS(~AndRHSMask); 4360 4361 // Optimize a variety of ((val OP C1) & C2) combinations... 4362 if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) { 4363 Value *Op0LHS = Op0I->getOperand(0); 4364 Value *Op0RHS = Op0I->getOperand(1); 4365 switch (Op0I->getOpcode()) { 4366 default: break; 4367 case Instruction::Xor: 4368 case Instruction::Or: 4369 // If the mask is only needed on one incoming arm, push it up. 4370 if (!Op0I->hasOneUse()) break; 4371 4372 if (MaskedValueIsZero(Op0LHS, NotAndRHS)) { 4373 // Not masking anything out for the LHS, move to RHS. 4374 Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS, 4375 Op0RHS->getName()+".masked"); 4376 return BinaryOperator::Create(Op0I->getOpcode(), Op0LHS, NewRHS); 4377 } 4378 if (!isa<Constant>(Op0RHS) && 4379 MaskedValueIsZero(Op0RHS, NotAndRHS)) { 4380 // Not masking anything out for the RHS, move to LHS. 4381 Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS, 4382 Op0LHS->getName()+".masked"); 4383 return BinaryOperator::Create(Op0I->getOpcode(), NewLHS, Op0RHS); 4384 } 4385 4386 break; 4387 case Instruction::Add: 4388 // ((A & N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == AndRHS. 4389 // ((A | N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0 4390 // ((A ^ N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0 4391 if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, false, I)) 4392 return BinaryOperator::CreateAnd(V, AndRHS); 4393 if (Value *V = FoldLogicalPlusAnd(Op0RHS, Op0LHS, AndRHS, false, I)) 4394 return BinaryOperator::CreateAnd(V, AndRHS); // Add commutes 4395 break; 4396 4397 case Instruction::Sub: 4398 // ((A & N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == AndRHS. 
4399 // ((A | N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0 4400 // ((A ^ N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0 4401 if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, true, I)) 4402 return BinaryOperator::CreateAnd(V, AndRHS); 4403 4404 // (A - N) & AndRHS -> -N & AndRHS iff A&AndRHS==0 and AndRHS 4405 // has 1's for all bits that the subtraction with A might affect. 4406 if (Op0I->hasOneUse()) { 4407 uint32_t BitWidth = AndRHSMask.getBitWidth(); 4408 uint32_t Zeros = AndRHSMask.countLeadingZeros(); 4409 APInt Mask = APInt::getLowBitsSet(BitWidth, BitWidth - Zeros); 4410 4411 ConstantInt *A = dyn_cast<ConstantInt>(Op0LHS); 4412 if (!(A && A->isZero()) && // avoid infinite recursion. 4413 MaskedValueIsZero(Op0LHS, Mask)) { 4414 Value *NewNeg = Builder->CreateNeg(Op0RHS); 4415 return BinaryOperator::CreateAnd(NewNeg, AndRHS); 4416 } 4417 } 4418 break; 4419 4420 case Instruction::Shl: 4421 case Instruction::LShr: 4422 // (1 << x) & 1 --> zext(x == 0) 4423 // (1 >> x) & 1 --> zext(x == 0) 4424 if (AndRHSMask == 1 && Op0LHS == AndRHS) { 4425 Value *NewICmp = 4426 Builder->CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType())); 4427 return new ZExtInst(NewICmp, I.getType()); 4428 } 4429 break; 4430 } 4431 4432 if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) 4433 if (Instruction *Res = OptAndOp(Op0I, Op0CI, AndRHS, I)) 4434 return Res; 4435 } else if (CastInst *CI = dyn_cast<CastInst>(Op0)) { 4436 // If this is an integer truncation or change from signed-to-unsigned, and 4437 // if the source is an and/or with immediate, transform it. This 4438 // frequently occurs for bitfield accesses. 4439 if (Instruction *CastOp = dyn_cast<Instruction>(CI->getOperand(0))) { 4440 if ((isa<TruncInst>(CI) || isa<BitCastInst>(CI)) && 4441 CastOp->getNumOperands() == 2) 4442 if (ConstantInt *AndCI =dyn_cast<ConstantInt>(CastOp->getOperand(1))){ 4443 if (CastOp->getOpcode() == Instruction::And) { 4444 // Change: and (cast (and X, C1) to T), C2 4445 // into : and (cast X to T), trunc_or_bitcast(C1)&C2 4446 // This will fold the two constants together, which may allow 4447 // other simplifications. 4448 Value *NewCast = Builder->CreateTruncOrBitCast( 4449 CastOp->getOperand(0), I.getType(), 4450 CastOp->getName()+".shrunk"); 4451 // trunc_or_bitcast(C1)&C2 4452 Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType()); 4453 C3 = ConstantExpr::getAnd(C3, AndRHS); 4454 return BinaryOperator::CreateAnd(NewCast, C3); 4455 } else if (CastOp->getOpcode() == Instruction::Or) { 4456 // Change: and (cast (or X, C1) to T), C2 4457 // into : trunc(C1)&C2 iff trunc(C1)&C2 == C2 4458 Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType()); 4459 if (ConstantExpr::getAnd(C3, AndRHS) == AndRHS) 4460 // trunc(C1)&C2 4461 return ReplaceInstUsesWith(I, AndRHS); 4462 } 4463 } 4464 } 4465 } 4466 4467 // Try to fold constant and into select arguments. 
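    // Illustrative sketch (hypothetical values): when both select arms are
    // constants, the mask is applied to each arm, e.g.
    //   %s = select i1 %c, i32 -1, i32 0
    //   %r = and i32 %s, 8         ; --> %r = select i1 %c, i32 8, i32 0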
4468 if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) 4469 if (Instruction *R = FoldOpIntoSelect(I, SI, this)) 4470 return R; 4471 if (isa<PHINode>(Op0)) 4472 if (Instruction *NV = FoldOpIntoPhi(I)) 4473 return NV; 4474 } 4475 4476 4477 // (~A & ~B) == (~(A | B)) - De Morgan's Law 4478 if (Value *Op0NotVal = dyn_castNotVal(Op0)) 4479 if (Value *Op1NotVal = dyn_castNotVal(Op1)) 4480 if (Op0->hasOneUse() && Op1->hasOneUse()) { 4481 Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal, 4482 I.getName()+".demorgan"); 4483 return BinaryOperator::CreateNot(Or); 4484 } 4485 4486 { 4487 Value *A = 0, *B = 0, *C = 0, *D = 0; 4488 // (A|B) & ~(A&B) -> A^B 4489 if (match(Op0, m_Or(m_Value(A), m_Value(B))) && 4490 match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) && 4491 ((A == C && B == D) || (A == D && B == C))) 4492 return BinaryOperator::CreateXor(A, B); 4493 4494 // ~(A&B) & (A|B) -> A^B 4495 if (match(Op1, m_Or(m_Value(A), m_Value(B))) && 4496 match(Op0, m_Not(m_And(m_Value(C), m_Value(D)))) && 4497 ((A == C && B == D) || (A == D && B == C))) 4498 return BinaryOperator::CreateXor(A, B); 4499 4500 if (Op0->hasOneUse() && 4501 match(Op0, m_Xor(m_Value(A), m_Value(B)))) { 4502 if (A == Op1) { // (A^B)&A -> A&(A^B) 4503 I.swapOperands(); // Simplify below 4504 std::swap(Op0, Op1); 4505 } else if (B == Op1) { // (A^B)&B -> B&(B^A) 4506 cast<BinaryOperator>(Op0)->swapOperands(); 4507 I.swapOperands(); // Simplify below 4508 std::swap(Op0, Op1); 4509 } 4510 } 4511 4512 if (Op1->hasOneUse() && 4513 match(Op1, m_Xor(m_Value(A), m_Value(B)))) { 4514 if (B == Op0) { // B&(A^B) -> B&(B^A) 4515 cast<BinaryOperator>(Op1)->swapOperands(); 4516 std::swap(A, B); 4517 } 4518 if (A == Op0) // A&(A^B) -> A & ~B 4519 return BinaryOperator::CreateAnd(A, Builder->CreateNot(B, "tmp")); 4520 } 4521 4522 // (A&((~A)|B)) -> A&B 4523 if (match(Op0, m_Or(m_Not(m_Specific(Op1)), m_Value(A))) || 4524 match(Op0, m_Or(m_Value(A), m_Not(m_Specific(Op1))))) 4525 return BinaryOperator::CreateAnd(A, Op1); 4526 if (match(Op1, m_Or(m_Not(m_Specific(Op0)), m_Value(A))) || 4527 match(Op1, m_Or(m_Value(A), m_Not(m_Specific(Op0))))) 4528 return BinaryOperator::CreateAnd(A, Op0); 4529 } 4530 4531 if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1)) { 4532 // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B) 4533 if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS))) 4534 return R; 4535 4536 if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0)) 4537 if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS)) 4538 return Res; 4539 } 4540 4541 // fold (and (cast A), (cast B)) -> (cast (and A, B)) 4542 if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) 4543 if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) 4544 if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind ? 4545 const Type *SrcTy = Op0C->getOperand(0)->getType(); 4546 if (SrcTy == Op1C->getOperand(0)->getType() && 4547 SrcTy->isIntOrIntVector() && 4548 // Only do this if the casts both really cause code to be generated. 4549 ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), 4550 I.getType(), TD) && 4551 ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), 4552 I.getType(), TD)) { 4553 Value *NewOp = Builder->CreateAnd(Op0C->getOperand(0), 4554 Op1C->getOperand(0), I.getName()); 4555 return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); 4556 } 4557 } 4558 4559 // (X >> Z) & (Y >> Z) -> (X&Y) >> Z for all shifts. 
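    // Illustrative sketch (hypothetical values): this saves a shift when at
    // least one of the two originals has no other users, e.g.
    //   %a = lshr i32 %X, %Z
    //   %b = lshr i32 %Y, %Z
    //   %r = and i32 %a, %b        ; --> %r = lshr i32 (and %X, %Y), %Z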
4560 if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) {
4561 if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0))
4562 if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() &&
4563 SI0->getOperand(1) == SI1->getOperand(1) &&
4564 (SI0->hasOneUse() || SI1->hasOneUse())) {
4565 Value *NewOp =
4566 Builder->CreateAnd(SI0->getOperand(0), SI1->getOperand(0),
4567 SI0->getName());
4568 return BinaryOperator::Create(SI1->getOpcode(), NewOp,
4569 SI1->getOperand(1));
4570 }
4571 }
4572
4573 // If and'ing two fcmps, try to combine them into one.
4574 if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) {
4575 if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
4576 if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS))
4577 return Res;
4578 }
4579
4580 return Changed ? &I : 0;
4581}
4582
4583/// CollectBSwapParts - Analyze the specified subexpression and see if it is
4584/// capable of providing pieces of a bswap. The subexpression provides pieces
4585/// of a bswap if it is proven that each of the non-zero bytes in the output of
4586/// the expression came from the corresponding "byte swapped" byte in some other
4587/// value. For example, if the current subexpression is "(shl i32 %X, 24)" then
4588/// we know that the expression deposits the low byte of %X into the high byte
4589/// of the bswap result and that all other bytes are zero. If this expression is
4590/// accepted, the high byte of ByteValues is set to %X to indicate a correct
4591/// match.
4592///
4593/// This function returns true if the match was unsuccessful and false if the match succeeded.
4594/// On entry to the function the "OverallLeftShift" is a signed integer value
4595/// indicating the number of bytes that the subexpression is later shifted. For
4596/// example, if the expression is later right shifted by 16 bits, the
4597/// OverallLeftShift value would be -2 on entry. This is used to specify which
4598/// byte of ByteValues is actually being set.
4599///
4600/// Similarly, ByteMask is a bitmask where a bit is clear if its corresponding
4601/// byte is masked to zero by a user. For example, in (X & 255), X will be
4602/// processed with a bytemask of 1. Because the bytemask is 32 bits wide, this limits
4603/// this function to working on up to 32-byte (256 bit) values. ByteMask is
4604/// always in the local (OverallLeftShift) coordinate space.
4605///
4606static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
4607 SmallVector<Value*, 8> &ByteValues) {
4608 if (Instruction *I = dyn_cast<Instruction>(V)) {
4609 // If this is an or instruction, it may be an inner node of the bswap.
4610 if (I->getOpcode() == Instruction::Or) {
4611 return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
4612 ByteValues) ||
4613 CollectBSwapParts(I->getOperand(1), OverallLeftShift, ByteMask,
4614 ByteValues);
4615 }
4616
4617 // If this is a logical shift by a constant multiple of 8, recurse with
4618 // OverallLeftShift and ByteMask adjusted.
4619 if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) {
4620 unsigned ShAmt =
4621 cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U);
4622 // Ensure the shift amount is a whole number of bytes and within range.
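      // For example (illustrative only), "shl i32 %X, 8" moves every input
      // byte up by one output byte, whereas a non-multiple shift such as
      // "shl i32 %X, 12" can never be part of a byte swap and is rejected.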
4623 if ((ShAmt & 7) || (ShAmt > 8*ByteValues.size())) 4624 return true; 4625 4626 unsigned ByteShift = ShAmt >> 3; 4627 if (I->getOpcode() == Instruction::Shl) { 4628 // X << 2 -> collect(X, +2) 4629 OverallLeftShift += ByteShift; 4630 ByteMask >>= ByteShift; 4631 } else { 4632 // X >>u 2 -> collect(X, -2) 4633 OverallLeftShift -= ByteShift; 4634 ByteMask <<= ByteShift; 4635 ByteMask &= (~0U >> (32-ByteValues.size())); 4636 } 4637 4638 if (OverallLeftShift >= (int)ByteValues.size()) return true; 4639 if (OverallLeftShift <= -(int)ByteValues.size()) return true; 4640 4641 return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask, 4642 ByteValues); 4643 } 4644 4645 // If this is a logical 'and' with a mask that clears bytes, clear the 4646 // corresponding bytes in ByteMask. 4647 if (I->getOpcode() == Instruction::And && 4648 isa<ConstantInt>(I->getOperand(1))) { 4649 // Scan every byte of the and mask, seeing if the byte is either 0 or 255. 4650 unsigned NumBytes = ByteValues.size(); 4651 APInt Byte(I->getType()->getPrimitiveSizeInBits(), 255); 4652 const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue(); 4653 4654 for (unsigned i = 0; i != NumBytes; ++i, Byte <<= 8) { 4655 // If this byte is masked out by a later operation, we don't care what 4656 // the and mask is. 4657 if ((ByteMask & (1 << i)) == 0) 4658 continue; 4659 4660 // If the AndMask is all zeros for this byte, clear the bit. 4661 APInt MaskB = AndMask & Byte; 4662 if (MaskB == 0) { 4663 ByteMask &= ~(1U << i); 4664 continue; 4665 } 4666 4667 // If the AndMask is not all ones for this byte, it's not a bytezap. 4668 if (MaskB != Byte) 4669 return true; 4670 4671 // Otherwise, this byte is kept. 4672 } 4673 4674 return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask, 4675 ByteValues); 4676 } 4677 } 4678 4679 // Okay, we got to something that isn't a shift, 'or' or 'and'. This must be 4680 // the input value to the bswap. Some observations: 1) if more than one byte 4681 // is demanded from this input, then it could not be successfully assembled 4682 // into a byteswap. At least one of the two bytes would not be aligned with 4683 // their ultimate destination. 4684 if (!isPowerOf2_32(ByteMask)) return true; 4685 unsigned InputByteNo = CountTrailingZeros_32(ByteMask); 4686 4687 // 2) The input and ultimate destinations must line up: if byte 3 of an i32 4688 // is demanded, it needs to go into byte 0 of the result. This means that the 4689 // byte needs to be shifted until it lands in the right byte bucket. The 4690 // shift amount depends on the position: if the byte is coming from the high 4691 // part of the value (e.g. byte 3) then it must be shifted right. If from the 4692 // low part, it must be shifted left. 4693 unsigned DestByteNo = InputByteNo + OverallLeftShift; 4694 if (InputByteNo < ByteValues.size()/2) { 4695 if (ByteValues.size()-1-DestByteNo != InputByteNo) 4696 return true; 4697 } else { 4698 if (ByteValues.size()-1-DestByteNo != InputByteNo) 4699 return true; 4700 } 4701 4702 // If the destination byte value is already defined, the values are or'd 4703 // together, which isn't a bswap (unless it's an or of the same bits). 4704 if (ByteValues[DestByteNo] && ByteValues[DestByteNo] != V) 4705 return true; 4706 ByteValues[DestByteNo] = V; 4707 return false; 4708} 4709 4710/// MatchBSwap - Given an OR instruction, check to see if this is a bswap idiom. 4711/// If so, insert the new bswap intrinsic and return it. 
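/// A typical i32 idiom that is recognized (illustrative only) is
///   (X << 24) | ((X & 0xff00) << 8) | ((X >> 8) & 0xff00) | (X >> 24)
/// which is replaced by a single call to llvm.bswap.i32(X).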
4712Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
4713 const IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
4714 if (!ITy || ITy->getBitWidth() % 16 ||
4715 // ByteMask only allows up to 32-byte values.
4716 ITy->getBitWidth() > 32*8)
4717 return 0; // Can only bswap pairs of bytes. Can't do vectors.
4718
4719 /// ByteValues - For each byte of the result, we keep track of which value
4720 /// defines each byte.
4721 SmallVector<Value*, 8> ByteValues;
4722 ByteValues.resize(ITy->getBitWidth()/8);
4723
4724 // Try to find all the pieces corresponding to the bswap.
4725 uint32_t ByteMask = ~0U >> (32-ByteValues.size());
4726 if (CollectBSwapParts(&I, 0, ByteMask, ByteValues))
4727 return 0;
4728
4729 // Grab the value that defines the first byte.
4730 Value *V = ByteValues[0];
4731 if (V == 0) return 0; // Didn't find a byte? Must be zero.
4732
4733 // Check to make sure that all of the bytes come from the same value.
4734 for (unsigned i = 1, e = ByteValues.size(); i != e; ++i)
4735 if (ByteValues[i] != V)
4736 return 0;
4737 const Type *Tys[] = { ITy };
4738 Module *M = I.getParent()->getParent()->getParent();
4739 Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
4740 return CallInst::Create(F, V);
4741}
4742
4743/// MatchSelectFromAndOr - We have an expression of the form (A&C)|(B&D). Check
4744/// if A is (cond?-1:0) and either B or D is ~(cond?-1:0) or (cond?0:-1), then
4745/// we can simplify this expression to "cond ? C : D" or "cond ? C : B".
4746static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
4747 Value *C, Value *D,
4748 LLVMContext *Context) {
4749 // If A is not a select of -1/0, this cannot match.
4750 Value *Cond = 0;
4751 if (!match(A, m_SelectCst<-1, 0>(m_Value(Cond))))
4752 return 0;
4753
4754 // ((cond?-1:0)&C) | (B&(cond?0:-1)) -> cond ? C : B.
4755 if (match(D, m_SelectCst<0, -1>(m_Specific(Cond))))
4756 return SelectInst::Create(Cond, C, B);
4757 if (match(D, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond)))))
4758 return SelectInst::Create(Cond, C, B);
4759 // ((cond?-1:0)&C) | ((cond?0:-1)&D) -> cond ? C : D.
4760 if (match(B, m_SelectCst<0, -1>(m_Specific(Cond))))
4761 return SelectInst::Create(Cond, C, D);
4762 if (match(B, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond)))))
4763 return SelectInst::Create(Cond, C, D);
4764 return 0;
4765}
4766
4767/// FoldOrOfICmps - Fold (icmp)|(icmp) if possible.
4768Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
4769 ICmpInst *LHS, ICmpInst *RHS) {
4770 Value *Val, *Val2;
4771 ConstantInt *LHSCst, *RHSCst;
4772 ICmpInst::Predicate LHSCC, RHSCC;
4773
4774 // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
4775 if (!match(LHS, m_ICmp(LHSCC, m_Value(Val), m_ConstantInt(LHSCst))) ||
4776 !match(RHS, m_ICmp(RHSCC, m_Value(Val2), m_ConstantInt(RHSCst))))
4777 return 0;
4778
4779
4780 // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0)
4781 if (LHSCst == RHSCst && LHSCC == RHSCC &&
4782 LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) {
4783 Value *NewOr = Builder->CreateOr(Val, Val2);
4784 return new ICmpInst(LHSCC, NewOr, LHSCst);
4785 }
4786
4787 // From here on, we only handle:
4788 // (icmp1 A, C1) | (icmp2 A, C2) --> something simpler.
4789 if (Val != Val2) return 0;
4790
4791 // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
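  // Illustrative note: (icmp ule i32 %X, 13) normally reaches this point
  // already canonicalized to (icmp ult i32 %X, 14), so only the equality and
  // strict predicates are handled below; anything else simply bails out.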
4792 if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE || 4793 RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE || 4794 LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE || 4795 RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE) 4796 return 0; 4797 4798 // We can't fold (ugt x, C) | (sgt x, C2). 4799 if (!PredicatesFoldable(LHSCC, RHSCC)) 4800 return 0; 4801 4802 // Ensure that the larger constant is on the RHS. 4803 bool ShouldSwap; 4804 if (CmpInst::isSigned(LHSCC) || 4805 (ICmpInst::isEquality(LHSCC) && 4806 CmpInst::isSigned(RHSCC))) 4807 ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue()); 4808 else 4809 ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue()); 4810 4811 if (ShouldSwap) { 4812 std::swap(LHS, RHS); 4813 std::swap(LHSCst, RHSCst); 4814 std::swap(LHSCC, RHSCC); 4815 } 4816 4817 // At this point, we know we have have two icmp instructions 4818 // comparing a value against two constants and or'ing the result 4819 // together. Because of the above check, we know that we only have 4820 // ICMP_EQ, ICMP_NE, ICMP_LT, and ICMP_GT here. We also know (from the 4821 // FoldICmpLogical check above), that the two constants are not 4822 // equal. 4823 assert(LHSCst != RHSCst && "Compares not folded above?"); 4824 4825 switch (LHSCC) { 4826 default: llvm_unreachable("Unknown integer condition code!"); 4827 case ICmpInst::ICMP_EQ: 4828 switch (RHSCC) { 4829 default: llvm_unreachable("Unknown integer condition code!"); 4830 case ICmpInst::ICMP_EQ: 4831 if (LHSCst == SubOne(RHSCst)) { 4832 // (X == 13 | X == 14) -> X-13 <u 2 4833 Constant *AddCST = ConstantExpr::getNeg(LHSCst); 4834 Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off"); 4835 AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst); 4836 return new ICmpInst(ICmpInst::ICMP_ULT, Add, AddCST); 4837 } 4838 break; // (X == 13 | X == 15) -> no change 4839 case ICmpInst::ICMP_UGT: // (X == 13 | X u> 14) -> no change 4840 case ICmpInst::ICMP_SGT: // (X == 13 | X s> 14) -> no change 4841 break; 4842 case ICmpInst::ICMP_NE: // (X == 13 | X != 15) -> X != 15 4843 case ICmpInst::ICMP_ULT: // (X == 13 | X u< 15) -> X u< 15 4844 case ICmpInst::ICMP_SLT: // (X == 13 | X s< 15) -> X s< 15 4845 return ReplaceInstUsesWith(I, RHS); 4846 } 4847 break; 4848 case ICmpInst::ICMP_NE: 4849 switch (RHSCC) { 4850 default: llvm_unreachable("Unknown integer condition code!"); 4851 case ICmpInst::ICMP_EQ: // (X != 13 | X == 15) -> X != 13 4852 case ICmpInst::ICMP_UGT: // (X != 13 | X u> 15) -> X != 13 4853 case ICmpInst::ICMP_SGT: // (X != 13 | X s> 15) -> X != 13 4854 return ReplaceInstUsesWith(I, LHS); 4855 case ICmpInst::ICMP_NE: // (X != 13 | X != 15) -> true 4856 case ICmpInst::ICMP_ULT: // (X != 13 | X u< 15) -> true 4857 case ICmpInst::ICMP_SLT: // (X != 13 | X s< 15) -> true 4858 return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); 4859 } 4860 break; 4861 case ICmpInst::ICMP_ULT: 4862 switch (RHSCC) { 4863 default: llvm_unreachable("Unknown integer condition code!"); 4864 case ICmpInst::ICMP_EQ: // (X u< 13 | X == 14) -> no change 4865 break; 4866 case ICmpInst::ICMP_UGT: // (X u< 13 | X u> 15) -> (X-13) u> 2 4867 // If RHSCst is [us]MAXINT, it is always false. Not handling 4868 // this can cause overflow. 
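      // Hypothetical i8 example: in (X u< 13 | X u> 255) the right-hand
      // compare is always false and AddOne(RHSCst) would wrap to 0, so the
      // left-hand compare is kept instead of forming a bogus range test.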
4869 if (RHSCst->isMaxValue(false)) 4870 return ReplaceInstUsesWith(I, LHS); 4871 return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), 4872 false, false, I); 4873 case ICmpInst::ICMP_SGT: // (X u< 13 | X s> 15) -> no change 4874 break; 4875 case ICmpInst::ICMP_NE: // (X u< 13 | X != 15) -> X != 15 4876 case ICmpInst::ICMP_ULT: // (X u< 13 | X u< 15) -> X u< 15 4877 return ReplaceInstUsesWith(I, RHS); 4878 case ICmpInst::ICMP_SLT: // (X u< 13 | X s< 15) -> no change 4879 break; 4880 } 4881 break; 4882 case ICmpInst::ICMP_SLT: 4883 switch (RHSCC) { 4884 default: llvm_unreachable("Unknown integer condition code!"); 4885 case ICmpInst::ICMP_EQ: // (X s< 13 | X == 14) -> no change 4886 break; 4887 case ICmpInst::ICMP_SGT: // (X s< 13 | X s> 15) -> (X-13) s> 2 4888 // If RHSCst is [us]MAXINT, it is always false. Not handling 4889 // this can cause overflow. 4890 if (RHSCst->isMaxValue(true)) 4891 return ReplaceInstUsesWith(I, LHS); 4892 return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), 4893 true, false, I); 4894 case ICmpInst::ICMP_UGT: // (X s< 13 | X u> 15) -> no change 4895 break; 4896 case ICmpInst::ICMP_NE: // (X s< 13 | X != 15) -> X != 15 4897 case ICmpInst::ICMP_SLT: // (X s< 13 | X s< 15) -> X s< 15 4898 return ReplaceInstUsesWith(I, RHS); 4899 case ICmpInst::ICMP_ULT: // (X s< 13 | X u< 15) -> no change 4900 break; 4901 } 4902 break; 4903 case ICmpInst::ICMP_UGT: 4904 switch (RHSCC) { 4905 default: llvm_unreachable("Unknown integer condition code!"); 4906 case ICmpInst::ICMP_EQ: // (X u> 13 | X == 15) -> X u> 13 4907 case ICmpInst::ICMP_UGT: // (X u> 13 | X u> 15) -> X u> 13 4908 return ReplaceInstUsesWith(I, LHS); 4909 case ICmpInst::ICMP_SGT: // (X u> 13 | X s> 15) -> no change 4910 break; 4911 case ICmpInst::ICMP_NE: // (X u> 13 | X != 15) -> true 4912 case ICmpInst::ICMP_ULT: // (X u> 13 | X u< 15) -> true 4913 return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); 4914 case ICmpInst::ICMP_SLT: // (X u> 13 | X s< 15) -> no change 4915 break; 4916 } 4917 break; 4918 case ICmpInst::ICMP_SGT: 4919 switch (RHSCC) { 4920 default: llvm_unreachable("Unknown integer condition code!"); 4921 case ICmpInst::ICMP_EQ: // (X s> 13 | X == 15) -> X > 13 4922 case ICmpInst::ICMP_SGT: // (X s> 13 | X s> 15) -> X > 13 4923 return ReplaceInstUsesWith(I, LHS); 4924 case ICmpInst::ICMP_UGT: // (X s> 13 | X u> 15) -> no change 4925 break; 4926 case ICmpInst::ICMP_NE: // (X s> 13 | X != 15) -> true 4927 case ICmpInst::ICMP_SLT: // (X s> 13 | X s< 15) -> true 4928 return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); 4929 case ICmpInst::ICMP_ULT: // (X s> 13 | X u< 15) -> no change 4930 break; 4931 } 4932 break; 4933 } 4934 return 0; 4935} 4936 4937Instruction *InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, 4938 FCmpInst *RHS) { 4939 if (LHS->getPredicate() == FCmpInst::FCMP_UNO && 4940 RHS->getPredicate() == FCmpInst::FCMP_UNO && 4941 LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) { 4942 if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1))) 4943 if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) { 4944 // If either of the constants are nans, then the whole thing returns 4945 // true. 4946 if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) 4947 return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); 4948 4949 // Otherwise, no need to compare the two constants, compare the 4950 // rest. 4951 return new FCmpInst(FCmpInst::FCMP_UNO, 4952 LHS->getOperand(0), RHS->getOperand(0)); 4953 } 4954 4955 // Handle vector zeros. 
This occurs because the canonical form of 4956 // "fcmp uno x,x" is "fcmp uno x, 0". 4957 if (isa<ConstantAggregateZero>(LHS->getOperand(1)) && 4958 isa<ConstantAggregateZero>(RHS->getOperand(1))) 4959 return new FCmpInst(FCmpInst::FCMP_UNO, 4960 LHS->getOperand(0), RHS->getOperand(0)); 4961 4962 return 0; 4963 } 4964 4965 Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1); 4966 Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1); 4967 FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate(); 4968 4969 if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) { 4970 // Swap RHS operands to match LHS. 4971 Op1CC = FCmpInst::getSwappedPredicate(Op1CC); 4972 std::swap(Op1LHS, Op1RHS); 4973 } 4974 if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) { 4975 // Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y). 4976 if (Op0CC == Op1CC) 4977 return new FCmpInst((FCmpInst::Predicate)Op0CC, 4978 Op0LHS, Op0RHS); 4979 if (Op0CC == FCmpInst::FCMP_TRUE || Op1CC == FCmpInst::FCMP_TRUE) 4980 return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); 4981 if (Op0CC == FCmpInst::FCMP_FALSE) 4982 return ReplaceInstUsesWith(I, RHS); 4983 if (Op1CC == FCmpInst::FCMP_FALSE) 4984 return ReplaceInstUsesWith(I, LHS); 4985 bool Op0Ordered; 4986 bool Op1Ordered; 4987 unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); 4988 unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered); 4989 if (Op0Ordered == Op1Ordered) { 4990 // If both are ordered or unordered, return a new fcmp with 4991 // or'ed predicates. 4992 Value *RV = getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, 4993 Op0LHS, Op0RHS, Context); 4994 if (Instruction *I = dyn_cast<Instruction>(RV)) 4995 return I; 4996 // Otherwise, it's a constant boolean value... 4997 return ReplaceInstUsesWith(I, RV); 4998 } 4999 } 5000 return 0; 5001} 5002 5003/// FoldOrWithConstants - This helper function folds: 5004/// 5005/// ((A | B) & C1) | (B & C2) 5006/// 5007/// into: 5008/// 5009/// (A & C1) | B 5010/// 5011/// when the XOR of the two constants is "all ones" (-1). 5012Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op, 5013 Value *A, Value *B, Value *C) { 5014 ConstantInt *CI1 = dyn_cast<ConstantInt>(C); 5015 if (!CI1) return 0; 5016 5017 Value *V1 = 0; 5018 ConstantInt *CI2 = 0; 5019 if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2)))) return 0; 5020 5021 APInt Xor = CI1->getValue() ^ CI2->getValue(); 5022 if (!Xor.isAllOnesValue()) return 0; 5023 5024 if (V1 == A || V1 == B) { 5025 Value *NewOp = Builder->CreateAnd((V1 == A) ? B : A, CI1); 5026 return BinaryOperator::CreateOr(NewOp, V1); 5027 } 5028 5029 return 0; 5030} 5031 5032Instruction *InstCombiner::visitOr(BinaryOperator &I) { 5033 bool Changed = SimplifyCommutative(I); 5034 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 5035 5036 if (Value *V = SimplifyOrInst(Op0, Op1, TD)) 5037 return ReplaceInstUsesWith(I, V); 5038 5039 5040 // See if we can simplify any instructions used by the instruction whose sole 5041 // purpose is to compute bits we don't care about. 
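  // Illustrative sketch (hypothetical values): in
  //   %t = xor i32 %X, 7
  //   %r = or i32 %t, 7
  // the xor only flips bits that the constant 7 forces to one anyway, so the
  // demanded-bits analysis rewrites this to %r = or i32 %X, 7.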
5042 if (SimplifyDemandedInstructionBits(I)) 5043 return &I; 5044 5045 if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { 5046 ConstantInt *C1 = 0; Value *X = 0; 5047 // (X & C1) | C2 --> (X | C2) & (C1|C2) 5048 if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) && 5049 isOnlyUse(Op0)) { 5050 Value *Or = Builder->CreateOr(X, RHS); 5051 Or->takeName(Op0); 5052 return BinaryOperator::CreateAnd(Or, 5053 ConstantInt::get(*Context, RHS->getValue() | C1->getValue())); 5054 } 5055 5056 // (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2) 5057 if (match(Op0, m_Xor(m_Value(X), m_ConstantInt(C1))) && 5058 isOnlyUse(Op0)) { 5059 Value *Or = Builder->CreateOr(X, RHS); 5060 Or->takeName(Op0); 5061 return BinaryOperator::CreateXor(Or, 5062 ConstantInt::get(*Context, C1->getValue() & ~RHS->getValue())); 5063 } 5064 5065 // Try to fold constant and into select arguments. 5066 if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) 5067 if (Instruction *R = FoldOpIntoSelect(I, SI, this)) 5068 return R; 5069 if (isa<PHINode>(Op0)) 5070 if (Instruction *NV = FoldOpIntoPhi(I)) 5071 return NV; 5072 } 5073 5074 Value *A = 0, *B = 0; 5075 ConstantInt *C1 = 0, *C2 = 0; 5076 5077 // (A | B) | C and A | (B | C) -> bswap if possible. 5078 // (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible. 5079 if (match(Op0, m_Or(m_Value(), m_Value())) || 5080 match(Op1, m_Or(m_Value(), m_Value())) || 5081 (match(Op0, m_Shift(m_Value(), m_Value())) && 5082 match(Op1, m_Shift(m_Value(), m_Value())))) { 5083 if (Instruction *BSwap = MatchBSwap(I)) 5084 return BSwap; 5085 } 5086 5087 // (X^C)|Y -> (X|Y)^C iff Y&C == 0 5088 if (Op0->hasOneUse() && 5089 match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) && 5090 MaskedValueIsZero(Op1, C1->getValue())) { 5091 Value *NOr = Builder->CreateOr(A, Op1); 5092 NOr->takeName(Op0); 5093 return BinaryOperator::CreateXor(NOr, C1); 5094 } 5095 5096 // Y|(X^C) -> (X|Y)^C iff Y&C == 0 5097 if (Op1->hasOneUse() && 5098 match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) && 5099 MaskedValueIsZero(Op0, C1->getValue())) { 5100 Value *NOr = Builder->CreateOr(A, Op0); 5101 NOr->takeName(Op0); 5102 return BinaryOperator::CreateXor(NOr, C1); 5103 } 5104 5105 // (A & C)|(B & D) 5106 Value *C = 0, *D = 0; 5107 if (match(Op0, m_And(m_Value(A), m_Value(C))) && 5108 match(Op1, m_And(m_Value(B), m_Value(D)))) { 5109 Value *V1 = 0, *V2 = 0, *V3 = 0; 5110 C1 = dyn_cast<ConstantInt>(C); 5111 C2 = dyn_cast<ConstantInt>(D); 5112 if (C1 && C2) { // (A & C1)|(B & C2) 5113 // If we have: ((V + N) & C1) | (V & C2) 5114 // .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0 5115 // replace with V+N. 5116 if (C1->getValue() == ~C2->getValue()) { 5117 if ((C2->getValue() & (C2->getValue()+1)) == 0 && // C2 == 0+1+ 5118 match(A, m_Add(m_Value(V1), m_Value(V2)))) { 5119 // Add commutes, try both ways. 5120 if (V1 == B && MaskedValueIsZero(V2, C2->getValue())) 5121 return ReplaceInstUsesWith(I, A); 5122 if (V2 == B && MaskedValueIsZero(V1, C2->getValue())) 5123 return ReplaceInstUsesWith(I, A); 5124 } 5125 // Or commutes, try both ways. 5126 if ((C1->getValue() & (C1->getValue()+1)) == 0 && 5127 match(B, m_Add(m_Value(V1), m_Value(V2)))) { 5128 // Add commutes, try both ways. 
5129 if (V1 == A && MaskedValueIsZero(V2, C1->getValue())) 5130 return ReplaceInstUsesWith(I, B); 5131 if (V2 == A && MaskedValueIsZero(V1, C1->getValue())) 5132 return ReplaceInstUsesWith(I, B); 5133 } 5134 } 5135 5136 // ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2) 5137 // iff (C1&C2) == 0 and (N&~C1) == 0 5138 if ((C1->getValue() & C2->getValue()) == 0) { 5139 if (match(A, m_Or(m_Value(V1), m_Value(V2))) && 5140 ((V1 == B && MaskedValueIsZero(V2, ~C1->getValue())) || // (V|N) 5141 (V2 == B && MaskedValueIsZero(V1, ~C1->getValue())))) // (N|V) 5142 return BinaryOperator::CreateAnd(A, 5143 ConstantInt::get(A->getContext(), 5144 C1->getValue()|C2->getValue())); 5145 // Or commutes, try both ways. 5146 if (match(B, m_Or(m_Value(V1), m_Value(V2))) && 5147 ((V1 == A && MaskedValueIsZero(V2, ~C2->getValue())) || // (V|N) 5148 (V2 == A && MaskedValueIsZero(V1, ~C2->getValue())))) // (N|V) 5149 return BinaryOperator::CreateAnd(B, 5150 ConstantInt::get(B->getContext(), 5151 C1->getValue()|C2->getValue())); 5152 } 5153 } 5154 5155 // Check to see if we have any common things being and'ed. If so, find the 5156 // terms for V1 & (V2|V3). 5157 if (isOnlyUse(Op0) || isOnlyUse(Op1)) { 5158 V1 = 0; 5159 if (A == B) // (A & C)|(A & D) == A & (C|D) 5160 V1 = A, V2 = C, V3 = D; 5161 else if (A == D) // (A & C)|(B & A) == A & (B|C) 5162 V1 = A, V2 = B, V3 = C; 5163 else if (C == B) // (A & C)|(C & D) == C & (A|D) 5164 V1 = C, V2 = A, V3 = D; 5165 else if (C == D) // (A & C)|(B & C) == C & (A|B) 5166 V1 = C, V2 = A, V3 = B; 5167 5168 if (V1) { 5169 Value *Or = Builder->CreateOr(V2, V3, "tmp"); 5170 return BinaryOperator::CreateAnd(V1, Or); 5171 } 5172 } 5173 5174 // (A & (C0?-1:0)) | (B & ~(C0?-1:0)) -> C0 ? A : B, and commuted variants 5175 if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D, Context)) 5176 return Match; 5177 if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C, Context)) 5178 return Match; 5179 if (Instruction *Match = MatchSelectFromAndOr(C, B, A, D, Context)) 5180 return Match; 5181 if (Instruction *Match = MatchSelectFromAndOr(D, A, B, C, Context)) 5182 return Match; 5183 5184 // ((A&~B)|(~A&B)) -> A^B 5185 if ((match(C, m_Not(m_Specific(D))) && 5186 match(B, m_Not(m_Specific(A))))) 5187 return BinaryOperator::CreateXor(A, D); 5188 // ((~B&A)|(~A&B)) -> A^B 5189 if ((match(A, m_Not(m_Specific(D))) && 5190 match(B, m_Not(m_Specific(C))))) 5191 return BinaryOperator::CreateXor(C, D); 5192 // ((A&~B)|(B&~A)) -> A^B 5193 if ((match(C, m_Not(m_Specific(B))) && 5194 match(D, m_Not(m_Specific(A))))) 5195 return BinaryOperator::CreateXor(A, B); 5196 // ((~B&A)|(B&~A)) -> A^B 5197 if ((match(A, m_Not(m_Specific(B))) && 5198 match(D, m_Not(m_Specific(C))))) 5199 return BinaryOperator::CreateXor(C, B); 5200 } 5201 5202 // (X >> Z) | (Y >> Z) -> (X|Y) >> Z for all shifts. 
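    // Illustrative sketch (hypothetical values):
    //   %a = shl i32 %X, %Z
    //   %b = shl i32 %Y, %Z
    //   %r = or i32 %a, %b         ; --> %r = shl i32 (or %X, %Y), %Z
    // provided at least one of the shifts has no other users.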
5203 if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) { 5204 if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0)) 5205 if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() && 5206 SI0->getOperand(1) == SI1->getOperand(1) && 5207 (SI0->hasOneUse() || SI1->hasOneUse())) { 5208 Value *NewOp = Builder->CreateOr(SI0->getOperand(0), SI1->getOperand(0), 5209 SI0->getName()); 5210 return BinaryOperator::Create(SI1->getOpcode(), NewOp, 5211 SI1->getOperand(1)); 5212 } 5213 } 5214 5215 // ((A|B)&1)|(B&-2) -> (A&1) | B 5216 if (match(Op0, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) || 5217 match(Op0, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) { 5218 Instruction *Ret = FoldOrWithConstants(I, Op1, A, B, C); 5219 if (Ret) return Ret; 5220 } 5221 // (B&-2)|((A|B)&1) -> (A&1) | B 5222 if (match(Op1, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) || 5223 match(Op1, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) { 5224 Instruction *Ret = FoldOrWithConstants(I, Op0, A, B, C); 5225 if (Ret) return Ret; 5226 } 5227 5228 // (~A | ~B) == (~(A & B)) - De Morgan's Law 5229 if (Value *Op0NotVal = dyn_castNotVal(Op0)) 5230 if (Value *Op1NotVal = dyn_castNotVal(Op1)) 5231 if (Op0->hasOneUse() && Op1->hasOneUse()) { 5232 Value *And = Builder->CreateAnd(Op0NotVal, Op1NotVal, 5233 I.getName()+".demorgan"); 5234 return BinaryOperator::CreateNot(And); 5235 } 5236 5237 // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B) 5238 if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) { 5239 if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS))) 5240 return R; 5241 5242 if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0))) 5243 if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS)) 5244 return Res; 5245 } 5246 5247 // fold (or (cast A), (cast B)) -> (cast (or A, B)) 5248 if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) { 5249 if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) 5250 if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ? 5251 if (!isa<ICmpInst>(Op0C->getOperand(0)) || 5252 !isa<ICmpInst>(Op1C->getOperand(0))) { 5253 const Type *SrcTy = Op0C->getOperand(0)->getType(); 5254 if (SrcTy == Op1C->getOperand(0)->getType() && 5255 SrcTy->isIntOrIntVector() && 5256 // Only do this if the casts both really cause code to be 5257 // generated. 5258 ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), 5259 I.getType(), TD) && 5260 ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), 5261 I.getType(), TD)) { 5262 Value *NewOp = Builder->CreateOr(Op0C->getOperand(0), 5263 Op1C->getOperand(0), I.getName()); 5264 return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); 5265 } 5266 } 5267 } 5268 } 5269 5270 5271 // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y) 5272 if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) { 5273 if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) 5274 if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS)) 5275 return Res; 5276 } 5277 5278 return Changed ? 
&I : 0; 5279} 5280 5281namespace { 5282 5283// XorSelf - Implements: X ^ X --> 0 5284struct XorSelf { 5285 Value *RHS; 5286 XorSelf(Value *rhs) : RHS(rhs) {} 5287 bool shouldApply(Value *LHS) const { return LHS == RHS; } 5288 Instruction *apply(BinaryOperator &Xor) const { 5289 return &Xor; 5290 } 5291}; 5292 5293} 5294 5295Instruction *InstCombiner::visitXor(BinaryOperator &I) { 5296 bool Changed = SimplifyCommutative(I); 5297 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 5298 5299 if (isa<UndefValue>(Op1)) { 5300 if (isa<UndefValue>(Op0)) 5301 // Handle undef ^ undef -> 0 special case. This is a common 5302 // idiom (misuse). 5303 return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); 5304 return ReplaceInstUsesWith(I, Op1); // X ^ undef -> undef 5305 } 5306 5307 // xor X, X = 0, even if X is nested in a sequence of Xor's. 5308 if (Instruction *Result = AssociativeOpt(I, XorSelf(Op1))) { 5309 assert(Result == &I && "AssociativeOpt didn't work?"); Result=Result; 5310 return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); 5311 } 5312 5313 // See if we can simplify any instructions used by the instruction whose sole 5314 // purpose is to compute bits we don't care about. 5315 if (SimplifyDemandedInstructionBits(I)) 5316 return &I; 5317 if (isa<VectorType>(I.getType())) 5318 if (isa<ConstantAggregateZero>(Op1)) 5319 return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X 5320 5321 // Is this a ~ operation? 5322 if (Value *NotOp = dyn_castNotVal(&I)) { 5323 if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(NotOp)) { 5324 if (Op0I->getOpcode() == Instruction::And || 5325 Op0I->getOpcode() == Instruction::Or) { 5326 // ~(~X & Y) --> (X | ~Y) - De Morgan's Law 5327 // ~(~X | Y) === (X & ~Y) - De Morgan's Law 5328 if (dyn_castNotVal(Op0I->getOperand(1))) 5329 Op0I->swapOperands(); 5330 if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) { 5331 Value *NotY = 5332 Builder->CreateNot(Op0I->getOperand(1), 5333 Op0I->getOperand(1)->getName()+".not"); 5334 if (Op0I->getOpcode() == Instruction::And) 5335 return BinaryOperator::CreateOr(Op0NotVal, NotY); 5336 return BinaryOperator::CreateAnd(Op0NotVal, NotY); 5337 } 5338 5339 // ~(X & Y) --> (~X | ~Y) - De Morgan's Law 5340 // ~(X | Y) === (~X & ~Y) - De Morgan's Law 5341 if (isFreeToInvert(Op0I->getOperand(0)) && 5342 isFreeToInvert(Op0I->getOperand(1))) { 5343 Value *NotX = 5344 Builder->CreateNot(Op0I->getOperand(0), "notlhs"); 5345 Value *NotY = 5346 Builder->CreateNot(Op0I->getOperand(1), "notrhs"); 5347 if (Op0I->getOpcode() == Instruction::And) 5348 return BinaryOperator::CreateOr(NotX, NotY); 5349 return BinaryOperator::CreateAnd(NotX, NotY); 5350 } 5351 } 5352 } 5353 } 5354 5355 5356 if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { 5357 if (RHS->isOne() && Op0->hasOneUse()) { 5358 // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B 5359 if (ICmpInst *ICI = dyn_cast<ICmpInst>(Op0)) 5360 return new ICmpInst(ICI->getInversePredicate(), 5361 ICI->getOperand(0), ICI->getOperand(1)); 5362 5363 if (FCmpInst *FCI = dyn_cast<FCmpInst>(Op0)) 5364 return new FCmpInst(FCI->getInversePredicate(), 5365 FCI->getOperand(0), FCI->getOperand(1)); 5366 } 5367 5368 // fold (xor(zext(cmp)), 1) and (xor(sext(cmp)), -1) to ext(!cmp). 
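    // Illustrative sketch (hypothetical values):
    //   %c = icmp eq i32 %A, %B
    //   %z = zext i1 %c to i32
    //   %r = xor i32 %z, 1         ; --> %r = zext i1 (icmp ne i32 %A, %B) to i32
    // and likewise for sext with an all-ones xor constant.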
5369 if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) { 5370 if (CmpInst *CI = dyn_cast<CmpInst>(Op0C->getOperand(0))) { 5371 if (CI->hasOneUse() && Op0C->hasOneUse()) { 5372 Instruction::CastOps Opcode = Op0C->getOpcode(); 5373 if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) && 5374 (RHS == ConstantExpr::getCast(Opcode, 5375 ConstantInt::getTrue(*Context), 5376 Op0C->getDestTy()))) { 5377 CI->setPredicate(CI->getInversePredicate()); 5378 return CastInst::Create(Opcode, CI, Op0C->getType()); 5379 } 5380 } 5381 } 5382 } 5383 5384 if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) { 5385 // ~(c-X) == X-c-1 == X+(-c-1) 5386 if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue()) 5387 if (Constant *Op0I0C = dyn_cast<Constant>(Op0I->getOperand(0))) { 5388 Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C); 5389 Constant *ConstantRHS = ConstantExpr::getSub(NegOp0I0C, 5390 ConstantInt::get(I.getType(), 1)); 5391 return BinaryOperator::CreateAdd(Op0I->getOperand(1), ConstantRHS); 5392 } 5393 5394 if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) { 5395 if (Op0I->getOpcode() == Instruction::Add) { 5396 // ~(X-c) --> (-c-1)-X 5397 if (RHS->isAllOnesValue()) { 5398 Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI); 5399 return BinaryOperator::CreateSub( 5400 ConstantExpr::getSub(NegOp0CI, 5401 ConstantInt::get(I.getType(), 1)), 5402 Op0I->getOperand(0)); 5403 } else if (RHS->getValue().isSignBit()) { 5404 // (X + C) ^ signbit -> (X + C + signbit) 5405 Constant *C = ConstantInt::get(*Context, 5406 RHS->getValue() + Op0CI->getValue()); 5407 return BinaryOperator::CreateAdd(Op0I->getOperand(0), C); 5408 5409 } 5410 } else if (Op0I->getOpcode() == Instruction::Or) { 5411 // (X|C1)^C2 -> X^(C1|C2) iff X&~C1 == 0 5412 if (MaskedValueIsZero(Op0I->getOperand(0), Op0CI->getValue())) { 5413 Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS); 5414 // Anything in both C1 and C2 is known to be zero, remove it from 5415 // NewRHS. 5416 Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS); 5417 NewRHS = ConstantExpr::getAnd(NewRHS, 5418 ConstantExpr::getNot(CommonBits)); 5419 Worklist.Add(Op0I); 5420 I.setOperand(0, Op0I->getOperand(0)); 5421 I.setOperand(1, NewRHS); 5422 return &I; 5423 } 5424 } 5425 } 5426 } 5427 5428 // Try to fold constant and into select arguments. 5429 if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) 5430 if (Instruction *R = FoldOpIntoSelect(I, SI, this)) 5431 return R; 5432 if (isa<PHINode>(Op0)) 5433 if (Instruction *NV = FoldOpIntoPhi(I)) 5434 return NV; 5435 } 5436 5437 if (Value *X = dyn_castNotVal(Op0)) // ~A ^ A == -1 5438 if (X == Op1) 5439 return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); 5440 5441 if (Value *X = dyn_castNotVal(Op1)) // A ^ ~A == -1 5442 if (X == Op0) 5443 return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); 5444 5445 5446 BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1); 5447 if (Op1I) { 5448 Value *A, *B; 5449 if (match(Op1I, m_Or(m_Value(A), m_Value(B)))) { 5450 if (A == Op0) { // B^(B|A) == (A|B)^B 5451 Op1I->swapOperands(); 5452 I.swapOperands(); 5453 std::swap(Op0, Op1); 5454 } else if (B == Op0) { // B^(A|B) == (A|B)^B 5455 I.swapOperands(); // Simplified below. 
5456 std::swap(Op0, Op1); 5457 } 5458 } else if (match(Op1I, m_Xor(m_Specific(Op0), m_Value(B)))) { 5459 return ReplaceInstUsesWith(I, B); // A^(A^B) == B 5460 } else if (match(Op1I, m_Xor(m_Value(A), m_Specific(Op0)))) { 5461 return ReplaceInstUsesWith(I, A); // A^(B^A) == B 5462 } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) && 5463 Op1I->hasOneUse()){ 5464 if (A == Op0) { // A^(A&B) -> A^(B&A) 5465 Op1I->swapOperands(); 5466 std::swap(A, B); 5467 } 5468 if (B == Op0) { // A^(B&A) -> (B&A)^A 5469 I.swapOperands(); // Simplified below. 5470 std::swap(Op0, Op1); 5471 } 5472 } 5473 } 5474 5475 BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0); 5476 if (Op0I) { 5477 Value *A, *B; 5478 if (match(Op0I, m_Or(m_Value(A), m_Value(B))) && 5479 Op0I->hasOneUse()) { 5480 if (A == Op1) // (B|A)^B == (A|B)^B 5481 std::swap(A, B); 5482 if (B == Op1) // (A|B)^B == A & ~B 5483 return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1, "tmp")); 5484 } else if (match(Op0I, m_Xor(m_Specific(Op1), m_Value(B)))) { 5485 return ReplaceInstUsesWith(I, B); // (A^B)^A == B 5486 } else if (match(Op0I, m_Xor(m_Value(A), m_Specific(Op1)))) { 5487 return ReplaceInstUsesWith(I, A); // (B^A)^A == B 5488 } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) && 5489 Op0I->hasOneUse()){ 5490 if (A == Op1) // (A&B)^A -> (B&A)^A 5491 std::swap(A, B); 5492 if (B == Op1 && // (B&A)^A == ~B & A 5493 !isa<ConstantInt>(Op1)) { // Canonical form is (B&C)^C 5494 return BinaryOperator::CreateAnd(Builder->CreateNot(A, "tmp"), Op1); 5495 } 5496 } 5497 } 5498 5499 // (X >> Z) ^ (Y >> Z) -> (X^Y) >> Z for all shifts. 5500 if (Op0I && Op1I && Op0I->isShift() && 5501 Op0I->getOpcode() == Op1I->getOpcode() && 5502 Op0I->getOperand(1) == Op1I->getOperand(1) && 5503 (Op1I->hasOneUse() || Op1I->hasOneUse())) { 5504 Value *NewOp = 5505 Builder->CreateXor(Op0I->getOperand(0), Op1I->getOperand(0), 5506 Op0I->getName()); 5507 return BinaryOperator::Create(Op1I->getOpcode(), NewOp, 5508 Op1I->getOperand(1)); 5509 } 5510 5511 if (Op0I && Op1I) { 5512 Value *A, *B, *C, *D; 5513 // (A & B)^(A | B) -> A ^ B 5514 if (match(Op0I, m_And(m_Value(A), m_Value(B))) && 5515 match(Op1I, m_Or(m_Value(C), m_Value(D)))) { 5516 if ((A == C && B == D) || (A == D && B == C)) 5517 return BinaryOperator::CreateXor(A, B); 5518 } 5519 // (A | B)^(A & B) -> A ^ B 5520 if (match(Op0I, m_Or(m_Value(A), m_Value(B))) && 5521 match(Op1I, m_And(m_Value(C), m_Value(D)))) { 5522 if ((A == C && B == D) || (A == D && B == C)) 5523 return BinaryOperator::CreateXor(A, B); 5524 } 5525 5526 // (A & B)^(C & D) 5527 if ((Op0I->hasOneUse() || Op1I->hasOneUse()) && 5528 match(Op0I, m_And(m_Value(A), m_Value(B))) && 5529 match(Op1I, m_And(m_Value(C), m_Value(D)))) { 5530 // (X & Y)^(X & Y) -> (Y^Z) & X 5531 Value *X = 0, *Y = 0, *Z = 0; 5532 if (A == C) 5533 X = A, Y = B, Z = D; 5534 else if (A == D) 5535 X = A, Y = B, Z = C; 5536 else if (B == C) 5537 X = B, Y = A, Z = D; 5538 else if (B == D) 5539 X = B, Y = A, Z = C; 5540 5541 if (X) { 5542 Value *NewOp = Builder->CreateXor(Y, Z, Op0->getName()); 5543 return BinaryOperator::CreateAnd(NewOp, X); 5544 } 5545 } 5546 } 5547 5548 // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B) 5549 if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) 5550 if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS))) 5551 return R; 5552 5553 // fold (xor (cast A), (cast B)) -> (cast (xor A, B)) 5554 if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) { 5555 if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) 5556 if (Op0C->getOpcode() == 
Op1C->getOpcode()) { // same cast kind? 5557 const Type *SrcTy = Op0C->getOperand(0)->getType(); 5558 if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() && 5559 // Only do this if the casts both really cause code to be generated. 5560 ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), 5561 I.getType(), TD) && 5562 ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), 5563 I.getType(), TD)) { 5564 Value *NewOp = Builder->CreateXor(Op0C->getOperand(0), 5565 Op1C->getOperand(0), I.getName()); 5566 return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); 5567 } 5568 } 5569 } 5570 5571 return Changed ? &I : 0; 5572} 5573 5574static ConstantInt *ExtractElement(Constant *V, Constant *Idx, 5575 LLVMContext *Context) { 5576 return cast<ConstantInt>(ConstantExpr::getExtractElement(V, Idx)); 5577} 5578 5579static bool HasAddOverflow(ConstantInt *Result, 5580 ConstantInt *In1, ConstantInt *In2, 5581 bool IsSigned) { 5582 if (IsSigned) 5583 if (In2->getValue().isNegative()) 5584 return Result->getValue().sgt(In1->getValue()); 5585 else 5586 return Result->getValue().slt(In1->getValue()); 5587 else 5588 return Result->getValue().ult(In1->getValue()); 5589} 5590 5591/// AddWithOverflow - Compute Result = In1+In2, returning true if the result 5592/// overflowed for this type. 5593static bool AddWithOverflow(Constant *&Result, Constant *In1, 5594 Constant *In2, LLVMContext *Context, 5595 bool IsSigned = false) { 5596 Result = ConstantExpr::getAdd(In1, In2); 5597 5598 if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) { 5599 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { 5600 Constant *Idx = ConstantInt::get(Type::getInt32Ty(*Context), i); 5601 if (HasAddOverflow(ExtractElement(Result, Idx, Context), 5602 ExtractElement(In1, Idx, Context), 5603 ExtractElement(In2, Idx, Context), 5604 IsSigned)) 5605 return true; 5606 } 5607 return false; 5608 } 5609 5610 return HasAddOverflow(cast<ConstantInt>(Result), 5611 cast<ConstantInt>(In1), cast<ConstantInt>(In2), 5612 IsSigned); 5613} 5614 5615static bool HasSubOverflow(ConstantInt *Result, 5616 ConstantInt *In1, ConstantInt *In2, 5617 bool IsSigned) { 5618 if (IsSigned) 5619 if (In2->getValue().isNegative()) 5620 return Result->getValue().slt(In1->getValue()); 5621 else 5622 return Result->getValue().sgt(In1->getValue()); 5623 else 5624 return Result->getValue().ugt(In1->getValue()); 5625} 5626 5627/// SubWithOverflow - Compute Result = In1-In2, returning true if the result 5628/// overflowed for this type. 5629static bool SubWithOverflow(Constant *&Result, Constant *In1, 5630 Constant *In2, LLVMContext *Context, 5631 bool IsSigned = false) { 5632 Result = ConstantExpr::getSub(In1, In2); 5633 5634 if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) { 5635 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { 5636 Constant *Idx = ConstantInt::get(Type::getInt32Ty(*Context), i); 5637 if (HasSubOverflow(ExtractElement(Result, Idx, Context), 5638 ExtractElement(In1, Idx, Context), 5639 ExtractElement(In2, Idx, Context), 5640 IsSigned)) 5641 return true; 5642 } 5643 return false; 5644 } 5645 5646 return HasSubOverflow(cast<ConstantInt>(Result), 5647 cast<ConstantInt>(In1), cast<ConstantInt>(In2), 5648 IsSigned); 5649} 5650 5651 5652/// FoldGEPICmp - Fold comparisons between a GEP instruction and something 5653/// else. At this point we know that the GEP is on the LHS of the comparison. 
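/// For example (illustrative only), comparing an inbounds GEP against its own
/// base pointer, e.g. (icmp eq (gep inbounds i32* %P, i64 %i), %P), reduces to
/// comparing the computed offset against zero.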
5654Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, 5655 ICmpInst::Predicate Cond, 5656 Instruction &I) { 5657 // Look through bitcasts. 5658 if (BitCastInst *BCI = dyn_cast<BitCastInst>(RHS)) 5659 RHS = BCI->getOperand(0); 5660 5661 Value *PtrBase = GEPLHS->getOperand(0); 5662 if (TD && PtrBase == RHS && GEPLHS->isInBounds()) { 5663 // ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0). 5664 // This transformation (ignoring the base and scales) is valid because we 5665 // know pointers can't overflow since the gep is inbounds. See if we can 5666 // output an optimized form. 5667 Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, I, *this); 5668 5669 // If not, synthesize the offset the hard way. 5670 if (Offset == 0) 5671 Offset = EmitGEPOffset(GEPLHS, *this); 5672 return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset, 5673 Constant::getNullValue(Offset->getType())); 5674 } else if (GEPOperator *GEPRHS = dyn_cast<GEPOperator>(RHS)) { 5675 // If the base pointers are different, but the indices are the same, just 5676 // compare the base pointer. 5677 if (PtrBase != GEPRHS->getOperand(0)) { 5678 bool IndicesTheSame = GEPLHS->getNumOperands()==GEPRHS->getNumOperands(); 5679 IndicesTheSame &= GEPLHS->getOperand(0)->getType() == 5680 GEPRHS->getOperand(0)->getType(); 5681 if (IndicesTheSame) 5682 for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i) 5683 if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) { 5684 IndicesTheSame = false; 5685 break; 5686 } 5687 5688 // If all indices are the same, just compare the base pointers. 5689 if (IndicesTheSame) 5690 return new ICmpInst(ICmpInst::getSignedPredicate(Cond), 5691 GEPLHS->getOperand(0), GEPRHS->getOperand(0)); 5692 5693 // Otherwise, the base pointers are different and the indices are 5694 // different, bail out. 5695 return 0; 5696 } 5697 5698 // If one of the GEPs has all zero indices, recurse. 5699 bool AllZeros = true; 5700 for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i) 5701 if (!isa<Constant>(GEPLHS->getOperand(i)) || 5702 !cast<Constant>(GEPLHS->getOperand(i))->isNullValue()) { 5703 AllZeros = false; 5704 break; 5705 } 5706 if (AllZeros) 5707 return FoldGEPICmp(GEPRHS, GEPLHS->getOperand(0), 5708 ICmpInst::getSwappedPredicate(Cond), I); 5709 5710 // If the other GEP has all zero indices, recurse. 5711 AllZeros = true; 5712 for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i) 5713 if (!isa<Constant>(GEPRHS->getOperand(i)) || 5714 !cast<Constant>(GEPRHS->getOperand(i))->isNullValue()) { 5715 AllZeros = false; 5716 break; 5717 } 5718 if (AllZeros) 5719 return FoldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I); 5720 5721 if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands()) { 5722 // If the GEPs only differ by one index, compare it. 5723 unsigned NumDifferences = 0; // Keep track of # differences. 5724 unsigned DiffOperand = 0; // The operand that differs. 5725 for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i) 5726 if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) { 5727 if (GEPLHS->getOperand(i)->getType()->getPrimitiveSizeInBits() != 5728 GEPRHS->getOperand(i)->getType()->getPrimitiveSizeInBits()) { 5729 // Irreconcilable differences. 5730 NumDifferences = 2; 5731 break; 5732 } else { 5733 if (NumDifferences++) break; 5734 DiffOperand = i; 5735 } 5736 } 5737 5738 if (NumDifferences == 0) // SAME GEP? 5739 return ReplaceInstUsesWith(I, // No comparison is needed here. 
5740 ConstantInt::get(Type::getInt1Ty(*Context), 5741 ICmpInst::isTrueWhenEqual(Cond))); 5742 5743 else if (NumDifferences == 1) { 5744 Value *LHSV = GEPLHS->getOperand(DiffOperand); 5745 Value *RHSV = GEPRHS->getOperand(DiffOperand); 5746 // Make sure we do a signed comparison here. 5747 return new ICmpInst(ICmpInst::getSignedPredicate(Cond), LHSV, RHSV); 5748 } 5749 } 5750 5751 // Only lower this if the icmp is the only user of the GEP or if we expect 5752 // the result to fold to a constant! 5753 if (TD && 5754 (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) && 5755 (isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) { 5756 // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2) ---> (OFFSET1 cmp OFFSET2) 5757 Value *L = EmitGEPOffset(GEPLHS, *this); 5758 Value *R = EmitGEPOffset(GEPRHS, *this); 5759 return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R); 5760 } 5761 } 5762 return 0; 5763} 5764 5765/// FoldFCmp_IntToFP_Cst - Fold fcmp ([us]itofp x, cst) if possible. 5766/// 5767Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, 5768 Instruction *LHSI, 5769 Constant *RHSC) { 5770 if (!isa<ConstantFP>(RHSC)) return 0; 5771 const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF(); 5772 5773 // Get the width of the mantissa. We don't want to hack on conversions that 5774 // might lose information from the integer, e.g. "i64 -> float" 5775 int MantissaWidth = LHSI->getType()->getFPMantissaWidth(); 5776 if (MantissaWidth == -1) return 0; // Unknown. 5777 5778 // Check to see that the input is converted from an integer type that is small 5779 // enough that preserves all bits. TODO: check here for "known" sign bits. 5780 // This would allow us to handle (fptosi (x >>s 62) to float) if x is i64 f.e. 5781 unsigned InputSize = LHSI->getOperand(0)->getType()->getScalarSizeInBits(); 5782 5783 // If this is a uitofp instruction, we need an extra bit to hold the sign. 5784 bool LHSUnsigned = isa<UIToFPInst>(LHSI); 5785 if (LHSUnsigned) 5786 ++InputSize; 5787 5788 // If the conversion would lose info, don't hack on this. 5789 if ((int)InputSize > MantissaWidth) 5790 return 0; 5791 5792 // Otherwise, we can potentially simplify the comparison. We know that it 5793 // will always come through as an integer value and we know the constant is 5794 // not a NAN (it would have been previously simplified). 5795 assert(!RHS.isNaN() && "NaN comparison not already folded!"); 5796 5797 ICmpInst::Predicate Pred; 5798 switch (I.getPredicate()) { 5799 default: llvm_unreachable("Unexpected predicate!"); 5800 case FCmpInst::FCMP_UEQ: 5801 case FCmpInst::FCMP_OEQ: 5802 Pred = ICmpInst::ICMP_EQ; 5803 break; 5804 case FCmpInst::FCMP_UGT: 5805 case FCmpInst::FCMP_OGT: 5806 Pred = LHSUnsigned ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_SGT; 5807 break; 5808 case FCmpInst::FCMP_UGE: 5809 case FCmpInst::FCMP_OGE: 5810 Pred = LHSUnsigned ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_SGE; 5811 break; 5812 case FCmpInst::FCMP_ULT: 5813 case FCmpInst::FCMP_OLT: 5814 Pred = LHSUnsigned ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_SLT; 5815 break; 5816 case FCmpInst::FCMP_ULE: 5817 case FCmpInst::FCMP_OLE: 5818 Pred = LHSUnsigned ? 
ICmpInst::ICMP_ULE : ICmpInst::ICMP_SLE; 5819 break; 5820 case FCmpInst::FCMP_UNE: 5821 case FCmpInst::FCMP_ONE: 5822 Pred = ICmpInst::ICMP_NE; 5823 break; 5824 case FCmpInst::FCMP_ORD: 5825 return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); 5826 case FCmpInst::FCMP_UNO: 5827 return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); 5828 } 5829 5830 const IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType()); 5831 5832 // Now we know that the APFloat is a normal number, zero or inf. 5833 5834 // See if the FP constant is too large for the integer. For example, 5835 // comparing an i8 to 300.0. 5836 unsigned IntWidth = IntTy->getScalarSizeInBits(); 5837 5838 if (!LHSUnsigned) { 5839 // If the RHS value is > SignedMax, fold the comparison. This handles +INF 5840 // and large values. 5841 APFloat SMax(RHS.getSemantics(), APFloat::fcZero, false); 5842 SMax.convertFromAPInt(APInt::getSignedMaxValue(IntWidth), true, 5843 APFloat::rmNearestTiesToEven); 5844 if (SMax.compare(RHS) == APFloat::cmpLessThan) { // smax < 13123.0 5845 if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SLT || 5846 Pred == ICmpInst::ICMP_SLE) 5847 return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); 5848 return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); 5849 } 5850 } else { 5851 // If the RHS value is > UnsignedMax, fold the comparison. This handles 5852 // +INF and large values. 5853 APFloat UMax(RHS.getSemantics(), APFloat::fcZero, false); 5854 UMax.convertFromAPInt(APInt::getMaxValue(IntWidth), false, 5855 APFloat::rmNearestTiesToEven); 5856 if (UMax.compare(RHS) == APFloat::cmpLessThan) { // umax < 13123.0 5857 if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_ULT || 5858 Pred == ICmpInst::ICMP_ULE) 5859 return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); 5860 return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); 5861 } 5862 } 5863 5864 if (!LHSUnsigned) { 5865 // See if the RHS value is < SignedMin. 5866 APFloat SMin(RHS.getSemantics(), APFloat::fcZero, false); 5867 SMin.convertFromAPInt(APInt::getSignedMinValue(IntWidth), true, 5868 APFloat::rmNearestTiesToEven); 5869 if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // smin > 12312.0 5870 if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT || 5871 Pred == ICmpInst::ICMP_SGE) 5872 return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); 5873 return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); 5874 } 5875 } 5876 5877 // Okay, now we know that the FP constant fits in the range [SMIN, SMAX] or 5878 // [0, UMAX], but it may still be fractional. See if it is fractional by 5879 // casting the FP value to the integer value and back, checking for equality. 5880 // Don't do this for zero, because -0.0 is not fractional. 5881 Constant *RHSInt = LHSUnsigned 5882 ? ConstantExpr::getFPToUI(RHSC, IntTy) 5883 : ConstantExpr::getFPToSI(RHSC, IntTy); 5884 if (!RHS.isZero()) { 5885 bool Equal = LHSUnsigned 5886 ? ConstantExpr::getUIToFP(RHSInt, RHSC->getType()) == RHSC 5887 : ConstantExpr::getSIToFP(RHSInt, RHSC->getType()) == RHSC; 5888 if (!Equal) { 5889 // If we had a comparison against a fractional value, we have to adjust 5890 // the compare predicate and sometimes the value. RHSC is rounded towards 5891 // zero at this point. 
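      // Illustrative example: with RHSC rounded towards zero,
      //   (float)int <  4.4  --> int s<= 4
      //   (float)int < -4.4  --> int s<  -4
      // as handled case by case below.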
      switch (Pred) {
      default: llvm_unreachable("Unexpected integer comparison!");
      case ICmpInst::ICMP_NE:  // (float)int != 4.4   --> true
        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
      case ICmpInst::ICMP_EQ:  // (float)int == 4.4   --> false
        return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
      case ICmpInst::ICMP_ULE:
        // (float)int <= 4.4   --> int <= 4
        // (float)int <= -4.4  --> false
        if (RHS.isNegative())
          return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
        break;
      case ICmpInst::ICMP_SLE:
        // (float)int <= 4.4   --> int <= 4
        // (float)int <= -4.4  --> int < -4
        if (RHS.isNegative())
          Pred = ICmpInst::ICMP_SLT;
        break;
      case ICmpInst::ICMP_ULT:
        // (float)int < -4.4   --> false
        // (float)int < 4.4    --> int <= 4
        if (RHS.isNegative())
          return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
        Pred = ICmpInst::ICMP_ULE;
        break;
      case ICmpInst::ICMP_SLT:
        // (float)int < -4.4   --> int < -4
        // (float)int < 4.4    --> int <= 4
        if (!RHS.isNegative())
          Pred = ICmpInst::ICMP_SLE;
        break;
      case ICmpInst::ICMP_UGT:
        // (float)int > 4.4    --> int > 4
        // (float)int > -4.4   --> true
        if (RHS.isNegative())
          return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
        break;
      case ICmpInst::ICMP_SGT:
        // (float)int > 4.4    --> int > 4
        // (float)int > -4.4   --> int >= -4
        if (RHS.isNegative())
          Pred = ICmpInst::ICMP_SGE;
        break;
      case ICmpInst::ICMP_UGE:
        // (float)int >= -4.4   --> true
        // (float)int >= 4.4    --> int > 4
        if (!RHS.isNegative())
          return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
        Pred = ICmpInst::ICMP_UGT;
        break;
      case ICmpInst::ICMP_SGE:
        // (float)int >= -4.4   --> int >= -4
        // (float)int >= 4.4    --> int > 4
        if (!RHS.isNegative())
          Pred = ICmpInst::ICMP_SGT;
        break;
      }
    }
  }

  // Lower this FP comparison into an appropriate integer version of the
  // comparison.
  return new ICmpInst(Pred, LHSI->getOperand(0), RHSInt);
}

/// FoldCmpLoadFromIndexedGlobal - Called when we see this pattern:
///   cmp pred (load (gep GV, ...)), cmpcst
/// where GV is a global variable with a constant initializer.  Try to simplify
/// this into some simple computation that does not need the load.  For example
/// we can optimize "icmp eq (load (gep "foo", 0, i)), 0" into "icmp eq i, 3".
///
/// If AndCst is non-null, then the loaded value is masked with that constant
/// before doing the comparison.  This handles cases like "A[i]&4 == 0".
Instruction *InstCombiner::
FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
                             CmpInst &ICI, ConstantInt *AndCst) {
  ConstantArray *Init = dyn_cast<ConstantArray>(GV->getInitializer());
  if (Init == 0 || Init->getNumOperands() > 1024) return 0;

  // There are many forms of this optimization we can handle, for now, just do
  // the simple index into a single-dimensional array.
  //
  // Require: GEP GV, 0, i {{, constant indices}}
  if (GEP->getNumOperands() < 3 ||
      !isa<ConstantInt>(GEP->getOperand(1)) ||
      !cast<ConstantInt>(GEP->getOperand(1))->isZero() ||
      isa<Constant>(GEP->getOperand(2)))
    return 0;

  // Check that indices after the variable are constants and in-range for the
  // type they index.  Collect the indices.  This is typically for arrays of
  // structs.
  SmallVector<unsigned, 4> LaterIndices;

  const Type *EltTy = cast<ArrayType>(Init->getType())->getElementType();
  for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) {
    ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i));
    if (Idx == 0) return 0;  // Variable index.

    uint64_t IdxVal = Idx->getZExtValue();
    if ((unsigned)IdxVal != IdxVal) return 0;  // Too large array index.

    if (const StructType *STy = dyn_cast<StructType>(EltTy))
      EltTy = STy->getElementType(IdxVal);
    else if (const ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) {
      if (IdxVal >= ATy->getNumElements()) return 0;
      EltTy = ATy->getElementType();
    } else {
      return 0; // Unknown type.
    }

    LaterIndices.push_back(IdxVal);
  }

  enum { Overdefined = -3, Undefined = -2 };

  // Variables for our state machines.

  // FirstTrueElement/SecondTrueElement - Used to emit a comparison of the form
  // "i == 47 | i == 87", where 47 is the first index the condition is true
  // for, and 87 is the second (and last) index.  FirstTrueElement is -2 when
  // undefined, otherwise set to the first true element.  SecondTrueElement is
  // -2 when undefined, -3 when overdefined and >= 0 when that index is true.
  int FirstTrueElement = Undefined, SecondTrueElement = Undefined;

  // FirstFalseElement/SecondFalseElement - Used to emit a comparison of the
  // form "i != 47 & i != 87".  Same state transitions as for true elements.
  int FirstFalseElement = Undefined, SecondFalseElement = Undefined;

  /// TrueRangeEnd/FalseRangeEnd - In conjunction with First*Element, these
  /// define a state machine that triggers for ranges of values that the index
  /// is true or false for.  This triggers on things like "abbbbc"[i] == 'b'.
  /// This is -2 when undefined, -3 when overdefined, and otherwise the last
  /// index in the range (inclusive).  We use -2 for undefined here because we
  /// use relative comparisons and don't want 0-1 to match -1.
  int TrueRangeEnd = Undefined, FalseRangeEnd = Undefined;

  // MagicBitvector - This is a magic bitvector where we set a bit if the
  // comparison is true for element 'i'.  If there are 64 or fewer elements in
  // the array, this will fully represent all the comparison results.
  uint64_t MagicBitvector = 0;


  // Scan the array and see if one of our patterns matches.
  Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
  for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) {
    Constant *Elt = Init->getOperand(i);

    // If this is indexing an array of structures, get the structure element.
    if (!LaterIndices.empty())
      Elt = ConstantExpr::getExtractValue(Elt, LaterIndices.data(),
                                          LaterIndices.size());

    // If the element is masked, handle it.
    if (AndCst) Elt = ConstantExpr::getAnd(Elt, AndCst);

    // Find out if the comparison would be true or false for the i'th element.
    Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt,
                                                  CompareRHS, TD);
    // If the result is undef for this element, ignore it.
    if (isa<UndefValue>(C)) {
      // Extend range state machines to cover this element in case there is an
      // undef in the middle of the range.
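      // (e.g. for "ab?bc"[i] == 'b' where the '?' element folds to undef, the
      //  true range [1, 3] survives, since the undef element may be assumed
      //  to match.)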
      if (TrueRangeEnd == (int)i-1)
        TrueRangeEnd = i;
      if (FalseRangeEnd == (int)i-1)
        FalseRangeEnd = i;
      continue;
    }

    // If we can't compute the result for any of the elements, we have to give
    // up evaluating the entire conditional.
    if (!isa<ConstantInt>(C)) return 0;

    // Otherwise, we know if the comparison is true or false for this element,
    // update our state machines.
    bool IsTrueForElt = !cast<ConstantInt>(C)->isZero();

    // State machine for single/double/range index comparison.
    if (IsTrueForElt) {
      // Update the TrueElement state machine.
      if (FirstTrueElement == Undefined)
        FirstTrueElement = TrueRangeEnd = i;  // First true element.
      else {
        // Update double-compare state machine.
        if (SecondTrueElement == Undefined)
          SecondTrueElement = i;
        else
          SecondTrueElement = Overdefined;

        // Update range state machine.
        if (TrueRangeEnd == (int)i-1)
          TrueRangeEnd = i;
        else
          TrueRangeEnd = Overdefined;
      }
    } else {
      // Update the FalseElement state machine.
      if (FirstFalseElement == Undefined)
        FirstFalseElement = FalseRangeEnd = i; // First false element.
      else {
        // Update double-compare state machine.
        if (SecondFalseElement == Undefined)
          SecondFalseElement = i;
        else
          SecondFalseElement = Overdefined;

        // Update range state machine.
        if (FalseRangeEnd == (int)i-1)
          FalseRangeEnd = i;
        else
          FalseRangeEnd = Overdefined;
      }
    }


    // If this element is in range, update our magic bitvector.
    if (i < 64 && IsTrueForElt)
      MagicBitvector |= 1ULL << i;

    // If all of our states become overdefined, bail out early.  Since the
    // predicate is expensive, only check it every 8 elements.  This is only
    // really useful for really huge arrays.
    if ((i & 7) == 0 && i >= 64 && SecondTrueElement == Overdefined &&
        SecondFalseElement == Overdefined && TrueRangeEnd == Overdefined &&
        FalseRangeEnd == Overdefined)
      return 0;
  }

  // Now that we've scanned the entire array, emit our new comparison(s).  We
  // order the state machines in complexity of the generated code.
  Value *Idx = GEP->getOperand(2);


  // If the comparison is only true for one or two elements, emit direct
  // comparisons.
  if (SecondTrueElement != Overdefined) {
    // None true -> false.
    if (FirstTrueElement == Undefined)
      return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context));

    Value *FirstTrueIdx = ConstantInt::get(Idx->getType(), FirstTrueElement);

    // True for one element -> 'i == 47'.
    if (SecondTrueElement == Undefined)
      return new ICmpInst(ICmpInst::ICMP_EQ, Idx, FirstTrueIdx);

    // True for two elements -> 'i == 47 | i == 72'.
    Value *C1 = Builder->CreateICmpEQ(Idx, FirstTrueIdx);
    Value *SecondTrueIdx = ConstantInt::get(Idx->getType(), SecondTrueElement);
    Value *C2 = Builder->CreateICmpEQ(Idx, SecondTrueIdx);
    return BinaryOperator::CreateOr(C1, C2);
  }

  // If the comparison is only false for one or two elements, emit direct
  // comparisons.
  if (SecondFalseElement != Overdefined) {
    // None false -> true.
    if (FirstFalseElement == Undefined)
      return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context));

    Value *FirstFalseIdx = ConstantInt::get(Idx->getType(), FirstFalseElement);

    // False for one element -> 'i != 47'.
    if (SecondFalseElement == Undefined)
      return new ICmpInst(ICmpInst::ICMP_NE, Idx, FirstFalseIdx);

    // False for two elements -> 'i != 47 & i != 72'.
    Value *C1 = Builder->CreateICmpNE(Idx, FirstFalseIdx);
    Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),
                                             SecondFalseElement);
    Value *C2 = Builder->CreateICmpNE(Idx, SecondFalseIdx);
    return BinaryOperator::CreateAnd(C1, C2);
  }

  // If the comparison can be replaced with a range comparison for the elements
  // where it is true, emit the range check.
  if (TrueRangeEnd != Overdefined) {
    assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare");

    // Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1).
    if (FirstTrueElement) {
      Value *Offs = ConstantInt::get(Idx->getType(), -FirstTrueElement);
      Idx = Builder->CreateAdd(Idx, Offs);
    }

    Value *End = ConstantInt::get(Idx->getType(),
                                  TrueRangeEnd-FirstTrueElement+1);
    return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End);
  }

  // False range check.
  if (FalseRangeEnd != Overdefined) {
    assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare");
    // Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse).
    if (FirstFalseElement) {
      Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement);
      Idx = Builder->CreateAdd(Idx, Offs);
    }

    Value *End = ConstantInt::get(Idx->getType(),
                                  FalseRangeEnd-FirstFalseElement);
    return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End);
  }


  // If a 32-bit or 64-bit magic bitvector captures the entire comparison state
  // of this load, replace it with computation that does:
  //   ((magic_cst >> i) & 1) != 0
  if (Init->getNumOperands() <= 32 ||
      (TD && Init->getNumOperands() <= 64 && TD->isLegalInteger(64))) {
    const Type *Ty;
    if (Init->getNumOperands() <= 32)
      Ty = Type::getInt32Ty(Init->getContext());
    else
      Ty = Type::getInt64Ty(Init->getContext());
    Value *V = Builder->CreateIntCast(Idx, Ty, false);
    V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V);
    V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V);
    return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0));
  }

  return 0;
}


Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
  bool Changed = false;

  /// Orders the operands of the compare so that they are listed from most
  /// complex to least complex.  This puts constants before unary operators,
  /// before binary operators.
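  /// (e.g. 'fcmp olt 1.0, %x' is canonicalized here to 'fcmp ogt %x, 1.0';
  ///  swapOperands also swaps the predicate, preserving the result.)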
  if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) {
    I.swapOperands();
    Changed = true;
  }

  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);

  if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, TD))
    return ReplaceInstUsesWith(I, V);

  // Simplify 'fcmp pred X, X'
  if (Op0 == Op1) {
    switch (I.getPredicate()) {
    default: llvm_unreachable("Unknown predicate!");
    case FCmpInst::FCMP_UNO:    // True if unordered: isnan(X) | isnan(Y)
    case FCmpInst::FCMP_ULT:    // True if unordered or less than
    case FCmpInst::FCMP_UGT:    // True if unordered or greater than
    case FCmpInst::FCMP_UNE:    // True if unordered or not equal
      // Canonicalize these to be 'fcmp uno %X, 0.0'.
      I.setPredicate(FCmpInst::FCMP_UNO);
      I.setOperand(1, Constant::getNullValue(Op0->getType()));
      return &I;

    case FCmpInst::FCMP_ORD:    // True if ordered (no nans)
    case FCmpInst::FCMP_OEQ:    // True if ordered and equal
    case FCmpInst::FCMP_OGE:    // True if ordered and greater than or equal
    case FCmpInst::FCMP_OLE:    // True if ordered and less than or equal
      // Canonicalize these to be 'fcmp ord %X, 0.0'.
      I.setPredicate(FCmpInst::FCMP_ORD);
      I.setOperand(1, Constant::getNullValue(Op0->getType()));
      return &I;
    }
  }

  // Handle fcmp with constant RHS
  if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
    if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
      switch (LHSI->getOpcode()) {
      case Instruction::PHI:
        // Only fold fcmp into the PHI if the phi and fcmp are in the same
        // block.  If in the same block, we're encouraging jump threading.  If
        // not, we are just pessimizing the code by making an i1 phi.
        if (LHSI->getParent() == I.getParent())
          if (Instruction *NV = FoldOpIntoPhi(I, true))
            return NV;
        break;
      case Instruction::SIToFP:
      case Instruction::UIToFP:
        if (Instruction *NV = FoldFCmp_IntToFP_Cst(I, LHSI, RHSC))
          return NV;
        break;
      case Instruction::Select: {
        // If either operand of the select is a constant, we can fold the
        // comparison into the select arms, which will cause one to be
        // constant folded and the select turned into a bitwise or.
        Value *Op1 = 0, *Op2 = 0;
        if (LHSI->hasOneUse()) {
          if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) {
            // Fold the known value into the constant operand.
            Op1 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC);
            // Insert a new FCmp of the other select operand.
            Op2 = Builder->CreateFCmp(I.getPredicate(),
                                      LHSI->getOperand(2), RHSC, I.getName());
          } else if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) {
            // Fold the known value into the constant operand.
            Op2 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC);
            // Insert a new FCmp of the other select operand.
            Op1 = Builder->CreateFCmp(I.getPredicate(), LHSI->getOperand(1),
                                      RHSC, I.getName());
          }
        }

        if (Op1)
          return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
        break;
      }
      case Instruction::Load:
        if (GetElementPtrInst *GEP =
              dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
          if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
            if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
                !cast<LoadInst>(LHSI)->isVolatile())
              if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I))
                return Res;
        }
        break;
      }
  }

  return Changed ? &I : 0;
}

Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
  bool Changed = false;

  /// Orders the operands of the compare so that they are listed from most
  /// complex to least complex.  This puts constants before unary operators,
  /// before binary operators.
  if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) {
    I.swapOperands();
    Changed = true;
  }

  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);

  if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, TD))
    return ReplaceInstUsesWith(I, V);

  const Type *Ty = Op0->getType();

  // icmp's with boolean values can always be turned into bitwise operations
  if (Ty == Type::getInt1Ty(*Context)) {
    switch (I.getPredicate()) {
    default: llvm_unreachable("Invalid icmp instruction!");
    case ICmpInst::ICMP_EQ: {                // icmp eq i1 A, B -> ~(A^B)
      Value *Xor = Builder->CreateXor(Op0, Op1, I.getName()+"tmp");
      return BinaryOperator::CreateNot(Xor);
    }
    case ICmpInst::ICMP_NE:                  // icmp ne i1 A, B -> A^B
      return BinaryOperator::CreateXor(Op0, Op1);

    case ICmpInst::ICMP_UGT:
      std::swap(Op0, Op1);                   // Change icmp ugt -> icmp ult
      // FALL THROUGH
    case ICmpInst::ICMP_ULT: {               // icmp ult i1 A, B -> ~A & B
      Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp");
      return BinaryOperator::CreateAnd(Not, Op1);
    }
    case ICmpInst::ICMP_SGT:
      std::swap(Op0, Op1);                   // Change icmp sgt -> icmp slt
      // FALL THROUGH
    case ICmpInst::ICMP_SLT: {               // icmp slt i1 A, B -> A & ~B
      Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp");
      return BinaryOperator::CreateAnd(Not, Op0);
    }
    case ICmpInst::ICMP_UGE:
      std::swap(Op0, Op1);                   // Change icmp uge -> icmp ule
      // FALL THROUGH
    case ICmpInst::ICMP_ULE: {               // icmp ule i1 A, B -> ~A | B
      Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp");
      return BinaryOperator::CreateOr(Not, Op1);
    }
    case ICmpInst::ICMP_SGE:
      std::swap(Op0, Op1);                   // Change icmp sge -> icmp sle
      // FALL THROUGH
    case ICmpInst::ICMP_SLE: {               // icmp sle i1 A, B -> A | ~B
      Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp");
      return BinaryOperator::CreateOr(Not, Op0);
    }
    }
  }

  unsigned BitWidth = 0;
  if (TD)
    BitWidth = TD->getTypeSizeInBits(Ty->getScalarType());
  else if (Ty->isIntOrIntVector())
    BitWidth = Ty->getScalarSizeInBits();

  bool isSignBit = false;

  // See if we are doing a comparison with a constant.
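  // e.g. 'icmp sle %X, 5' is canonicalized below to 'icmp slt %X, 6', so the
  // rest of the code only has to reason about strict predicates.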
  if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
    Value *A = 0, *B = 0;

    // (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B)
    if (I.isEquality() && CI->isZero() &&
        match(Op0, m_Sub(m_Value(A), m_Value(B)))) {
      // (icmp cond A B) if cond is equality
      return new ICmpInst(I.getPredicate(), A, B);
    }

    // If we have an icmp le or icmp ge instruction, turn it into the
    // appropriate icmp lt or icmp gt instruction.  This allows us to rely on
    // them being folded in the code below.  The SimplifyICmpInst code has
    // already handled the edge cases for us, so we just assert on them.
    switch (I.getPredicate()) {
    default: break;
    case ICmpInst::ICMP_ULE:
      assert(!CI->isMaxValue(false));                 // A <=u MAX -> TRUE
      return new ICmpInst(ICmpInst::ICMP_ULT, Op0, AddOne(CI));
    case ICmpInst::ICMP_SLE:
      assert(!CI->isMaxValue(true));                  // A <=s MAX -> TRUE
      return new ICmpInst(ICmpInst::ICMP_SLT, Op0, AddOne(CI));
    case ICmpInst::ICMP_UGE:
      assert(!CI->isMinValue(false));                 // A >=u MIN -> TRUE
      return new ICmpInst(ICmpInst::ICMP_UGT, Op0, SubOne(CI));
    case ICmpInst::ICMP_SGE:
      assert(!CI->isMinValue(true));                  // A >=s MIN -> TRUE
      return new ICmpInst(ICmpInst::ICMP_SGT, Op0, SubOne(CI));
    }

    // If this comparison is a normal comparison, it demands all
    // bits, if it is a sign bit comparison, it only demands the sign bit.
    bool UnusedBit;
    isSignBit = isSignBitCheck(I.getPredicate(), CI, UnusedBit);
  }

  // See if we can fold the comparison based on range information we can get
  // by checking whether bits are known to be zero or one in the input.
  if (BitWidth != 0) {
    APInt Op0KnownZero(BitWidth, 0), Op0KnownOne(BitWidth, 0);
    APInt Op1KnownZero(BitWidth, 0), Op1KnownOne(BitWidth, 0);

    if (SimplifyDemandedBits(I.getOperandUse(0),
                             isSignBit ? APInt::getSignBit(BitWidth)
                                       : APInt::getAllOnesValue(BitWidth),
                             Op0KnownZero, Op0KnownOne, 0))
      return &I;
    if (SimplifyDemandedBits(I.getOperandUse(1),
                             APInt::getAllOnesValue(BitWidth),
                             Op1KnownZero, Op1KnownOne, 0))
      return &I;

    // Given the known and unknown bits, compute a range that the LHS could be
    // in.  Compute the Min, Max and RHS values based on the known bits.  For
    // the EQ and NE we use unsigned values.
    APInt Op0Min(BitWidth, 0), Op0Max(BitWidth, 0);
    APInt Op1Min(BitWidth, 0), Op1Max(BitWidth, 0);
    if (I.isSigned()) {
      ComputeSignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne,
                                             Op0Min, Op0Max);
      ComputeSignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne,
                                             Op1Min, Op1Max);
    } else {
      ComputeUnsignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne,
                                               Op0Min, Op0Max);
      ComputeUnsignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne,
                                               Op1Min, Op1Max);
    }

    // If Min and Max are known to be the same, then SimplifyDemandedBits
    // figured out that the LHS is a constant.  Just constant fold this now so
    // that code below can assume that Min != Max.
    if (!isa<Constant>(Op0) && Op0Min == Op0Max)
      return new ICmpInst(I.getPredicate(),
                          ConstantInt::get(*Context, Op0Min), Op1);
    if (!isa<Constant>(Op1) && Op1Min == Op1Max)
      return new ICmpInst(I.getPredicate(), Op0,
                          ConstantInt::get(*Context, Op1Min));

    // Based on the range information we know about the LHS, see if we can
    // simplify this comparison.  For example, (x&4) < 8 is always true.
    switch (I.getPredicate()) {
    default: llvm_unreachable("Unknown icmp opcode!");
    case ICmpInst::ICMP_EQ:
      if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
        return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
      break;
    case ICmpInst::ICMP_NE:
      if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
      break;
    case ICmpInst::ICMP_ULT:
      if (Op0Max.ult(Op1Min))          // A <u B -> true if max(A) < min(B)
        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
      if (Op0Min.uge(Op1Max))          // A <u B -> false if min(A) >= max(B)
        return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
      if (Op1Min == Op0Max)            // A <u B -> A != B if max(A) == min(B)
        return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
      if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
        if (Op1Max == Op0Min+1)        // A <u C -> A == C-1 if min(A)+1 == C
          return new ICmpInst(ICmpInst::ICMP_EQ, Op0, SubOne(CI));

        // (x <u 2147483648) -> (x >s -1)  -> true if sign bit clear
        if (CI->isMinValue(true))
          return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
                              Constant::getAllOnesValue(Op0->getType()));
      }
      break;
    case ICmpInst::ICMP_UGT:
      if (Op0Min.ugt(Op1Max))          // A >u B -> true if min(A) > max(B)
        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
      if (Op0Max.ule(Op1Min))          // A >u B -> false if max(A) <= min(B)
        return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));

      if (Op1Max == Op0Min)            // A >u B -> A != B if min(A) == max(B)
        return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
      if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
        if (Op1Min == Op0Max-1)        // A >u C -> A == C+1 if max(A)-1 == C
          return new ICmpInst(ICmpInst::ICMP_EQ, Op0, AddOne(CI));

        // (x >u 2147483647) -> (x <s 0)  -> true if sign bit set
        if (CI->isMaxValue(true))
          return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
                              Constant::getNullValue(Op0->getType()));
      }
      break;
    case ICmpInst::ICMP_SLT:
      if (Op0Max.slt(Op1Min))          // A <s B -> true if max(A) < min(B)
        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
      if (Op0Min.sge(Op1Max))          // A <s B -> false if min(A) >= max(B)
        return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
      if (Op1Min == Op0Max)            // A <s B -> A != B if max(A) == min(B)
        return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
      if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
        if (Op1Max == Op0Min+1)        // A <s C -> A == C-1 if min(A)+1 == C
          return new ICmpInst(ICmpInst::ICMP_EQ, Op0, SubOne(CI));
      }
      break;
    case ICmpInst::ICMP_SGT:
      if (Op0Min.sgt(Op1Max))          // A >s B -> true if min(A) > max(B)
        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
      if (Op0Max.sle(Op1Min))          // A >s B -> false if max(A) <= min(B)
        return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));

      if (Op1Max == Op0Min)            // A >s B -> A != B if min(A) == max(B)
        return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
      if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
        if (Op1Min == Op0Max-1)        // A >s C -> A == C+1 if max(A)-1 == C
          return new ICmpInst(ICmpInst::ICMP_EQ, Op0, AddOne(CI));
      }
      break;
    case ICmpInst::ICMP_SGE:
      assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!");
      if (Op0Min.sge(Op1Max))          // A >=s B -> true if min(A) >= max(B)
        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
      if (Op0Max.slt(Op1Min))          // A >=s B -> false if max(A) < min(B)
        return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
      break;
    case ICmpInst::ICMP_SLE:
      assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!");
      if (Op0Max.sle(Op1Min))          // A <=s B -> true if max(A) <= min(B)
        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
      if (Op0Min.sgt(Op1Max))          // A <=s B -> false if min(A) > max(B)
        return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
      break;
    case ICmpInst::ICMP_UGE:
      assert(!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!");
      if (Op0Min.uge(Op1Max))          // A >=u B -> true if min(A) >= max(B)
        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
      if (Op0Max.ult(Op1Min))          // A >=u B -> false if max(A) < min(B)
        return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
      break;
    case ICmpInst::ICMP_ULE:
      assert(!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!");
      if (Op0Max.ule(Op1Min))          // A <=u B -> true if max(A) <= min(B)
        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
      if (Op0Min.ugt(Op1Max))          // A <=u B -> false if min(A) > max(B)
        return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
      break;
    }

    // Turn a signed comparison into an unsigned one if both operands
    // are known to have the same sign.
    if (I.isSigned() &&
        ((Op0KnownZero.isNegative() && Op1KnownZero.isNegative()) ||
         (Op0KnownOne.isNegative() && Op1KnownOne.isNegative())))
      return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1);
  }

  // Test if the ICmpInst instruction is used exclusively by a select as
  // part of a minimum or maximum operation. If so, refrain from doing
  // any other folding. This helps out other analyses which understand
  // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
  // and CodeGen. And in this case, at least one of the comparison
  // operands has at least one user besides the compare (the select),
  // which would often largely negate the benefit of folding anyway.
  if (I.hasOneUse())
    if (SelectInst *SI = dyn_cast<SelectInst>(*I.use_begin()))
      if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) ||
          (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
        return 0;

  // See if we are doing a comparison between a constant and an instruction
  // that can be folded into the comparison.
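  // e.g. 'icmp slt (xor %X, SignBit), C' or 'icmp eq (trunc %X to i8), 42'
  // can often be rewritten to compare %X itself (see
  // visitICmpInstWithInstAndIntCst below).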
  if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
    // Since the RHS is a ConstantInt (CI), if the left hand side is an
    // instruction, see if that instruction also has constants so that the
    // instruction can be folded into the icmp
    if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
      if (Instruction *Res = visitICmpInstWithInstAndIntCst(I, LHSI, CI))
        return Res;
  }

  // Handle icmp with constant (but not simple integer constant) RHS
  if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
    if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
      switch (LHSI->getOpcode()) {
      case Instruction::GetElementPtr:
        // icmp pred GEP (P, int 0, int 0, int 0), null -> icmp pred P, null
        if (RHSC->isNullValue() &&
            cast<GetElementPtrInst>(LHSI)->hasAllZeroIndices())
          return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
                  Constant::getNullValue(LHSI->getOperand(0)->getType()));
        break;
      case Instruction::PHI:
        // Only fold icmp into the PHI if the phi and icmp are in the same
        // block.  If in the same block, we're encouraging jump threading.  If
        // not, we are just pessimizing the code by making an i1 phi.
        if (LHSI->getParent() == I.getParent())
          if (Instruction *NV = FoldOpIntoPhi(I, true))
            return NV;
        break;
      case Instruction::Select: {
        // If either operand of the select is a constant, we can fold the
        // comparison into the select arms, which will cause one to be
        // constant folded and the select turned into a bitwise or.
        Value *Op1 = 0, *Op2 = 0;
        if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1)))
          Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
        if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2)))
          Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);

        // We only want to perform this transformation if it will not lead to
        // additional code. This is true if either both sides of the select
        // fold to a constant (in which case the icmp is replaced with a select
        // which will usually simplify) or this is the only user of the
        // select (in which case we are trading a select+icmp for a simpler
        // select+icmp).
        if ((Op1 && Op2) || (LHSI->hasOneUse() && (Op1 || Op2))) {
          if (!Op1)
            Op1 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(1),
                                      RHSC, I.getName());
          if (!Op2)
            Op2 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(2),
                                      RHSC, I.getName());
          return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
        }
        break;
      }
      case Instruction::Call:
        // If we have (malloc != null), and if the malloc has a single use, we
        // can assume it is successful and remove the malloc.
        if (isMalloc(LHSI) && LHSI->hasOneUse() &&
            isa<ConstantPointerNull>(RHSC)) {
          // Need to explicitly erase malloc call here, instead of adding it
          // to Worklist, because it won't get DCE'd from the Worklist since
          // isInstructionTriviallyDead() returns false for function calls.
          // It is OK to replace LHSI/MallocCall with Undef because the
          // instruction that uses it will be erased via Worklist.
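          // e.g. 'icmp ne (malloc ...), null' folds to true once the dead
          // malloc is erased, and 'icmp eq' folds to false.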
          if (extractMallocCall(LHSI)) {
            LHSI->replaceAllUsesWith(UndefValue::get(LHSI->getType()));
            EraseInstFromFunction(*LHSI);
            return ReplaceInstUsesWith(I,
                                     ConstantInt::get(Type::getInt1Ty(*Context),
                                                      !I.isTrueWhenEqual()));
          }
          if (CallInst* MallocCall = extractMallocCallFromBitCast(LHSI))
            if (MallocCall->hasOneUse()) {
              MallocCall->replaceAllUsesWith(
                                        UndefValue::get(MallocCall->getType()));
              EraseInstFromFunction(*MallocCall);
              Worklist.Add(LHSI); // The malloc's bitcast use.
              return ReplaceInstUsesWith(I,
                                     ConstantInt::get(Type::getInt1Ty(*Context),
                                                      !I.isTrueWhenEqual()));
            }
        }
        break;
      case Instruction::IntToPtr:
        // icmp pred inttoptr(X), null -> icmp pred X, 0
        if (RHSC->isNullValue() && TD &&
            TD->getIntPtrType(RHSC->getContext()) ==
               LHSI->getOperand(0)->getType())
          return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
                        Constant::getNullValue(LHSI->getOperand(0)->getType()));
        break;

      case Instruction::Load:
        // Try to optimize things like "A[i] > 4" to index computations.
        if (GetElementPtrInst *GEP =
              dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
          if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
            if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
                !cast<LoadInst>(LHSI)->isVolatile())
              if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I))
                return Res;
        }
        break;
      }
  }

  // If we can optimize a 'icmp GEP, P' or 'icmp P, GEP', do so now.
  if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op0))
    if (Instruction *NI = FoldGEPICmp(GEP, Op1, I.getPredicate(), I))
      return NI;
  if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1))
    if (Instruction *NI = FoldGEPICmp(GEP, Op0,
                           ICmpInst::getSwappedPredicate(I.getPredicate()), I))
      return NI;

  // Test to see if the operands of the icmp are casted versions of other
  // values.  If the ptr->ptr cast can be stripped off both arguments, we do so
  // now.
  if (BitCastInst *CI = dyn_cast<BitCastInst>(Op0)) {
    if (isa<PointerType>(Op0->getType()) &&
        (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) {
      // We keep moving the cast from the left operand over to the right
      // operand, where it can often be eliminated completely.
      Op0 = CI->getOperand(0);

      // If operand #1 is a bitcast instruction, it must also be a ptr->ptr
      // cast, so eliminate it as well.
      if (BitCastInst *CI2 = dyn_cast<BitCastInst>(Op1))
        Op1 = CI2->getOperand(0);

      // If Op1 is a constant, we can fold the cast into the constant.
      if (Op0->getType() != Op1->getType()) {
        if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
          Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType());
        } else {
          // Otherwise, cast the RHS right before the icmp
          Op1 = Builder->CreateBitCast(Op1, Op0->getType());
        }
      }
      return new ICmpInst(I.getPredicate(), Op0, Op1);
    }
  }

  if (isa<CastInst>(Op0)) {
    // Handle the special case of: icmp (cast bool to X), <cst>
    // This comes up when you have code like
    //   int X = A < B;
    //   if (X) ...
    // For generality, we handle any zero-extension of any operand comparison
    // with a constant or another cast from the same type.
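    // e.g. 'icmp ne (zext i1 %A to i32), 0' can often be replaced by %A
    // itself.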
    if (isa<Constant>(Op1) || isa<CastInst>(Op1))
      if (Instruction *R = visitICmpInstWithCastAndCast(I))
        return R;
  }

  // See if it's the same type of instruction on the left and right.
  if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
    if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
      if (Op0I->getOpcode() == Op1I->getOpcode() && Op0I->hasOneUse() &&
          Op1I->hasOneUse() && Op0I->getOperand(1) == Op1I->getOperand(1)) {
        switch (Op0I->getOpcode()) {
        default: break;
        case Instruction::Add:
        case Instruction::Sub:
        case Instruction::Xor:
          if (I.isEquality())    // a+x icmp eq/ne b+x --> a icmp b
            return new ICmpInst(I.getPredicate(), Op0I->getOperand(0),
                                Op1I->getOperand(0));
          // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b
          if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
            if (CI->getValue().isSignBit()) {
              ICmpInst::Predicate Pred = I.isSigned()
                                             ? I.getUnsignedPredicate()
                                             : I.getSignedPredicate();
              return new ICmpInst(Pred, Op0I->getOperand(0),
                                  Op1I->getOperand(0));
            }

            if (CI->getValue().isMaxSignedValue()) {
              ICmpInst::Predicate Pred = I.isSigned()
                                             ? I.getUnsignedPredicate()
                                             : I.getSignedPredicate();
              Pred = I.getSwappedPredicate(Pred);
              return new ICmpInst(Pred, Op0I->getOperand(0),
                                  Op1I->getOperand(0));
            }
          }
          break;
        case Instruction::Mul:
          if (!I.isEquality())
            break;

          if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
            // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask
            // Mask = -1 >> count-trailing-zeros(Cst).
            if (!CI->isZero() && !CI->isOne()) {
              const APInt &AP = CI->getValue();
              ConstantInt *Mask = ConstantInt::get(*Context,
                                      APInt::getLowBitsSet(AP.getBitWidth(),
                                                           AP.getBitWidth() -
                                                      AP.countTrailingZeros()));
              Value *And1 = Builder->CreateAnd(Op0I->getOperand(0), Mask);
              Value *And2 = Builder->CreateAnd(Op1I->getOperand(0), Mask);
              return new ICmpInst(I.getPredicate(), And1, And2);
            }
          }
          break;
        }
      }
    }
  }

  // ~x < ~y --> y < x
  { Value *A, *B;
    if (match(Op0, m_Not(m_Value(A))) &&
        match(Op1, m_Not(m_Value(B))))
      return new ICmpInst(I.getPredicate(), B, A);
  }

  if (I.isEquality()) {
    Value *A, *B, *C, *D;

    // -x == -y --> x == y
    if (match(Op0, m_Neg(m_Value(A))) &&
        match(Op1, m_Neg(m_Value(B))))
      return new ICmpInst(I.getPredicate(), A, B);

    if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
      if (A == Op1 || B == Op1) {    // (A^B) == A  ->  B == 0
        Value *OtherVal = A == Op1 ? B : A;
        return new ICmpInst(I.getPredicate(), OtherVal,
                            Constant::getNullValue(A->getType()));
      }

      if (match(Op1, m_Xor(m_Value(C), m_Value(D)))) {
        // A^c1 == C^c2 --> A == C^(c1^c2)
        ConstantInt *C1, *C2;
        if (match(B, m_ConstantInt(C1)) &&
            match(D, m_ConstantInt(C2)) && Op1->hasOneUse()) {
          Constant *NC =
                   ConstantInt::get(*Context, C1->getValue() ^ C2->getValue());
          Value *Xor = Builder->CreateXor(C, NC, "tmp");
          return new ICmpInst(I.getPredicate(), A, Xor);
        }

        // A^B == A^D -> B == D
        if (A == C) return new ICmpInst(I.getPredicate(), B, D);
        if (A == D) return new ICmpInst(I.getPredicate(), B, C);
        if (B == C) return new ICmpInst(I.getPredicate(), A, D);
        if (B == D) return new ICmpInst(I.getPredicate(), A, C);
      }
    }

    if (match(Op1, m_Xor(m_Value(A), m_Value(B))) &&
        (A == Op0 || B == Op0)) {
      // A == (A^B)  ->  B == 0
      Value *OtherVal = A == Op0 ? B : A;
      return new ICmpInst(I.getPredicate(), OtherVal,
                          Constant::getNullValue(A->getType()));
    }

    // (A-B) == A  ->  B == 0
    if (match(Op0, m_Sub(m_Specific(Op1), m_Value(B))))
      return new ICmpInst(I.getPredicate(), B,
                          Constant::getNullValue(B->getType()));

    // A == (A-B)  ->  B == 0
    if (match(Op1, m_Sub(m_Specific(Op0), m_Value(B))))
      return new ICmpInst(I.getPredicate(), B,
                          Constant::getNullValue(B->getType()));

    // (X&Z) == (Y&Z) -> (X^Y) & Z == 0
    if (Op0->hasOneUse() && Op1->hasOneUse() &&
        match(Op0, m_And(m_Value(A), m_Value(B))) &&
        match(Op1, m_And(m_Value(C), m_Value(D)))) {
      Value *X = 0, *Y = 0, *Z = 0;

      if (A == C) {
        X = B; Y = D; Z = A;
      } else if (A == D) {
        X = B; Y = C; Z = A;
      } else if (B == C) {
        X = A; Y = D; Z = B;
      } else if (B == D) {
        X = A; Y = C; Z = B;
      }

      if (X) {   // Build (X^Y) & Z
        Op1 = Builder->CreateXor(X, Y, "tmp");
        Op1 = Builder->CreateAnd(Op1, Z, "tmp");
        I.setOperand(0, Op1);
        I.setOperand(1, Constant::getNullValue(Op1->getType()));
        return &I;
      }
    }
  }

  {
    Value *X; ConstantInt *Cst;
    // icmp X+Cst, X
    if (match(Op0, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op1 == X)
      return FoldICmpAddOpCst(I, X, Cst, I.getPredicate(), Op0);

    // icmp X, X+Cst
    if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X)
      return FoldICmpAddOpCst(I, X, Cst, I.getSwappedPredicate(), Op1);
  }
  return Changed ? &I : 0;
}

/// FoldICmpAddOpCst - Fold "icmp pred (X+CI), X".
Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
                                            Value *X, ConstantInt *CI,
                                            ICmpInst::Predicate Pred,
                                            Value *TheAdd) {
  // If we have X+0, exit early (simplifying logic below) and let it get folded
  // elsewhere.   icmp X+0, X  -> icmp X, X
  if (CI->isZero()) {
    bool isTrue = ICmpInst::isTrueWhenEqual(Pred);
    return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue));
  }

  // (X+4) == X -> false.
  if (Pred == ICmpInst::ICMP_EQ)
    return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext()));

  // (X+4) != X -> true.
  if (Pred == ICmpInst::ICMP_NE)
    return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext()));

  // If this is an instruction (as opposed to constantexpr) get NUW/NSW info.
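  // e.g. for i8, '(X+1) <u X' is true exactly when the add wraps (X == 255),
  // but for an 'add nuw' it can never be true; the cases below use this.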
  bool isNUW = false, isNSW = false;
  if (BinaryOperator *Add = dyn_cast<BinaryOperator>(TheAdd)) {
    isNUW = Add->hasNoUnsignedWrap();
    isNSW = Add->hasNoSignedWrap();
  }

  // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0,
  // so the values can never be equal.  Similarly for all other "or equals"
  // operators.

  // (X+1) <u X        --> X >u (MAXUINT-1)        --> X == 255
  // (X+2) <u X        --> X >u (MAXUINT-2)        --> X > 253
  // (X+MAXUINT) <u X  --> X >u (MAXUINT-MAXUINT)  --> X != 0
  if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
    // If this is an NUW add, then this is always false.
    if (isNUW)
      return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext()));

    Value *R = ConstantExpr::getSub(ConstantInt::get(CI->getType(), -1ULL), CI);
    return new ICmpInst(ICmpInst::ICMP_UGT, X, R);
  }

  // (X+1) >u X        --> X <u (0-1)        --> X != 255
  // (X+2) >u X        --> X <u (0-2)        --> X <u 254
  // (X+MAXUINT) >u X  --> X <u (0-MAXUINT)  --> X <u 1  --> X == 0
  if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
    // If this is an NUW add, then this is always true.
    if (isNUW)
      return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext()));
    return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantExpr::getNeg(CI));
  }

  unsigned BitWidth = CI->getType()->getPrimitiveSizeInBits();
  ConstantInt *SMax = ConstantInt::get(X->getContext(),
                                       APInt::getSignedMaxValue(BitWidth));

  // (X+ 1) <s X       --> X >s (MAXSINT-1)        --> X == 127
  // (X+ 2) <s X       --> X >s (MAXSINT-2)        --> X >s 125
  // (X+MAXSINT) <s X  --> X >s (MAXSINT-MAXSINT)  --> X >s 0
  // (X+MINSINT) <s X  --> X >s (MAXSINT-MINSINT)  --> X >s -1
  // (X+ -2) <s X      --> X >s (MAXSINT- -2)      --> X >s -127
  // (X+ -1) <s X      --> X >s (MAXSINT- -1)      --> X != -128
  if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) {
    // If this is an NSW add, then we have two cases: if the constant is
    // positive, then this is always false, if negative, this is always true.
    if (isNSW) {
      bool isTrue = CI->getValue().isNegative();
      return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue));
    }

    return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantExpr::getSub(SMax, CI));
  }

  // (X+ 1) >s X       --> X <s (MAXSINT-(1-1))       --> X != 127
  // (X+ 2) >s X       --> X <s (MAXSINT-(2-1))       --> X <s 126
  // (X+MAXSINT) >s X  --> X <s (MAXSINT-(MAXSINT-1)) --> X <s 1
  // (X+MINSINT) >s X  --> X <s (MAXSINT-(MINSINT-1)) --> X <s -2
  // (X+ -2) >s X      --> X <s (MAXSINT-(-2-1))      --> X <s -126
  // (X+ -1) >s X      --> X <s (MAXSINT-(-1-1))      --> X == -128

  // If this is an NSW add, then we have two cases: if the constant is
  // positive, then this is always true, if negative, this is always false.
  if (isNSW) {
    bool isTrue = !CI->getValue().isNegative();
    return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue));
  }

  assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE);
  Constant *C = ConstantInt::get(X->getContext(), CI->getValue()-1);
  return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C));
}

/// FoldICmpDivCst - Fold "icmp pred ([su]div X, DivRHS), CmpRHS" where DivRHS
/// and CmpRHS are both known to be integer constants.
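/// e.g. "icmp ult (udiv %X, 5), 3" is really asking whether %X is in the
/// interval [0, 15), so it can become "icmp ult %X, 15".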
Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
                                          ConstantInt *DivRHS) {
  ConstantInt *CmpRHS = cast<ConstantInt>(ICI.getOperand(1));
  const APInt &CmpRHSV = CmpRHS->getValue();

  // FIXME: If the operand types don't match the type of the divide
  // then don't attempt this transform. The code below doesn't have the
  // logic to deal with a signed divide and an unsigned compare (and
  // vice versa). This is because (x /s C1) <s C2  produces different
  // results than (x /s C1) <u C2 or (x /u C1) <s C2 or even
  // (x /u C1) <u C2.  Simply casting the operands and result won't
  // work. :(  The if statement below tests that condition and bails
  // if it finds it.
  bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv;
  if (!ICI.isEquality() && DivIsSigned != ICI.isSigned())
    return 0;
  if (DivRHS->isZero())
    return 0; // The ProdOV computation fails on divide by zero.
  if (DivIsSigned && DivRHS->isAllOnesValue())
    return 0; // The overflow computation also screws up here
  if (DivRHS->isOne())
    return 0; // Not worth bothering, and eliminates some funny cases
              // with INT_MIN.

  // Compute Prod = CI * DivRHS. We are essentially solving an equation
  // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and
  // C2 (CI). By solving for X we can turn this into a range check
  // instead of computing a divide.
  Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS);

  // Determine if the product overflows by seeing if the product is
  // not equal to the divide. Make sure we do the same kind of divide
  // as in the LHS instruction that we're folding.
  bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) :
                 ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS;

  // Get the ICmp opcode
  ICmpInst::Predicate Pred = ICI.getPredicate();

  // Figure out the interval that is being checked.  For example, a comparison
  // like "X /u 5 == 0" is really checking that X is in the interval [0, 5).
  // Compute this interval based on the constants involved and the signedness
  // of the compare/divide.  This computes a half-open interval, keeping track
  // of whether either value in the interval overflows.  After analysis each
  // overflow variable is set to 0 if its corresponding bound variable is
  // valid, -1 if overflowed off the bottom end, or +1 if overflowed off the
  // top end.
  int LoOverflow = 0, HiOverflow = 0;
  Constant *LoBound = 0, *HiBound = 0;

  if (!DivIsSigned) {  // udiv
    // e.g. X/5 op 3  --> [15, 20)
    LoBound = Prod;
    HiOverflow = LoOverflow = ProdOV;
    if (!HiOverflow)
      HiOverflow = AddWithOverflow(HiBound, LoBound, DivRHS, Context, false);
  } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0.
    if (CmpRHSV == 0) {       // (X / pos) op 0
      // Can't overflow.  e.g.  X/2 op 0 --> [-1, 2)
      LoBound = cast<ConstantInt>(ConstantExpr::getNeg(SubOne(DivRHS)));
      HiBound = DivRHS;
    } else if (CmpRHSV.isStrictlyPositive()) {   // (X / pos) op pos
      LoBound = Prod;     // e.g.   X/5 op 3 --> [15, 20)
      HiOverflow = LoOverflow = ProdOV;
      if (!HiOverflow)
        HiOverflow = AddWithOverflow(HiBound, Prod, DivRHS, Context, true);
    } else {                       // (X / pos) op neg
      // e.g. X/5 op -3  --> [-15-4, -15+1) --> [-19, -14)
      HiBound = AddOne(Prod);
      LoOverflow = HiOverflow = ProdOV ? -1 : 0;
      if (!LoOverflow) {
        ConstantInt* DivNeg =
                       cast<ConstantInt>(ConstantExpr::getNeg(DivRHS));
        LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, Context,
                                     true) ? -1 : 0;
      }
    }
  } else if (DivRHS->getValue().isNegative()) { // Divisor is < 0.
    if (CmpRHSV == 0) {       // (X / neg) op 0
      // e.g. X/-5 op 0  --> [-4, 5)
      LoBound = AddOne(DivRHS);
      HiBound = cast<ConstantInt>(ConstantExpr::getNeg(DivRHS));
      if (HiBound == DivRHS) {     // -INTMIN = INTMIN
        HiOverflow = 1;            // [INTMIN+1, overflow)
        HiBound = 0;               // e.g. X/INTMIN = 0 --> X > INTMIN
      }
    } else if (CmpRHSV.isStrictlyPositive()) {   // (X / neg) op pos
      // e.g. X/-5 op 3  --> [-19, -14)
      HiBound = AddOne(Prod);
      HiOverflow = LoOverflow = ProdOV ? -1 : 0;
      if (!LoOverflow)
        LoOverflow = AddWithOverflow(LoBound, HiBound,
                                     DivRHS, Context, true) ? -1 : 0;
    } else {                       // (X / neg) op neg
      LoBound = Prod;       // e.g. X/-5 op -3  --> [15, 20)
      LoOverflow = HiOverflow = ProdOV;
      if (!HiOverflow)
        HiOverflow = SubWithOverflow(HiBound, Prod, DivRHS, Context, true);
    }

    // Dividing by a negative swaps the condition.  LT <-> GT
    Pred = ICmpInst::getSwappedPredicate(Pred);
  }

  Value *X = DivI->getOperand(0);
  switch (Pred) {
  default: llvm_unreachable("Unhandled icmp opcode!");
  case ICmpInst::ICMP_EQ:
    if (LoOverflow && HiOverflow)
      return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context));
    else if (HiOverflow)
      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
                          ICmpInst::ICMP_UGE, X, LoBound);
    else if (LoOverflow)
      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
                          ICmpInst::ICMP_ULT, X, HiBound);
    else
      return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, true, ICI);
  case ICmpInst::ICMP_NE:
    if (LoOverflow && HiOverflow)
      return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context));
    else if (HiOverflow)
      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
                          ICmpInst::ICMP_ULT, X, LoBound);
    else if (LoOverflow)
      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
                          ICmpInst::ICMP_UGE, X, HiBound);
    else
      return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, false, ICI);
  case ICmpInst::ICMP_ULT:
  case ICmpInst::ICMP_SLT:
    if (LoOverflow == +1)   // Low bound is greater than input range.
      return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context));
    if (LoOverflow == -1)   // Low bound is less than input range.
      return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context));
    return new ICmpInst(Pred, X, LoBound);
  case ICmpInst::ICMP_UGT:
  case ICmpInst::ICMP_SGT:
    if (HiOverflow == +1)       // High bound greater than input range.
      return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context));
    else if (HiOverflow == -1)  // High bound less than input range.
      return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context));
    if (Pred == ICmpInst::ICMP_UGT)
      return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound);
    else
      return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound);
  }
}


/// visitICmpInstWithInstAndIntCst - Handle "icmp (instr, intcst)".
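/// e.g. simplifying "icmp slt (xor %X, 128), 0" or
/// "icmp eq (lshr %X, 2), 3" into simpler comparisons on %X.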
7152/// 7153Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, 7154 Instruction *LHSI, 7155 ConstantInt *RHS) { 7156 const APInt &RHSV = RHS->getValue(); 7157 7158 switch (LHSI->getOpcode()) { 7159 case Instruction::Trunc: 7160 if (ICI.isEquality() && LHSI->hasOneUse()) { 7161 // Simplify icmp eq (trunc x to i8), 42 -> icmp eq x, 42|highbits if all 7162 // of the high bits truncated out of x are known. 7163 unsigned DstBits = LHSI->getType()->getPrimitiveSizeInBits(), 7164 SrcBits = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits(); 7165 APInt Mask(APInt::getHighBitsSet(SrcBits, SrcBits-DstBits)); 7166 APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0); 7167 ComputeMaskedBits(LHSI->getOperand(0), Mask, KnownZero, KnownOne); 7168 7169 // If all the high bits are known, we can do this xform. 7170 if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) { 7171 // Pull in the high bits from known-ones set. 7172 APInt NewRHS(RHS->getValue()); 7173 NewRHS.zext(SrcBits); 7174 NewRHS |= KnownOne; 7175 return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), 7176 ConstantInt::get(*Context, NewRHS)); 7177 } 7178 } 7179 break; 7180 7181 case Instruction::Xor: // (icmp pred (xor X, XorCST), CI) 7182 if (ConstantInt *XorCST = dyn_cast<ConstantInt>(LHSI->getOperand(1))) { 7183 // If this is a comparison that tests the signbit (X < 0) or (x > -1), 7184 // fold the xor. 7185 if ((ICI.getPredicate() == ICmpInst::ICMP_SLT && RHSV == 0) || 7186 (ICI.getPredicate() == ICmpInst::ICMP_SGT && RHSV.isAllOnesValue())) { 7187 Value *CompareVal = LHSI->getOperand(0); 7188 7189 // If the sign bit of the XorCST is not set, there is no change to 7190 // the operation, just stop using the Xor. 7191 if (!XorCST->getValue().isNegative()) { 7192 ICI.setOperand(0, CompareVal); 7193 Worklist.Add(LHSI); 7194 return &ICI; 7195 } 7196 7197 // Was the old condition true if the operand is positive? 7198 bool isTrueIfPositive = ICI.getPredicate() == ICmpInst::ICMP_SGT; 7199 7200 // If so, the new one isn't. 7201 isTrueIfPositive ^= true; 7202 7203 if (isTrueIfPositive) 7204 return new ICmpInst(ICmpInst::ICMP_SGT, CompareVal, 7205 SubOne(RHS)); 7206 else 7207 return new ICmpInst(ICmpInst::ICMP_SLT, CompareVal, 7208 AddOne(RHS)); 7209 } 7210 7211 if (LHSI->hasOneUse()) { 7212 // (icmp u/s (xor A SignBit), C) -> (icmp s/u A, (xor C SignBit)) 7213 if (!ICI.isEquality() && XorCST->getValue().isSignBit()) { 7214 const APInt &SignBit = XorCST->getValue(); 7215 ICmpInst::Predicate Pred = ICI.isSigned() 7216 ? ICI.getUnsignedPredicate() 7217 : ICI.getSignedPredicate(); 7218 return new ICmpInst(Pred, LHSI->getOperand(0), 7219 ConstantInt::get(*Context, RHSV ^ SignBit)); 7220 } 7221 7222 // (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A) 7223 if (!ICI.isEquality() && XorCST->getValue().isMaxSignedValue()) { 7224 const APInt &NotSignBit = XorCST->getValue(); 7225 ICmpInst::Predicate Pred = ICI.isSigned() 7226 ? 
ICI.getUnsignedPredicate() 7227 : ICI.getSignedPredicate(); 7228 Pred = ICI.getSwappedPredicate(Pred); 7229 return new ICmpInst(Pred, LHSI->getOperand(0), 7230 ConstantInt::get(*Context, RHSV ^ NotSignBit)); 7231 } 7232 } 7233 } 7234 break; 7235 case Instruction::And: // (icmp pred (and X, AndCST), RHS) 7236 if (LHSI->hasOneUse() && isa<ConstantInt>(LHSI->getOperand(1)) && 7237 LHSI->getOperand(0)->hasOneUse()) { 7238 ConstantInt *AndCST = cast<ConstantInt>(LHSI->getOperand(1)); 7239 7240 // If the LHS is an AND of a truncating cast, we can widen the 7241 // and/compare to be the input width without changing the value 7242 // produced, eliminating a cast. 7243 if (TruncInst *Cast = dyn_cast<TruncInst>(LHSI->getOperand(0))) { 7244 // We can do this transformation if either the AND constant does not 7245 // have its sign bit set or if it is an equality comparison. 7246 // Extending a relational comparison when we're checking the sign 7247 // bit would not work. 7248 if (Cast->hasOneUse() && 7249 (ICI.isEquality() || 7250 (AndCST->getValue().isNonNegative() && RHSV.isNonNegative()))) { 7251 uint32_t BitWidth = 7252 cast<IntegerType>(Cast->getOperand(0)->getType())->getBitWidth(); 7253 APInt NewCST = AndCST->getValue(); 7254 NewCST.zext(BitWidth); 7255 APInt NewCI = RHSV; 7256 NewCI.zext(BitWidth); 7257 Value *NewAnd = 7258 Builder->CreateAnd(Cast->getOperand(0), 7259 ConstantInt::get(*Context, NewCST), LHSI->getName()); 7260 return new ICmpInst(ICI.getPredicate(), NewAnd, 7261 ConstantInt::get(*Context, NewCI)); 7262 } 7263 } 7264 7265 // If this is: (X >> C1) & C2 != C3 (where any shift and any compare 7266 // could exist), turn it into (X & (C2 << C1)) != (C3 << C1). This 7267 // happens a LOT in code produced by the C front-end, for bitfield 7268 // access. 7269 BinaryOperator *Shift = dyn_cast<BinaryOperator>(LHSI->getOperand(0)); 7270 if (Shift && !Shift->isShift()) 7271 Shift = 0; 7272 7273 ConstantInt *ShAmt; 7274 ShAmt = Shift ? dyn_cast<ConstantInt>(Shift->getOperand(1)) : 0; 7275 const Type *Ty = Shift ? Shift->getType() : 0; // Type of the shift. 7276 const Type *AndTy = AndCST->getType(); // Type of the and. 7277 7278 // We can fold this as long as we can't shift unknown bits 7279 // into the mask. This can only happen with signed shift 7280 // rights, as they sign-extend. 7281 if (ShAmt) { 7282 bool CanFold = Shift->isLogicalShift(); 7283 if (!CanFold) { 7284 // To test for the bad case of the signed shr, see if any 7285 // of the bits shifted in could be tested after the mask. 7286 uint32_t TyBits = Ty->getPrimitiveSizeInBits(); 7287 int ShAmtVal = TyBits - ShAmt->getLimitedValue(TyBits); 7288 7289 uint32_t BitWidth = AndTy->getPrimitiveSizeInBits(); 7290 if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) & 7291 AndCST->getValue()) == 0) 7292 CanFold = true; 7293 } 7294 7295 if (CanFold) { 7296 Constant *NewCst; 7297 if (Shift->getOpcode() == Instruction::Shl) 7298 NewCst = ConstantExpr::getLShr(RHS, ShAmt); 7299 else 7300 NewCst = ConstantExpr::getShl(RHS, ShAmt); 7301 7302 // Check to see if we are shifting out any of the bits being 7303 // compared. 7304 if (ConstantExpr::get(Shift->getOpcode(), 7305 NewCst, ShAmt) != RHS) { 7306 // If we shifted bits out, the fold is not going to work out. 7307 // As a special case, check to see if this means that the 7308 // result is always true or false now. 
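            // e.g. ((X << 2) & 12) == 5 can never hold, since bit 0 of the
            // RHS can never be set in the LHS; eq folds to false, ne to true.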
            if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
              return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context));
            if (ICI.getPredicate() == ICmpInst::ICMP_NE)
              return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context));
          } else {
            ICI.setOperand(1, NewCst);
            Constant *NewAndCST;
            if (Shift->getOpcode() == Instruction::Shl)
              NewAndCST = ConstantExpr::getLShr(AndCST, ShAmt);
            else
              NewAndCST = ConstantExpr::getShl(AndCST, ShAmt);
            LHSI->setOperand(1, NewAndCST);
            LHSI->setOperand(0, Shift->getOperand(0));
            Worklist.Add(Shift); // Shift is dead.
            return &ICI;
          }
        }
      }

      // Turn ((X >> Y) & C) == 0  into  (X & (C << Y)) == 0.  The latter is
      // preferable because it allows the C<<Y expression to be hoisted out
      // of a loop if Y is invariant and X is not.
      if (Shift && Shift->hasOneUse() && RHSV == 0 &&
          ICI.isEquality() && !Shift->isArithmeticShift() &&
          !isa<Constant>(Shift->getOperand(0))) {
        // Compute C << Y.
        Value *NS;
        if (Shift->getOpcode() == Instruction::LShr) {
          NS = Builder->CreateShl(AndCST, Shift->getOperand(1), "tmp");
        } else {
          // Insert a logical shift.
          NS = Builder->CreateLShr(AndCST, Shift->getOperand(1), "tmp");
        }

        // Compute X & (C << Y).
        Value *NewAnd =
          Builder->CreateAnd(Shift->getOperand(0), NS, LHSI->getName());

        ICI.setOperand(0, NewAnd);
        return &ICI;
      }
    }

    // Try to optimize things like "A[i]&42 == 0" to index computations.
    if (LoadInst *LI = dyn_cast<LoadInst>(LHSI->getOperand(0))) {
      if (GetElementPtrInst *GEP =
            dyn_cast<GetElementPtrInst>(LI->getOperand(0)))
        if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
          if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
              !LI->isVolatile() && isa<ConstantInt>(LHSI->getOperand(1))) {
            ConstantInt *C = cast<ConstantInt>(LHSI->getOperand(1));
            if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV,ICI, C))
              return Res;
          }
    }
    break;

  case Instruction::Or: {
    if (!ICI.isEquality() || !RHS->isNullValue() || !LHSI->hasOneUse())
      break;
    Value *P, *Q;
    if (match(LHSI, m_Or(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Value(Q))))) {
      // Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0
      // -> and (icmp eq P, null), (icmp eq Q, null).
      Value *ICIP = Builder->CreateICmp(ICI.getPredicate(), P,
                                        Constant::getNullValue(P->getType()));
      Value *ICIQ = Builder->CreateICmp(ICI.getPredicate(), Q,
                                        Constant::getNullValue(Q->getType()));
      Instruction *Op;
      if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
        Op = BinaryOperator::CreateAnd(ICIP, ICIQ);
      else
        Op = BinaryOperator::CreateOr(ICIP, ICIQ);
      return Op;
    }
    break;
  }

  case Instruction::Shl: {       // (icmp pred (shl X, ShAmt), CI)
    ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1));
    if (!ShAmt) break;

    uint32_t TypeBits = RHSV.getBitWidth();

    // Check that the shift amount is in range.  If not, don't perform
    // undefined shifts.  When the shift is visited it will be
    // simplified.
    if (ShAmt->uge(TypeBits))
      break;

    if (ICI.isEquality()) {
      // If we are comparing against bits always shifted out, the
      // comparison cannot succeed.
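      // Illustrative instance: icmp eq (shl i32 %X, 4), 5 can never be
      // true, since the low four bits of the shl are always zero while
      // 5 >>u 4 << 4 == 0 != 5; the code below folds it to false/true.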
      Constant *Comp =
        ConstantExpr::getShl(ConstantExpr::getLShr(RHS, ShAmt), ShAmt);
      if (Comp != RHS) {// Comparing against a bit that we know is zero.
        bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
        Constant *Cst = ConstantInt::get(Type::getInt1Ty(*Context), IsICMP_NE);
        return ReplaceInstUsesWith(ICI, Cst);
      }

      if (LHSI->hasOneUse()) {
        // Otherwise strength reduce the shift into an and.
        uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
        Constant *Mask =
          ConstantInt::get(*Context, APInt::getLowBitsSet(TypeBits,
                                                          TypeBits-ShAmtVal));

        Value *And =
          Builder->CreateAnd(LHSI->getOperand(0), Mask,
                             LHSI->getName()+".mask");
        return new ICmpInst(ICI.getPredicate(), And,
                            ConstantInt::get(*Context, RHSV.lshr(ShAmtVal)));
      }
    }

    // Otherwise, if this is a comparison of the sign bit, simplify to
    // and/test.
    bool TrueIfSigned = false;
    if (LHSI->hasOneUse() &&
        isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) {
      // (X << 31) <s 0  --> (X&1) != 0
      Constant *Mask = ConstantInt::get(*Context, APInt(TypeBits, 1) <<
                                           (TypeBits-ShAmt->getZExtValue()-1));
      Value *And =
        Builder->CreateAnd(LHSI->getOperand(0), Mask, LHSI->getName()+".mask");
      return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ,
                          And, Constant::getNullValue(And->getType()));
    }
    break;
  }

  case Instruction::LShr:        // (icmp pred (shr X, ShAmt), CI)
  case Instruction::AShr: {
    // Only handle equality comparisons of shift-by-constant.
    ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1));
    if (!ShAmt || !ICI.isEquality()) break;

    // Check that the shift amount is in range.  If not, don't perform
    // undefined shifts.  When the shift is visited it will be
    // simplified.
    uint32_t TypeBits = RHSV.getBitWidth();
    if (ShAmt->uge(TypeBits))
      break;

    uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);

    // If we are comparing against bits always shifted out, the
    // comparison cannot succeed.
    APInt Comp = RHSV << ShAmtVal;
    if (LHSI->getOpcode() == Instruction::LShr)
      Comp = Comp.lshr(ShAmtVal);
    else
      Comp = Comp.ashr(ShAmtVal);

    if (Comp != RHSV) { // Comparing against a bit that we know is zero.
      bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
      Constant *Cst = ConstantInt::get(Type::getInt1Ty(*Context), IsICMP_NE);
      return ReplaceInstUsesWith(ICI, Cst);
    }

    // Otherwise, check to see if the bits shifted out are known to be zero.
    // If so, we can compare against the unshifted value:
    //  (X & 4) >> 1 == 2  --> (X & 4) == 4.
    if (LHSI->hasOneUse() &&
        MaskedValueIsZero(LHSI->getOperand(0),
                          APInt::getLowBitsSet(Comp.getBitWidth(), ShAmtVal))) {
      return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
                          ConstantExpr::getShl(RHS, ShAmt));
    }

    if (LHSI->hasOneUse()) {
      // Otherwise strength reduce the shift into an and.
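      // Sketch of the resulting shape (names illustrative):
      //   icmp eq (lshr i32 %X, 3), 5  -->  icmp eq (and i32 %X, -8), 40
      // i.e. mask away the bits below the shift and compare with RHS << 3.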
      APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
      Constant *Mask = ConstantInt::get(*Context, Val);

      Value *And = Builder->CreateAnd(LHSI->getOperand(0),
                                      Mask, LHSI->getName()+".mask");
      return new ICmpInst(ICI.getPredicate(), And,
                          ConstantExpr::getShl(RHS, ShAmt));
    }
    break;
  }

  case Instruction::SDiv:
  case Instruction::UDiv:
    // Fold: icmp pred ([us]div X, C1), C2 -> range test
    // Fold this div into the comparison, producing a range check.
    // Determine, based on the divide type, the range being checked.  If
    // there is an overflow on the low or high side, remember it; otherwise
    // compute the range [low, hi) bounding the new value.
    // See: InsertRangeTest above for the kinds of replacements possible.
    if (ConstantInt *DivRHS = dyn_cast<ConstantInt>(LHSI->getOperand(1)))
      if (Instruction *R = FoldICmpDivCst(ICI, cast<BinaryOperator>(LHSI),
                                          DivRHS))
        return R;
    break;

  case Instruction::Add:
    // Fold: icmp pred (add X, C1), C2
    if (!ICI.isEquality()) {
      ConstantInt *LHSC = dyn_cast<ConstantInt>(LHSI->getOperand(1));
      if (!LHSC) break;
      const APInt &LHSV = LHSC->getValue();

      ConstantRange CR = ICI.makeConstantRange(ICI.getPredicate(), RHSV)
                            .subtract(LHSV);

      if (ICI.isSigned()) {
        if (CR.getLower().isSignBit()) {
          return new ICmpInst(ICmpInst::ICMP_SLT, LHSI->getOperand(0),
                              ConstantInt::get(*Context, CR.getUpper()));
        } else if (CR.getUpper().isSignBit()) {
          return new ICmpInst(ICmpInst::ICMP_SGE, LHSI->getOperand(0),
                              ConstantInt::get(*Context, CR.getLower()));
        }
      } else {
        if (CR.getLower().isMinValue()) {
          return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0),
                              ConstantInt::get(*Context, CR.getUpper()));
        } else if (CR.getUpper().isMinValue()) {
          return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0),
                              ConstantInt::get(*Context, CR.getLower()));
        }
      }
    }
    break;
  }

  // Simplify icmp_eq and icmp_ne instructions with integer constant RHS.
  if (ICI.isEquality()) {
    bool isICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;

    // If the first operand is (add|sub|and|or|xor|rem) with a constant, and
    // the second operand is a constant, simplify a bit.
    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(LHSI)) {
      switch (BO->getOpcode()) {
      case Instruction::SRem:
        // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one.
        if (RHSV == 0 && isa<ConstantInt>(BO->getOperand(1)) &&
            BO->hasOneUse()) {
          const APInt &V = cast<ConstantInt>(BO->getOperand(1))->getValue();
          if (V.sgt(APInt(V.getBitWidth(), 1)) && V.isPowerOf2()) {
            Value *NewRem =
              Builder->CreateURem(BO->getOperand(0), BO->getOperand(1),
                                  BO->getName());
            return new ICmpInst(ICI.getPredicate(), NewRem,
                                Constant::getNullValue(BO->getType()));
          }
        }
        break;
      case Instruction::Add:
        // Replace ((add A, B) != C) with (A != C-B) if B & C are constants.
        if (ConstantInt *BOp1C = dyn_cast<ConstantInt>(BO->getOperand(1))) {
          if (BO->hasOneUse())
            return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
                                ConstantExpr::getSub(RHS, BOp1C));
        } else if (RHSV == 0) {
          // Replace ((add A, B) != 0) with (A != -B) if A or B is
          // efficiently invertible, or if the add has just this one use.
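          // Sketch (illustrative operands): if B is itself a negation,
          // say B = sub 0, %Y, then (add %A, B) != 0 becomes %A != %Y;
          // otherwise a neg is materialized below and the add goes dead.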
          Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1);

          if (Value *NegVal = dyn_castNegVal(BOp1))
            return new ICmpInst(ICI.getPredicate(), BOp0, NegVal);
          else if (Value *NegVal = dyn_castNegVal(BOp0))
            return new ICmpInst(ICI.getPredicate(), NegVal, BOp1);
          else if (BO->hasOneUse()) {
            Value *Neg = Builder->CreateNeg(BOp1);
            Neg->takeName(BO);
            return new ICmpInst(ICI.getPredicate(), BOp0, Neg);
          }
        }
        break;
      case Instruction::Xor:
        // For the xor case, we can xor two constants together, eliminating
        // the explicit xor.
        if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1)))
          return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
                              ConstantExpr::getXor(RHS, BOC));

        // FALLTHROUGH
      case Instruction::Sub:
        // Replace (([sub|xor] A, B) != 0) with (A != B).
        if (RHSV == 0)
          return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
                              BO->getOperand(1));
        break;

      case Instruction::Or:
        // If bits are being or'd in that are not present in the constant we
        // are comparing against, then the comparison could never succeed!
        if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) {
          Constant *NotCI = ConstantExpr::getNot(RHS);
          if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue())
            return ReplaceInstUsesWith(ICI,
                                    ConstantInt::get(Type::getInt1Ty(*Context),
                                                     isICMP_NE));
        }
        break;

      case Instruction::And:
        if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
          // If bits are being compared against that are and'd out, then the
          // comparison can never succeed!
          if ((RHSV & ~BOC->getValue()) != 0)
            return ReplaceInstUsesWith(ICI,
                                    ConstantInt::get(Type::getInt1Ty(*Context),
                                                     isICMP_NE));

          // If we have ((X & C) == C), turn it into ((X & C) != 0).
          if (RHS == BOC && RHSV.isPowerOf2())
            return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ :
                                ICmpInst::ICMP_NE, LHSI,
                                Constant::getNullValue(RHS->getType()));

          // Replace (and X, (1 << size(X)-1) != 0) with X s< 0.
          if (BOC->getValue().isSignBit()) {
            Value *X = BO->getOperand(0);
            Constant *Zero = Constant::getNullValue(X->getType());
            ICmpInst::Predicate pred = isICMP_NE ?
              ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;
            return new ICmpInst(pred, X, Zero);
          }

          // ((X & ~7) == 0) --> X < 8
          if (RHSV == 0 && isHighOnes(BOC)) {
            Value *X = BO->getOperand(0);
            Constant *NegX = ConstantExpr::getNeg(BOC);
            ICmpInst::Predicate pred = isICMP_NE ?
              ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
            return new ICmpInst(pred, X, NegX);
          }
        }
        break;
      default: break;
      }
    } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(LHSI)) {
      // Handle icmp {eq|ne} <intrinsic>, intcst.
      if (II->getIntrinsicID() == Intrinsic::bswap) {
        Worklist.Add(II);
        ICI.setOperand(0, II->getOperand(1));
        ICI.setOperand(1, ConstantInt::get(*Context, RHSV.byteSwap()));
        return &ICI;
      }
    }
  }
  return 0;
}

/// visitICmpInstWithCastAndCast - Handle icmp (cast x to y), (cast/cst).
/// We only handle extending casts so far.
///
Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
  const CastInst *LHSCI = cast<CastInst>(ICI.getOperand(0));
  Value *LHSCIOp        = LHSCI->getOperand(0);
  const Type *SrcTy     = LHSCIOp->getType();
  const Type *DestTy    = LHSCI->getType();
  Value *RHSCIOp;

  // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
  // integer type is the same size as the pointer type.
  if (TD && LHSCI->getOpcode() == Instruction::PtrToInt &&
      TD->getPointerSizeInBits() ==
         cast<IntegerType>(DestTy)->getBitWidth()) {
    Value *RHSOp = 0;
    if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) {
      RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy);
    } else if (PtrToIntInst *RHSC = dyn_cast<PtrToIntInst>(ICI.getOperand(1))) {
      RHSOp = RHSC->getOperand(0);
      // If the pointer types don't match, insert a bitcast.
      if (LHSCIOp->getType() != RHSOp->getType())
        RHSOp = Builder->CreateBitCast(RHSOp, LHSCIOp->getType());
    }

    if (RHSOp)
      return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSOp);
  }

  // The code below only handles extension cast instructions, so far.
  // Enforce this.
  if (LHSCI->getOpcode() != Instruction::ZExt &&
      LHSCI->getOpcode() != Instruction::SExt)
    return 0;

  bool isSignedExt = LHSCI->getOpcode() == Instruction::SExt;
  bool isSignedCmp = ICI.isSigned();

  if (CastInst *CI = dyn_cast<CastInst>(ICI.getOperand(1))) {
    // Not an extension from the same type?
    RHSCIOp = CI->getOperand(0);
    if (RHSCIOp->getType() != LHSCIOp->getType())
      return 0;

    // If the signedness of the two casts doesn't agree (i.e. one is a sext
    // and the other is a zext), then we can't handle this.
    if (CI->getOpcode() != LHSCI->getOpcode())
      return 0;

    // Deal with equality cases early.
    if (ICI.isEquality())
      return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp);

    // A signed comparison of sign extended values simplifies into a
    // signed comparison.
    if (isSignedCmp && isSignedExt)
      return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp);

    // The other three cases all fold into an unsigned comparison.
    return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, RHSCIOp);
  }

  // If we aren't dealing with a constant on the RHS, exit early.
  ConstantInt *CI = dyn_cast<ConstantInt>(ICI.getOperand(1));
  if (!CI)
    return 0;

  // Compute the constant that would happen if we truncated to SrcTy then
  // reextended to DestTy.
  Constant *Res1 = ConstantExpr::getTrunc(CI, SrcTy);
  Constant *Res2 = ConstantExpr::getCast(LHSCI->getOpcode(), Res1, DestTy);

  // If the re-extended constant didn't change...
  if (Res2 == CI) {
    // Deal with equality cases early.
    if (ICI.isEquality())
      return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1);

    // A signed comparison of sign extended values simplifies into a
    // signed comparison.
    if (isSignedExt && isSignedCmp)
      return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1);

    // The other three cases all fold into an unsigned comparison.
    return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, Res1);
  }

  // The re-extended constant changed so the constant cannot be represented
  // in the shorter type.  Consequently, we cannot emit a simple comparison.

  // First, handle some easy cases.  We know the result cannot be equal at this
  // point, so handle the ICI.isEquality() cases.
  if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
    return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context));
  if (ICI.getPredicate() == ICmpInst::ICMP_NE)
    return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context));

  // Evaluate the comparison for LT (we invert for GT below).  LE and GE cases
  // should have been folded away previously and not enter in here.
  Value *Result;
  if (isSignedCmp) {
    // We're performing a signed comparison.
    if (cast<ConstantInt>(CI)->getValue().isNegative())
      Result = ConstantInt::getFalse(*Context);  // X < (small) --> false
    else
      Result = ConstantInt::getTrue(*Context);   // X < (large) --> true
  } else {
    // We're performing an unsigned comparison.
    if (isSignedExt) {
      // We're performing an unsigned comp with a sign extended value.
      // This is true if the input is >= 0. [aka >s -1]
      Constant *NegOne = Constant::getAllOnesValue(SrcTy);
      Result = Builder->CreateICmpSGT(LHSCIOp, NegOne, ICI.getName());
    } else {
      // Unsigned extend & unsigned compare -> always true.
      Result = ConstantInt::getTrue(*Context);
    }
  }

  // Finally, return the value computed.
  if (ICI.getPredicate() == ICmpInst::ICMP_ULT ||
      ICI.getPredicate() == ICmpInst::ICMP_SLT)
    return ReplaceInstUsesWith(ICI, Result);

  assert((ICI.getPredicate() == ICmpInst::ICMP_UGT ||
          ICI.getPredicate() == ICmpInst::ICMP_SGT) &&
         "ICmp should be folded!");
  if (Constant *CI = dyn_cast<Constant>(Result))
    return ReplaceInstUsesWith(ICI, ConstantExpr::getNot(CI));
  return BinaryOperator::CreateNot(Result);
}

Instruction *InstCombiner::visitShl(BinaryOperator &I) {
  return commonShiftTransforms(I);
}

Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
  return commonShiftTransforms(I);
}

Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
  if (Instruction *R = commonShiftTransforms(I))
    return R;

  Value *Op0 = I.getOperand(0);

  // ashr int -1, X = -1   (for any arithmetic shift rights of ~0)
  if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
    if (CSI->isAllOnesValue())
      return ReplaceInstUsesWith(I, CSI);

  // See if we can turn a signed shr into an unsigned shr.
  if (MaskedValueIsZero(Op0,
                        APInt::getSignBit(I.getType()->getScalarSizeInBits())))
    return BinaryOperator::CreateLShr(Op0, I.getOperand(1));

  // Arithmetic shifting an all-sign-bit value is a no-op.
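  // (Illustrative case: for %s = sext i1 %b to i32 every bit of %s equals
  // the sign bit, so ashr i32 %s, C is %s for any in-range C.)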
  unsigned NumSignBits = ComputeNumSignBits(Op0);
  if (NumSignBits == Op0->getType()->getScalarSizeInBits())
    return ReplaceInstUsesWith(I, Op0);

  return 0;
}

Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
  assert(I.getOperand(1)->getType() == I.getOperand(0)->getType());
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);

  // shl X, 0 == X and shr X, 0 == X
  // shl 0, X == 0 and shr 0, X == 0
  if (Op1 == Constant::getNullValue(Op1->getType()) ||
      Op0 == Constant::getNullValue(Op0->getType()))
    return ReplaceInstUsesWith(I, Op0);

  if (isa<UndefValue>(Op0)) {
    if (I.getOpcode() == Instruction::AShr) // undef >>s X -> undef
      return ReplaceInstUsesWith(I, Op0);
    else                                    // undef << X -> 0, undef >>u X -> 0
      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
  }
  if (isa<UndefValue>(Op1)) {
    if (I.getOpcode() == Instruction::AShr)  // X >>s undef -> X
      return ReplaceInstUsesWith(I, Op0);
    else                                     // X << undef, X >>u undef -> 0
      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
  }

  // See if we can fold away this shift.
  if (SimplifyDemandedInstructionBits(I))
    return &I;

  // Try to fold constant and into select arguments.
  if (isa<Constant>(Op0))
    if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
      if (Instruction *R = FoldOpIntoSelect(I, SI, this))
        return R;

  if (ConstantInt *CUI = dyn_cast<ConstantInt>(Op1))
    if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I))
      return Res;
  return 0;
}

Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
                                               BinaryOperator &I) {
  bool isLeftShift = I.getOpcode() == Instruction::Shl;

  // See if we can simplify any instructions used by the instruction whose sole
  // purpose is to compute bits we don't care about.
  uint32_t TypeBits = Op0->getType()->getScalarSizeInBits();

  // shl i32 X, 32 = 0 and srl i8 Y, 9 = 0, ... just don't eliminate
  // a signed shift.
  //
  if (Op1->uge(TypeBits)) {
    if (I.getOpcode() != Instruction::AShr)
      return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType()));
    else {
      I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1));
      return &I;
    }
  }

  // ((X*C1) << C2) == (X * (C1 << C2))
  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0))
    if (BO->getOpcode() == Instruction::Mul && isLeftShift)
      if (Constant *BOOp = dyn_cast<Constant>(BO->getOperand(1)))
        return BinaryOperator::CreateMul(BO->getOperand(0),
                                         ConstantExpr::getShl(BOOp, Op1));

  // Try to fold constant and into select arguments.
  if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
    if (Instruction *R = FoldOpIntoSelect(I, SI, this))
      return R;
  if (isa<PHINode>(Op0))
    if (Instruction *NV = FoldOpIntoPhi(I))
      return NV;

  // Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2))
  if (TruncInst *TI = dyn_cast<TruncInst>(Op0)) {
    Instruction *TrOp = dyn_cast<Instruction>(TI->getOperand(0));
    // If 'shift2' is an ashr, we would have to get the sign bit into a funny
    // place.  Don't try to do this transformation in this case.  Also, we
    // require that the input operand is a shift-by-constant so that we have
    // confidence that the shifts will get folded together.  We could do this
    // xform in more cases, but it is unlikely to be profitable.
    if (TrOp && I.isLogicalShift() && TrOp->isShift() &&
        isa<ConstantInt>(TrOp->getOperand(1))) {
      // Okay, we'll do this xform.  Make the shift of shift.
      Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType());
      // (shift2 (shift1 & 0x00FF), c2)
      Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,
                                        I.getName());

      // For logical shifts, the truncation has the effect of making the high
      // part of the register be zeros.  Emulate this by inserting an AND to
      // clear the top bits as needed.  This 'and' will usually be zapped by
      // other xforms later if dead.
      unsigned SrcSize = TrOp->getType()->getScalarSizeInBits();
      unsigned DstSize = TI->getType()->getScalarSizeInBits();
      APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize));

      // The mask we constructed says what the trunc would do if occurring
      // between the shifts.  We want to know the effect *after* the second
      // shift.  We know that it is a logical shift by a constant, so adjust the
      // mask as appropriate.
      if (I.getOpcode() == Instruction::Shl)
        MaskV <<= Op1->getZExtValue();
      else {
        assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift");
        MaskV = MaskV.lshr(Op1->getZExtValue());
      }

      // shift1 & 0x00FF
      Value *And = Builder->CreateAnd(NSh, ConstantInt::get(*Context, MaskV),
                                      TI->getName());

      // Return the value truncated to the interesting size.
      return new TruncInst(And, I.getType());
    }
  }

  if (Op0->hasOneUse()) {
    if (BinaryOperator *Op0BO = dyn_cast<BinaryOperator>(Op0)) {
      // Turn ((X >> C) + Y) << C  ->  (X + (Y << C)) & (~0 << C)
      Value *V1, *V2;
      ConstantInt *CC;
      switch (Op0BO->getOpcode()) {
      default: break;
      case Instruction::Add:
      case Instruction::And:
      case Instruction::Or:
      case Instruction::Xor: {
        // These operators commute.
        // Turn (Y + (X >> C)) << C  ->  (X + (Y << C)) & (~0 << C)
        if (isLeftShift && Op0BO->getOperand(1)->hasOneUse() &&
            match(Op0BO->getOperand(1), m_Shr(m_Value(V1),
                                              m_Specific(Op1)))) {
          Value *YS =         // (Y << C)
            Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName());
          // (X + (Y << C))
          Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1,
                                          Op0BO->getOperand(1)->getName());
          uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
          return BinaryOperator::CreateAnd(X, ConstantInt::get(*Context,
                     APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
        }

        // Turn (Y + ((X >> C) & CC)) << C  ->  ((X & (CC << C)) + (Y << C))
        Value *Op0BOOp1 = Op0BO->getOperand(1);
        if (isLeftShift && Op0BOOp1->hasOneUse() &&
            match(Op0BOOp1,
                  m_And(m_Shr(m_Value(V1), m_Specific(Op1)),
                        m_ConstantInt(CC))) &&
            cast<BinaryOperator>(Op0BOOp1)->getOperand(0)->hasOneUse()) {
          Value *YS =   // (Y << C)
            Builder->CreateShl(Op0BO->getOperand(0), Op1,
                               Op0BO->getName());
          // X & (CC << C)
          Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
                                         V1->getName()+".mask");
          return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM);
        }
      }

      // FALL THROUGH.
      case Instruction::Sub: {
        // Turn ((X >> C) + Y) << C  ->  (X + (Y << C)) & (~0 << C)
        if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
            match(Op0BO->getOperand(0), m_Shr(m_Value(V1),
                                              m_Specific(Op1)))) {
          Value *YS =  // (Y << C)
            Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
          // (X + (Y << C))
          Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS,
                                          Op0BO->getOperand(0)->getName());
          uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
          return BinaryOperator::CreateAnd(X, ConstantInt::get(*Context,
                     APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
        }

        // Turn (((X >> C)&CC) + Y) << C  ->  (X + (Y << C)) & (CC << C)
        if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
            match(Op0BO->getOperand(0),
                  m_And(m_Shr(m_Value(V1), m_Value(V2)),
                        m_ConstantInt(CC))) && V2 == Op1 &&
            cast<BinaryOperator>(Op0BO->getOperand(0))
                ->getOperand(0)->hasOneUse()) {
          Value *YS = // (Y << C)
            Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
          // X & (CC << C)
          Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
                                         V1->getName()+".mask");

          return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS);
        }

        break;
      }
      }


      // If the operand is a bitwise operator with a constant RHS, and the
      // shift is the only use, we can pull it out of the shift.
      if (ConstantInt *Op0C = dyn_cast<ConstantInt>(Op0BO->getOperand(1))) {
        bool isValid = true;     // Valid only for And, Or, Xor
        bool highBitSet = false; // Transform if high bit of constant set?

        switch (Op0BO->getOpcode()) {
        default: isValid = false; break;   // Do not perform transform!
        case Instruction::Add:
          isValid = isLeftShift;
          break;
        case Instruction::Or:
        case Instruction::Xor:
          highBitSet = false;
          break;
        case Instruction::And:
          highBitSet = true;
          break;
        }

        // If this is a signed shift right, and the high bit is modified
        // by the logical operation, do not perform the transformation.
        // The highBitSet boolean indicates the value of the high bit of
        // the constant which would cause it to be modified for this
        // operation.
        //
        if (isValid && I.getOpcode() == Instruction::AShr)
          isValid = Op0C->getValue()[TypeBits-1] == highBitSet;

        if (isValid) {
          Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1);

          Value *NewShift =
            Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1);
          NewShift->takeName(Op0BO);

          return BinaryOperator::Create(Op0BO->getOpcode(), NewShift,
                                        NewRHS);
        }
      }
    }
  }

  // Find out if this is a shift of a shift by a constant.
  BinaryOperator *ShiftOp = dyn_cast<BinaryOperator>(Op0);
  if (ShiftOp && !ShiftOp->isShift())
    ShiftOp = 0;

  if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) {
    ConstantInt *ShiftAmt1C = cast<ConstantInt>(ShiftOp->getOperand(1));
    uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits);
    uint32_t ShiftAmt2 = Op1->getLimitedValue(TypeBits);
    assert(ShiftAmt2 != 0 && "Should have been simplified earlier");
    if (ShiftAmt1 == 0) return 0;  // Will be simplified in the future.
    Value *X = ShiftOp->getOperand(0);

    uint32_t AmtSum = ShiftAmt1+ShiftAmt2;   // Fold into one big shift.
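
    // Sketch of the folds below (illustrative i32 constants):
    //   (X << 3) << 5    -->  X << 8, or 0 once the sum reaches the width
    //   (X >>u 3) >>u 5  -->  X >>u 8
    //   (X >>u 3) >>s 5  -->  X >>u 8, since the lshr cleared the sign bit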

    const IntegerType *Ty = cast<IntegerType>(I.getType());

    // Check for (X << c1) << c2  and  (X >> c1) >> c2
    if (I.getOpcode() == ShiftOp->getOpcode()) {
      // If this is an oversized composite shift, then unsigned shifts get 0
      // and ashr saturates.
      if (AmtSum >= TypeBits) {
        if (I.getOpcode() != Instruction::AShr)
          return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
        AmtSum = TypeBits-1;  // Saturate to 31 for i32 ashr.
      }

      return BinaryOperator::Create(I.getOpcode(), X,
                                    ConstantInt::get(Ty, AmtSum));
    }

    if (ShiftOp->getOpcode() == Instruction::LShr &&
        I.getOpcode() == Instruction::AShr) {
      if (AmtSum >= TypeBits)
        return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));

      // ((X >>u C1) >>s C2) -> (X >>u (C1+C2))  since C1 != 0.
      return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum));
    }

    if (ShiftOp->getOpcode() == Instruction::AShr &&
        I.getOpcode() == Instruction::LShr) {
      // ((X >>s C1) >>u C2) -> ((X >>s (C1+C2)) & mask)  since C1 != 0.
      if (AmtSum >= TypeBits)
        AmtSum = TypeBits-1;

      Value *Shift = Builder->CreateAShr(X, ConstantInt::get(Ty, AmtSum));

      APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
      return BinaryOperator::CreateAnd(Shift, ConstantInt::get(*Context, Mask));
    }

    // Okay, if we get here, one shift must be left, and the other shift must be
    // right.  See if the amounts are equal.
    if (ShiftAmt1 == ShiftAmt2) {
      // If we have ((X >>? C) << C), turn this into X & (-1 << C).
      if (I.getOpcode() == Instruction::Shl) {
        APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1));
        return BinaryOperator::CreateAnd(X, ConstantInt::get(*Context, Mask));
      }
      // If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
      if (I.getOpcode() == Instruction::LShr) {
        APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1));
        return BinaryOperator::CreateAnd(X, ConstantInt::get(*Context, Mask));
      }
      // We can simplify ((X << C) >>s C) into a trunc + sext.
      // NOTE: we could do this for any C, but that would make 'unusual' integer
      // types.  For now, just stick to ones well-supported by the code
      // generators.
      const Type *SExtType = 0;
      switch (Ty->getBitWidth() - ShiftAmt1) {
      case 1 :
      case 8 :
      case 16 :
      case 32 :
      case 64 :
      case 128:
        SExtType = IntegerType::get(*Context, Ty->getBitWidth() - ShiftAmt1);
        break;
      default: break;
      }
      if (SExtType)
        return new SExtInst(Builder->CreateTrunc(X, SExtType, "sext"), Ty);
      // Otherwise, we can't handle it yet.
    } else if (ShiftAmt1 < ShiftAmt2) {
      uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1;

      // (X >>? C1) << C2 --> X << (C2-C1) & (-1 << C2)
      if (I.getOpcode() == Instruction::Shl) {
        assert(ShiftOp->getOpcode() == Instruction::LShr ||
               ShiftOp->getOpcode() == Instruction::AShr);
        Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));

        APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
        return BinaryOperator::CreateAnd(Shift,
                                         ConstantInt::get(*Context, Mask));
      }

      // (X << C1) >>u C2  --> X >>u (C2-C1) & (-1 >> C2)
      if (I.getOpcode() == Instruction::LShr) {
        assert(ShiftOp->getOpcode() == Instruction::Shl);
        Value *Shift = Builder->CreateLShr(X, ConstantInt::get(Ty, ShiftDiff));

        APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
        return BinaryOperator::CreateAnd(Shift,
                                         ConstantInt::get(*Context, Mask));
      }

      // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in.
    } else {
      assert(ShiftAmt2 < ShiftAmt1);
      uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2;

      // (X >>? C1) << C2 --> X >>? (C1-C2) & (-1 << C2)
      if (I.getOpcode() == Instruction::Shl) {
        assert(ShiftOp->getOpcode() == Instruction::LShr ||
               ShiftOp->getOpcode() == Instruction::AShr);
        Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), X,
                                            ConstantInt::get(Ty, ShiftDiff));

        APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
        return BinaryOperator::CreateAnd(Shift,
                                         ConstantInt::get(*Context, Mask));
      }

      // (X << C1) >>u C2  --> X << (C1-C2) & (-1 >> C2)
      if (I.getOpcode() == Instruction::LShr) {
        assert(ShiftOp->getOpcode() == Instruction::Shl);
        Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));

        APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
        return BinaryOperator::CreateAnd(Shift,
                                         ConstantInt::get(*Context, Mask));
      }

      // We can't handle (X << C1) >>a C2, it shifts arbitrary bits in.
    }
  }
  return 0;
}


/// DecomposeSimpleLinearExpr - Analyze 'Val', seeing if it is a simple linear
/// expression.  If so, decompose it, returning some value X, such that Val is
/// X*Scale+Offset.
///
static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
                                        int &Offset, LLVMContext *Context) {
  assert(Val->getType() == Type::getInt32Ty(*Context) &&
         "Unexpected allocation size type!");
  if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
    Offset = CI->getZExtValue();
    Scale  = 0;
    return ConstantInt::get(Type::getInt32Ty(*Context), 0);
  } else if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
    if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
      if (I->getOpcode() == Instruction::Shl) {
        // This is a value scaled by '1 << the shift amt'.
        Scale = 1U << RHS->getZExtValue();
        Offset = 0;
        return I->getOperand(0);
      } else if (I->getOpcode() == Instruction::Mul) {
        // This value is scaled by 'RHS'.
        Scale = RHS->getZExtValue();
        Offset = 0;
        return I->getOperand(0);
      } else if (I->getOpcode() == Instruction::Add) {
        // We have X+C.  Check to see if we really have (X*C2)+C1,
        // where C1 is divisible by C2.
        unsigned SubScale;
        Value *SubVal =
          DecomposeSimpleLinearExpr(I->getOperand(0), SubScale,
                                    Offset, Context);
        Offset += RHS->getZExtValue();
        Scale = SubScale;
        return SubVal;
      }
    }
  }

  // Otherwise, we can't look past this.
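  // (Illustrative decompositions: mul i32 %n, 4 yields %n with Scale=4,
  // Offset=0; add (mul i32 %n, 4), 8 yields %n with Scale=4, Offset=8;
  // an opaque value such as a load falls through to the trivial case.)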
  Scale = 1;
  Offset = 0;
  return Val;
}


/// PromoteCastOfAllocation - If we find a cast of an allocation instruction,
/// try to eliminate the cast by moving the type information into the alloc.
Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
                                                   AllocaInst &AI) {
  const PointerType *PTy = cast<PointerType>(CI.getType());

  BuilderTy AllocaBuilder(*Builder);
  AllocaBuilder.SetInsertPoint(AI.getParent(), &AI);

  // Remove any uses of AI that are dead.
  assert(!CI.use_empty() && "Dead instructions should be removed earlier!");

  for (Value::use_iterator UI = AI.use_begin(), E = AI.use_end(); UI != E; ) {
    Instruction *User = cast<Instruction>(*UI++);
    if (isInstructionTriviallyDead(User)) {
      while (UI != E && *UI == User)
        ++UI; // If this instruction uses AI more than once, don't break UI.

      ++NumDeadInst;
      DEBUG(errs() << "IC: DCE: " << *User << '\n');
      EraseInstFromFunction(*User);
    }
  }

  // This requires TargetData to get the alloca alignment and size information.
  if (!TD) return 0;

  // Get the type really allocated and the type casted to.
  const Type *AllocElTy = AI.getAllocatedType();
  const Type *CastElTy = PTy->getElementType();
  if (!AllocElTy->isSized() || !CastElTy->isSized()) return 0;

  unsigned AllocElTyAlign = TD->getABITypeAlignment(AllocElTy);
  unsigned CastElTyAlign = TD->getABITypeAlignment(CastElTy);
  if (CastElTyAlign < AllocElTyAlign) return 0;

  // If the allocation has multiple uses, only promote it if we are strictly
  // increasing the alignment of the resultant allocation.  If we keep it the
  // same, we open the door to infinite loops of various kinds.  (A reference
  // from a dbg.declare doesn't count as a use for this purpose.)
  if (!AI.hasOneUse() && !hasOneUsePlusDeclare(&AI) &&
      CastElTyAlign == AllocElTyAlign) return 0;

  uint64_t AllocElTySize = TD->getTypeAllocSize(AllocElTy);
  uint64_t CastElTySize = TD->getTypeAllocSize(CastElTy);
  if (CastElTySize == 0 || AllocElTySize == 0) return 0;

  // See if we can satisfy the modulus by pulling a scale out of the array
  // size argument.
  unsigned ArraySizeScale;
  int ArrayOffset;
  Value *NumElements = // See if the array size is a decomposable linear expr.
    DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale,
                              ArrayOffset, Context);

  // If we can now satisfy the modulus, by using a non-1 scale, we really can
  // do the xform.
  if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 ||
      (AllocElTySize*ArrayOffset   ) % CastElTySize != 0) return 0;

  unsigned Scale = (AllocElTySize*ArraySizeScale)/CastElTySize;
  Value *Amt = 0;
  if (Scale == 1) {
    Amt = NumElements;
  } else {
    Amt = ConstantInt::get(Type::getInt32Ty(*Context), Scale);
    // Insert before the alloca, not before the cast.
    Amt = AllocaBuilder.CreateMul(Amt, NumElements, "tmp");
  }

  if (int Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
    Value *Off = ConstantInt::get(Type::getInt32Ty(*Context), Offset, true);
    Amt = AllocaBuilder.CreateAdd(Amt, Off, "tmp");
  }

  AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt);
  New->setAlignment(AI.getAlignment());
  New->takeName(&AI);

  // If the allocation has one real use plus a dbg.declare, just remove the
  // declare.
  if (DbgDeclareInst *DI = hasOneUsePlusDeclare(&AI)) {
    EraseInstFromFunction(*DI);
  }
  // If the allocation has multiple real uses, insert a cast and change all
  // things that used it to use the new cast.  This will also hack on CI, but it
  // will die soon.
  else if (!AI.hasOneUse()) {
    // New is the allocation instruction, pointer typed. AI is the original
    // allocation instruction, also pointer typed. Thus, cast to use is BitCast.
    Value *NewCast = AllocaBuilder.CreateBitCast(New, AI.getType(), "tmpcast");
    AI.replaceAllUsesWith(NewCast);
  }
  return ReplaceInstUsesWith(CI, New);
}

/// CanEvaluateInDifferentType - Return true if we can take the specified value
/// and return it as type Ty without inserting any new casts and without
/// changing the computed value.  This is used by code that tries to decide
/// whether promoting or shrinking integer operations to wider or smaller types
/// will allow us to eliminate a truncate or extend.
///
/// This is a truncation operation if Ty is smaller than V->getType(), or an
/// extension operation if Ty is larger.
///
/// If CastOpc is a truncation, then Ty will be a type smaller than V's.  We
/// should return true if trunc(V) can be computed by computing V in the
/// smaller type.  If V is an instruction, then trunc(inst(x,y)) can be
/// computed as inst(trunc(x),trunc(y)), which only makes sense if x and y can
/// be efficiently truncated.
///
/// If CastOpc is a sext or zext, we are asking if the low bits of the value
/// can be computed in a larger type, which is then and'd or sext_in_reg'd to
/// get the final result.
bool InstCombiner::CanEvaluateInDifferentType(Value *V, const Type *Ty,
                                              unsigned CastOpc,
                                              int &NumCastsRemoved) {
  // We can always evaluate constants in another type.
  if (isa<Constant>(V))
    return true;

  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) return false;

  const Type *OrigTy = V->getType();

  // If this is an extension or truncate, we can often eliminate it.
  if (isa<TruncInst>(I) || isa<ZExtInst>(I) || isa<SExtInst>(I)) {
    // If this is a cast from the destination type, we can trivially eliminate
    // it, and this will remove a cast overall.
    if (I->getOperand(0)->getType() == Ty) {
      // If the first operand is itself a cast, and is eliminable, do not count
      // this as an eliminable cast.  We would prefer to eliminate those two
      // casts first.
      if (!isa<CastInst>(I->getOperand(0)) && I->hasOneUse())
        ++NumCastsRemoved;
      return true;
    }
  }

  // We can't extend or shrink something that has multiple uses: doing so would
  // require duplicating the instruction in general, which isn't profitable.
  if (!I->hasOneUse()) return false;

  unsigned Opc = I->getOpcode();
  switch (Opc) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    // These operators can all arbitrarily be extended or truncated.
    return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc,
                                      NumCastsRemoved) &&
           CanEvaluateInDifferentType(I->getOperand(1), Ty, CastOpc,
                                      NumCastsRemoved);

  case Instruction::UDiv:
  case Instruction::URem: {
    // UDiv and URem can be truncated if all the truncated bits are zero.
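    // Illustrative instance: if the top 16 bits of %a and %b are known
    // zero, trunc (udiv i32 %a, %b) to i16 can instead be computed as
    // udiv i16 (trunc %a), (trunc %b).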
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    if (BitWidth < OrigBitWidth) {
      APInt Mask = APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth);
      if (MaskedValueIsZero(I->getOperand(0), Mask) &&
          MaskedValueIsZero(I->getOperand(1), Mask)) {
        return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc,
                                          NumCastsRemoved) &&
               CanEvaluateInDifferentType(I->getOperand(1), Ty, CastOpc,
                                          NumCastsRemoved);
      }
    }
    break;
  }
  case Instruction::Shl:
    // If we are truncating the result of this SHL, and if it's a shift of a
    // constant amount, we can always perform a SHL in a smaller type.
    if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
      uint32_t BitWidth = Ty->getScalarSizeInBits();
      if (BitWidth < OrigTy->getScalarSizeInBits() &&
          CI->getLimitedValue(BitWidth) < BitWidth)
        return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc,
                                          NumCastsRemoved);
    }
    break;
  case Instruction::LShr:
    // If this is a truncate of a logical shr, we can truncate it to a smaller
    // lshr iff we know that the bits we would otherwise be shifting in are
    // already zeros.
    if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
      uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
      uint32_t BitWidth = Ty->getScalarSizeInBits();
      if (BitWidth < OrigBitWidth &&
          MaskedValueIsZero(I->getOperand(0),
            APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
          CI->getLimitedValue(BitWidth) < BitWidth) {
        return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc,
                                          NumCastsRemoved);
      }
    }
    break;
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::Trunc:
    // If this is the same kind of case as our original (e.g. zext+zext), we
    // can safely replace it.  Note that replacing it does not reduce the
    // number of casts in the input.
    if (Opc == CastOpc)
      return true;

    // sext (zext ty1), ty2 -> zext ty2
    if (CastOpc == Instruction::SExt && Opc == Instruction::ZExt)
      return true;
    break;
  case Instruction::Select: {
    SelectInst *SI = cast<SelectInst>(I);
    return CanEvaluateInDifferentType(SI->getTrueValue(), Ty, CastOpc,
                                      NumCastsRemoved) &&
           CanEvaluateInDifferentType(SI->getFalseValue(), Ty, CastOpc,
                                      NumCastsRemoved);
  }
  case Instruction::PHI: {
    // We can change a phi if we can change all operands.
    PHINode *PN = cast<PHINode>(I);
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      if (!CanEvaluateInDifferentType(PN->getIncomingValue(i), Ty, CastOpc,
                                      NumCastsRemoved))
        return false;
    return true;
  }
  default:
    // TODO: Can handle more cases here.
    break;
  }

  return false;
}

/// EvaluateInDifferentType - Given an expression that
/// CanEvaluateInDifferentType returns true for, actually insert the code to
/// evaluate the expression.
Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty,
                                             bool isSigned) {
  if (Constant *C = dyn_cast<Constant>(V))
    return ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);

  // Otherwise, it must be an instruction.
  Instruction *I = cast<Instruction>(V);
  Instruction *Res = 0;
  unsigned Opc = I->getOpcode();
  switch (Opc) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::AShr:
  case Instruction::LShr:
  case Instruction::Shl:
  case Instruction::UDiv:
  case Instruction::URem: {
    Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned);
    Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
    Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
    break;
  }
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
    // If the source type of the cast is the type we're trying for then we can
    // just return the source.  There's no need to insert it because it is not
    // new.
    if (I->getOperand(0)->getType() == Ty)
      return I->getOperand(0);

    // Otherwise, must be the same type of cast, so just reinsert a new one.
    Res = CastInst::Create(cast<CastInst>(I)->getOpcode(), I->getOperand(0),
                           Ty);
    break;
  case Instruction::Select: {
    Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
    Value *False = EvaluateInDifferentType(I->getOperand(2), Ty, isSigned);
    Res = SelectInst::Create(I->getOperand(0), True, False);
    break;
  }
  case Instruction::PHI: {
    PHINode *OPN = cast<PHINode>(I);
    PHINode *NPN = PHINode::Create(Ty);
    for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
      Value *V =
        EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
      NPN->addIncoming(V, OPN->getIncomingBlock(i));
    }
    Res = NPN;
    break;
  }
  default:
    // TODO: Can handle more cases here.
    llvm_unreachable("Unreachable!");
    break;
  }

  Res->takeName(I);
  return InsertNewInstBefore(Res, *I);
}

/// @brief Implement the transforms common to all CastInst visitors.
Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
  Value *Src = CI.getOperand(0);

  // Many cases of "cast of a cast" are eliminable.  If it's eliminable we just
  // eliminate it now.
  if (CastInst *CSrc = dyn_cast<CastInst>(Src)) {   // A->B->C cast
    if (Instruction::CastOps opc =
        isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), TD)) {
      // The first cast (CSrc) is eliminable so we need to fix up or replace
      // the second cast (CI).  CSrc will then have a good chance of being dead.
      return CastInst::Create(opc, CSrc->getOperand(0), CI.getType());
    }
  }

  // If we are casting a select then fold the cast into the select.
  if (SelectInst *SI = dyn_cast<SelectInst>(Src))
    if (Instruction *NV = FoldOpIntoSelect(CI, SI, this))
      return NV;

  // If we are casting a PHI then fold the cast into the PHI.
  if (isa<PHINode>(Src)) {
    // We don't do this if this would create a PHI node with an illegal type if
    // it is currently legal.
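    // (Illustratively: we would rather keep a legal i32 phi than
    // manufacture a phi of an unusual width such as i17 to absorb a cast.)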
    if (!isa<IntegerType>(Src->getType()) ||
        !isa<IntegerType>(CI.getType()) ||
        ShouldChangeType(CI.getType(), Src->getType(), TD))
      if (Instruction *NV = FoldOpIntoPhi(CI))
        return NV;
  }

  return 0;
}

/// FindElementAtOffset - Given a type and a constant offset, determine whether
/// or not there is a sequence of GEP indices into the type that will land us
/// at the specified offset.  If so, fill them into NewIndices and return the
/// resultant element type, otherwise return null.
static const Type *FindElementAtOffset(const Type *Ty, int64_t Offset,
                                       SmallVectorImpl<Value*> &NewIndices,
                                       const TargetData *TD,
                                       LLVMContext *Context) {
  if (!TD) return 0;
  if (!Ty->isSized()) return 0;

  // Start with the index over the outer type.  Note that the type size
  // might be zero (even if the offset isn't zero) if the indexed type
  // is something like [0 x {int, int}]
  const Type *IntPtrTy = TD->getIntPtrType(*Context);
  int64_t FirstIdx = 0;
  if (int64_t TySize = TD->getTypeAllocSize(Ty)) {
    FirstIdx = Offset/TySize;
    Offset -= FirstIdx*TySize;

    // Handle hosts where % returns negative instead of values [0..TySize).
    if (Offset < 0) {
      --FirstIdx;
      Offset += TySize;
      assert(Offset >= 0);
    }
    assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset");
  }

  NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx));

  // Index into the types.  If we fail, return null.
  while (Offset) {
    // Indexing into tail padding between struct/array elements.
    if (uint64_t(Offset*8) >= TD->getTypeSizeInBits(Ty))
      return 0;

    if (const StructType *STy = dyn_cast<StructType>(Ty)) {
      const StructLayout *SL = TD->getStructLayout(STy);
      assert(Offset < (int64_t)SL->getSizeInBytes() &&
             "Offset must stay within the indexed type");

      unsigned Elt = SL->getElementContainingOffset(Offset);
      NewIndices.push_back(ConstantInt::get(Type::getInt32Ty(*Context), Elt));

      Offset -= SL->getElementOffset(Elt);
      Ty = STy->getElementType(Elt);
    } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
      uint64_t EltSize = TD->getTypeAllocSize(AT->getElementType());
      assert(EltSize && "Cannot index into a zero-sized array");
      NewIndices.push_back(ConstantInt::get(IntPtrTy, Offset/EltSize));
      Offset %= EltSize;
      Ty = AT->getElementType();
    } else {
      // Otherwise, we can't index into the middle of this atomic type, bail.
      return 0;
    }
  }

  return Ty;
}

/// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint)
Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
  Value *Src = CI.getOperand(0);

  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) {
    // If casting the result of a getelementptr instruction with no offset,
    // turn this into a cast of the original pointer!
    if (GEP->hasAllZeroIndices()) {
      // Changing the cast operand is usually not a good idea but it is safe
      // here because the pointer operand is being replaced with another
      // pointer operand so the opcode doesn't need to change.
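      // Sketch of the shape (types illustrative):
      //   %p = getelementptr [4 x i8]* %base, i32 0, i32 0
      //   %c = bitcast i8* %p to i32*
      // %p still addresses the first byte of %base, so the cast can take
      // %base directly as its operand.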
      Worklist.Add(GEP);
      CI.setOperand(0, GEP->getOperand(0));
      return &CI;
    }

    // If the GEP has a single use, and the base pointer is a bitcast, and the
    // GEP computes a constant offset, see if we can convert these three
    // instructions into fewer.  This typically happens with unions and other
    // non-type-safe code.
    if (TD && GEP->hasOneUse() && isa<BitCastInst>(GEP->getOperand(0))) {
      if (GEP->hasAllConstantIndices()) {
        // We are guaranteed to get a constant from EmitGEPOffset.
        ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(GEP, *this));
        int64_t Offset = OffsetV->getSExtValue();

        // Get the base pointer input of the bitcast, and the type it points to.
        Value *OrigBase = cast<BitCastInst>(GEP->getOperand(0))->getOperand(0);
        const Type *GEPIdxTy =
          cast<PointerType>(OrigBase->getType())->getElementType();
        SmallVector<Value*, 8> NewIndices;
        if (FindElementAtOffset(GEPIdxTy, Offset, NewIndices, TD, Context)) {
          // If we were able to index down into an element, create the GEP
          // and bitcast the result.  This eliminates one bitcast, potentially
          // two.
          Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() ?
            Builder->CreateInBoundsGEP(OrigBase,
                                       NewIndices.begin(), NewIndices.end()) :
            Builder->CreateGEP(OrigBase, NewIndices.begin(), NewIndices.end());
          NGEP->takeName(GEP);

          if (isa<BitCastInst>(CI))
            return new BitCastInst(NGEP, CI.getType());
          assert(isa<PtrToIntInst>(CI));
          return new PtrToIntInst(NGEP, CI.getType());
        }
      }
    }
  }

  return commonCastTransforms(CI);
}

/// commonIntCastTransforms - This function implements the common transforms
/// for trunc, zext, and sext.
Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
  if (Instruction *Result = commonCastTransforms(CI))
    return Result;

  Value *Src = CI.getOperand(0);
  const Type *SrcTy = Src->getType();
  const Type *DestTy = CI.getType();
  uint32_t SrcBitSize = SrcTy->getScalarSizeInBits();
  uint32_t DestBitSize = DestTy->getScalarSizeInBits();

  // See if we can simplify any instructions used by the LHS whose sole
  // purpose is to compute bits we don't care about.
  if (SimplifyDemandedInstructionBits(CI))
    return &CI;

  // If the source isn't an instruction or has more than one use then we
  // can't do anything more.
  Instruction *SrcI = dyn_cast<Instruction>(Src);
  if (!SrcI || !Src->hasOneUse())
    return 0;

  // Attempt to propagate the cast into the instruction for int->int casts.
  int NumCastsRemoved = 0;
  // Only do this if the dest type is a simple type, don't convert the
  // expression tree to something weird like i93 unless the source is also
  // strange.
  if ((isa<VectorType>(DestTy) ||
       ShouldChangeType(SrcI->getType(), DestTy, TD)) &&
      CanEvaluateInDifferentType(SrcI, DestTy,
                                 CI.getOpcode(), NumCastsRemoved)) {
    // If this cast is a truncate, evaluating in a different type always
    // eliminates the cast, so it is always a win.  If this is a zero-extension,
    // we need to do an AND to maintain the clear top-part of the computation,
    // so we require that the input have eliminated at least one cast.  If this
    // is a sign extension, we insert two new casts (to do the extension) so we
    // require that two casts have been eliminated.
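    // Illustrative flavor of the trunc case:
    //   %t = add i32 %x, %y
    //   %r = trunc i32 %t to i8
    // can be rebuilt as add i8 (trunc %x), (trunc %y), and the original
    // trunc then disappears entirely.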
    bool DoXForm = false;
    bool JustReplace = false;
    switch (CI.getOpcode()) {
    default:
      // All the others use floating point so we shouldn't actually
      // get here because of the check above.
      llvm_unreachable("Unknown cast type");
    case Instruction::Trunc:
      DoXForm = true;
      break;
    case Instruction::ZExt: {
      DoXForm = NumCastsRemoved >= 1;

      if (!DoXForm && 0) {
        // If it's unnecessary to issue an AND to clear the high bits, it's
        // always profitable to do this xform.
        Value *TryRes = EvaluateInDifferentType(SrcI, DestTy, false);
        APInt Mask(APInt::getBitsSet(DestBitSize, SrcBitSize, DestBitSize));
        if (MaskedValueIsZero(TryRes, Mask))
          return ReplaceInstUsesWith(CI, TryRes);

        if (Instruction *TryI = dyn_cast<Instruction>(TryRes))
          if (TryI->use_empty())
            EraseInstFromFunction(*TryI);
      }
      break;
    }
    case Instruction::SExt: {
      DoXForm = NumCastsRemoved >= 2;
      if (!DoXForm && !isa<TruncInst>(SrcI) && 0) {
        // If we do not have to emit the truncate + sext pair, then it's always
        // profitable to do this xform.
        //
        // It's not safe to eliminate the trunc + sext pair if one of the
        // eliminated casts is a truncate. e.g.
        // t2 = trunc i32 t1 to i16
        // t3 = sext i16 t2 to i32
        // !=
        // i32 t1
        Value *TryRes = EvaluateInDifferentType(SrcI, DestTy, true);
        unsigned NumSignBits = ComputeNumSignBits(TryRes);
        if (NumSignBits > (DestBitSize - SrcBitSize))
          return ReplaceInstUsesWith(CI, TryRes);

        if (Instruction *TryI = dyn_cast<Instruction>(TryRes))
          if (TryI->use_empty())
            EraseInstFromFunction(*TryI);
      }
      break;
    }
    }

    if (DoXForm) {
      DEBUG(errs() << "ICE: EvaluateInDifferentType converting expression type"
                      " to avoid cast: " << CI);
      Value *Res = EvaluateInDifferentType(SrcI, DestTy,
                                          CI.getOpcode() == Instruction::SExt);
      if (JustReplace)
        // Just replace this cast with the result.
        return ReplaceInstUsesWith(CI, Res);

      assert(Res->getType() == DestTy);
      switch (CI.getOpcode()) {
      default: llvm_unreachable("Unknown cast type!");
      case Instruction::Trunc:
        // Just replace this cast with the result.
        return ReplaceInstUsesWith(CI, Res);
      case Instruction::ZExt: {
        assert(SrcBitSize < DestBitSize && "Not a zext?");

        // If the high bits are already zero, just replace this cast with the
        // result.
        APInt Mask(APInt::getBitsSet(DestBitSize, SrcBitSize, DestBitSize));
        if (MaskedValueIsZero(Res, Mask))
          return ReplaceInstUsesWith(CI, Res);

        // We need to emit an AND to clear the high bits.
        Constant *C = ConstantInt::get(*Context,
                                 APInt::getLowBitsSet(DestBitSize, SrcBitSize));
        return BinaryOperator::CreateAnd(Res, C);
      }
      case Instruction::SExt: {
        // If the high bits are already filled with sign bit, just replace this
        // cast with the result.
        unsigned NumSignBits = ComputeNumSignBits(Res);
        if (NumSignBits > (DestBitSize - SrcBitSize))
          return ReplaceInstUsesWith(CI, Res);

        // We need to emit a cast to truncate, then a cast to sext.
        return new SExtInst(Builder->CreateTrunc(Res, Src->getType()), DestTy);
      }
      }
    }
  }

  Value *Op0 = SrcI->getNumOperands() > 0 ? SrcI->getOperand(0) : 0;
  Value *Op1 = SrcI->getNumOperands() > 1 ? SrcI->getOperand(1) : 0;

  switch (SrcI->getOpcode()) {
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    // If we are discarding information, rewrite.
    if (DestBitSize < SrcBitSize && DestBitSize != 1) {
      // Don't insert two casts unless at least one can be eliminated.
      if (!ValueRequiresCast(CI.getOpcode(), Op1, DestTy, TD) ||
          !ValueRequiresCast(CI.getOpcode(), Op0, DestTy, TD)) {
        Value *Op0c = Builder->CreateTrunc(Op0, DestTy, Op0->getName());
        Value *Op1c = Builder->CreateTrunc(Op1, DestTy, Op1->getName());
        return BinaryOperator::Create(
            cast<BinaryOperator>(SrcI)->getOpcode(), Op0c, Op1c);
      }
    }

    // cast (xor bool X, true) to int  --> xor (cast bool X to int), 1
    if (isa<ZExtInst>(CI) && SrcBitSize == 1 &&
        SrcI->getOpcode() == Instruction::Xor &&
        Op1 == ConstantInt::getTrue(*Context) &&
        (!Op0->hasOneUse() || !isa<CmpInst>(Op0))) {
      Value *New = Builder->CreateZExt(Op0, DestTy, Op0->getName());
      return BinaryOperator::CreateXor(New,
                                       ConstantInt::get(CI.getType(), 1));
    }
    break;

  case Instruction::Shl: {
    // Canonicalize trunc inside shl, if we can.
    ConstantInt *CI = dyn_cast<ConstantInt>(Op1);
    if (CI && DestBitSize < SrcBitSize &&
        CI->getLimitedValue(DestBitSize) < DestBitSize) {
      Value *Op0c = Builder->CreateTrunc(Op0, DestTy, Op0->getName());
      Value *Op1c = Builder->CreateTrunc(Op1, DestTy, Op1->getName());
      return BinaryOperator::CreateShl(Op0c, Op1c);
    }
    break;
  }
  }
  return 0;
}

Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
  if (Instruction *Result = commonIntCastTransforms(CI))
    return Result;

  Value *Src = CI.getOperand(0);
  const Type *Ty = CI.getType();
  uint32_t DestBitWidth = Ty->getScalarSizeInBits();
  uint32_t SrcBitWidth = Src->getType()->getScalarSizeInBits();

  // Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0)
  if (DestBitWidth == 1) {
    Constant *One = ConstantInt::get(Src->getType(), 1);
    Src = Builder->CreateAnd(Src, One, "tmp");
    Value *Zero = Constant::getNullValue(Src->getType());
    return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
  }

  // Optimize trunc(lshr(x, c)) to pull the shift through the truncate.
  ConstantInt *ShAmtV = 0;
  Value *ShiftOp = 0;
  if (Src->hasOneUse() &&
      match(Src, m_LShr(m_Value(ShiftOp), m_ConstantInt(ShAmtV)))) {
    uint32_t ShAmt = ShAmtV->getLimitedValue(SrcBitWidth);

    // Get a mask for the bits shifting in.
    APInt Mask(APInt::getLowBitsSet(SrcBitWidth, ShAmt).shl(DestBitWidth));
    if (MaskedValueIsZero(ShiftOp, Mask)) {
      if (ShAmt >= DestBitWidth)        // All zeros.
        return ReplaceInstUsesWith(CI, Constant::getNullValue(Ty));

      // Okay, we can shrink this.  Truncate the input, then return a new
      // shift.
      Value *V1 = Builder->CreateTrunc(ShiftOp, Ty, ShiftOp->getName());
      Value *V2 = ConstantExpr::getTrunc(ShAmtV, Ty);
      return BinaryOperator::CreateLShr(V1, V2);
    }
  }

  return 0;
}

/// transformZExtICmp - Transform (zext icmp) to bitwise / integer operations
/// in order to eliminate the icmp.
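/// For example (illustrative):
///   %c = icmp slt i32 %x, 0
///   %z = zext i1 %c to i32
/// becomes a plain sign-bit extraction:
///   %z = lshr i32 %x, 31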
Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
                                             bool DoXform) {
  // If we are just checking for an icmp eq of a single bit and zext'ing it
  // to an integer, then shift the bit to the appropriate place and then
  // cast to integer to avoid the comparison.
  if (ConstantInt *Op1C = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
    const APInt &Op1CV = Op1C->getValue();

    // zext (x <s  0) to i32 --> x>>u31      true if signbit set.
    // zext (x >s -1) to i32 --> (x>>u31)^1  true if signbit clear.
    if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) ||
        (ICI->getPredicate() == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) {
      if (!DoXform) return ICI;

      Value *In = ICI->getOperand(0);
      Value *Sh = ConstantInt::get(In->getType(),
                                   In->getType()->getScalarSizeInBits()-1);
      In = Builder->CreateLShr(In, Sh, In->getName()+".lobit");
      if (In->getType() != CI.getType())
        In = Builder->CreateIntCast(In, CI.getType(), false/*ZExt*/, "tmp");

      if (ICI->getPredicate() == ICmpInst::ICMP_SGT) {
        Constant *One = ConstantInt::get(In->getType(), 1);
        In = Builder->CreateXor(In, One, In->getName()+".not");
      }

      return ReplaceInstUsesWith(CI, In);
    }

    // zext (X == 0) to i32 --> X^1      iff X has only the low bit set.
    // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
    // zext (X == 1) to i32 --> X        iff X has only the low bit set.
    // zext (X == 2) to i32 --> X>>1     iff X has only the 2nd bit set.
    // zext (X != 0) to i32 --> X        iff X has only the low bit set.
    // zext (X != 0) to i32 --> X>>1     iff X has only the 2nd bit set.
    // zext (X != 1) to i32 --> X^1      iff X has only the low bit set.
    // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
    if ((Op1CV == 0 || Op1CV.isPowerOf2()) &&
        // This only works for EQ and NE
        ICI->isEquality()) {
      // If Op1C is some other power of two, convert:
      uint32_t BitWidth = Op1C->getType()->getBitWidth();
      APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
      APInt TypeMask(APInt::getAllOnesValue(BitWidth));
      ComputeMaskedBits(ICI->getOperand(0), TypeMask, KnownZero, KnownOne);

      APInt KnownZeroMask(~KnownZero);
      if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
        if (!DoXform) return ICI;

        bool isNE = ICI->getPredicate() == ICmpInst::ICMP_NE;
        if (Op1CV != 0 && (Op1CV != KnownZeroMask)) {
          // (X&4) == 2 --> false
          // (X&4) != 2 --> true
          Constant *Res = ConstantInt::get(Type::getInt1Ty(*Context), isNE);
          Res = ConstantExpr::getZExt(Res, CI.getType());
          return ReplaceInstUsesWith(CI, Res);
        }

        uint32_t ShiftAmt = KnownZeroMask.logBase2();
        Value *In = ICI->getOperand(0);
        if (ShiftAmt) {
          // Perform a logical shr by shiftamt.
          // Insert the shift to put the result in the low bit.
          In = Builder->CreateLShr(In,
                                   ConstantInt::get(In->getType(), ShiftAmt),
                                   In->getName()+".lobit");
        }

        if ((Op1CV != 0) == isNE) { // Toggle the low bit.
          Constant *One = ConstantInt::get(In->getType(), 1);
          In = Builder->CreateXor(In, One, "tmp");
        }

        if (CI.getType() == In->getType())
          return ReplaceInstUsesWith(CI, In);
        else
          return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/);
      }
    }
  }

  // icmp ne A, B is equal to xor A, B when A and B only really have one bit.
  // It is also profitable to transform icmp eq into not(xor(A, B)) because
  // that may lead to additional simplifications.
  if (ICI->isEquality() && CI.getType() == ICI->getOperand(0)->getType()) {
    if (const IntegerType *ITy = dyn_cast<IntegerType>(CI.getType())) {
      uint32_t BitWidth = ITy->getBitWidth();
      Value *LHS = ICI->getOperand(0);
      Value *RHS = ICI->getOperand(1);

      APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0);
      APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0);
      APInt TypeMask(APInt::getAllOnesValue(BitWidth));
      ComputeMaskedBits(LHS, TypeMask, KnownZeroLHS, KnownOneLHS);
      ComputeMaskedBits(RHS, TypeMask, KnownZeroRHS, KnownOneRHS);

      if (KnownZeroLHS == KnownZeroRHS && KnownOneLHS == KnownOneRHS) {
        APInt KnownBits = KnownZeroLHS | KnownOneLHS;
        APInt UnknownBit = ~KnownBits;
        if (UnknownBit.countPopulation() == 1) {
          if (!DoXform) return ICI;

          Value *Result = Builder->CreateXor(LHS, RHS);

          // Mask off any bits that are set and won't be shifted away.
          if (KnownOneLHS.uge(UnknownBit))
            Result = Builder->CreateAnd(Result,
                                        ConstantInt::get(ITy, UnknownBit));

          // Shift the bit we're testing down to the lsb.
          Result = Builder->CreateLShr(
               Result, ConstantInt::get(ITy, UnknownBit.countTrailingZeros()));

          if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
            Result = Builder->CreateXor(Result, ConstantInt::get(ITy, 1));
          Result->takeName(ICI);
          return ReplaceInstUsesWith(CI, Result);
        }
      }
    }
  }

  return 0;
}

Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
  // If one of the common conversions will work, do it.
  if (Instruction *Result = commonIntCastTransforms(CI))
    return Result;

  Value *Src = CI.getOperand(0);

  // If this is a TRUNC followed by a ZEXT then we are dealing with integral
  // types and if the sizes are just right we can convert this into a logical
  // 'and' which will be much cheaper than the pair of casts.
  if (TruncInst *CSrc = dyn_cast<TruncInst>(Src)) {   // A->B->C cast
    // Get the sizes of the types involved.  We know that the intermediate type
    // will be smaller than A or C, but don't know the relation between A and C.
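    // Illustrative example for the SrcSize < DstSize case below:
    //   %m = trunc i16 %a to i8
    //   %z = zext i8 %m to i32
    // becomes:
    //   %a.mask = and i16 %a, 255
    //   %z = zext i16 %a.mask to i32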
    Value *A = CSrc->getOperand(0);
    unsigned SrcSize = A->getType()->getScalarSizeInBits();
    unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
    unsigned DstSize = CI.getType()->getScalarSizeInBits();
    // If we're actually extending zero bits, then if
    //   SrcSize <  DstSize: zext(a & mask)
    //   SrcSize == DstSize: a & mask
    //   SrcSize  > DstSize: trunc(a) & mask
    if (SrcSize < DstSize) {
      APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
      Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
      Value *And = Builder->CreateAnd(A, AndConst, CSrc->getName()+".mask");
      return new ZExtInst(And, CI.getType());
    }

    if (SrcSize == DstSize) {
      APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
      return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
                                                           AndValue));
    }
    if (SrcSize > DstSize) {
      Value *Trunc = Builder->CreateTrunc(A, CI.getType(), "tmp");
      APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
      return BinaryOperator::CreateAnd(Trunc,
                                       ConstantInt::get(Trunc->getType(),
                                                        AndValue));
    }
  }

  if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src))
    return transformZExtICmp(ICI, CI);

  BinaryOperator *SrcI = dyn_cast<BinaryOperator>(Src);
  if (SrcI && SrcI->getOpcode() == Instruction::Or) {
    // zext (or icmp, icmp) --> or (zext icmp), (zext icmp) if at least one
    // of the (zext icmp) will be transformed.
    ICmpInst *LHS = dyn_cast<ICmpInst>(SrcI->getOperand(0));
    ICmpInst *RHS = dyn_cast<ICmpInst>(SrcI->getOperand(1));
    if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() &&
        (transformZExtICmp(LHS, CI, false) ||
         transformZExtICmp(RHS, CI, false))) {
      Value *LCast = Builder->CreateZExt(LHS, CI.getType(), LHS->getName());
      Value *RCast = Builder->CreateZExt(RHS, CI.getType(), RHS->getName());
      return BinaryOperator::Create(Instruction::Or, LCast, RCast);
    }
  }

  // zext(trunc(t) & C) -> (t & zext(C)).
  if (SrcI && SrcI->getOpcode() == Instruction::And && SrcI->hasOneUse())
    if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1)))
      if (TruncInst *TI = dyn_cast<TruncInst>(SrcI->getOperand(0))) {
        Value *TI0 = TI->getOperand(0);
        if (TI0->getType() == CI.getType())
          return
            BinaryOperator::CreateAnd(TI0,
                                      ConstantExpr::getZExt(C, CI.getType()));
      }

  // zext((trunc(t) & C) ^ C) -> ((t & zext(C)) ^ zext(C)).
  if (SrcI && SrcI->getOpcode() == Instruction::Xor && SrcI->hasOneUse())
    if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1)))
      if (BinaryOperator *And = dyn_cast<BinaryOperator>(SrcI->getOperand(0)))
        if (And->getOpcode() == Instruction::And && And->hasOneUse() &&
            And->getOperand(1) == C)
          if (TruncInst *TI = dyn_cast<TruncInst>(And->getOperand(0))) {
            Value *TI0 = TI->getOperand(0);
            if (TI0->getType() == CI.getType()) {
              Constant *ZC = ConstantExpr::getZExt(C, CI.getType());
              Value *NewAnd = Builder->CreateAnd(TI0, ZC, "tmp");
              return BinaryOperator::CreateXor(NewAnd, ZC);
            }
          }

  return 0;
}

Instruction *InstCombiner::visitSExt(SExtInst &CI) {
  if (Instruction *I = commonIntCastTransforms(CI))
    return I;

  Value *Src = CI.getOperand(0);

  // Canonicalize sign-extend from i1 to a select.
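  // For example:
  //   %s = sext i1 %b to i32
  // becomes:
  //   %s = select i1 %b, i32 -1, i32 0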
  if (Src->getType() == Type::getInt1Ty(*Context))
    return SelectInst::Create(Src,
                              Constant::getAllOnesValue(CI.getType()),
                              Constant::getNullValue(CI.getType()));

  // See if the value being truncated is already sign extended.  If so, just
  // eliminate the trunc/sext pair.
  if (Operator::getOpcode(Src) == Instruction::Trunc) {
    Value *Op = cast<User>(Src)->getOperand(0);
    unsigned OpBits   = Op->getType()->getScalarSizeInBits();
    unsigned MidBits  = Src->getType()->getScalarSizeInBits();
    unsigned DestBits = CI.getType()->getScalarSizeInBits();
    unsigned NumSignBits = ComputeNumSignBits(Op);

    if (OpBits == DestBits) {
      // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
      // bits, it is already sign extended.
      if (NumSignBits > DestBits-MidBits)
        return ReplaceInstUsesWith(CI, Op);
    } else if (OpBits < DestBits) {
      // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
      // bits, just sext from i32.
      if (NumSignBits > OpBits-MidBits)
        return new SExtInst(Op, CI.getType(), "tmp");
    } else {
      // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
      // bits, just truncate to i32.
      if (NumSignBits > OpBits-MidBits)
        return new TruncInst(Op, CI.getType(), "tmp");
    }
  }

  // If the input is a shl/ashr pair of the same constant, then this is a sign
  // extension from a smaller value.  If we could trust arbitrary bitwidth
  // integers, we could turn this into a truncate to the smaller bitwidth and
  // then use a sext for the whole extension.  Since we don't, look deeper and
  // check for a truncate.  If the source and dest are the same type, eliminate
  // the trunc and extend and just do shifts.  For example, turn:
  //   %a = trunc i32 %i to i8
  //   %b = shl i8 %a, 6
  //   %c = ashr i8 %b, 6
  //   %d = sext i8 %c to i32
  // into:
  //   %a = shl i32 %i, 30
  //   %d = ashr i32 %a, 30
  Value *A = 0;
  ConstantInt *BA = 0, *CA = 0;
  if (match(Src, m_AShr(m_Shl(m_Value(A), m_ConstantInt(BA)),
                        m_ConstantInt(CA))) &&
      BA == CA && isa<TruncInst>(A)) {
    Value *I = cast<TruncInst>(A)->getOperand(0);
    if (I->getType() == CI.getType()) {
      unsigned MidSize = Src->getType()->getScalarSizeInBits();
      unsigned SrcDstSize = CI.getType()->getScalarSizeInBits();
      unsigned ShAmt = CA->getZExtValue()+SrcDstSize-MidSize;
      Constant *ShAmtV = ConstantInt::get(CI.getType(), ShAmt);
      I = Builder->CreateShl(I, ShAmtV, CI.getName());
      return BinaryOperator::CreateAShr(I, ShAmtV);
    }
  }

  return 0;
}

/// FitsInFPType - Return a Constant* for the specified FP constant if it fits
/// in the specified FP type without changing its value.
static Constant *FitsInFPType(ConstantFP *CFP, const fltSemantics &Sem,
                              LLVMContext *Context) {
  bool losesInfo;
  APFloat F = CFP->getValueAPF();
  (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
  if (!losesInfo)
    return ConstantFP::get(*Context, F);
  return 0;
}

/// LookThroughFPExtensions - If this is an fp extension instruction, look
/// through it until we get the source value.
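/// For example (illustrative): given %e = fpext float %x to double, this
/// returns %x; a double constant such as 2.0 comes back as the float 2.0f
/// when it is exactly representable, enabling
///   fptrunc (fadd (fpext %x), 2.0) --> fadd float %x, 2.0f
/// in visitFPTrunc below.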
static Value *LookThroughFPExtensions(Value *V, LLVMContext *Context) {
  if (Instruction *I = dyn_cast<Instruction>(V))
    if (I->getOpcode() == Instruction::FPExt)
      return LookThroughFPExtensions(I->getOperand(0), Context);

  // If this value is a constant, return the constant in the smallest FP type
  // that can accurately represent it.  This allows us to turn
  // (float)((double)X+2.0) into X+2.0f.
  if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
    if (CFP->getType() == Type::getPPC_FP128Ty(*Context))
      return V;  // No constant folding of this.
    // See if the value can be truncated to float and then reextended.
    if (Value *V = FitsInFPType(CFP, APFloat::IEEEsingle, Context))
      return V;
    if (CFP->getType() == Type::getDoubleTy(*Context))
      return V;  // Won't shrink.
    if (Value *V = FitsInFPType(CFP, APFloat::IEEEdouble, Context))
      return V;
    // Don't try to shrink to various long double types.
  }

  return V;
}

Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
  if (Instruction *I = commonCastTransforms(CI))
    return I;

  // If we have fptrunc(fadd (fpextend x), (fpextend y)), where x and y are
  // smaller than the destination type, we can eliminate the truncate by doing
  // the add as the smaller type.  This applies to fadd/fsub/fmul/fdiv as well
  // as many builtins (sqrt, etc).
  BinaryOperator *OpI = dyn_cast<BinaryOperator>(CI.getOperand(0));
  if (OpI && OpI->hasOneUse()) {
    switch (OpI->getOpcode()) {
    default: break;
    case Instruction::FAdd:
    case Instruction::FSub:
    case Instruction::FMul:
    case Instruction::FDiv:
    case Instruction::FRem:
      const Type *SrcTy = OpI->getType();
      Value *LHSTrunc = LookThroughFPExtensions(OpI->getOperand(0), Context);
      Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1), Context);
      if (LHSTrunc->getType() != SrcTy &&
          RHSTrunc->getType() != SrcTy) {
        unsigned DstSize = CI.getType()->getScalarSizeInBits();
        // If the source types were both smaller than the destination type of
        // the cast, do this xform.
        if (LHSTrunc->getType()->getScalarSizeInBits() <= DstSize &&
            RHSTrunc->getType()->getScalarSizeInBits() <= DstSize) {
          LHSTrunc = Builder->CreateFPExt(LHSTrunc, CI.getType());
          RHSTrunc = Builder->CreateFPExt(RHSTrunc, CI.getType());
          return BinaryOperator::Create(OpI->getOpcode(), LHSTrunc, RHSTrunc);
        }
      }
      break;
    }
  }
  return 0;
}

Instruction *InstCombiner::visitFPExt(CastInst &CI) {
  return commonCastTransforms(CI);
}

Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) {
  Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
  if (OpI == 0)
    return commonCastTransforms(FI);

  // fptoui(uitofp(X)) --> X
  // fptoui(sitofp(X)) --> X
  // This is safe if the intermediate type has enough bits in its mantissa to
  // accurately represent all values of X.  For example, do not do this with
  // i64->float->i64.  This is also safe for the sitofp case, because any
  // negative 'X' value would cause an undefined result for the fptoui.
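  // Illustrative example:
  //   %f = uitofp i8 %x to float
  //   %r = fptoui float %f to i8
  // folds to %x, since float's 24-bit mantissa exactly represents every i8.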
  if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
      OpI->getOperand(0)->getType() == FI.getType() &&
      (int)FI.getType()->getScalarSizeInBits() < /*extra bit for sign */
      OpI->getType()->getFPMantissaWidth())
    return ReplaceInstUsesWith(FI, OpI->getOperand(0));

  return commonCastTransforms(FI);
}

Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) {
  Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
  if (OpI == 0)
    return commonCastTransforms(FI);

  // fptosi(sitofp(X)) --> X
  // fptosi(uitofp(X)) --> X
  // This is safe if the intermediate type has enough bits in its mantissa to
  // accurately represent all values of X.  For example, do not do this with
  // i64->float->i64.  This is also safe for the uitofp case, because any 'X'
  // value too large for the signed result would cause an undefined result for
  // the fptosi anyway.
  if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
      OpI->getOperand(0)->getType() == FI.getType() &&
      (int)FI.getType()->getScalarSizeInBits() <=
      OpI->getType()->getFPMantissaWidth())
    return ReplaceInstUsesWith(FI, OpI->getOperand(0));

  return commonCastTransforms(FI);
}

Instruction *InstCombiner::visitUIToFP(CastInst &CI) {
  return commonCastTransforms(CI);
}

Instruction *InstCombiner::visitSIToFP(CastInst &CI) {
  return commonCastTransforms(CI);
}

Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
  // If the destination integer type is smaller than the intptr_t type for
  // this target, do a ptrtoint to intptr_t then do a trunc.  This allows the
  // trunc to be exposed to other transforms.  Don't do this for extending
  // ptrtoint's, because we don't know if the target sign or zero extends its
  // pointers.
  if (TD &&
      CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) {
    Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
                                       TD->getIntPtrType(CI.getContext()),
                                       "tmp");
    return new TruncInst(P, CI.getType());
  }

  return commonPointerCastTransforms(CI);
}

Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
  // If the source integer type is larger than the intptr_t type for
  // this target, do a trunc to the intptr_t type, then inttoptr of it.  This
  // allows the trunc to be exposed to other transforms.  Don't do this for
  // extending inttoptr's, because we don't know if the target sign or zero
  // extends its pointers.
  if (TD && CI.getOperand(0)->getType()->getScalarSizeInBits() >
      TD->getPointerSizeInBits()) {
    Value *P = Builder->CreateTrunc(CI.getOperand(0),
                                    TD->getIntPtrType(CI.getContext()), "tmp");
    return new IntToPtrInst(P, CI.getType());
  }

  if (Instruction *I = commonCastTransforms(CI))
    return I;

  return 0;
}

Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
  // If the operands are integer typed then apply the integer transforms,
  // otherwise just apply the common ones.
  Value *Src = CI.getOperand(0);
  const Type *SrcTy = Src->getType();
  const Type *DestTy = CI.getType();

  if (isa<PointerType>(SrcTy)) {
    if (Instruction *I = commonPointerCastTransforms(CI))
      return I;
  } else {
    if (Instruction *Result = commonCastTransforms(CI))
      return Result;
  }

  // Get rid of casts from one type to the same type.  These are useless and
  // can be replaced by the operand.
  if (DestTy == Src->getType())
    return ReplaceInstUsesWith(CI, Src);

  if (const PointerType *DstPTy = dyn_cast<PointerType>(DestTy)) {
    const PointerType *SrcPTy = cast<PointerType>(SrcTy);
    const Type *DstElTy = DstPTy->getElementType();
    const Type *SrcElTy = SrcPTy->getElementType();

    // If the address spaces don't match, don't eliminate the bitcast, which is
    // required for changing types.
    if (SrcPTy->getAddressSpace() != DstPTy->getAddressSpace())
      return 0;

    // If we are casting an alloca to a pointer to a type of the same
    // size, rewrite the allocation instruction to allocate the "right" type.
    // There is no need to modify malloc calls because it is their bitcast that
    // needs to be cleaned up.
    if (AllocaInst *AI = dyn_cast<AllocaInst>(Src))
      if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
        return V;

    // If the source and destination are pointers, and this cast is equivalent
    // to a getelementptr X, 0, 0, 0...  turn it into the appropriate gep.
    // This can enhance SROA and other transforms that want type-safe pointers.
    Constant *ZeroUInt = Constant::getNullValue(Type::getInt32Ty(*Context));
    unsigned NumZeros = 0;
    while (SrcElTy != DstElTy &&
           isa<CompositeType>(SrcElTy) && !isa<PointerType>(SrcElTy) &&
           SrcElTy->getNumContainedTypes() /* not "{}" */) {
      SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt);
      ++NumZeros;
    }

    // If we found a path from the src to dest, create the getelementptr now.
    if (SrcElTy == DstElTy) {
      SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt);
      return GetElementPtrInst::CreateInBounds(Src, Idxs.begin(), Idxs.end(),
                                               "", ((Instruction*)NULL));
    }
  }

  if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
    if (DestVTy->getNumElements() == 1) {
      if (!isa<VectorType>(SrcTy)) {
        Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType());
        return InsertElementInst::Create(UndefValue::get(DestTy), Elem,
                           Constant::getNullValue(Type::getInt32Ty(*Context)));
      }
      // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast)
    }
  }

  if (const VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) {
    if (SrcVTy->getNumElements() == 1) {
      if (!isa<VectorType>(DestTy)) {
        Value *Elem =
          Builder->CreateExtractElement(Src,
                           Constant::getNullValue(Type::getInt32Ty(*Context)));
        return CastInst::Create(Instruction::BitCast, Elem, DestTy);
      }
    }
  }

  if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(Src)) {
    if (SVI->hasOneUse()) {
      // Okay, we have (bitcast (shuffle ..)).  Check to see if this is
      // a bitcast to a vector with the same # elts.
      if (isa<VectorType>(DestTy) &&
          cast<VectorType>(DestTy)->getNumElements() ==
                SVI->getType()->getNumElements() &&
          SVI->getType()->getNumElements() ==
            cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements()) {
        CastInst *Tmp;
        // If either of the operands is a cast from CI.getType(), then
        // evaluating the shuffle in the casted destination's type will allow
        // us to eliminate at least one cast.
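        // Illustrative example:
        //   %A = bitcast <4 x float> %X to <4 x i32>
        //   %s = shufflevector <4 x i32> %A, <4 x i32> %B,
        //                      <4 x i32> <i32 0, i32 5, i32 2, i32 7>
        //   %r = bitcast <4 x i32> %s to <4 x float>
        // becomes a shuffle of the <4 x float> values:
        //   %B.c = bitcast <4 x i32> %B to <4 x float>
        //   %r = shufflevector <4 x float> %X, <4 x float> %B.c,
        //                      <4 x i32> <i32 0, i32 5, i32 2, i32 7>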
        if (((Tmp = dyn_cast<CastInst>(SVI->getOperand(0))) &&
             Tmp->getOperand(0)->getType() == DestTy) ||
            ((Tmp = dyn_cast<CastInst>(SVI->getOperand(1))) &&
             Tmp->getOperand(0)->getType() == DestTy)) {
          Value *LHS = Builder->CreateBitCast(SVI->getOperand(0), DestTy);
          Value *RHS = Builder->CreateBitCast(SVI->getOperand(1), DestTy);
          // Return a new shuffle vector.  Use the same element ID's, as we
          // know the vector types match #elts.
          return new ShuffleVectorInst(LHS, RHS, SVI->getOperand(2));
        }
      }
    }
  }
  return 0;
}

/// GetSelectFoldableOperands - We want to turn code that looks like this:
///   %C = or %A, %B
///   %D = select %cond, %C, %A
/// into:
///   %C = select %cond, %B, 0
///   %D = or %A, %C
///
/// Assuming that the specified instruction is an operand to the select, return
/// a bitmask indicating which operands of this instruction are foldable if
/// they equal the other incoming value of the select.
///
static unsigned GetSelectFoldableOperands(Instruction *I) {
  switch (I->getOpcode()) {
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    return 3;              // Can fold through either operand.
  case Instruction::Sub:   // Can only fold on the amount subtracted.
  case Instruction::Shl:   // Can only fold on the shift amount.
  case Instruction::LShr:
  case Instruction::AShr:
    return 1;
  default:
    return 0;              // Cannot fold
  }
}

/// GetSelectFoldableConstant - For the same transformation as the previous
/// function, return the identity constant that goes into the select.
static Constant *GetSelectFoldableConstant(Instruction *I,
                                           LLVMContext *Context) {
  switch (I->getOpcode()) {
  default: llvm_unreachable("This cannot happen!");
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return Constant::getNullValue(I->getType());
  case Instruction::And:
    return Constant::getAllOnesValue(I->getType());
  case Instruction::Mul:
    return ConstantInt::get(I->getType(), 1);
  }
}

/// FoldSelectOpOp - Here we have (select c, TI, FI), and we know that TI and
/// FI have the same opcode and only one use each.  Try to simplify this.
Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
                                          Instruction *FI) {
  if (TI->getNumOperands() == 1) {
    // The only unary operations we handle here are casts; if this is a cast
    // from the same source type, merge the two casts below.
    if (TI->isCast()) {
      if (TI->getOperand(0)->getType() != FI->getOperand(0)->getType())
        return 0;
    } else {
      return 0;  // unknown unary op.
    }

    // Fold this by inserting a select from the input values.
    SelectInst *NewSI = SelectInst::Create(SI.getCondition(),
                                           TI->getOperand(0),
                                           FI->getOperand(0), SI.getName()+".v");
    InsertNewInstBefore(NewSI, SI);
    return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI,
                            TI->getType());
  }

  // Only handle binary operators here.
  if (!isa<BinaryOperator>(TI))
    return 0;

  // Figure out if the operations have any operands in common.
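  // Illustrative example: with a common first operand,
  //   %t = or i32 %A, %B
  //   %f = or i32 %A, %C
  //   %r = select i1 %c, i32 %t, i32 %f
  // becomes:
  //   %r.v = select i1 %c, i32 %B, i32 %C
  //   %r = or i32 %A, %r.v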
  Value *MatchOp, *OtherOpT, *OtherOpF;
  bool MatchIsOpZero;
  if (TI->getOperand(0) == FI->getOperand(0)) {
    MatchOp  = TI->getOperand(0);
    OtherOpT = TI->getOperand(1);
    OtherOpF = FI->getOperand(1);
    MatchIsOpZero = true;
  } else if (TI->getOperand(1) == FI->getOperand(1)) {
    MatchOp  = TI->getOperand(1);
    OtherOpT = TI->getOperand(0);
    OtherOpF = FI->getOperand(0);
    MatchIsOpZero = false;
  } else if (!TI->isCommutative()) {
    return 0;
  } else if (TI->getOperand(0) == FI->getOperand(1)) {
    MatchOp  = TI->getOperand(0);
    OtherOpT = TI->getOperand(1);
    OtherOpF = FI->getOperand(0);
    MatchIsOpZero = true;
  } else if (TI->getOperand(1) == FI->getOperand(0)) {
    MatchOp  = TI->getOperand(1);
    OtherOpT = TI->getOperand(0);
    OtherOpF = FI->getOperand(1);
    MatchIsOpZero = true;
  } else {
    return 0;
  }

  // If we reach here, they do have operations in common.
  SelectInst *NewSI = SelectInst::Create(SI.getCondition(), OtherOpT,
                                         OtherOpF, SI.getName()+".v");
  InsertNewInstBefore(NewSI, SI);

  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TI)) {
    if (MatchIsOpZero)
      return BinaryOperator::Create(BO->getOpcode(), MatchOp, NewSI);
    else
      return BinaryOperator::Create(BO->getOpcode(), NewSI, MatchOp);
  }
  llvm_unreachable("Shouldn't get here");
  return 0;
}

static bool isSelect01(Constant *C1, Constant *C2) {
  ConstantInt *C1I = dyn_cast<ConstantInt>(C1);
  if (!C1I)
    return false;
  ConstantInt *C2I = dyn_cast<ConstantInt>(C2);
  if (!C2I)
    return false;
  return (C1I->isZero() || C1I->isOne()) && (C2I->isZero() || C2I->isOne());
}

/// FoldSelectIntoOp - Try to fold the select into one of the operands to
/// facilitate further optimization.
Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
                                            Value *FalseVal) {
  // See the comment above GetSelectFoldableOperands for a description of the
  // transformation we are doing here.
  if (Instruction *TVI = dyn_cast<Instruction>(TrueVal)) {
    if (TVI->hasOneUse() && TVI->getNumOperands() == 2 &&
        !isa<Constant>(FalseVal)) {
      if (unsigned SFO = GetSelectFoldableOperands(TVI)) {
        unsigned OpToFold = 0;
        if ((SFO & 1) && FalseVal == TVI->getOperand(0)) {
          OpToFold = 1;
        } else if ((SFO & 2) && FalseVal == TVI->getOperand(1)) {
          OpToFold = 2;
        }

        if (OpToFold) {
          Constant *C = GetSelectFoldableConstant(TVI, Context);
          Value *OOp = TVI->getOperand(2-OpToFold);
          // Avoid creating select between 2 constants unless it's selecting
          // between 0 and 1.
          if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) {
            Instruction *NewSel = SelectInst::Create(SI.getCondition(), OOp, C);
            InsertNewInstBefore(NewSel, SI);
            NewSel->takeName(TVI);
            if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TVI))
              return BinaryOperator::Create(BO->getOpcode(), FalseVal, NewSel);
            llvm_unreachable("Unknown instruction!!");
          }
        }
      }
    }
  }

  if (Instruction *FVI = dyn_cast<Instruction>(FalseVal)) {
    if (FVI->hasOneUse() && FVI->getNumOperands() == 2 &&
        !isa<Constant>(TrueVal)) {
      if (unsigned SFO = GetSelectFoldableOperands(FVI)) {
        unsigned OpToFold = 0;
        if ((SFO & 1) && TrueVal == FVI->getOperand(0)) {
          OpToFold = 1;
        } else if ((SFO & 2) && TrueVal == FVI->getOperand(1)) {
          OpToFold = 2;
        }

        if (OpToFold) {
          Constant *C = GetSelectFoldableConstant(FVI, Context);
          Value *OOp = FVI->getOperand(2-OpToFold);
          // Avoid creating select between 2 constants unless it's selecting
          // between 0 and 1.
          if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) {
            Instruction *NewSel = SelectInst::Create(SI.getCondition(), C, OOp);
            InsertNewInstBefore(NewSel, SI);
            NewSel->takeName(FVI);
            if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FVI))
              return BinaryOperator::Create(BO->getOpcode(), TrueVal, NewSel);
            llvm_unreachable("Unknown instruction!!");
          }
        }
      }
    }
  }

  return 0;
}

/// visitSelectInstWithICmp - Visit a SelectInst that has an
/// ICmpInst as its first operand.
///
Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
                                                   ICmpInst *ICI) {
  bool Changed = false;
  ICmpInst::Predicate Pred = ICI->getPredicate();
  Value *CmpLHS = ICI->getOperand(0);
  Value *CmpRHS = ICI->getOperand(1);
  Value *TrueVal = SI.getTrueValue();
  Value *FalseVal = SI.getFalseValue();

  // Check cases where the comparison is with a constant that
  // can be adjusted to fit the min/max idiom.  We may edit ICI in
  // place here, so make sure the select is the only user.
  if (ICI->hasOneUse())
    if (ConstantInt *CI = dyn_cast<ConstantInt>(CmpRHS)) {
      switch (Pred) {
      default: break;
      case ICmpInst::ICMP_ULT:
      case ICmpInst::ICMP_SLT: {
        // X < MIN ? T : F  -->  F
        if (CI->isMinValue(Pred == ICmpInst::ICMP_SLT))
          return ReplaceInstUsesWith(SI, FalseVal);
        // X < C ? X : C-1  -->  X > C-1 ? C-1 : X
        Constant *AdjustedRHS = SubOne(CI);
        if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) ||
            (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) {
          Pred = ICmpInst::getSwappedPredicate(Pred);
          CmpRHS = AdjustedRHS;
          std::swap(FalseVal, TrueVal);
          ICI->setPredicate(Pred);
          ICI->setOperand(1, CmpRHS);
          SI.setOperand(1, TrueVal);
          SI.setOperand(2, FalseVal);
          Changed = true;
        }
        break;
      }
      case ICmpInst::ICMP_UGT:
      case ICmpInst::ICMP_SGT: {
        // X > MAX ? T : F  -->  F
        if (CI->isMaxValue(Pred == ICmpInst::ICMP_SGT))
          return ReplaceInstUsesWith(SI, FalseVal);
        // X > C ? X : C+1  -->  X < C+1 ? C+1 : X
        Constant *AdjustedRHS = AddOne(CI);
        if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) ||
            (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) {
          Pred = ICmpInst::getSwappedPredicate(Pred);
          CmpRHS = AdjustedRHS;
          std::swap(FalseVal, TrueVal);
          ICI->setPredicate(Pred);
          ICI->setOperand(1, CmpRHS);
          SI.setOperand(1, TrueVal);
          SI.setOperand(2, FalseVal);
          Changed = true;
        }
        break;
      }
      }

      // (x <s  0) ? -1 : 0 -> ashr x, 31        (all ones if sign bit set)
      // (x >s -1) ? -1 : 0 -> not (ashr x, 31)  (all ones if sign bit clear)
      CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
      if (match(TrueVal, m_ConstantInt<-1>()) &&
          match(FalseVal, m_ConstantInt<0>()))
        Pred = ICI->getPredicate();
      else if (match(TrueVal, m_ConstantInt<0>()) &&
               match(FalseVal, m_ConstantInt<-1>()))
        Pred = CmpInst::getInversePredicate(ICI->getPredicate());

      if (Pred != CmpInst::BAD_ICMP_PREDICATE) {
        // If we are selecting between -1 and 0 based on a sign-bit test, we
        // can materialize the sign bit directly with an ashr and avoid the
        // comparison.
        const APInt &Op1CV = CI->getValue();

        // sext (x <s  0) to i32 --> x>>s31       true if signbit set.
        // sext (x >s -1) to i32 --> (x>>s31)^-1  true if signbit clear.
        if ((Pred == ICmpInst::ICMP_SLT && Op1CV == 0) ||
            (Pred == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) {
          Value *In = ICI->getOperand(0);
          Value *Sh = ConstantInt::get(In->getType(),
                                       In->getType()->getScalarSizeInBits()-1);
          In = InsertNewInstBefore(BinaryOperator::CreateAShr(In, Sh,
                                                      In->getName()+".lobit"),
                                   *ICI);
          if (In->getType() != SI.getType())
            In = CastInst::CreateIntegerCast(In, SI.getType(),
                                             true/*SExt*/, "tmp", ICI);

          if (Pred == ICmpInst::ICMP_SGT)
            In = InsertNewInstBefore(BinaryOperator::CreateNot(In,
                                                     In->getName()+".not"),
                                     *ICI);

          return ReplaceInstUsesWith(SI, In);
        }
      }
    }

  if (CmpLHS == TrueVal && CmpRHS == FalseVal) {
    // Transform (X == Y) ? X : Y  -> Y
    if (Pred == ICmpInst::ICMP_EQ)
      return ReplaceInstUsesWith(SI, FalseVal);
    // Transform (X != Y) ? X : Y  -> X
    if (Pred == ICmpInst::ICMP_NE)
      return ReplaceInstUsesWith(SI, TrueVal);
    /// NOTE: if we wanted to, this is where to detect integer MIN/MAX

  } else if (CmpLHS == FalseVal && CmpRHS == TrueVal) {
    // Transform (X == Y) ? Y : X  -> X
    if (Pred == ICmpInst::ICMP_EQ)
      return ReplaceInstUsesWith(SI, FalseVal);
    // Transform (X != Y) ? Y : X  -> Y
    if (Pred == ICmpInst::ICMP_NE)
      return ReplaceInstUsesWith(SI, TrueVal);
    /// NOTE: if we wanted to, this is where to detect integer MIN/MAX
  }
  return Changed ? &SI : 0;
}


/// CanSelectOperandBeMappingIntoPredBlock - SI is a select whose condition is
/// a PHI node (but the two may be in different blocks).  See if the true/false
/// values (V) are live in all of the predecessor blocks of the PHI.  For
/// example, cases like this cannot be mapped:
///
///   X = phi [ C1, BB1], [C2, BB2]
///   Y = add
///   Z = select X, Y, 0
///
/// because Y is not live in BB1/BB2.
///
static bool CanSelectOperandBeMappingIntoPredBlock(const Value *V,
                                                   const SelectInst &SI) {
  // If the value is a non-instruction value like a constant or argument, it
  // can always be mapped.
  const Instruction *I = dyn_cast<Instruction>(V);
  if (I == 0) return true;

  // If V is a PHI node defined in the same block as the condition PHI, we can
  // map the arguments.
  const PHINode *CondPHI = cast<PHINode>(SI.getCondition());

  if (const PHINode *VP = dyn_cast<PHINode>(I))
    if (VP->getParent() == CondPHI->getParent())
      return true;

  // Otherwise, if the PHI and select are defined in the same block and if V is
  // defined in a different block, then we can transform it.
  if (SI.getParent() == CondPHI->getParent() &&
      I->getParent() != CondPHI->getParent())
    return true;

  // Otherwise we have a 'hard' case and we can't tell without doing more
  // detailed dominator based analysis, punt.
  return false;
}

/// FoldSPFofSPF - We have an SPF (e.g. a min or max) of an SPF of the form:
///   SPF2(SPF1(A, B), C)
Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner,
                                        SelectPatternFlavor SPF1,
                                        Value *A, Value *B,
                                        Instruction &Outer,
                                        SelectPatternFlavor SPF2, Value *C) {
  if (C == A || C == B) {
    // MAX(MAX(A, B), B) -> MAX(A, B)
    // MIN(MIN(A, B), A) -> MIN(A, B)
    if (SPF1 == SPF2)
      return ReplaceInstUsesWith(Outer, Inner);

    // MAX(MIN(A, B), A) -> A
    // MIN(MAX(A, B), A) -> A
    if ((SPF1 == SPF_SMIN && SPF2 == SPF_SMAX) ||
        (SPF1 == SPF_SMAX && SPF2 == SPF_SMIN) ||
        (SPF1 == SPF_UMIN && SPF2 == SPF_UMAX) ||
        (SPF1 == SPF_UMAX && SPF2 == SPF_UMIN))
      return ReplaceInstUsesWith(Outer, C);
  }

  // TODO: MIN(MIN(A, 23), 97)
  return 0;
}

Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
  Value *CondVal = SI.getCondition();
  Value *TrueVal = SI.getTrueValue();
  Value *FalseVal = SI.getFalseValue();

  // select true, X, Y  -> X
  // select false, X, Y -> Y
  if (ConstantInt *C = dyn_cast<ConstantInt>(CondVal))
    return ReplaceInstUsesWith(SI, C->getZExtValue() ? TrueVal : FalseVal);

  // select C, X, X -> X
  if (TrueVal == FalseVal)
    return ReplaceInstUsesWith(SI, TrueVal);

  if (isa<UndefValue>(TrueVal))   // select C, undef, X -> X
    return ReplaceInstUsesWith(SI, FalseVal);
  if (isa<UndefValue>(FalseVal))  // select C, X, undef -> X
    return ReplaceInstUsesWith(SI, TrueVal);
  if (isa<UndefValue>(CondVal)) { // select undef, X, Y -> X or Y
    if (isa<Constant>(TrueVal))
      return ReplaceInstUsesWith(SI, TrueVal);
    else
      return ReplaceInstUsesWith(SI, FalseVal);
  }

  if (SI.getType() == Type::getInt1Ty(*Context)) {
    if (ConstantInt *C = dyn_cast<ConstantInt>(TrueVal)) {
      if (C->getZExtValue()) {
        // Change: A = select B, true, C --> A = or B, C
        return BinaryOperator::CreateOr(CondVal, FalseVal);
      } else {
        // Change: A = select B, false, C --> A = and !B, C
        Value *NotCond =
          InsertNewInstBefore(BinaryOperator::CreateNot(CondVal,
                                             "not."+CondVal->getName()), SI);
        return BinaryOperator::CreateAnd(NotCond, FalseVal);
      }
    } else if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) {
      if (!C->getZExtValue()) {
        // Change: A = select B, C, false --> A = and B, C
        return BinaryOperator::CreateAnd(CondVal, TrueVal);
      } else {
        // Change: A = select B, C, true --> A = or !B, C
        Value *NotCond =
          InsertNewInstBefore(BinaryOperator::CreateNot(CondVal,
                                             "not."+CondVal->getName()), SI);
        return BinaryOperator::CreateOr(NotCond, TrueVal);
      }
    }

    // select a, b, a  -> a&b
    // select a, a, b  -> a|b
    if (CondVal == TrueVal)
      return BinaryOperator::CreateOr(CondVal, FalseVal);
    else if (CondVal == FalseVal)
      return BinaryOperator::CreateAnd(CondVal, TrueVal);
  }

  // Selecting between two integer constants?
  if (ConstantInt *TrueValC = dyn_cast<ConstantInt>(TrueVal))
    if (ConstantInt *FalseValC = dyn_cast<ConstantInt>(FalseVal)) {
      // select C, 1, 0 -> zext C to int
      if (FalseValC->isZero() && TrueValC->getValue() == 1) {
        return CastInst::Create(Instruction::ZExt, CondVal, SI.getType());
      } else if (TrueValC->isZero() && FalseValC->getValue() == 1) {
        // select C, 0, 1 -> zext !C to int
        Value *NotCond =
          InsertNewInstBefore(BinaryOperator::CreateNot(CondVal,
                                               "not."+CondVal->getName()), SI);
        return CastInst::Create(Instruction::ZExt, NotCond, SI.getType());
      }

      if (ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition())) {
        // If one of the constants is zero (we know they can't both be) and we
        // have an icmp instruction with zero, and we have an 'and' with the
        // non-constant value, eliminate this whole mess.  This corresponds to
        // cases like this: ((X & 8) ? 8 : 0)
        if (TrueValC->isZero() || FalseValC->isZero())
          if (IC->isEquality() && isa<ConstantInt>(IC->getOperand(1)) &&
              cast<Constant>(IC->getOperand(1))->isNullValue())
            if (Instruction *ICA = dyn_cast<Instruction>(IC->getOperand(0)))
              if (ICA->getOpcode() == Instruction::And &&
                  isa<ConstantInt>(ICA->getOperand(1)) &&
                  (ICA->getOperand(1) == TrueValC ||
                   ICA->getOperand(1) == FalseValC) &&
                  isOneBitSet(cast<ConstantInt>(ICA->getOperand(1)))) {
                // Okay, now we know that everything is set up, we just don't
                // know whether we have an icmp_ne or icmp_eq and whether the
                // true or false val is the zero.
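                // Illustrative example:
                //   %a = and i32 %X, 8
                //   %c = icmp ne i32 %a, 0
                //   %r = select i1 %c, i32 8, i32 0
                // simplifies to:
                //   %r = and i32 %X, 8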
                bool ShouldNotVal = !TrueValC->isZero();
                ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE;
                Value *V = ICA;
                if (ShouldNotVal)
                  V = InsertNewInstBefore(BinaryOperator::Create(
                                  Instruction::Xor, V, ICA->getOperand(1)), SI);
                return ReplaceInstUsesWith(SI, V);
              }
      }
    }

  // See if we are selecting two values based on a comparison of the two
  // values.
  if (FCmpInst *FCI = dyn_cast<FCmpInst>(CondVal)) {
    if (FCI->getOperand(0) == TrueVal && FCI->getOperand(1) == FalseVal) {
      // Transform (X == Y) ? X : Y  -> Y
      if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) {
        // This is not safe in general for floating point:
        // consider X == -0, Y == +0.
        // It becomes safe if either operand is a nonzero constant.
        ConstantFP *CFPt, *CFPf;
        if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
             !CFPt->getValueAPF().isZero()) ||
            ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
             !CFPf->getValueAPF().isZero()))
          return ReplaceInstUsesWith(SI, FalseVal);
      }
      // Transform (X != Y) ? X : Y  -> X
      if (FCI->getPredicate() == FCmpInst::FCMP_ONE)
        return ReplaceInstUsesWith(SI, TrueVal);
      // NOTE: if we wanted to, this is where to detect MIN/MAX

    } else if (FCI->getOperand(0) == FalseVal && FCI->getOperand(1) == TrueVal){
      // Transform (X == Y) ? Y : X  -> X
      if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) {
        // This is not safe in general for floating point:
        // consider X == -0, Y == +0.
        // It becomes safe if either operand is a nonzero constant.
        ConstantFP *CFPt, *CFPf;
        if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
             !CFPt->getValueAPF().isZero()) ||
            ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
             !CFPf->getValueAPF().isZero()))
          return ReplaceInstUsesWith(SI, FalseVal);
      }
      // Transform (X != Y) ? Y : X  -> Y
      if (FCI->getPredicate() == FCmpInst::FCMP_ONE)
        return ReplaceInstUsesWith(SI, TrueVal);
      // NOTE: if we wanted to, this is where to detect MIN/MAX
    }
    // NOTE: if we wanted to, this is where to detect ABS
  }

  // See if we are selecting two values based on a comparison of the two
  // values.
  if (ICmpInst *ICI = dyn_cast<ICmpInst>(CondVal))
    if (Instruction *Result = visitSelectInstWithICmp(SI, ICI))
      return Result;

  if (Instruction *TI = dyn_cast<Instruction>(TrueVal))
    if (Instruction *FI = dyn_cast<Instruction>(FalseVal))
      if (TI->hasOneUse() && FI->hasOneUse()) {
        Instruction *AddOp = 0, *SubOp = 0;

        // Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z))
        if (TI->getOpcode() == FI->getOpcode())
          if (Instruction *IV = FoldSelectOpOp(SI, TI, FI))
            return IV;

        // Turn select C, (X+Y), (X-Y) --> (X+(select C, Y, (-Y))).  This is
        // even legal for FP.
        if ((TI->getOpcode() == Instruction::Sub &&
             FI->getOpcode() == Instruction::Add) ||
            (TI->getOpcode() == Instruction::FSub &&
             FI->getOpcode() == Instruction::FAdd)) {
          AddOp = FI; SubOp = TI;
        } else if ((FI->getOpcode() == Instruction::Sub &&
                    TI->getOpcode() == Instruction::Add) ||
                   (FI->getOpcode() == Instruction::FSub &&
                    TI->getOpcode() == Instruction::FAdd)) {
          AddOp = TI; SubOp = FI;
        }

        if (AddOp) {
          Value *OtherAddOp = 0;
          if (SubOp->getOperand(0) == AddOp->getOperand(0)) {
            OtherAddOp = AddOp->getOperand(1);
          } else if (SubOp->getOperand(0) == AddOp->getOperand(1)) {
            OtherAddOp = AddOp->getOperand(0);
          }

          if (OtherAddOp) {
            // So at this point we know we have (Y -> OtherAddOp):
            //        select C, (add X, Y), (sub X, Z)
            Value *NegVal;  // Compute -Z
            if (Constant *C = dyn_cast<Constant>(SubOp->getOperand(1))) {
              NegVal = ConstantExpr::getNeg(C);
            } else {
              NegVal = InsertNewInstBefore(
                    BinaryOperator::CreateNeg(SubOp->getOperand(1),
                                              "tmp"), SI);
            }

            Value *NewTrueOp = OtherAddOp;
            Value *NewFalseOp = NegVal;
            if (AddOp != TI)
              std::swap(NewTrueOp, NewFalseOp);
            Instruction *NewSel =
              SelectInst::Create(CondVal, NewTrueOp,
                                 NewFalseOp, SI.getName() + ".p");

            NewSel = InsertNewInstBefore(NewSel, SI);
            return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel);
          }
        }
      }

  // See if we can fold the select into one of our operands.
  if (SI.getType()->isInteger()) {
    if (Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal))
      return FoldI;

    // MAX(MAX(a, b), a) -> MAX(a, b)
    // MIN(MIN(a, b), a) -> MIN(a, b)
    // MAX(MIN(a, b), a) -> a
    // MIN(MAX(a, b), a) -> a
    Value *LHS, *RHS, *LHS2, *RHS2;
    if (SelectPatternFlavor SPF = MatchSelectPattern(&SI, LHS, RHS)) {
      if (SelectPatternFlavor SPF2 = MatchSelectPattern(LHS, LHS2, RHS2))
        if (Instruction *R = FoldSPFofSPF(cast<Instruction>(LHS), SPF2,
                                          LHS2, RHS2, SI, SPF, RHS))
          return R;
      if (SelectPatternFlavor SPF2 = MatchSelectPattern(RHS, LHS2, RHS2))
        if (Instruction *R = FoldSPFofSPF(cast<Instruction>(RHS), SPF2,
                                          LHS2, RHS2, SI, SPF, LHS))
          return R;
    }

    // TODO.
    // ABS(-X) -> ABS(X)
    // ABS(ABS(X)) -> ABS(X)
  }

  // See if we can fold the select into a phi node if the condition is a
  // select.
  if (isa<PHINode>(SI.getCondition()))
    // The true/false values have to be live in the PHI predecessor's blocks.
    if (CanSelectOperandBeMappingIntoPredBlock(TrueVal, SI) &&
        CanSelectOperandBeMappingIntoPredBlock(FalseVal, SI))
      if (Instruction *NV = FoldOpIntoPhi(SI))
        return NV;

  if (BinaryOperator::isNot(CondVal)) {
    SI.setOperand(0, BinaryOperator::getNotArgument(CondVal));
    SI.setOperand(1, FalseVal);
    SI.setOperand(2, TrueVal);
    return &SI;
  }

  return 0;
}

/// EnforceKnownAlignment - If the specified pointer points to an object that
/// we control, modify the object's alignment to PrefAlign.  This isn't
/// often possible though.  If alignment is important, a more reliable approach
/// is to simply align all global variables and allocation instructions to
/// their preferred alignment from the beginning.
10125/// 10126static unsigned EnforceKnownAlignment(Value *V, 10127 unsigned Align, unsigned PrefAlign) { 10128 10129 User *U = dyn_cast<User>(V); 10130 if (!U) return Align; 10131 10132 switch (Operator::getOpcode(U)) { 10133 default: break; 10134 case Instruction::BitCast: 10135 return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign); 10136 case Instruction::GetElementPtr: { 10137 // If all indexes are zero, it is just the alignment of the base pointer. 10138 bool AllZeroOperands = true; 10139 for (User::op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i) 10140 if (!isa<Constant>(*i) || 10141 !cast<Constant>(*i)->isNullValue()) { 10142 AllZeroOperands = false; 10143 break; 10144 } 10145 10146 if (AllZeroOperands) { 10147 // Treat this like a bitcast. 10148 return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign); 10149 } 10150 break; 10151 } 10152 } 10153 10154 if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { 10155 // If there is a large requested alignment and we can, bump up the alignment 10156 // of the global. 10157 if (!GV->isDeclaration()) { 10158 if (GV->getAlignment() >= PrefAlign) 10159 Align = GV->getAlignment(); 10160 else { 10161 GV->setAlignment(PrefAlign); 10162 Align = PrefAlign; 10163 } 10164 } 10165 } else if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { 10166 // If there is a requested alignment and if this is an alloca, round up. 10167 if (AI->getAlignment() >= PrefAlign) 10168 Align = AI->getAlignment(); 10169 else { 10170 AI->setAlignment(PrefAlign); 10171 Align = PrefAlign; 10172 } 10173 } 10174 10175 return Align; 10176} 10177 10178/// GetOrEnforceKnownAlignment - If the specified pointer has an alignment that 10179/// we can determine, return it, otherwise return 0. If PrefAlign is specified, 10180/// and it is more than the alignment of the ultimate object, see if we can 10181/// increase the alignment of the ultimate object, making this check succeed. 10182unsigned InstCombiner::GetOrEnforceKnownAlignment(Value *V, 10183 unsigned PrefAlign) { 10184 unsigned BitWidth = TD ? TD->getTypeSizeInBits(V->getType()) : 10185 sizeof(PrefAlign) * CHAR_BIT; 10186 APInt Mask = APInt::getAllOnesValue(BitWidth); 10187 APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); 10188 ComputeMaskedBits(V, Mask, KnownZero, KnownOne); 10189 unsigned TrailZ = KnownZero.countTrailingOnes(); 10190 unsigned Align = 1u << std::min(BitWidth - 1, TrailZ); 10191 10192 if (PrefAlign > Align) 10193 Align = EnforceKnownAlignment(V, Align, PrefAlign); 10194 10195 // We don't need to make any adjustment. 10196 return Align; 10197} 10198 10199Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { 10200 unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getOperand(1)); 10201 unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getOperand(2)); 10202 unsigned MinAlign = std::min(DstAlign, SrcAlign); 10203 unsigned CopyAlign = MI->getAlignment(); 10204 10205 if (CopyAlign < MinAlign) { 10206 MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), 10207 MinAlign, false)); 10208 return MI; 10209 } 10210 10211 // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with 10212 // load/store. 10213 ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getOperand(3)); 10214 if (MemOpLength == 0) return 0; 10215 10216 // Source and destination pointer types are always "i8*" for intrinsic. See 10217 // if the size is something we can handle with a single primitive load/store. 
  // A single load+store correctly handles overlapping memory in the memmove
  // case.
  unsigned Size = MemOpLength->getZExtValue();
  if (Size == 0) return MI;  // Delete this mem transfer.

  if (Size > 8 || (Size&(Size-1)))
    return 0;  // If not 1/2/4/8 bytes, exit.

  // Use an integer load+store unless we can find something better.
  Type *NewPtrTy =
    PointerType::getUnqual(IntegerType::get(*Context, Size<<3));

  // Memcpy forces the use of i8* for the source and destination.  That means
  // that if you're using memcpy to move one double around, you'll get a cast
  // from double* to i8*.  We'd much rather use a double load+store than an
  // i64 load+store here, because this improves the odds that the source or
  // dest address will be promotable.  See if we can find a better type than
  // the integer datatype.
  if (Value *Op = getBitCastOperand(MI->getOperand(1))) {
    const Type *SrcETy = cast<PointerType>(Op->getType())->getElementType();
    if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) {
      // The SrcETy might be something like {{{double}}} or [1 x double].  Rip
      // down through these levels if so.
      while (!SrcETy->isSingleValueType()) {
        if (const StructType *STy = dyn_cast<StructType>(SrcETy)) {
          if (STy->getNumElements() == 1)
            SrcETy = STy->getElementType(0);
          else
            break;
        } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcETy)) {
          if (ATy->getNumElements() == 1)
            SrcETy = ATy->getElementType();
          else
            break;
        } else
          break;
      }

      if (SrcETy->isSingleValueType())
        NewPtrTy = PointerType::getUnqual(SrcETy);
    }
  }

  // If the memcpy/memmove provides better alignment info than we can
  // infer, use it.
  SrcAlign = std::max(SrcAlign, CopyAlign);
  DstAlign = std::max(DstAlign, CopyAlign);

  Value *Src = Builder->CreateBitCast(MI->getOperand(2), NewPtrTy);
  Value *Dest = Builder->CreateBitCast(MI->getOperand(1), NewPtrTy);
  Instruction *L = new LoadInst(Src, "tmp", false, SrcAlign);
  InsertNewInstBefore(L, *MI);
  InsertNewInstBefore(new StoreInst(L, Dest, false, DstAlign), *MI);

  // Set the length of the transfer to 0; it will be deleted on the next
  // iteration.
  MI->setOperand(3, Constant::getNullValue(MemOpLength->getType()));
  return MI;
}

Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
  unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest());
  if (MI->getAlignment() < Alignment) {
    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
                                      Alignment, false));
    return MI;
  }

  // Extract the length and alignment and fill if they are constant.
  ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
  ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
  if (!LenC || !FillC || FillC->getType() != Type::getInt8Ty(*Context))
    return 0;
  uint64_t Len = LenC->getZExtValue();
  Alignment = MI->getAlignment();

  // If the length is zero, this is a no-op.
  if (Len == 0) return MI;  // memset(d,c,0,a) -> noop

  // memset(s,c,n) -> store s, c (for n=1,2,4,8)
  if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
    const Type *ITy = IntegerType::get(*Context, Len*8);  // n=1 -> i8.
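    // Illustrative example: memset(%p, i8 171, i32 4, ...) becomes
    //   %p.cast = bitcast i8* %p to i32*
    //   store i32 -1414812757, i32* %p.cast   ; 0xABABABAB
    // because the fill byte is replicated across the wider store below.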
10300 10301 Value *Dest = MI->getDest(); 10302 Dest = Builder->CreateBitCast(Dest, PointerType::getUnqual(ITy)); 10303 10304 // Alignment 0 is identity for alignment 1 for memset, but not store. 10305 if (Alignment == 0) Alignment = 1; 10306 10307 // Extract the fill value and store. 10308 uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL; 10309 InsertNewInstBefore(new StoreInst(ConstantInt::get(ITy, Fill), 10310 Dest, false, Alignment), *MI); 10311 10312 // Set the size of the copy to 0, it will be deleted on the next iteration. 10313 MI->setLength(Constant::getNullValue(LenC->getType())); 10314 return MI; 10315 } 10316 10317 return 0; 10318} 10319 10320 10321/// visitCallInst - CallInst simplification. This mostly only handles folding 10322/// of intrinsic instructions. For normal calls, it allows visitCallSite to do 10323/// the heavy lifting. 10324/// 10325Instruction *InstCombiner::visitCallInst(CallInst &CI) { 10326 if (isFreeCall(&CI)) 10327 return visitFree(CI); 10328 10329 // If the caller function is nounwind, mark the call as nounwind, even if the 10330 // callee isn't. 10331 if (CI.getParent()->getParent()->doesNotThrow() && 10332 !CI.doesNotThrow()) { 10333 CI.setDoesNotThrow(); 10334 return &CI; 10335 } 10336 10337 IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI); 10338 if (!II) return visitCallSite(&CI); 10339 10340 // Intrinsics cannot occur in an invoke, so handle them here instead of in 10341 // visitCallSite. 10342 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) { 10343 bool Changed = false; 10344 10345 // memmove/cpy/set of zero bytes is a noop. 10346 if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) { 10347 if (NumBytes->isNullValue()) return EraseInstFromFunction(CI); 10348 10349 if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes)) 10350 if (CI->getZExtValue() == 1) { 10351 // Replace the instruction with just byte operations. We would 10352 // transform other cases to loads/stores, but we don't know if 10353 // alignment is sufficient. 10354 } 10355 } 10356 10357 // If we have a memmove and the source operation is a constant global, 10358 // then the source and dest pointers can't alias, so we can change this 10359 // into a call to memcpy. 10360 if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) { 10361 if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource())) 10362 if (GVSrc->isConstant()) { 10363 Module *M = CI.getParent()->getParent()->getParent(); 10364 Intrinsic::ID MemCpyID = Intrinsic::memcpy; 10365 const Type *Tys[1]; 10366 Tys[0] = CI.getOperand(3)->getType(); 10367 CI.setOperand(0, 10368 Intrinsic::getDeclaration(M, MemCpyID, Tys, 1)); 10369 Changed = true; 10370 } 10371 } 10372 10373 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { 10374 // memmove(x,x,size) -> noop. 10375 if (MTI->getSource() == MTI->getDest()) 10376 return EraseInstFromFunction(CI); 10377 } 10378 10379 // If we can determine a pointer alignment that is bigger than currently 10380 // set, update the alignment. 
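    // Illustrative sketch of what the SimplifyMemTransfer/SimplifyMemSet
    // calls below can produce (not part of the original comments; the value
    // names and the exact intrinsic spelling are assumptions for clarity):
    //   call void @llvm.memcpy.i32(i8* %d, i8* %s, i32 8, i32 4)
    // becomes roughly
    //   %s.cast = bitcast i8* %s to i64*
    //   %d.cast = bitcast i8* %d to i64*
    //   %val    = load i64* %s.cast, align 4
    //   store i64 %val, i64* %d.cast, align 4
    // and a 4-byte memset with constant fill C becomes a single i32 store of
    // C splatted into every byte (C * 0x01010101).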
10381 if (isa<MemTransferInst>(MI)) { 10382 if (Instruction *I = SimplifyMemTransfer(MI)) 10383 return I; 10384 } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) { 10385 if (Instruction *I = SimplifyMemSet(MSI)) 10386 return I; 10387 } 10388 10389 if (Changed) return II; 10390 } 10391 10392 switch (II->getIntrinsicID()) { 10393 default: break; 10394 case Intrinsic::bswap: 10395 // bswap(bswap(x)) -> x 10396 if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getOperand(1))) 10397 if (Operand->getIntrinsicID() == Intrinsic::bswap) 10398 return ReplaceInstUsesWith(CI, Operand->getOperand(1)); 10399 10400 // bswap(trunc(bswap(x))) -> trunc(lshr(x, c)) 10401 if (TruncInst *TI = dyn_cast<TruncInst>(II->getOperand(1))) { 10402 if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(TI->getOperand(0))) 10403 if (Operand->getIntrinsicID() == Intrinsic::bswap) { 10404 unsigned C = Operand->getType()->getPrimitiveSizeInBits() - 10405 TI->getType()->getPrimitiveSizeInBits(); 10406 Value *CV = ConstantInt::get(Operand->getType(), C); 10407 Value *V = Builder->CreateLShr(Operand->getOperand(1), CV); 10408 return new TruncInst(V, TI->getType()); 10409 } 10410 } 10411 10412 break; 10413 case Intrinsic::powi: 10414 if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getOperand(2))) { 10415 // powi(x, 0) -> 1.0 10416 if (Power->isZero()) 10417 return ReplaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0)); 10418 // powi(x, 1) -> x 10419 if (Power->isOne()) 10420 return ReplaceInstUsesWith(CI, II->getOperand(1)); 10421 // powi(x, -1) -> 1/x 10422 if (Power->isAllOnesValue()) 10423 return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0), 10424 II->getOperand(1)); 10425 } 10426 break; 10427 10428 case Intrinsic::uadd_with_overflow: { 10429 Value *LHS = II->getOperand(1), *RHS = II->getOperand(2); 10430 const IntegerType *IT = cast<IntegerType>(II->getOperand(1)->getType()); 10431 uint32_t BitWidth = IT->getBitWidth(); 10432 APInt Mask = APInt::getSignBit(BitWidth); 10433 APInt LHSKnownZero(BitWidth, 0); 10434 APInt LHSKnownOne(BitWidth, 0); 10435 ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne); 10436 bool LHSKnownNegative = LHSKnownOne[BitWidth - 1]; 10437 bool LHSKnownPositive = LHSKnownZero[BitWidth - 1]; 10438 10439 if (LHSKnownNegative || LHSKnownPositive) { 10440 APInt RHSKnownZero(BitWidth, 0); 10441 APInt RHSKnownOne(BitWidth, 0); 10442 ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne); 10443 bool RHSKnownNegative = RHSKnownOne[BitWidth - 1]; 10444 bool RHSKnownPositive = RHSKnownZero[BitWidth - 1]; 10445 if (LHSKnownNegative && RHSKnownNegative) { 10446 // The sign bit is set in both cases: this MUST overflow. 10447 // Create a simple add instruction, and insert it into the struct. 10448 Instruction *Add = BinaryOperator::CreateAdd(LHS, RHS, "", &CI); 10449 Worklist.Add(Add); 10450 Constant *V[] = { 10451 UndefValue::get(LHS->getType()), ConstantInt::getTrue(*Context) 10452 }; 10453 Constant *Struct = ConstantStruct::get(*Context, V, 2, false); 10454 return InsertValueInst::Create(Struct, Add, 0); 10455 } 10456 10457 if (LHSKnownPositive && RHSKnownPositive) { 10458 // The sign bit is clear in both cases: this CANNOT overflow. 10459 // Create a simple add instruction, and insert it into the struct. 
10460 Instruction *Add = BinaryOperator::CreateNUWAdd(LHS, RHS, "", &CI); 10461 Worklist.Add(Add); 10462 Constant *V[] = { 10463 UndefValue::get(LHS->getType()), ConstantInt::getFalse(*Context) 10464 }; 10465 Constant *Struct = ConstantStruct::get(*Context, V, 2, false); 10466 return InsertValueInst::Create(Struct, Add, 0); 10467 } 10468 } 10469 } 10470 // FALL THROUGH uadd into sadd 10471 case Intrinsic::sadd_with_overflow: 10472 // Canonicalize constants into the RHS. 10473 if (isa<Constant>(II->getOperand(1)) && 10474 !isa<Constant>(II->getOperand(2))) { 10475 Value *LHS = II->getOperand(1); 10476 II->setOperand(1, II->getOperand(2)); 10477 II->setOperand(2, LHS); 10478 return II; 10479 } 10480 10481 // X + undef -> undef 10482 if (isa<UndefValue>(II->getOperand(2))) 10483 return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); 10484 10485 if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getOperand(2))) { 10486 // X + 0 -> {X, false} 10487 if (RHS->isZero()) { 10488 Constant *V[] = { 10489 UndefValue::get(II->getOperand(0)->getType()), 10490 ConstantInt::getFalse(*Context) 10491 }; 10492 Constant *Struct = ConstantStruct::get(*Context, V, 2, false); 10493 return InsertValueInst::Create(Struct, II->getOperand(1), 0); 10494 } 10495 } 10496 break; 10497 case Intrinsic::usub_with_overflow: 10498 case Intrinsic::ssub_with_overflow: 10499 // undef - X -> undef 10500 // X - undef -> undef 10501 if (isa<UndefValue>(II->getOperand(1)) || 10502 isa<UndefValue>(II->getOperand(2))) 10503 return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); 10504 10505 if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getOperand(2))) { 10506 // X - 0 -> {X, false} 10507 if (RHS->isZero()) { 10508 Constant *V[] = { 10509 UndefValue::get(II->getOperand(1)->getType()), 10510 ConstantInt::getFalse(*Context) 10511 }; 10512 Constant *Struct = ConstantStruct::get(*Context, V, 2, false); 10513 return InsertValueInst::Create(Struct, II->getOperand(1), 0); 10514 } 10515 } 10516 break; 10517 case Intrinsic::umul_with_overflow: 10518 case Intrinsic::smul_with_overflow: 10519 // Canonicalize constants into the RHS. 10520 if (isa<Constant>(II->getOperand(1)) && 10521 !isa<Constant>(II->getOperand(2))) { 10522 Value *LHS = II->getOperand(1); 10523 II->setOperand(1, II->getOperand(2)); 10524 II->setOperand(2, LHS); 10525 return II; 10526 } 10527 10528 // X * undef -> undef 10529 if (isa<UndefValue>(II->getOperand(2))) 10530 return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); 10531 10532 if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getOperand(2))) { 10533 // X*0 -> {0, false} 10534 if (RHSI->isZero()) 10535 return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType())); 10536 10537 // X * 1 -> {X, false} 10538 if (RHSI->equalsInt(1)) { 10539 Constant *V[] = { 10540 UndefValue::get(II->getOperand(1)->getType()), 10541 ConstantInt::getFalse(*Context) 10542 }; 10543 Constant *Struct = ConstantStruct::get(*Context, V, 2, false); 10544 return InsertValueInst::Create(Struct, II->getOperand(1), 0); 10545 } 10546 } 10547 break; 10548 case Intrinsic::ppc_altivec_lvx: 10549 case Intrinsic::ppc_altivec_lvxl: 10550 case Intrinsic::x86_sse_loadu_ps: 10551 case Intrinsic::x86_sse2_loadu_pd: 10552 case Intrinsic::x86_sse2_loadu_dq: 10553 // Turn PPC lvx -> load if the pointer is known aligned. 10554 // Turn X86 loadups -> load if the pointer is known aligned. 
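    // For illustration (a sketch, not in the original comments; names
    // invented): when 16-byte alignment can be proven or enforced, an
    // unaligned vector-load intrinsic such as
    //   %v = call <4 x float> @llvm.x86.sse.loadu.ps(i8* %p)
    // can be replaced by an ordinary load through a casted pointer:
    //   %p.vec = bitcast i8* %p to <4 x float>*
    //   %v     = load <4 x float>* %p.vec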
    if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
      Value *Ptr = Builder->CreateBitCast(II->getOperand(1),
                                          PointerType::getUnqual(II->getType()));
      return new LoadInst(Ptr);
    }
    break;
  case Intrinsic::ppc_altivec_stvx:
  case Intrinsic::ppc_altivec_stvxl:
    // Turn stvx -> store if the pointer is known aligned.
    if (GetOrEnforceKnownAlignment(II->getOperand(2), 16) >= 16) {
      const Type *OpPtrTy =
        PointerType::getUnqual(II->getOperand(1)->getType());
      Value *Ptr = Builder->CreateBitCast(II->getOperand(2), OpPtrTy);
      return new StoreInst(II->getOperand(1), Ptr);
    }
    break;
  case Intrinsic::x86_sse_storeu_ps:
  case Intrinsic::x86_sse2_storeu_pd:
  case Intrinsic::x86_sse2_storeu_dq:
    // Turn X86 storeu -> store if the pointer is known aligned.
    if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
      const Type *OpPtrTy =
        PointerType::getUnqual(II->getOperand(2)->getType());
      Value *Ptr = Builder->CreateBitCast(II->getOperand(1), OpPtrTy);
      return new StoreInst(II->getOperand(2), Ptr);
    }
    break;

  case Intrinsic::x86_sse_cvttss2si: {
    // This intrinsic only demands the 0th element of its input vector. If
    // we can simplify the input based on that, do so now.
    unsigned VWidth =
      cast<VectorType>(II->getOperand(1)->getType())->getNumElements();
    APInt DemandedElts(VWidth, 1);
    APInt UndefElts(VWidth, 0);
    if (Value *V = SimplifyDemandedVectorElts(II->getOperand(1), DemandedElts,
                                              UndefElts)) {
      II->setOperand(1, V);
      return II;
    }
    break;
  }

  case Intrinsic::ppc_altivec_vperm:
    // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
    if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getOperand(3))) {
      assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!");

      // Check that all of the elements are integer constants or undefs.
      bool AllEltsOk = true;
      for (unsigned i = 0; i != 16; ++i) {
        if (!isa<ConstantInt>(Mask->getOperand(i)) &&
            !isa<UndefValue>(Mask->getOperand(i))) {
          AllEltsOk = false;
          break;
        }
      }

      if (AllEltsOk) {
        // Cast the input vectors to byte vectors.
        Value *Op0 = Builder->CreateBitCast(II->getOperand(1), Mask->getType());
        Value *Op1 = Builder->CreateBitCast(II->getOperand(2), Mask->getType());
        Value *Result = UndefValue::get(Op0->getType());

        // Only extract each element once.
        Value *ExtractedElts[32];
        memset(ExtractedElts, 0, sizeof(ExtractedElts));

        for (unsigned i = 0; i != 16; ++i) {
          if (isa<UndefValue>(Mask->getOperand(i)))
            continue;
          unsigned Idx = cast<ConstantInt>(Mask->getOperand(i))->getZExtValue();
          Idx &= 31;  // Match the hardware behavior.

          if (ExtractedElts[Idx] == 0) {
            ExtractedElts[Idx] =
              Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1,
                  ConstantInt::get(Type::getInt32Ty(*Context), Idx&15, false),
                  "tmp");
          }

          // Insert this value into the result vector.
10637 Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx], 10638 ConstantInt::get(Type::getInt32Ty(*Context), i, false), 10639 "tmp"); 10640 } 10641 return CastInst::Create(Instruction::BitCast, Result, CI.getType()); 10642 } 10643 } 10644 break; 10645 10646 case Intrinsic::stackrestore: { 10647 // If the save is right next to the restore, remove the restore. This can 10648 // happen when variable allocas are DCE'd. 10649 if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getOperand(1))) { 10650 if (SS->getIntrinsicID() == Intrinsic::stacksave) { 10651 BasicBlock::iterator BI = SS; 10652 if (&*++BI == II) 10653 return EraseInstFromFunction(CI); 10654 } 10655 } 10656 10657 // Scan down this block to see if there is another stack restore in the 10658 // same block without an intervening call/alloca. 10659 BasicBlock::iterator BI = II; 10660 TerminatorInst *TI = II->getParent()->getTerminator(); 10661 bool CannotRemove = false; 10662 for (++BI; &*BI != TI; ++BI) { 10663 if (isa<AllocaInst>(BI) || isMalloc(BI)) { 10664 CannotRemove = true; 10665 break; 10666 } 10667 if (CallInst *BCI = dyn_cast<CallInst>(BI)) { 10668 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) { 10669 // If there is a stackrestore below this one, remove this one. 10670 if (II->getIntrinsicID() == Intrinsic::stackrestore) 10671 return EraseInstFromFunction(CI); 10672 // Otherwise, ignore the intrinsic. 10673 } else { 10674 // If we found a non-intrinsic call, we can't remove the stack 10675 // restore. 10676 CannotRemove = true; 10677 break; 10678 } 10679 } 10680 } 10681 10682 // If the stack restore is in a return/unwind block and if there are no 10683 // allocas or calls between the restore and the return, nuke the restore. 10684 if (!CannotRemove && (isa<ReturnInst>(TI) || isa<UnwindInst>(TI))) 10685 return EraseInstFromFunction(CI); 10686 break; 10687 } 10688 } 10689 10690 return visitCallSite(II); 10691} 10692 10693// InvokeInst simplification 10694// 10695Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) { 10696 return visitCallSite(&II); 10697} 10698 10699/// isSafeToEliminateVarargsCast - If this cast does not affect the value 10700/// passed through the varargs area, we can eliminate the use of the cast. 10701static bool isSafeToEliminateVarargsCast(const CallSite CS, 10702 const CastInst * const CI, 10703 const TargetData * const TD, 10704 const int ix) { 10705 if (!CI->isLosslessCast()) 10706 return false; 10707 10708 // The size of ByVal arguments is derived from the type, so we 10709 // can't change to a type with a different size. If the size were 10710 // passed explicitly we could avoid this check. 10711 if (!CS.paramHasAttr(ix, Attribute::ByVal)) 10712 return true; 10713 10714 const Type* SrcTy = 10715 cast<PointerType>(CI->getOperand(0)->getType())->getElementType(); 10716 const Type* DstTy = cast<PointerType>(CI->getType())->getElementType(); 10717 if (!SrcTy->isSized() || !DstTy->isSized()) 10718 return false; 10719 if (!TD || TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy)) 10720 return false; 10721 return true; 10722} 10723 10724// visitCallSite - Improvements for call and invoke instructions. 10725// 10726Instruction *InstCombiner::visitCallSite(CallSite CS) { 10727 bool Changed = false; 10728 10729 // If the callee is a constexpr cast of a function, attempt to move the cast 10730 // to the arguments of the call/invoke. 
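  // For illustration (a sketch, not part of the original comments; names
  // invented): a call through a constant-expression bitcast such as
  //   call void bitcast (void (i8*)* @f to void (i32*)*)(i32* %p)
  // is rewritten into a direct call with the argument cast instead:
  //   %p.i8 = bitcast i32* %p to i8*
  //   call void @f(i8* %p.i8)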
  if (transformConstExprCastCall(CS)) return 0;

  Value *Callee = CS.getCalledValue();

  if (Function *CalleeF = dyn_cast<Function>(Callee))
    if (CalleeF->getCallingConv() != CS.getCallingConv()) {
      Instruction *OldCall = CS.getInstruction();
      // If the call and callee calling conventions don't match, this call must
      // be unreachable, as the call is undefined.
      new StoreInst(ConstantInt::getTrue(*Context),
                    UndefValue::get(Type::getInt1PtrTy(*Context)),
                    OldCall);
      // If OldCall does not return void then replaceAllUsesWith undef.
      // This allows ValueHandlers and custom metadata to adjust themselves.
      if (!OldCall->getType()->isVoidTy())
        OldCall->replaceAllUsesWith(UndefValue::get(OldCall->getType()));
      if (isa<CallInst>(OldCall))   // Not worth removing an invoke here.
        return EraseInstFromFunction(*OldCall);
      return 0;
    }

  if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
    // This instruction is not reachable, just remove it.  We insert a store to
    // undef so that we know that this code is not reachable, despite the fact
    // that we can't modify the CFG here.
    new StoreInst(ConstantInt::getTrue(*Context),
                  UndefValue::get(Type::getInt1PtrTy(*Context)),
                  CS.getInstruction());

    // If CS does not return void then replaceAllUsesWith undef.
    // This allows ValueHandlers and custom metadata to adjust themselves.
    if (!CS.getInstruction()->getType()->isVoidTy())
      CS.getInstruction()->
        replaceAllUsesWith(UndefValue::get(CS.getInstruction()->getType()));

    if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
      // Don't break the CFG, insert a dummy cond branch.
      BranchInst::Create(II->getNormalDest(), II->getUnwindDest(),
                         ConstantInt::getTrue(*Context), II);
    }
    return EraseInstFromFunction(*CS.getInstruction());
  }

  if (BitCastInst *BC = dyn_cast<BitCastInst>(Callee))
    if (IntrinsicInst *In = dyn_cast<IntrinsicInst>(BC->getOperand(0)))
      if (In->getIntrinsicID() == Intrinsic::init_trampoline)
        return transformCallThroughTrampoline(CS);

  const PointerType *PTy = cast<PointerType>(Callee->getType());
  const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
  if (FTy->isVarArg()) {
    int ix = FTy->getNumParams() + (isa<InvokeInst>(Callee) ? 3 : 1);
    // See if we can optimize any arguments passed through the varargs area of
    // the call.
    for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(),
           E = CS.arg_end(); I != E; ++I, ++ix) {
      CastInst *CI = dyn_cast<CastInst>(*I);
      if (CI && isSafeToEliminateVarargsCast(CS, CI, TD, ix)) {
        *I = CI->getOperand(0);
        Changed = true;
      }
    }
  }

  if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
    // Inline asm calls cannot throw - mark them 'nounwind'.
    CS.setDoesNotThrow();
    Changed = true;
  }

  return Changed ? CS.getInstruction() : 0;
}

// transformConstExprCastCall - If the callee is a constexpr cast of a function,
// attempt to move the cast to the arguments of the call/invoke.
10806// 10807bool InstCombiner::transformConstExprCastCall(CallSite CS) { 10808 if (!isa<ConstantExpr>(CS.getCalledValue())) return false; 10809 ConstantExpr *CE = cast<ConstantExpr>(CS.getCalledValue()); 10810 if (CE->getOpcode() != Instruction::BitCast || 10811 !isa<Function>(CE->getOperand(0))) 10812 return false; 10813 Function *Callee = cast<Function>(CE->getOperand(0)); 10814 Instruction *Caller = CS.getInstruction(); 10815 const AttrListPtr &CallerPAL = CS.getAttributes(); 10816 10817 // Okay, this is a cast from a function to a different type. Unless doing so 10818 // would cause a type conversion of one of our arguments, change this call to 10819 // be a direct call with arguments casted to the appropriate types. 10820 // 10821 const FunctionType *FT = Callee->getFunctionType(); 10822 const Type *OldRetTy = Caller->getType(); 10823 const Type *NewRetTy = FT->getReturnType(); 10824 10825 if (isa<StructType>(NewRetTy)) 10826 return false; // TODO: Handle multiple return values. 10827 10828 // Check to see if we are changing the return type... 10829 if (OldRetTy != NewRetTy) { 10830 if (Callee->isDeclaration() && 10831 // Conversion is ok if changing from one pointer type to another or from 10832 // a pointer to an integer of the same size. 10833 !((isa<PointerType>(OldRetTy) || !TD || 10834 OldRetTy == TD->getIntPtrType(Caller->getContext())) && 10835 (isa<PointerType>(NewRetTy) || !TD || 10836 NewRetTy == TD->getIntPtrType(Caller->getContext())))) 10837 return false; // Cannot transform this return value. 10838 10839 if (!Caller->use_empty() && 10840 // void -> non-void is handled specially 10841 !NewRetTy->isVoidTy() && !CastInst::isCastable(NewRetTy, OldRetTy)) 10842 return false; // Cannot transform this return value. 10843 10844 if (!CallerPAL.isEmpty() && !Caller->use_empty()) { 10845 Attributes RAttrs = CallerPAL.getRetAttributes(); 10846 if (RAttrs & Attribute::typeIncompatible(NewRetTy)) 10847 return false; // Attribute not compatible with transformed value. 10848 } 10849 10850 // If the callsite is an invoke instruction, and the return value is used by 10851 // a PHI node in a successor, we cannot change the return type of the call 10852 // because there is no place to put the cast instruction (without breaking 10853 // the critical edge). Bail out in this case. 10854 if (!Caller->use_empty()) 10855 if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) 10856 for (Value::use_iterator UI = II->use_begin(), E = II->use_end(); 10857 UI != E; ++UI) 10858 if (PHINode *PN = dyn_cast<PHINode>(*UI)) 10859 if (PN->getParent() == II->getNormalDest() || 10860 PN->getParent() == II->getUnwindDest()) 10861 return false; 10862 } 10863 10864 unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin()); 10865 unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs); 10866 10867 CallSite::arg_iterator AI = CS.arg_begin(); 10868 for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) { 10869 const Type *ParamTy = FT->getParamType(i); 10870 const Type *ActTy = (*AI)->getType(); 10871 10872 if (!CastInst::isCastable(ActTy, ParamTy)) 10873 return false; // Cannot transform this parameter value. 10874 10875 if (CallerPAL.getParamAttributes(i + 1) 10876 & Attribute::typeIncompatible(ParamTy)) 10877 return false; // Attribute not compatible with transformed value. 10878 10879 // Converting from one pointer type to another or between a pointer and an 10880 // integer of the same size is safe even if we do not have a body. 
10881 bool isConvertible = ActTy == ParamTy || 10882 (TD && ((isa<PointerType>(ParamTy) || 10883 ParamTy == TD->getIntPtrType(Caller->getContext())) && 10884 (isa<PointerType>(ActTy) || 10885 ActTy == TD->getIntPtrType(Caller->getContext())))); 10886 if (Callee->isDeclaration() && !isConvertible) return false; 10887 } 10888 10889 if (FT->getNumParams() < NumActualArgs && !FT->isVarArg() && 10890 Callee->isDeclaration()) 10891 return false; // Do not delete arguments unless we have a function body. 10892 10893 if (FT->getNumParams() < NumActualArgs && FT->isVarArg() && 10894 !CallerPAL.isEmpty()) 10895 // In this case we have more arguments than the new function type, but we 10896 // won't be dropping them. Check that these extra arguments have attributes 10897 // that are compatible with being a vararg call argument. 10898 for (unsigned i = CallerPAL.getNumSlots(); i; --i) { 10899 if (CallerPAL.getSlot(i - 1).Index <= FT->getNumParams()) 10900 break; 10901 Attributes PAttrs = CallerPAL.getSlot(i - 1).Attrs; 10902 if (PAttrs & Attribute::VarArgsIncompatible) 10903 return false; 10904 } 10905 10906 // Okay, we decided that this is a safe thing to do: go ahead and start 10907 // inserting cast instructions as necessary... 10908 std::vector<Value*> Args; 10909 Args.reserve(NumActualArgs); 10910 SmallVector<AttributeWithIndex, 8> attrVec; 10911 attrVec.reserve(NumCommonArgs); 10912 10913 // Get any return attributes. 10914 Attributes RAttrs = CallerPAL.getRetAttributes(); 10915 10916 // If the return value is not being used, the type may not be compatible 10917 // with the existing attributes. Wipe out any problematic attributes. 10918 RAttrs &= ~Attribute::typeIncompatible(NewRetTy); 10919 10920 // Add the new return attributes. 10921 if (RAttrs) 10922 attrVec.push_back(AttributeWithIndex::get(0, RAttrs)); 10923 10924 AI = CS.arg_begin(); 10925 for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) { 10926 const Type *ParamTy = FT->getParamType(i); 10927 if ((*AI)->getType() == ParamTy) { 10928 Args.push_back(*AI); 10929 } else { 10930 Instruction::CastOps opcode = CastInst::getCastOpcode(*AI, 10931 false, ParamTy, false); 10932 Args.push_back(Builder->CreateCast(opcode, *AI, ParamTy, "tmp")); 10933 } 10934 10935 // Add any parameter attributes. 10936 if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1)) 10937 attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs)); 10938 } 10939 10940 // If the function takes more arguments than the call was taking, add them 10941 // now. 10942 for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) 10943 Args.push_back(Constant::getNullValue(FT->getParamType(i))); 10944 10945 // If we are removing arguments to the function, emit an obnoxious warning. 10946 if (FT->getNumParams() < NumActualArgs) { 10947 if (!FT->isVarArg()) { 10948 errs() << "WARNING: While resolving call to function '" 10949 << Callee->getName() << "' arguments were dropped!\n"; 10950 } else { 10951 // Add all of the arguments in their promoted form to the arg list. 10952 for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) { 10953 const Type *PTy = getPromotedType((*AI)->getType()); 10954 if (PTy != (*AI)->getType()) { 10955 // Must promote to pass through va_arg area! 10956 Instruction::CastOps opcode = 10957 CastInst::getCastOpcode(*AI, false, PTy, false); 10958 Args.push_back(Builder->CreateCast(opcode, *AI, PTy, "tmp")); 10959 } else { 10960 Args.push_back(*AI); 10961 } 10962 10963 // Add any parameter attributes. 
10964 if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1)) 10965 attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs)); 10966 } 10967 } 10968 } 10969 10970 if (Attributes FnAttrs = CallerPAL.getFnAttributes()) 10971 attrVec.push_back(AttributeWithIndex::get(~0, FnAttrs)); 10972 10973 if (NewRetTy->isVoidTy()) 10974 Caller->setName(""); // Void type should not have a name. 10975 10976 const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec.begin(), 10977 attrVec.end()); 10978 10979 Instruction *NC; 10980 if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { 10981 NC = InvokeInst::Create(Callee, II->getNormalDest(), II->getUnwindDest(), 10982 Args.begin(), Args.end(), 10983 Caller->getName(), Caller); 10984 cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv()); 10985 cast<InvokeInst>(NC)->setAttributes(NewCallerPAL); 10986 } else { 10987 NC = CallInst::Create(Callee, Args.begin(), Args.end(), 10988 Caller->getName(), Caller); 10989 CallInst *CI = cast<CallInst>(Caller); 10990 if (CI->isTailCall()) 10991 cast<CallInst>(NC)->setTailCall(); 10992 cast<CallInst>(NC)->setCallingConv(CI->getCallingConv()); 10993 cast<CallInst>(NC)->setAttributes(NewCallerPAL); 10994 } 10995 10996 // Insert a cast of the return type as necessary. 10997 Value *NV = NC; 10998 if (OldRetTy != NV->getType() && !Caller->use_empty()) { 10999 if (!NV->getType()->isVoidTy()) { 11000 Instruction::CastOps opcode = CastInst::getCastOpcode(NC, false, 11001 OldRetTy, false); 11002 NV = NC = CastInst::Create(opcode, NC, OldRetTy, "tmp"); 11003 11004 // If this is an invoke instruction, we should insert it after the first 11005 // non-phi, instruction in the normal successor block. 11006 if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { 11007 BasicBlock::iterator I = II->getNormalDest()->getFirstNonPHI(); 11008 InsertNewInstBefore(NC, *I); 11009 } else { 11010 // Otherwise, it's a call, just insert cast right after the call instr 11011 InsertNewInstBefore(NC, *Caller); 11012 } 11013 Worklist.AddUsersToWorkList(*Caller); 11014 } else { 11015 NV = UndefValue::get(Caller->getType()); 11016 } 11017 } 11018 11019 11020 if (!Caller->use_empty()) 11021 Caller->replaceAllUsesWith(NV); 11022 11023 EraseInstFromFunction(*Caller); 11024 return true; 11025} 11026 11027// transformCallThroughTrampoline - Turn a call to a function created by the 11028// init_trampoline intrinsic into a direct call to the underlying function. 11029// 11030Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) { 11031 Value *Callee = CS.getCalledValue(); 11032 const PointerType *PTy = cast<PointerType>(Callee->getType()); 11033 const FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); 11034 const AttrListPtr &Attrs = CS.getAttributes(); 11035 11036 // If the call already has the 'nest' attribute somewhere then give up - 11037 // otherwise 'nest' would occur twice after splicing in the chain. 
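  // Rough illustration of the overall rewrite (not from the original comments;
  // names invented, @f is assumed to declare its first parameter 'nest', and
  // the old i8*-returning form of @llvm.init.trampoline is assumed):
  //   %tramp.i8 = call i8* @llvm.init.trampoline(i8* %tramp,
  //                 i8* bitcast (void (i8*, i32)* @f to i8*), i8* %nval)
  //   %fp       = bitcast i8* %tramp.i8 to void (i32)*
  //   call void %fp(i32 %x)
  // becomes a direct call that passes the chain value explicitly:
  //   call void @f(i8* nest %nval, i32 %x)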
11038 if (Attrs.hasAttrSomewhere(Attribute::Nest)) 11039 return 0; 11040 11041 IntrinsicInst *Tramp = 11042 cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0)); 11043 11044 Function *NestF = cast<Function>(Tramp->getOperand(2)->stripPointerCasts()); 11045 const PointerType *NestFPTy = cast<PointerType>(NestF->getType()); 11046 const FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType()); 11047 11048 const AttrListPtr &NestAttrs = NestF->getAttributes(); 11049 if (!NestAttrs.isEmpty()) { 11050 unsigned NestIdx = 1; 11051 const Type *NestTy = 0; 11052 Attributes NestAttr = Attribute::None; 11053 11054 // Look for a parameter marked with the 'nest' attribute. 11055 for (FunctionType::param_iterator I = NestFTy->param_begin(), 11056 E = NestFTy->param_end(); I != E; ++NestIdx, ++I) 11057 if (NestAttrs.paramHasAttr(NestIdx, Attribute::Nest)) { 11058 // Record the parameter type and any other attributes. 11059 NestTy = *I; 11060 NestAttr = NestAttrs.getParamAttributes(NestIdx); 11061 break; 11062 } 11063 11064 if (NestTy) { 11065 Instruction *Caller = CS.getInstruction(); 11066 std::vector<Value*> NewArgs; 11067 NewArgs.reserve(unsigned(CS.arg_end()-CS.arg_begin())+1); 11068 11069 SmallVector<AttributeWithIndex, 8> NewAttrs; 11070 NewAttrs.reserve(Attrs.getNumSlots() + 1); 11071 11072 // Insert the nest argument into the call argument list, which may 11073 // mean appending it. Likewise for attributes. 11074 11075 // Add any result attributes. 11076 if (Attributes Attr = Attrs.getRetAttributes()) 11077 NewAttrs.push_back(AttributeWithIndex::get(0, Attr)); 11078 11079 { 11080 unsigned Idx = 1; 11081 CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); 11082 do { 11083 if (Idx == NestIdx) { 11084 // Add the chain argument and attributes. 11085 Value *NestVal = Tramp->getOperand(3); 11086 if (NestVal->getType() != NestTy) 11087 NestVal = new BitCastInst(NestVal, NestTy, "nest", Caller); 11088 NewArgs.push_back(NestVal); 11089 NewAttrs.push_back(AttributeWithIndex::get(NestIdx, NestAttr)); 11090 } 11091 11092 if (I == E) 11093 break; 11094 11095 // Add the original argument and attributes. 11096 NewArgs.push_back(*I); 11097 if (Attributes Attr = Attrs.getParamAttributes(Idx)) 11098 NewAttrs.push_back 11099 (AttributeWithIndex::get(Idx + (Idx >= NestIdx), Attr)); 11100 11101 ++Idx, ++I; 11102 } while (1); 11103 } 11104 11105 // Add any function attributes. 11106 if (Attributes Attr = Attrs.getFnAttributes()) 11107 NewAttrs.push_back(AttributeWithIndex::get(~0, Attr)); 11108 11109 // The trampoline may have been bitcast to a bogus type (FTy). 11110 // Handle this by synthesizing a new function type, equal to FTy 11111 // with the chain parameter inserted. 11112 11113 std::vector<const Type*> NewTypes; 11114 NewTypes.reserve(FTy->getNumParams()+1); 11115 11116 // Insert the chain's type into the list of parameter types, which may 11117 // mean appending it. 11118 { 11119 unsigned Idx = 1; 11120 FunctionType::param_iterator I = FTy->param_begin(), 11121 E = FTy->param_end(); 11122 11123 do { 11124 if (Idx == NestIdx) 11125 // Add the chain's type. 11126 NewTypes.push_back(NestTy); 11127 11128 if (I == E) 11129 break; 11130 11131 // Add the original type. 11132 NewTypes.push_back(*I); 11133 11134 ++Idx, ++I; 11135 } while (1); 11136 } 11137 11138 // Replace the trampoline call with a direct call. Let the generic 11139 // code sort out any function type mismatches. 
11140 FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes, 11141 FTy->isVarArg()); 11142 Constant *NewCallee = 11143 NestF->getType() == PointerType::getUnqual(NewFTy) ? 11144 NestF : ConstantExpr::getBitCast(NestF, 11145 PointerType::getUnqual(NewFTy)); 11146 const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(), 11147 NewAttrs.end()); 11148 11149 Instruction *NewCaller; 11150 if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { 11151 NewCaller = InvokeInst::Create(NewCallee, 11152 II->getNormalDest(), II->getUnwindDest(), 11153 NewArgs.begin(), NewArgs.end(), 11154 Caller->getName(), Caller); 11155 cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv()); 11156 cast<InvokeInst>(NewCaller)->setAttributes(NewPAL); 11157 } else { 11158 NewCaller = CallInst::Create(NewCallee, NewArgs.begin(), NewArgs.end(), 11159 Caller->getName(), Caller); 11160 if (cast<CallInst>(Caller)->isTailCall()) 11161 cast<CallInst>(NewCaller)->setTailCall(); 11162 cast<CallInst>(NewCaller)-> 11163 setCallingConv(cast<CallInst>(Caller)->getCallingConv()); 11164 cast<CallInst>(NewCaller)->setAttributes(NewPAL); 11165 } 11166 if (!Caller->getType()->isVoidTy()) 11167 Caller->replaceAllUsesWith(NewCaller); 11168 Caller->eraseFromParent(); 11169 Worklist.Remove(Caller); 11170 return 0; 11171 } 11172 } 11173 11174 // Replace the trampoline call with a direct call. Since there is no 'nest' 11175 // parameter, there is no need to adjust the argument list. Let the generic 11176 // code sort out any function type mismatches. 11177 Constant *NewCallee = 11178 NestF->getType() == PTy ? NestF : 11179 ConstantExpr::getBitCast(NestF, PTy); 11180 CS.setCalledFunction(NewCallee); 11181 return CS.getInstruction(); 11182} 11183 11184/// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(a,c)] 11185/// and if a/b/c and the add's all have a single use, turn this into a phi 11186/// and a single binop. 11187Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { 11188 Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0)); 11189 assert(isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)); 11190 unsigned Opc = FirstInst->getOpcode(); 11191 Value *LHSVal = FirstInst->getOperand(0); 11192 Value *RHSVal = FirstInst->getOperand(1); 11193 11194 const Type *LHSType = LHSVal->getType(); 11195 const Type *RHSType = RHSVal->getType(); 11196 11197 // Scan to see if all operands are the same opcode, and all have one use. 11198 for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { 11199 Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i)); 11200 if (!I || I->getOpcode() != Opc || !I->hasOneUse() || 11201 // Verify type of the LHS matches so we don't fold cmp's of different 11202 // types or GEP's with different index types. 11203 I->getOperand(0)->getType() != LHSType || 11204 I->getOperand(1)->getType() != RHSType) 11205 return 0; 11206 11207 // If they are CmpInst instructions, check their predicates 11208 if (Opc == Instruction::ICmp || Opc == Instruction::FCmp) 11209 if (cast<CmpInst>(I)->getPredicate() != 11210 cast<CmpInst>(FirstInst)->getPredicate()) 11211 return 0; 11212 11213 // Keep track of which operand needs a phi node. 11214 if (I->getOperand(0) != LHSVal) LHSVal = 0; 11215 if (I->getOperand(1) != RHSVal) RHSVal = 0; 11216 } 11217 11218 // If both LHS and RHS would need a PHI, don't do this transformation, 11219 // because it would increase the number of PHIs entering the block, 11220 // which leads to higher register pressure. 
This is especially 11221 // bad when the PHIs are in the header of a loop. 11222 if (!LHSVal && !RHSVal) 11223 return 0; 11224 11225 // Otherwise, this is safe to transform! 11226 11227 Value *InLHS = FirstInst->getOperand(0); 11228 Value *InRHS = FirstInst->getOperand(1); 11229 PHINode *NewLHS = 0, *NewRHS = 0; 11230 if (LHSVal == 0) { 11231 NewLHS = PHINode::Create(LHSType, 11232 FirstInst->getOperand(0)->getName() + ".pn"); 11233 NewLHS->reserveOperandSpace(PN.getNumOperands()/2); 11234 NewLHS->addIncoming(InLHS, PN.getIncomingBlock(0)); 11235 InsertNewInstBefore(NewLHS, PN); 11236 LHSVal = NewLHS; 11237 } 11238 11239 if (RHSVal == 0) { 11240 NewRHS = PHINode::Create(RHSType, 11241 FirstInst->getOperand(1)->getName() + ".pn"); 11242 NewRHS->reserveOperandSpace(PN.getNumOperands()/2); 11243 NewRHS->addIncoming(InRHS, PN.getIncomingBlock(0)); 11244 InsertNewInstBefore(NewRHS, PN); 11245 RHSVal = NewRHS; 11246 } 11247 11248 // Add all operands to the new PHIs. 11249 if (NewLHS || NewRHS) { 11250 for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { 11251 Instruction *InInst = cast<Instruction>(PN.getIncomingValue(i)); 11252 if (NewLHS) { 11253 Value *NewInLHS = InInst->getOperand(0); 11254 NewLHS->addIncoming(NewInLHS, PN.getIncomingBlock(i)); 11255 } 11256 if (NewRHS) { 11257 Value *NewInRHS = InInst->getOperand(1); 11258 NewRHS->addIncoming(NewInRHS, PN.getIncomingBlock(i)); 11259 } 11260 } 11261 } 11262 11263 if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst)) 11264 return BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal); 11265 CmpInst *CIOp = cast<CmpInst>(FirstInst); 11266 return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), 11267 LHSVal, RHSVal); 11268} 11269 11270Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { 11271 GetElementPtrInst *FirstInst =cast<GetElementPtrInst>(PN.getIncomingValue(0)); 11272 11273 SmallVector<Value*, 16> FixedOperands(FirstInst->op_begin(), 11274 FirstInst->op_end()); 11275 // This is true if all GEP bases are allocas and if all indices into them are 11276 // constants. 11277 bool AllBasePointersAreAllocas = true; 11278 11279 // We don't want to replace this phi if the replacement would require 11280 // more than one phi, which leads to higher register pressure. This is 11281 // especially bad when the PHIs are in the header of a loop. 11282 bool NeededPhi = false; 11283 11284 // Scan to see if all operands are the same opcode, and all have one use. 11285 for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { 11286 GetElementPtrInst *GEP= dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i)); 11287 if (!GEP || !GEP->hasOneUse() || GEP->getType() != FirstInst->getType() || 11288 GEP->getNumOperands() != FirstInst->getNumOperands()) 11289 return 0; 11290 11291 // Keep track of whether or not all GEPs are of alloca pointers. 11292 if (AllBasePointersAreAllocas && 11293 (!isa<AllocaInst>(GEP->getOperand(0)) || 11294 !GEP->hasAllConstantIndices())) 11295 AllBasePointersAreAllocas = false; 11296 11297 // Compare the operand lists. 11298 for (unsigned op = 0, e = FirstInst->getNumOperands(); op != e; ++op) { 11299 if (FirstInst->getOperand(op) == GEP->getOperand(op)) 11300 continue; 11301 11302 // Don't merge two GEPs when two operands differ (introducing phi nodes) 11303 // if one of the PHIs has a constant for the index. The index may be 11304 // substantially cheaper to compute for the constants, so making it a 11305 // variable index could pessimize the path. 
      // This also handles the case
      // for struct indices, which must always be constant.
      if (isa<ConstantInt>(FirstInst->getOperand(op)) ||
          isa<ConstantInt>(GEP->getOperand(op)))
        return 0;

      if (FirstInst->getOperand(op)->getType() != GEP->getOperand(op)->getType())
        return 0;

      // If we already needed a PHI for an earlier operand, and another operand
      // also requires a PHI, we'd be introducing more PHIs than we're
      // eliminating, which increases register pressure on entry to the PHI's
      // block.
      if (NeededPhi)
        return 0;

      FixedOperands[op] = 0;  // Needs a PHI.
      NeededPhi = true;
    }
  }

  // If all of the base pointers of the PHI'd GEPs are from allocas, don't
  // bother doing this transformation.  At best, this will just save a bit of
  // offset calculation, but all the predecessors will have to materialize the
  // stack address into a register anyway.  We'd actually rather *clone* the
  // load up into the predecessors so that we have a load of a gep of an alloca,
  // which can usually all be folded into the load.
  if (AllBasePointersAreAllocas)
    return 0;

  // Otherwise, this is safe to transform.  Insert PHI nodes for each operand
  // that is variable.
  SmallVector<PHINode*, 16> OperandPhis(FixedOperands.size());

  bool HasAnyPHIs = false;
  for (unsigned i = 0, e = FixedOperands.size(); i != e; ++i) {
    if (FixedOperands[i]) continue;  // operand doesn't need a phi.
    Value *FirstOp = FirstInst->getOperand(i);
    PHINode *NewPN = PHINode::Create(FirstOp->getType(),
                                     FirstOp->getName()+".pn");
    InsertNewInstBefore(NewPN, PN);

    NewPN->reserveOperandSpace(e);
    NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0));
    OperandPhis[i] = NewPN;
    FixedOperands[i] = NewPN;
    HasAnyPHIs = true;
  }


  // Add all operands to the new PHIs.
  if (HasAnyPHIs) {
    for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
      GetElementPtrInst *InGEP = cast<GetElementPtrInst>(PN.getIncomingValue(i));
      BasicBlock *InBB = PN.getIncomingBlock(i);

      for (unsigned op = 0, e = OperandPhis.size(); op != e; ++op)
        if (PHINode *OpPhi = OperandPhis[op])
          OpPhi->addIncoming(InGEP->getOperand(op), InBB);
    }
  }

  Value *Base = FixedOperands[0];
  return cast<GEPOperator>(FirstInst)->isInBounds() ?
    GetElementPtrInst::CreateInBounds(Base, FixedOperands.begin()+1,
                                      FixedOperands.end()) :
    GetElementPtrInst::Create(Base, FixedOperands.begin()+1,
                              FixedOperands.end());
}


/// isSafeAndProfitableToSinkLoad - Return true if we know that it is safe to
/// sink the load out of the block that defines it.  This means that it must be
/// obvious the value of the load is not changed from the point of the load to
/// the end of the block it is in.
///
/// Finally, it is safe, but not profitable, to sink a load targeting a
/// non-address-taken alloca.  Doing so will cause us to not promote the alloca
/// to a register.
static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
  BasicBlock::iterator BBI = L, E = L->getParent()->end();

  for (++BBI; BBI != E; ++BBI)
    if (BBI->mayWriteToMemory())
      return false;

  // Check for non-address taken alloca.
If not address-taken already, it isn't 11392 // profitable to do this xform. 11393 if (AllocaInst *AI = dyn_cast<AllocaInst>(L->getOperand(0))) { 11394 bool isAddressTaken = false; 11395 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); 11396 UI != E; ++UI) { 11397 if (isa<LoadInst>(UI)) continue; 11398 if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) { 11399 // If storing TO the alloca, then the address isn't taken. 11400 if (SI->getOperand(1) == AI) continue; 11401 } 11402 isAddressTaken = true; 11403 break; 11404 } 11405 11406 if (!isAddressTaken && AI->isStaticAlloca()) 11407 return false; 11408 } 11409 11410 // If this load is a load from a GEP with a constant offset from an alloca, 11411 // then we don't want to sink it. In its present form, it will be 11412 // load [constant stack offset]. Sinking it will cause us to have to 11413 // materialize the stack addresses in each predecessor in a register only to 11414 // do a shared load from register in the successor. 11415 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(L->getOperand(0))) 11416 if (AllocaInst *AI = dyn_cast<AllocaInst>(GEP->getOperand(0))) 11417 if (AI->isStaticAlloca() && GEP->hasAllConstantIndices()) 11418 return false; 11419 11420 return true; 11421} 11422 11423Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { 11424 LoadInst *FirstLI = cast<LoadInst>(PN.getIncomingValue(0)); 11425 11426 // When processing loads, we need to propagate two bits of information to the 11427 // sunk load: whether it is volatile, and what its alignment is. We currently 11428 // don't sink loads when some have their alignment specified and some don't. 11429 // visitLoadInst will propagate an alignment onto the load when TD is around, 11430 // and if TD isn't around, we can't handle the mixed case. 11431 bool isVolatile = FirstLI->isVolatile(); 11432 unsigned LoadAlignment = FirstLI->getAlignment(); 11433 11434 // We can't sink the load if the loaded value could be modified between the 11435 // load and the PHI. 11436 if (FirstLI->getParent() != PN.getIncomingBlock(0) || 11437 !isSafeAndProfitableToSinkLoad(FirstLI)) 11438 return 0; 11439 11440 // If the PHI is of volatile loads and the load block has multiple 11441 // successors, sinking it would remove a load of the volatile value from 11442 // the path through the other successor. 11443 if (isVolatile && 11444 FirstLI->getParent()->getTerminator()->getNumSuccessors() != 1) 11445 return 0; 11446 11447 // Check to see if all arguments are the same operation. 11448 for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { 11449 LoadInst *LI = dyn_cast<LoadInst>(PN.getIncomingValue(i)); 11450 if (!LI || !LI->hasOneUse()) 11451 return 0; 11452 11453 // We can't sink the load if the loaded value could be modified between 11454 // the load and the PHI. 11455 if (LI->isVolatile() != isVolatile || 11456 LI->getParent() != PN.getIncomingBlock(i) || 11457 !isSafeAndProfitableToSinkLoad(LI)) 11458 return 0; 11459 11460 // If some of the loads have an alignment specified but not all of them, 11461 // we can't do the transformation. 11462 if ((LoadAlignment != 0) != (LI->getAlignment() != 0)) 11463 return 0; 11464 11465 LoadAlignment = std::min(LoadAlignment, LI->getAlignment()); 11466 11467 // If the PHI is of volatile loads and the load block has multiple 11468 // successors, sinking it would remove a load of the volatile value from 11469 // the path through the other successor. 
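  // Overall shape of this transform, as an illustrative sketch (not part of
  // the original comments; block and value names invented):
  //   bb1:   %a = load i32* %p          bb2:   %b = load i32* %q
  //   merge: %r = phi i32 [ %a, %bb1 ], [ %b, %bb2 ]
  // becomes a PHI of the pointers followed by a single sunk load:
  //   merge: %addr = phi i32* [ %p, %bb1 ], [ %q, %bb2 ]
  //          %r    = load i32* %addr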
11470 if (isVolatile && 11471 LI->getParent()->getTerminator()->getNumSuccessors() != 1) 11472 return 0; 11473 } 11474 11475 // Okay, they are all the same operation. Create a new PHI node of the 11476 // correct type, and PHI together all of the LHS's of the instructions. 11477 PHINode *NewPN = PHINode::Create(FirstLI->getOperand(0)->getType(), 11478 PN.getName()+".in"); 11479 NewPN->reserveOperandSpace(PN.getNumOperands()/2); 11480 11481 Value *InVal = FirstLI->getOperand(0); 11482 NewPN->addIncoming(InVal, PN.getIncomingBlock(0)); 11483 11484 // Add all operands to the new PHI. 11485 for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { 11486 Value *NewInVal = cast<LoadInst>(PN.getIncomingValue(i))->getOperand(0); 11487 if (NewInVal != InVal) 11488 InVal = 0; 11489 NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i)); 11490 } 11491 11492 Value *PhiVal; 11493 if (InVal) { 11494 // The new PHI unions all of the same values together. This is really 11495 // common, so we handle it intelligently here for compile-time speed. 11496 PhiVal = InVal; 11497 delete NewPN; 11498 } else { 11499 InsertNewInstBefore(NewPN, PN); 11500 PhiVal = NewPN; 11501 } 11502 11503 // If this was a volatile load that we are merging, make sure to loop through 11504 // and mark all the input loads as non-volatile. If we don't do this, we will 11505 // insert a new volatile load and the old ones will not be deletable. 11506 if (isVolatile) 11507 for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) 11508 cast<LoadInst>(PN.getIncomingValue(i))->setVolatile(false); 11509 11510 return new LoadInst(PhiVal, "", isVolatile, LoadAlignment); 11511} 11512 11513 11514 11515/// FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary" 11516/// operator and they all are only used by the PHI, PHI together their 11517/// inputs, and do the operation once, to the result of the PHI. 11518Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { 11519 Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0)); 11520 11521 if (isa<GetElementPtrInst>(FirstInst)) 11522 return FoldPHIArgGEPIntoPHI(PN); 11523 if (isa<LoadInst>(FirstInst)) 11524 return FoldPHIArgLoadIntoPHI(PN); 11525 11526 // Scan the instruction, looking for input operations that can be folded away. 11527 // If all input operands to the phi are the same instruction (e.g. a cast from 11528 // the same type or "+42") we can pull the operation through the PHI, reducing 11529 // code size and simplifying code. 11530 Constant *ConstantOp = 0; 11531 const Type *CastSrcTy = 0; 11532 11533 if (isa<CastInst>(FirstInst)) { 11534 CastSrcTy = FirstInst->getOperand(0)->getType(); 11535 11536 // Be careful about transforming integer PHIs. We don't want to pessimize 11537 // the code by turning an i32 into an i1293. 11538 if (isa<IntegerType>(PN.getType()) && isa<IntegerType>(CastSrcTy)) { 11539 if (!ShouldChangeType(PN.getType(), CastSrcTy, TD)) 11540 return 0; 11541 } 11542 } else if (isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)) { 11543 // Can fold binop, compare or shift here if the RHS is a constant, 11544 // otherwise call FoldPHIArgBinOpIntoPHI. 11545 ConstantOp = dyn_cast<Constant>(FirstInst->getOperand(1)); 11546 if (ConstantOp == 0) 11547 return FoldPHIArgBinOpIntoPHI(PN); 11548 } else { 11549 return 0; // Cannot fold this operation. 11550 } 11551 11552 // Check to see if all arguments are the same operation. 
11553 for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { 11554 Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i)); 11555 if (I == 0 || !I->hasOneUse() || !I->isSameOperationAs(FirstInst)) 11556 return 0; 11557 if (CastSrcTy) { 11558 if (I->getOperand(0)->getType() != CastSrcTy) 11559 return 0; // Cast operation must match. 11560 } else if (I->getOperand(1) != ConstantOp) { 11561 return 0; 11562 } 11563 } 11564 11565 // Okay, they are all the same operation. Create a new PHI node of the 11566 // correct type, and PHI together all of the LHS's of the instructions. 11567 PHINode *NewPN = PHINode::Create(FirstInst->getOperand(0)->getType(), 11568 PN.getName()+".in"); 11569 NewPN->reserveOperandSpace(PN.getNumOperands()/2); 11570 11571 Value *InVal = FirstInst->getOperand(0); 11572 NewPN->addIncoming(InVal, PN.getIncomingBlock(0)); 11573 11574 // Add all operands to the new PHI. 11575 for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { 11576 Value *NewInVal = cast<Instruction>(PN.getIncomingValue(i))->getOperand(0); 11577 if (NewInVal != InVal) 11578 InVal = 0; 11579 NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i)); 11580 } 11581 11582 Value *PhiVal; 11583 if (InVal) { 11584 // The new PHI unions all of the same values together. This is really 11585 // common, so we handle it intelligently here for compile-time speed. 11586 PhiVal = InVal; 11587 delete NewPN; 11588 } else { 11589 InsertNewInstBefore(NewPN, PN); 11590 PhiVal = NewPN; 11591 } 11592 11593 // Insert and return the new operation. 11594 if (CastInst *FirstCI = dyn_cast<CastInst>(FirstInst)) 11595 return CastInst::Create(FirstCI->getOpcode(), PhiVal, PN.getType()); 11596 11597 if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst)) 11598 return BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp); 11599 11600 CmpInst *CIOp = cast<CmpInst>(FirstInst); 11601 return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), 11602 PhiVal, ConstantOp); 11603} 11604 11605/// DeadPHICycle - Return true if this PHI node is only used by a PHI node cycle 11606/// that is dead. 11607static bool DeadPHICycle(PHINode *PN, 11608 SmallPtrSet<PHINode*, 16> &PotentiallyDeadPHIs) { 11609 if (PN->use_empty()) return true; 11610 if (!PN->hasOneUse()) return false; 11611 11612 // Remember this node, and if we find the cycle, return. 11613 if (!PotentiallyDeadPHIs.insert(PN)) 11614 return true; 11615 11616 // Don't scan crazily complex things. 11617 if (PotentiallyDeadPHIs.size() == 16) 11618 return false; 11619 11620 if (PHINode *PU = dyn_cast<PHINode>(PN->use_back())) 11621 return DeadPHICycle(PU, PotentiallyDeadPHIs); 11622 11623 return false; 11624} 11625 11626/// PHIsEqualValue - Return true if this phi node is always equal to 11627/// NonPhiInVal. This happens with mutually cyclic phi nodes like: 11628/// z = some value; x = phi (y, z); y = phi (x, z) 11629static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal, 11630 SmallPtrSet<PHINode*, 16> &ValueEqualPHIs) { 11631 // See if we already saw this PHI node. 11632 if (!ValueEqualPHIs.insert(PN)) 11633 return true; 11634 11635 // Don't scan crazily complex things. 11636 if (ValueEqualPHIs.size() == 16) 11637 return false; 11638 11639 // Scan the operands to see if they are either phi nodes or are equal to 11640 // the value. 
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
    Value *Op = PN->getIncomingValue(i);
    if (PHINode *OpPN = dyn_cast<PHINode>(Op)) {
      if (!PHIsEqualValue(OpPN, NonPhiInVal, ValueEqualPHIs))
        return false;
    } else if (Op != NonPhiInVal)
      return false;
  }

  return true;
}


namespace {
struct PHIUsageRecord {
  unsigned PHIId;     // The ID # of the PHI (something deterministic to sort on)
  unsigned Shift;     // The amount shifted.
  Instruction *Inst;  // The trunc instruction.

  PHIUsageRecord(unsigned pn, unsigned Sh, Instruction *User)
    : PHIId(pn), Shift(Sh), Inst(User) {}

  bool operator<(const PHIUsageRecord &RHS) const {
    if (PHIId < RHS.PHIId) return true;
    if (PHIId > RHS.PHIId) return false;
    if (Shift < RHS.Shift) return true;
    if (Shift > RHS.Shift) return false;
    return Inst->getType()->getPrimitiveSizeInBits() <
           RHS.Inst->getType()->getPrimitiveSizeInBits();
  }
};

struct LoweredPHIRecord {
  PHINode *PN;        // The PHI that was lowered.
  unsigned Shift;     // The amount shifted.
  unsigned Width;     // The width extracted.

  LoweredPHIRecord(PHINode *pn, unsigned Sh, const Type *Ty)
    : PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {}

  // Ctor form used by DenseMap.
  LoweredPHIRecord(PHINode *pn, unsigned Sh)
    : PN(pn), Shift(Sh), Width(0) {}
};
}

namespace llvm {
  template<>
  struct DenseMapInfo<LoweredPHIRecord> {
    static inline LoweredPHIRecord getEmptyKey() {
      return LoweredPHIRecord(0, 0);
    }
    static inline LoweredPHIRecord getTombstoneKey() {
      return LoweredPHIRecord(0, 1);
    }
    static unsigned getHashValue(const LoweredPHIRecord &Val) {
      return DenseMapInfo<PHINode*>::getHashValue(Val.PN) ^ (Val.Shift>>3) ^
             (Val.Width>>3);
    }
    static bool isEqual(const LoweredPHIRecord &LHS,
                        const LoweredPHIRecord &RHS) {
      return LHS.PN == RHS.PN && LHS.Shift == RHS.Shift &&
             LHS.Width == RHS.Width;
    }
  };
  template <>
  struct isPodLike<LoweredPHIRecord> { static const bool value = true; };
}


/// SliceUpIllegalIntegerPHI - This is an integer PHI and we know that it has an
/// illegal type: see if it is only used by trunc or trunc(lshr) operations.  If
/// so, we split the PHI into the various pieces being extracted.  This sort of
/// thing is introduced when SROA promotes an aggregate to large integer values.
///
/// TODO: The user of the trunc may be a bitcast to float/double/vector or an
/// inttoptr.  We should produce new PHIs in the right type.
///
Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
  // PHIUsers - Keep track of all of the truncated values extracted from a set
  // of PHIs, along with their offset.  These are the things we want to rewrite.
  SmallVector<PHIUsageRecord, 16> PHIUsers;

  // PHIs are often mutually cyclic, so we keep track of a whole set of PHI
  // nodes which are extracted from.  PHIsToSlice is an ordered worklist of PHIs
  // whose uses we still need to check (to ensure they are all extracts), and
  // PHIsInspected is the set we use to avoid revisiting PHIs.
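  // A small illustrative example of the slicing (not from the original
  // comments; names invented): an illegal-width PHI used only by extracts,
  //   %t    = phi i64 [ %a, %bb1 ], [ %b, %bb2 ]
  //   %lo   = trunc i64 %t to i32
  //   %hi.s = lshr i64 %t, 32
  //   %hi   = trunc i64 %hi.s to i32
  // is rewritten into one legal-width PHI per extracted piece, with the
  // lshr+trunc performed in each predecessor instead.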
11728 SmallVector<PHINode*, 8> PHIsToSlice; 11729 SmallPtrSet<PHINode*, 8> PHIsInspected; 11730 11731 PHIsToSlice.push_back(&FirstPhi); 11732 PHIsInspected.insert(&FirstPhi); 11733 11734 for (unsigned PHIId = 0; PHIId != PHIsToSlice.size(); ++PHIId) { 11735 PHINode *PN = PHIsToSlice[PHIId]; 11736 11737 // Scan the input list of the PHI. If any input is an invoke, and if the 11738 // input is defined in the predecessor, then we won't be split the critical 11739 // edge which is required to insert a truncate. Because of this, we have to 11740 // bail out. 11741 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { 11742 InvokeInst *II = dyn_cast<InvokeInst>(PN->getIncomingValue(i)); 11743 if (II == 0) continue; 11744 if (II->getParent() != PN->getIncomingBlock(i)) 11745 continue; 11746 11747 // If we have a phi, and if it's directly in the predecessor, then we have 11748 // a critical edge where we need to put the truncate. Since we can't 11749 // split the edge in instcombine, we have to bail out. 11750 return 0; 11751 } 11752 11753 11754 for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); 11755 UI != E; ++UI) { 11756 Instruction *User = cast<Instruction>(*UI); 11757 11758 // If the user is a PHI, inspect its uses recursively. 11759 if (PHINode *UserPN = dyn_cast<PHINode>(User)) { 11760 if (PHIsInspected.insert(UserPN)) 11761 PHIsToSlice.push_back(UserPN); 11762 continue; 11763 } 11764 11765 // Truncates are always ok. 11766 if (isa<TruncInst>(User)) { 11767 PHIUsers.push_back(PHIUsageRecord(PHIId, 0, User)); 11768 continue; 11769 } 11770 11771 // Otherwise it must be a lshr which can only be used by one trunc. 11772 if (User->getOpcode() != Instruction::LShr || 11773 !User->hasOneUse() || !isa<TruncInst>(User->use_back()) || 11774 !isa<ConstantInt>(User->getOperand(1))) 11775 return 0; 11776 11777 unsigned Shift = cast<ConstantInt>(User->getOperand(1))->getZExtValue(); 11778 PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, User->use_back())); 11779 } 11780 } 11781 11782 // If we have no users, they must be all self uses, just nuke the PHI. 11783 if (PHIUsers.empty()) 11784 return ReplaceInstUsesWith(FirstPhi, UndefValue::get(FirstPhi.getType())); 11785 11786 // If this phi node is transformable, create new PHIs for all the pieces 11787 // extracted out of it. First, sort the users by their offset and size. 11788 array_pod_sort(PHIUsers.begin(), PHIUsers.end()); 11789 11790 DEBUG(errs() << "SLICING UP PHI: " << FirstPhi << '\n'; 11791 for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i) 11792 errs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] <<'\n'; 11793 ); 11794 11795 // PredValues - This is a temporary used when rewriting PHI nodes. It is 11796 // hoisted out here to avoid construction/destruction thrashing. 11797 DenseMap<BasicBlock*, Value*> PredValues; 11798 11799 // ExtractedVals - Each new PHI we introduce is saved here so we don't 11800 // introduce redundant PHIs. 11801 DenseMap<LoweredPHIRecord, PHINode*> ExtractedVals; 11802 11803 for (unsigned UserI = 0, UserE = PHIUsers.size(); UserI != UserE; ++UserI) { 11804 unsigned PHIId = PHIUsers[UserI].PHIId; 11805 PHINode *PN = PHIsToSlice[PHIId]; 11806 unsigned Offset = PHIUsers[UserI].Shift; 11807 const Type *Ty = PHIUsers[UserI].Inst->getType(); 11808 11809 PHINode *EltPHI; 11810 11811 // If we've already lowered a user like this, reuse the previously lowered 11812 // value. 
11813 if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) { 11814 11815 // Otherwise, Create the new PHI node for this user. 11816 EltPHI = PHINode::Create(Ty, PN->getName()+".off"+Twine(Offset), PN); 11817 assert(EltPHI->getType() != PN->getType() && 11818 "Truncate didn't shrink phi?"); 11819 11820 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { 11821 BasicBlock *Pred = PN->getIncomingBlock(i); 11822 Value *&PredVal = PredValues[Pred]; 11823 11824 // If we already have a value for this predecessor, reuse it. 11825 if (PredVal) { 11826 EltPHI->addIncoming(PredVal, Pred); 11827 continue; 11828 } 11829 11830 // Handle the PHI self-reuse case. 11831 Value *InVal = PN->getIncomingValue(i); 11832 if (InVal == PN) { 11833 PredVal = EltPHI; 11834 EltPHI->addIncoming(PredVal, Pred); 11835 continue; 11836 } 11837 11838 if (PHINode *InPHI = dyn_cast<PHINode>(PN)) { 11839 // If the incoming value was a PHI, and if it was one of the PHIs we 11840 // already rewrote it, just use the lowered value. 11841 if (Value *Res = ExtractedVals[LoweredPHIRecord(InPHI, Offset, Ty)]) { 11842 PredVal = Res; 11843 EltPHI->addIncoming(PredVal, Pred); 11844 continue; 11845 } 11846 } 11847 11848 // Otherwise, do an extract in the predecessor. 11849 Builder->SetInsertPoint(Pred, Pred->getTerminator()); 11850 Value *Res = InVal; 11851 if (Offset) 11852 Res = Builder->CreateLShr(Res, ConstantInt::get(InVal->getType(), 11853 Offset), "extract"); 11854 Res = Builder->CreateTrunc(Res, Ty, "extract.t"); 11855 PredVal = Res; 11856 EltPHI->addIncoming(Res, Pred); 11857 11858 // If the incoming value was a PHI, and if it was one of the PHIs we are 11859 // rewriting, we will ultimately delete the code we inserted. This 11860 // means we need to revisit that PHI to make sure we extract out the 11861 // needed piece. 11862 if (PHINode *OldInVal = dyn_cast<PHINode>(PN->getIncomingValue(i))) 11863 if (PHIsInspected.count(OldInVal)) { 11864 unsigned RefPHIId = std::find(PHIsToSlice.begin(),PHIsToSlice.end(), 11865 OldInVal)-PHIsToSlice.begin(); 11866 PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset, 11867 cast<Instruction>(Res))); 11868 ++UserE; 11869 } 11870 } 11871 PredValues.clear(); 11872 11873 DEBUG(errs() << " Made element PHI for offset " << Offset << ": " 11874 << *EltPHI << '\n'); 11875 ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)] = EltPHI; 11876 } 11877 11878 // Replace the use of this piece with the PHI node. 11879 ReplaceInstUsesWith(*PHIUsers[UserI].Inst, EltPHI); 11880 } 11881 11882 // Replace all the remaining uses of the PHI nodes (self uses and the lshrs) 11883 // with undefs. 11884 Value *Undef = UndefValue::get(FirstPhi.getType()); 11885 for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i) 11886 ReplaceInstUsesWith(*PHIsToSlice[i], Undef); 11887 return ReplaceInstUsesWith(FirstPhi, Undef); 11888} 11889 11890// PHINode simplification 11891// 11892Instruction *InstCombiner::visitPHINode(PHINode &PN) { 11893 // If LCSSA is around, don't mess with Phi nodes 11894 if (MustPreserveLCSSA) return 0; 11895 11896 if (Value *V = PN.hasConstantValue()) 11897 return ReplaceInstUsesWith(PN, V); 11898 11899 // If all PHI operands are the same operation, pull them through the PHI, 11900 // reducing code size. 
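  // Illustrative sketch (hypothetical IR): two matching casts feeding a phi,
  //   %a = trunc i64 %x to i32                    ; in %bb1
  //   %b = trunc i64 %y to i32                    ; in %bb2
  //   %p = phi i32 [ %a, %bb1 ], [ %b, %bb2 ]
  // can be rewritten here as a single cast of a wider phi:
  //   %p.in = phi i64 [ %x, %bb1 ], [ %y, %bb2 ]
  //   %p    = trunc i64 %p.in to i32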
11901 if (isa<Instruction>(PN.getIncomingValue(0)) && 11902 isa<Instruction>(PN.getIncomingValue(1)) && 11903 cast<Instruction>(PN.getIncomingValue(0))->getOpcode() == 11904 cast<Instruction>(PN.getIncomingValue(1))->getOpcode() && 11905 // FIXME: The hasOneUse check will fail for PHIs that use the value more 11906 // than themselves more than once. 11907 PN.getIncomingValue(0)->hasOneUse()) 11908 if (Instruction *Result = FoldPHIArgOpIntoPHI(PN)) 11909 return Result; 11910 11911 // If this is a trivial cycle in the PHI node graph, remove it. Basically, if 11912 // this PHI only has a single use (a PHI), and if that PHI only has one use (a 11913 // PHI)... break the cycle. 11914 if (PN.hasOneUse()) { 11915 Instruction *PHIUser = cast<Instruction>(PN.use_back()); 11916 if (PHINode *PU = dyn_cast<PHINode>(PHIUser)) { 11917 SmallPtrSet<PHINode*, 16> PotentiallyDeadPHIs; 11918 PotentiallyDeadPHIs.insert(&PN); 11919 if (DeadPHICycle(PU, PotentiallyDeadPHIs)) 11920 return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType())); 11921 } 11922 11923 // If this phi has a single use, and if that use just computes a value for 11924 // the next iteration of a loop, delete the phi. This occurs with unused 11925 // induction variables, e.g. "for (int j = 0; ; ++j);". Detecting this 11926 // common case here is good because the only other things that catch this 11927 // are induction variable analysis (sometimes) and ADCE, which is only run 11928 // late. 11929 if (PHIUser->hasOneUse() && 11930 (isa<BinaryOperator>(PHIUser) || isa<GetElementPtrInst>(PHIUser)) && 11931 PHIUser->use_back() == &PN) { 11932 return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType())); 11933 } 11934 } 11935 11936 // We sometimes end up with phi cycles that non-obviously end up being the 11937 // same value, for example: 11938 // z = some value; x = phi (y, z); y = phi (x, z) 11939 // where the phi nodes don't necessarily need to be in the same block. Do a 11940 // quick check to see if the PHI node only contains a single non-phi value, if 11941 // so, scan to see if the phi cycle is actually equal to that value. 11942 { 11943 unsigned InValNo = 0, NumOperandVals = PN.getNumIncomingValues(); 11944 // Scan for the first non-phi operand. 11945 while (InValNo != NumOperandVals && 11946 isa<PHINode>(PN.getIncomingValue(InValNo))) 11947 ++InValNo; 11948 11949 if (InValNo != NumOperandVals) { 11950 Value *NonPhiInVal = PN.getOperand(InValNo); 11951 11952 // Scan the rest of the operands to see if there are any conflicts, if so 11953 // there is no need to recursively scan other phis. 11954 for (++InValNo; InValNo != NumOperandVals; ++InValNo) { 11955 Value *OpVal = PN.getIncomingValue(InValNo); 11956 if (OpVal != NonPhiInVal && !isa<PHINode>(OpVal)) 11957 break; 11958 } 11959 11960 // If we scanned over all operands, then we have one unique value plus 11961 // phi values. Scan PHI nodes to see if they all merge in each other or 11962 // the value. 11963 if (InValNo == NumOperandVals) { 11964 SmallPtrSet<PHINode*, 16> ValueEqualPHIs; 11965 if (PHIsEqualValue(&PN, NonPhiInVal, ValueEqualPHIs)) 11966 return ReplaceInstUsesWith(PN, NonPhiInVal); 11967 } 11968 } 11969 } 11970 11971 // If there are multiple PHIs, sort their operands so that they all list 11972 // the blocks in the same order. This will help identical PHIs be eliminated 11973 // by other passes. Other passes shouldn't depend on this for correctness 11974 // however. 
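  // For example (hypothetical IR): if the first phi in the block is
  //   %a = phi i32 [ 1, %bb1 ], [ 2, %bb2 ]
  // then a later phi written as
  //   %b = phi i32 [ 4, %bb2 ], [ 3, %bb1 ]
  // is reordered below to list %bb1 first as well:
  //   %b = phi i32 [ 3, %bb1 ], [ 4, %bb2 ]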
11975 PHINode *FirstPN = cast<PHINode>(PN.getParent()->begin()); 11976 if (&PN != FirstPN) 11977 for (unsigned i = 0, e = FirstPN->getNumIncomingValues(); i != e; ++i) { 11978 BasicBlock *BBA = PN.getIncomingBlock(i); 11979 BasicBlock *BBB = FirstPN->getIncomingBlock(i); 11980 if (BBA != BBB) { 11981 Value *VA = PN.getIncomingValue(i); 11982 unsigned j = PN.getBasicBlockIndex(BBB); 11983 Value *VB = PN.getIncomingValue(j); 11984 PN.setIncomingBlock(i, BBB); 11985 PN.setIncomingValue(i, VB); 11986 PN.setIncomingBlock(j, BBA); 11987 PN.setIncomingValue(j, VA); 11988 // NOTE: Instcombine normally would want us to "return &PN" if we 11989 // modified any of the operands of an instruction. However, since we 11990 // aren't adding or removing uses (just rearranging them) we don't do 11991 // this in this case. 11992 } 11993 } 11994 11995 // If this is an integer PHI and we know that it has an illegal type, see if 11996 // it is only used by trunc or trunc(lshr) operations. If so, we split the 11997 // PHI into the various pieces being extracted. This sort of thing is 11998 // introduced when SROA promotes an aggregate to a single large integer type. 11999 if (isa<IntegerType>(PN.getType()) && TD && 12000 !TD->isLegalInteger(PN.getType()->getPrimitiveSizeInBits())) 12001 if (Instruction *Res = SliceUpIllegalIntegerPHI(PN)) 12002 return Res; 12003 12004 return 0; 12005} 12006 12007Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { 12008 SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end()); 12009 12010 if (Value *V = SimplifyGEPInst(&Ops[0], Ops.size(), TD)) 12011 return ReplaceInstUsesWith(GEP, V); 12012 12013 Value *PtrOp = GEP.getOperand(0); 12014 12015 if (isa<UndefValue>(GEP.getOperand(0))) 12016 return ReplaceInstUsesWith(GEP, UndefValue::get(GEP.getType())); 12017 12018 // Eliminate unneeded casts for indices. 12019 if (TD) { 12020 bool MadeChange = false; 12021 unsigned PtrSize = TD->getPointerSizeInBits(); 12022 12023 gep_type_iterator GTI = gep_type_begin(GEP); 12024 for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); 12025 I != E; ++I, ++GTI) { 12026 if (!isa<SequentialType>(*GTI)) continue; 12027 12028 // If we are using a wider index than needed for this platform, shrink it 12029 // to what we need. If narrower, sign-extend it to what we need. This 12030 // explicit cast can make subsequent optimizations more obvious. 12031 unsigned OpBits = cast<IntegerType>((*I)->getType())->getBitWidth(); 12032 if (OpBits == PtrSize) 12033 continue; 12034 12035 *I = Builder->CreateIntCast(*I, TD->getIntPtrType(GEP.getContext()),true); 12036 MadeChange = true; 12037 } 12038 if (MadeChange) return &GEP; 12039 } 12040 12041 // Combine Indices - If the source pointer to this getelementptr instruction 12042 // is a getelementptr instruction, combine the indices of the two 12043 // getelementptr instructions into a single instruction. 12044 // 12045 if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) { 12046 // Note that if our source is a gep chain itself that we wait for that 12047 // chain to be resolved before we perform this transformation. This 12048 // avoids us creating a TON of code in some cases. 12049 // 12050 if (GetElementPtrInst *SrcGEP = 12051 dyn_cast<GetElementPtrInst>(Src->getOperand(0))) 12052 if (SrcGEP->getNumOperands() == 2) 12053 return 0; // Wait until our source is folded to completion. 12054 12055 SmallVector<Value*, 8> Indices; 12056 12057 // Find out whether the last index in the source GEP is a sequential idx. 
12058 bool EndsWithSequential = false; 12059 for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src); 12060 I != E; ++I) 12061 EndsWithSequential = !isa<StructType>(*I); 12062 12063 // Can we combine the two pointer arithmetics offsets? 12064 if (EndsWithSequential) { 12065 // Replace: gep (gep %P, long B), long A, ... 12066 // With: T = long A+B; gep %P, T, ... 12067 // 12068 Value *Sum; 12069 Value *SO1 = Src->getOperand(Src->getNumOperands()-1); 12070 Value *GO1 = GEP.getOperand(1); 12071 if (SO1 == Constant::getNullValue(SO1->getType())) { 12072 Sum = GO1; 12073 } else if (GO1 == Constant::getNullValue(GO1->getType())) { 12074 Sum = SO1; 12075 } else { 12076 // If they aren't the same type, then the input hasn't been processed 12077 // by the loop above yet (which canonicalizes sequential index types to 12078 // intptr_t). Just avoid transforming this until the input has been 12079 // normalized. 12080 if (SO1->getType() != GO1->getType()) 12081 return 0; 12082 Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum"); 12083 } 12084 12085 // Update the GEP in place if possible. 12086 if (Src->getNumOperands() == 2) { 12087 GEP.setOperand(0, Src->getOperand(0)); 12088 GEP.setOperand(1, Sum); 12089 return &GEP; 12090 } 12091 Indices.append(Src->op_begin()+1, Src->op_end()-1); 12092 Indices.push_back(Sum); 12093 Indices.append(GEP.op_begin()+2, GEP.op_end()); 12094 } else if (isa<Constant>(*GEP.idx_begin()) && 12095 cast<Constant>(*GEP.idx_begin())->isNullValue() && 12096 Src->getNumOperands() != 1) { 12097 // Otherwise we can do the fold if the first index of the GEP is a zero 12098 Indices.append(Src->op_begin()+1, Src->op_end()); 12099 Indices.append(GEP.idx_begin()+1, GEP.idx_end()); 12100 } 12101 12102 if (!Indices.empty()) 12103 return (cast<GEPOperator>(&GEP)->isInBounds() && 12104 Src->isInBounds()) ? 12105 GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices.begin(), 12106 Indices.end(), GEP.getName()) : 12107 GetElementPtrInst::Create(Src->getOperand(0), Indices.begin(), 12108 Indices.end(), GEP.getName()); 12109 } 12110 12111 // Handle gep(bitcast x) and gep(gep x, 0, 0, 0). 12112 if (Value *X = getBitCastOperand(PtrOp)) { 12113 assert(isa<PointerType>(X->getType()) && "Must be cast from pointer"); 12114 12115 // If the input bitcast is actually "bitcast(bitcast(x))", then we don't 12116 // want to change the gep until the bitcasts are eliminated. 12117 if (getBitCastOperand(X)) { 12118 Worklist.AddValue(PtrOp); 12119 return 0; 12120 } 12121 12122 bool HasZeroPointerIndex = false; 12123 if (ConstantInt *C = dyn_cast<ConstantInt>(GEP.getOperand(1))) 12124 HasZeroPointerIndex = C->isZero(); 12125 12126 // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... 12127 // into : GEP [10 x i8]* X, i32 0, ... 12128 // 12129 // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ... 12130 // into : GEP i8* X, ... 12131 // 12132 // This occurs when the program declares an array extern like "int X[];" 12133 if (HasZeroPointerIndex) { 12134 const PointerType *CPTy = cast<PointerType>(PtrOp->getType()); 12135 const PointerType *XTy = cast<PointerType>(X->getType()); 12136 if (const ArrayType *CATy = 12137 dyn_cast<ArrayType>(CPTy->getElementType())) { 12138 // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ? 12139 if (CATy->getElementType() == XTy->getElementType()) { 12140 // -> GEP i8* X, ... 12141 SmallVector<Value*, 8> Indices(GEP.idx_begin()+1, GEP.idx_end()); 12142 return cast<GEPOperator>(&GEP)->isInBounds() ? 
12143 GetElementPtrInst::CreateInBounds(X, Indices.begin(), Indices.end(), 12144 GEP.getName()) : 12145 GetElementPtrInst::Create(X, Indices.begin(), Indices.end(), 12146 GEP.getName()); 12147 } 12148 12149 if (const ArrayType *XATy = dyn_cast<ArrayType>(XTy->getElementType())){ 12150 // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ? 12151 if (CATy->getElementType() == XATy->getElementType()) { 12152 // -> GEP [10 x i8]* X, i32 0, ... 12153 // At this point, we know that the cast source type is a pointer 12154 // to an array of the same type as the destination pointer 12155 // array. Because the array type is never stepped over (there 12156 // is a leading zero) we can fold the cast into this GEP. 12157 GEP.setOperand(0, X); 12158 return &GEP; 12159 } 12160 } 12161 } 12162 } else if (GEP.getNumOperands() == 2) { 12163 // Transform things like: 12164 // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V 12165 // into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast 12166 const Type *SrcElTy = cast<PointerType>(X->getType())->getElementType(); 12167 const Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType(); 12168 if (TD && isa<ArrayType>(SrcElTy) && 12169 TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) == 12170 TD->getTypeAllocSize(ResElTy)) { 12171 Value *Idx[2]; 12172 Idx[0] = Constant::getNullValue(Type::getInt32Ty(*Context)); 12173 Idx[1] = GEP.getOperand(1); 12174 Value *NewGEP = cast<GEPOperator>(&GEP)->isInBounds() ? 12175 Builder->CreateInBoundsGEP(X, Idx, Idx + 2, GEP.getName()) : 12176 Builder->CreateGEP(X, Idx, Idx + 2, GEP.getName()); 12177 // V and GEP are both pointer types --> BitCast 12178 return new BitCastInst(NewGEP, GEP.getType()); 12179 } 12180 12181 // Transform things like: 12182 // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp 12183 // (where tmp = 8*tmp2) into: 12184 // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast 12185 12186 if (TD && isa<ArrayType>(SrcElTy) && ResElTy == Type::getInt8Ty(*Context)) { 12187 uint64_t ArrayEltSize = 12188 TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()); 12189 12190 // Check to see if "tmp" is a scale by a multiple of ArrayEltSize. We 12191 // allow either a mul, shift, or constant here. 12192 Value *NewIdx = 0; 12193 ConstantInt *Scale = 0; 12194 if (ArrayEltSize == 1) { 12195 NewIdx = GEP.getOperand(1); 12196 Scale = ConstantInt::get(cast<IntegerType>(NewIdx->getType()), 1); 12197 } else if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP.getOperand(1))) { 12198 NewIdx = ConstantInt::get(CI->getType(), 1); 12199 Scale = CI; 12200 } else if (Instruction *Inst =dyn_cast<Instruction>(GEP.getOperand(1))){ 12201 if (Inst->getOpcode() == Instruction::Shl && 12202 isa<ConstantInt>(Inst->getOperand(1))) { 12203 ConstantInt *ShAmt = cast<ConstantInt>(Inst->getOperand(1)); 12204 uint32_t ShAmtVal = ShAmt->getLimitedValue(64); 12205 Scale = ConstantInt::get(cast<IntegerType>(Inst->getType()), 12206 1ULL << ShAmtVal); 12207 NewIdx = Inst->getOperand(0); 12208 } else if (Inst->getOpcode() == Instruction::Mul && 12209 isa<ConstantInt>(Inst->getOperand(1))) { 12210 Scale = cast<ConstantInt>(Inst->getOperand(1)); 12211 NewIdx = Inst->getOperand(0); 12212 } 12213 } 12214 12215 // If the index will be to exactly the right offset with the scale taken 12216 // out, perform the transformation. Note, we don't know whether Scale is 12217 // signed or not. 
We'll use unsigned version of division/modulo 12218 // operation after making sure Scale doesn't have the sign bit set. 12219 if (ArrayEltSize && Scale && Scale->getSExtValue() >= 0LL && 12220 Scale->getZExtValue() % ArrayEltSize == 0) { 12221 Scale = ConstantInt::get(Scale->getType(), 12222 Scale->getZExtValue() / ArrayEltSize); 12223 if (Scale->getZExtValue() != 1) { 12224 Constant *C = ConstantExpr::getIntegerCast(Scale, NewIdx->getType(), 12225 false /*ZExt*/); 12226 NewIdx = Builder->CreateMul(NewIdx, C, "idxscale"); 12227 } 12228 12229 // Insert the new GEP instruction. 12230 Value *Idx[2]; 12231 Idx[0] = Constant::getNullValue(Type::getInt32Ty(*Context)); 12232 Idx[1] = NewIdx; 12233 Value *NewGEP = cast<GEPOperator>(&GEP)->isInBounds() ? 12234 Builder->CreateInBoundsGEP(X, Idx, Idx + 2, GEP.getName()) : 12235 Builder->CreateGEP(X, Idx, Idx + 2, GEP.getName()); 12236 // The NewGEP must be pointer typed, so must the old one -> BitCast 12237 return new BitCastInst(NewGEP, GEP.getType()); 12238 } 12239 } 12240 } 12241 } 12242 12243 /// See if we can simplify: 12244 /// X = bitcast A* to B* 12245 /// Y = gep X, <...constant indices...> 12246 /// into a gep of the original struct. This is important for SROA and alias 12247 /// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged. 12248 if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) { 12249 if (TD && 12250 !isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices()) { 12251 // Determine how much the GEP moves the pointer. We are guaranteed to get 12252 // a constant back from EmitGEPOffset. 12253 ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(&GEP, *this)); 12254 int64_t Offset = OffsetV->getSExtValue(); 12255 12256 // If this GEP instruction doesn't move the pointer, just replace the GEP 12257 // with a bitcast of the real input to the dest type. 12258 if (Offset == 0) { 12259 // If the bitcast is of an allocation, and the allocation will be 12260 // converted to match the type of the cast, don't touch this. 12261 if (isa<AllocaInst>(BCI->getOperand(0)) || 12262 isMalloc(BCI->getOperand(0))) { 12263 // See if the bitcast simplifies, if so, don't nuke this GEP yet. 12264 if (Instruction *I = visitBitCast(*BCI)) { 12265 if (I != BCI) { 12266 I->takeName(BCI); 12267 BCI->getParent()->getInstList().insert(BCI, I); 12268 ReplaceInstUsesWith(*BCI, I); 12269 } 12270 return &GEP; 12271 } 12272 } 12273 return new BitCastInst(BCI->getOperand(0), GEP.getType()); 12274 } 12275 12276 // Otherwise, if the offset is non-zero, we need to find out if there is a 12277 // field at Offset in 'A's type. If so, we can pull the cast through the 12278 // GEP. 12279 SmallVector<Value*, 8> NewIndices; 12280 const Type *InTy = 12281 cast<PointerType>(BCI->getOperand(0)->getType())->getElementType(); 12282 if (FindElementAtOffset(InTy, Offset, NewIndices, TD, Context)) { 12283 Value *NGEP = cast<GEPOperator>(&GEP)->isInBounds() ? 
12284 Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices.begin(), 12285 NewIndices.end()) : 12286 Builder->CreateGEP(BCI->getOperand(0), NewIndices.begin(), 12287 NewIndices.end()); 12288 12289 if (NGEP->getType() == GEP.getType()) 12290 return ReplaceInstUsesWith(GEP, NGEP); 12291 NGEP->takeName(&GEP); 12292 return new BitCastInst(NGEP, GEP.getType()); 12293 } 12294 } 12295 } 12296 12297 return 0; 12298} 12299 12300Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { 12301 // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1 12302 if (AI.isArrayAllocation()) { // Check C != 1 12303 if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) { 12304 const Type *NewTy = 12305 ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); 12306 assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!"); 12307 AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName()); 12308 New->setAlignment(AI.getAlignment()); 12309 12310 // Scan to the end of the allocation instructions, to skip over a block of 12311 // allocas if possible...also skip interleaved debug info 12312 // 12313 BasicBlock::iterator It = New; 12314 while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It; 12315 12316 // Now that I is pointing to the first non-allocation-inst in the block, 12317 // insert our getelementptr instruction... 12318 // 12319 Value *NullIdx = Constant::getNullValue(Type::getInt32Ty(*Context)); 12320 Value *Idx[2]; 12321 Idx[0] = NullIdx; 12322 Idx[1] = NullIdx; 12323 Value *V = GetElementPtrInst::CreateInBounds(New, Idx, Idx + 2, 12324 New->getName()+".sub", It); 12325 12326 // Now make everything use the getelementptr instead of the original 12327 // allocation. 12328 return ReplaceInstUsesWith(AI, V); 12329 } else if (isa<UndefValue>(AI.getArraySize())) { 12330 return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType())); 12331 } 12332 } 12333 12334 if (TD && isa<AllocaInst>(AI) && AI.getAllocatedType()->isSized()) { 12335 // If alloca'ing a zero byte object, replace the alloca with a null pointer. 12336 // Note that we only do this for alloca's, because malloc should allocate 12337 // and return a unique pointer, even for a zero byte allocation. 12338 if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0) 12339 return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType())); 12340 12341 // If the alignment is 0 (unspecified), assign it the preferred alignment. 12342 if (AI.getAlignment() == 0) 12343 AI.setAlignment(TD->getPrefTypeAlignment(AI.getAllocatedType())); 12344 } 12345 12346 return 0; 12347} 12348 12349Instruction *InstCombiner::visitFree(Instruction &FI) { 12350 Value *Op = FI.getOperand(1); 12351 12352 // free undef -> unreachable. 12353 if (isa<UndefValue>(Op)) { 12354 // Insert a new store to null because we cannot modify the CFG here. 12355 new StoreInst(ConstantInt::getTrue(*Context), 12356 UndefValue::get(Type::getInt1PtrTy(*Context)), &FI); 12357 return EraseInstFromFunction(FI); 12358 } 12359 12360 // If we have 'free null' delete the instruction. This can happen in stl code 12361 // when lots of inlining happens. 12362 if (isa<ConstantPointerNull>(Op)) 12363 return EraseInstFromFunction(FI); 12364 12365 // If we have a malloc call whose only use is a free call, delete both. 
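  // Illustrative sketch (hypothetical IR, assuming a 32-bit size_t): the dead
  // pair removed here looks like
  //   %p = call i8* @malloc(i32 8)
  //   call void @free(i8* %p)
  // possibly with a bitcast of %p between the two calls; the free, the malloc,
  // and the bitcast (when present) are all erased together.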
12366 if (isMalloc(Op)) { 12367 if (CallInst* CI = extractMallocCallFromBitCast(Op)) { 12368 if (Op->hasOneUse() && CI->hasOneUse()) { 12369 EraseInstFromFunction(FI); 12370 EraseInstFromFunction(*CI); 12371 return EraseInstFromFunction(*cast<Instruction>(Op)); 12372 } 12373 } else { 12374 // Op is a call to malloc 12375 if (Op->hasOneUse()) { 12376 EraseInstFromFunction(FI); 12377 return EraseInstFromFunction(*cast<Instruction>(Op)); 12378 } 12379 } 12380 } 12381 12382 return 0; 12383} 12384 12385/// InstCombineLoadCast - Fold 'load (cast P)' -> cast (load P)' when possible. 12386static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, 12387 const TargetData *TD) { 12388 User *CI = cast<User>(LI.getOperand(0)); 12389 Value *CastOp = CI->getOperand(0); 12390 LLVMContext *Context = IC.getContext(); 12391 12392 const PointerType *DestTy = cast<PointerType>(CI->getType()); 12393 const Type *DestPTy = DestTy->getElementType(); 12394 if (const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) { 12395 12396 // If the address spaces don't match, don't eliminate the cast. 12397 if (DestTy->getAddressSpace() != SrcTy->getAddressSpace()) 12398 return 0; 12399 12400 const Type *SrcPTy = SrcTy->getElementType(); 12401 12402 if (DestPTy->isInteger() || isa<PointerType>(DestPTy) || 12403 isa<VectorType>(DestPTy)) { 12404 // If the source is an array, the code below will not succeed. Check to 12405 // see if a trivial 'gep P, 0, 0' will help matters. Only do this for 12406 // constants. 12407 if (const ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy)) 12408 if (Constant *CSrc = dyn_cast<Constant>(CastOp)) 12409 if (ASrcTy->getNumElements() != 0) { 12410 Value *Idxs[2]; 12411 Idxs[0] = Constant::getNullValue(Type::getInt32Ty(*Context)); 12412 Idxs[1] = Idxs[0]; 12413 CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs, 2); 12414 SrcTy = cast<PointerType>(CastOp->getType()); 12415 SrcPTy = SrcTy->getElementType(); 12416 } 12417 12418 if (IC.getTargetData() && 12419 (SrcPTy->isInteger() || isa<PointerType>(SrcPTy) || 12420 isa<VectorType>(SrcPTy)) && 12421 // Do not allow turning this into a load of an integer, which is then 12422 // casted to a pointer, this pessimizes pointer analysis a lot. 12423 (isa<PointerType>(SrcPTy) == isa<PointerType>(LI.getType())) && 12424 IC.getTargetData()->getTypeSizeInBits(SrcPTy) == 12425 IC.getTargetData()->getTypeSizeInBits(DestPTy)) { 12426 12427 // Okay, we are casting from one integer or pointer type to another of 12428 // the same size. Instead of casting the pointer before the load, cast 12429 // the result of the loaded value. 12430 Value *NewLoad = 12431 IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName()); 12432 // Now cast the result of the load. 12433 return new BitCastInst(NewLoad, LI.getType()); 12434 } 12435 } 12436 } 12437 return 0; 12438} 12439 12440Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { 12441 Value *Op = LI.getOperand(0); 12442 12443 // Attempt to improve the alignment. 12444 if (TD) { 12445 unsigned KnownAlign = 12446 GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType())); 12447 if (KnownAlign > 12448 (LI.getAlignment() == 0 ? TD->getABITypeAlignment(LI.getType()) : 12449 LI.getAlignment())) 12450 LI.setAlignment(KnownAlign); 12451 } 12452 12453 // load (cast X) --> cast (load X) iff safe. 12454 if (isa<CastInst>(Op)) 12455 if (Instruction *Res = InstCombineLoadCast(*this, LI, TD)) 12456 return Res; 12457 12458 // None of the following transforms are legal for volatile loads. 
12459 if (LI.isVolatile()) return 0; 12460 12461 // Do really simple store-to-load forwarding and load CSE, to catch cases 12462 // where there are several consequtive memory accesses to the same location, 12463 // separated by a few arithmetic operations. 12464 BasicBlock::iterator BBI = &LI; 12465 if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6)) 12466 return ReplaceInstUsesWith(LI, AvailableVal); 12467 12468 // load(gep null, ...) -> unreachable 12469 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) { 12470 const Value *GEPI0 = GEPI->getOperand(0); 12471 // TODO: Consider a target hook for valid address spaces for this xform. 12472 if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0){ 12473 // Insert a new store to null instruction before the load to indicate 12474 // that this code is not reachable. We do this instead of inserting 12475 // an unreachable instruction directly because we cannot modify the 12476 // CFG. 12477 new StoreInst(UndefValue::get(LI.getType()), 12478 Constant::getNullValue(Op->getType()), &LI); 12479 return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); 12480 } 12481 } 12482 12483 // load null/undef -> unreachable 12484 // TODO: Consider a target hook for valid address spaces for this xform. 12485 if (isa<UndefValue>(Op) || 12486 (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) { 12487 // Insert a new store to null instruction before the load to indicate that 12488 // this code is not reachable. We do this instead of inserting an 12489 // unreachable instruction directly because we cannot modify the CFG. 12490 new StoreInst(UndefValue::get(LI.getType()), 12491 Constant::getNullValue(Op->getType()), &LI); 12492 return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); 12493 } 12494 12495 // Instcombine load (constantexpr_cast global) -> cast (load global) 12496 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op)) 12497 if (CE->isCast()) 12498 if (Instruction *Res = InstCombineLoadCast(*this, LI, TD)) 12499 return Res; 12500 12501 if (Op->hasOneUse()) { 12502 // Change select and PHI nodes to select values instead of addresses: this 12503 // helps alias analysis out a lot, allows many others simplifications, and 12504 // exposes redundancy in the code. 12505 // 12506 // Note that we cannot do the transformation unless we know that the 12507 // introduced loads cannot trap! Something like this is valid as long as 12508 // the condition is always false: load (select bool %C, int* null, int* %G), 12509 // but it would not be valid if we transformed it to load from null 12510 // unconditionally. 12511 // 12512 if (SelectInst *SI = dyn_cast<SelectInst>(Op)) { 12513 // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2). 
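      // Illustrative sketch (hypothetical IR), assuming both %a and %b are
      // known safe to load from unconditionally (e.g. allocas):
      //   %p = select i1 %c, i32* %a, i32* %b
      //   %v = load i32* %p
      // becomes
      //   %a.val = load i32* %a
      //   %b.val = load i32* %b
      //   %v     = select i1 %c, i32 %a.val, i32 %b.val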
12514 if (isSafeToLoadUnconditionally(SI->getOperand(1), SI) && 12515 isSafeToLoadUnconditionally(SI->getOperand(2), SI)) { 12516 Value *V1 = Builder->CreateLoad(SI->getOperand(1), 12517 SI->getOperand(1)->getName()+".val"); 12518 Value *V2 = Builder->CreateLoad(SI->getOperand(2), 12519 SI->getOperand(2)->getName()+".val"); 12520 return SelectInst::Create(SI->getCondition(), V1, V2); 12521 } 12522 12523 // load (select (cond, null, P)) -> load P 12524 if (Constant *C = dyn_cast<Constant>(SI->getOperand(1))) 12525 if (C->isNullValue()) { 12526 LI.setOperand(0, SI->getOperand(2)); 12527 return &LI; 12528 } 12529 12530 // load (select (cond, P, null)) -> load P 12531 if (Constant *C = dyn_cast<Constant>(SI->getOperand(2))) 12532 if (C->isNullValue()) { 12533 LI.setOperand(0, SI->getOperand(1)); 12534 return &LI; 12535 } 12536 } 12537 } 12538 return 0; 12539} 12540 12541/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P 12542/// when possible. This makes it generally easy to do alias analysis and/or 12543/// SROA/mem2reg of the memory object. 12544static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { 12545 User *CI = cast<User>(SI.getOperand(1)); 12546 Value *CastOp = CI->getOperand(0); 12547 12548 const Type *DestPTy = cast<PointerType>(CI->getType())->getElementType(); 12549 const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType()); 12550 if (SrcTy == 0) return 0; 12551 12552 const Type *SrcPTy = SrcTy->getElementType(); 12553 12554 if (!DestPTy->isInteger() && !isa<PointerType>(DestPTy)) 12555 return 0; 12556 12557 /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep" 12558 /// to its first element. This allows us to handle things like: 12559 /// store i32 xxx, (bitcast {foo*, float}* %P to i32*) 12560 /// on 32-bit hosts. 12561 SmallVector<Value*, 4> NewGEPIndices; 12562 12563 // If the source is an array, the code below will not succeed. Check to 12564 // see if a trivial 'gep P, 0, 0' will help matters. Only do this for 12565 // constants. 12566 if (isa<ArrayType>(SrcPTy) || isa<StructType>(SrcPTy)) { 12567 // Index through pointer. 12568 Constant *Zero = Constant::getNullValue(Type::getInt32Ty(*IC.getContext())); 12569 NewGEPIndices.push_back(Zero); 12570 12571 while (1) { 12572 if (const StructType *STy = dyn_cast<StructType>(SrcPTy)) { 12573 if (!STy->getNumElements()) /* Struct can be empty {} */ 12574 break; 12575 NewGEPIndices.push_back(Zero); 12576 SrcPTy = STy->getElementType(0); 12577 } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) { 12578 NewGEPIndices.push_back(Zero); 12579 SrcPTy = ATy->getElementType(); 12580 } else { 12581 break; 12582 } 12583 } 12584 12585 SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace()); 12586 } 12587 12588 if (!SrcPTy->isInteger() && !isa<PointerType>(SrcPTy)) 12589 return 0; 12590 12591 // If the pointers point into different address spaces or if they point to 12592 // values with different sizes, we can't do the transformation. 12593 if (!IC.getTargetData() || 12594 SrcTy->getAddressSpace() != 12595 cast<PointerType>(CI->getType())->getAddressSpace() || 12596 IC.getTargetData()->getTypeSizeInBits(SrcPTy) != 12597 IC.getTargetData()->getTypeSizeInBits(DestPTy)) 12598 return 0; 12599 12600 // Okay, we are casting from one integer or pointer type to another of 12601 // the same size. Instead of casting the pointer before 12602 // the store, cast the value to be stored. 
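  // Illustrative sketch (hypothetical IR, assuming 32-bit pointers): for
  //   %p = bitcast i8** %q to i32*
  //   store i32 %v, i32* %p
  // the code below stores through the original pointer instead:
  //   %v.c = inttoptr i32 %v to i8*
  //   store i8* %v.c, i8** %q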
12603   Value *NewCast;
12604   Value *SIOp0 = SI.getOperand(0);
12605   Instruction::CastOps opcode = Instruction::BitCast;
12606   const Type* CastSrcTy = SIOp0->getType();
12607   const Type* CastDstTy = SrcPTy;
12608   if (isa<PointerType>(CastDstTy)) {
12609     if (CastSrcTy->isInteger())
12610       opcode = Instruction::IntToPtr;
12611   } else if (isa<IntegerType>(CastDstTy)) {
12612     if (isa<PointerType>(SIOp0->getType()))
12613       opcode = Instruction::PtrToInt;
12614   }
12615
12616   // SIOp0 is a pointer to aggregate and this is a store to the first field,
12617   // emit a GEP to index into its first field.
12618   if (!NewGEPIndices.empty())
12619     CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices.begin(),
12620                                            NewGEPIndices.end());
12621
12622   NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
12623                                    SIOp0->getName()+".c");
12624   return new StoreInst(NewCast, CastOp);
12625 }
12626
12627 /// equivalentAddressValues - Test if A and B will obviously have the same
12628 /// value. This includes recognizing that %t0 and %t1 will have the same
12629 /// value in code like this:
12630 ///   %t0 = getelementptr \@a, 0, 3
12631 ///   store i32 0, i32* %t0
12632 ///   %t1 = getelementptr \@a, 0, 3
12633 ///   %t2 = load i32* %t1
12634 ///
12635 static bool equivalentAddressValues(Value *A, Value *B) {
12636   // Test if the values are trivially equivalent.
12637   if (A == B) return true;
12638
12639   // Test if the values come from identical arithmetic instructions.
12640   // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
12641   // it's only used to compare two uses within the same basic block, which
12642   // means that they'll always either have the same value or one of them
12643   // will have an undefined value.
12644   if (isa<BinaryOperator>(A) ||
12645       isa<CastInst>(A) ||
12646       isa<PHINode>(A) ||
12647       isa<GetElementPtrInst>(A))
12648     if (Instruction *BI = dyn_cast<Instruction>(B))
12649       if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
12650         return true;
12651
12652   // Otherwise they may not be equivalent.
12653   return false;
12654 }
12655
12656 // If this instruction has two uses, one of which is a llvm.dbg.declare,
12657 // return the llvm.dbg.declare.
12658 DbgDeclareInst *InstCombiner::hasOneUsePlusDeclare(Value *V) {
12659   if (!V->hasNUses(2))
12660     return 0;
12661   for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
12662        UI != E; ++UI) {
12663     if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(UI))
12664       return DI;
12665     if (isa<BitCastInst>(UI) && UI->hasOneUse()) {
12666       if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(UI->use_begin()))
12667         return DI;
12668     }
12669   }
12670   return 0;
12671 }
12672
12673 Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
12674   Value *Val = SI.getOperand(0);
12675   Value *Ptr = SI.getOperand(1);
12676
12677   // If the RHS is an alloca with a single use, zapify the store, making the
12678   // alloca dead.
12679   // If the RHS is an alloca with two uses, the other one being a
12680   // llvm.dbg.declare, zapify the store and the declare, making the
12681   // alloca dead. We must do this to prevent declares from affecting
12682   // codegen.
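  // Illustrative sketch (hypothetical IR): with an otherwise-unused alloca,
  //   %x = alloca i32
  //   store i32 %v, i32* %x        ; the only use of %x
  // the store is erased here, leaving the alloca trivially dead for later
  // cleanup.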
12683 if (!SI.isVolatile()) { 12684 if (Ptr->hasOneUse()) { 12685 if (isa<AllocaInst>(Ptr)) { 12686 EraseInstFromFunction(SI); 12687 ++NumCombined; 12688 return 0; 12689 } 12690 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) { 12691 if (isa<AllocaInst>(GEP->getOperand(0))) { 12692 if (GEP->getOperand(0)->hasOneUse()) { 12693 EraseInstFromFunction(SI); 12694 ++NumCombined; 12695 return 0; 12696 } 12697 if (DbgDeclareInst *DI = hasOneUsePlusDeclare(GEP->getOperand(0))) { 12698 EraseInstFromFunction(*DI); 12699 EraseInstFromFunction(SI); 12700 ++NumCombined; 12701 return 0; 12702 } 12703 } 12704 } 12705 } 12706 if (DbgDeclareInst *DI = hasOneUsePlusDeclare(Ptr)) { 12707 EraseInstFromFunction(*DI); 12708 EraseInstFromFunction(SI); 12709 ++NumCombined; 12710 return 0; 12711 } 12712 } 12713 12714 // Attempt to improve the alignment. 12715 if (TD) { 12716 unsigned KnownAlign = 12717 GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType())); 12718 if (KnownAlign > 12719 (SI.getAlignment() == 0 ? TD->getABITypeAlignment(Val->getType()) : 12720 SI.getAlignment())) 12721 SI.setAlignment(KnownAlign); 12722 } 12723 12724 // Do really simple DSE, to catch cases where there are several consecutive 12725 // stores to the same location, separated by a few arithmetic operations. This 12726 // situation often occurs with bitfield accesses. 12727 BasicBlock::iterator BBI = &SI; 12728 for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts; 12729 --ScanInsts) { 12730 --BBI; 12731 // Don't count debug info directives, lest they affect codegen, 12732 // and we skip pointer-to-pointer bitcasts, which are NOPs. 12733 // It is necessary for correctness to skip those that feed into a 12734 // llvm.dbg.declare, as these are not present when debugging is off. 12735 if (isa<DbgInfoIntrinsic>(BBI) || 12736 (isa<BitCastInst>(BBI) && isa<PointerType>(BBI->getType()))) { 12737 ScanInsts++; 12738 continue; 12739 } 12740 12741 if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) { 12742 // Prev store isn't volatile, and stores to the same location? 12743 if (!PrevSI->isVolatile() &&equivalentAddressValues(PrevSI->getOperand(1), 12744 SI.getOperand(1))) { 12745 ++NumDeadStore; 12746 ++BBI; 12747 EraseInstFromFunction(*PrevSI); 12748 continue; 12749 } 12750 break; 12751 } 12752 12753 // If this is a load, we have to stop. However, if the loaded value is from 12754 // the pointer we're loading and is producing the pointer we're storing, 12755 // then *this* store is dead (X = load P; store X -> P). 12756 if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) { 12757 if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) && 12758 !SI.isVolatile()) { 12759 EraseInstFromFunction(SI); 12760 ++NumCombined; 12761 return 0; 12762 } 12763 // Otherwise, this is a load from some other location. Stores before it 12764 // may not be dead. 12765 break; 12766 } 12767 12768 // Don't skip over loads or things that can modify memory. 12769 if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory()) 12770 break; 12771 } 12772 12773 12774 if (SI.isVolatile()) return 0; // Don't hack volatile stores. 12775 12776 // store X, null -> turns into 'unreachable' in SimplifyCFG 12777 if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) { 12778 if (!isa<UndefValue>(Val)) { 12779 SI.setOperand(0, UndefValue::get(Val->getType())); 12780 if (Instruction *U = dyn_cast<Instruction>(Val)) 12781 Worklist.Add(U); // Dropped a use. 12782 ++NumCombined; 12783 } 12784 return 0; // Do not modify these! 
12785 } 12786 12787 // store undef, Ptr -> noop 12788 if (isa<UndefValue>(Val)) { 12789 EraseInstFromFunction(SI); 12790 ++NumCombined; 12791 return 0; 12792 } 12793 12794 // If the pointer destination is a cast, see if we can fold the cast into the 12795 // source instead. 12796 if (isa<CastInst>(Ptr)) 12797 if (Instruction *Res = InstCombineStoreToCast(*this, SI)) 12798 return Res; 12799 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) 12800 if (CE->isCast()) 12801 if (Instruction *Res = InstCombineStoreToCast(*this, SI)) 12802 return Res; 12803 12804 12805 // If this store is the last instruction in the basic block (possibly 12806 // excepting debug info instructions and the pointer bitcasts that feed 12807 // into them), and if the block ends with an unconditional branch, try 12808 // to move it to the successor block. 12809 BBI = &SI; 12810 do { 12811 ++BBI; 12812 } while (isa<DbgInfoIntrinsic>(BBI) || 12813 (isa<BitCastInst>(BBI) && isa<PointerType>(BBI->getType()))); 12814 if (BranchInst *BI = dyn_cast<BranchInst>(BBI)) 12815 if (BI->isUnconditional()) 12816 if (SimplifyStoreAtEndOfBlock(SI)) 12817 return 0; // xform done! 12818 12819 return 0; 12820} 12821 12822/// SimplifyStoreAtEndOfBlock - Turn things like: 12823/// if () { *P = v1; } else { *P = v2 } 12824/// into a phi node with a store in the successor. 12825/// 12826/// Simplify things like: 12827/// *P = v1; if () { *P = v2; } 12828/// into a phi node with a store in the successor. 12829/// 12830bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { 12831 BasicBlock *StoreBB = SI.getParent(); 12832 12833 // Check to see if the successor block has exactly two incoming edges. If 12834 // so, see if the other predecessor contains a store to the same location. 12835 // if so, insert a PHI node (if needed) and move the stores down. 12836 BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0); 12837 12838 // Determine whether Dest has exactly two predecessors and, if so, compute 12839 // the other predecessor. 12840 pred_iterator PI = pred_begin(DestBB); 12841 BasicBlock *OtherBB = 0; 12842 if (*PI != StoreBB) 12843 OtherBB = *PI; 12844 ++PI; 12845 if (PI == pred_end(DestBB)) 12846 return false; 12847 12848 if (*PI != StoreBB) { 12849 if (OtherBB) 12850 return false; 12851 OtherBB = *PI; 12852 } 12853 if (++PI != pred_end(DestBB)) 12854 return false; 12855 12856 // Bail out if all the relevant blocks aren't distinct (this can happen, 12857 // for example, if SI is in an infinite loop) 12858 if (StoreBB == DestBB || OtherBB == DestBB) 12859 return false; 12860 12861 // Verify that the other block ends in a branch and is not otherwise empty. 12862 BasicBlock::iterator BBI = OtherBB->getTerminator(); 12863 BranchInst *OtherBr = dyn_cast<BranchInst>(BBI); 12864 if (!OtherBr || BBI == OtherBB->begin()) 12865 return false; 12866 12867 // If the other block ends in an unconditional branch, check for the 'if then 12868 // else' case. there is an instruction before the branch. 12869 StoreInst *OtherStore = 0; 12870 if (OtherBr->isUnconditional()) { 12871 --BBI; 12872 // Skip over debugging info. 12873 while (isa<DbgInfoIntrinsic>(BBI) || 12874 (isa<BitCastInst>(BBI) && isa<PointerType>(BBI->getType()))) { 12875 if (BBI==OtherBB->begin()) 12876 return false; 12877 --BBI; 12878 } 12879 // If this isn't a store, isn't a store to the same location, or if the 12880 // alignments differ, bail out. 
12881 OtherStore = dyn_cast<StoreInst>(BBI); 12882 if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) || 12883 OtherStore->getAlignment() != SI.getAlignment()) 12884 return false; 12885 } else { 12886 // Otherwise, the other block ended with a conditional branch. If one of the 12887 // destinations is StoreBB, then we have the if/then case. 12888 if (OtherBr->getSuccessor(0) != StoreBB && 12889 OtherBr->getSuccessor(1) != StoreBB) 12890 return false; 12891 12892 // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an 12893 // if/then triangle. See if there is a store to the same ptr as SI that 12894 // lives in OtherBB. 12895 for (;; --BBI) { 12896 // Check to see if we find the matching store. 12897 if ((OtherStore = dyn_cast<StoreInst>(BBI))) { 12898 if (OtherStore->getOperand(1) != SI.getOperand(1) || 12899 OtherStore->getAlignment() != SI.getAlignment()) 12900 return false; 12901 break; 12902 } 12903 // If we find something that may be using or overwriting the stored 12904 // value, or if we run out of instructions, we can't do the xform. 12905 if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() || 12906 BBI == OtherBB->begin()) 12907 return false; 12908 } 12909 12910 // In order to eliminate the store in OtherBr, we have to 12911 // make sure nothing reads or overwrites the stored value in 12912 // StoreBB. 12913 for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) { 12914 // FIXME: This should really be AA driven. 12915 if (I->mayReadFromMemory() || I->mayWriteToMemory()) 12916 return false; 12917 } 12918 } 12919 12920 // Insert a PHI node now if we need it. 12921 Value *MergedVal = OtherStore->getOperand(0); 12922 if (MergedVal != SI.getOperand(0)) { 12923 PHINode *PN = PHINode::Create(MergedVal->getType(), "storemerge"); 12924 PN->reserveOperandSpace(2); 12925 PN->addIncoming(SI.getOperand(0), SI.getParent()); 12926 PN->addIncoming(OtherStore->getOperand(0), OtherBB); 12927 MergedVal = InsertNewInstBefore(PN, DestBB->front()); 12928 } 12929 12930 // Advance to a place where it is safe to insert the new store and 12931 // insert it. 12932 BBI = DestBB->getFirstNonPHI(); 12933 InsertNewInstBefore(new StoreInst(MergedVal, SI.getOperand(1), 12934 OtherStore->isVolatile(), 12935 SI.getAlignment()), *BBI); 12936 12937 // Nuke the old stores. 12938 EraseInstFromFunction(SI); 12939 EraseInstFromFunction(*OtherStore); 12940 ++NumCombined; 12941 return true; 12942} 12943 12944 12945Instruction *InstCombiner::visitBranchInst(BranchInst &BI) { 12946 // Change br (not X), label True, label False to: br X, label False, True 12947 Value *X = 0; 12948 BasicBlock *TrueDest; 12949 BasicBlock *FalseDest; 12950 if (match(&BI, m_Br(m_Not(m_Value(X)), TrueDest, FalseDest)) && 12951 !isa<Constant>(X)) { 12952 // Swap Destinations and condition... 12953 BI.setCondition(X); 12954 BI.setSuccessor(0, FalseDest); 12955 BI.setSuccessor(1, TrueDest); 12956 return &BI; 12957 } 12958 12959 // Cannonicalize fcmp_one -> fcmp_oeq 12960 FCmpInst::Predicate FPred; Value *Y; 12961 if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)), 12962 TrueDest, FalseDest)) && 12963 BI.getCondition()->hasOneUse()) 12964 if (FPred == FCmpInst::FCMP_ONE || FPred == FCmpInst::FCMP_OLE || 12965 FPred == FCmpInst::FCMP_OGE) { 12966 FCmpInst *Cond = cast<FCmpInst>(BI.getCondition()); 12967 Cond->setPredicate(FCmpInst::getInversePredicate(FPred)); 12968 12969 // Swap Destinations and condition. 
12970 BI.setSuccessor(0, FalseDest); 12971 BI.setSuccessor(1, TrueDest); 12972 Worklist.Add(Cond); 12973 return &BI; 12974 } 12975 12976 // Cannonicalize icmp_ne -> icmp_eq 12977 ICmpInst::Predicate IPred; 12978 if (match(&BI, m_Br(m_ICmp(IPred, m_Value(X), m_Value(Y)), 12979 TrueDest, FalseDest)) && 12980 BI.getCondition()->hasOneUse()) 12981 if (IPred == ICmpInst::ICMP_NE || IPred == ICmpInst::ICMP_ULE || 12982 IPred == ICmpInst::ICMP_SLE || IPred == ICmpInst::ICMP_UGE || 12983 IPred == ICmpInst::ICMP_SGE) { 12984 ICmpInst *Cond = cast<ICmpInst>(BI.getCondition()); 12985 Cond->setPredicate(ICmpInst::getInversePredicate(IPred)); 12986 // Swap Destinations and condition. 12987 BI.setSuccessor(0, FalseDest); 12988 BI.setSuccessor(1, TrueDest); 12989 Worklist.Add(Cond); 12990 return &BI; 12991 } 12992 12993 return 0; 12994} 12995 12996Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) { 12997 Value *Cond = SI.getCondition(); 12998 if (Instruction *I = dyn_cast<Instruction>(Cond)) { 12999 if (I->getOpcode() == Instruction::Add) 13000 if (ConstantInt *AddRHS = dyn_cast<ConstantInt>(I->getOperand(1))) { 13001 // change 'switch (X+4) case 1:' into 'switch (X) case -3' 13002 for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2) 13003 SI.setOperand(i, 13004 ConstantExpr::getSub(cast<Constant>(SI.getOperand(i)), 13005 AddRHS)); 13006 SI.setOperand(0, I->getOperand(0)); 13007 Worklist.Add(I); 13008 return &SI; 13009 } 13010 } 13011 return 0; 13012} 13013 13014Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { 13015 Value *Agg = EV.getAggregateOperand(); 13016 13017 if (!EV.hasIndices()) 13018 return ReplaceInstUsesWith(EV, Agg); 13019 13020 if (Constant *C = dyn_cast<Constant>(Agg)) { 13021 if (isa<UndefValue>(C)) 13022 return ReplaceInstUsesWith(EV, UndefValue::get(EV.getType())); 13023 13024 if (isa<ConstantAggregateZero>(C)) 13025 return ReplaceInstUsesWith(EV, Constant::getNullValue(EV.getType())); 13026 13027 if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) { 13028 // Extract the element indexed by the first index out of the constant 13029 Value *V = C->getOperand(*EV.idx_begin()); 13030 if (EV.getNumIndices() > 1) 13031 // Extract the remaining indices out of the constant indexed by the 13032 // first index 13033 return ExtractValueInst::Create(V, EV.idx_begin() + 1, EV.idx_end()); 13034 else 13035 return ReplaceInstUsesWith(EV, V); 13036 } 13037 return 0; // Can't handle other constants 13038 } 13039 if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) { 13040 // We're extracting from an insertvalue instruction, compare the indices 13041 const unsigned *exti, *exte, *insi, *inse; 13042 for (exti = EV.idx_begin(), insi = IV->idx_begin(), 13043 exte = EV.idx_end(), inse = IV->idx_end(); 13044 exti != exte && insi != inse; 13045 ++exti, ++insi) { 13046 if (*insi != *exti) 13047 // The insert and extract both reference distinctly different elements. 13048 // This means the extract is not influenced by the insert, and we can 13049 // replace the aggregate operand of the extract with the aggregate 13050 // operand of the insert. i.e., replace 13051 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1 13052 // %E = extractvalue { i32, { i32 } } %I, 0 13053 // with 13054 // %E = extractvalue { i32, { i32 } } %A, 0 13055 return ExtractValueInst::Create(IV->getAggregateOperand(), 13056 EV.idx_begin(), EV.idx_end()); 13057 } 13058 if (exti == exte && insi == inse) 13059 // Both iterators are at the end: Index lists are identical. 
Replace 13060 // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0 13061 // %C = extractvalue { i32, { i32 } } %B, 1, 0 13062 // with "i32 42" 13063 return ReplaceInstUsesWith(EV, IV->getInsertedValueOperand()); 13064 if (exti == exte) { 13065 // The extract list is a prefix of the insert list. i.e. replace 13066 // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0 13067 // %E = extractvalue { i32, { i32 } } %I, 1 13068 // with 13069 // %X = extractvalue { i32, { i32 } } %A, 1 13070 // %E = insertvalue { i32 } %X, i32 42, 0 13071 // by switching the order of the insert and extract (though the 13072 // insertvalue should be left in, since it may have other uses). 13073 Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(), 13074 EV.idx_begin(), EV.idx_end()); 13075 return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(), 13076 insi, inse); 13077 } 13078 if (insi == inse) 13079 // The insert list is a prefix of the extract list 13080 // We can simply remove the common indices from the extract and make it 13081 // operate on the inserted value instead of the insertvalue result. 13082 // i.e., replace 13083 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1 13084 // %E = extractvalue { i32, { i32 } } %I, 1, 0 13085 // with 13086 // %E extractvalue { i32 } { i32 42 }, 0 13087 return ExtractValueInst::Create(IV->getInsertedValueOperand(), 13088 exti, exte); 13089 } 13090 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) { 13091 // We're extracting from an intrinsic, see if we're the only user, which 13092 // allows us to simplify multiple result intrinsics to simpler things that 13093 // just get one value.. 13094 if (II->hasOneUse()) { 13095 // Check if we're grabbing the overflow bit or the result of a 'with 13096 // overflow' intrinsic. If it's the latter we can remove the intrinsic 13097 // and replace it with a traditional binary instruction. 13098 switch (II->getIntrinsicID()) { 13099 case Intrinsic::uadd_with_overflow: 13100 case Intrinsic::sadd_with_overflow: 13101 if (*EV.idx_begin() == 0) { // Normal result. 13102 Value *LHS = II->getOperand(1), *RHS = II->getOperand(2); 13103 II->replaceAllUsesWith(UndefValue::get(II->getType())); 13104 EraseInstFromFunction(*II); 13105 return BinaryOperator::CreateAdd(LHS, RHS); 13106 } 13107 break; 13108 case Intrinsic::usub_with_overflow: 13109 case Intrinsic::ssub_with_overflow: 13110 if (*EV.idx_begin() == 0) { // Normal result. 13111 Value *LHS = II->getOperand(1), *RHS = II->getOperand(2); 13112 II->replaceAllUsesWith(UndefValue::get(II->getType())); 13113 EraseInstFromFunction(*II); 13114 return BinaryOperator::CreateSub(LHS, RHS); 13115 } 13116 break; 13117 case Intrinsic::umul_with_overflow: 13118 case Intrinsic::smul_with_overflow: 13119 if (*EV.idx_begin() == 0) { // Normal result. 13120 Value *LHS = II->getOperand(1), *RHS = II->getOperand(2); 13121 II->replaceAllUsesWith(UndefValue::get(II->getType())); 13122 EraseInstFromFunction(*II); 13123 return BinaryOperator::CreateMul(LHS, RHS); 13124 } 13125 break; 13126 default: 13127 break; 13128 } 13129 } 13130 } 13131 // Can't simplify extracts from other values. Note that nested extracts are 13132 // already simplified implicitely by the above (extract ( extract (insert) ) 13133 // will be translated into extract ( insert ( extract ) ) first and then just 13134 // the value inserted, if appropriate). 
13135   return 0;
13136 }
13137
13138 /// CheapToScalarize - Return true if the value is cheaper to scalarize than it
13139 /// is to leave as a vector operation.
13140 static bool CheapToScalarize(Value *V, bool isConstant) {
13141   if (isa<ConstantAggregateZero>(V))
13142     return true;
13143   if (ConstantVector *C = dyn_cast<ConstantVector>(V)) {
13144     if (isConstant) return true;
13145     // If all elts are the same, we can extract.
13146     Constant *Op0 = C->getOperand(0);
13147     for (unsigned i = 1; i < C->getNumOperands(); ++i)
13148       if (C->getOperand(i) != Op0)
13149         return false;
13150     return true;
13151   }
13152   Instruction *I = dyn_cast<Instruction>(V);
13153   if (!I) return false;
13154
13155   // Insert element gets simplified to the inserted element or is deleted if
13156   // this is a constant-idx extractelement and it's a constant-idx insertelement.
13157   if (I->getOpcode() == Instruction::InsertElement && isConstant &&
13158       isa<ConstantInt>(I->getOperand(2)))
13159     return true;
13160   if (I->getOpcode() == Instruction::Load && I->hasOneUse())
13161     return true;
13162   if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I))
13163     if (BO->hasOneUse() &&
13164         (CheapToScalarize(BO->getOperand(0), isConstant) ||
13165          CheapToScalarize(BO->getOperand(1), isConstant)))
13166       return true;
13167   if (CmpInst *CI = dyn_cast<CmpInst>(I))
13168     if (CI->hasOneUse() &&
13169         (CheapToScalarize(CI->getOperand(0), isConstant) ||
13170          CheapToScalarize(CI->getOperand(1), isConstant)))
13171       return true;
13172
13173   return false;
13174 }
13175
13176 /// Read and decode a shufflevector mask.
13177 ///
13178 /// It turns undef elements into values that are larger than the number of
13179 /// elements in the input.
13180 static std::vector<unsigned> getShuffleMask(const ShuffleVectorInst *SVI) {
13181   unsigned NElts = SVI->getType()->getNumElements();
13182   if (isa<ConstantAggregateZero>(SVI->getOperand(2)))
13183     return std::vector<unsigned>(NElts, 0);
13184   if (isa<UndefValue>(SVI->getOperand(2)))
13185     return std::vector<unsigned>(NElts, 2*NElts);
13186
13187   std::vector<unsigned> Result;
13188   const ConstantVector *CP = cast<ConstantVector>(SVI->getOperand(2));
13189   for (User::const_op_iterator i = CP->op_begin(), e = CP->op_end(); i!=e; ++i)
13190     if (isa<UndefValue>(*i))
13191       Result.push_back(NElts*2);  // undef -> 2*NElts (out of range)
13192     else
13193       Result.push_back(cast<ConstantInt>(*i)->getZExtValue());
13194   return Result;
13195 }
13196
13197 /// FindScalarElement - Given a vector and an element number, see if the scalar
13198 /// value is already around as a register, for example if it were inserted then
13199 /// extracted from the vector.
13200 static Value *FindScalarElement(Value *V, unsigned EltNo,
13201                                 LLVMContext *Context) {
13202   assert(isa<VectorType>(V->getType()) && "Not looking at a vector?");
13203   const VectorType *PTy = cast<VectorType>(V->getType());
13204   unsigned Width = PTy->getNumElements();
13205   if (EltNo >= Width)  // Out of range access.
13206     return UndefValue::get(PTy->getElementType());
13207
13208   if (isa<UndefValue>(V))
13209     return UndefValue::get(PTy->getElementType());
13210   else if (isa<ConstantAggregateZero>(V))
13211     return Constant::getNullValue(PTy->getElementType());
13212   else if (ConstantVector *CP = dyn_cast<ConstantVector>(V))
13213     return CP->getOperand(EltNo);
13214   else if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
13215     // If this is an insert to a variable element, we don't know what it is.
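    // For example (illustrative only):
    //   %W = insertelement <4 x i32> %V, i32 %X, i32 %n
    // may or may not overwrite the element we are looking for, depending on
    // the runtime value of %n, so we conservatively give up below.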
13216 if (!isa<ConstantInt>(III->getOperand(2))) 13217 return 0; 13218 unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue(); 13219 13220 // If this is an insert to the element we are looking for, return the 13221 // inserted value. 13222 if (EltNo == IIElt) 13223 return III->getOperand(1); 13224 13225 // Otherwise, the insertelement doesn't modify the value, recurse on its 13226 // vector input. 13227 return FindScalarElement(III->getOperand(0), EltNo, Context); 13228 } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) { 13229 unsigned LHSWidth = 13230 cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements(); 13231 unsigned InEl = getShuffleMask(SVI)[EltNo]; 13232 if (InEl < LHSWidth) 13233 return FindScalarElement(SVI->getOperand(0), InEl, Context); 13234 else if (InEl < LHSWidth*2) 13235 return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth, Context); 13236 else 13237 return UndefValue::get(PTy->getElementType()); 13238 } 13239 13240 // Otherwise, we don't know. 13241 return 0; 13242} 13243 13244Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { 13245 // If vector val is undef, replace extract with scalar undef. 13246 if (isa<UndefValue>(EI.getOperand(0))) 13247 return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); 13248 13249 // If vector val is constant 0, replace extract with scalar 0. 13250 if (isa<ConstantAggregateZero>(EI.getOperand(0))) 13251 return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType())); 13252 13253 if (ConstantVector *C = dyn_cast<ConstantVector>(EI.getOperand(0))) { 13254 // If vector val is constant with all elements the same, replace EI with 13255 // that element. When the elements are not identical, we cannot replace yet 13256 // (we do that below, but only when the index is constant). 13257 Constant *op0 = C->getOperand(0); 13258 for (unsigned i = 1; i != C->getNumOperands(); ++i) 13259 if (C->getOperand(i) != op0) { 13260 op0 = 0; 13261 break; 13262 } 13263 if (op0) 13264 return ReplaceInstUsesWith(EI, op0); 13265 } 13266 13267 // If extracting a specified index from the vector, see if we can recursively 13268 // find a previously computed scalar that was inserted into the vector. 13269 if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) { 13270 unsigned IndexVal = IdxC->getZExtValue(); 13271 unsigned VectorWidth = EI.getVectorOperandType()->getNumElements(); 13272 13273 // If this is extracting an invalid index, turn this into undef, to avoid 13274 // crashing the code below. 13275 if (IndexVal >= VectorWidth) 13276 return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); 13277 13278 // This instruction only demands the single element from the input vector. 13279 // If the input vector has a single use, simplify it based on this use 13280 // property. 13281 if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) { 13282 APInt UndefElts(VectorWidth, 0); 13283 APInt DemandedMask(VectorWidth, 1 << IndexVal); 13284 if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0), 13285 DemandedMask, UndefElts)) { 13286 EI.setOperand(0, V); 13287 return &EI; 13288 } 13289 } 13290 13291 if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal, Context)) 13292 return ReplaceInstUsesWith(EI, Elt); 13293 13294 // If the this extractelement is directly using a bitcast from a vector of 13295 // the same number of elements, see if we can find the source element from 13296 // it. In this case, we will end up needing to bitcast the scalars. 
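    // A minimal sketch of that case (all names invented for illustration):
    //   %W = insertelement <4 x i32> %V, i32 %X, i32 1
    //   %F = bitcast <4 x i32> %W to <4 x float>
    //   %E = extractelement <4 x float> %F, i32 1
    // FindScalarElement locates %X through the bitcast, and %E becomes
    //   %E = bitcast i32 %X to float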
13297 if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) { 13298 if (const VectorType *VT = 13299 dyn_cast<VectorType>(BCI->getOperand(0)->getType())) 13300 if (VT->getNumElements() == VectorWidth) 13301 if (Value *Elt = FindScalarElement(BCI->getOperand(0), 13302 IndexVal, Context)) 13303 return new BitCastInst(Elt, EI.getType()); 13304 } 13305 } 13306 13307 if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) { 13308 // Push extractelement into predecessor operation if legal and 13309 // profitable to do so 13310 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) { 13311 if (I->hasOneUse() && 13312 CheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) { 13313 Value *newEI0 = 13314 Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1), 13315 EI.getName()+".lhs"); 13316 Value *newEI1 = 13317 Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1), 13318 EI.getName()+".rhs"); 13319 return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1); 13320 } 13321 } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) { 13322 // Extracting the inserted element? 13323 if (IE->getOperand(2) == EI.getOperand(1)) 13324 return ReplaceInstUsesWith(EI, IE->getOperand(1)); 13325 // If the inserted and extracted elements are constants, they must not 13326 // be the same value, extract from the pre-inserted value instead. 13327 if (isa<Constant>(IE->getOperand(2)) && isa<Constant>(EI.getOperand(1))) { 13328 Worklist.AddValue(EI.getOperand(0)); 13329 EI.setOperand(0, IE->getOperand(0)); 13330 return &EI; 13331 } 13332 } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) { 13333 // If this is extracting an element from a shufflevector, figure out where 13334 // it came from and extract from the appropriate input element instead. 13335 if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) { 13336 unsigned SrcIdx = getShuffleMask(SVI)[Elt->getZExtValue()]; 13337 Value *Src; 13338 unsigned LHSWidth = 13339 cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements(); 13340 13341 if (SrcIdx < LHSWidth) 13342 Src = SVI->getOperand(0); 13343 else if (SrcIdx < LHSWidth*2) { 13344 SrcIdx -= LHSWidth; 13345 Src = SVI->getOperand(1); 13346 } else { 13347 return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); 13348 } 13349 return ExtractElementInst::Create(Src, 13350 ConstantInt::get(Type::getInt32Ty(*Context), SrcIdx, 13351 false)); 13352 } 13353 } 13354 // FIXME: Canonicalize extractelement(bitcast) -> bitcast(extractelement) 13355 } 13356 return 0; 13357} 13358 13359/// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns 13360/// elements from either LHS or RHS, return the shuffle mask and true. 13361/// Otherwise, return false. 
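/// The mask uses the usual shufflevector numbering (a hedged summary of what
/// the code below does): element i of LHS is recorded as i32 i, element i of
/// RHS as i32 NumElts + i, and inserted undefs (or an all-undef starting
/// vector) as an i32 undef.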
13362 static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
13363                                          std::vector<Constant*> &Mask,
13364                                          LLVMContext *Context) {
13365   assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() &&
13366          "Invalid CollectSingleShuffleElements");
13367   unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
13368
13369   if (isa<UndefValue>(V)) {
13370     Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(*Context)));
13371     return true;
13372   } else if (V == LHS) {
13373     for (unsigned i = 0; i != NumElts; ++i)
13374       Mask.push_back(ConstantInt::get(Type::getInt32Ty(*Context), i));
13375     return true;
13376   } else if (V == RHS) {
13377     for (unsigned i = 0; i != NumElts; ++i)
13378       Mask.push_back(ConstantInt::get(Type::getInt32Ty(*Context), i+NumElts));
13379     return true;
13380   } else if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
13381     // If this is an insert of an extract from some other vector, include it.
13382     Value *VecOp    = IEI->getOperand(0);
13383     Value *ScalarOp = IEI->getOperand(1);
13384     Value *IdxOp    = IEI->getOperand(2);
13385
13386     if (!isa<ConstantInt>(IdxOp))
13387       return false;
13388     unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
13389
13390     if (isa<UndefValue>(ScalarOp)) {  // inserting undef into vector.
13391       // Okay, we can handle this if the vector we are inserting into is
13392       // transitively ok.
13393       if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask, Context)) {
13394         // If so, update the mask to reflect the inserted undef.
13395         Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(*Context));
13396         return true;
13397       }
13398     } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
13399       if (isa<ConstantInt>(EI->getOperand(1)) &&
13400           EI->getOperand(0)->getType() == V->getType()) {
13401         unsigned ExtractedIdx =
13402           cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
13403
13404         // This must be extracting from either LHS or RHS.
13405         if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
13406           // Okay, we can handle this if the vector we are inserting into is
13407           // transitively ok.
13408           if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask, Context)) {
13409             // If so, update the mask to reflect the inserted value.
13410             if (EI->getOperand(0) == LHS) {
13411               Mask[InsertedIdx % NumElts] =
13412                  ConstantInt::get(Type::getInt32Ty(*Context), ExtractedIdx);
13413             } else {
13414               assert(EI->getOperand(0) == RHS);
13415               Mask[InsertedIdx % NumElts] =
13416                 ConstantInt::get(Type::getInt32Ty(*Context), ExtractedIdx+NumElts);
13417
13418             }
13419             return true;
13420           }
13421         }
13422       }
13423     }
13424   }
13425   // TODO: Handle shufflevector here!
13426
13427   return false;
13428 }
13429
13430 /// CollectShuffleElements - We are building a shuffle of V, using RHS as the
13431 /// RHS of the shuffle instruction, if it is not null. Return a shuffle mask
13432 /// that computes V and the LHS value of the shuffle.
13433 static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
13434                                      Value *&RHS, LLVMContext *Context) {
13435   assert(isa<VectorType>(V->getType()) &&
13436          (RHS == 0 || V->getType() == RHS->getType()) &&
13437          "Invalid shuffle!");
13438   unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
13439
13440   if (isa<UndefValue>(V)) {
13441     Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(*Context)));
13442     return V;
13443   } else if (isa<ConstantAggregateZero>(V)) {
13444     Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(*Context), 0));
13445     return V;
13446   } else if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
13447     // If this is an insert of an extract from some other vector, include it.
13448     Value *VecOp    = IEI->getOperand(0);
13449     Value *ScalarOp = IEI->getOperand(1);
13450     Value *IdxOp    = IEI->getOperand(2);
13451
13452     if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
13453       if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
13454           EI->getOperand(0)->getType() == V->getType()) {
13455         unsigned ExtractedIdx =
13456           cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
13457         unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
13458
13459         // Either the extracted from or inserted into vector must be RHSVec,
13460         // otherwise we'd end up with a shuffle of three inputs.
13461         if (EI->getOperand(0) == RHS || RHS == 0) {
13462           RHS = EI->getOperand(0);
13463           Value *V = CollectShuffleElements(VecOp, Mask, RHS, Context);
13464           Mask[InsertedIdx % NumElts] =
13465             ConstantInt::get(Type::getInt32Ty(*Context), NumElts+ExtractedIdx);
13466           return V;
13467         }
13468
13469         if (VecOp == RHS) {
13470           Value *V = CollectShuffleElements(EI->getOperand(0), Mask,
13471                                             RHS, Context);
13472           // Everything but the extracted element is replaced with the RHS.
13473           for (unsigned i = 0; i != NumElts; ++i) {
13474             if (i != InsertedIdx)
13475               Mask[i] = ConstantInt::get(Type::getInt32Ty(*Context), NumElts+i);
13476           }
13477           return V;
13478         }
13479
13480         // If this insertelement is a chain that comes from exactly these two
13481         // vectors, return the vector and the effective shuffle.
13482         if (CollectSingleShuffleElements(IEI, EI->getOperand(0), RHS, Mask,
13483                                          Context))
13484           return EI->getOperand(0);
13485
13486       }
13487     }
13488   }
13489   // TODO: Handle shufflevector here!
13490
13491   // Otherwise, can't do anything fancy. Return an identity vector.
13492   for (unsigned i = 0; i != NumElts; ++i)
13493     Mask.push_back(ConstantInt::get(Type::getInt32Ty(*Context), i));
13494   return V;
13495 }
13496
13497 Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
13498   Value *VecOp    = IE.getOperand(0);
13499   Value *ScalarOp = IE.getOperand(1);
13500   Value *IdxOp    = IE.getOperand(2);
13501
13502   // Inserting an undef value, or inserting into an undef index, can be
13503   // replaced by the original vector; remove this instruction.
13504   if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp))
13505     return ReplaceInstUsesWith(IE, VecOp);
13506
13507   // If the inserted element was extracted from some other vector, and if the
13508   // indexes are constant, try to turn this into a shufflevector operation.
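  // For instance (a sketch; all value names are invented), a chain such as
  //   %a = extractelement <2 x i32> %X, i32 0
  //   %b = extractelement <2 x i32> %Y, i32 1
  //   %t = insertelement <2 x i32> undef, i32 %a, i32 0
  //   %r = insertelement <2 x i32> %t, i32 %b, i32 1
  // can be rewritten (when %r does not itself feed another insertelement) as
  //   %r = shufflevector <2 x i32> %X, <2 x i32> %Y, <2 x i32> <i32 0, i32 3>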
13508 if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) { 13509 if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) && 13510 EI->getOperand(0)->getType() == IE.getType()) { 13511 unsigned NumVectorElts = IE.getType()->getNumElements(); 13512 unsigned ExtractedIdx = 13513 cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); 13514 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); 13515 13516 if (ExtractedIdx >= NumVectorElts) // Out of range extract. 13517 return ReplaceInstUsesWith(IE, VecOp); 13518 13519 if (InsertedIdx >= NumVectorElts) // Out of range insert. 13520 return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType())); 13521 13522 // If we are extracting a value from a vector, then inserting it right 13523 // back into the same place, just use the input vector. 13524 if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx) 13525 return ReplaceInstUsesWith(IE, VecOp); 13526 13527 // If this insertelement isn't used by some other insertelement, turn it 13528 // (and any insertelements it points to), into one big shuffle. 13529 if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) { 13530 std::vector<Constant*> Mask; 13531 Value *RHS = 0; 13532 Value *LHS = CollectShuffleElements(&IE, Mask, RHS, Context); 13533 if (RHS == 0) RHS = UndefValue::get(LHS->getType()); 13534 // We now have a shuffle of LHS, RHS, Mask. 13535 return new ShuffleVectorInst(LHS, RHS, 13536 ConstantVector::get(Mask)); 13537 } 13538 } 13539 } 13540 13541 unsigned VWidth = cast<VectorType>(VecOp->getType())->getNumElements(); 13542 APInt UndefElts(VWidth, 0); 13543 APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); 13544 if (SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) 13545 return &IE; 13546 13547 return 0; 13548} 13549 13550 13551Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { 13552 Value *LHS = SVI.getOperand(0); 13553 Value *RHS = SVI.getOperand(1); 13554 std::vector<unsigned> Mask = getShuffleMask(&SVI); 13555 13556 bool MadeChange = false; 13557 13558 // Undefined shuffle mask -> undefined value. 13559 if (isa<UndefValue>(SVI.getOperand(2))) 13560 return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType())); 13561 13562 unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements(); 13563 13564 if (VWidth != cast<VectorType>(LHS->getType())->getNumElements()) 13565 return 0; 13566 13567 APInt UndefElts(VWidth, 0); 13568 APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); 13569 if (SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) { 13570 LHS = SVI.getOperand(0); 13571 RHS = SVI.getOperand(1); 13572 MadeChange = true; 13573 } 13574 13575 // Canonicalize shuffle(x ,x,mask) -> shuffle(x, undef,mask') 13576 // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask'). 13577 if (LHS == RHS || isa<UndefValue>(LHS)) { 13578 if (isa<UndefValue>(LHS) && LHS == RHS) { 13579 // shuffle(undef,undef,mask) -> undef. 13580 return ReplaceInstUsesWith(SVI, LHS); 13581 } 13582 13583 // Remap any references to RHS to use LHS. 13584 std::vector<Constant*> Elts; 13585 for (unsigned i = 0, e = Mask.size(); i != e; ++i) { 13586 if (Mask[i] >= 2*e) 13587 Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context))); 13588 else { 13589 if ((Mask[i] >= e && isa<UndefValue>(RHS)) || 13590 (Mask[i] < e && isa<UndefValue>(LHS))) { 13591 Mask[i] = 2*e; // Turn into undef. 13592 Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context))); 13593 } else { 13594 Mask[i] = Mask[i] % e; // Force to LHS. 
13595           Elts.push_back(ConstantInt::get(Type::getInt32Ty(*Context), Mask[i]));
13596         }
13597       }
13598     }
13599     SVI.setOperand(0, SVI.getOperand(1));
13600     SVI.setOperand(1, UndefValue::get(RHS->getType()));
13601     SVI.setOperand(2, ConstantVector::get(Elts));
13602     LHS = SVI.getOperand(0);
13603     RHS = SVI.getOperand(1);
13604     MadeChange = true;
13605   }
13606
13607   // Analyze the shuffle: is the LHS or the RHS an identity shuffle?
13608   bool isLHSID = true, isRHSID = true;
13609
13610   for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
13611     if (Mask[i] >= e*2) continue;  // Ignore undef values.
13612     // Is this an identity shuffle of the LHS value?
13613     isLHSID &= (Mask[i] == i);
13614
13615     // Is this an identity shuffle of the RHS value?
13616     isRHSID &= (Mask[i]-e == i);
13617   }
13618
13619   // Eliminate identity shuffles.
13620   if (isLHSID) return ReplaceInstUsesWith(SVI, LHS);
13621   if (isRHSID) return ReplaceInstUsesWith(SVI, RHS);
13622
13623   // If the LHS is a shufflevector itself, see if we can combine it with this
13624   // one without producing an unusual shuffle. Here we are really conservative:
13625   // we are absolutely afraid of producing a shuffle mask not in the input
13626   // program, because the code gen may not be smart enough to turn a merged
13627   // shuffle into two specific shuffles: it may produce worse code. As such,
13628   // we only merge two shuffles if the result is one of the two input shuffle
13629   // masks. In this case, merging the shuffles just removes one instruction,
13630   // which we know is safe. This is good for things like turning:
13631   // (splat(splat)) -> splat.
13632   if (ShuffleVectorInst *LHSSVI = dyn_cast<ShuffleVectorInst>(LHS)) {
13633     if (isa<UndefValue>(RHS)) {
13634       std::vector<unsigned> LHSMask = getShuffleMask(LHSSVI);
13635
13636       if (LHSMask.size() == Mask.size()) {
13637         std::vector<unsigned> NewMask;
13638         for (unsigned i = 0, e = Mask.size(); i != e; ++i)
13639           if (Mask[i] >= e)
13640             NewMask.push_back(2*e);
13641           else
13642             NewMask.push_back(LHSMask[Mask[i]]);
13643
13644         // If the result mask is equal to the src shuffle or this
13645         // shuffle mask, do the replacement.
13646         if (NewMask == LHSMask || NewMask == Mask) {
13647           unsigned LHSInNElts =
13648             cast<VectorType>(LHSSVI->getOperand(0)->getType())->
13649             getNumElements();
13650           std::vector<Constant*> Elts;
13651           for (unsigned i = 0, e = NewMask.size(); i != e; ++i) {
13652             if (NewMask[i] >= LHSInNElts*2) {
13653               Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context)));
13654             } else {
13655               Elts.push_back(ConstantInt::get(Type::getInt32Ty(*Context),
13656                                               NewMask[i]));
13657             }
13658           }
13659           return new ShuffleVectorInst(LHSSVI->getOperand(0),
13660                                        LHSSVI->getOperand(1),
13661                                        ConstantVector::get(Elts));
13662         }
13663       }
13664     }
13665   }
13666
13667   return MadeChange ? &SVI : 0;
13668 }
13669
13670
13671
13672
13673 /// TryToSinkInstruction - Try to move the specified instruction from its
13674 /// current block into the beginning of DestBlock, which can only happen if it's
13675 /// safe to move the instruction past all of the instructions between it and the
13676 /// end of its block.
13677 static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
13678   assert(I->hasOneUse() && "Invariants didn't hold!");
13679
13680   // Cannot move control-flow-involving, volatile loads, vaarg, etc.
13681   if (isa<PHINode>(I) || I->mayHaveSideEffects() || isa<TerminatorInst>(I))
13682     return false;
13683
13684   // Do not sink alloca instructions out of the entry block.
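  // (Allocas in the entry block are normally treated as static stack slots;
  // sinking one into another block would effectively turn it into a dynamic
  // allocation, so it is not a win.)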
13685 if (isa<AllocaInst>(I) && I->getParent() == 13686 &DestBlock->getParent()->getEntryBlock()) 13687 return false; 13688 13689 // We can only sink load instructions if there is nothing between the load and 13690 // the end of block that could change the value. 13691 if (I->mayReadFromMemory()) { 13692 for (BasicBlock::iterator Scan = I, E = I->getParent()->end(); 13693 Scan != E; ++Scan) 13694 if (Scan->mayWriteToMemory()) 13695 return false; 13696 } 13697 13698 BasicBlock::iterator InsertPos = DestBlock->getFirstNonPHI(); 13699 13700 CopyPrecedingStopPoint(I, InsertPos); 13701 I->moveBefore(InsertPos); 13702 ++NumSunkInst; 13703 return true; 13704} 13705 13706 13707/// AddReachableCodeToWorklist - Walk the function in depth-first order, adding 13708/// all reachable code to the worklist. 13709/// 13710/// This has a couple of tricks to make the code faster and more powerful. In 13711/// particular, we constant fold and DCE instructions as we go, to avoid adding 13712/// them to the worklist (this significantly speeds up instcombine on code where 13713/// many instructions are dead or constant). Additionally, if we find a branch 13714/// whose condition is a known constant, we only visit the reachable successors. 13715/// 13716static bool AddReachableCodeToWorklist(BasicBlock *BB, 13717 SmallPtrSet<BasicBlock*, 64> &Visited, 13718 InstCombiner &IC, 13719 const TargetData *TD) { 13720 bool MadeIRChange = false; 13721 SmallVector<BasicBlock*, 256> Worklist; 13722 Worklist.push_back(BB); 13723 13724 std::vector<Instruction*> InstrsForInstCombineWorklist; 13725 InstrsForInstCombineWorklist.reserve(128); 13726 13727 SmallPtrSet<ConstantExpr*, 64> FoldedConstants; 13728 13729 while (!Worklist.empty()) { 13730 BB = Worklist.back(); 13731 Worklist.pop_back(); 13732 13733 // We have now visited this block! If we've already been here, ignore it. 13734 if (!Visited.insert(BB)) continue; 13735 13736 for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) { 13737 Instruction *Inst = BBI++; 13738 13739 // DCE instruction if trivially dead. 13740 if (isInstructionTriviallyDead(Inst)) { 13741 ++NumDeadInst; 13742 DEBUG(errs() << "IC: DCE: " << *Inst << '\n'); 13743 Inst->eraseFromParent(); 13744 continue; 13745 } 13746 13747 // ConstantProp instruction if trivially constant. 13748 if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0))) 13749 if (Constant *C = ConstantFoldInstruction(Inst, TD)) { 13750 DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " 13751 << *Inst << '\n'); 13752 Inst->replaceAllUsesWith(C); 13753 ++NumConstProp; 13754 Inst->eraseFromParent(); 13755 continue; 13756 } 13757 13758 13759 13760 if (TD) { 13761 // See if we can constant fold its operands. 13762 for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end(); 13763 i != e; ++i) { 13764 ConstantExpr *CE = dyn_cast<ConstantExpr>(i); 13765 if (CE == 0) continue; 13766 13767 // If we already folded this constant, don't try again. 13768 if (!FoldedConstants.insert(CE)) 13769 continue; 13770 13771 Constant *NewC = ConstantFoldConstantExpression(CE, TD); 13772 if (NewC && NewC != CE) { 13773 *i = NewC; 13774 MadeIRChange = true; 13775 } 13776 } 13777 } 13778 13779 13780 InstrsForInstCombineWorklist.push_back(Inst); 13781 } 13782 13783 // Recursively visit successors. If this is a branch or switch on a 13784 // constant, only visit the reachable successor. 
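    // For example (a sketch), given "br i1 true, label %T, label %F", only %T
    // is pushed onto the block worklist here; if nothing else reaches %F, its
    // instructions are removed by the unreachable-block sweep in
    // DoOneIteration.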
13785     TerminatorInst *TI = BB->getTerminator();
13786     if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
13787       if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) {
13788         bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue();
13789         BasicBlock *ReachableBB = BI->getSuccessor(!CondVal);
13790         Worklist.push_back(ReachableBB);
13791         continue;
13792       }
13793     } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
13794       if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
13795         // Only the successor selected by the constant is reachable; it is the
13796         // default destination unless an explicit case value matches.
13797         BasicBlock *ReachableBB = SI->getSuccessor(0);
13798         for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i)
13799           if (SI->getCaseValue(i) == Cond) {
13800             ReachableBB = SI->getSuccessor(i);
13801             break;
13802           }
13803
13804         Worklist.push_back(ReachableBB);
13805         continue;
13806       }
13807     }
13808
13809     for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
13810       Worklist.push_back(TI->getSuccessor(i));
13811   }
13812
13813   // Once we've found all of the instructions to add to instcombine's worklist,
13814   // add them in reverse order. This way instcombine will visit from the top
13815   // of the function down. This jibes well with the way that it adds all uses
13816   // of instructions to the worklist after doing a transformation, thus avoiding
13817   // some N^2 behavior in pathological cases.
13818   IC.Worklist.AddInitialGroup(&InstrsForInstCombineWorklist[0],
13819                               InstrsForInstCombineWorklist.size());
13820
13821   return MadeIRChange;
13822 }
13823
13824 bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
13825   MadeIRChange = false;
13826
13827   DEBUG(errs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
13828         << F.getNameStr() << "\n");
13829
13830   {
13831     // Do a depth-first traversal of the function, populate the worklist with
13832     // the reachable instructions. Ignore blocks that are not reachable. Keep
13833     // track of which blocks we visit.
13834     SmallPtrSet<BasicBlock*, 64> Visited;
13835     MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD);
13836
13837     // Do a quick scan over the function. If we find any blocks that are
13838     // unreachable, remove any instructions inside of them. This prevents
13839     // the instcombine code from having to deal with some bad special cases.
13840     for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
13841       if (!Visited.count(BB)) {
13842         Instruction *Term = BB->getTerminator();
13843         while (Term != BB->begin()) {  // Remove instrs bottom-up
13844           BasicBlock::iterator I = Term; --I;
13845
13846           DEBUG(errs() << "IC: DCE: " << *I << '\n');
13847           // A debug intrinsic shouldn't force another iteration if we weren't
13848           // going to do one without it.
13849           if (!isa<DbgInfoIntrinsic>(I)) {
13850             ++NumDeadInst;
13851             MadeIRChange = true;
13852           }
13853
13854           // If I is not of void type, replaceAllUsesWith undef.
13855           // This allows ValueHandlers and custom metadata to adjust themselves.
13856           if (!I->getType()->isVoidTy())
13857             I->replaceAllUsesWith(UndefValue::get(I->getType()));
13858           I->eraseFromParent();
13859         }
13860       }
13861   }
13862
13863   while (!Worklist.isEmpty()) {
13864     Instruction *I = Worklist.RemoveOne();
13865     if (I == 0) continue;  // skip null values.
13866
13867     // Check to see if we can DCE the instruction.
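    // ("Trivially dead" here means the instruction has no remaining uses and
    // no side effects, e.g. an add whose result is never read.)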
13868 if (isInstructionTriviallyDead(I)) { 13869 DEBUG(errs() << "IC: DCE: " << *I << '\n'); 13870 EraseInstFromFunction(*I); 13871 ++NumDeadInst; 13872 MadeIRChange = true; 13873 continue; 13874 } 13875 13876 // Instruction isn't dead, see if we can constant propagate it. 13877 if (!I->use_empty() && isa<Constant>(I->getOperand(0))) 13878 if (Constant *C = ConstantFoldInstruction(I, TD)) { 13879 DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n'); 13880 13881 // Add operands to the worklist. 13882 ReplaceInstUsesWith(*I, C); 13883 ++NumConstProp; 13884 EraseInstFromFunction(*I); 13885 MadeIRChange = true; 13886 continue; 13887 } 13888 13889 // See if we can trivially sink this instruction to a successor basic block. 13890 if (I->hasOneUse()) { 13891 BasicBlock *BB = I->getParent(); 13892 Instruction *UserInst = cast<Instruction>(I->use_back()); 13893 BasicBlock *UserParent; 13894 13895 // Get the block the use occurs in. 13896 if (PHINode *PN = dyn_cast<PHINode>(UserInst)) 13897 UserParent = PN->getIncomingBlock(I->use_begin().getUse()); 13898 else 13899 UserParent = UserInst->getParent(); 13900 13901 if (UserParent != BB) { 13902 bool UserIsSuccessor = false; 13903 // See if the user is one of our successors. 13904 for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) 13905 if (*SI == UserParent) { 13906 UserIsSuccessor = true; 13907 break; 13908 } 13909 13910 // If the user is one of our immediate successors, and if that successor 13911 // only has us as a predecessors (we'd have to split the critical edge 13912 // otherwise), we can keep going. 13913 if (UserIsSuccessor && UserParent->getSinglePredecessor()) 13914 // Okay, the CFG is simple enough, try to sink this instruction. 13915 MadeIRChange |= TryToSinkInstruction(I, UserParent); 13916 } 13917 } 13918 13919 // Now that we have an instruction, try combining it to simplify it. 13920 Builder->SetInsertPoint(I->getParent(), I); 13921 13922#ifndef NDEBUG 13923 std::string OrigI; 13924#endif 13925 DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str();); 13926 DEBUG(errs() << "IC: Visiting: " << OrigI << '\n'); 13927 13928 if (Instruction *Result = visit(*I)) { 13929 ++NumCombined; 13930 // Should we replace the old instruction with a new one? 13931 if (Result != I) { 13932 DEBUG(errs() << "IC: Old = " << *I << '\n' 13933 << " New = " << *Result << '\n'); 13934 13935 // Everything uses the new instruction now. 13936 I->replaceAllUsesWith(Result); 13937 13938 // Push the new instruction and any users onto the worklist. 13939 Worklist.Add(Result); 13940 Worklist.AddUsersToWorkList(*Result); 13941 13942 // Move the name to the new instruction first. 13943 Result->takeName(I); 13944 13945 // Insert the new instruction into the basic block... 13946 BasicBlock *InstParent = I->getParent(); 13947 BasicBlock::iterator InsertPos = I; 13948 13949 if (!isa<PHINode>(Result)) // If combining a PHI, don't insert 13950 while (isa<PHINode>(InsertPos)) // middle of a block of PHIs. 13951 ++InsertPos; 13952 13953 InstParent->getInstList().insert(InsertPos, Result); 13954 13955 EraseInstFromFunction(*I); 13956 } else { 13957#ifndef NDEBUG 13958 DEBUG(errs() << "IC: Mod = " << OrigI << '\n' 13959 << " New = " << *I << '\n'); 13960#endif 13961 13962 // If the instruction was modified, it's possible that it is now dead. 13963 // if so, remove it. 
13964 if (isInstructionTriviallyDead(I)) { 13965 EraseInstFromFunction(*I); 13966 } else { 13967 Worklist.Add(I); 13968 Worklist.AddUsersToWorkList(*I); 13969 } 13970 } 13971 MadeIRChange = true; 13972 } 13973 } 13974 13975 Worklist.Zap(); 13976 return MadeIRChange; 13977} 13978 13979 13980bool InstCombiner::runOnFunction(Function &F) { 13981 MustPreserveLCSSA = mustPreserveAnalysisID(LCSSAID); 13982 Context = &F.getContext(); 13983 TD = getAnalysisIfAvailable<TargetData>(); 13984 13985 13986 /// Builder - This is an IRBuilder that automatically inserts new 13987 /// instructions into the worklist when they are created. 13988 IRBuilder<true, TargetFolder, InstCombineIRInserter> 13989 TheBuilder(F.getContext(), TargetFolder(TD), 13990 InstCombineIRInserter(Worklist)); 13991 Builder = &TheBuilder; 13992 13993 bool EverMadeChange = false; 13994 13995 // Iterate while there is work to do. 13996 unsigned Iteration = 0; 13997 while (DoOneIteration(F, Iteration++)) 13998 EverMadeChange = true; 13999 14000 Builder = 0; 14001 return EverMadeChange; 14002} 14003 14004FunctionPass *llvm::createInstructionCombiningPass() { 14005 return new InstCombiner(); 14006} 14007
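// Example of how a client would typically schedule this pass (a sketch, not
// part of the original file; it assumes the legacy PassManager API of this
// era and a Module M that has already been loaded):
//   #include "llvm/PassManager.h"
//   PassManager PM;
//   PM.add(createInstructionCombiningPass());
//   PM.run(M);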