InstructionCombining.cpp revision 948cdeba97d767df6d50496cccd4a4837e0296c8
1//===- InstructionCombining.cpp - Combine multiple instructions -----------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// InstructionCombining - Combine instructions to form fewer, simple 11// instructions. This pass does not modify the CFG. This pass is where 12// algebraic simplification happens. 13// 14// This pass combines things like: 15// %Y = add i32 %X, 1 16// %Z = add i32 %Y, 1 17// into: 18// %Z = add i32 %X, 2 19// 20// This is a simple worklist driven algorithm. 21// 22// This pass guarantees that the following canonicalizations are performed on 23// the program: 24// 1. If a binary operator has a constant operand, it is moved to the RHS 25// 2. Bitwise operators with constant operands are always grouped so that 26// shifts are performed first, then or's, then and's, then xor's. 27// 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible 28// 4. All cmp instructions on boolean values are replaced with logical ops 29// 5. add X, X is represented as (X*2) => (X << 1) 30// 6. Multiplies with a power-of-two constant argument are transformed into 31// shifts. 32// ... etc. 33// 34//===----------------------------------------------------------------------===// 35 36#define DEBUG_TYPE "instcombine" 37#include "llvm/Transforms/Scalar.h" 38#include "InstCombine.h" 39#include "llvm/IntrinsicInst.h" 40#include "llvm/LLVMContext.h" 41#include "llvm/DerivedTypes.h" 42#include "llvm/GlobalVariable.h" 43#include "llvm/Operator.h" 44#include "llvm/Analysis/ConstantFolding.h" 45#include "llvm/Analysis/InstructionSimplify.h" 46#include "llvm/Analysis/MemoryBuiltins.h" 47#include "llvm/Target/TargetData.h" 48#include "llvm/Transforms/Utils/BasicBlockUtils.h" 49#include "llvm/Transforms/Utils/Local.h" 50#include "llvm/Support/Debug.h" 51#include "llvm/Support/ErrorHandling.h" 52#include "llvm/Support/GetElementPtrTypeIterator.h" 53#include "llvm/Support/MathExtras.h" 54#include "llvm/Support/PatternMatch.h" 55#include "llvm/ADT/SmallPtrSet.h" 56#include "llvm/ADT/Statistic.h" 57#include "llvm/ADT/STLExtras.h" 58#include <algorithm> 59#include <climits> 60using namespace llvm; 61using namespace llvm::PatternMatch; 62 63STATISTIC(NumCombined , "Number of insts combined"); 64STATISTIC(NumConstProp, "Number of constant folds"); 65STATISTIC(NumDeadInst , "Number of dead inst eliminated"); 66STATISTIC(NumSunkInst , "Number of instructions sunk"); 67 68 69char InstCombiner::ID = 0; 70static RegisterPass<InstCombiner> 71X("instcombine", "Combine redundant instructions"); 72 73void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const { 74 AU.addPreservedID(LCSSAID); 75 AU.setPreservesCFG(); 76} 77 78 79/// ShouldChangeType - Return true if it is desirable to convert a computation 80/// from 'From' to 'To'. We don't want to convert from a legal to an illegal 81/// type for example, or from a smaller to a larger illegal type. 82bool InstCombiner::ShouldChangeType(const Type *From, const Type *To) const { 83 assert(isa<IntegerType>(From) && isa<IntegerType>(To)); 84 85 // If we don't have TD, we don't know if the source/dest are legal. 86 if (!TD) return false; 87 88 unsigned FromWidth = From->getPrimitiveSizeInBits(); 89 unsigned ToWidth = To->getPrimitiveSizeInBits(); 90 bool FromLegal = TD->isLegalInteger(FromWidth); 91 bool ToLegal = TD->isLegalInteger(ToWidth); 92 93 // If this is a legal integer from type, and the result would be an illegal 94 // type, don't do the transformation. 95 if (FromLegal && !ToLegal) 96 return false; 97 98 // Otherwise, if both are illegal, do not increase the size of the result. We 99 // do allow things like i160 -> i64, but not i64 -> i160. 100 if (!FromLegal && !ToLegal && ToWidth > FromWidth) 101 return false; 102 103 return true; 104} 105 106 107// SimplifyCommutative - This performs a few simplifications for commutative 108// operators: 109// 110// 1. Order operands such that they are listed from right (least complex) to 111// left (most complex). This puts constants before unary operators before 112// binary operators. 113// 114// 2. Transform: (op (op V, C1), C2) ==> (op V, (op C1, C2)) 115// 3. Transform: (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2)) 116// 117bool InstCombiner::SimplifyCommutative(BinaryOperator &I) { 118 bool Changed = false; 119 if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) 120 Changed = !I.swapOperands(); 121 122 if (!I.isAssociative()) return Changed; 123 124 Instruction::BinaryOps Opcode = I.getOpcode(); 125 if (BinaryOperator *Op = dyn_cast<BinaryOperator>(I.getOperand(0))) 126 if (Op->getOpcode() == Opcode && isa<Constant>(Op->getOperand(1))) { 127 if (isa<Constant>(I.getOperand(1))) { 128 Constant *Folded = ConstantExpr::get(I.getOpcode(), 129 cast<Constant>(I.getOperand(1)), 130 cast<Constant>(Op->getOperand(1))); 131 I.setOperand(0, Op->getOperand(0)); 132 I.setOperand(1, Folded); 133 return true; 134 } 135 136 if (BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1))) 137 if (Op1->getOpcode() == Opcode && isa<Constant>(Op1->getOperand(1)) && 138 Op->hasOneUse() && Op1->hasOneUse()) { 139 Constant *C1 = cast<Constant>(Op->getOperand(1)); 140 Constant *C2 = cast<Constant>(Op1->getOperand(1)); 141 142 // Fold (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2)) 143 Constant *Folded = ConstantExpr::get(I.getOpcode(), C1, C2); 144 Instruction *New = BinaryOperator::Create(Opcode, Op->getOperand(0), 145 Op1->getOperand(0), 146 Op1->getName(), &I); 147 Worklist.Add(New); 148 I.setOperand(0, New); 149 I.setOperand(1, Folded); 150 return true; 151 } 152 } 153 return Changed; 154} 155 156// dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction 157// if the LHS is a constant zero (which is the 'negate' form). 158// 159Value *InstCombiner::dyn_castNegVal(Value *V) const { 160 if (BinaryOperator::isNeg(V)) 161 return BinaryOperator::getNegArgument(V); 162 163 // Constants can be considered to be negated values if they can be folded. 164 if (ConstantInt *C = dyn_cast<ConstantInt>(V)) 165 return ConstantExpr::getNeg(C); 166 167 if (ConstantVector *C = dyn_cast<ConstantVector>(V)) 168 if (C->getType()->getElementType()->isInteger()) 169 return ConstantExpr::getNeg(C); 170 171 return 0; 172} 173 174// dyn_castFNegVal - Given a 'fsub' instruction, return the RHS of the 175// instruction if the LHS is a constant negative zero (which is the 'negate' 176// form). 177// 178Value *InstCombiner::dyn_castFNegVal(Value *V) const { 179 if (BinaryOperator::isFNeg(V)) 180 return BinaryOperator::getFNegArgument(V); 181 182 // Constants can be considered to be negated values if they can be folded. 183 if (ConstantFP *C = dyn_cast<ConstantFP>(V)) 184 return ConstantExpr::getFNeg(C); 185 186 if (ConstantVector *C = dyn_cast<ConstantVector>(V)) 187 if (C->getType()->getElementType()->isFloatingPoint()) 188 return ConstantExpr::getFNeg(C); 189 190 return 0; 191} 192 193/// isFreeToInvert - Return true if the specified value is free to invert (apply 194/// ~ to). This happens in cases where the ~ can be eliminated. 195static inline bool isFreeToInvert(Value *V) { 196 // ~(~(X)) -> X. 197 if (BinaryOperator::isNot(V)) 198 return true; 199 200 // Constants can be considered to be not'ed values. 201 if (isa<ConstantInt>(V)) 202 return true; 203 204 // Compares can be inverted if they have a single use. 205 if (CmpInst *CI = dyn_cast<CmpInst>(V)) 206 return CI->hasOneUse(); 207 208 return false; 209} 210 211static inline Value *dyn_castNotVal(Value *V) { 212 // If this is not(not(x)) don't return that this is a not: we want the two 213 // not's to be folded first. 214 if (BinaryOperator::isNot(V)) { 215 Value *Operand = BinaryOperator::getNotArgument(V); 216 if (!isFreeToInvert(Operand)) 217 return Operand; 218 } 219 220 // Constants can be considered to be not'ed values... 221 if (ConstantInt *C = dyn_cast<ConstantInt>(V)) 222 return ConstantInt::get(C->getType(), ~C->getValue()); 223 return 0; 224} 225 226 227 228/// AddOne - Add one to a ConstantInt. 229static Constant *AddOne(Constant *C) { 230 return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1)); 231} 232/// SubOne - Subtract one from a ConstantInt. 233static Constant *SubOne(ConstantInt *C) { 234 return ConstantInt::get(C->getContext(), C->getValue()-1); 235} 236 237 238static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO, 239 InstCombiner *IC) { 240 if (CastInst *CI = dyn_cast<CastInst>(&I)) 241 return IC->Builder->CreateCast(CI->getOpcode(), SO, I.getType()); 242 243 // Figure out if the constant is the left or the right argument. 244 bool ConstIsRHS = isa<Constant>(I.getOperand(1)); 245 Constant *ConstOperand = cast<Constant>(I.getOperand(ConstIsRHS)); 246 247 if (Constant *SOC = dyn_cast<Constant>(SO)) { 248 if (ConstIsRHS) 249 return ConstantExpr::get(I.getOpcode(), SOC, ConstOperand); 250 return ConstantExpr::get(I.getOpcode(), ConstOperand, SOC); 251 } 252 253 Value *Op0 = SO, *Op1 = ConstOperand; 254 if (!ConstIsRHS) 255 std::swap(Op0, Op1); 256 257 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I)) 258 return IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1, 259 SO->getName()+".op"); 260 if (ICmpInst *CI = dyn_cast<ICmpInst>(&I)) 261 return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1, 262 SO->getName()+".cmp"); 263 if (FCmpInst *CI = dyn_cast<FCmpInst>(&I)) 264 return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1, 265 SO->getName()+".cmp"); 266 llvm_unreachable("Unknown binary instruction type!"); 267} 268 269// FoldOpIntoSelect - Given an instruction with a select as one operand and a 270// constant as the other operand, try to fold the binary operator into the 271// select arguments. This also works for Cast instructions, which obviously do 272// not have a second operand. 273Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) { 274 // Don't modify shared select instructions 275 if (!SI->hasOneUse()) return 0; 276 Value *TV = SI->getOperand(1); 277 Value *FV = SI->getOperand(2); 278 279 if (isa<Constant>(TV) || isa<Constant>(FV)) { 280 // Bool selects with constant operands can be folded to logical ops. 281 if (SI->getType() == Type::getInt1Ty(SI->getContext())) return 0; 282 283 Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, this); 284 Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, this); 285 286 return SelectInst::Create(SI->getCondition(), SelectTrueVal, 287 SelectFalseVal); 288 } 289 return 0; 290} 291 292 293/// FoldOpIntoPhi - Given a binary operator, cast instruction, or select which 294/// has a PHI node as operand #0, see if we can fold the instruction into the 295/// PHI (which is only possible if all operands to the PHI are constants). 296/// 297/// If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms 298/// that would normally be unprofitable because they strongly encourage jump 299/// threading. 300Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I, 301 bool AllowAggressive) { 302 AllowAggressive = false; 303 PHINode *PN = cast<PHINode>(I.getOperand(0)); 304 unsigned NumPHIValues = PN->getNumIncomingValues(); 305 if (NumPHIValues == 0 || 306 // We normally only transform phis with a single use, unless we're trying 307 // hard to make jump threading happen. 308 (!PN->hasOneUse() && !AllowAggressive)) 309 return 0; 310 311 312 // Check to see if all of the operands of the PHI are simple constants 313 // (constantint/constantfp/undef). If there is one non-constant value, 314 // remember the BB it is in. If there is more than one or if *it* is a PHI, 315 // bail out. We don't do arbitrary constant expressions here because moving 316 // their computation can be expensive without a cost model. 317 BasicBlock *NonConstBB = 0; 318 for (unsigned i = 0; i != NumPHIValues; ++i) 319 if (!isa<Constant>(PN->getIncomingValue(i)) || 320 isa<ConstantExpr>(PN->getIncomingValue(i))) { 321 if (NonConstBB) return 0; // More than one non-const value. 322 if (isa<PHINode>(PN->getIncomingValue(i))) return 0; // Itself a phi. 323 NonConstBB = PN->getIncomingBlock(i); 324 325 // If the incoming non-constant value is in I's block, we have an infinite 326 // loop. 327 if (NonConstBB == I.getParent()) 328 return 0; 329 } 330 331 // If there is exactly one non-constant value, we can insert a copy of the 332 // operation in that block. However, if this is a critical edge, we would be 333 // inserting the computation one some other paths (e.g. inside a loop). Only 334 // do this if the pred block is unconditionally branching into the phi block. 335 if (NonConstBB != 0 && !AllowAggressive) { 336 BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator()); 337 if (!BI || !BI->isUnconditional()) return 0; 338 } 339 340 // Okay, we can do the transformation: create the new PHI node. 341 PHINode *NewPN = PHINode::Create(I.getType(), ""); 342 NewPN->reserveOperandSpace(PN->getNumOperands()/2); 343 InsertNewInstBefore(NewPN, *PN); 344 NewPN->takeName(PN); 345 346 // Next, add all of the operands to the PHI. 347 if (SelectInst *SI = dyn_cast<SelectInst>(&I)) { 348 // We only currently try to fold the condition of a select when it is a phi, 349 // not the true/false values. 350 Value *TrueV = SI->getTrueValue(); 351 Value *FalseV = SI->getFalseValue(); 352 BasicBlock *PhiTransBB = PN->getParent(); 353 for (unsigned i = 0; i != NumPHIValues; ++i) { 354 BasicBlock *ThisBB = PN->getIncomingBlock(i); 355 Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB); 356 Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB); 357 Value *InV = 0; 358 if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) { 359 InV = InC->isNullValue() ? FalseVInPred : TrueVInPred; 360 } else { 361 assert(PN->getIncomingBlock(i) == NonConstBB); 362 InV = SelectInst::Create(PN->getIncomingValue(i), TrueVInPred, 363 FalseVInPred, 364 "phitmp", NonConstBB->getTerminator()); 365 Worklist.Add(cast<Instruction>(InV)); 366 } 367 NewPN->addIncoming(InV, ThisBB); 368 } 369 } else if (I.getNumOperands() == 2) { 370 Constant *C = cast<Constant>(I.getOperand(1)); 371 for (unsigned i = 0; i != NumPHIValues; ++i) { 372 Value *InV = 0; 373 if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) { 374 if (CmpInst *CI = dyn_cast<CmpInst>(&I)) 375 InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C); 376 else 377 InV = ConstantExpr::get(I.getOpcode(), InC, C); 378 } else { 379 assert(PN->getIncomingBlock(i) == NonConstBB); 380 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I)) 381 InV = BinaryOperator::Create(BO->getOpcode(), 382 PN->getIncomingValue(i), C, "phitmp", 383 NonConstBB->getTerminator()); 384 else if (CmpInst *CI = dyn_cast<CmpInst>(&I)) 385 InV = CmpInst::Create(CI->getOpcode(), 386 CI->getPredicate(), 387 PN->getIncomingValue(i), C, "phitmp", 388 NonConstBB->getTerminator()); 389 else 390 llvm_unreachable("Unknown binop!"); 391 392 Worklist.Add(cast<Instruction>(InV)); 393 } 394 NewPN->addIncoming(InV, PN->getIncomingBlock(i)); 395 } 396 } else { 397 CastInst *CI = cast<CastInst>(&I); 398 const Type *RetTy = CI->getType(); 399 for (unsigned i = 0; i != NumPHIValues; ++i) { 400 Value *InV; 401 if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) { 402 InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy); 403 } else { 404 assert(PN->getIncomingBlock(i) == NonConstBB); 405 InV = CastInst::Create(CI->getOpcode(), PN->getIncomingValue(i), 406 I.getType(), "phitmp", 407 NonConstBB->getTerminator()); 408 Worklist.Add(cast<Instruction>(InV)); 409 } 410 NewPN->addIncoming(InV, PN->getIncomingBlock(i)); 411 } 412 } 413 return ReplaceInstUsesWith(I, NewPN); 414} 415 416 417/// getICmpCode - Encode a icmp predicate into a three bit mask. These bits 418/// are carefully arranged to allow folding of expressions such as: 419/// 420/// (A < B) | (A > B) --> (A != B) 421/// 422/// Note that this is only valid if the first and second predicates have the 423/// same sign. Is illegal to do: (A u< B) | (A s> B) 424/// 425/// Three bits are used to represent the condition, as follows: 426/// 0 A > B 427/// 1 A == B 428/// 2 A < B 429/// 430/// <=> Value Definition 431/// 000 0 Always false 432/// 001 1 A > B 433/// 010 2 A == B 434/// 011 3 A >= B 435/// 100 4 A < B 436/// 101 5 A != B 437/// 110 6 A <= B 438/// 111 7 Always true 439/// 440static unsigned getICmpCode(const ICmpInst *ICI) { 441 switch (ICI->getPredicate()) { 442 // False -> 0 443 case ICmpInst::ICMP_UGT: return 1; // 001 444 case ICmpInst::ICMP_SGT: return 1; // 001 445 case ICmpInst::ICMP_EQ: return 2; // 010 446 case ICmpInst::ICMP_UGE: return 3; // 011 447 case ICmpInst::ICMP_SGE: return 3; // 011 448 case ICmpInst::ICMP_ULT: return 4; // 100 449 case ICmpInst::ICMP_SLT: return 4; // 100 450 case ICmpInst::ICMP_NE: return 5; // 101 451 case ICmpInst::ICMP_ULE: return 6; // 110 452 case ICmpInst::ICMP_SLE: return 6; // 110 453 // True -> 7 454 default: 455 llvm_unreachable("Invalid ICmp predicate!"); 456 return 0; 457 } 458} 459 460/// getFCmpCode - Similar to getICmpCode but for FCmpInst. This encodes a fcmp 461/// predicate into a three bit mask. It also returns whether it is an ordered 462/// predicate by reference. 463static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) { 464 isOrdered = false; 465 switch (CC) { 466 case FCmpInst::FCMP_ORD: isOrdered = true; return 0; // 000 467 case FCmpInst::FCMP_UNO: return 0; // 000 468 case FCmpInst::FCMP_OGT: isOrdered = true; return 1; // 001 469 case FCmpInst::FCMP_UGT: return 1; // 001 470 case FCmpInst::FCMP_OEQ: isOrdered = true; return 2; // 010 471 case FCmpInst::FCMP_UEQ: return 2; // 010 472 case FCmpInst::FCMP_OGE: isOrdered = true; return 3; // 011 473 case FCmpInst::FCMP_UGE: return 3; // 011 474 case FCmpInst::FCMP_OLT: isOrdered = true; return 4; // 100 475 case FCmpInst::FCMP_ULT: return 4; // 100 476 case FCmpInst::FCMP_ONE: isOrdered = true; return 5; // 101 477 case FCmpInst::FCMP_UNE: return 5; // 101 478 case FCmpInst::FCMP_OLE: isOrdered = true; return 6; // 110 479 case FCmpInst::FCMP_ULE: return 6; // 110 480 // True -> 7 481 default: 482 // Not expecting FCMP_FALSE and FCMP_TRUE; 483 llvm_unreachable("Unexpected FCmp predicate!"); 484 return 0; 485 } 486} 487 488/// getICmpValue - This is the complement of getICmpCode, which turns an 489/// opcode and two operands into either a constant true or false, or a brand 490/// new ICmp instruction. The sign is passed in to determine which kind 491/// of predicate to use in the new icmp instruction. 492static Value *getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS) { 493 switch (Code) { 494 default: assert(0 && "Illegal ICmp code!"); 495 case 0: 496 return ConstantInt::getFalse(LHS->getContext()); 497 case 1: 498 if (Sign) 499 return new ICmpInst(ICmpInst::ICMP_SGT, LHS, RHS); 500 return new ICmpInst(ICmpInst::ICMP_UGT, LHS, RHS); 501 case 2: 502 return new ICmpInst(ICmpInst::ICMP_EQ, LHS, RHS); 503 case 3: 504 if (Sign) 505 return new ICmpInst(ICmpInst::ICMP_SGE, LHS, RHS); 506 return new ICmpInst(ICmpInst::ICMP_UGE, LHS, RHS); 507 case 4: 508 if (Sign) 509 return new ICmpInst(ICmpInst::ICMP_SLT, LHS, RHS); 510 return new ICmpInst(ICmpInst::ICMP_ULT, LHS, RHS); 511 case 5: 512 return new ICmpInst(ICmpInst::ICMP_NE, LHS, RHS); 513 case 6: 514 if (Sign) 515 return new ICmpInst(ICmpInst::ICMP_SLE, LHS, RHS); 516 return new ICmpInst(ICmpInst::ICMP_ULE, LHS, RHS); 517 case 7: 518 return ConstantInt::getTrue(LHS->getContext()); 519 } 520} 521 522/// getFCmpValue - This is the complement of getFCmpCode, which turns an 523/// opcode and two operands into either a FCmp instruction. isordered is passed 524/// in to determine which kind of predicate to use in the new fcmp instruction. 525static Value *getFCmpValue(bool isordered, unsigned code, 526 Value *LHS, Value *RHS) { 527 switch (code) { 528 default: llvm_unreachable("Illegal FCmp code!"); 529 case 0: 530 if (isordered) 531 return new FCmpInst(FCmpInst::FCMP_ORD, LHS, RHS); 532 else 533 return new FCmpInst(FCmpInst::FCMP_UNO, LHS, RHS); 534 case 1: 535 if (isordered) 536 return new FCmpInst(FCmpInst::FCMP_OGT, LHS, RHS); 537 else 538 return new FCmpInst(FCmpInst::FCMP_UGT, LHS, RHS); 539 case 2: 540 if (isordered) 541 return new FCmpInst(FCmpInst::FCMP_OEQ, LHS, RHS); 542 else 543 return new FCmpInst(FCmpInst::FCMP_UEQ, LHS, RHS); 544 case 3: 545 if (isordered) 546 return new FCmpInst(FCmpInst::FCMP_OGE, LHS, RHS); 547 else 548 return new FCmpInst(FCmpInst::FCMP_UGE, LHS, RHS); 549 case 4: 550 if (isordered) 551 return new FCmpInst(FCmpInst::FCMP_OLT, LHS, RHS); 552 else 553 return new FCmpInst(FCmpInst::FCMP_ULT, LHS, RHS); 554 case 5: 555 if (isordered) 556 return new FCmpInst(FCmpInst::FCMP_ONE, LHS, RHS); 557 else 558 return new FCmpInst(FCmpInst::FCMP_UNE, LHS, RHS); 559 case 6: 560 if (isordered) 561 return new FCmpInst(FCmpInst::FCMP_OLE, LHS, RHS); 562 else 563 return new FCmpInst(FCmpInst::FCMP_ULE, LHS, RHS); 564 case 7: return ConstantInt::getTrue(LHS->getContext()); 565 } 566} 567 568/// PredicatesFoldable - Return true if both predicates match sign or if at 569/// least one of them is an equality comparison (which is signless). 570static bool PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) { 571 return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) || 572 (CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) || 573 (CmpInst::isSigned(p2) && ICmpInst::isEquality(p1)); 574} 575 576// OptAndOp - This handles expressions of the form ((val OP C1) & C2). Where 577// the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'. Op is 578// guaranteed to be a binary operator. 579Instruction *InstCombiner::OptAndOp(Instruction *Op, 580 ConstantInt *OpRHS, 581 ConstantInt *AndRHS, 582 BinaryOperator &TheAnd) { 583 Value *X = Op->getOperand(0); 584 Constant *Together = 0; 585 if (!Op->isShift()) 586 Together = ConstantExpr::getAnd(AndRHS, OpRHS); 587 588 switch (Op->getOpcode()) { 589 case Instruction::Xor: 590 if (Op->hasOneUse()) { 591 // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2) 592 Value *And = Builder->CreateAnd(X, AndRHS); 593 And->takeName(Op); 594 return BinaryOperator::CreateXor(And, Together); 595 } 596 break; 597 case Instruction::Or: 598 if (Together == AndRHS) // (X | C) & C --> C 599 return ReplaceInstUsesWith(TheAnd, AndRHS); 600 601 if (Op->hasOneUse() && Together != OpRHS) { 602 // (X | C1) & C2 --> (X | (C1&C2)) & C2 603 Value *Or = Builder->CreateOr(X, Together); 604 Or->takeName(Op); 605 return BinaryOperator::CreateAnd(Or, AndRHS); 606 } 607 break; 608 case Instruction::Add: 609 if (Op->hasOneUse()) { 610 // Adding a one to a single bit bit-field should be turned into an XOR 611 // of the bit. First thing to check is to see if this AND is with a 612 // single bit constant. 613 const APInt &AndRHSV = cast<ConstantInt>(AndRHS)->getValue(); 614 615 // If there is only one bit set. 616 if (AndRHSV.isPowerOf2()) { 617 // Ok, at this point, we know that we are masking the result of the 618 // ADD down to exactly one bit. If the constant we are adding has 619 // no bits set below this bit, then we can eliminate the ADD. 620 const APInt& AddRHS = cast<ConstantInt>(OpRHS)->getValue(); 621 622 // Check to see if any bits below the one bit set in AndRHSV are set. 623 if ((AddRHS & (AndRHSV-1)) == 0) { 624 // If not, the only thing that can effect the output of the AND is 625 // the bit specified by AndRHSV. If that bit is set, the effect of 626 // the XOR is to toggle the bit. If it is clear, then the ADD has 627 // no effect. 628 if ((AddRHS & AndRHSV) == 0) { // Bit is not set, noop 629 TheAnd.setOperand(0, X); 630 return &TheAnd; 631 } else { 632 // Pull the XOR out of the AND. 633 Value *NewAnd = Builder->CreateAnd(X, AndRHS); 634 NewAnd->takeName(Op); 635 return BinaryOperator::CreateXor(NewAnd, AndRHS); 636 } 637 } 638 } 639 } 640 break; 641 642 case Instruction::Shl: { 643 // We know that the AND will not produce any of the bits shifted in, so if 644 // the anded constant includes them, clear them now! 645 // 646 uint32_t BitWidth = AndRHS->getType()->getBitWidth(); 647 uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth); 648 APInt ShlMask(APInt::getHighBitsSet(BitWidth, BitWidth-OpRHSVal)); 649 ConstantInt *CI = ConstantInt::get(AndRHS->getContext(), 650 AndRHS->getValue() & ShlMask); 651 652 if (CI->getValue() == ShlMask) { 653 // Masking out bits that the shift already masks 654 return ReplaceInstUsesWith(TheAnd, Op); // No need for the and. 655 } else if (CI != AndRHS) { // Reducing bits set in and. 656 TheAnd.setOperand(1, CI); 657 return &TheAnd; 658 } 659 break; 660 } 661 case Instruction::LShr: { 662 // We know that the AND will not produce any of the bits shifted in, so if 663 // the anded constant includes them, clear them now! This only applies to 664 // unsigned shifts, because a signed shr may bring in set bits! 665 // 666 uint32_t BitWidth = AndRHS->getType()->getBitWidth(); 667 uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth); 668 APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal)); 669 ConstantInt *CI = ConstantInt::get(Op->getContext(), 670 AndRHS->getValue() & ShrMask); 671 672 if (CI->getValue() == ShrMask) { 673 // Masking out bits that the shift already masks. 674 return ReplaceInstUsesWith(TheAnd, Op); 675 } else if (CI != AndRHS) { 676 TheAnd.setOperand(1, CI); // Reduce bits set in and cst. 677 return &TheAnd; 678 } 679 break; 680 } 681 case Instruction::AShr: 682 // Signed shr. 683 // See if this is shifting in some sign extension, then masking it out 684 // with an and. 685 if (Op->hasOneUse()) { 686 uint32_t BitWidth = AndRHS->getType()->getBitWidth(); 687 uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth); 688 APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal)); 689 Constant *C = ConstantInt::get(Op->getContext(), 690 AndRHS->getValue() & ShrMask); 691 if (C == AndRHS) { // Masking out bits shifted in. 692 // (Val ashr C1) & C2 -> (Val lshr C1) & C2 693 // Make the argument unsigned. 694 Value *ShVal = Op->getOperand(0); 695 ShVal = Builder->CreateLShr(ShVal, OpRHS, Op->getName()); 696 return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName()); 697 } 698 } 699 break; 700 } 701 return 0; 702} 703 704 705/// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is 706/// true, otherwise (V < Lo || V >= Hi). In pratice, we emit the more efficient 707/// (V-Lo) <u Hi-Lo. This method expects that Lo <= Hi. isSigned indicates 708/// whether to treat the V, Lo and HI as signed or not. IB is the location to 709/// insert new instructions. 710Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, 711 bool isSigned, bool Inside, 712 Instruction &IB) { 713 assert(cast<ConstantInt>(ConstantExpr::getICmp((isSigned ? 714 ICmpInst::ICMP_SLE:ICmpInst::ICMP_ULE), Lo, Hi))->getZExtValue() && 715 "Lo is not <= Hi in range emission code!"); 716 717 if (Inside) { 718 if (Lo == Hi) // Trivially false. 719 return new ICmpInst(ICmpInst::ICMP_NE, V, V); 720 721 // V >= Min && V < Hi --> V < Hi 722 if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) { 723 ICmpInst::Predicate pred = (isSigned ? 724 ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT); 725 return new ICmpInst(pred, V, Hi); 726 } 727 728 // Emit V-Lo <u Hi-Lo 729 Constant *NegLo = ConstantExpr::getNeg(Lo); 730 Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off"); 731 Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi); 732 return new ICmpInst(ICmpInst::ICMP_ULT, Add, UpperBound); 733 } 734 735 if (Lo == Hi) // Trivially true. 736 return new ICmpInst(ICmpInst::ICMP_EQ, V, V); 737 738 // V < Min || V >= Hi -> V > Hi-1 739 Hi = SubOne(cast<ConstantInt>(Hi)); 740 if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) { 741 ICmpInst::Predicate pred = (isSigned ? 742 ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT); 743 return new ICmpInst(pred, V, Hi); 744 } 745 746 // Emit V-Lo >u Hi-1-Lo 747 // Note that Hi has already had one subtracted from it, above. 748 ConstantInt *NegLo = cast<ConstantInt>(ConstantExpr::getNeg(Lo)); 749 Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off"); 750 Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi); 751 return new ICmpInst(ICmpInst::ICMP_UGT, Add, LowerBound); 752} 753 754// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s with 755// any number of 0s on either side. The 1s are allowed to wrap from LSB to 756// MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is 757// not, since all 1s are not contiguous. 758static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) { 759 const APInt& V = Val->getValue(); 760 uint32_t BitWidth = Val->getType()->getBitWidth(); 761 if (!APIntOps::isShiftedMask(BitWidth, V)) return false; 762 763 // look for the first zero bit after the run of ones 764 MB = BitWidth - ((V - 1) ^ V).countLeadingZeros(); 765 // look for the first non-zero bit 766 ME = V.getActiveBits(); 767 return true; 768} 769 770/// FoldLogicalPlusAnd - This is part of an expression (LHS +/- RHS) & Mask, 771/// where isSub determines whether the operator is a sub. If we can fold one of 772/// the following xforms: 773/// 774/// ((A & N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == Mask 775/// ((A | N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0 776/// ((A ^ N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0 777/// 778/// return (A +/- B). 779/// 780Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS, 781 ConstantInt *Mask, bool isSub, 782 Instruction &I) { 783 Instruction *LHSI = dyn_cast<Instruction>(LHS); 784 if (!LHSI || LHSI->getNumOperands() != 2 || 785 !isa<ConstantInt>(LHSI->getOperand(1))) return 0; 786 787 ConstantInt *N = cast<ConstantInt>(LHSI->getOperand(1)); 788 789 switch (LHSI->getOpcode()) { 790 default: return 0; 791 case Instruction::And: 792 if (ConstantExpr::getAnd(N, Mask) == Mask) { 793 // If the AndRHS is a power of two minus one (0+1+), this is simple. 794 if ((Mask->getValue().countLeadingZeros() + 795 Mask->getValue().countPopulation()) == 796 Mask->getValue().getBitWidth()) 797 break; 798 799 // Otherwise, if Mask is 0+1+0+, and if B is known to have the low 0+ 800 // part, we don't need any explicit masks to take them out of A. If that 801 // is all N is, ignore it. 802 uint32_t MB = 0, ME = 0; 803 if (isRunOfOnes(Mask, MB, ME)) { // begin/end bit of run, inclusive 804 uint32_t BitWidth = cast<IntegerType>(RHS->getType())->getBitWidth(); 805 APInt Mask(APInt::getLowBitsSet(BitWidth, MB-1)); 806 if (MaskedValueIsZero(RHS, Mask)) 807 break; 808 } 809 } 810 return 0; 811 case Instruction::Or: 812 case Instruction::Xor: 813 // If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0 814 if ((Mask->getValue().countLeadingZeros() + 815 Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth() 816 && ConstantExpr::getAnd(N, Mask)->isNullValue()) 817 break; 818 return 0; 819 } 820 821 if (isSub) 822 return Builder->CreateSub(LHSI->getOperand(0), RHS, "fold"); 823 return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold"); 824} 825 826/// FoldAndOfICmps - Fold (icmp)&(icmp) if possible. 827Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, 828 ICmpInst *LHS, ICmpInst *RHS) { 829 ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate(); 830 831 // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B) 832 if (PredicatesFoldable(LHSCC, RHSCC)) { 833 if (LHS->getOperand(0) == RHS->getOperand(1) && 834 LHS->getOperand(1) == RHS->getOperand(0)) 835 LHS->swapOperands(); 836 if (LHS->getOperand(0) == RHS->getOperand(0) && 837 LHS->getOperand(1) == RHS->getOperand(1)) { 838 Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1); 839 unsigned Code = getICmpCode(LHS) & getICmpCode(RHS); 840 bool isSigned = LHS->isSigned() || RHS->isSigned(); 841 Value *RV = getICmpValue(isSigned, Code, Op0, Op1); 842 if (Instruction *I = dyn_cast<Instruction>(RV)) 843 return I; 844 // Otherwise, it's a constant boolean value. 845 return ReplaceInstUsesWith(I, RV); 846 } 847 } 848 849 // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2). 850 Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0); 851 ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1)); 852 ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1)); 853 if (LHSCst == 0 || RHSCst == 0) return 0; 854 855 if (LHSCst == RHSCst && LHSCC == RHSCC) { 856 // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C) 857 // where C is a power of 2 858 if (LHSCC == ICmpInst::ICMP_ULT && 859 LHSCst->getValue().isPowerOf2()) { 860 Value *NewOr = Builder->CreateOr(Val, Val2); 861 return new ICmpInst(LHSCC, NewOr, LHSCst); 862 } 863 864 // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0) 865 if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) { 866 Value *NewOr = Builder->CreateOr(Val, Val2); 867 return new ICmpInst(LHSCC, NewOr, LHSCst); 868 } 869 } 870 871 // From here on, we only handle: 872 // (icmp1 A, C1) & (icmp2 A, C2) --> something simpler. 873 if (Val != Val2) return 0; 874 875 // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere. 876 if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE || 877 RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE || 878 LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE || 879 RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE) 880 return 0; 881 882 // We can't fold (ugt x, C) & (sgt x, C2). 883 if (!PredicatesFoldable(LHSCC, RHSCC)) 884 return 0; 885 886 // Ensure that the larger constant is on the RHS. 887 bool ShouldSwap; 888 if (CmpInst::isSigned(LHSCC) || 889 (ICmpInst::isEquality(LHSCC) && 890 CmpInst::isSigned(RHSCC))) 891 ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue()); 892 else 893 ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue()); 894 895 if (ShouldSwap) { 896 std::swap(LHS, RHS); 897 std::swap(LHSCst, RHSCst); 898 std::swap(LHSCC, RHSCC); 899 } 900 901 // At this point, we know we have have two icmp instructions 902 // comparing a value against two constants and and'ing the result 903 // together. Because of the above check, we know that we only have 904 // icmp eq, icmp ne, icmp [su]lt, and icmp [SU]gt here. We also know 905 // (from the icmp folding check above), that the two constants 906 // are not equal and that the larger constant is on the RHS 907 assert(LHSCst != RHSCst && "Compares not folded above?"); 908 909 switch (LHSCC) { 910 default: llvm_unreachable("Unknown integer condition code!"); 911 case ICmpInst::ICMP_EQ: 912 switch (RHSCC) { 913 default: llvm_unreachable("Unknown integer condition code!"); 914 case ICmpInst::ICMP_EQ: // (X == 13 & X == 15) -> false 915 case ICmpInst::ICMP_UGT: // (X == 13 & X > 15) -> false 916 case ICmpInst::ICMP_SGT: // (X == 13 & X > 15) -> false 917 return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); 918 case ICmpInst::ICMP_NE: // (X == 13 & X != 15) -> X == 13 919 case ICmpInst::ICMP_ULT: // (X == 13 & X < 15) -> X == 13 920 case ICmpInst::ICMP_SLT: // (X == 13 & X < 15) -> X == 13 921 return ReplaceInstUsesWith(I, LHS); 922 } 923 case ICmpInst::ICMP_NE: 924 switch (RHSCC) { 925 default: llvm_unreachable("Unknown integer condition code!"); 926 case ICmpInst::ICMP_ULT: 927 if (LHSCst == SubOne(RHSCst)) // (X != 13 & X u< 14) -> X < 13 928 return new ICmpInst(ICmpInst::ICMP_ULT, Val, LHSCst); 929 break; // (X != 13 & X u< 15) -> no change 930 case ICmpInst::ICMP_SLT: 931 if (LHSCst == SubOne(RHSCst)) // (X != 13 & X s< 14) -> X < 13 932 return new ICmpInst(ICmpInst::ICMP_SLT, Val, LHSCst); 933 break; // (X != 13 & X s< 15) -> no change 934 case ICmpInst::ICMP_EQ: // (X != 13 & X == 15) -> X == 15 935 case ICmpInst::ICMP_UGT: // (X != 13 & X u> 15) -> X u> 15 936 case ICmpInst::ICMP_SGT: // (X != 13 & X s> 15) -> X s> 15 937 return ReplaceInstUsesWith(I, RHS); 938 case ICmpInst::ICMP_NE: 939 if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1 940 Constant *AddCST = ConstantExpr::getNeg(LHSCst); 941 Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off"); 942 return new ICmpInst(ICmpInst::ICMP_UGT, Add, 943 ConstantInt::get(Add->getType(), 1)); 944 } 945 break; // (X != 13 & X != 15) -> no change 946 } 947 break; 948 case ICmpInst::ICMP_ULT: 949 switch (RHSCC) { 950 default: llvm_unreachable("Unknown integer condition code!"); 951 case ICmpInst::ICMP_EQ: // (X u< 13 & X == 15) -> false 952 case ICmpInst::ICMP_UGT: // (X u< 13 & X u> 15) -> false 953 return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); 954 case ICmpInst::ICMP_SGT: // (X u< 13 & X s> 15) -> no change 955 break; 956 case ICmpInst::ICMP_NE: // (X u< 13 & X != 15) -> X u< 13 957 case ICmpInst::ICMP_ULT: // (X u< 13 & X u< 15) -> X u< 13 958 return ReplaceInstUsesWith(I, LHS); 959 case ICmpInst::ICMP_SLT: // (X u< 13 & X s< 15) -> no change 960 break; 961 } 962 break; 963 case ICmpInst::ICMP_SLT: 964 switch (RHSCC) { 965 default: llvm_unreachable("Unknown integer condition code!"); 966 case ICmpInst::ICMP_EQ: // (X s< 13 & X == 15) -> false 967 case ICmpInst::ICMP_SGT: // (X s< 13 & X s> 15) -> false 968 return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); 969 case ICmpInst::ICMP_UGT: // (X s< 13 & X u> 15) -> no change 970 break; 971 case ICmpInst::ICMP_NE: // (X s< 13 & X != 15) -> X < 13 972 case ICmpInst::ICMP_SLT: // (X s< 13 & X s< 15) -> X < 13 973 return ReplaceInstUsesWith(I, LHS); 974 case ICmpInst::ICMP_ULT: // (X s< 13 & X u< 15) -> no change 975 break; 976 } 977 break; 978 case ICmpInst::ICMP_UGT: 979 switch (RHSCC) { 980 default: llvm_unreachable("Unknown integer condition code!"); 981 case ICmpInst::ICMP_EQ: // (X u> 13 & X == 15) -> X == 15 982 case ICmpInst::ICMP_UGT: // (X u> 13 & X u> 15) -> X u> 15 983 return ReplaceInstUsesWith(I, RHS); 984 case ICmpInst::ICMP_SGT: // (X u> 13 & X s> 15) -> no change 985 break; 986 case ICmpInst::ICMP_NE: 987 if (RHSCst == AddOne(LHSCst)) // (X u> 13 & X != 14) -> X u> 14 988 return new ICmpInst(LHSCC, Val, RHSCst); 989 break; // (X u> 13 & X != 15) -> no change 990 case ICmpInst::ICMP_ULT: // (X u> 13 & X u< 15) -> (X-14) <u 1 991 return InsertRangeTest(Val, AddOne(LHSCst), 992 RHSCst, false, true, I); 993 case ICmpInst::ICMP_SLT: // (X u> 13 & X s< 15) -> no change 994 break; 995 } 996 break; 997 case ICmpInst::ICMP_SGT: 998 switch (RHSCC) { 999 default: llvm_unreachable("Unknown integer condition code!"); 1000 case ICmpInst::ICMP_EQ: // (X s> 13 & X == 15) -> X == 15 1001 case ICmpInst::ICMP_SGT: // (X s> 13 & X s> 15) -> X s> 15 1002 return ReplaceInstUsesWith(I, RHS); 1003 case ICmpInst::ICMP_UGT: // (X s> 13 & X u> 15) -> no change 1004 break; 1005 case ICmpInst::ICMP_NE: 1006 if (RHSCst == AddOne(LHSCst)) // (X s> 13 & X != 14) -> X s> 14 1007 return new ICmpInst(LHSCC, Val, RHSCst); 1008 break; // (X s> 13 & X != 15) -> no change 1009 case ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) s< 1 1010 return InsertRangeTest(Val, AddOne(LHSCst), 1011 RHSCst, true, true, I); 1012 case ICmpInst::ICMP_ULT: // (X s> 13 & X u< 15) -> no change 1013 break; 1014 } 1015 break; 1016 } 1017 1018 return 0; 1019} 1020 1021Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS, 1022 FCmpInst *RHS) { 1023 1024 if (LHS->getPredicate() == FCmpInst::FCMP_ORD && 1025 RHS->getPredicate() == FCmpInst::FCMP_ORD) { 1026 // (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y) 1027 if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1))) 1028 if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) { 1029 // If either of the constants are nans, then the whole thing returns 1030 // false. 1031 if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) 1032 return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); 1033 return new FCmpInst(FCmpInst::FCMP_ORD, 1034 LHS->getOperand(0), RHS->getOperand(0)); 1035 } 1036 1037 // Handle vector zeros. This occurs because the canonical form of 1038 // "fcmp ord x,x" is "fcmp ord x, 0". 1039 if (isa<ConstantAggregateZero>(LHS->getOperand(1)) && 1040 isa<ConstantAggregateZero>(RHS->getOperand(1))) 1041 return new FCmpInst(FCmpInst::FCMP_ORD, 1042 LHS->getOperand(0), RHS->getOperand(0)); 1043 return 0; 1044 } 1045 1046 Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1); 1047 Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1); 1048 FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate(); 1049 1050 1051 if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) { 1052 // Swap RHS operands to match LHS. 1053 Op1CC = FCmpInst::getSwappedPredicate(Op1CC); 1054 std::swap(Op1LHS, Op1RHS); 1055 } 1056 1057 if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) { 1058 // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y). 1059 if (Op0CC == Op1CC) 1060 return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS); 1061 1062 if (Op0CC == FCmpInst::FCMP_FALSE || Op1CC == FCmpInst::FCMP_FALSE) 1063 return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); 1064 if (Op0CC == FCmpInst::FCMP_TRUE) 1065 return ReplaceInstUsesWith(I, RHS); 1066 if (Op1CC == FCmpInst::FCMP_TRUE) 1067 return ReplaceInstUsesWith(I, LHS); 1068 1069 bool Op0Ordered; 1070 bool Op1Ordered; 1071 unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); 1072 unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered); 1073 if (Op1Pred == 0) { 1074 std::swap(LHS, RHS); 1075 std::swap(Op0Pred, Op1Pred); 1076 std::swap(Op0Ordered, Op1Ordered); 1077 } 1078 if (Op0Pred == 0) { 1079 // uno && ueq -> uno && (uno || eq) -> ueq 1080 // ord && olt -> ord && (ord && lt) -> olt 1081 if (Op0Ordered == Op1Ordered) 1082 return ReplaceInstUsesWith(I, RHS); 1083 1084 // uno && oeq -> uno && (ord && eq) -> false 1085 // uno && ord -> false 1086 if (!Op0Ordered) 1087 return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); 1088 // ord && ueq -> ord && (uno || eq) -> oeq 1089 return cast<Instruction>(getFCmpValue(true, Op1Pred, Op0LHS, Op0RHS)); 1090 } 1091 } 1092 1093 return 0; 1094} 1095 1096 1097Instruction *InstCombiner::visitAnd(BinaryOperator &I) { 1098 bool Changed = SimplifyCommutative(I); 1099 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 1100 1101 if (Value *V = SimplifyAndInst(Op0, Op1, TD)) 1102 return ReplaceInstUsesWith(I, V); 1103 1104 // See if we can simplify any instructions used by the instruction whose sole 1105 // purpose is to compute bits we don't care about. 1106 if (SimplifyDemandedInstructionBits(I)) 1107 return &I; 1108 1109 if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) { 1110 const APInt &AndRHSMask = AndRHS->getValue(); 1111 APInt NotAndRHS(~AndRHSMask); 1112 1113 // Optimize a variety of ((val OP C1) & C2) combinations... 1114 if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) { 1115 Value *Op0LHS = Op0I->getOperand(0); 1116 Value *Op0RHS = Op0I->getOperand(1); 1117 switch (Op0I->getOpcode()) { 1118 default: break; 1119 case Instruction::Xor: 1120 case Instruction::Or: 1121 // If the mask is only needed on one incoming arm, push it up. 1122 if (!Op0I->hasOneUse()) break; 1123 1124 if (MaskedValueIsZero(Op0LHS, NotAndRHS)) { 1125 // Not masking anything out for the LHS, move to RHS. 1126 Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS, 1127 Op0RHS->getName()+".masked"); 1128 return BinaryOperator::Create(Op0I->getOpcode(), Op0LHS, NewRHS); 1129 } 1130 if (!isa<Constant>(Op0RHS) && 1131 MaskedValueIsZero(Op0RHS, NotAndRHS)) { 1132 // Not masking anything out for the RHS, move to LHS. 1133 Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS, 1134 Op0LHS->getName()+".masked"); 1135 return BinaryOperator::Create(Op0I->getOpcode(), NewLHS, Op0RHS); 1136 } 1137 1138 break; 1139 case Instruction::Add: 1140 // ((A & N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == AndRHS. 1141 // ((A | N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0 1142 // ((A ^ N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0 1143 if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, false, I)) 1144 return BinaryOperator::CreateAnd(V, AndRHS); 1145 if (Value *V = FoldLogicalPlusAnd(Op0RHS, Op0LHS, AndRHS, false, I)) 1146 return BinaryOperator::CreateAnd(V, AndRHS); // Add commutes 1147 break; 1148 1149 case Instruction::Sub: 1150 // ((A & N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == AndRHS. 1151 // ((A | N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0 1152 // ((A ^ N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0 1153 if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, true, I)) 1154 return BinaryOperator::CreateAnd(V, AndRHS); 1155 1156 // (A - N) & AndRHS -> -N & AndRHS iff A&AndRHS==0 and AndRHS 1157 // has 1's for all bits that the subtraction with A might affect. 1158 if (Op0I->hasOneUse()) { 1159 uint32_t BitWidth = AndRHSMask.getBitWidth(); 1160 uint32_t Zeros = AndRHSMask.countLeadingZeros(); 1161 APInt Mask = APInt::getLowBitsSet(BitWidth, BitWidth - Zeros); 1162 1163 ConstantInt *A = dyn_cast<ConstantInt>(Op0LHS); 1164 if (!(A && A->isZero()) && // avoid infinite recursion. 1165 MaskedValueIsZero(Op0LHS, Mask)) { 1166 Value *NewNeg = Builder->CreateNeg(Op0RHS); 1167 return BinaryOperator::CreateAnd(NewNeg, AndRHS); 1168 } 1169 } 1170 break; 1171 1172 case Instruction::Shl: 1173 case Instruction::LShr: 1174 // (1 << x) & 1 --> zext(x == 0) 1175 // (1 >> x) & 1 --> zext(x == 0) 1176 if (AndRHSMask == 1 && Op0LHS == AndRHS) { 1177 Value *NewICmp = 1178 Builder->CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType())); 1179 return new ZExtInst(NewICmp, I.getType()); 1180 } 1181 break; 1182 } 1183 1184 if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) 1185 if (Instruction *Res = OptAndOp(Op0I, Op0CI, AndRHS, I)) 1186 return Res; 1187 } else if (CastInst *CI = dyn_cast<CastInst>(Op0)) { 1188 // If this is an integer truncation or change from signed-to-unsigned, and 1189 // if the source is an and/or with immediate, transform it. This 1190 // frequently occurs for bitfield accesses. 1191 if (Instruction *CastOp = dyn_cast<Instruction>(CI->getOperand(0))) { 1192 if ((isa<TruncInst>(CI) || isa<BitCastInst>(CI)) && 1193 CastOp->getNumOperands() == 2) 1194 if (ConstantInt *AndCI =dyn_cast<ConstantInt>(CastOp->getOperand(1))){ 1195 if (CastOp->getOpcode() == Instruction::And) { 1196 // Change: and (cast (and X, C1) to T), C2 1197 // into : and (cast X to T), trunc_or_bitcast(C1)&C2 1198 // This will fold the two constants together, which may allow 1199 // other simplifications. 1200 Value *NewCast = Builder->CreateTruncOrBitCast( 1201 CastOp->getOperand(0), I.getType(), 1202 CastOp->getName()+".shrunk"); 1203 // trunc_or_bitcast(C1)&C2 1204 Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType()); 1205 C3 = ConstantExpr::getAnd(C3, AndRHS); 1206 return BinaryOperator::CreateAnd(NewCast, C3); 1207 } else if (CastOp->getOpcode() == Instruction::Or) { 1208 // Change: and (cast (or X, C1) to T), C2 1209 // into : trunc(C1)&C2 iff trunc(C1)&C2 == C2 1210 Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType()); 1211 if (ConstantExpr::getAnd(C3, AndRHS) == AndRHS) 1212 // trunc(C1)&C2 1213 return ReplaceInstUsesWith(I, AndRHS); 1214 } 1215 } 1216 } 1217 } 1218 1219 // Try to fold constant and into select arguments. 1220 if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) 1221 if (Instruction *R = FoldOpIntoSelect(I, SI)) 1222 return R; 1223 if (isa<PHINode>(Op0)) 1224 if (Instruction *NV = FoldOpIntoPhi(I)) 1225 return NV; 1226 } 1227 1228 1229 // (~A & ~B) == (~(A | B)) - De Morgan's Law 1230 if (Value *Op0NotVal = dyn_castNotVal(Op0)) 1231 if (Value *Op1NotVal = dyn_castNotVal(Op1)) 1232 if (Op0->hasOneUse() && Op1->hasOneUse()) { 1233 Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal, 1234 I.getName()+".demorgan"); 1235 return BinaryOperator::CreateNot(Or); 1236 } 1237 1238 { 1239 Value *A = 0, *B = 0, *C = 0, *D = 0; 1240 // (A|B) & ~(A&B) -> A^B 1241 if (match(Op0, m_Or(m_Value(A), m_Value(B))) && 1242 match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) && 1243 ((A == C && B == D) || (A == D && B == C))) 1244 return BinaryOperator::CreateXor(A, B); 1245 1246 // ~(A&B) & (A|B) -> A^B 1247 if (match(Op1, m_Or(m_Value(A), m_Value(B))) && 1248 match(Op0, m_Not(m_And(m_Value(C), m_Value(D)))) && 1249 ((A == C && B == D) || (A == D && B == C))) 1250 return BinaryOperator::CreateXor(A, B); 1251 1252 if (Op0->hasOneUse() && 1253 match(Op0, m_Xor(m_Value(A), m_Value(B)))) { 1254 if (A == Op1) { // (A^B)&A -> A&(A^B) 1255 I.swapOperands(); // Simplify below 1256 std::swap(Op0, Op1); 1257 } else if (B == Op1) { // (A^B)&B -> B&(B^A) 1258 cast<BinaryOperator>(Op0)->swapOperands(); 1259 I.swapOperands(); // Simplify below 1260 std::swap(Op0, Op1); 1261 } 1262 } 1263 1264 if (Op1->hasOneUse() && 1265 match(Op1, m_Xor(m_Value(A), m_Value(B)))) { 1266 if (B == Op0) { // B&(A^B) -> B&(B^A) 1267 cast<BinaryOperator>(Op1)->swapOperands(); 1268 std::swap(A, B); 1269 } 1270 if (A == Op0) // A&(A^B) -> A & ~B 1271 return BinaryOperator::CreateAnd(A, Builder->CreateNot(B, "tmp")); 1272 } 1273 1274 // (A&((~A)|B)) -> A&B 1275 if (match(Op0, m_Or(m_Not(m_Specific(Op1)), m_Value(A))) || 1276 match(Op0, m_Or(m_Value(A), m_Not(m_Specific(Op1))))) 1277 return BinaryOperator::CreateAnd(A, Op1); 1278 if (match(Op1, m_Or(m_Not(m_Specific(Op0)), m_Value(A))) || 1279 match(Op1, m_Or(m_Value(A), m_Not(m_Specific(Op0))))) 1280 return BinaryOperator::CreateAnd(A, Op0); 1281 } 1282 1283 if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1)) 1284 if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0)) 1285 if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS)) 1286 return Res; 1287 1288 // fold (and (cast A), (cast B)) -> (cast (and A, B)) 1289 if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) 1290 if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) 1291 if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind ? 1292 const Type *SrcTy = Op0C->getOperand(0)->getType(); 1293 if (SrcTy == Op1C->getOperand(0)->getType() && 1294 SrcTy->isIntOrIntVector() && 1295 // Only do this if the casts both really cause code to be generated. 1296 ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), 1297 I.getType()) && 1298 ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), 1299 I.getType())) { 1300 Value *NewOp = Builder->CreateAnd(Op0C->getOperand(0), 1301 Op1C->getOperand(0), I.getName()); 1302 return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); 1303 } 1304 } 1305 1306 // (X >> Z) & (Y >> Z) -> (X&Y) >> Z for all shifts. 1307 if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) { 1308 if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0)) 1309 if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() && 1310 SI0->getOperand(1) == SI1->getOperand(1) && 1311 (SI0->hasOneUse() || SI1->hasOneUse())) { 1312 Value *NewOp = 1313 Builder->CreateAnd(SI0->getOperand(0), SI1->getOperand(0), 1314 SI0->getName()); 1315 return BinaryOperator::Create(SI1->getOpcode(), NewOp, 1316 SI1->getOperand(1)); 1317 } 1318 } 1319 1320 // If and'ing two fcmp, try combine them into one. 1321 if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) { 1322 if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) 1323 if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS)) 1324 return Res; 1325 } 1326 1327 return Changed ? &I : 0; 1328} 1329 1330/// CollectBSwapParts - Analyze the specified subexpression and see if it is 1331/// capable of providing pieces of a bswap. The subexpression provides pieces 1332/// of a bswap if it is proven that each of the non-zero bytes in the output of 1333/// the expression came from the corresponding "byte swapped" byte in some other 1334/// value. For example, if the current subexpression is "(shl i32 %X, 24)" then 1335/// we know that the expression deposits the low byte of %X into the high byte 1336/// of the bswap result and that all other bytes are zero. This expression is 1337/// accepted, the high byte of ByteValues is set to X to indicate a correct 1338/// match. 1339/// 1340/// This function returns true if the match was unsuccessful and false if so. 1341/// On entry to the function the "OverallLeftShift" is a signed integer value 1342/// indicating the number of bytes that the subexpression is later shifted. For 1343/// example, if the expression is later right shifted by 16 bits, the 1344/// OverallLeftShift value would be -2 on entry. This is used to specify which 1345/// byte of ByteValues is actually being set. 1346/// 1347/// Similarly, ByteMask is a bitmask where a bit is clear if its corresponding 1348/// byte is masked to zero by a user. For example, in (X & 255), X will be 1349/// processed with a bytemask of 1. Because bytemask is 32-bits, this limits 1350/// this function to working on up to 32-byte (256 bit) values. ByteMask is 1351/// always in the local (OverallLeftShift) coordinate space. 1352/// 1353static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask, 1354 SmallVector<Value*, 8> &ByteValues) { 1355 if (Instruction *I = dyn_cast<Instruction>(V)) { 1356 // If this is an or instruction, it may be an inner node of the bswap. 1357 if (I->getOpcode() == Instruction::Or) { 1358 return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask, 1359 ByteValues) || 1360 CollectBSwapParts(I->getOperand(1), OverallLeftShift, ByteMask, 1361 ByteValues); 1362 } 1363 1364 // If this is a logical shift by a constant multiple of 8, recurse with 1365 // OverallLeftShift and ByteMask adjusted. 1366 if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) { 1367 unsigned ShAmt = 1368 cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U); 1369 // Ensure the shift amount is defined and of a byte value. 1370 if ((ShAmt & 7) || (ShAmt > 8*ByteValues.size())) 1371 return true; 1372 1373 unsigned ByteShift = ShAmt >> 3; 1374 if (I->getOpcode() == Instruction::Shl) { 1375 // X << 2 -> collect(X, +2) 1376 OverallLeftShift += ByteShift; 1377 ByteMask >>= ByteShift; 1378 } else { 1379 // X >>u 2 -> collect(X, -2) 1380 OverallLeftShift -= ByteShift; 1381 ByteMask <<= ByteShift; 1382 ByteMask &= (~0U >> (32-ByteValues.size())); 1383 } 1384 1385 if (OverallLeftShift >= (int)ByteValues.size()) return true; 1386 if (OverallLeftShift <= -(int)ByteValues.size()) return true; 1387 1388 return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask, 1389 ByteValues); 1390 } 1391 1392 // If this is a logical 'and' with a mask that clears bytes, clear the 1393 // corresponding bytes in ByteMask. 1394 if (I->getOpcode() == Instruction::And && 1395 isa<ConstantInt>(I->getOperand(1))) { 1396 // Scan every byte of the and mask, seeing if the byte is either 0 or 255. 1397 unsigned NumBytes = ByteValues.size(); 1398 APInt Byte(I->getType()->getPrimitiveSizeInBits(), 255); 1399 const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue(); 1400 1401 for (unsigned i = 0; i != NumBytes; ++i, Byte <<= 8) { 1402 // If this byte is masked out by a later operation, we don't care what 1403 // the and mask is. 1404 if ((ByteMask & (1 << i)) == 0) 1405 continue; 1406 1407 // If the AndMask is all zeros for this byte, clear the bit. 1408 APInt MaskB = AndMask & Byte; 1409 if (MaskB == 0) { 1410 ByteMask &= ~(1U << i); 1411 continue; 1412 } 1413 1414 // If the AndMask is not all ones for this byte, it's not a bytezap. 1415 if (MaskB != Byte) 1416 return true; 1417 1418 // Otherwise, this byte is kept. 1419 } 1420 1421 return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask, 1422 ByteValues); 1423 } 1424 } 1425 1426 // Okay, we got to something that isn't a shift, 'or' or 'and'. This must be 1427 // the input value to the bswap. Some observations: 1) if more than one byte 1428 // is demanded from this input, then it could not be successfully assembled 1429 // into a byteswap. At least one of the two bytes would not be aligned with 1430 // their ultimate destination. 1431 if (!isPowerOf2_32(ByteMask)) return true; 1432 unsigned InputByteNo = CountTrailingZeros_32(ByteMask); 1433 1434 // 2) The input and ultimate destinations must line up: if byte 3 of an i32 1435 // is demanded, it needs to go into byte 0 of the result. This means that the 1436 // byte needs to be shifted until it lands in the right byte bucket. The 1437 // shift amount depends on the position: if the byte is coming from the high 1438 // part of the value (e.g. byte 3) then it must be shifted right. If from the 1439 // low part, it must be shifted left. 1440 unsigned DestByteNo = InputByteNo + OverallLeftShift; 1441 if (InputByteNo < ByteValues.size()/2) { 1442 if (ByteValues.size()-1-DestByteNo != InputByteNo) 1443 return true; 1444 } else { 1445 if (ByteValues.size()-1-DestByteNo != InputByteNo) 1446 return true; 1447 } 1448 1449 // If the destination byte value is already defined, the values are or'd 1450 // together, which isn't a bswap (unless it's an or of the same bits). 1451 if (ByteValues[DestByteNo] && ByteValues[DestByteNo] != V) 1452 return true; 1453 ByteValues[DestByteNo] = V; 1454 return false; 1455} 1456 1457/// MatchBSwap - Given an OR instruction, check to see if this is a bswap idiom. 1458/// If so, insert the new bswap intrinsic and return it. 1459Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) { 1460 const IntegerType *ITy = dyn_cast<IntegerType>(I.getType()); 1461 if (!ITy || ITy->getBitWidth() % 16 || 1462 // ByteMask only allows up to 32-byte values. 1463 ITy->getBitWidth() > 32*8) 1464 return 0; // Can only bswap pairs of bytes. Can't do vectors. 1465 1466 /// ByteValues - For each byte of the result, we keep track of which value 1467 /// defines each byte. 1468 SmallVector<Value*, 8> ByteValues; 1469 ByteValues.resize(ITy->getBitWidth()/8); 1470 1471 // Try to find all the pieces corresponding to the bswap. 1472 uint32_t ByteMask = ~0U >> (32-ByteValues.size()); 1473 if (CollectBSwapParts(&I, 0, ByteMask, ByteValues)) 1474 return 0; 1475 1476 // Check to see if all of the bytes come from the same value. 1477 Value *V = ByteValues[0]; 1478 if (V == 0) return 0; // Didn't find a byte? Must be zero. 1479 1480 // Check to make sure that all of the bytes come from the same value. 1481 for (unsigned i = 1, e = ByteValues.size(); i != e; ++i) 1482 if (ByteValues[i] != V) 1483 return 0; 1484 const Type *Tys[] = { ITy }; 1485 Module *M = I.getParent()->getParent()->getParent(); 1486 Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1); 1487 return CallInst::Create(F, V); 1488} 1489 1490/// MatchSelectFromAndOr - We have an expression of the form (A&C)|(B&D). Check 1491/// If A is (cond?-1:0) and either B or D is ~(cond?-1,0) or (cond?0,-1), then 1492/// we can simplify this expression to "cond ? C : D or B". 1493static Instruction *MatchSelectFromAndOr(Value *A, Value *B, 1494 Value *C, Value *D) { 1495 // If A is not a select of -1/0, this cannot match. 1496 Value *Cond = 0; 1497 if (!match(A, m_SelectCst<-1, 0>(m_Value(Cond)))) 1498 return 0; 1499 1500 // ((cond?-1:0)&C) | (B&(cond?0:-1)) -> cond ? C : B. 1501 if (match(D, m_SelectCst<0, -1>(m_Specific(Cond)))) 1502 return SelectInst::Create(Cond, C, B); 1503 if (match(D, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond))))) 1504 return SelectInst::Create(Cond, C, B); 1505 // ((cond?-1:0)&C) | ((cond?0:-1)&D) -> cond ? C : D. 1506 if (match(B, m_SelectCst<0, -1>(m_Specific(Cond)))) 1507 return SelectInst::Create(Cond, C, D); 1508 if (match(B, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond))))) 1509 return SelectInst::Create(Cond, C, D); 1510 return 0; 1511} 1512 1513/// FoldOrOfICmps - Fold (icmp)|(icmp) if possible. 1514Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, 1515 ICmpInst *LHS, ICmpInst *RHS) { 1516 ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate(); 1517 1518 // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B) 1519 if (PredicatesFoldable(LHSCC, RHSCC)) { 1520 if (LHS->getOperand(0) == RHS->getOperand(1) && 1521 LHS->getOperand(1) == RHS->getOperand(0)) 1522 LHS->swapOperands(); 1523 if (LHS->getOperand(0) == RHS->getOperand(0) && 1524 LHS->getOperand(1) == RHS->getOperand(1)) { 1525 Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1); 1526 unsigned Code = getICmpCode(LHS) | getICmpCode(RHS); 1527 bool isSigned = LHS->isSigned() || RHS->isSigned(); 1528 Value *RV = getICmpValue(isSigned, Code, Op0, Op1); 1529 if (Instruction *I = dyn_cast<Instruction>(RV)) 1530 return I; 1531 // Otherwise, it's a constant boolean value. 1532 return ReplaceInstUsesWith(I, RV); 1533 } 1534 } 1535 1536 // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2). 1537 Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0); 1538 ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1)); 1539 ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1)); 1540 if (LHSCst == 0 || RHSCst == 0) return 0; 1541 1542 // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0) 1543 if (LHSCst == RHSCst && LHSCC == RHSCC && 1544 LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) { 1545 Value *NewOr = Builder->CreateOr(Val, Val2); 1546 return new ICmpInst(LHSCC, NewOr, LHSCst); 1547 } 1548 1549 // From here on, we only handle: 1550 // (icmp1 A, C1) | (icmp2 A, C2) --> something simpler. 1551 if (Val != Val2) return 0; 1552 1553 // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere. 1554 if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE || 1555 RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE || 1556 LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE || 1557 RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE) 1558 return 0; 1559 1560 // We can't fold (ugt x, C) | (sgt x, C2). 1561 if (!PredicatesFoldable(LHSCC, RHSCC)) 1562 return 0; 1563 1564 // Ensure that the larger constant is on the RHS. 1565 bool ShouldSwap; 1566 if (CmpInst::isSigned(LHSCC) || 1567 (ICmpInst::isEquality(LHSCC) && 1568 CmpInst::isSigned(RHSCC))) 1569 ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue()); 1570 else 1571 ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue()); 1572 1573 if (ShouldSwap) { 1574 std::swap(LHS, RHS); 1575 std::swap(LHSCst, RHSCst); 1576 std::swap(LHSCC, RHSCC); 1577 } 1578 1579 // At this point, we know we have have two icmp instructions 1580 // comparing a value against two constants and or'ing the result 1581 // together. Because of the above check, we know that we only have 1582 // ICMP_EQ, ICMP_NE, ICMP_LT, and ICMP_GT here. We also know (from the 1583 // icmp folding check above), that the two constants are not 1584 // equal. 1585 assert(LHSCst != RHSCst && "Compares not folded above?"); 1586 1587 switch (LHSCC) { 1588 default: llvm_unreachable("Unknown integer condition code!"); 1589 case ICmpInst::ICMP_EQ: 1590 switch (RHSCC) { 1591 default: llvm_unreachable("Unknown integer condition code!"); 1592 case ICmpInst::ICMP_EQ: 1593 if (LHSCst == SubOne(RHSCst)) { 1594 // (X == 13 | X == 14) -> X-13 <u 2 1595 Constant *AddCST = ConstantExpr::getNeg(LHSCst); 1596 Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off"); 1597 AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst); 1598 return new ICmpInst(ICmpInst::ICMP_ULT, Add, AddCST); 1599 } 1600 break; // (X == 13 | X == 15) -> no change 1601 case ICmpInst::ICMP_UGT: // (X == 13 | X u> 14) -> no change 1602 case ICmpInst::ICMP_SGT: // (X == 13 | X s> 14) -> no change 1603 break; 1604 case ICmpInst::ICMP_NE: // (X == 13 | X != 15) -> X != 15 1605 case ICmpInst::ICMP_ULT: // (X == 13 | X u< 15) -> X u< 15 1606 case ICmpInst::ICMP_SLT: // (X == 13 | X s< 15) -> X s< 15 1607 return ReplaceInstUsesWith(I, RHS); 1608 } 1609 break; 1610 case ICmpInst::ICMP_NE: 1611 switch (RHSCC) { 1612 default: llvm_unreachable("Unknown integer condition code!"); 1613 case ICmpInst::ICMP_EQ: // (X != 13 | X == 15) -> X != 13 1614 case ICmpInst::ICMP_UGT: // (X != 13 | X u> 15) -> X != 13 1615 case ICmpInst::ICMP_SGT: // (X != 13 | X s> 15) -> X != 13 1616 return ReplaceInstUsesWith(I, LHS); 1617 case ICmpInst::ICMP_NE: // (X != 13 | X != 15) -> true 1618 case ICmpInst::ICMP_ULT: // (X != 13 | X u< 15) -> true 1619 case ICmpInst::ICMP_SLT: // (X != 13 | X s< 15) -> true 1620 return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); 1621 } 1622 break; 1623 case ICmpInst::ICMP_ULT: 1624 switch (RHSCC) { 1625 default: llvm_unreachable("Unknown integer condition code!"); 1626 case ICmpInst::ICMP_EQ: // (X u< 13 | X == 14) -> no change 1627 break; 1628 case ICmpInst::ICMP_UGT: // (X u< 13 | X u> 15) -> (X-13) u> 2 1629 // If RHSCst is [us]MAXINT, it is always false. Not handling 1630 // this can cause overflow. 1631 if (RHSCst->isMaxValue(false)) 1632 return ReplaceInstUsesWith(I, LHS); 1633 return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), 1634 false, false, I); 1635 case ICmpInst::ICMP_SGT: // (X u< 13 | X s> 15) -> no change 1636 break; 1637 case ICmpInst::ICMP_NE: // (X u< 13 | X != 15) -> X != 15 1638 case ICmpInst::ICMP_ULT: // (X u< 13 | X u< 15) -> X u< 15 1639 return ReplaceInstUsesWith(I, RHS); 1640 case ICmpInst::ICMP_SLT: // (X u< 13 | X s< 15) -> no change 1641 break; 1642 } 1643 break; 1644 case ICmpInst::ICMP_SLT: 1645 switch (RHSCC) { 1646 default: llvm_unreachable("Unknown integer condition code!"); 1647 case ICmpInst::ICMP_EQ: // (X s< 13 | X == 14) -> no change 1648 break; 1649 case ICmpInst::ICMP_SGT: // (X s< 13 | X s> 15) -> (X-13) s> 2 1650 // If RHSCst is [us]MAXINT, it is always false. Not handling 1651 // this can cause overflow. 1652 if (RHSCst->isMaxValue(true)) 1653 return ReplaceInstUsesWith(I, LHS); 1654 return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), 1655 true, false, I); 1656 case ICmpInst::ICMP_UGT: // (X s< 13 | X u> 15) -> no change 1657 break; 1658 case ICmpInst::ICMP_NE: // (X s< 13 | X != 15) -> X != 15 1659 case ICmpInst::ICMP_SLT: // (X s< 13 | X s< 15) -> X s< 15 1660 return ReplaceInstUsesWith(I, RHS); 1661 case ICmpInst::ICMP_ULT: // (X s< 13 | X u< 15) -> no change 1662 break; 1663 } 1664 break; 1665 case ICmpInst::ICMP_UGT: 1666 switch (RHSCC) { 1667 default: llvm_unreachable("Unknown integer condition code!"); 1668 case ICmpInst::ICMP_EQ: // (X u> 13 | X == 15) -> X u> 13 1669 case ICmpInst::ICMP_UGT: // (X u> 13 | X u> 15) -> X u> 13 1670 return ReplaceInstUsesWith(I, LHS); 1671 case ICmpInst::ICMP_SGT: // (X u> 13 | X s> 15) -> no change 1672 break; 1673 case ICmpInst::ICMP_NE: // (X u> 13 | X != 15) -> true 1674 case ICmpInst::ICMP_ULT: // (X u> 13 | X u< 15) -> true 1675 return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); 1676 case ICmpInst::ICMP_SLT: // (X u> 13 | X s< 15) -> no change 1677 break; 1678 } 1679 break; 1680 case ICmpInst::ICMP_SGT: 1681 switch (RHSCC) { 1682 default: llvm_unreachable("Unknown integer condition code!"); 1683 case ICmpInst::ICMP_EQ: // (X s> 13 | X == 15) -> X > 13 1684 case ICmpInst::ICMP_SGT: // (X s> 13 | X s> 15) -> X > 13 1685 return ReplaceInstUsesWith(I, LHS); 1686 case ICmpInst::ICMP_UGT: // (X s> 13 | X u> 15) -> no change 1687 break; 1688 case ICmpInst::ICMP_NE: // (X s> 13 | X != 15) -> true 1689 case ICmpInst::ICMP_SLT: // (X s> 13 | X s< 15) -> true 1690 return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); 1691 case ICmpInst::ICMP_ULT: // (X s> 13 | X u< 15) -> no change 1692 break; 1693 } 1694 break; 1695 } 1696 return 0; 1697} 1698 1699Instruction *InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, 1700 FCmpInst *RHS) { 1701 if (LHS->getPredicate() == FCmpInst::FCMP_UNO && 1702 RHS->getPredicate() == FCmpInst::FCMP_UNO && 1703 LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) { 1704 if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1))) 1705 if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) { 1706 // If either of the constants are nans, then the whole thing returns 1707 // true. 1708 if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) 1709 return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); 1710 1711 // Otherwise, no need to compare the two constants, compare the 1712 // rest. 1713 return new FCmpInst(FCmpInst::FCMP_UNO, 1714 LHS->getOperand(0), RHS->getOperand(0)); 1715 } 1716 1717 // Handle vector zeros. This occurs because the canonical form of 1718 // "fcmp uno x,x" is "fcmp uno x, 0". 1719 if (isa<ConstantAggregateZero>(LHS->getOperand(1)) && 1720 isa<ConstantAggregateZero>(RHS->getOperand(1))) 1721 return new FCmpInst(FCmpInst::FCMP_UNO, 1722 LHS->getOperand(0), RHS->getOperand(0)); 1723 1724 return 0; 1725 } 1726 1727 Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1); 1728 Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1); 1729 FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate(); 1730 1731 if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) { 1732 // Swap RHS operands to match LHS. 1733 Op1CC = FCmpInst::getSwappedPredicate(Op1CC); 1734 std::swap(Op1LHS, Op1RHS); 1735 } 1736 if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) { 1737 // Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y). 1738 if (Op0CC == Op1CC) 1739 return new FCmpInst((FCmpInst::Predicate)Op0CC, 1740 Op0LHS, Op0RHS); 1741 if (Op0CC == FCmpInst::FCMP_TRUE || Op1CC == FCmpInst::FCMP_TRUE) 1742 return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); 1743 if (Op0CC == FCmpInst::FCMP_FALSE) 1744 return ReplaceInstUsesWith(I, RHS); 1745 if (Op1CC == FCmpInst::FCMP_FALSE) 1746 return ReplaceInstUsesWith(I, LHS); 1747 bool Op0Ordered; 1748 bool Op1Ordered; 1749 unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); 1750 unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered); 1751 if (Op0Ordered == Op1Ordered) { 1752 // If both are ordered or unordered, return a new fcmp with 1753 // or'ed predicates. 1754 Value *RV = getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, Op0LHS, Op0RHS); 1755 if (Instruction *I = dyn_cast<Instruction>(RV)) 1756 return I; 1757 // Otherwise, it's a constant boolean value... 1758 return ReplaceInstUsesWith(I, RV); 1759 } 1760 } 1761 return 0; 1762} 1763 1764/// FoldOrWithConstants - This helper function folds: 1765/// 1766/// ((A | B) & C1) | (B & C2) 1767/// 1768/// into: 1769/// 1770/// (A & C1) | B 1771/// 1772/// when the XOR of the two constants is "all ones" (-1). 1773Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op, 1774 Value *A, Value *B, Value *C) { 1775 ConstantInt *CI1 = dyn_cast<ConstantInt>(C); 1776 if (!CI1) return 0; 1777 1778 Value *V1 = 0; 1779 ConstantInt *CI2 = 0; 1780 if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2)))) return 0; 1781 1782 APInt Xor = CI1->getValue() ^ CI2->getValue(); 1783 if (!Xor.isAllOnesValue()) return 0; 1784 1785 if (V1 == A || V1 == B) { 1786 Value *NewOp = Builder->CreateAnd((V1 == A) ? B : A, CI1); 1787 return BinaryOperator::CreateOr(NewOp, V1); 1788 } 1789 1790 return 0; 1791} 1792 1793Instruction *InstCombiner::visitOr(BinaryOperator &I) { 1794 bool Changed = SimplifyCommutative(I); 1795 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 1796 1797 if (Value *V = SimplifyOrInst(Op0, Op1, TD)) 1798 return ReplaceInstUsesWith(I, V); 1799 1800 1801 // See if we can simplify any instructions used by the instruction whose sole 1802 // purpose is to compute bits we don't care about. 1803 if (SimplifyDemandedInstructionBits(I)) 1804 return &I; 1805 1806 if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { 1807 ConstantInt *C1 = 0; Value *X = 0; 1808 // (X & C1) | C2 --> (X | C2) & (C1|C2) 1809 if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) && 1810 Op0->hasOneUse()) { 1811 Value *Or = Builder->CreateOr(X, RHS); 1812 Or->takeName(Op0); 1813 return BinaryOperator::CreateAnd(Or, 1814 ConstantInt::get(I.getContext(), 1815 RHS->getValue() | C1->getValue())); 1816 } 1817 1818 // (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2) 1819 if (match(Op0, m_Xor(m_Value(X), m_ConstantInt(C1))) && 1820 Op0->hasOneUse()) { 1821 Value *Or = Builder->CreateOr(X, RHS); 1822 Or->takeName(Op0); 1823 return BinaryOperator::CreateXor(Or, 1824 ConstantInt::get(I.getContext(), 1825 C1->getValue() & ~RHS->getValue())); 1826 } 1827 1828 // Try to fold constant and into select arguments. 1829 if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) 1830 if (Instruction *R = FoldOpIntoSelect(I, SI)) 1831 return R; 1832 if (isa<PHINode>(Op0)) 1833 if (Instruction *NV = FoldOpIntoPhi(I)) 1834 return NV; 1835 } 1836 1837 Value *A = 0, *B = 0; 1838 ConstantInt *C1 = 0, *C2 = 0; 1839 1840 // (A | B) | C and A | (B | C) -> bswap if possible. 1841 // (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible. 1842 if (match(Op0, m_Or(m_Value(), m_Value())) || 1843 match(Op1, m_Or(m_Value(), m_Value())) || 1844 (match(Op0, m_Shift(m_Value(), m_Value())) && 1845 match(Op1, m_Shift(m_Value(), m_Value())))) { 1846 if (Instruction *BSwap = MatchBSwap(I)) 1847 return BSwap; 1848 } 1849 1850 // (X^C)|Y -> (X|Y)^C iff Y&C == 0 1851 if (Op0->hasOneUse() && 1852 match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) && 1853 MaskedValueIsZero(Op1, C1->getValue())) { 1854 Value *NOr = Builder->CreateOr(A, Op1); 1855 NOr->takeName(Op0); 1856 return BinaryOperator::CreateXor(NOr, C1); 1857 } 1858 1859 // Y|(X^C) -> (X|Y)^C iff Y&C == 0 1860 if (Op1->hasOneUse() && 1861 match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) && 1862 MaskedValueIsZero(Op0, C1->getValue())) { 1863 Value *NOr = Builder->CreateOr(A, Op0); 1864 NOr->takeName(Op0); 1865 return BinaryOperator::CreateXor(NOr, C1); 1866 } 1867 1868 // (A & C)|(B & D) 1869 Value *C = 0, *D = 0; 1870 if (match(Op0, m_And(m_Value(A), m_Value(C))) && 1871 match(Op1, m_And(m_Value(B), m_Value(D)))) { 1872 Value *V1 = 0, *V2 = 0, *V3 = 0; 1873 C1 = dyn_cast<ConstantInt>(C); 1874 C2 = dyn_cast<ConstantInt>(D); 1875 if (C1 && C2) { // (A & C1)|(B & C2) 1876 // If we have: ((V + N) & C1) | (V & C2) 1877 // .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0 1878 // replace with V+N. 1879 if (C1->getValue() == ~C2->getValue()) { 1880 if ((C2->getValue() & (C2->getValue()+1)) == 0 && // C2 == 0+1+ 1881 match(A, m_Add(m_Value(V1), m_Value(V2)))) { 1882 // Add commutes, try both ways. 1883 if (V1 == B && MaskedValueIsZero(V2, C2->getValue())) 1884 return ReplaceInstUsesWith(I, A); 1885 if (V2 == B && MaskedValueIsZero(V1, C2->getValue())) 1886 return ReplaceInstUsesWith(I, A); 1887 } 1888 // Or commutes, try both ways. 1889 if ((C1->getValue() & (C1->getValue()+1)) == 0 && 1890 match(B, m_Add(m_Value(V1), m_Value(V2)))) { 1891 // Add commutes, try both ways. 1892 if (V1 == A && MaskedValueIsZero(V2, C1->getValue())) 1893 return ReplaceInstUsesWith(I, B); 1894 if (V2 == A && MaskedValueIsZero(V1, C1->getValue())) 1895 return ReplaceInstUsesWith(I, B); 1896 } 1897 } 1898 1899 // ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2) 1900 // iff (C1&C2) == 0 and (N&~C1) == 0 1901 if ((C1->getValue() & C2->getValue()) == 0) { 1902 if (match(A, m_Or(m_Value(V1), m_Value(V2))) && 1903 ((V1 == B && MaskedValueIsZero(V2, ~C1->getValue())) || // (V|N) 1904 (V2 == B && MaskedValueIsZero(V1, ~C1->getValue())))) // (N|V) 1905 return BinaryOperator::CreateAnd(A, 1906 ConstantInt::get(A->getContext(), 1907 C1->getValue()|C2->getValue())); 1908 // Or commutes, try both ways. 1909 if (match(B, m_Or(m_Value(V1), m_Value(V2))) && 1910 ((V1 == A && MaskedValueIsZero(V2, ~C2->getValue())) || // (V|N) 1911 (V2 == A && MaskedValueIsZero(V1, ~C2->getValue())))) // (N|V) 1912 return BinaryOperator::CreateAnd(B, 1913 ConstantInt::get(B->getContext(), 1914 C1->getValue()|C2->getValue())); 1915 } 1916 } 1917 1918 // Check to see if we have any common things being and'ed. If so, find the 1919 // terms for V1 & (V2|V3). 1920 if (Op0->hasOneUse() || Op1->hasOneUse()) { 1921 V1 = 0; 1922 if (A == B) // (A & C)|(A & D) == A & (C|D) 1923 V1 = A, V2 = C, V3 = D; 1924 else if (A == D) // (A & C)|(B & A) == A & (B|C) 1925 V1 = A, V2 = B, V3 = C; 1926 else if (C == B) // (A & C)|(C & D) == C & (A|D) 1927 V1 = C, V2 = A, V3 = D; 1928 else if (C == D) // (A & C)|(B & C) == C & (A|B) 1929 V1 = C, V2 = A, V3 = B; 1930 1931 if (V1) { 1932 Value *Or = Builder->CreateOr(V2, V3, "tmp"); 1933 return BinaryOperator::CreateAnd(V1, Or); 1934 } 1935 } 1936 1937 // (A & (C0?-1:0)) | (B & ~(C0?-1:0)) -> C0 ? A : B, and commuted variants 1938 if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D)) 1939 return Match; 1940 if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C)) 1941 return Match; 1942 if (Instruction *Match = MatchSelectFromAndOr(C, B, A, D)) 1943 return Match; 1944 if (Instruction *Match = MatchSelectFromAndOr(D, A, B, C)) 1945 return Match; 1946 1947 // ((A&~B)|(~A&B)) -> A^B 1948 if ((match(C, m_Not(m_Specific(D))) && 1949 match(B, m_Not(m_Specific(A))))) 1950 return BinaryOperator::CreateXor(A, D); 1951 // ((~B&A)|(~A&B)) -> A^B 1952 if ((match(A, m_Not(m_Specific(D))) && 1953 match(B, m_Not(m_Specific(C))))) 1954 return BinaryOperator::CreateXor(C, D); 1955 // ((A&~B)|(B&~A)) -> A^B 1956 if ((match(C, m_Not(m_Specific(B))) && 1957 match(D, m_Not(m_Specific(A))))) 1958 return BinaryOperator::CreateXor(A, B); 1959 // ((~B&A)|(B&~A)) -> A^B 1960 if ((match(A, m_Not(m_Specific(B))) && 1961 match(D, m_Not(m_Specific(C))))) 1962 return BinaryOperator::CreateXor(C, B); 1963 } 1964 1965 // (X >> Z) | (Y >> Z) -> (X|Y) >> Z for all shifts. 1966 if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) { 1967 if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0)) 1968 if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() && 1969 SI0->getOperand(1) == SI1->getOperand(1) && 1970 (SI0->hasOneUse() || SI1->hasOneUse())) { 1971 Value *NewOp = Builder->CreateOr(SI0->getOperand(0), SI1->getOperand(0), 1972 SI0->getName()); 1973 return BinaryOperator::Create(SI1->getOpcode(), NewOp, 1974 SI1->getOperand(1)); 1975 } 1976 } 1977 1978 // ((A|B)&1)|(B&-2) -> (A&1) | B 1979 if (match(Op0, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) || 1980 match(Op0, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) { 1981 Instruction *Ret = FoldOrWithConstants(I, Op1, A, B, C); 1982 if (Ret) return Ret; 1983 } 1984 // (B&-2)|((A|B)&1) -> (A&1) | B 1985 if (match(Op1, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) || 1986 match(Op1, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) { 1987 Instruction *Ret = FoldOrWithConstants(I, Op0, A, B, C); 1988 if (Ret) return Ret; 1989 } 1990 1991 // (~A | ~B) == (~(A & B)) - De Morgan's Law 1992 if (Value *Op0NotVal = dyn_castNotVal(Op0)) 1993 if (Value *Op1NotVal = dyn_castNotVal(Op1)) 1994 if (Op0->hasOneUse() && Op1->hasOneUse()) { 1995 Value *And = Builder->CreateAnd(Op0NotVal, Op1NotVal, 1996 I.getName()+".demorgan"); 1997 return BinaryOperator::CreateNot(And); 1998 } 1999 2000 if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) 2001 if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0))) 2002 if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS)) 2003 return Res; 2004 2005 // fold (or (cast A), (cast B)) -> (cast (or A, B)) 2006 if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) { 2007 if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) 2008 if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ? 2009 if (!isa<ICmpInst>(Op0C->getOperand(0)) || 2010 !isa<ICmpInst>(Op1C->getOperand(0))) { 2011 const Type *SrcTy = Op0C->getOperand(0)->getType(); 2012 if (SrcTy == Op1C->getOperand(0)->getType() && 2013 SrcTy->isIntOrIntVector() && 2014 // Only do this if the casts both really cause code to be 2015 // generated. 2016 ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), 2017 I.getType()) && 2018 ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), 2019 I.getType())) { 2020 Value *NewOp = Builder->CreateOr(Op0C->getOperand(0), 2021 Op1C->getOperand(0), I.getName()); 2022 return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); 2023 } 2024 } 2025 } 2026 } 2027 2028 2029 // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y) 2030 if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) { 2031 if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) 2032 if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS)) 2033 return Res; 2034 } 2035 2036 return Changed ? &I : 0; 2037} 2038 2039Instruction *InstCombiner::visitXor(BinaryOperator &I) { 2040 bool Changed = SimplifyCommutative(I); 2041 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 2042 2043 if (isa<UndefValue>(Op1)) { 2044 if (isa<UndefValue>(Op0)) 2045 // Handle undef ^ undef -> 0 special case. This is a common 2046 // idiom (misuse). 2047 return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); 2048 return ReplaceInstUsesWith(I, Op1); // X ^ undef -> undef 2049 } 2050 2051 // xor X, X = 0 2052 if (Op0 == Op1) 2053 return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); 2054 2055 // See if we can simplify any instructions used by the instruction whose sole 2056 // purpose is to compute bits we don't care about. 2057 if (SimplifyDemandedInstructionBits(I)) 2058 return &I; 2059 if (isa<VectorType>(I.getType())) 2060 if (isa<ConstantAggregateZero>(Op1)) 2061 return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X 2062 2063 // Is this a ~ operation? 2064 if (Value *NotOp = dyn_castNotVal(&I)) { 2065 if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(NotOp)) { 2066 if (Op0I->getOpcode() == Instruction::And || 2067 Op0I->getOpcode() == Instruction::Or) { 2068 // ~(~X & Y) --> (X | ~Y) - De Morgan's Law 2069 // ~(~X | Y) === (X & ~Y) - De Morgan's Law 2070 if (dyn_castNotVal(Op0I->getOperand(1))) 2071 Op0I->swapOperands(); 2072 if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) { 2073 Value *NotY = 2074 Builder->CreateNot(Op0I->getOperand(1), 2075 Op0I->getOperand(1)->getName()+".not"); 2076 if (Op0I->getOpcode() == Instruction::And) 2077 return BinaryOperator::CreateOr(Op0NotVal, NotY); 2078 return BinaryOperator::CreateAnd(Op0NotVal, NotY); 2079 } 2080 2081 // ~(X & Y) --> (~X | ~Y) - De Morgan's Law 2082 // ~(X | Y) === (~X & ~Y) - De Morgan's Law 2083 if (isFreeToInvert(Op0I->getOperand(0)) && 2084 isFreeToInvert(Op0I->getOperand(1))) { 2085 Value *NotX = 2086 Builder->CreateNot(Op0I->getOperand(0), "notlhs"); 2087 Value *NotY = 2088 Builder->CreateNot(Op0I->getOperand(1), "notrhs"); 2089 if (Op0I->getOpcode() == Instruction::And) 2090 return BinaryOperator::CreateOr(NotX, NotY); 2091 return BinaryOperator::CreateAnd(NotX, NotY); 2092 } 2093 } 2094 } 2095 } 2096 2097 2098 if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { 2099 if (RHS->isOne() && Op0->hasOneUse()) { 2100 // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B 2101 if (ICmpInst *ICI = dyn_cast<ICmpInst>(Op0)) 2102 return new ICmpInst(ICI->getInversePredicate(), 2103 ICI->getOperand(0), ICI->getOperand(1)); 2104 2105 if (FCmpInst *FCI = dyn_cast<FCmpInst>(Op0)) 2106 return new FCmpInst(FCI->getInversePredicate(), 2107 FCI->getOperand(0), FCI->getOperand(1)); 2108 } 2109 2110 // fold (xor(zext(cmp)), 1) and (xor(sext(cmp)), -1) to ext(!cmp). 2111 if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) { 2112 if (CmpInst *CI = dyn_cast<CmpInst>(Op0C->getOperand(0))) { 2113 if (CI->hasOneUse() && Op0C->hasOneUse()) { 2114 Instruction::CastOps Opcode = Op0C->getOpcode(); 2115 if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) && 2116 (RHS == ConstantExpr::getCast(Opcode, 2117 ConstantInt::getTrue(I.getContext()), 2118 Op0C->getDestTy()))) { 2119 CI->setPredicate(CI->getInversePredicate()); 2120 return CastInst::Create(Opcode, CI, Op0C->getType()); 2121 } 2122 } 2123 } 2124 } 2125 2126 if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) { 2127 // ~(c-X) == X-c-1 == X+(-c-1) 2128 if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue()) 2129 if (Constant *Op0I0C = dyn_cast<Constant>(Op0I->getOperand(0))) { 2130 Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C); 2131 Constant *ConstantRHS = ConstantExpr::getSub(NegOp0I0C, 2132 ConstantInt::get(I.getType(), 1)); 2133 return BinaryOperator::CreateAdd(Op0I->getOperand(1), ConstantRHS); 2134 } 2135 2136 if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) { 2137 if (Op0I->getOpcode() == Instruction::Add) { 2138 // ~(X-c) --> (-c-1)-X 2139 if (RHS->isAllOnesValue()) { 2140 Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI); 2141 return BinaryOperator::CreateSub( 2142 ConstantExpr::getSub(NegOp0CI, 2143 ConstantInt::get(I.getType(), 1)), 2144 Op0I->getOperand(0)); 2145 } else if (RHS->getValue().isSignBit()) { 2146 // (X + C) ^ signbit -> (X + C + signbit) 2147 Constant *C = ConstantInt::get(I.getContext(), 2148 RHS->getValue() + Op0CI->getValue()); 2149 return BinaryOperator::CreateAdd(Op0I->getOperand(0), C); 2150 2151 } 2152 } else if (Op0I->getOpcode() == Instruction::Or) { 2153 // (X|C1)^C2 -> X^(C1|C2) iff X&~C1 == 0 2154 if (MaskedValueIsZero(Op0I->getOperand(0), Op0CI->getValue())) { 2155 Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS); 2156 // Anything in both C1 and C2 is known to be zero, remove it from 2157 // NewRHS. 2158 Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS); 2159 NewRHS = ConstantExpr::getAnd(NewRHS, 2160 ConstantExpr::getNot(CommonBits)); 2161 Worklist.Add(Op0I); 2162 I.setOperand(0, Op0I->getOperand(0)); 2163 I.setOperand(1, NewRHS); 2164 return &I; 2165 } 2166 } 2167 } 2168 } 2169 2170 // Try to fold constant and into select arguments. 2171 if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) 2172 if (Instruction *R = FoldOpIntoSelect(I, SI)) 2173 return R; 2174 if (isa<PHINode>(Op0)) 2175 if (Instruction *NV = FoldOpIntoPhi(I)) 2176 return NV; 2177 } 2178 2179 if (Value *X = dyn_castNotVal(Op0)) // ~A ^ A == -1 2180 if (X == Op1) 2181 return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); 2182 2183 if (Value *X = dyn_castNotVal(Op1)) // A ^ ~A == -1 2184 if (X == Op0) 2185 return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); 2186 2187 2188 BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1); 2189 if (Op1I) { 2190 Value *A, *B; 2191 if (match(Op1I, m_Or(m_Value(A), m_Value(B)))) { 2192 if (A == Op0) { // B^(B|A) == (A|B)^B 2193 Op1I->swapOperands(); 2194 I.swapOperands(); 2195 std::swap(Op0, Op1); 2196 } else if (B == Op0) { // B^(A|B) == (A|B)^B 2197 I.swapOperands(); // Simplified below. 2198 std::swap(Op0, Op1); 2199 } 2200 } else if (match(Op1I, m_Xor(m_Specific(Op0), m_Value(B)))) { 2201 return ReplaceInstUsesWith(I, B); // A^(A^B) == B 2202 } else if (match(Op1I, m_Xor(m_Value(A), m_Specific(Op0)))) { 2203 return ReplaceInstUsesWith(I, A); // A^(B^A) == B 2204 } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) && 2205 Op1I->hasOneUse()){ 2206 if (A == Op0) { // A^(A&B) -> A^(B&A) 2207 Op1I->swapOperands(); 2208 std::swap(A, B); 2209 } 2210 if (B == Op0) { // A^(B&A) -> (B&A)^A 2211 I.swapOperands(); // Simplified below. 2212 std::swap(Op0, Op1); 2213 } 2214 } 2215 } 2216 2217 BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0); 2218 if (Op0I) { 2219 Value *A, *B; 2220 if (match(Op0I, m_Or(m_Value(A), m_Value(B))) && 2221 Op0I->hasOneUse()) { 2222 if (A == Op1) // (B|A)^B == (A|B)^B 2223 std::swap(A, B); 2224 if (B == Op1) // (A|B)^B == A & ~B 2225 return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1, "tmp")); 2226 } else if (match(Op0I, m_Xor(m_Specific(Op1), m_Value(B)))) { 2227 return ReplaceInstUsesWith(I, B); // (A^B)^A == B 2228 } else if (match(Op0I, m_Xor(m_Value(A), m_Specific(Op1)))) { 2229 return ReplaceInstUsesWith(I, A); // (B^A)^A == B 2230 } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) && 2231 Op0I->hasOneUse()){ 2232 if (A == Op1) // (A&B)^A -> (B&A)^A 2233 std::swap(A, B); 2234 if (B == Op1 && // (B&A)^A == ~B & A 2235 !isa<ConstantInt>(Op1)) { // Canonical form is (B&C)^C 2236 return BinaryOperator::CreateAnd(Builder->CreateNot(A, "tmp"), Op1); 2237 } 2238 } 2239 } 2240 2241 // (X >> Z) ^ (Y >> Z) -> (X^Y) >> Z for all shifts. 2242 if (Op0I && Op1I && Op0I->isShift() && 2243 Op0I->getOpcode() == Op1I->getOpcode() && 2244 Op0I->getOperand(1) == Op1I->getOperand(1) && 2245 (Op1I->hasOneUse() || Op1I->hasOneUse())) { 2246 Value *NewOp = 2247 Builder->CreateXor(Op0I->getOperand(0), Op1I->getOperand(0), 2248 Op0I->getName()); 2249 return BinaryOperator::Create(Op1I->getOpcode(), NewOp, 2250 Op1I->getOperand(1)); 2251 } 2252 2253 if (Op0I && Op1I) { 2254 Value *A, *B, *C, *D; 2255 // (A & B)^(A | B) -> A ^ B 2256 if (match(Op0I, m_And(m_Value(A), m_Value(B))) && 2257 match(Op1I, m_Or(m_Value(C), m_Value(D)))) { 2258 if ((A == C && B == D) || (A == D && B == C)) 2259 return BinaryOperator::CreateXor(A, B); 2260 } 2261 // (A | B)^(A & B) -> A ^ B 2262 if (match(Op0I, m_Or(m_Value(A), m_Value(B))) && 2263 match(Op1I, m_And(m_Value(C), m_Value(D)))) { 2264 if ((A == C && B == D) || (A == D && B == C)) 2265 return BinaryOperator::CreateXor(A, B); 2266 } 2267 2268 // (A & B)^(C & D) 2269 if ((Op0I->hasOneUse() || Op1I->hasOneUse()) && 2270 match(Op0I, m_And(m_Value(A), m_Value(B))) && 2271 match(Op1I, m_And(m_Value(C), m_Value(D)))) { 2272 // (X & Y)^(X & Y) -> (Y^Z) & X 2273 Value *X = 0, *Y = 0, *Z = 0; 2274 if (A == C) 2275 X = A, Y = B, Z = D; 2276 else if (A == D) 2277 X = A, Y = B, Z = C; 2278 else if (B == C) 2279 X = B, Y = A, Z = D; 2280 else if (B == D) 2281 X = B, Y = A, Z = C; 2282 2283 if (X) { 2284 Value *NewOp = Builder->CreateXor(Y, Z, Op0->getName()); 2285 return BinaryOperator::CreateAnd(NewOp, X); 2286 } 2287 } 2288 } 2289 2290 // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B) 2291 if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) 2292 if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0))) 2293 if (PredicatesFoldable(LHS->getPredicate(), RHS->getPredicate())) { 2294 if (LHS->getOperand(0) == RHS->getOperand(1) && 2295 LHS->getOperand(1) == RHS->getOperand(0)) 2296 LHS->swapOperands(); 2297 if (LHS->getOperand(0) == RHS->getOperand(0) && 2298 LHS->getOperand(1) == RHS->getOperand(1)) { 2299 Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1); 2300 unsigned Code = getICmpCode(LHS) ^ getICmpCode(RHS); 2301 bool isSigned = LHS->isSigned() || RHS->isSigned(); 2302 Value *RV = getICmpValue(isSigned, Code, Op0, Op1); 2303 if (Instruction *I = dyn_cast<Instruction>(RV)) 2304 return I; 2305 // Otherwise, it's a constant boolean value. 2306 return ReplaceInstUsesWith(I, RV); 2307 } 2308 } 2309 2310 // fold (xor (cast A), (cast B)) -> (cast (xor A, B)) 2311 if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) { 2312 if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) 2313 if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind? 2314 const Type *SrcTy = Op0C->getOperand(0)->getType(); 2315 if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() && 2316 // Only do this if the casts both really cause code to be generated. 2317 ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), 2318 I.getType()) && 2319 ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), 2320 I.getType())) { 2321 Value *NewOp = Builder->CreateXor(Op0C->getOperand(0), 2322 Op1C->getOperand(0), I.getName()); 2323 return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); 2324 } 2325 } 2326 } 2327 2328 return Changed ? &I : 0; 2329} 2330 2331 2332Instruction *InstCombiner::visitShl(BinaryOperator &I) { 2333 return commonShiftTransforms(I); 2334} 2335 2336Instruction *InstCombiner::visitLShr(BinaryOperator &I) { 2337 return commonShiftTransforms(I); 2338} 2339 2340Instruction *InstCombiner::visitAShr(BinaryOperator &I) { 2341 if (Instruction *R = commonShiftTransforms(I)) 2342 return R; 2343 2344 Value *Op0 = I.getOperand(0); 2345 2346 // ashr int -1, X = -1 (for any arithmetic shift rights of ~0) 2347 if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0)) 2348 if (CSI->isAllOnesValue()) 2349 return ReplaceInstUsesWith(I, CSI); 2350 2351 // See if we can turn a signed shr into an unsigned shr. 2352 if (MaskedValueIsZero(Op0, 2353 APInt::getSignBit(I.getType()->getScalarSizeInBits()))) 2354 return BinaryOperator::CreateLShr(Op0, I.getOperand(1)); 2355 2356 // Arithmetic shifting an all-sign-bit value is a no-op. 2357 unsigned NumSignBits = ComputeNumSignBits(Op0); 2358 if (NumSignBits == Op0->getType()->getScalarSizeInBits()) 2359 return ReplaceInstUsesWith(I, Op0); 2360 2361 return 0; 2362} 2363 2364Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { 2365 assert(I.getOperand(1)->getType() == I.getOperand(0)->getType()); 2366 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 2367 2368 // shl X, 0 == X and shr X, 0 == X 2369 // shl 0, X == 0 and shr 0, X == 0 2370 if (Op1 == Constant::getNullValue(Op1->getType()) || 2371 Op0 == Constant::getNullValue(Op0->getType())) 2372 return ReplaceInstUsesWith(I, Op0); 2373 2374 if (isa<UndefValue>(Op0)) { 2375 if (I.getOpcode() == Instruction::AShr) // undef >>s X -> undef 2376 return ReplaceInstUsesWith(I, Op0); 2377 else // undef << X -> 0, undef >>u X -> 0 2378 return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); 2379 } 2380 if (isa<UndefValue>(Op1)) { 2381 if (I.getOpcode() == Instruction::AShr) // X >>s undef -> X 2382 return ReplaceInstUsesWith(I, Op0); 2383 else // X << undef, X >>u undef -> 0 2384 return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); 2385 } 2386 2387 // See if we can fold away this shift. 2388 if (SimplifyDemandedInstructionBits(I)) 2389 return &I; 2390 2391 // Try to fold constant and into select arguments. 2392 if (isa<Constant>(Op0)) 2393 if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) 2394 if (Instruction *R = FoldOpIntoSelect(I, SI)) 2395 return R; 2396 2397 if (ConstantInt *CUI = dyn_cast<ConstantInt>(Op1)) 2398 if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I)) 2399 return Res; 2400 return 0; 2401} 2402 2403Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, 2404 BinaryOperator &I) { 2405 bool isLeftShift = I.getOpcode() == Instruction::Shl; 2406 2407 // See if we can simplify any instructions used by the instruction whose sole 2408 // purpose is to compute bits we don't care about. 2409 uint32_t TypeBits = Op0->getType()->getScalarSizeInBits(); 2410 2411 // shl i32 X, 32 = 0 and srl i8 Y, 9 = 0, ... just don't eliminate 2412 // a signed shift. 2413 // 2414 if (Op1->uge(TypeBits)) { 2415 if (I.getOpcode() != Instruction::AShr) 2416 return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType())); 2417 else { 2418 I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1)); 2419 return &I; 2420 } 2421 } 2422 2423 // ((X*C1) << C2) == (X * (C1 << C2)) 2424 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0)) 2425 if (BO->getOpcode() == Instruction::Mul && isLeftShift) 2426 if (Constant *BOOp = dyn_cast<Constant>(BO->getOperand(1))) 2427 return BinaryOperator::CreateMul(BO->getOperand(0), 2428 ConstantExpr::getShl(BOOp, Op1)); 2429 2430 // Try to fold constant and into select arguments. 2431 if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) 2432 if (Instruction *R = FoldOpIntoSelect(I, SI)) 2433 return R; 2434 if (isa<PHINode>(Op0)) 2435 if (Instruction *NV = FoldOpIntoPhi(I)) 2436 return NV; 2437 2438 // Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2)) 2439 if (TruncInst *TI = dyn_cast<TruncInst>(Op0)) { 2440 Instruction *TrOp = dyn_cast<Instruction>(TI->getOperand(0)); 2441 // If 'shift2' is an ashr, we would have to get the sign bit into a funny 2442 // place. Don't try to do this transformation in this case. Also, we 2443 // require that the input operand is a shift-by-constant so that we have 2444 // confidence that the shifts will get folded together. We could do this 2445 // xform in more cases, but it is unlikely to be profitable. 2446 if (TrOp && I.isLogicalShift() && TrOp->isShift() && 2447 isa<ConstantInt>(TrOp->getOperand(1))) { 2448 // Okay, we'll do this xform. Make the shift of shift. 2449 Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType()); 2450 // (shift2 (shift1 & 0x00FF), c2) 2451 Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,I.getName()); 2452 2453 // For logical shifts, the truncation has the effect of making the high 2454 // part of the register be zeros. Emulate this by inserting an AND to 2455 // clear the top bits as needed. This 'and' will usually be zapped by 2456 // other xforms later if dead. 2457 unsigned SrcSize = TrOp->getType()->getScalarSizeInBits(); 2458 unsigned DstSize = TI->getType()->getScalarSizeInBits(); 2459 APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize)); 2460 2461 // The mask we constructed says what the trunc would do if occurring 2462 // between the shifts. We want to know the effect *after* the second 2463 // shift. We know that it is a logical shift by a constant, so adjust the 2464 // mask as appropriate. 2465 if (I.getOpcode() == Instruction::Shl) 2466 MaskV <<= Op1->getZExtValue(); 2467 else { 2468 assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift"); 2469 MaskV = MaskV.lshr(Op1->getZExtValue()); 2470 } 2471 2472 // shift1 & 0x00FF 2473 Value *And = Builder->CreateAnd(NSh, 2474 ConstantInt::get(I.getContext(), MaskV), 2475 TI->getName()); 2476 2477 // Return the value truncated to the interesting size. 2478 return new TruncInst(And, I.getType()); 2479 } 2480 } 2481 2482 if (Op0->hasOneUse()) { 2483 if (BinaryOperator *Op0BO = dyn_cast<BinaryOperator>(Op0)) { 2484 // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C) 2485 Value *V1, *V2; 2486 ConstantInt *CC; 2487 switch (Op0BO->getOpcode()) { 2488 default: break; 2489 case Instruction::Add: 2490 case Instruction::And: 2491 case Instruction::Or: 2492 case Instruction::Xor: { 2493 // These operators commute. 2494 // Turn (Y + (X >> C)) << C -> (X + (Y << C)) & (~0 << C) 2495 if (isLeftShift && Op0BO->getOperand(1)->hasOneUse() && 2496 match(Op0BO->getOperand(1), m_Shr(m_Value(V1), 2497 m_Specific(Op1)))) { 2498 Value *YS = // (Y << C) 2499 Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName()); 2500 // (X + (Y << C)) 2501 Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1, 2502 Op0BO->getOperand(1)->getName()); 2503 uint32_t Op1Val = Op1->getLimitedValue(TypeBits); 2504 return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(), 2505 APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val))); 2506 } 2507 2508 // Turn (Y + ((X >> C) & CC)) << C -> ((X & (CC << C)) + (Y << C)) 2509 Value *Op0BOOp1 = Op0BO->getOperand(1); 2510 if (isLeftShift && Op0BOOp1->hasOneUse() && 2511 match(Op0BOOp1, 2512 m_And(m_Shr(m_Value(V1), m_Specific(Op1)), 2513 m_ConstantInt(CC))) && 2514 cast<BinaryOperator>(Op0BOOp1)->getOperand(0)->hasOneUse()) { 2515 Value *YS = // (Y << C) 2516 Builder->CreateShl(Op0BO->getOperand(0), Op1, 2517 Op0BO->getName()); 2518 // X & (CC << C) 2519 Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1), 2520 V1->getName()+".mask"); 2521 return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM); 2522 } 2523 } 2524 2525 // FALL THROUGH. 2526 case Instruction::Sub: { 2527 // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C) 2528 if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() && 2529 match(Op0BO->getOperand(0), m_Shr(m_Value(V1), 2530 m_Specific(Op1)))) { 2531 Value *YS = // (Y << C) 2532 Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); 2533 // (X + (Y << C)) 2534 Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS, 2535 Op0BO->getOperand(0)->getName()); 2536 uint32_t Op1Val = Op1->getLimitedValue(TypeBits); 2537 return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(), 2538 APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val))); 2539 } 2540 2541 // Turn (((X >> C)&CC) + Y) << C -> (X + (Y << C)) & (CC << C) 2542 if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() && 2543 match(Op0BO->getOperand(0), 2544 m_And(m_Shr(m_Value(V1), m_Value(V2)), 2545 m_ConstantInt(CC))) && V2 == Op1 && 2546 cast<BinaryOperator>(Op0BO->getOperand(0)) 2547 ->getOperand(0)->hasOneUse()) { 2548 Value *YS = // (Y << C) 2549 Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); 2550 // X & (CC << C) 2551 Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1), 2552 V1->getName()+".mask"); 2553 2554 return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS); 2555 } 2556 2557 break; 2558 } 2559 } 2560 2561 2562 // If the operand is an bitwise operator with a constant RHS, and the 2563 // shift is the only use, we can pull it out of the shift. 2564 if (ConstantInt *Op0C = dyn_cast<ConstantInt>(Op0BO->getOperand(1))) { 2565 bool isValid = true; // Valid only for And, Or, Xor 2566 bool highBitSet = false; // Transform if high bit of constant set? 2567 2568 switch (Op0BO->getOpcode()) { 2569 default: isValid = false; break; // Do not perform transform! 2570 case Instruction::Add: 2571 isValid = isLeftShift; 2572 break; 2573 case Instruction::Or: 2574 case Instruction::Xor: 2575 highBitSet = false; 2576 break; 2577 case Instruction::And: 2578 highBitSet = true; 2579 break; 2580 } 2581 2582 // If this is a signed shift right, and the high bit is modified 2583 // by the logical operation, do not perform the transformation. 2584 // The highBitSet boolean indicates the value of the high bit of 2585 // the constant which would cause it to be modified for this 2586 // operation. 2587 // 2588 if (isValid && I.getOpcode() == Instruction::AShr) 2589 isValid = Op0C->getValue()[TypeBits-1] == highBitSet; 2590 2591 if (isValid) { 2592 Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1); 2593 2594 Value *NewShift = 2595 Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1); 2596 NewShift->takeName(Op0BO); 2597 2598 return BinaryOperator::Create(Op0BO->getOpcode(), NewShift, 2599 NewRHS); 2600 } 2601 } 2602 } 2603 } 2604 2605 // Find out if this is a shift of a shift by a constant. 2606 BinaryOperator *ShiftOp = dyn_cast<BinaryOperator>(Op0); 2607 if (ShiftOp && !ShiftOp->isShift()) 2608 ShiftOp = 0; 2609 2610 if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) { 2611 ConstantInt *ShiftAmt1C = cast<ConstantInt>(ShiftOp->getOperand(1)); 2612 uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits); 2613 uint32_t ShiftAmt2 = Op1->getLimitedValue(TypeBits); 2614 assert(ShiftAmt2 != 0 && "Should have been simplified earlier"); 2615 if (ShiftAmt1 == 0) return 0; // Will be simplified in the future. 2616 Value *X = ShiftOp->getOperand(0); 2617 2618 uint32_t AmtSum = ShiftAmt1+ShiftAmt2; // Fold into one big shift. 2619 2620 const IntegerType *Ty = cast<IntegerType>(I.getType()); 2621 2622 // Check for (X << c1) << c2 and (X >> c1) >> c2 2623 if (I.getOpcode() == ShiftOp->getOpcode()) { 2624 // If this is oversized composite shift, then unsigned shifts get 0, ashr 2625 // saturates. 2626 if (AmtSum >= TypeBits) { 2627 if (I.getOpcode() != Instruction::AShr) 2628 return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); 2629 AmtSum = TypeBits-1; // Saturate to 31 for i32 ashr. 2630 } 2631 2632 return BinaryOperator::Create(I.getOpcode(), X, 2633 ConstantInt::get(Ty, AmtSum)); 2634 } 2635 2636 if (ShiftOp->getOpcode() == Instruction::LShr && 2637 I.getOpcode() == Instruction::AShr) { 2638 if (AmtSum >= TypeBits) 2639 return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); 2640 2641 // ((X >>u C1) >>s C2) -> (X >>u (C1+C2)) since C1 != 0. 2642 return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum)); 2643 } 2644 2645 if (ShiftOp->getOpcode() == Instruction::AShr && 2646 I.getOpcode() == Instruction::LShr) { 2647 // ((X >>s C1) >>u C2) -> ((X >>s (C1+C2)) & mask) since C1 != 0. 2648 if (AmtSum >= TypeBits) 2649 AmtSum = TypeBits-1; 2650 2651 Value *Shift = Builder->CreateAShr(X, ConstantInt::get(Ty, AmtSum)); 2652 2653 APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); 2654 return BinaryOperator::CreateAnd(Shift, 2655 ConstantInt::get(I.getContext(), Mask)); 2656 } 2657 2658 // Okay, if we get here, one shift must be left, and the other shift must be 2659 // right. See if the amounts are equal. 2660 if (ShiftAmt1 == ShiftAmt2) { 2661 // If we have ((X >>? C) << C), turn this into X & (-1 << C). 2662 if (I.getOpcode() == Instruction::Shl) { 2663 APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1)); 2664 return BinaryOperator::CreateAnd(X, 2665 ConstantInt::get(I.getContext(),Mask)); 2666 } 2667 // If we have ((X << C) >>u C), turn this into X & (-1 >>u C). 2668 if (I.getOpcode() == Instruction::LShr) { 2669 APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1)); 2670 return BinaryOperator::CreateAnd(X, 2671 ConstantInt::get(I.getContext(), Mask)); 2672 } 2673 // We can simplify ((X << C) >>s C) into a trunc + sext. 2674 // NOTE: we could do this for any C, but that would make 'unusual' integer 2675 // types. For now, just stick to ones well-supported by the code 2676 // generators. 2677 const Type *SExtType = 0; 2678 switch (Ty->getBitWidth() - ShiftAmt1) { 2679 case 1 : 2680 case 8 : 2681 case 16 : 2682 case 32 : 2683 case 64 : 2684 case 128: 2685 SExtType = IntegerType::get(I.getContext(), 2686 Ty->getBitWidth() - ShiftAmt1); 2687 break; 2688 default: break; 2689 } 2690 if (SExtType) 2691 return new SExtInst(Builder->CreateTrunc(X, SExtType, "sext"), Ty); 2692 // Otherwise, we can't handle it yet. 2693 } else if (ShiftAmt1 < ShiftAmt2) { 2694 uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1; 2695 2696 // (X >>? C1) << C2 --> X << (C2-C1) & (-1 << C2) 2697 if (I.getOpcode() == Instruction::Shl) { 2698 assert(ShiftOp->getOpcode() == Instruction::LShr || 2699 ShiftOp->getOpcode() == Instruction::AShr); 2700 Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff)); 2701 2702 APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2)); 2703 return BinaryOperator::CreateAnd(Shift, 2704 ConstantInt::get(I.getContext(),Mask)); 2705 } 2706 2707 // (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2) 2708 if (I.getOpcode() == Instruction::LShr) { 2709 assert(ShiftOp->getOpcode() == Instruction::Shl); 2710 Value *Shift = Builder->CreateLShr(X, ConstantInt::get(Ty, ShiftDiff)); 2711 2712 APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); 2713 return BinaryOperator::CreateAnd(Shift, 2714 ConstantInt::get(I.getContext(),Mask)); 2715 } 2716 2717 // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. 2718 } else { 2719 assert(ShiftAmt2 < ShiftAmt1); 2720 uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2; 2721 2722 // (X >>? C1) << C2 --> X >>? (C1-C2) & (-1 << C2) 2723 if (I.getOpcode() == Instruction::Shl) { 2724 assert(ShiftOp->getOpcode() == Instruction::LShr || 2725 ShiftOp->getOpcode() == Instruction::AShr); 2726 Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), X, 2727 ConstantInt::get(Ty, ShiftDiff)); 2728 2729 APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2)); 2730 return BinaryOperator::CreateAnd(Shift, 2731 ConstantInt::get(I.getContext(),Mask)); 2732 } 2733 2734 // (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2) 2735 if (I.getOpcode() == Instruction::LShr) { 2736 assert(ShiftOp->getOpcode() == Instruction::Shl); 2737 Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff)); 2738 2739 APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); 2740 return BinaryOperator::CreateAnd(Shift, 2741 ConstantInt::get(I.getContext(),Mask)); 2742 } 2743 2744 // We can't handle (X << C1) >>a C2, it shifts arbitrary bits in. 2745 } 2746 } 2747 return 0; 2748} 2749 2750 2751 2752/// FindElementAtOffset - Given a type and a constant offset, determine whether 2753/// or not there is a sequence of GEP indices into the type that will land us at 2754/// the specified offset. If so, fill them into NewIndices and return the 2755/// resultant element type, otherwise return null. 2756const Type *InstCombiner::FindElementAtOffset(const Type *Ty, int64_t Offset, 2757 SmallVectorImpl<Value*> &NewIndices) { 2758 if (!TD) return 0; 2759 if (!Ty->isSized()) return 0; 2760 2761 // Start with the index over the outer type. Note that the type size 2762 // might be zero (even if the offset isn't zero) if the indexed type 2763 // is something like [0 x {int, int}] 2764 const Type *IntPtrTy = TD->getIntPtrType(Ty->getContext()); 2765 int64_t FirstIdx = 0; 2766 if (int64_t TySize = TD->getTypeAllocSize(Ty)) { 2767 FirstIdx = Offset/TySize; 2768 Offset -= FirstIdx*TySize; 2769 2770 // Handle hosts where % returns negative instead of values [0..TySize). 2771 if (Offset < 0) { 2772 --FirstIdx; 2773 Offset += TySize; 2774 assert(Offset >= 0); 2775 } 2776 assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset"); 2777 } 2778 2779 NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx)); 2780 2781 // Index into the types. If we fail, set OrigBase to null. 2782 while (Offset) { 2783 // Indexing into tail padding between struct/array elements. 2784 if (uint64_t(Offset*8) >= TD->getTypeSizeInBits(Ty)) 2785 return 0; 2786 2787 if (const StructType *STy = dyn_cast<StructType>(Ty)) { 2788 const StructLayout *SL = TD->getStructLayout(STy); 2789 assert(Offset < (int64_t)SL->getSizeInBytes() && 2790 "Offset must stay within the indexed type"); 2791 2792 unsigned Elt = SL->getElementContainingOffset(Offset); 2793 NewIndices.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 2794 Elt)); 2795 2796 Offset -= SL->getElementOffset(Elt); 2797 Ty = STy->getElementType(Elt); 2798 } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) { 2799 uint64_t EltSize = TD->getTypeAllocSize(AT->getElementType()); 2800 assert(EltSize && "Cannot index into a zero-sized array"); 2801 NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize)); 2802 Offset %= EltSize; 2803 Ty = AT->getElementType(); 2804 } else { 2805 // Otherwise, we can't index into the middle of this atomic type, bail. 2806 return 0; 2807 } 2808 } 2809 2810 return Ty; 2811} 2812 2813 2814 2815Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { 2816 SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end()); 2817 2818 if (Value *V = SimplifyGEPInst(&Ops[0], Ops.size(), TD)) 2819 return ReplaceInstUsesWith(GEP, V); 2820 2821 Value *PtrOp = GEP.getOperand(0); 2822 2823 if (isa<UndefValue>(GEP.getOperand(0))) 2824 return ReplaceInstUsesWith(GEP, UndefValue::get(GEP.getType())); 2825 2826 // Eliminate unneeded casts for indices. 2827 if (TD) { 2828 bool MadeChange = false; 2829 unsigned PtrSize = TD->getPointerSizeInBits(); 2830 2831 gep_type_iterator GTI = gep_type_begin(GEP); 2832 for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); 2833 I != E; ++I, ++GTI) { 2834 if (!isa<SequentialType>(*GTI)) continue; 2835 2836 // If we are using a wider index than needed for this platform, shrink it 2837 // to what we need. If narrower, sign-extend it to what we need. This 2838 // explicit cast can make subsequent optimizations more obvious. 2839 unsigned OpBits = cast<IntegerType>((*I)->getType())->getBitWidth(); 2840 if (OpBits == PtrSize) 2841 continue; 2842 2843 *I = Builder->CreateIntCast(*I, TD->getIntPtrType(GEP.getContext()),true); 2844 MadeChange = true; 2845 } 2846 if (MadeChange) return &GEP; 2847 } 2848 2849 // Combine Indices - If the source pointer to this getelementptr instruction 2850 // is a getelementptr instruction, combine the indices of the two 2851 // getelementptr instructions into a single instruction. 2852 // 2853 if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) { 2854 // Note that if our source is a gep chain itself that we wait for that 2855 // chain to be resolved before we perform this transformation. This 2856 // avoids us creating a TON of code in some cases. 2857 // 2858 if (GetElementPtrInst *SrcGEP = 2859 dyn_cast<GetElementPtrInst>(Src->getOperand(0))) 2860 if (SrcGEP->getNumOperands() == 2) 2861 return 0; // Wait until our source is folded to completion. 2862 2863 SmallVector<Value*, 8> Indices; 2864 2865 // Find out whether the last index in the source GEP is a sequential idx. 2866 bool EndsWithSequential = false; 2867 for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src); 2868 I != E; ++I) 2869 EndsWithSequential = !isa<StructType>(*I); 2870 2871 // Can we combine the two pointer arithmetics offsets? 2872 if (EndsWithSequential) { 2873 // Replace: gep (gep %P, long B), long A, ... 2874 // With: T = long A+B; gep %P, T, ... 2875 // 2876 Value *Sum; 2877 Value *SO1 = Src->getOperand(Src->getNumOperands()-1); 2878 Value *GO1 = GEP.getOperand(1); 2879 if (SO1 == Constant::getNullValue(SO1->getType())) { 2880 Sum = GO1; 2881 } else if (GO1 == Constant::getNullValue(GO1->getType())) { 2882 Sum = SO1; 2883 } else { 2884 // If they aren't the same type, then the input hasn't been processed 2885 // by the loop above yet (which canonicalizes sequential index types to 2886 // intptr_t). Just avoid transforming this until the input has been 2887 // normalized. 2888 if (SO1->getType() != GO1->getType()) 2889 return 0; 2890 Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum"); 2891 } 2892 2893 // Update the GEP in place if possible. 2894 if (Src->getNumOperands() == 2) { 2895 GEP.setOperand(0, Src->getOperand(0)); 2896 GEP.setOperand(1, Sum); 2897 return &GEP; 2898 } 2899 Indices.append(Src->op_begin()+1, Src->op_end()-1); 2900 Indices.push_back(Sum); 2901 Indices.append(GEP.op_begin()+2, GEP.op_end()); 2902 } else if (isa<Constant>(*GEP.idx_begin()) && 2903 cast<Constant>(*GEP.idx_begin())->isNullValue() && 2904 Src->getNumOperands() != 1) { 2905 // Otherwise we can do the fold if the first index of the GEP is a zero 2906 Indices.append(Src->op_begin()+1, Src->op_end()); 2907 Indices.append(GEP.idx_begin()+1, GEP.idx_end()); 2908 } 2909 2910 if (!Indices.empty()) 2911 return (GEP.isInBounds() && Src->isInBounds()) ? 2912 GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices.begin(), 2913 Indices.end(), GEP.getName()) : 2914 GetElementPtrInst::Create(Src->getOperand(0), Indices.begin(), 2915 Indices.end(), GEP.getName()); 2916 } 2917 2918 // Handle gep(bitcast x) and gep(gep x, 0, 0, 0). 2919 Value *StrippedPtr = PtrOp->stripPointerCasts(); 2920 if (StrippedPtr != PtrOp) { 2921 const PointerType *StrippedPtrTy =cast<PointerType>(StrippedPtr->getType()); 2922 2923 bool HasZeroPointerIndex = false; 2924 if (ConstantInt *C = dyn_cast<ConstantInt>(GEP.getOperand(1))) 2925 HasZeroPointerIndex = C->isZero(); 2926 2927 // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... 2928 // into : GEP [10 x i8]* X, i32 0, ... 2929 // 2930 // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ... 2931 // into : GEP i8* X, ... 2932 // 2933 // This occurs when the program declares an array extern like "int X[];" 2934 if (HasZeroPointerIndex) { 2935 const PointerType *CPTy = cast<PointerType>(PtrOp->getType()); 2936 if (const ArrayType *CATy = 2937 dyn_cast<ArrayType>(CPTy->getElementType())) { 2938 // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ? 2939 if (CATy->getElementType() == StrippedPtrTy->getElementType()) { 2940 // -> GEP i8* X, ... 2941 SmallVector<Value*, 8> Idx(GEP.idx_begin()+1, GEP.idx_end()); 2942 GetElementPtrInst *Res = 2943 GetElementPtrInst::Create(StrippedPtr, Idx.begin(), 2944 Idx.end(), GEP.getName()); 2945 Res->setIsInBounds(GEP.isInBounds()); 2946 return Res; 2947 } 2948 2949 if (const ArrayType *XATy = 2950 dyn_cast<ArrayType>(StrippedPtrTy->getElementType())){ 2951 // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ? 2952 if (CATy->getElementType() == XATy->getElementType()) { 2953 // -> GEP [10 x i8]* X, i32 0, ... 2954 // At this point, we know that the cast source type is a pointer 2955 // to an array of the same type as the destination pointer 2956 // array. Because the array type is never stepped over (there 2957 // is a leading zero) we can fold the cast into this GEP. 2958 GEP.setOperand(0, StrippedPtr); 2959 return &GEP; 2960 } 2961 } 2962 } 2963 } else if (GEP.getNumOperands() == 2) { 2964 // Transform things like: 2965 // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V 2966 // into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast 2967 const Type *SrcElTy = StrippedPtrTy->getElementType(); 2968 const Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType(); 2969 if (TD && isa<ArrayType>(SrcElTy) && 2970 TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) == 2971 TD->getTypeAllocSize(ResElTy)) { 2972 Value *Idx[2]; 2973 Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext())); 2974 Idx[1] = GEP.getOperand(1); 2975 Value *NewGEP = GEP.isInBounds() ? 2976 Builder->CreateInBoundsGEP(StrippedPtr, Idx, Idx + 2, GEP.getName()) : 2977 Builder->CreateGEP(StrippedPtr, Idx, Idx + 2, GEP.getName()); 2978 // V and GEP are both pointer types --> BitCast 2979 return new BitCastInst(NewGEP, GEP.getType()); 2980 } 2981 2982 // Transform things like: 2983 // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp 2984 // (where tmp = 8*tmp2) into: 2985 // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast 2986 2987 if (TD && isa<ArrayType>(SrcElTy) && 2988 ResElTy == Type::getInt8Ty(GEP.getContext())) { 2989 uint64_t ArrayEltSize = 2990 TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()); 2991 2992 // Check to see if "tmp" is a scale by a multiple of ArrayEltSize. We 2993 // allow either a mul, shift, or constant here. 2994 Value *NewIdx = 0; 2995 ConstantInt *Scale = 0; 2996 if (ArrayEltSize == 1) { 2997 NewIdx = GEP.getOperand(1); 2998 Scale = ConstantInt::get(cast<IntegerType>(NewIdx->getType()), 1); 2999 } else if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP.getOperand(1))) { 3000 NewIdx = ConstantInt::get(CI->getType(), 1); 3001 Scale = CI; 3002 } else if (Instruction *Inst =dyn_cast<Instruction>(GEP.getOperand(1))){ 3003 if (Inst->getOpcode() == Instruction::Shl && 3004 isa<ConstantInt>(Inst->getOperand(1))) { 3005 ConstantInt *ShAmt = cast<ConstantInt>(Inst->getOperand(1)); 3006 uint32_t ShAmtVal = ShAmt->getLimitedValue(64); 3007 Scale = ConstantInt::get(cast<IntegerType>(Inst->getType()), 3008 1ULL << ShAmtVal); 3009 NewIdx = Inst->getOperand(0); 3010 } else if (Inst->getOpcode() == Instruction::Mul && 3011 isa<ConstantInt>(Inst->getOperand(1))) { 3012 Scale = cast<ConstantInt>(Inst->getOperand(1)); 3013 NewIdx = Inst->getOperand(0); 3014 } 3015 } 3016 3017 // If the index will be to exactly the right offset with the scale taken 3018 // out, perform the transformation. Note, we don't know whether Scale is 3019 // signed or not. We'll use unsigned version of division/modulo 3020 // operation after making sure Scale doesn't have the sign bit set. 3021 if (ArrayEltSize && Scale && Scale->getSExtValue() >= 0LL && 3022 Scale->getZExtValue() % ArrayEltSize == 0) { 3023 Scale = ConstantInt::get(Scale->getType(), 3024 Scale->getZExtValue() / ArrayEltSize); 3025 if (Scale->getZExtValue() != 1) { 3026 Constant *C = ConstantExpr::getIntegerCast(Scale, NewIdx->getType(), 3027 false /*ZExt*/); 3028 NewIdx = Builder->CreateMul(NewIdx, C, "idxscale"); 3029 } 3030 3031 // Insert the new GEP instruction. 3032 Value *Idx[2]; 3033 Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext())); 3034 Idx[1] = NewIdx; 3035 Value *NewGEP = GEP.isInBounds() ? 3036 Builder->CreateInBoundsGEP(StrippedPtr, Idx, Idx + 2,GEP.getName()): 3037 Builder->CreateGEP(StrippedPtr, Idx, Idx + 2, GEP.getName()); 3038 // The NewGEP must be pointer typed, so must the old one -> BitCast 3039 return new BitCastInst(NewGEP, GEP.getType()); 3040 } 3041 } 3042 } 3043 } 3044 3045 /// See if we can simplify: 3046 /// X = bitcast A* to B* 3047 /// Y = gep X, <...constant indices...> 3048 /// into a gep of the original struct. This is important for SROA and alias 3049 /// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged. 3050 if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) { 3051 if (TD && 3052 !isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices()) { 3053 // Determine how much the GEP moves the pointer. We are guaranteed to get 3054 // a constant back from EmitGEPOffset. 3055 ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(&GEP)); 3056 int64_t Offset = OffsetV->getSExtValue(); 3057 3058 // If this GEP instruction doesn't move the pointer, just replace the GEP 3059 // with a bitcast of the real input to the dest type. 3060 if (Offset == 0) { 3061 // If the bitcast is of an allocation, and the allocation will be 3062 // converted to match the type of the cast, don't touch this. 3063 if (isa<AllocaInst>(BCI->getOperand(0)) || 3064 isMalloc(BCI->getOperand(0))) { 3065 // See if the bitcast simplifies, if so, don't nuke this GEP yet. 3066 if (Instruction *I = visitBitCast(*BCI)) { 3067 if (I != BCI) { 3068 I->takeName(BCI); 3069 BCI->getParent()->getInstList().insert(BCI, I); 3070 ReplaceInstUsesWith(*BCI, I); 3071 } 3072 return &GEP; 3073 } 3074 } 3075 return new BitCastInst(BCI->getOperand(0), GEP.getType()); 3076 } 3077 3078 // Otherwise, if the offset is non-zero, we need to find out if there is a 3079 // field at Offset in 'A's type. If so, we can pull the cast through the 3080 // GEP. 3081 SmallVector<Value*, 8> NewIndices; 3082 const Type *InTy = 3083 cast<PointerType>(BCI->getOperand(0)->getType())->getElementType(); 3084 if (FindElementAtOffset(InTy, Offset, NewIndices)) { 3085 Value *NGEP = GEP.isInBounds() ? 3086 Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices.begin(), 3087 NewIndices.end()) : 3088 Builder->CreateGEP(BCI->getOperand(0), NewIndices.begin(), 3089 NewIndices.end()); 3090 3091 if (NGEP->getType() == GEP.getType()) 3092 return ReplaceInstUsesWith(GEP, NGEP); 3093 NGEP->takeName(&GEP); 3094 return new BitCastInst(NGEP, GEP.getType()); 3095 } 3096 } 3097 } 3098 3099 return 0; 3100} 3101 3102Instruction *InstCombiner::visitFree(Instruction &FI) { 3103 Value *Op = FI.getOperand(1); 3104 3105 // free undef -> unreachable. 3106 if (isa<UndefValue>(Op)) { 3107 // Insert a new store to null because we cannot modify the CFG here. 3108 new StoreInst(ConstantInt::getTrue(FI.getContext()), 3109 UndefValue::get(Type::getInt1PtrTy(FI.getContext())), &FI); 3110 return EraseInstFromFunction(FI); 3111 } 3112 3113 // If we have 'free null' delete the instruction. This can happen in stl code 3114 // when lots of inlining happens. 3115 if (isa<ConstantPointerNull>(Op)) 3116 return EraseInstFromFunction(FI); 3117 3118 // If we have a malloc call whose only use is a free call, delete both. 3119 if (isMalloc(Op)) { 3120 if (CallInst* CI = extractMallocCallFromBitCast(Op)) { 3121 if (Op->hasOneUse() && CI->hasOneUse()) { 3122 EraseInstFromFunction(FI); 3123 EraseInstFromFunction(*CI); 3124 return EraseInstFromFunction(*cast<Instruction>(Op)); 3125 } 3126 } else { 3127 // Op is a call to malloc 3128 if (Op->hasOneUse()) { 3129 EraseInstFromFunction(FI); 3130 return EraseInstFromFunction(*cast<Instruction>(Op)); 3131 } 3132 } 3133 } 3134 3135 return 0; 3136} 3137 3138 3139 3140Instruction *InstCombiner::visitBranchInst(BranchInst &BI) { 3141 // Change br (not X), label True, label False to: br X, label False, True 3142 Value *X = 0; 3143 BasicBlock *TrueDest; 3144 BasicBlock *FalseDest; 3145 if (match(&BI, m_Br(m_Not(m_Value(X)), TrueDest, FalseDest)) && 3146 !isa<Constant>(X)) { 3147 // Swap Destinations and condition... 3148 BI.setCondition(X); 3149 BI.setSuccessor(0, FalseDest); 3150 BI.setSuccessor(1, TrueDest); 3151 return &BI; 3152 } 3153 3154 // Cannonicalize fcmp_one -> fcmp_oeq 3155 FCmpInst::Predicate FPred; Value *Y; 3156 if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)), 3157 TrueDest, FalseDest)) && 3158 BI.getCondition()->hasOneUse()) 3159 if (FPred == FCmpInst::FCMP_ONE || FPred == FCmpInst::FCMP_OLE || 3160 FPred == FCmpInst::FCMP_OGE) { 3161 FCmpInst *Cond = cast<FCmpInst>(BI.getCondition()); 3162 Cond->setPredicate(FCmpInst::getInversePredicate(FPred)); 3163 3164 // Swap Destinations and condition. 3165 BI.setSuccessor(0, FalseDest); 3166 BI.setSuccessor(1, TrueDest); 3167 Worklist.Add(Cond); 3168 return &BI; 3169 } 3170 3171 // Cannonicalize icmp_ne -> icmp_eq 3172 ICmpInst::Predicate IPred; 3173 if (match(&BI, m_Br(m_ICmp(IPred, m_Value(X), m_Value(Y)), 3174 TrueDest, FalseDest)) && 3175 BI.getCondition()->hasOneUse()) 3176 if (IPred == ICmpInst::ICMP_NE || IPred == ICmpInst::ICMP_ULE || 3177 IPred == ICmpInst::ICMP_SLE || IPred == ICmpInst::ICMP_UGE || 3178 IPred == ICmpInst::ICMP_SGE) { 3179 ICmpInst *Cond = cast<ICmpInst>(BI.getCondition()); 3180 Cond->setPredicate(ICmpInst::getInversePredicate(IPred)); 3181 // Swap Destinations and condition. 3182 BI.setSuccessor(0, FalseDest); 3183 BI.setSuccessor(1, TrueDest); 3184 Worklist.Add(Cond); 3185 return &BI; 3186 } 3187 3188 return 0; 3189} 3190 3191Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) { 3192 Value *Cond = SI.getCondition(); 3193 if (Instruction *I = dyn_cast<Instruction>(Cond)) { 3194 if (I->getOpcode() == Instruction::Add) 3195 if (ConstantInt *AddRHS = dyn_cast<ConstantInt>(I->getOperand(1))) { 3196 // change 'switch (X+4) case 1:' into 'switch (X) case -3' 3197 for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2) 3198 SI.setOperand(i, 3199 ConstantExpr::getSub(cast<Constant>(SI.getOperand(i)), 3200 AddRHS)); 3201 SI.setOperand(0, I->getOperand(0)); 3202 Worklist.Add(I); 3203 return &SI; 3204 } 3205 } 3206 return 0; 3207} 3208 3209Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { 3210 Value *Agg = EV.getAggregateOperand(); 3211 3212 if (!EV.hasIndices()) 3213 return ReplaceInstUsesWith(EV, Agg); 3214 3215 if (Constant *C = dyn_cast<Constant>(Agg)) { 3216 if (isa<UndefValue>(C)) 3217 return ReplaceInstUsesWith(EV, UndefValue::get(EV.getType())); 3218 3219 if (isa<ConstantAggregateZero>(C)) 3220 return ReplaceInstUsesWith(EV, Constant::getNullValue(EV.getType())); 3221 3222 if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) { 3223 // Extract the element indexed by the first index out of the constant 3224 Value *V = C->getOperand(*EV.idx_begin()); 3225 if (EV.getNumIndices() > 1) 3226 // Extract the remaining indices out of the constant indexed by the 3227 // first index 3228 return ExtractValueInst::Create(V, EV.idx_begin() + 1, EV.idx_end()); 3229 else 3230 return ReplaceInstUsesWith(EV, V); 3231 } 3232 return 0; // Can't handle other constants 3233 } 3234 if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) { 3235 // We're extracting from an insertvalue instruction, compare the indices 3236 const unsigned *exti, *exte, *insi, *inse; 3237 for (exti = EV.idx_begin(), insi = IV->idx_begin(), 3238 exte = EV.idx_end(), inse = IV->idx_end(); 3239 exti != exte && insi != inse; 3240 ++exti, ++insi) { 3241 if (*insi != *exti) 3242 // The insert and extract both reference distinctly different elements. 3243 // This means the extract is not influenced by the insert, and we can 3244 // replace the aggregate operand of the extract with the aggregate 3245 // operand of the insert. i.e., replace 3246 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1 3247 // %E = extractvalue { i32, { i32 } } %I, 0 3248 // with 3249 // %E = extractvalue { i32, { i32 } } %A, 0 3250 return ExtractValueInst::Create(IV->getAggregateOperand(), 3251 EV.idx_begin(), EV.idx_end()); 3252 } 3253 if (exti == exte && insi == inse) 3254 // Both iterators are at the end: Index lists are identical. Replace 3255 // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0 3256 // %C = extractvalue { i32, { i32 } } %B, 1, 0 3257 // with "i32 42" 3258 return ReplaceInstUsesWith(EV, IV->getInsertedValueOperand()); 3259 if (exti == exte) { 3260 // The extract list is a prefix of the insert list. i.e. replace 3261 // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0 3262 // %E = extractvalue { i32, { i32 } } %I, 1 3263 // with 3264 // %X = extractvalue { i32, { i32 } } %A, 1 3265 // %E = insertvalue { i32 } %X, i32 42, 0 3266 // by switching the order of the insert and extract (though the 3267 // insertvalue should be left in, since it may have other uses). 3268 Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(), 3269 EV.idx_begin(), EV.idx_end()); 3270 return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(), 3271 insi, inse); 3272 } 3273 if (insi == inse) 3274 // The insert list is a prefix of the extract list 3275 // We can simply remove the common indices from the extract and make it 3276 // operate on the inserted value instead of the insertvalue result. 3277 // i.e., replace 3278 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1 3279 // %E = extractvalue { i32, { i32 } } %I, 1, 0 3280 // with 3281 // %E extractvalue { i32 } { i32 42 }, 0 3282 return ExtractValueInst::Create(IV->getInsertedValueOperand(), 3283 exti, exte); 3284 } 3285 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) { 3286 // We're extracting from an intrinsic, see if we're the only user, which 3287 // allows us to simplify multiple result intrinsics to simpler things that 3288 // just get one value.. 3289 if (II->hasOneUse()) { 3290 // Check if we're grabbing the overflow bit or the result of a 'with 3291 // overflow' intrinsic. If it's the latter we can remove the intrinsic 3292 // and replace it with a traditional binary instruction. 3293 switch (II->getIntrinsicID()) { 3294 case Intrinsic::uadd_with_overflow: 3295 case Intrinsic::sadd_with_overflow: 3296 if (*EV.idx_begin() == 0) { // Normal result. 3297 Value *LHS = II->getOperand(1), *RHS = II->getOperand(2); 3298 II->replaceAllUsesWith(UndefValue::get(II->getType())); 3299 EraseInstFromFunction(*II); 3300 return BinaryOperator::CreateAdd(LHS, RHS); 3301 } 3302 break; 3303 case Intrinsic::usub_with_overflow: 3304 case Intrinsic::ssub_with_overflow: 3305 if (*EV.idx_begin() == 0) { // Normal result. 3306 Value *LHS = II->getOperand(1), *RHS = II->getOperand(2); 3307 II->replaceAllUsesWith(UndefValue::get(II->getType())); 3308 EraseInstFromFunction(*II); 3309 return BinaryOperator::CreateSub(LHS, RHS); 3310 } 3311 break; 3312 case Intrinsic::umul_with_overflow: 3313 case Intrinsic::smul_with_overflow: 3314 if (*EV.idx_begin() == 0) { // Normal result. 3315 Value *LHS = II->getOperand(1), *RHS = II->getOperand(2); 3316 II->replaceAllUsesWith(UndefValue::get(II->getType())); 3317 EraseInstFromFunction(*II); 3318 return BinaryOperator::CreateMul(LHS, RHS); 3319 } 3320 break; 3321 default: 3322 break; 3323 } 3324 } 3325 } 3326 // Can't simplify extracts from other values. Note that nested extracts are 3327 // already simplified implicitely by the above (extract ( extract (insert) ) 3328 // will be translated into extract ( insert ( extract ) ) first and then just 3329 // the value inserted, if appropriate). 3330 return 0; 3331} 3332 3333 3334 3335 3336/// TryToSinkInstruction - Try to move the specified instruction from its 3337/// current block into the beginning of DestBlock, which can only happen if it's 3338/// safe to move the instruction past all of the instructions between it and the 3339/// end of its block. 3340static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { 3341 assert(I->hasOneUse() && "Invariants didn't hold!"); 3342 3343 // Cannot move control-flow-involving, volatile loads, vaarg, etc. 3344 if (isa<PHINode>(I) || I->mayHaveSideEffects() || isa<TerminatorInst>(I)) 3345 return false; 3346 3347 // Do not sink alloca instructions out of the entry block. 3348 if (isa<AllocaInst>(I) && I->getParent() == 3349 &DestBlock->getParent()->getEntryBlock()) 3350 return false; 3351 3352 // We can only sink load instructions if there is nothing between the load and 3353 // the end of block that could change the value. 3354 if (I->mayReadFromMemory()) { 3355 for (BasicBlock::iterator Scan = I, E = I->getParent()->end(); 3356 Scan != E; ++Scan) 3357 if (Scan->mayWriteToMemory()) 3358 return false; 3359 } 3360 3361 BasicBlock::iterator InsertPos = DestBlock->getFirstNonPHI(); 3362 3363 I->moveBefore(InsertPos); 3364 ++NumSunkInst; 3365 return true; 3366} 3367 3368 3369/// AddReachableCodeToWorklist - Walk the function in depth-first order, adding 3370/// all reachable code to the worklist. 3371/// 3372/// This has a couple of tricks to make the code faster and more powerful. In 3373/// particular, we constant fold and DCE instructions as we go, to avoid adding 3374/// them to the worklist (this significantly speeds up instcombine on code where 3375/// many instructions are dead or constant). Additionally, if we find a branch 3376/// whose condition is a known constant, we only visit the reachable successors. 3377/// 3378static bool AddReachableCodeToWorklist(BasicBlock *BB, 3379 SmallPtrSet<BasicBlock*, 64> &Visited, 3380 InstCombiner &IC, 3381 const TargetData *TD) { 3382 bool MadeIRChange = false; 3383 SmallVector<BasicBlock*, 256> Worklist; 3384 Worklist.push_back(BB); 3385 3386 std::vector<Instruction*> InstrsForInstCombineWorklist; 3387 InstrsForInstCombineWorklist.reserve(128); 3388 3389 SmallPtrSet<ConstantExpr*, 64> FoldedConstants; 3390 3391 while (!Worklist.empty()) { 3392 BB = Worklist.back(); 3393 Worklist.pop_back(); 3394 3395 // We have now visited this block! If we've already been here, ignore it. 3396 if (!Visited.insert(BB)) continue; 3397 3398 for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) { 3399 Instruction *Inst = BBI++; 3400 3401 // DCE instruction if trivially dead. 3402 if (isInstructionTriviallyDead(Inst)) { 3403 ++NumDeadInst; 3404 DEBUG(errs() << "IC: DCE: " << *Inst << '\n'); 3405 Inst->eraseFromParent(); 3406 continue; 3407 } 3408 3409 // ConstantProp instruction if trivially constant. 3410 if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0))) 3411 if (Constant *C = ConstantFoldInstruction(Inst, TD)) { 3412 DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " 3413 << *Inst << '\n'); 3414 Inst->replaceAllUsesWith(C); 3415 ++NumConstProp; 3416 Inst->eraseFromParent(); 3417 continue; 3418 } 3419 3420 3421 3422 if (TD) { 3423 // See if we can constant fold its operands. 3424 for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end(); 3425 i != e; ++i) { 3426 ConstantExpr *CE = dyn_cast<ConstantExpr>(i); 3427 if (CE == 0) continue; 3428 3429 // If we already folded this constant, don't try again. 3430 if (!FoldedConstants.insert(CE)) 3431 continue; 3432 3433 Constant *NewC = ConstantFoldConstantExpression(CE, TD); 3434 if (NewC && NewC != CE) { 3435 *i = NewC; 3436 MadeIRChange = true; 3437 } 3438 } 3439 } 3440 3441 3442 InstrsForInstCombineWorklist.push_back(Inst); 3443 } 3444 3445 // Recursively visit successors. If this is a branch or switch on a 3446 // constant, only visit the reachable successor. 3447 TerminatorInst *TI = BB->getTerminator(); 3448 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { 3449 if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) { 3450 bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue(); 3451 BasicBlock *ReachableBB = BI->getSuccessor(!CondVal); 3452 Worklist.push_back(ReachableBB); 3453 continue; 3454 } 3455 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { 3456 if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) { 3457 // See if this is an explicit destination. 3458 for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) 3459 if (SI->getCaseValue(i) == Cond) { 3460 BasicBlock *ReachableBB = SI->getSuccessor(i); 3461 Worklist.push_back(ReachableBB); 3462 continue; 3463 } 3464 3465 // Otherwise it is the default destination. 3466 Worklist.push_back(SI->getSuccessor(0)); 3467 continue; 3468 } 3469 } 3470 3471 for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) 3472 Worklist.push_back(TI->getSuccessor(i)); 3473 } 3474 3475 // Once we've found all of the instructions to add to instcombine's worklist, 3476 // add them in reverse order. This way instcombine will visit from the top 3477 // of the function down. This jives well with the way that it adds all uses 3478 // of instructions to the worklist after doing a transformation, thus avoiding 3479 // some N^2 behavior in pathological cases. 3480 IC.Worklist.AddInitialGroup(&InstrsForInstCombineWorklist[0], 3481 InstrsForInstCombineWorklist.size()); 3482 3483 return MadeIRChange; 3484} 3485 3486bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { 3487 MadeIRChange = false; 3488 3489 DEBUG(errs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on " 3490 << F.getNameStr() << "\n"); 3491 3492 { 3493 // Do a depth-first traversal of the function, populate the worklist with 3494 // the reachable instructions. Ignore blocks that are not reachable. Keep 3495 // track of which blocks we visit. 3496 SmallPtrSet<BasicBlock*, 64> Visited; 3497 MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD); 3498 3499 // Do a quick scan over the function. If we find any blocks that are 3500 // unreachable, remove any instructions inside of them. This prevents 3501 // the instcombine code from having to deal with some bad special cases. 3502 for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) 3503 if (!Visited.count(BB)) { 3504 Instruction *Term = BB->getTerminator(); 3505 while (Term != BB->begin()) { // Remove instrs bottom-up 3506 BasicBlock::iterator I = Term; --I; 3507 3508 DEBUG(errs() << "IC: DCE: " << *I << '\n'); 3509 // A debug intrinsic shouldn't force another iteration if we weren't 3510 // going to do one without it. 3511 if (!isa<DbgInfoIntrinsic>(I)) { 3512 ++NumDeadInst; 3513 MadeIRChange = true; 3514 } 3515 3516 // If I is not void type then replaceAllUsesWith undef. 3517 // This allows ValueHandlers and custom metadata to adjust itself. 3518 if (!I->getType()->isVoidTy()) 3519 I->replaceAllUsesWith(UndefValue::get(I->getType())); 3520 I->eraseFromParent(); 3521 } 3522 } 3523 } 3524 3525 while (!Worklist.isEmpty()) { 3526 Instruction *I = Worklist.RemoveOne(); 3527 if (I == 0) continue; // skip null values. 3528 3529 // Check to see if we can DCE the instruction. 3530 if (isInstructionTriviallyDead(I)) { 3531 DEBUG(errs() << "IC: DCE: " << *I << '\n'); 3532 EraseInstFromFunction(*I); 3533 ++NumDeadInst; 3534 MadeIRChange = true; 3535 continue; 3536 } 3537 3538 // Instruction isn't dead, see if we can constant propagate it. 3539 if (!I->use_empty() && isa<Constant>(I->getOperand(0))) 3540 if (Constant *C = ConstantFoldInstruction(I, TD)) { 3541 DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n'); 3542 3543 // Add operands to the worklist. 3544 ReplaceInstUsesWith(*I, C); 3545 ++NumConstProp; 3546 EraseInstFromFunction(*I); 3547 MadeIRChange = true; 3548 continue; 3549 } 3550 3551 // See if we can trivially sink this instruction to a successor basic block. 3552 if (I->hasOneUse()) { 3553 BasicBlock *BB = I->getParent(); 3554 Instruction *UserInst = cast<Instruction>(I->use_back()); 3555 BasicBlock *UserParent; 3556 3557 // Get the block the use occurs in. 3558 if (PHINode *PN = dyn_cast<PHINode>(UserInst)) 3559 UserParent = PN->getIncomingBlock(I->use_begin().getUse()); 3560 else 3561 UserParent = UserInst->getParent(); 3562 3563 if (UserParent != BB) { 3564 bool UserIsSuccessor = false; 3565 // See if the user is one of our successors. 3566 for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) 3567 if (*SI == UserParent) { 3568 UserIsSuccessor = true; 3569 break; 3570 } 3571 3572 // If the user is one of our immediate successors, and if that successor 3573 // only has us as a predecessors (we'd have to split the critical edge 3574 // otherwise), we can keep going. 3575 if (UserIsSuccessor && UserParent->getSinglePredecessor()) 3576 // Okay, the CFG is simple enough, try to sink this instruction. 3577 MadeIRChange |= TryToSinkInstruction(I, UserParent); 3578 } 3579 } 3580 3581 // Now that we have an instruction, try combining it to simplify it. 3582 Builder->SetInsertPoint(I->getParent(), I); 3583 3584#ifndef NDEBUG 3585 std::string OrigI; 3586#endif 3587 DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str();); 3588 DEBUG(errs() << "IC: Visiting: " << OrigI << '\n'); 3589 3590 if (Instruction *Result = visit(*I)) { 3591 ++NumCombined; 3592 // Should we replace the old instruction with a new one? 3593 if (Result != I) { 3594 DEBUG(errs() << "IC: Old = " << *I << '\n' 3595 << " New = " << *Result << '\n'); 3596 3597 // Everything uses the new instruction now. 3598 I->replaceAllUsesWith(Result); 3599 3600 // Push the new instruction and any users onto the worklist. 3601 Worklist.Add(Result); 3602 Worklist.AddUsersToWorkList(*Result); 3603 3604 // Move the name to the new instruction first. 3605 Result->takeName(I); 3606 3607 // Insert the new instruction into the basic block... 3608 BasicBlock *InstParent = I->getParent(); 3609 BasicBlock::iterator InsertPos = I; 3610 3611 if (!isa<PHINode>(Result)) // If combining a PHI, don't insert 3612 while (isa<PHINode>(InsertPos)) // middle of a block of PHIs. 3613 ++InsertPos; 3614 3615 InstParent->getInstList().insert(InsertPos, Result); 3616 3617 EraseInstFromFunction(*I); 3618 } else { 3619#ifndef NDEBUG 3620 DEBUG(errs() << "IC: Mod = " << OrigI << '\n' 3621 << " New = " << *I << '\n'); 3622#endif 3623 3624 // If the instruction was modified, it's possible that it is now dead. 3625 // if so, remove it. 3626 if (isInstructionTriviallyDead(I)) { 3627 EraseInstFromFunction(*I); 3628 } else { 3629 Worklist.Add(I); 3630 Worklist.AddUsersToWorkList(*I); 3631 } 3632 } 3633 MadeIRChange = true; 3634 } 3635 } 3636 3637 Worklist.Zap(); 3638 return MadeIRChange; 3639} 3640 3641 3642bool InstCombiner::runOnFunction(Function &F) { 3643 MustPreserveLCSSA = mustPreserveAnalysisID(LCSSAID); 3644 TD = getAnalysisIfAvailable<TargetData>(); 3645 3646 3647 /// Builder - This is an IRBuilder that automatically inserts new 3648 /// instructions into the worklist when they are created. 3649 IRBuilder<true, TargetFolder, InstCombineIRInserter> 3650 TheBuilder(F.getContext(), TargetFolder(TD), 3651 InstCombineIRInserter(Worklist)); 3652 Builder = &TheBuilder; 3653 3654 bool EverMadeChange = false; 3655 3656 // Iterate while there is work to do. 3657 unsigned Iteration = 0; 3658 while (DoOneIteration(F, Iteration++)) 3659 EverMadeChange = true; 3660 3661 Builder = 0; 3662 return EverMadeChange; 3663} 3664 3665FunctionPass *llvm::createInstructionCombiningPass() { 3666 return new InstCombiner(); 3667} 3668