InstCombineVectorOps.cpp revision 36b56886974eae4f9c5ebc96befd3e7bfe5de338
1//===- InstCombineVectorOps.cpp -------------------------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements instcombine for ExtractElement, InsertElement and 11// ShuffleVector. 12// 13//===----------------------------------------------------------------------===// 14 15#include "InstCombine.h" 16#include "llvm/IR/PatternMatch.h" 17using namespace llvm; 18using namespace PatternMatch; 19 20/// CheapToScalarize - Return true if the value is cheaper to scalarize than it 21/// is to leave as a vector operation. isConstant indicates whether we're 22/// extracting one known element. If false we're extracting a variable index. 23static bool CheapToScalarize(Value *V, bool isConstant) { 24 if (Constant *C = dyn_cast<Constant>(V)) { 25 if (isConstant) return true; 26 27 // If all elts are the same, we can extract it and use any of the values. 28 if (Constant *Op0 = C->getAggregateElement(0U)) { 29 for (unsigned i = 1, e = V->getType()->getVectorNumElements(); i != e; 30 ++i) 31 if (C->getAggregateElement(i) != Op0) 32 return false; 33 return true; 34 } 35 } 36 Instruction *I = dyn_cast<Instruction>(V); 37 if (!I) return false; 38 39 // Insert element gets simplified to the inserted element or is deleted if 40 // this is constant idx extract element and its a constant idx insertelt. 41 if (I->getOpcode() == Instruction::InsertElement && isConstant && 42 isa<ConstantInt>(I->getOperand(2))) 43 return true; 44 if (I->getOpcode() == Instruction::Load && I->hasOneUse()) 45 return true; 46 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) 47 if (BO->hasOneUse() && 48 (CheapToScalarize(BO->getOperand(0), isConstant) || 49 CheapToScalarize(BO->getOperand(1), isConstant))) 50 return true; 51 if (CmpInst *CI = dyn_cast<CmpInst>(I)) 52 if (CI->hasOneUse() && 53 (CheapToScalarize(CI->getOperand(0), isConstant) || 54 CheapToScalarize(CI->getOperand(1), isConstant))) 55 return true; 56 57 return false; 58} 59 60/// FindScalarElement - Given a vector and an element number, see if the scalar 61/// value is already around as a register, for example if it were inserted then 62/// extracted from the vector. 63static Value *FindScalarElement(Value *V, unsigned EltNo) { 64 assert(V->getType()->isVectorTy() && "Not looking at a vector?"); 65 VectorType *VTy = cast<VectorType>(V->getType()); 66 unsigned Width = VTy->getNumElements(); 67 if (EltNo >= Width) // Out of range access. 68 return UndefValue::get(VTy->getElementType()); 69 70 if (Constant *C = dyn_cast<Constant>(V)) 71 return C->getAggregateElement(EltNo); 72 73 if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) { 74 // If this is an insert to a variable element, we don't know what it is. 75 if (!isa<ConstantInt>(III->getOperand(2))) 76 return 0; 77 unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue(); 78 79 // If this is an insert to the element we are looking for, return the 80 // inserted value. 81 if (EltNo == IIElt) 82 return III->getOperand(1); 83 84 // Otherwise, the insertelement doesn't modify the value, recurse on its 85 // vector input. 86 return FindScalarElement(III->getOperand(0), EltNo); 87 } 88 89 if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) { 90 unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements(); 91 int InEl = SVI->getMaskValue(EltNo); 92 if (InEl < 0) 93 return UndefValue::get(VTy->getElementType()); 94 if (InEl < (int)LHSWidth) 95 return FindScalarElement(SVI->getOperand(0), InEl); 96 return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth); 97 } 98 99 // Extract a value from a vector add operation with a constant zero. 100 Value *Val = 0; Constant *Con = 0; 101 if (match(V, m_Add(m_Value(Val), m_Constant(Con)))) { 102 if (Con->getAggregateElement(EltNo)->isNullValue()) 103 return FindScalarElement(Val, EltNo); 104 } 105 106 // Otherwise, we don't know. 107 return 0; 108} 109 110// If we have a PHI node with a vector type that has only 2 uses: feed 111// itself and be an operand of extractelement at a constant location, 112// try to replace the PHI of the vector type with a PHI of a scalar type. 113Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) { 114 // Verify that the PHI node has exactly 2 uses. Otherwise return NULL. 115 if (!PN->hasNUses(2)) 116 return NULL; 117 118 // If so, it's known at this point that one operand is PHI and the other is 119 // an extractelement node. Find the PHI user that is not the extractelement 120 // node. 121 auto iu = PN->user_begin(); 122 Instruction *PHIUser = dyn_cast<Instruction>(*iu); 123 if (PHIUser == cast<Instruction>(&EI)) 124 PHIUser = cast<Instruction>(*(++iu)); 125 126 // Verify that this PHI user has one use, which is the PHI itself, 127 // and that it is a binary operation which is cheap to scalarize. 128 // otherwise return NULL. 129 if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) || 130 !(isa<BinaryOperator>(PHIUser)) || !CheapToScalarize(PHIUser, true)) 131 return NULL; 132 133 // Create a scalar PHI node that will replace the vector PHI node 134 // just before the current PHI node. 135 PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith( 136 PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN)); 137 // Scalarize each PHI operand. 138 for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) { 139 Value *PHIInVal = PN->getIncomingValue(i); 140 BasicBlock *inBB = PN->getIncomingBlock(i); 141 Value *Elt = EI.getIndexOperand(); 142 // If the operand is the PHI induction variable: 143 if (PHIInVal == PHIUser) { 144 // Scalarize the binary operation. Its first operand is the 145 // scalar PHI and the second operand is extracted from the other 146 // vector operand. 147 BinaryOperator *B0 = cast<BinaryOperator>(PHIUser); 148 unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0; 149 Value *Op = InsertNewInstWith( 150 ExtractElementInst::Create(B0->getOperand(opId), Elt, 151 B0->getOperand(opId)->getName() + ".Elt"), 152 *B0); 153 Value *newPHIUser = InsertNewInstWith( 154 BinaryOperator::Create(B0->getOpcode(), scalarPHI, Op), *B0); 155 scalarPHI->addIncoming(newPHIUser, inBB); 156 } else { 157 // Scalarize PHI input: 158 Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, ""); 159 // Insert the new instruction into the predecessor basic block. 160 Instruction *pos = dyn_cast<Instruction>(PHIInVal); 161 BasicBlock::iterator InsertPos; 162 if (pos && !isa<PHINode>(pos)) { 163 InsertPos = pos; 164 ++InsertPos; 165 } else { 166 InsertPos = inBB->getFirstInsertionPt(); 167 } 168 169 InsertNewInstWith(newEI, *InsertPos); 170 171 scalarPHI->addIncoming(newEI, inBB); 172 } 173 } 174 return ReplaceInstUsesWith(EI, scalarPHI); 175} 176 177Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { 178 // If vector val is constant with all elements the same, replace EI with 179 // that element. We handle a known element # below. 180 if (Constant *C = dyn_cast<Constant>(EI.getOperand(0))) 181 if (CheapToScalarize(C, false)) 182 return ReplaceInstUsesWith(EI, C->getAggregateElement(0U)); 183 184 // If extracting a specified index from the vector, see if we can recursively 185 // find a previously computed scalar that was inserted into the vector. 186 if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) { 187 unsigned IndexVal = IdxC->getZExtValue(); 188 unsigned VectorWidth = EI.getVectorOperandType()->getNumElements(); 189 190 // If this is extracting an invalid index, turn this into undef, to avoid 191 // crashing the code below. 192 if (IndexVal >= VectorWidth) 193 return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); 194 195 // This instruction only demands the single element from the input vector. 196 // If the input vector has a single use, simplify it based on this use 197 // property. 198 if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) { 199 APInt UndefElts(VectorWidth, 0); 200 APInt DemandedMask(VectorWidth, 0); 201 DemandedMask.setBit(IndexVal); 202 if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0), 203 DemandedMask, UndefElts)) { 204 EI.setOperand(0, V); 205 return &EI; 206 } 207 } 208 209 if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal)) 210 return ReplaceInstUsesWith(EI, Elt); 211 212 // If the this extractelement is directly using a bitcast from a vector of 213 // the same number of elements, see if we can find the source element from 214 // it. In this case, we will end up needing to bitcast the scalars. 215 if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) { 216 if (VectorType *VT = dyn_cast<VectorType>(BCI->getOperand(0)->getType())) 217 if (VT->getNumElements() == VectorWidth) 218 if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal)) 219 return new BitCastInst(Elt, EI.getType()); 220 } 221 222 // If there's a vector PHI feeding a scalar use through this extractelement 223 // instruction, try to scalarize the PHI. 224 if (PHINode *PN = dyn_cast<PHINode>(EI.getOperand(0))) { 225 Instruction *scalarPHI = scalarizePHI(EI, PN); 226 if (scalarPHI) 227 return scalarPHI; 228 } 229 } 230 231 if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) { 232 // Push extractelement into predecessor operation if legal and 233 // profitable to do so 234 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) { 235 if (I->hasOneUse() && 236 CheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) { 237 Value *newEI0 = 238 Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1), 239 EI.getName()+".lhs"); 240 Value *newEI1 = 241 Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1), 242 EI.getName()+".rhs"); 243 return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1); 244 } 245 } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) { 246 // Extracting the inserted element? 247 if (IE->getOperand(2) == EI.getOperand(1)) 248 return ReplaceInstUsesWith(EI, IE->getOperand(1)); 249 // If the inserted and extracted elements are constants, they must not 250 // be the same value, extract from the pre-inserted value instead. 251 if (isa<Constant>(IE->getOperand(2)) && isa<Constant>(EI.getOperand(1))) { 252 Worklist.AddValue(EI.getOperand(0)); 253 EI.setOperand(0, IE->getOperand(0)); 254 return &EI; 255 } 256 } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) { 257 // If this is extracting an element from a shufflevector, figure out where 258 // it came from and extract from the appropriate input element instead. 259 if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) { 260 int SrcIdx = SVI->getMaskValue(Elt->getZExtValue()); 261 Value *Src; 262 unsigned LHSWidth = 263 SVI->getOperand(0)->getType()->getVectorNumElements(); 264 265 if (SrcIdx < 0) 266 return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); 267 if (SrcIdx < (int)LHSWidth) 268 Src = SVI->getOperand(0); 269 else { 270 SrcIdx -= LHSWidth; 271 Src = SVI->getOperand(1); 272 } 273 Type *Int32Ty = Type::getInt32Ty(EI.getContext()); 274 return ExtractElementInst::Create(Src, 275 ConstantInt::get(Int32Ty, 276 SrcIdx, false)); 277 } 278 } else if (CastInst *CI = dyn_cast<CastInst>(I)) { 279 // Canonicalize extractelement(cast) -> cast(extractelement) 280 // bitcasts can change the number of vector elements and they cost nothing 281 if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) { 282 Value *EE = Builder->CreateExtractElement(CI->getOperand(0), 283 EI.getIndexOperand()); 284 Worklist.AddValue(EE); 285 return CastInst::Create(CI->getOpcode(), EE, EI.getType()); 286 } 287 } else if (SelectInst *SI = dyn_cast<SelectInst>(I)) { 288 if (SI->hasOneUse()) { 289 // TODO: For a select on vectors, it might be useful to do this if it 290 // has multiple extractelement uses. For vector select, that seems to 291 // fight the vectorizer. 292 293 // If we are extracting an element from a vector select or a select on 294 // vectors, a select on the scalars extracted from the vector arguments. 295 Value *TrueVal = SI->getTrueValue(); 296 Value *FalseVal = SI->getFalseValue(); 297 298 Value *Cond = SI->getCondition(); 299 if (Cond->getType()->isVectorTy()) { 300 Cond = Builder->CreateExtractElement(Cond, 301 EI.getIndexOperand(), 302 Cond->getName() + ".elt"); 303 } 304 305 Value *V1Elem 306 = Builder->CreateExtractElement(TrueVal, 307 EI.getIndexOperand(), 308 TrueVal->getName() + ".elt"); 309 310 Value *V2Elem 311 = Builder->CreateExtractElement(FalseVal, 312 EI.getIndexOperand(), 313 FalseVal->getName() + ".elt"); 314 return SelectInst::Create(Cond, 315 V1Elem, 316 V2Elem, 317 SI->getName() + ".elt"); 318 } 319 } 320 } 321 return 0; 322} 323 324/// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns 325/// elements from either LHS or RHS, return the shuffle mask and true. 326/// Otherwise, return false. 327static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, 328 SmallVectorImpl<Constant*> &Mask) { 329 assert(LHS->getType() == RHS->getType() && 330 "Invalid CollectSingleShuffleElements"); 331 unsigned NumElts = V->getType()->getVectorNumElements(); 332 333 if (isa<UndefValue>(V)) { 334 Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext()))); 335 return true; 336 } 337 338 if (V == LHS) { 339 for (unsigned i = 0; i != NumElts; ++i) 340 Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i)); 341 return true; 342 } 343 344 if (V == RHS) { 345 for (unsigned i = 0; i != NumElts; ++i) 346 Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), 347 i+NumElts)); 348 return true; 349 } 350 351 if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) { 352 // If this is an insert of an extract from some other vector, include it. 353 Value *VecOp = IEI->getOperand(0); 354 Value *ScalarOp = IEI->getOperand(1); 355 Value *IdxOp = IEI->getOperand(2); 356 357 if (!isa<ConstantInt>(IdxOp)) 358 return false; 359 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); 360 361 if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector. 362 // Okay, we can handle this if the vector we are insertinting into is 363 // transitively ok. 364 if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { 365 // If so, update the mask to reflect the inserted undef. 366 Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext())); 367 return true; 368 } 369 } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){ 370 if (isa<ConstantInt>(EI->getOperand(1))) { 371 unsigned ExtractedIdx = 372 cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); 373 unsigned NumLHSElts = LHS->getType()->getVectorNumElements(); 374 375 // This must be extracting from either LHS or RHS. 376 if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) { 377 // Okay, we can handle this if the vector we are insertinting into is 378 // transitively ok. 379 if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { 380 // If so, update the mask to reflect the inserted value. 381 if (EI->getOperand(0) == LHS) { 382 Mask[InsertedIdx % NumElts] = 383 ConstantInt::get(Type::getInt32Ty(V->getContext()), 384 ExtractedIdx); 385 } else { 386 assert(EI->getOperand(0) == RHS); 387 Mask[InsertedIdx % NumElts] = 388 ConstantInt::get(Type::getInt32Ty(V->getContext()), 389 ExtractedIdx + NumLHSElts); 390 } 391 return true; 392 } 393 } 394 } 395 } 396 } 397 398 return false; 399} 400 401 402/// We are building a shuffle to create V, which is a sequence of insertelement, 403/// extractelement pairs. If PermittedRHS is set, then we must either use it or 404/// not rely on the second vector source. Return an std::pair containing the 405/// left and right vectors of the proposed shuffle (or 0), and set the Mask 406/// parameter as required. 407/// 408/// Note: we intentionally don't try to fold earlier shuffles since they have 409/// often been chosen carefully to be efficiently implementable on the target. 410typedef std::pair<Value *, Value *> ShuffleOps; 411 412static ShuffleOps CollectShuffleElements(Value *V, 413 SmallVectorImpl<Constant *> &Mask, 414 Value *PermittedRHS) { 415 assert(V->getType()->isVectorTy() && "Invalid shuffle!"); 416 unsigned NumElts = cast<VectorType>(V->getType())->getNumElements(); 417 418 if (isa<UndefValue>(V)) { 419 Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext()))); 420 return std::make_pair( 421 PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : V, nullptr); 422 } 423 424 if (isa<ConstantAggregateZero>(V)) { 425 Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0)); 426 return std::make_pair(V, nullptr); 427 } 428 429 if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) { 430 // If this is an insert of an extract from some other vector, include it. 431 Value *VecOp = IEI->getOperand(0); 432 Value *ScalarOp = IEI->getOperand(1); 433 Value *IdxOp = IEI->getOperand(2); 434 435 if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) { 436 if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) { 437 unsigned ExtractedIdx = 438 cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); 439 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); 440 441 // Either the extracted from or inserted into vector must be RHSVec, 442 // otherwise we'd end up with a shuffle of three inputs. 443 if (EI->getOperand(0) == PermittedRHS || PermittedRHS == 0) { 444 Value *RHS = EI->getOperand(0); 445 ShuffleOps LR = CollectShuffleElements(VecOp, Mask, RHS); 446 assert(LR.second == 0 || LR.second == RHS); 447 448 if (LR.first->getType() != RHS->getType()) { 449 // We tried our best, but we can't find anything compatible with RHS 450 // further up the chain. Return a trivial shuffle. 451 for (unsigned i = 0; i < NumElts; ++i) 452 Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()), i); 453 return std::make_pair(V, nullptr); 454 } 455 456 unsigned NumLHSElts = RHS->getType()->getVectorNumElements(); 457 Mask[InsertedIdx % NumElts] = 458 ConstantInt::get(Type::getInt32Ty(V->getContext()), 459 NumLHSElts+ExtractedIdx); 460 return std::make_pair(LR.first, RHS); 461 } 462 463 if (VecOp == PermittedRHS) { 464 // We've gone as far as we can: anything on the other side of the 465 // extractelement will already have been converted into a shuffle. 466 unsigned NumLHSElts = 467 EI->getOperand(0)->getType()->getVectorNumElements(); 468 for (unsigned i = 0; i != NumElts; ++i) 469 Mask.push_back(ConstantInt::get( 470 Type::getInt32Ty(V->getContext()), 471 i == InsertedIdx ? ExtractedIdx : NumLHSElts + i)); 472 return std::make_pair(EI->getOperand(0), PermittedRHS); 473 } 474 475 // If this insertelement is a chain that comes from exactly these two 476 // vectors, return the vector and the effective shuffle. 477 if (EI->getOperand(0)->getType() == PermittedRHS->getType() && 478 CollectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS, 479 Mask)) 480 return std::make_pair(EI->getOperand(0), PermittedRHS); 481 } 482 } 483 } 484 485 // Otherwise, can't do anything fancy. Return an identity vector. 486 for (unsigned i = 0; i != NumElts; ++i) 487 Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i)); 488 return std::make_pair(V, nullptr); 489} 490 491Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { 492 Value *VecOp = IE.getOperand(0); 493 Value *ScalarOp = IE.getOperand(1); 494 Value *IdxOp = IE.getOperand(2); 495 496 // Inserting an undef or into an undefined place, remove this. 497 if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp)) 498 ReplaceInstUsesWith(IE, VecOp); 499 500 // If the inserted element was extracted from some other vector, and if the 501 // indexes are constant, try to turn this into a shufflevector operation. 502 if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) { 503 if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) { 504 unsigned NumInsertVectorElts = IE.getType()->getNumElements(); 505 unsigned NumExtractVectorElts = 506 EI->getOperand(0)->getType()->getVectorNumElements(); 507 unsigned ExtractedIdx = 508 cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); 509 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); 510 511 if (ExtractedIdx >= NumExtractVectorElts) // Out of range extract. 512 return ReplaceInstUsesWith(IE, VecOp); 513 514 if (InsertedIdx >= NumInsertVectorElts) // Out of range insert. 515 return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType())); 516 517 // If we are extracting a value from a vector, then inserting it right 518 // back into the same place, just use the input vector. 519 if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx) 520 return ReplaceInstUsesWith(IE, VecOp); 521 522 // If this insertelement isn't used by some other insertelement, turn it 523 // (and any insertelements it points to), into one big shuffle. 524 if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.user_back())) { 525 SmallVector<Constant*, 16> Mask; 526 ShuffleOps LR = CollectShuffleElements(&IE, Mask, 0); 527 528 // The proposed shuffle may be trivial, in which case we shouldn't 529 // perform the combine. 530 if (LR.first != &IE && LR.second != &IE) { 531 // We now have a shuffle of LHS, RHS, Mask. 532 if (LR.second == 0) LR.second = UndefValue::get(LR.first->getType()); 533 return new ShuffleVectorInst(LR.first, LR.second, 534 ConstantVector::get(Mask)); 535 } 536 } 537 } 538 } 539 540 unsigned VWidth = cast<VectorType>(VecOp->getType())->getNumElements(); 541 APInt UndefElts(VWidth, 0); 542 APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); 543 if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) { 544 if (V != &IE) 545 return ReplaceInstUsesWith(IE, V); 546 return &IE; 547 } 548 549 return 0; 550} 551 552/// Return true if we can evaluate the specified expression tree if the vector 553/// elements were shuffled in a different order. 554static bool CanEvaluateShuffled(Value *V, ArrayRef<int> Mask, 555 unsigned Depth = 5) { 556 // We can always reorder the elements of a constant. 557 if (isa<Constant>(V)) 558 return true; 559 560 // We won't reorder vector arguments. No IPO here. 561 Instruction *I = dyn_cast<Instruction>(V); 562 if (!I) return false; 563 564 // Two users may expect different orders of the elements. Don't try it. 565 if (!I->hasOneUse()) 566 return false; 567 568 if (Depth == 0) return false; 569 570 switch (I->getOpcode()) { 571 case Instruction::Add: 572 case Instruction::FAdd: 573 case Instruction::Sub: 574 case Instruction::FSub: 575 case Instruction::Mul: 576 case Instruction::FMul: 577 case Instruction::UDiv: 578 case Instruction::SDiv: 579 case Instruction::FDiv: 580 case Instruction::URem: 581 case Instruction::SRem: 582 case Instruction::FRem: 583 case Instruction::Shl: 584 case Instruction::LShr: 585 case Instruction::AShr: 586 case Instruction::And: 587 case Instruction::Or: 588 case Instruction::Xor: 589 case Instruction::ICmp: 590 case Instruction::FCmp: 591 case Instruction::Trunc: 592 case Instruction::ZExt: 593 case Instruction::SExt: 594 case Instruction::FPToUI: 595 case Instruction::FPToSI: 596 case Instruction::UIToFP: 597 case Instruction::SIToFP: 598 case Instruction::FPTrunc: 599 case Instruction::FPExt: 600 case Instruction::GetElementPtr: { 601 for (int i = 0, e = I->getNumOperands(); i != e; ++i) { 602 if (!CanEvaluateShuffled(I->getOperand(i), Mask, Depth-1)) 603 return false; 604 } 605 return true; 606 } 607 case Instruction::InsertElement: { 608 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2)); 609 if (!CI) return false; 610 int ElementNumber = CI->getLimitedValue(); 611 612 // Verify that 'CI' does not occur twice in Mask. A single 'insertelement' 613 // can't put an element into multiple indices. 614 bool SeenOnce = false; 615 for (int i = 0, e = Mask.size(); i != e; ++i) { 616 if (Mask[i] == ElementNumber) { 617 if (SeenOnce) 618 return false; 619 SeenOnce = true; 620 } 621 } 622 return CanEvaluateShuffled(I->getOperand(0), Mask, Depth-1); 623 } 624 } 625 return false; 626} 627 628/// Rebuild a new instruction just like 'I' but with the new operands given. 629/// In the event of type mismatch, the type of the operands is correct. 630static Value *BuildNew(Instruction *I, ArrayRef<Value*> NewOps) { 631 // We don't want to use the IRBuilder here because we want the replacement 632 // instructions to appear next to 'I', not the builder's insertion point. 633 switch (I->getOpcode()) { 634 case Instruction::Add: 635 case Instruction::FAdd: 636 case Instruction::Sub: 637 case Instruction::FSub: 638 case Instruction::Mul: 639 case Instruction::FMul: 640 case Instruction::UDiv: 641 case Instruction::SDiv: 642 case Instruction::FDiv: 643 case Instruction::URem: 644 case Instruction::SRem: 645 case Instruction::FRem: 646 case Instruction::Shl: 647 case Instruction::LShr: 648 case Instruction::AShr: 649 case Instruction::And: 650 case Instruction::Or: 651 case Instruction::Xor: { 652 BinaryOperator *BO = cast<BinaryOperator>(I); 653 assert(NewOps.size() == 2 && "binary operator with #ops != 2"); 654 BinaryOperator *New = 655 BinaryOperator::Create(cast<BinaryOperator>(I)->getOpcode(), 656 NewOps[0], NewOps[1], "", BO); 657 if (isa<OverflowingBinaryOperator>(BO)) { 658 New->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap()); 659 New->setHasNoSignedWrap(BO->hasNoSignedWrap()); 660 } 661 if (isa<PossiblyExactOperator>(BO)) { 662 New->setIsExact(BO->isExact()); 663 } 664 if (isa<FPMathOperator>(BO)) 665 New->copyFastMathFlags(I); 666 return New; 667 } 668 case Instruction::ICmp: 669 assert(NewOps.size() == 2 && "icmp with #ops != 2"); 670 return new ICmpInst(I, cast<ICmpInst>(I)->getPredicate(), 671 NewOps[0], NewOps[1]); 672 case Instruction::FCmp: 673 assert(NewOps.size() == 2 && "fcmp with #ops != 2"); 674 return new FCmpInst(I, cast<FCmpInst>(I)->getPredicate(), 675 NewOps[0], NewOps[1]); 676 case Instruction::Trunc: 677 case Instruction::ZExt: 678 case Instruction::SExt: 679 case Instruction::FPToUI: 680 case Instruction::FPToSI: 681 case Instruction::UIToFP: 682 case Instruction::SIToFP: 683 case Instruction::FPTrunc: 684 case Instruction::FPExt: { 685 // It's possible that the mask has a different number of elements from 686 // the original cast. We recompute the destination type to match the mask. 687 Type *DestTy = 688 VectorType::get(I->getType()->getScalarType(), 689 NewOps[0]->getType()->getVectorNumElements()); 690 assert(NewOps.size() == 1 && "cast with #ops != 1"); 691 return CastInst::Create(cast<CastInst>(I)->getOpcode(), NewOps[0], DestTy, 692 "", I); 693 } 694 case Instruction::GetElementPtr: { 695 Value *Ptr = NewOps[0]; 696 ArrayRef<Value*> Idx = NewOps.slice(1); 697 GetElementPtrInst *GEP = GetElementPtrInst::Create(Ptr, Idx, "", I); 698 GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds()); 699 return GEP; 700 } 701 } 702 llvm_unreachable("failed to rebuild vector instructions"); 703} 704 705Value * 706InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) { 707 // Mask.size() does not need to be equal to the number of vector elements. 708 709 assert(V->getType()->isVectorTy() && "can't reorder non-vector elements"); 710 if (isa<UndefValue>(V)) { 711 return UndefValue::get(VectorType::get(V->getType()->getScalarType(), 712 Mask.size())); 713 } 714 if (isa<ConstantAggregateZero>(V)) { 715 return ConstantAggregateZero::get( 716 VectorType::get(V->getType()->getScalarType(), 717 Mask.size())); 718 } 719 if (Constant *C = dyn_cast<Constant>(V)) { 720 SmallVector<Constant *, 16> MaskValues; 721 for (int i = 0, e = Mask.size(); i != e; ++i) { 722 if (Mask[i] == -1) 723 MaskValues.push_back(UndefValue::get(Builder->getInt32Ty())); 724 else 725 MaskValues.push_back(Builder->getInt32(Mask[i])); 726 } 727 return ConstantExpr::getShuffleVector(C, UndefValue::get(C->getType()), 728 ConstantVector::get(MaskValues)); 729 } 730 731 Instruction *I = cast<Instruction>(V); 732 switch (I->getOpcode()) { 733 case Instruction::Add: 734 case Instruction::FAdd: 735 case Instruction::Sub: 736 case Instruction::FSub: 737 case Instruction::Mul: 738 case Instruction::FMul: 739 case Instruction::UDiv: 740 case Instruction::SDiv: 741 case Instruction::FDiv: 742 case Instruction::URem: 743 case Instruction::SRem: 744 case Instruction::FRem: 745 case Instruction::Shl: 746 case Instruction::LShr: 747 case Instruction::AShr: 748 case Instruction::And: 749 case Instruction::Or: 750 case Instruction::Xor: 751 case Instruction::ICmp: 752 case Instruction::FCmp: 753 case Instruction::Trunc: 754 case Instruction::ZExt: 755 case Instruction::SExt: 756 case Instruction::FPToUI: 757 case Instruction::FPToSI: 758 case Instruction::UIToFP: 759 case Instruction::SIToFP: 760 case Instruction::FPTrunc: 761 case Instruction::FPExt: 762 case Instruction::Select: 763 case Instruction::GetElementPtr: { 764 SmallVector<Value*, 8> NewOps; 765 bool NeedsRebuild = (Mask.size() != I->getType()->getVectorNumElements()); 766 for (int i = 0, e = I->getNumOperands(); i != e; ++i) { 767 Value *V = EvaluateInDifferentElementOrder(I->getOperand(i), Mask); 768 NewOps.push_back(V); 769 NeedsRebuild |= (V != I->getOperand(i)); 770 } 771 if (NeedsRebuild) { 772 return BuildNew(I, NewOps); 773 } 774 return I; 775 } 776 case Instruction::InsertElement: { 777 int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue(); 778 779 // The insertelement was inserting at Element. Figure out which element 780 // that becomes after shuffling. The answer is guaranteed to be unique 781 // by CanEvaluateShuffled. 782 bool Found = false; 783 int Index = 0; 784 for (int e = Mask.size(); Index != e; ++Index) { 785 if (Mask[Index] == Element) { 786 Found = true; 787 break; 788 } 789 } 790 791 // If element is not in Mask, no need to handle the operand 1 (element to 792 // be inserted). Just evaluate values in operand 0 according to Mask. 793 if (!Found) 794 return EvaluateInDifferentElementOrder(I->getOperand(0), Mask); 795 796 Value *V = EvaluateInDifferentElementOrder(I->getOperand(0), Mask); 797 return InsertElementInst::Create(V, I->getOperand(1), 798 Builder->getInt32(Index), "", I); 799 } 800 } 801 llvm_unreachable("failed to reorder elements of vector instruction!"); 802} 803 804Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { 805 Value *LHS = SVI.getOperand(0); 806 Value *RHS = SVI.getOperand(1); 807 SmallVector<int, 16> Mask = SVI.getShuffleMask(); 808 809 bool MadeChange = false; 810 811 // Undefined shuffle mask -> undefined value. 812 if (isa<UndefValue>(SVI.getOperand(2))) 813 return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType())); 814 815 unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements(); 816 817 APInt UndefElts(VWidth, 0); 818 APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); 819 if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) { 820 if (V != &SVI) 821 return ReplaceInstUsesWith(SVI, V); 822 LHS = SVI.getOperand(0); 823 RHS = SVI.getOperand(1); 824 MadeChange = true; 825 } 826 827 unsigned LHSWidth = cast<VectorType>(LHS->getType())->getNumElements(); 828 829 // Canonicalize shuffle(x ,x,mask) -> shuffle(x, undef,mask') 830 // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask'). 831 if (LHS == RHS || isa<UndefValue>(LHS)) { 832 if (isa<UndefValue>(LHS) && LHS == RHS) { 833 // shuffle(undef,undef,mask) -> undef. 834 Value *Result = (VWidth == LHSWidth) 835 ? LHS : UndefValue::get(SVI.getType()); 836 return ReplaceInstUsesWith(SVI, Result); 837 } 838 839 // Remap any references to RHS to use LHS. 840 SmallVector<Constant*, 16> Elts; 841 for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) { 842 if (Mask[i] < 0) { 843 Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext()))); 844 continue; 845 } 846 847 if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) || 848 (Mask[i] < (int)e && isa<UndefValue>(LHS))) { 849 Mask[i] = -1; // Turn into undef. 850 Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext()))); 851 } else { 852 Mask[i] = Mask[i] % e; // Force to LHS. 853 Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()), 854 Mask[i])); 855 } 856 } 857 SVI.setOperand(0, SVI.getOperand(1)); 858 SVI.setOperand(1, UndefValue::get(RHS->getType())); 859 SVI.setOperand(2, ConstantVector::get(Elts)); 860 LHS = SVI.getOperand(0); 861 RHS = SVI.getOperand(1); 862 MadeChange = true; 863 } 864 865 if (VWidth == LHSWidth) { 866 // Analyze the shuffle, are the LHS or RHS and identity shuffles? 867 bool isLHSID = true, isRHSID = true; 868 869 for (unsigned i = 0, e = Mask.size(); i != e; ++i) { 870 if (Mask[i] < 0) continue; // Ignore undef values. 871 // Is this an identity shuffle of the LHS value? 872 isLHSID &= (Mask[i] == (int)i); 873 874 // Is this an identity shuffle of the RHS value? 875 isRHSID &= (Mask[i]-e == i); 876 } 877 878 // Eliminate identity shuffles. 879 if (isLHSID) return ReplaceInstUsesWith(SVI, LHS); 880 if (isRHSID) return ReplaceInstUsesWith(SVI, RHS); 881 } 882 883 if (isa<UndefValue>(RHS) && CanEvaluateShuffled(LHS, Mask)) { 884 Value *V = EvaluateInDifferentElementOrder(LHS, Mask); 885 return ReplaceInstUsesWith(SVI, V); 886 } 887 888 // If the LHS is a shufflevector itself, see if we can combine it with this 889 // one without producing an unusual shuffle. 890 // Cases that might be simplified: 891 // 1. 892 // x1=shuffle(v1,v2,mask1) 893 // x=shuffle(x1,undef,mask) 894 // ==> 895 // x=shuffle(v1,undef,newMask) 896 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1 897 // 2. 898 // x1=shuffle(v1,undef,mask1) 899 // x=shuffle(x1,x2,mask) 900 // where v1.size() == mask1.size() 901 // ==> 902 // x=shuffle(v1,x2,newMask) 903 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i] 904 // 3. 905 // x2=shuffle(v2,undef,mask2) 906 // x=shuffle(x1,x2,mask) 907 // where v2.size() == mask2.size() 908 // ==> 909 // x=shuffle(x1,v2,newMask) 910 // newMask[i] = (mask[i] < x1.size()) 911 // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size() 912 // 4. 913 // x1=shuffle(v1,undef,mask1) 914 // x2=shuffle(v2,undef,mask2) 915 // x=shuffle(x1,x2,mask) 916 // where v1.size() == v2.size() 917 // ==> 918 // x=shuffle(v1,v2,newMask) 919 // newMask[i] = (mask[i] < x1.size()) 920 // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size() 921 // 922 // Here we are really conservative: 923 // we are absolutely afraid of producing a shuffle mask not in the input 924 // program, because the code gen may not be smart enough to turn a merged 925 // shuffle into two specific shuffles: it may produce worse code. As such, 926 // we only merge two shuffles if the result is either a splat or one of the 927 // input shuffle masks. In this case, merging the shuffles just removes 928 // one instruction, which we know is safe. This is good for things like 929 // turning: (splat(splat)) -> splat, or 930 // merge(V[0..n], V[n+1..2n]) -> V[0..2n] 931 ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS); 932 ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS); 933 if (LHSShuffle) 934 if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS)) 935 LHSShuffle = NULL; 936 if (RHSShuffle) 937 if (!isa<UndefValue>(RHSShuffle->getOperand(1))) 938 RHSShuffle = NULL; 939 if (!LHSShuffle && !RHSShuffle) 940 return MadeChange ? &SVI : 0; 941 942 Value* LHSOp0 = NULL; 943 Value* LHSOp1 = NULL; 944 Value* RHSOp0 = NULL; 945 unsigned LHSOp0Width = 0; 946 unsigned RHSOp0Width = 0; 947 if (LHSShuffle) { 948 LHSOp0 = LHSShuffle->getOperand(0); 949 LHSOp1 = LHSShuffle->getOperand(1); 950 LHSOp0Width = cast<VectorType>(LHSOp0->getType())->getNumElements(); 951 } 952 if (RHSShuffle) { 953 RHSOp0 = RHSShuffle->getOperand(0); 954 RHSOp0Width = cast<VectorType>(RHSOp0->getType())->getNumElements(); 955 } 956 Value* newLHS = LHS; 957 Value* newRHS = RHS; 958 if (LHSShuffle) { 959 // case 1 960 if (isa<UndefValue>(RHS)) { 961 newLHS = LHSOp0; 962 newRHS = LHSOp1; 963 } 964 // case 2 or 4 965 else if (LHSOp0Width == LHSWidth) { 966 newLHS = LHSOp0; 967 } 968 } 969 // case 3 or 4 970 if (RHSShuffle && RHSOp0Width == LHSWidth) { 971 newRHS = RHSOp0; 972 } 973 // case 4 974 if (LHSOp0 == RHSOp0) { 975 newLHS = LHSOp0; 976 newRHS = NULL; 977 } 978 979 if (newLHS == LHS && newRHS == RHS) 980 return MadeChange ? &SVI : 0; 981 982 SmallVector<int, 16> LHSMask; 983 SmallVector<int, 16> RHSMask; 984 if (newLHS != LHS) 985 LHSMask = LHSShuffle->getShuffleMask(); 986 if (RHSShuffle && newRHS != RHS) 987 RHSMask = RHSShuffle->getShuffleMask(); 988 989 unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth; 990 SmallVector<int, 16> newMask; 991 bool isSplat = true; 992 int SplatElt = -1; 993 // Create a new mask for the new ShuffleVectorInst so that the new 994 // ShuffleVectorInst is equivalent to the original one. 995 for (unsigned i = 0; i < VWidth; ++i) { 996 int eltMask; 997 if (Mask[i] < 0) { 998 // This element is an undef value. 999 eltMask = -1; 1000 } else if (Mask[i] < (int)LHSWidth) { 1001 // This element is from left hand side vector operand. 1002 // 1003 // If LHS is going to be replaced (case 1, 2, or 4), calculate the 1004 // new mask value for the element. 1005 if (newLHS != LHS) { 1006 eltMask = LHSMask[Mask[i]]; 1007 // If the value selected is an undef value, explicitly specify it 1008 // with a -1 mask value. 1009 if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1)) 1010 eltMask = -1; 1011 } else 1012 eltMask = Mask[i]; 1013 } else { 1014 // This element is from right hand side vector operand 1015 // 1016 // If the value selected is an undef value, explicitly specify it 1017 // with a -1 mask value. (case 1) 1018 if (isa<UndefValue>(RHS)) 1019 eltMask = -1; 1020 // If RHS is going to be replaced (case 3 or 4), calculate the 1021 // new mask value for the element. 1022 else if (newRHS != RHS) { 1023 eltMask = RHSMask[Mask[i]-LHSWidth]; 1024 // If the value selected is an undef value, explicitly specify it 1025 // with a -1 mask value. 1026 if (eltMask >= (int)RHSOp0Width) { 1027 assert(isa<UndefValue>(RHSShuffle->getOperand(1)) 1028 && "should have been check above"); 1029 eltMask = -1; 1030 } 1031 } else 1032 eltMask = Mask[i]-LHSWidth; 1033 1034 // If LHS's width is changed, shift the mask value accordingly. 1035 // If newRHS == NULL, i.e. LHSOp0 == RHSOp0, we want to remap any 1036 // references from RHSOp0 to LHSOp0, so we don't need to shift the mask. 1037 // If newRHS == newLHS, we want to remap any references from newRHS to 1038 // newLHS so that we can properly identify splats that may occur due to 1039 // obfuscation across the two vectors. 1040 if (eltMask >= 0 && newRHS != NULL && newLHS != newRHS) 1041 eltMask += newLHSWidth; 1042 } 1043 1044 // Check if this could still be a splat. 1045 if (eltMask >= 0) { 1046 if (SplatElt >= 0 && SplatElt != eltMask) 1047 isSplat = false; 1048 SplatElt = eltMask; 1049 } 1050 1051 newMask.push_back(eltMask); 1052 } 1053 1054 // If the result mask is equal to one of the original shuffle masks, 1055 // or is a splat, do the replacement. 1056 if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) { 1057 SmallVector<Constant*, 16> Elts; 1058 Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); 1059 for (unsigned i = 0, e = newMask.size(); i != e; ++i) { 1060 if (newMask[i] < 0) { 1061 Elts.push_back(UndefValue::get(Int32Ty)); 1062 } else { 1063 Elts.push_back(ConstantInt::get(Int32Ty, newMask[i])); 1064 } 1065 } 1066 if (newRHS == NULL) 1067 newRHS = UndefValue::get(newLHS->getType()); 1068 return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts)); 1069 } 1070 1071 return MadeChange ? &SVI : 0; 1072} 1073