InstCombineVectorOps.cpp revision 6548096a2e2b34e685680e6e1055b8e407c2c243
1//===- InstCombineVectorOps.cpp -------------------------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements instcombine for ExtractElement, InsertElement and 11// ShuffleVector. 12// 13//===----------------------------------------------------------------------===// 14 15#include "InstCombine.h" 16#include "llvm/Support/PatternMatch.h" 17using namespace llvm; 18using namespace PatternMatch; 19 20/// CheapToScalarize - Return true if the value is cheaper to scalarize than it 21/// is to leave as a vector operation. isConstant indicates whether we're 22/// extracting one known element. If false we're extracting a variable index. 23static bool CheapToScalarize(Value *V, bool isConstant) { 24 if (Constant *C = dyn_cast<Constant>(V)) { 25 if (isConstant) return true; 26 27 // If all elts are the same, we can extract it and use any of the values. 28 Constant *Op0 = C->getAggregateElement(0U); 29 for (unsigned i = 1, e = V->getType()->getVectorNumElements(); i != e; ++i) 30 if (C->getAggregateElement(i) != Op0) 31 return false; 32 return true; 33 } 34 Instruction *I = dyn_cast<Instruction>(V); 35 if (!I) return false; 36 37 // Insert element gets simplified to the inserted element or is deleted if 38 // this is constant idx extract element and its a constant idx insertelt. 39 if (I->getOpcode() == Instruction::InsertElement && isConstant && 40 isa<ConstantInt>(I->getOperand(2))) 41 return true; 42 if (I->getOpcode() == Instruction::Load && I->hasOneUse()) 43 return true; 44 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) 45 if (BO->hasOneUse() && 46 (CheapToScalarize(BO->getOperand(0), isConstant) || 47 CheapToScalarize(BO->getOperand(1), isConstant))) 48 return true; 49 if (CmpInst *CI = dyn_cast<CmpInst>(I)) 50 if (CI->hasOneUse() && 51 (CheapToScalarize(CI->getOperand(0), isConstant) || 52 CheapToScalarize(CI->getOperand(1), isConstant))) 53 return true; 54 55 return false; 56} 57 58/// FindScalarElement - Given a vector and an element number, see if the scalar 59/// value is already around as a register, for example if it were inserted then 60/// extracted from the vector. 61static Value *FindScalarElement(Value *V, unsigned EltNo) { 62 assert(V->getType()->isVectorTy() && "Not looking at a vector?"); 63 VectorType *VTy = cast<VectorType>(V->getType()); 64 unsigned Width = VTy->getNumElements(); 65 if (EltNo >= Width) // Out of range access. 66 return UndefValue::get(VTy->getElementType()); 67 68 if (Constant *C = dyn_cast<Constant>(V)) 69 return C->getAggregateElement(EltNo); 70 71 if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) { 72 // If this is an insert to a variable element, we don't know what it is. 73 if (!isa<ConstantInt>(III->getOperand(2))) 74 return 0; 75 unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue(); 76 77 // If this is an insert to the element we are looking for, return the 78 // inserted value. 79 if (EltNo == IIElt) 80 return III->getOperand(1); 81 82 // Otherwise, the insertelement doesn't modify the value, recurse on its 83 // vector input. 84 return FindScalarElement(III->getOperand(0), EltNo); 85 } 86 87 if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) { 88 unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements(); 89 int InEl = SVI->getMaskValue(EltNo); 90 if (InEl < 0) 91 return UndefValue::get(VTy->getElementType()); 92 if (InEl < (int)LHSWidth) 93 return FindScalarElement(SVI->getOperand(0), InEl); 94 return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth); 95 } 96 97 // Extract a value from a vector add operation with a constant zero. 98 Value *Val = 0; Constant *Con = 0; 99 if (match(V, m_Add(m_Value(Val), m_Constant(Con)))) { 100 if (Con->getAggregateElement(EltNo)->isNullValue()) 101 return FindScalarElement(Val, EltNo); 102 } 103 104 // Otherwise, we don't know. 105 return 0; 106} 107 108// If we have a PHI node with a vector type that has only 2 uses: feed 109// itself and be an operand of extractelemnt at a constant location, 110// try to replace the PHI of the vector type with a PHI of a scalar type 111Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) { 112 // Verify that the PHI node has exactly 2 uses. Otherwise return NULL. 113 if (!PN->hasNUses(2)) 114 return NULL; 115 116 // If so, it's known at this point that one operand is PHI and the other is 117 // an extractelement node. Find the PHI user that is not the extractelement 118 // node. 119 Value::use_iterator iu = PN->use_begin(); 120 Instruction *PHIUser = dyn_cast<Instruction>(*iu); 121 if (PHIUser == cast<Instruction>(&EI)) 122 PHIUser = cast<Instruction>(*(++iu)); 123 124 // Verify that this PHI user has one use, which is the PHI itself, 125 // and that it is a binary operation which is cheap to scalarize. 126 // otherwise return NULL. 127 if (!PHIUser->hasOneUse() || !(PHIUser->use_back() == PN) || 128 !(isa<BinaryOperator>(PHIUser)) || 129 !CheapToScalarize(PHIUser, true)) 130 return NULL; 131 132 // Create a scalar PHI node that will replace the vector PHI node 133 // just before the current PHI node. 134 PHINode * scalarPHI = cast<PHINode>( 135 InsertNewInstWith(PHINode::Create(EI.getType(), 136 PN->getNumIncomingValues(), ""), *PN)); 137 // Scalarize each PHI operand. 138 for (unsigned i=0; i < PN->getNumIncomingValues(); i++) { 139 Value *PHIInVal = PN->getIncomingValue(i); 140 BasicBlock *inBB = PN->getIncomingBlock(i); 141 Value *Elt = EI.getIndexOperand(); 142 // If the operand is the PHI induction variable: 143 if (PHIInVal == PHIUser) { 144 // Scalarize the binary operation. Its first operand is the 145 // scalar PHI and the second operand is extracted from the other 146 // vector operand. 147 BinaryOperator *B0 = cast<BinaryOperator>(PHIUser); 148 unsigned opId = (B0->getOperand(0) == PN) ? 1: 0; 149 Value *Op = Builder->CreateExtractElement( 150 B0->getOperand(opId), Elt, B0->getOperand(opId)->getName()+".Elt"); 151 Value *newPHIUser = InsertNewInstWith( 152 BinaryOperator::Create(B0->getOpcode(), scalarPHI,Op), 153 *B0); 154 scalarPHI->addIncoming(newPHIUser, inBB); 155 } else { 156 // Scalarize PHI input: 157 Instruction *newEI = 158 ExtractElementInst::Create(PHIInVal, Elt, ""); 159 // Insert the new instruction into the predecessor basic block. 160 Instruction *pos = dyn_cast<Instruction>(PHIInVal); 161 BasicBlock::iterator InsertPos; 162 if (pos && !isa<PHINode>(pos)) { 163 InsertPos = pos; 164 ++InsertPos; 165 } else { 166 InsertPos = inBB->getFirstInsertionPt(); 167 } 168 169 InsertNewInstWith(newEI, *InsertPos); 170 171 scalarPHI->addIncoming(newEI, inBB); 172 } 173 } 174 return ReplaceInstUsesWith(EI, scalarPHI); 175} 176 177Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { 178 // If vector val is constant with all elements the same, replace EI with 179 // that element. We handle a known element # below. 180 if (Constant *C = dyn_cast<Constant>(EI.getOperand(0))) 181 if (CheapToScalarize(C, false)) 182 return ReplaceInstUsesWith(EI, C->getAggregateElement(0U)); 183 184 // If extracting a specified index from the vector, see if we can recursively 185 // find a previously computed scalar that was inserted into the vector. 186 if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) { 187 unsigned IndexVal = IdxC->getZExtValue(); 188 unsigned VectorWidth = EI.getVectorOperandType()->getNumElements(); 189 190 // If this is extracting an invalid index, turn this into undef, to avoid 191 // crashing the code below. 192 if (IndexVal >= VectorWidth) 193 return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); 194 195 // This instruction only demands the single element from the input vector. 196 // If the input vector has a single use, simplify it based on this use 197 // property. 198 if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) { 199 APInt UndefElts(VectorWidth, 0); 200 APInt DemandedMask(VectorWidth, 0); 201 DemandedMask.setBit(IndexVal); 202 if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0), 203 DemandedMask, UndefElts)) { 204 EI.setOperand(0, V); 205 return &EI; 206 } 207 } 208 209 if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal)) 210 return ReplaceInstUsesWith(EI, Elt); 211 212 // If the this extractelement is directly using a bitcast from a vector of 213 // the same number of elements, see if we can find the source element from 214 // it. In this case, we will end up needing to bitcast the scalars. 215 if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) { 216 if (VectorType *VT = dyn_cast<VectorType>(BCI->getOperand(0)->getType())) 217 if (VT->getNumElements() == VectorWidth) 218 if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal)) 219 return new BitCastInst(Elt, EI.getType()); 220 } 221 222 // If there's a vector PHI feeding a scalar use through this extractelement 223 // instruction, try to scalarize the PHI. 224 if (PHINode *PN = dyn_cast<PHINode>(EI.getOperand(0))) { 225 Instruction *scalarPHI = scalarizePHI(EI, PN); 226 if (scalarPHI) 227 return (scalarPHI); 228 } 229 } 230 231 if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) { 232 // Push extractelement into predecessor operation if legal and 233 // profitable to do so 234 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) { 235 if (I->hasOneUse() && 236 CheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) { 237 Value *newEI0 = 238 Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1), 239 EI.getName()+".lhs"); 240 Value *newEI1 = 241 Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1), 242 EI.getName()+".rhs"); 243 return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1); 244 } 245 } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) { 246 // Extracting the inserted element? 247 if (IE->getOperand(2) == EI.getOperand(1)) 248 return ReplaceInstUsesWith(EI, IE->getOperand(1)); 249 // If the inserted and extracted elements are constants, they must not 250 // be the same value, extract from the pre-inserted value instead. 251 if (isa<Constant>(IE->getOperand(2)) && isa<Constant>(EI.getOperand(1))) { 252 Worklist.AddValue(EI.getOperand(0)); 253 EI.setOperand(0, IE->getOperand(0)); 254 return &EI; 255 } 256 } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) { 257 // If this is extracting an element from a shufflevector, figure out where 258 // it came from and extract from the appropriate input element instead. 259 if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) { 260 int SrcIdx = SVI->getMaskValue(Elt->getZExtValue()); 261 Value *Src; 262 unsigned LHSWidth = 263 SVI->getOperand(0)->getType()->getVectorNumElements(); 264 265 if (SrcIdx < 0) 266 return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); 267 if (SrcIdx < (int)LHSWidth) 268 Src = SVI->getOperand(0); 269 else { 270 SrcIdx -= LHSWidth; 271 Src = SVI->getOperand(1); 272 } 273 Type *Int32Ty = Type::getInt32Ty(EI.getContext()); 274 return ExtractElementInst::Create(Src, 275 ConstantInt::get(Int32Ty, 276 SrcIdx, false)); 277 } 278 } else if (CastInst *CI = dyn_cast<CastInst>(I)) { 279 // Canonicalize extractelement(cast) -> cast(extractelement) 280 // bitcasts can change the number of vector elements and they cost nothing 281 if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) { 282 Value *EE = Builder->CreateExtractElement(CI->getOperand(0), 283 EI.getIndexOperand()); 284 Worklist.AddValue(EE); 285 return CastInst::Create(CI->getOpcode(), EE, EI.getType()); 286 } 287 } 288 } 289 return 0; 290} 291 292/// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns 293/// elements from either LHS or RHS, return the shuffle mask and true. 294/// Otherwise, return false. 295static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, 296 SmallVectorImpl<Constant*> &Mask) { 297 assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() && 298 "Invalid CollectSingleShuffleElements"); 299 unsigned NumElts = cast<VectorType>(V->getType())->getNumElements(); 300 301 if (isa<UndefValue>(V)) { 302 Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext()))); 303 return true; 304 } 305 306 if (V == LHS) { 307 for (unsigned i = 0; i != NumElts; ++i) 308 Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i)); 309 return true; 310 } 311 312 if (V == RHS) { 313 for (unsigned i = 0; i != NumElts; ++i) 314 Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), 315 i+NumElts)); 316 return true; 317 } 318 319 if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) { 320 // If this is an insert of an extract from some other vector, include it. 321 Value *VecOp = IEI->getOperand(0); 322 Value *ScalarOp = IEI->getOperand(1); 323 Value *IdxOp = IEI->getOperand(2); 324 325 if (!isa<ConstantInt>(IdxOp)) 326 return false; 327 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); 328 329 if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector. 330 // Okay, we can handle this if the vector we are insertinting into is 331 // transitively ok. 332 if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { 333 // If so, update the mask to reflect the inserted undef. 334 Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext())); 335 return true; 336 } 337 } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){ 338 if (isa<ConstantInt>(EI->getOperand(1)) && 339 EI->getOperand(0)->getType() == V->getType()) { 340 unsigned ExtractedIdx = 341 cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); 342 343 // This must be extracting from either LHS or RHS. 344 if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) { 345 // Okay, we can handle this if the vector we are insertinting into is 346 // transitively ok. 347 if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { 348 // If so, update the mask to reflect the inserted value. 349 if (EI->getOperand(0) == LHS) { 350 Mask[InsertedIdx % NumElts] = 351 ConstantInt::get(Type::getInt32Ty(V->getContext()), 352 ExtractedIdx); 353 } else { 354 assert(EI->getOperand(0) == RHS); 355 Mask[InsertedIdx % NumElts] = 356 ConstantInt::get(Type::getInt32Ty(V->getContext()), 357 ExtractedIdx+NumElts); 358 } 359 return true; 360 } 361 } 362 } 363 } 364 } 365 // TODO: Handle shufflevector here! 366 367 return false; 368} 369 370/// CollectShuffleElements - We are building a shuffle of V, using RHS as the 371/// RHS of the shuffle instruction, if it is not null. Return a shuffle mask 372/// that computes V and the LHS value of the shuffle. 373static Value *CollectShuffleElements(Value *V, SmallVectorImpl<Constant*> &Mask, 374 Value *&RHS) { 375 assert(V->getType()->isVectorTy() && 376 (RHS == 0 || V->getType() == RHS->getType()) && 377 "Invalid shuffle!"); 378 unsigned NumElts = cast<VectorType>(V->getType())->getNumElements(); 379 380 if (isa<UndefValue>(V)) { 381 Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext()))); 382 return V; 383 } 384 385 if (isa<ConstantAggregateZero>(V)) { 386 Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0)); 387 return V; 388 } 389 390 if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) { 391 // If this is an insert of an extract from some other vector, include it. 392 Value *VecOp = IEI->getOperand(0); 393 Value *ScalarOp = IEI->getOperand(1); 394 Value *IdxOp = IEI->getOperand(2); 395 396 if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) { 397 if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) && 398 EI->getOperand(0)->getType() == V->getType()) { 399 unsigned ExtractedIdx = 400 cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); 401 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); 402 403 // Either the extracted from or inserted into vector must be RHSVec, 404 // otherwise we'd end up with a shuffle of three inputs. 405 if (EI->getOperand(0) == RHS || RHS == 0) { 406 RHS = EI->getOperand(0); 407 Value *V = CollectShuffleElements(VecOp, Mask, RHS); 408 Mask[InsertedIdx % NumElts] = 409 ConstantInt::get(Type::getInt32Ty(V->getContext()), 410 NumElts+ExtractedIdx); 411 return V; 412 } 413 414 if (VecOp == RHS) { 415 Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS); 416 // Update Mask to reflect that `ScalarOp' has been inserted at 417 // position `InsertedIdx' within the vector returned by IEI. 418 Mask[InsertedIdx % NumElts] = Mask[ExtractedIdx]; 419 420 // Everything but the extracted element is replaced with the RHS. 421 for (unsigned i = 0; i != NumElts; ++i) { 422 if (i != InsertedIdx) 423 Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()), 424 NumElts+i); 425 } 426 return V; 427 } 428 429 // If this insertelement is a chain that comes from exactly these two 430 // vectors, return the vector and the effective shuffle. 431 if (CollectSingleShuffleElements(IEI, EI->getOperand(0), RHS, Mask)) 432 return EI->getOperand(0); 433 } 434 } 435 } 436 // TODO: Handle shufflevector here! 437 438 // Otherwise, can't do anything fancy. Return an identity vector. 439 for (unsigned i = 0; i != NumElts; ++i) 440 Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i)); 441 return V; 442} 443 444Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { 445 Value *VecOp = IE.getOperand(0); 446 Value *ScalarOp = IE.getOperand(1); 447 Value *IdxOp = IE.getOperand(2); 448 449 // Inserting an undef or into an undefined place, remove this. 450 if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp)) 451 ReplaceInstUsesWith(IE, VecOp); 452 453 // If the inserted element was extracted from some other vector, and if the 454 // indexes are constant, try to turn this into a shufflevector operation. 455 if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) { 456 if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) && 457 EI->getOperand(0)->getType() == IE.getType()) { 458 unsigned NumVectorElts = IE.getType()->getNumElements(); 459 unsigned ExtractedIdx = 460 cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); 461 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); 462 463 if (ExtractedIdx >= NumVectorElts) // Out of range extract. 464 return ReplaceInstUsesWith(IE, VecOp); 465 466 if (InsertedIdx >= NumVectorElts) // Out of range insert. 467 return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType())); 468 469 // If we are extracting a value from a vector, then inserting it right 470 // back into the same place, just use the input vector. 471 if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx) 472 return ReplaceInstUsesWith(IE, VecOp); 473 474 // If this insertelement isn't used by some other insertelement, turn it 475 // (and any insertelements it points to), into one big shuffle. 476 if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) { 477 SmallVector<Constant*, 16> Mask; 478 Value *RHS = 0; 479 Value *LHS = CollectShuffleElements(&IE, Mask, RHS); 480 if (RHS == 0) RHS = UndefValue::get(LHS->getType()); 481 // We now have a shuffle of LHS, RHS, Mask. 482 return new ShuffleVectorInst(LHS, RHS, ConstantVector::get(Mask)); 483 } 484 } 485 } 486 487 unsigned VWidth = cast<VectorType>(VecOp->getType())->getNumElements(); 488 APInt UndefElts(VWidth, 0); 489 APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); 490 if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) { 491 if (V != &IE) 492 return ReplaceInstUsesWith(IE, V); 493 return &IE; 494 } 495 496 return 0; 497} 498 499 500Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { 501 Value *LHS = SVI.getOperand(0); 502 Value *RHS = SVI.getOperand(1); 503 SmallVector<int, 16> Mask = SVI.getShuffleMask(); 504 505 bool MadeChange = false; 506 507 // Undefined shuffle mask -> undefined value. 508 if (isa<UndefValue>(SVI.getOperand(2))) 509 return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType())); 510 511 unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements(); 512 513 APInt UndefElts(VWidth, 0); 514 APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); 515 if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) { 516 if (V != &SVI) 517 return ReplaceInstUsesWith(SVI, V); 518 LHS = SVI.getOperand(0); 519 RHS = SVI.getOperand(1); 520 MadeChange = true; 521 } 522 523 unsigned LHSWidth = cast<VectorType>(LHS->getType())->getNumElements(); 524 525 // Canonicalize shuffle(x ,x,mask) -> shuffle(x, undef,mask') 526 // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask'). 527 if (LHS == RHS || isa<UndefValue>(LHS)) { 528 if (isa<UndefValue>(LHS) && LHS == RHS) { 529 // shuffle(undef,undef,mask) -> undef. 530 Value* result = (VWidth == LHSWidth) 531 ? LHS : UndefValue::get(SVI.getType()); 532 return ReplaceInstUsesWith(SVI, result); 533 } 534 535 // Remap any references to RHS to use LHS. 536 SmallVector<Constant*, 16> Elts; 537 for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) { 538 if (Mask[i] < 0) { 539 Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext()))); 540 continue; 541 } 542 543 if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) || 544 (Mask[i] < (int)e && isa<UndefValue>(LHS))) { 545 Mask[i] = -1; // Turn into undef. 546 Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext()))); 547 } else { 548 Mask[i] = Mask[i] % e; // Force to LHS. 549 Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()), 550 Mask[i])); 551 } 552 } 553 SVI.setOperand(0, SVI.getOperand(1)); 554 SVI.setOperand(1, UndefValue::get(RHS->getType())); 555 SVI.setOperand(2, ConstantVector::get(Elts)); 556 LHS = SVI.getOperand(0); 557 RHS = SVI.getOperand(1); 558 MadeChange = true; 559 } 560 561 if (VWidth == LHSWidth) { 562 // Analyze the shuffle, are the LHS or RHS and identity shuffles? 563 bool isLHSID = true, isRHSID = true; 564 565 for (unsigned i = 0, e = Mask.size(); i != e; ++i) { 566 if (Mask[i] < 0) continue; // Ignore undef values. 567 // Is this an identity shuffle of the LHS value? 568 isLHSID &= (Mask[i] == (int)i); 569 570 // Is this an identity shuffle of the RHS value? 571 isRHSID &= (Mask[i]-e == i); 572 } 573 574 // Eliminate identity shuffles. 575 if (isLHSID) return ReplaceInstUsesWith(SVI, LHS); 576 if (isRHSID) return ReplaceInstUsesWith(SVI, RHS); 577 } 578 579 // If the LHS is a shufflevector itself, see if we can combine it with this 580 // one without producing an unusual shuffle. 581 // Cases that might be simplified: 582 // 1. 583 // x1=shuffle(v1,v2,mask1) 584 // x=shuffle(x1,undef,mask) 585 // ==> 586 // x=shuffle(v1,undef,newMask) 587 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1 588 // 2. 589 // x1=shuffle(v1,undef,mask1) 590 // x=shuffle(x1,x2,mask) 591 // where v1.size() == mask1.size() 592 // ==> 593 // x=shuffle(v1,x2,newMask) 594 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i] 595 // 3. 596 // x2=shuffle(v2,undef,mask2) 597 // x=shuffle(x1,x2,mask) 598 // where v2.size() == mask2.size() 599 // ==> 600 // x=shuffle(x1,v2,newMask) 601 // newMask[i] = (mask[i] < x1.size()) 602 // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size() 603 // 4. 604 // x1=shuffle(v1,undef,mask1) 605 // x2=shuffle(v2,undef,mask2) 606 // x=shuffle(x1,x2,mask) 607 // where v1.size() == v2.size() 608 // ==> 609 // x=shuffle(v1,v2,newMask) 610 // newMask[i] = (mask[i] < x1.size()) 611 // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size() 612 // 613 // Here we are really conservative: 614 // we are absolutely afraid of producing a shuffle mask not in the input 615 // program, because the code gen may not be smart enough to turn a merged 616 // shuffle into two specific shuffles: it may produce worse code. As such, 617 // we only merge two shuffles if the result is a splat, one of the input 618 // input shuffle masks, or if there's only one input to the shuffle. 619 // In this case, merging the shuffles just removes one instruction, which 620 // we know is safe. This is good for things like 621 // turning: (splat(splat)) -> splat, or 622 // merge(V[0..n], V[n+1..2n]) -> V[0..2n] 623 // 624 // FIXME: This is almost certainly far, far too conservative. We should 625 // have a better model. Perhaps a TargetTransformInfo hook to ask whether 626 // a shuffle is considered OK? 627 ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS); 628 ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS); 629 if (LHSShuffle) 630 if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS)) 631 LHSShuffle = NULL; 632 if (RHSShuffle) 633 if (!isa<UndefValue>(RHSShuffle->getOperand(1))) 634 RHSShuffle = NULL; 635 if (!LHSShuffle && !RHSShuffle) 636 return MadeChange ? &SVI : 0; 637 638 Value* LHSOp0 = NULL; 639 Value* LHSOp1 = NULL; 640 Value* RHSOp0 = NULL; 641 unsigned LHSOp0Width = 0; 642 unsigned RHSOp0Width = 0; 643 if (LHSShuffle) { 644 LHSOp0 = LHSShuffle->getOperand(0); 645 LHSOp1 = LHSShuffle->getOperand(1); 646 LHSOp0Width = cast<VectorType>(LHSOp0->getType())->getNumElements(); 647 } 648 if (RHSShuffle) { 649 RHSOp0 = RHSShuffle->getOperand(0); 650 RHSOp0Width = cast<VectorType>(RHSOp0->getType())->getNumElements(); 651 } 652 Value* newLHS = LHS; 653 Value* newRHS = RHS; 654 if (LHSShuffle) { 655 // case 1 656 if (isa<UndefValue>(RHS)) { 657 newLHS = LHSOp0; 658 newRHS = LHSOp1; 659 } 660 // case 2 or 4 661 else if (LHSOp0Width == LHSWidth) { 662 newLHS = LHSOp0; 663 } 664 } 665 // case 3 or 4 666 if (RHSShuffle && RHSOp0Width == LHSWidth) { 667 newRHS = RHSOp0; 668 } 669 // case 4 670 if (LHSOp0 == RHSOp0) { 671 newLHS = LHSOp0; 672 newRHS = NULL; 673 } 674 675 if (newLHS == LHS && newRHS == RHS) 676 return MadeChange ? &SVI : 0; 677 678 SmallVector<int, 16> LHSMask; 679 SmallVector<int, 16> RHSMask; 680 if (newLHS != LHS) 681 LHSMask = LHSShuffle->getShuffleMask(); 682 if (RHSShuffle && newRHS != RHS) 683 RHSMask = RHSShuffle->getShuffleMask(); 684 685 unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth; 686 SmallVector<int, 16> newMask; 687 bool isSplat = true; 688 int SplatElt = -1; 689 // Create a new mask for the new ShuffleVectorInst so that the new 690 // ShuffleVectorInst is equivalent to the original one. 691 for (unsigned i = 0; i < VWidth; ++i) { 692 int eltMask; 693 if (Mask[i] < 0) { 694 // This element is an undef value. 695 eltMask = -1; 696 } else if (Mask[i] < (int)LHSWidth) { 697 // This element is from left hand side vector operand. 698 // 699 // If LHS is going to be replaced (case 1, 2, or 4), calculate the 700 // new mask value for the element. 701 if (newLHS != LHS) { 702 eltMask = LHSMask[Mask[i]]; 703 // If the value selected is an undef value, explicitly specify it 704 // with a -1 mask value. 705 if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1)) 706 eltMask = -1; 707 } else 708 eltMask = Mask[i]; 709 } else { 710 // This element is from right hand side vector operand 711 // 712 // If the value selected is an undef value, explicitly specify it 713 // with a -1 mask value. (case 1) 714 if (isa<UndefValue>(RHS)) 715 eltMask = -1; 716 // If RHS is going to be replaced (case 3 or 4), calculate the 717 // new mask value for the element. 718 else if (newRHS != RHS) { 719 eltMask = RHSMask[Mask[i]-LHSWidth]; 720 // If the value selected is an undef value, explicitly specify it 721 // with a -1 mask value. 722 if (eltMask >= (int)RHSOp0Width) { 723 assert(isa<UndefValue>(RHSShuffle->getOperand(1)) 724 && "should have been check above"); 725 eltMask = -1; 726 } 727 } else 728 eltMask = Mask[i]-LHSWidth; 729 730 // If LHS's width is changed, shift the mask value accordingly. 731 // If newRHS == NULL, i.e. LHSOp0 == RHSOp0, we want to remap any 732 // references from RHSOp0 to LHSOp0, so we don't need to shift the mask. 733 // If newRHS == newLHS, we want to remap any references from newRHS to 734 // newLHS so that we can properly identify splats that may occur due to 735 // obfuscation accross the two vectors. 736 if (eltMask >= 0 && newRHS != NULL && newLHS != newRHS) 737 eltMask += newLHSWidth; 738 } 739 740 // Check if this could still be a splat. 741 if (eltMask >= 0) { 742 if (SplatElt >= 0 && SplatElt != eltMask) 743 isSplat = false; 744 SplatElt = eltMask; 745 } 746 747 newMask.push_back(eltMask); 748 } 749 750 // If the result mask is equal to one of the original shuffle masks, 751 // or is a splat, do the replacement. Similarly, if there is only one 752 // input vector, go ahead and do the folding. 753 if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask || 754 isa<UndefValue>(RHS)) { 755 SmallVector<Constant*, 16> Elts; 756 Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); 757 for (unsigned i = 0, e = newMask.size(); i != e; ++i) { 758 if (newMask[i] < 0) { 759 Elts.push_back(UndefValue::get(Int32Ty)); 760 } else { 761 Elts.push_back(ConstantInt::get(Int32Ty, newMask[i])); 762 } 763 } 764 if (newRHS == NULL) 765 newRHS = UndefValue::get(newLHS->getType()); 766 return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts)); 767 } 768 769 return MadeChange ? &SVI : 0; 770} 771