1f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org//===-- AMDILPeepholeOptimizer.cpp - AMDIL Peephole optimizations ---------===//
2f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org//
3f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org//                     The LLVM Compiler Infrastructure
4f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org//
5f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// This file is distributed under the University of Illinois Open Source
6f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// License. See LICENSE.TXT for details.
7f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org//
8f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org//==-----------------------------------------------------------------------===//
9f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
10f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "AMDILDevices.h"
11f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "AMDGPUInstrInfo.h"
12f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "llvm/ADT/Statistic.h"
13f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "llvm/ADT/StringExtras.h"
14f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "llvm/ADT/StringRef.h"
15f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "llvm/ADT/Twine.h"
16f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "llvm/Constants.h"
17f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "llvm/CodeGen/MachineFunction.h"
18f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "llvm/CodeGen/MachineFunctionAnalysis.h"
19f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "llvm/Function.h"
20f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "llvm/Instructions.h"
21f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "llvm/Module.h"
22f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "llvm/Support/Debug.h"
23f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "llvm/Support/MathExtras.h"
24f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
25f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include <sstream>
26f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
27f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#if 0
28f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgSTATISTIC(PointerAssignments, "Number of dynamic pointer "
29f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    "assigments discovered");
30f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgSTATISTIC(PointerSubtract, "Number of pointer subtractions discovered");
31f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#endif
32f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
33f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgusing namespace llvm;
34f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// The Peephole optimization pass is used to do simple last minute optimizations
35f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// that are required for correct code or to remove redundant functions
36f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgnamespace {
37f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
38f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgclass OpaqueType;
39f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
40f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgclass LLVM_LIBRARY_VISIBILITY AMDGPUPeepholeOpt : public FunctionPass {
41f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgpublic:
42f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  TargetMachine &TM;
43f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  static char ID;
44f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  AMDGPUPeepholeOpt(TargetMachine &tm);
45f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  ~AMDGPUPeepholeOpt();
46f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  const char *getPassName() const;
47f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  bool runOnFunction(Function &F);
48f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  bool doInitialization(Module &M);
49f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  bool doFinalization(Module &M);
50f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  void getAnalysisUsage(AnalysisUsage &AU) const;
51f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgprotected:
52f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgprivate:
53f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // Function to initiate all of the instruction level optimizations.
54f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  bool instLevelOptimizations(BasicBlock::iterator *inst);
55f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // Quick check to see if we need to dump all of the pointers into the
56f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // arena. If this is correct, then we set all pointers to exist in arena. This
57f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // is a workaround for aliasing of pointers in a struct/union.
58f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  bool dumpAllIntoArena(Function &F);
59f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // Because I don't want to invalidate any pointers while in the
60f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // safeNestedForEachFunction. I push atomic conversions to a vector and handle
61f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // it later. This function does the conversions if required.
62f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  void doAtomicConversionIfNeeded(Function &F);
63f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // Because __amdil_is_constant cannot be properly evaluated if
64f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // optimizations are disabled, the call's are placed in a vector
65f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // and evaluated after the __amdil_image* functions are evaluated
66f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // which should allow the __amdil_is_constant function to be
67f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // evaluated correctly.
68f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  void doIsConstCallConversionIfNeeded();
69f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  bool mChanged;
70f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  bool mDebug;
71f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  bool mConvertAtomics;
72f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  CodeGenOpt::Level optLevel;
73f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // Run a series of tests to see if we can optimize a CALL instruction.
74f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  bool optimizeCallInst(BasicBlock::iterator *bbb);
75f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // A peephole optimization to optimize bit extract sequences.
76f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  bool optimizeBitExtract(Instruction *inst);
77f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // A peephole optimization to optimize bit insert sequences.
78f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  bool optimizeBitInsert(Instruction *inst);
79f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  bool setupBitInsert(Instruction *base,
80f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                      Instruction *&src,
81f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                      Constant *&mask,
82f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                      Constant *&shift);
83f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // Expand the bit field insert instruction on versions of OpenCL that
84f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // don't support it.
85f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  bool expandBFI(CallInst *CI);
86f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // Expand the bit field mask instruction on version of OpenCL that
87f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // don't support it.
88f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  bool expandBFM(CallInst *CI);
89f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // On 7XX and 8XX operations, we do not have 24 bit signed operations. So in
90f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // this case we need to expand them. These functions check for 24bit functions
91f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // and then expand.
92f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  bool isSigned24BitOps(CallInst *CI);
93f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  void expandSigned24BitOps(CallInst *CI);
94f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // One optimization that can occur is that if the required workgroup size is
95f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // specified then the result of get_local_size is known at compile time and
96f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // can be returned accordingly.
97f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  bool isRWGLocalOpt(CallInst *CI);
98f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // On northern island cards, the division is slightly less accurate than on
99f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // previous generations, so we need to utilize a more accurate division. So we
100f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // can translate the accurate divide to a normal divide on all other cards.
101f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  bool convertAccurateDivide(CallInst *CI);
102f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  void expandAccurateDivide(CallInst *CI);
103f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // If the alignment is set incorrectly, it can produce really inefficient
104f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // code. This checks for this scenario and fixes it if possible.
105f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  bool correctMisalignedMemOp(Instruction *inst);
106f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
107f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // If we are in no opt mode, then we need to make sure that
108f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // local samplers are properly propagated as constant propagation
109f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // doesn't occur and we need to know the value of kernel defined
110f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // samplers at compile time.
111f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  bool propagateSamplerInst(CallInst *CI);
112f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
113f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // Helper functions
114f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
115f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // Group of functions that recursively calculate the size of a structure based
116f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // on it's sub-types.
117f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  size_t getTypeSize(Type * const T, bool dereferencePtr = false);
118f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  size_t getTypeSize(StructType * const ST, bool dereferencePtr = false);
119f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  size_t getTypeSize(IntegerType * const IT, bool dereferencePtr = false);
120f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  size_t getTypeSize(FunctionType * const FT,bool dereferencePtr = false);
121f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  size_t getTypeSize(ArrayType * const AT, bool dereferencePtr = false);
122f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  size_t getTypeSize(VectorType * const VT, bool dereferencePtr = false);
123f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  size_t getTypeSize(PointerType * const PT, bool dereferencePtr = false);
124f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  size_t getTypeSize(OpaqueType * const OT, bool dereferencePtr = false);
125f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
126f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  LLVMContext *mCTX;
127f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  Function *mF;
128f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  const AMDGPUSubtarget *mSTM;
129f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  SmallVector< std::pair<CallInst *, Function *>, 16> atomicFuncs;
130f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  SmallVector<CallInst *, 16> isConstVec;
131f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}; // class AMDGPUPeepholeOpt
132f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  char AMDGPUPeepholeOpt::ID = 0;
133f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
// Two-level traversal helper: for every element of [First, Last) walk its
// inner range (begin() .. end()) and hand the callback a pointer to the
// current inner iterator.
//
// The callback returns true when it has already moved the iterator forward
// itself (for example because it removed the current element); otherwise the
// iterator is advanced here. S is never read; it only lets the compiler
// deduce the inner iterator type. The callback, which was taken by value, is
// returned.
template<class InputIterator, class SecondIterator, class Function>
Function safeNestedForEach(InputIterator First, InputIterator Last,
                           SecondIterator S, Function F)
{
  (void)S;
  while (First != Last) {
    SecondIterator cur = First->begin();
    SecondIterator stop = First->end();
    while (cur != stop) {
      const bool alreadyAdvanced = F(&cur);
      if (!alreadyAdvanced) {
        ++cur;
      }
    }
    ++First;
  }
  return F;
}
151f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
152f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} // anonymous namespace
153f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
154f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgnamespace llvm {
155f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  FunctionPass *
156f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  createAMDGPUPeepholeOpt(TargetMachine &tm)
157f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  {
158f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return new AMDGPUPeepholeOpt(tm);
159f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
160f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} // llvm namespace
161f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
162f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::AMDGPUPeepholeOpt(TargetMachine &tm)
163f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  : FunctionPass(ID), TM(tm)
164f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
165f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  mDebug = false;
166f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  optLevel = TM.getOptLevel();
167f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
168f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
169f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
170f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::~AMDGPUPeepholeOpt()
171f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
172f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
173f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
174f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgconst char *
175f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::getPassName() const
176f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
177f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  return "AMDGPU PeepHole Optimization Pass";
178f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
179f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
180f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
181f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgcontainsPointerType(Type *Ty)
182f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
183f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (!Ty) {
184f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
185f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
186f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  switch(Ty->getTypeID()) {
187f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  default:
188f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
189f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  case Type::StructTyID: {
190f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    const StructType *ST = dyn_cast<StructType>(Ty);
191f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    for (StructType::element_iterator stb = ST->element_begin(),
192f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org           ste = ST->element_end(); stb != ste; ++stb) {
193f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (!containsPointerType(*stb)) {
194f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        continue;
195f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
196f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return true;
197f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    }
198f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    break;
199f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
200f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  case Type::VectorTyID:
201f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  case Type::ArrayTyID:
202f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return containsPointerType(dyn_cast<SequentialType>(Ty)->getElementType());
203f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  case Type::PointerTyID:
204f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return true;
205f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  };
206f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  return false;
207f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
208f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
209f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
210f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::dumpAllIntoArena(Function &F)
211f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
212f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  bool dumpAll = false;
213f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  for (Function::const_arg_iterator cab = F.arg_begin(),
214f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org       cae = F.arg_end(); cab != cae; ++cab) {
215f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    const Argument *arg = cab;
216f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    const PointerType *PT = dyn_cast<PointerType>(arg->getType());
217f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    if (!PT) {
218f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      continue;
219f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    }
220f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    Type *DereferencedType = PT->getElementType();
221f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    if (!dyn_cast<StructType>(DereferencedType)
222f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        ) {
223f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      continue;
224f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    }
225f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    if (!containsPointerType(DereferencedType)) {
226f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      continue;
227f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    }
228f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    // FIXME: Because a pointer inside of a struct/union may be aliased to
229f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    // another pointer we need to take the conservative approach and place all
230f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    // pointers into the arena until more advanced detection is implemented.
231f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    dumpAll = true;
232f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
233f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  return dumpAll;
234f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
235f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
236f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::doIsConstCallConversionIfNeeded()
237f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
238f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (isConstVec.empty()) {
239f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return;
240f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
241f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  for (unsigned x = 0, y = isConstVec.size(); x < y; ++x) {
242f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    CallInst *CI = isConstVec[x];
243f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    Constant *CV = dyn_cast<Constant>(CI->getOperand(0));
244f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    Type *aType = Type::getInt32Ty(*mCTX);
245f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1)
246f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      : ConstantInt::get(aType, 0);
247f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    CI->replaceAllUsesWith(Val);
248f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    CI->eraseFromParent();
249f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
250f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  isConstVec.clear();
251f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
252f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
253f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::doAtomicConversionIfNeeded(Function &F)
254f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
255f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // Don't do anything if we don't have any atomic operations.
256f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (atomicFuncs.empty()) {
257f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return;
258f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
259f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // Change the function name for the atomic if it is required
260f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  uint32_t size = atomicFuncs.size();
261f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  for (uint32_t x = 0; x < size; ++x) {
262f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    atomicFuncs[x].first->setOperand(
263f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        atomicFuncs[x].first->getNumOperands()-1,
264f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        atomicFuncs[x].second);
265f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
266f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
267f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  mChanged = true;
268f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (mConvertAtomics) {
269f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return;
270f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
271f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
272f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
273f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
274f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::runOnFunction(Function &MF)
275f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
276f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  mChanged = false;
277f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  mF = &MF;
278f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  mSTM = &TM.getSubtarget<AMDGPUSubtarget>();
279f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (mDebug) {
280f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    MF.dump();
281f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
282f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  mCTX = &MF.getType()->getContext();
283f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  mConvertAtomics = true;
284f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(),
285f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org     std::bind1st(std::mem_fun(&AMDGPUPeepholeOpt::instLevelOptimizations),
286f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                  this));
287f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
288f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  doAtomicConversionIfNeeded(MF);
289f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  doIsConstCallConversionIfNeeded();
290f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
291f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (mDebug) {
292f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    MF.dump();
293f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
294f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  return mChanged;
295f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
296f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
297f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
298f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb)
299f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
300f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  Instruction *inst = (*bbb);
301f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  CallInst *CI = dyn_cast<CallInst>(inst);
302f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (!CI) {
303f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
304f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
305f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (isSigned24BitOps(CI)) {
306f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    expandSigned24BitOps(CI);
307f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    ++(*bbb);
308f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    CI->eraseFromParent();
309f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return true;
310f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
311f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (propagateSamplerInst(CI)) {
312f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
313f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
314f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (expandBFI(CI) || expandBFM(CI)) {
315f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    ++(*bbb);
316f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    CI->eraseFromParent();
317f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return true;
318f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
319f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (convertAccurateDivide(CI)) {
320f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    expandAccurateDivide(CI);
321f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    ++(*bbb);
322f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    CI->eraseFromParent();
323f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return true;
324f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
325f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
326f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  StringRef calleeName = CI->getOperand(CI->getNumOperands()-1)->getName();
327f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (calleeName.startswith("__amdil_is_constant")) {
328f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    // If we do not have optimizations, then this
329f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    // cannot be properly evaluated, so we add the
330f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    // call instruction to a vector and process
331f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    // them at the end of processing after the
332f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    // samplers have been correctly handled.
333f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    if (optLevel == CodeGenOpt::None) {
334f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      isConstVec.push_back(CI);
335f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return false;
336f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    } else {
337f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      Constant *CV = dyn_cast<Constant>(CI->getOperand(0));
338f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      Type *aType = Type::getInt32Ty(*mCTX);
339f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1)
340f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        : ConstantInt::get(aType, 0);
341f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      CI->replaceAllUsesWith(Val);
342f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      ++(*bbb);
343f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      CI->eraseFromParent();
344f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return true;
345f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    }
346f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
347f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
348f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (calleeName.equals("__amdil_is_asic_id_i32")) {
349f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    ConstantInt *CV = dyn_cast<ConstantInt>(CI->getOperand(0));
350f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    Type *aType = Type::getInt32Ty(*mCTX);
351f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    Value *Val = CV;
352f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    if (Val) {
353f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      Val = ConstantInt::get(aType,
354f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          mSTM->device()->getDeviceFlag() & CV->getZExtValue());
355f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    } else {
356f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      Val = ConstantInt::get(aType, 0);
357f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    }
358f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    CI->replaceAllUsesWith(Val);
359f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    ++(*bbb);
360f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    CI->eraseFromParent();
361f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return true;
362f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
363f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  Function *F = dyn_cast<Function>(CI->getOperand(CI->getNumOperands()-1));
364f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (!F) {
365f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
366f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
367f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (F->getName().startswith("__atom") && !CI->getNumUses()
368f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      && F->getName().find("_xchg") == StringRef::npos) {
369f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    std::string buffer(F->getName().str() + "_noret");
370f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    F = dyn_cast<Function>(
371f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          F->getParent()->getOrInsertFunction(buffer, F->getFunctionType()));
372f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    atomicFuncs.push_back(std::make_pair <CallInst*, Function*>(CI, F));
373f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
374f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
375f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (!mSTM->device()->isSupported(AMDGPUDeviceInfo::ArenaSegment)
376f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      && !mSTM->device()->isSupported(AMDGPUDeviceInfo::MultiUAV)) {
377f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
378f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
379f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (!mConvertAtomics) {
380f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
381f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
382f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  StringRef name = F->getName();
383f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (name.startswith("__atom") && name.find("_g") != StringRef::npos) {
384f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    mConvertAtomics = false;
385f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
386f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  return false;
387f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
388f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
389f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
390f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::setupBitInsert(Instruction *base,
391f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    Instruction *&src,
392f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    Constant *&mask,
393f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    Constant *&shift)
394f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
395f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (!base) {
396f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    if (mDebug) {
397f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      dbgs() << "Null pointer passed into function.\n";
398f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    }
399f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
400f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
401f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  bool andOp = false;
402f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (base->getOpcode() == Instruction::Shl) {
403f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    shift = dyn_cast<Constant>(base->getOperand(1));
404f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  } else if (base->getOpcode() == Instruction::And) {
405f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    mask = dyn_cast<Constant>(base->getOperand(1));
406f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    andOp = true;
407f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  } else {
408f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    if (mDebug) {
409f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      dbgs() << "Failed setup with no Shl or And instruction on base opcode!\n";
410f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    }
411f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    // If the base is neither a Shl or a And, we don't fit any of the patterns above.
412f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
413f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
414f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  src = dyn_cast<Instruction>(base->getOperand(0));
415f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (!src) {
416f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    if (mDebug) {
417f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      dbgs() << "Failed setup since the base operand is not an instruction!\n";
418f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    }
419f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
420f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
421f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // If we find an 'and' operation, then we don't need to
422f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // find the next operation as we already know the
423f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // bits that are valid at this point.
424f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (andOp) {
425f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return true;
426f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
427f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (src->getOpcode() == Instruction::Shl && !shift) {
428f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    shift = dyn_cast<Constant>(src->getOperand(1));
429f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    src = dyn_cast<Instruction>(src->getOperand(0));
430f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  } else if (src->getOpcode() == Instruction::And && !mask) {
431f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    mask = dyn_cast<Constant>(src->getOperand(1));
432f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
433f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (!mask && !shift) {
434f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    if (mDebug) {
435f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      dbgs() << "Failed setup since both mask and shift are NULL!\n";
436f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    }
437f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    // Did not find a constant mask or a shift.
438f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
439f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
440f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  return true;
441f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
442f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
443f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::optimizeBitInsert(Instruction *inst)
444f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
445f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (!inst) {
446f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
447f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
448f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (!inst->isBinaryOp()) {
449f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
450f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
451f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (inst->getOpcode() != Instruction::Or) {
452f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
453f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
454f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (optLevel == CodeGenOpt::None) {
455f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
456f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
457f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // We want to do an optimization on a sequence of ops that in the end equals a
458f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // single ISA instruction.
459f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // The base pattern for this optimization is - ((A & B) << C) | ((D & E) << F)
460f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // Some simplified versions of this pattern are as follows:
461f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // (A & B) | (D & E) when B & E == 0 && C == 0 && F == 0
462f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // ((A & B) << C) | (D & E) when B ^ E == 0 && (1 << C) >= E
463f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // (A & B) | ((D & E) << F) when B ^ E == 0 && (1 << F) >= B
464f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // (A & B) | (D << F) when (1 << F) >= B
465f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // (A << C) | (D & E) when (1 << C) >= E
466f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) {
467f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    // The HD4XXX hardware doesn't support the ubit_insert instruction.
468f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
469f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
470f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  Type *aType = inst->getType();
471f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  bool isVector = aType->isVectorTy();
472f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  int numEle = 1;
473f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // This optimization only works on 32bit integers.
474f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (aType->getScalarType()
475f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      != Type::getInt32Ty(inst->getContext())) {
476f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
477f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
478f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (isVector) {
479f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    const VectorType *VT = dyn_cast<VectorType>(aType);
480f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    numEle = VT->getNumElements();
481f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    // We currently cannot support more than 4 elements in a intrinsic and we
482f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    // cannot support Vec3 types.
483f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    if (numEle > 4 || numEle == 3) {
484f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return false;
485f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    }
486f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
487f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // TODO: Handle vectors.
488f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (isVector) {
489f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    if (mDebug) {
490f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      dbgs() << "!!! Vectors are not supported yet!\n";
491f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    }
492f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
493f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
494f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  Instruction *LHSSrc = NULL, *RHSSrc = NULL;
495f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  Constant *LHSMask = NULL, *RHSMask = NULL;
496f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  Constant *LHSShift = NULL, *RHSShift = NULL;
497f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  Instruction *LHS = dyn_cast<Instruction>(inst->getOperand(0));
498f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  Instruction *RHS = dyn_cast<Instruction>(inst->getOperand(1));
499f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (!setupBitInsert(LHS, LHSSrc, LHSMask, LHSShift)) {
500f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    if (mDebug) {
501f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      dbgs() << "Found an OR Operation that failed setup!\n";
502f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      inst->dump();
503f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (LHS) { LHS->dump(); }
504f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (LHSSrc) { LHSSrc->dump(); }
505f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (LHSMask) { LHSMask->dump(); }
506f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (LHSShift) { LHSShift->dump(); }
507f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    }
508f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    // There was an issue with the setup for BitInsert.
509f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
510f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
511f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (!setupBitInsert(RHS, RHSSrc, RHSMask, RHSShift)) {
512f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    if (mDebug) {
513f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      dbgs() << "Found an OR Operation that failed setup!\n";
514f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      inst->dump();
515f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (RHS) { RHS->dump(); }
516f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (RHSSrc) { RHSSrc->dump(); }
517f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (RHSMask) { RHSMask->dump(); }
518f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (RHSShift) { RHSShift->dump(); }
519f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    }
520f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    // There was an issue with the setup for BitInsert.
521f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
522f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
523f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (mDebug) {
524f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    dbgs() << "Found an OR operation that can possible be optimized to ubit insert!\n";
525f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    dbgs() << "Op:        "; inst->dump();
526f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    dbgs() << "LHS:       "; if (LHS) { LHS->dump(); } else { dbgs() << "(None)\n"; }
527f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    dbgs() << "LHS Src:   "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(None)\n"; }
528f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    dbgs() << "LHS Mask:  "; if (LHSMask) { LHSMask->dump(); } else { dbgs() << "(None)\n"; }
529f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    dbgs() << "LHS Shift: "; if (LHSShift) { LHSShift->dump(); } else { dbgs() << "(None)\n"; }
530f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    dbgs() << "RHS:       "; if (RHS) { RHS->dump(); } else { dbgs() << "(None)\n"; }
531f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    dbgs() << "RHS Src:   "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(None)\n"; }
532f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    dbgs() << "RHS Mask:  "; if (RHSMask) { RHSMask->dump(); } else { dbgs() << "(None)\n"; }
533f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    dbgs() << "RHS Shift: "; if (RHSShift) { RHSShift->dump(); } else { dbgs() << "(None)\n"; }
534f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
535f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  Constant *offset = NULL;
536f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  Constant *width = NULL;
537f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  int32_t lhsMaskVal = 0, rhsMaskVal = 0;
538f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  int32_t lhsShiftVal = 0, rhsShiftVal = 0;
539f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  int32_t lhsMaskWidth = 0, rhsMaskWidth = 0;
540f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  int32_t lhsMaskOffset = 0, rhsMaskOffset = 0;
541f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  lhsMaskVal = (int32_t)(LHSMask
542f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      ? dyn_cast<ConstantInt>(LHSMask)->getZExtValue() : 0);
543f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  rhsMaskVal = (int32_t)(RHSMask
544f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      ? dyn_cast<ConstantInt>(RHSMask)->getZExtValue() : 0);
545f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  lhsShiftVal = (int32_t)(LHSShift
546f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      ? dyn_cast<ConstantInt>(LHSShift)->getZExtValue() : 0);
547f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  rhsShiftVal = (int32_t)(RHSShift
548f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      ? dyn_cast<ConstantInt>(RHSShift)->getZExtValue() : 0);
549f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  lhsMaskWidth = lhsMaskVal ? CountPopulation_32(lhsMaskVal) : 32 - lhsShiftVal;
550f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  rhsMaskWidth = rhsMaskVal ? CountPopulation_32(rhsMaskVal) : 32 - rhsShiftVal;
551f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  lhsMaskOffset = lhsMaskVal ? CountTrailingZeros_32(lhsMaskVal) : lhsShiftVal;
552f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  rhsMaskOffset = rhsMaskVal ? CountTrailingZeros_32(rhsMaskVal) : rhsShiftVal;
553f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // TODO: Handle the case of A & B | D & ~B(i.e. inverted masks).
554f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (mDebug) {
555f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      dbgs() << "Found pattern: \'((A" << (LHSMask ? " & B)" : ")");
556f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      dbgs() << (LHSShift ? " << C)" : ")") << " | ((D" ;
557f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      dbgs() << (RHSMask ? " & E)" : ")");
558f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      dbgs() << (RHSShift ? " << F)\'\n" : ")\'\n");
559f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      dbgs() << "A = LHSSrc\t\tD = RHSSrc \n";
560f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      dbgs() << "B = " << lhsMaskVal << "\t\tE = " << rhsMaskVal << "\n";
561f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      dbgs() << "C = " << lhsShiftVal << "\t\tF = " << rhsShiftVal << "\n";
562f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      dbgs() << "width(B) = " << lhsMaskWidth;
563f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      dbgs() << "\twidth(E) = " << rhsMaskWidth << "\n";
564f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      dbgs() << "offset(B) = " << lhsMaskOffset;
565f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      dbgs() << "\toffset(E) = " << rhsMaskOffset << "\n";
566f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      dbgs() << "Constraints: \n";
567f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      dbgs() << "\t(1) B ^ E == 0\n";
568f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      dbgs() << "\t(2-LHS) B is a mask\n";
569f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      dbgs() << "\t(2-LHS) E is a mask\n";
570f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      dbgs() << "\t(3-LHS) (offset(B)) >= (width(E) + offset(E))\n";
571f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      dbgs() << "\t(3-RHS) (offset(E)) >= (width(B) + offset(B))\n";
572f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
573f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if ((lhsMaskVal || rhsMaskVal) && !(lhsMaskVal ^ rhsMaskVal)) {
574f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    if (mDebug) {
575f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      dbgs() << lhsMaskVal << " ^ " << rhsMaskVal;
576f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      dbgs() << " = " << (lhsMaskVal ^ rhsMaskVal) << "\n";
577f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      dbgs() << "Failed constraint 1!\n";
578f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    }
579f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
580f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
581f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (mDebug) {
582f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    dbgs() << "LHS = " << lhsMaskOffset << "";
583f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    dbgs() << " >= (" << rhsMaskWidth << " + " << rhsMaskOffset << ") = ";
584f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    dbgs() << (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset));
585f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    dbgs() << "\nRHS = " << rhsMaskOffset << "";
586f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    dbgs() << " >= (" << lhsMaskWidth << " + " << lhsMaskOffset << ") = ";
587f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    dbgs() << (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset));
588f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    dbgs() << "\n";
589f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
590f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset)) {
591f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    offset = ConstantInt::get(aType, lhsMaskOffset, false);
592f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    width = ConstantInt::get(aType, lhsMaskWidth, false);
593f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    RHSSrc = RHS;
594f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    if (!isMask_32(lhsMaskVal) && !isShiftedMask_32(lhsMaskVal)) {
595f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (mDebug) {
596f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        dbgs() << "Value is not a Mask: " << lhsMaskVal << "\n";
597f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        dbgs() << "Failed constraint 2!\n";
598f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
599f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return false;
600f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    }
601f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    if (!LHSShift) {
602f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
603f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          "MaskShr", LHS);
604f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    } else if (lhsShiftVal != lhsMaskOffset) {
605f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
606f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          "MaskShr", LHS);
607f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    }
608f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    if (mDebug) {
609f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      dbgs() << "Optimizing LHS!\n";
610f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    }
611f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  } else if (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset)) {
612f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    offset = ConstantInt::get(aType, rhsMaskOffset, false);
613f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    width = ConstantInt::get(aType, rhsMaskWidth, false);
614f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    LHSSrc = RHSSrc;
615f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    RHSSrc = LHS;
616f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    if (!isMask_32(rhsMaskVal) && !isShiftedMask_32(rhsMaskVal)) {
617f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (mDebug) {
618f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        dbgs() << "Non-Mask: " << rhsMaskVal << "\n";
619f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        dbgs() << "Failed constraint 2!\n";
620f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
621f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return false;
622f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    }
623f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    if (!RHSShift) {
624f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
625f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          "MaskShr", RHS);
626f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    } else if (rhsShiftVal != rhsMaskOffset) {
627f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
628f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          "MaskShr", RHS);
629f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    }
630f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    if (mDebug) {
631f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      dbgs() << "Optimizing RHS!\n";
632f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    }
633f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  } else {
634f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    if (mDebug) {
635f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      dbgs() << "Failed constraint 3!\n";
636f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    }
637f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
638f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
639f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (mDebug) {
640f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    dbgs() << "Width:  "; if (width) { width->dump(); } else { dbgs() << "(0)\n"; }
641f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    dbgs() << "Offset: "; if (offset) { offset->dump(); } else { dbgs() << "(0)\n"; }
642f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    dbgs() << "LHSSrc: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(0)\n"; }
643f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    dbgs() << "RHSSrc: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(0)\n"; }
644f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
645f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (!offset || !width) {
646f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    if (mDebug) {
647f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      dbgs() << "Either width or offset are NULL, failed detection!\n";
648f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    }
649f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
650f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
651f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // Lets create the function signature.
652f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  std::vector<Type *> callTypes;
653f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  callTypes.push_back(aType);
654f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  callTypes.push_back(aType);
655f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  callTypes.push_back(aType);
656f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  callTypes.push_back(aType);
657f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  FunctionType *funcType = FunctionType::get(aType, callTypes, false);
658f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  std::string name = "__amdil_ubit_insert";
659f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (isVector) { name += "_v" + itostr(numEle) + "u32"; } else { name += "_u32"; }
660f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  Function *Func =
661f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
662f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        getOrInsertFunction(llvm::StringRef(name), funcType));
663f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  Value *Operands[4] = {
664f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    width,
665f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    offset,
666f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    LHSSrc,
667f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    RHSSrc
668f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  };
669f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  CallInst *CI = CallInst::Create(Func, Operands, "BitInsertOpt");
670f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (mDebug) {
671f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    dbgs() << "Old Inst: ";
672f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    inst->dump();
673f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    dbgs() << "New Inst: ";
674f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    CI->dump();
675f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    dbgs() << "\n\n";
676f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
677f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  CI->insertBefore(inst);
678f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  inst->replaceAllUsesWith(CI);
679f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  return true;
680f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
681f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
682f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
683f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::optimizeBitExtract(Instruction *inst)
684f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
685f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (!inst) {
686f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
687f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
688f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (!inst->isBinaryOp()) {
689f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
690f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
691f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (inst->getOpcode() != Instruction::And) {
692f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
693f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
694f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (optLevel == CodeGenOpt::None) {
695f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
696f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
697f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // We want to do some simple optimizations on Shift right/And patterns. The
698f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // basic optimization is to turn (A >> B) & C where A is a 32bit type, B is a
699f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // value smaller than 32 and C is a mask. If C is a constant value, then the
700f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // following transformation can occur. For signed integers, it turns into the
701f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // function call dst = __amdil_ibit_extract(log2(C), B, A) For unsigned
702f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // integers, it turns into the function call dst =
703f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // __amdil_ubit_extract(log2(C), B, A) The function __amdil_[u|i]bit_extract
704f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // can be found in Section 7.9 of the ATI IL spec of the stream SDK for
705f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // Evergreen hardware.
706f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) {
707f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    // This does not work on HD4XXX hardware.
708f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
709f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
710f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  Type *aType = inst->getType();
711f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  bool isVector = aType->isVectorTy();
712f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
713f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // XXX Support vector types
714f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (isVector) {
715f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
716f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
717f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  int numEle = 1;
718f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // This only works on 32bit integers
719f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (aType->getScalarType()
720f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      != Type::getInt32Ty(inst->getContext())) {
721f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
722f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
723f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (isVector) {
724f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    const VectorType *VT = dyn_cast<VectorType>(aType);
725f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    numEle = VT->getNumElements();
726f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    // We currently cannot support more than 4 elements in a intrinsic and we
727f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    // cannot support Vec3 types.
728f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    if (numEle > 4 || numEle == 3) {
729f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return false;
730f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    }
731f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
732f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  BinaryOperator *ShiftInst = dyn_cast<BinaryOperator>(inst->getOperand(0));
733f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // If the first operand is not a shift instruction, then we can return as it
734f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // doesn't match this pattern.
735f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (!ShiftInst || !ShiftInst->isShift()) {
736f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
737f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
738f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // If we are a shift left, then we need don't match this pattern.
739f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (ShiftInst->getOpcode() == Instruction::Shl) {
740f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
741f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
742f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  bool isSigned = ShiftInst->isArithmeticShift();
743f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  Constant *AndMask = dyn_cast<Constant>(inst->getOperand(1));
744f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  Constant *ShrVal = dyn_cast<Constant>(ShiftInst->getOperand(1));
745f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // Lets make sure that the shift value and the and mask are constant integers.
746f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (!AndMask || !ShrVal) {
747f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
748f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
749f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  Constant *newMaskConst;
750f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  Constant *shiftValConst;
751f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (isVector) {
752f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    // Handle the vector case
753f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    std::vector<Constant *> maskVals;
754f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    std::vector<Constant *> shiftVals;
755f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    ConstantVector *AndMaskVec = dyn_cast<ConstantVector>(AndMask);
756f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    ConstantVector *ShrValVec = dyn_cast<ConstantVector>(ShrVal);
757f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    Type *scalarType = AndMaskVec->getType()->getScalarType();
758f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    assert(AndMaskVec->getNumOperands() ==
759f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org           ShrValVec->getNumOperands() && "cannot have a "
760f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org           "combination where the number of elements to a "
761f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org           "shift and an and are different!");
762f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    for (size_t x = 0, y = AndMaskVec->getNumOperands(); x < y; ++x) {
763f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      ConstantInt *AndCI = dyn_cast<ConstantInt>(AndMaskVec->getOperand(x));
764f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      ConstantInt *ShiftIC = dyn_cast<ConstantInt>(ShrValVec->getOperand(x));
765f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (!AndCI || !ShiftIC) {
766f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        return false;
767f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
768f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      uint32_t maskVal = (uint32_t)AndCI->getZExtValue();
769f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (!isMask_32(maskVal)) {
770f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        return false;
771f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
772f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      maskVal = (uint32_t)CountTrailingOnes_32(maskVal);
773f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      uint32_t shiftVal = (uint32_t)ShiftIC->getZExtValue();
774f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      // If the mask or shiftval is greater than the bitcount, then break out.
775f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (maskVal >= 32 || shiftVal >= 32) {
776f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        return false;
777f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
778f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      // If the mask val is greater than the the number of original bits left
779f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      // then this optimization is invalid.
780f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (maskVal > (32 - shiftVal)) {
781f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        return false;
782f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
783f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      maskVals.push_back(ConstantInt::get(scalarType, maskVal, isSigned));
784f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      shiftVals.push_back(ConstantInt::get(scalarType, shiftVal, isSigned));
785f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    }
786f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    newMaskConst = ConstantVector::get(maskVals);
787f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    shiftValConst = ConstantVector::get(shiftVals);
788f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  } else {
789f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    // Handle the scalar case
790f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    uint32_t maskVal = (uint32_t)dyn_cast<ConstantInt>(AndMask)->getZExtValue();
791f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    // This must be a mask value where all lower bits are set to 1 and then any
792f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    // bit higher is set to 0.
793f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    if (!isMask_32(maskVal)) {
794f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return false;
795f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    }
796f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    maskVal = (uint32_t)CountTrailingOnes_32(maskVal);
797f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    // Count the number of bits set in the mask, this is the width of the
798f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    // resulting bit set that is extracted from the source value.
799f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    uint32_t shiftVal = (uint32_t)dyn_cast<ConstantInt>(ShrVal)->getZExtValue();
800f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    // If the mask or shift val is greater than the bitcount, then break out.
801f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    if (maskVal >= 32 || shiftVal >= 32) {
802f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return false;
803f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    }
804f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    // If the mask val is greater than the the number of original bits left then
805f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    // this optimization is invalid.
806f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    if (maskVal > (32 - shiftVal)) {
807f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return false;
808f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    }
809f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    newMaskConst = ConstantInt::get(aType, maskVal, isSigned);
810f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    shiftValConst = ConstantInt::get(aType, shiftVal, isSigned);
811f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
812f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // Lets create the function signature.
813f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  std::vector<Type *> callTypes;
814f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  callTypes.push_back(aType);
815f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  callTypes.push_back(aType);
816f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  callTypes.push_back(aType);
817f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  FunctionType *funcType = FunctionType::get(aType, callTypes, false);
818f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  std::string name = "llvm.AMDIL.bit.extract.u32";
819f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (isVector) {
820f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    name += ".v" + itostr(numEle) + "i32";
821f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  } else {
822f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    name += ".";
823f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
824f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // Lets create the function.
825f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  Function *Func =
826f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
827f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                       getOrInsertFunction(llvm::StringRef(name), funcType));
828f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  Value *Operands[3] = {
829f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    ShiftInst->getOperand(0),
830f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    shiftValConst,
831f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    newMaskConst
832f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  };
833f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // Lets create the Call with the operands
834f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  CallInst *CI = CallInst::Create(Func, Operands, "ByteExtractOpt");
835f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  CI->setDoesNotAccessMemory();
836f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  CI->insertBefore(inst);
837f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  inst->replaceAllUsesWith(CI);
838f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  return true;
839f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
840f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
841f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
842f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::expandBFI(CallInst *CI)
843f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
844f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (!CI) {
845f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
846f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
847f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
848f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (!LHS->getName().startswith("__amdil_bfi")) {
849f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
850f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
851f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  Type* type = CI->getOperand(0)->getType();
852f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  Constant *negOneConst = NULL;
853f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (type->isVectorTy()) {
854f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    std::vector<Constant *> negOneVals;
855f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    negOneConst = ConstantInt::get(CI->getContext(),
856f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        APInt(32, StringRef("-1"), 10));
857f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    for (size_t x = 0,
858f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) {
859f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      negOneVals.push_back(negOneConst);
860f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    }
861f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    negOneConst = ConstantVector::get(negOneVals);
862f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  } else {
863f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    negOneConst = ConstantInt::get(CI->getContext(),
864f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        APInt(32, StringRef("-1"), 10));
865f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
866f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // __amdil_bfi => (A & B) | (~A & C)
867f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  BinaryOperator *lhs =
868f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    BinaryOperator::Create(Instruction::And, CI->getOperand(0),
869f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        CI->getOperand(1), "bfi_and", CI);
870f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  BinaryOperator *rhs =
871f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    BinaryOperator::Create(Instruction::Xor, CI->getOperand(0), negOneConst,
872f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        "bfi_not", CI);
873f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  rhs = BinaryOperator::Create(Instruction::And, rhs, CI->getOperand(2),
874f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      "bfi_and", CI);
875f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  lhs = BinaryOperator::Create(Instruction::Or, lhs, rhs, "bfi_or", CI);
876f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  CI->replaceAllUsesWith(lhs);
877f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  return true;
878f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
879f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
880f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
881f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::expandBFM(CallInst *CI)
882f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
883f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (!CI) {
884f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
885f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
886f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
887f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (!LHS->getName().startswith("__amdil_bfm")) {
888f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
889f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
890f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // __amdil_bfm => ((1 << (src0 & 0x1F)) - 1) << (src1 & 0x1f)
891f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  Constant *newMaskConst = NULL;
892f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  Constant *newShiftConst = NULL;
893f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  Type* type = CI->getOperand(0)->getType();
894f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (type->isVectorTy()) {
895f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    std::vector<Constant*> newMaskVals, newShiftVals;
896f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F);
897f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1);
898f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    for (size_t x = 0,
899f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) {
900f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      newMaskVals.push_back(newMaskConst);
901f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      newShiftVals.push_back(newShiftConst);
902f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    }
903f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    newMaskConst = ConstantVector::get(newMaskVals);
904f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    newShiftConst = ConstantVector::get(newShiftVals);
905f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  } else {
906f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F);
907f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1);
908f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
909f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  BinaryOperator *lhs =
910f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    BinaryOperator::Create(Instruction::And, CI->getOperand(0),
911f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        newMaskConst, "bfm_mask", CI);
912f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  lhs = BinaryOperator::Create(Instruction::Shl, newShiftConst,
913f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      lhs, "bfm_shl", CI);
914f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  lhs = BinaryOperator::Create(Instruction::Sub, lhs,
915f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      newShiftConst, "bfm_sub", CI);
916f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  BinaryOperator *rhs =
917f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    BinaryOperator::Create(Instruction::And, CI->getOperand(1),
918f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        newMaskConst, "bfm_mask", CI);
919f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  lhs = BinaryOperator::Create(Instruction::Shl, lhs, rhs, "bfm_shl", CI);
920f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  CI->replaceAllUsesWith(lhs);
921f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  return true;
922f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
923f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
924f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
925f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::instLevelOptimizations(BasicBlock::iterator *bbb)
926f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
927f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  Instruction *inst = (*bbb);
928f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (optimizeCallInst(bbb)) {
929f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return true;
930f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
931f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (optimizeBitExtract(inst)) {
932f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
933f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
934f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (optimizeBitInsert(inst)) {
935f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
936f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
937f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (correctMisalignedMemOp(inst)) {
938f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
939f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
940f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  return false;
941f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
942f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
943f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::correctMisalignedMemOp(Instruction *inst)
944f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
945f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  LoadInst *linst = dyn_cast<LoadInst>(inst);
946f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  StoreInst *sinst = dyn_cast<StoreInst>(inst);
947f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  unsigned alignment;
948f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  Type* Ty = inst->getType();
949f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (linst) {
950f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    alignment = linst->getAlignment();
951f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    Ty = inst->getType();
952f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  } else if (sinst) {
953f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    alignment = sinst->getAlignment();
954f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    Ty = sinst->getValueOperand()->getType();
955f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  } else {
956f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
957f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
958f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  unsigned size = getTypeSize(Ty);
959f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (size == alignment || size < alignment) {
960f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
961f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
962f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (!Ty->isStructTy()) {
963f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
964f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
965f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (alignment < 4) {
966f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    if (linst) {
967f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      linst->setAlignment(0);
968f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return true;
969f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    } else if (sinst) {
970f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      sinst->setAlignment(0);
971f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return true;
972f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    }
973f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
974f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  return false;
975f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
976f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
977f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::isSigned24BitOps(CallInst *CI)
978f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
979f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (!CI) {
980f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
981f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
982f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
983f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  std::string namePrefix = LHS->getName().substr(0, 14);
984f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (namePrefix != "__amdil_imad24" && namePrefix != "__amdil_imul24"
985f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      && namePrefix != "__amdil__imul24_high") {
986f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
987f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
988f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (mSTM->device()->usesHardware(AMDGPUDeviceInfo::Signed24BitOps)) {
989f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
990f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
991f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  return true;
992f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
993f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
994f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
995f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::expandSigned24BitOps(CallInst *CI)
996f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
997f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  assert(isSigned24BitOps(CI) && "Must be a "
998f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      "signed 24 bit operation to call this function!");
999f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  Value *LHS = CI->getOperand(CI->getNumOperands()-1);
1000f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // On 7XX and 8XX we do not have signed 24bit, so we need to
1001f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // expand it to the following:
1002f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // imul24 turns into 32bit imul
1003f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // imad24 turns into 32bit imad
1004f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // imul24_high turns into 32bit imulhigh
1005f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (LHS->getName().substr(0, 14) == "__amdil_imad24") {
1006f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    Type *aType = CI->getOperand(0)->getType();
1007f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    bool isVector = aType->isVectorTy();
1008f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1;
1009f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    std::vector<Type*> callTypes;
1010f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    callTypes.push_back(CI->getOperand(0)->getType());
1011f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    callTypes.push_back(CI->getOperand(1)->getType());
1012f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    callTypes.push_back(CI->getOperand(2)->getType());
1013f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    FunctionType *funcType =
1014f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      FunctionType::get(CI->getOperand(0)->getType(), callTypes, false);
1015f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    std::string name = "__amdil_imad";
1016f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    if (isVector) {
1017f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      name += "_v" + itostr(numEle) + "i32";
1018f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    } else {
1019f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      name += "_i32";
1020f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    }
1021f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    Function *Func = dyn_cast<Function>(
1022f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                       CI->getParent()->getParent()->getParent()->
1023f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                       getOrInsertFunction(llvm::StringRef(name), funcType));
1024f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    Value *Operands[3] = {
1025f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      CI->getOperand(0),
1026f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      CI->getOperand(1),
1027f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      CI->getOperand(2)
1028f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    };
1029f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    CallInst *nCI = CallInst::Create(Func, Operands, "imad24");
1030f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    nCI->insertBefore(CI);
1031f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    CI->replaceAllUsesWith(nCI);
1032f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  } else if (LHS->getName().substr(0, 14) == "__amdil_imul24") {
1033f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    BinaryOperator *mulOp =
1034f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      BinaryOperator::Create(Instruction::Mul, CI->getOperand(0),
1035f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          CI->getOperand(1), "imul24", CI);
1036f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    CI->replaceAllUsesWith(mulOp);
1037f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  } else if (LHS->getName().substr(0, 19) == "__amdil_imul24_high") {
1038f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    Type *aType = CI->getOperand(0)->getType();
1039f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1040f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    bool isVector = aType->isVectorTy();
1041f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1;
1042f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    std::vector<Type*> callTypes;
1043f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    callTypes.push_back(CI->getOperand(0)->getType());
1044f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    callTypes.push_back(CI->getOperand(1)->getType());
1045f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    FunctionType *funcType =
1046f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      FunctionType::get(CI->getOperand(0)->getType(), callTypes, false);
1047f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    std::string name = "__amdil_imul_high";
1048f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    if (isVector) {
1049f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      name += "_v" + itostr(numEle) + "i32";
1050f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    } else {
1051f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      name += "_i32";
1052f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    }
1053f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    Function *Func = dyn_cast<Function>(
1054f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                       CI->getParent()->getParent()->getParent()->
1055f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                       getOrInsertFunction(llvm::StringRef(name), funcType));
1056f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    Value *Operands[2] = {
1057f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      CI->getOperand(0),
1058f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      CI->getOperand(1)
1059f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    };
1060f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    CallInst *nCI = CallInst::Create(Func, Operands, "imul24_high");
1061f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    nCI->insertBefore(CI);
1062f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    CI->replaceAllUsesWith(nCI);
1063f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
1064f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1065f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1066f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
1067f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::isRWGLocalOpt(CallInst *CI)
1068f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1069f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  return (CI != NULL
1070f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          && CI->getOperand(CI->getNumOperands() - 1)->getName()
1071f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          == "__amdil_get_local_size_int");
1072f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1073f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1074f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
1075f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::convertAccurateDivide(CallInst *CI)
1076f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1077f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (!CI) {
1078f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
1079f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
1080f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD6XXX
1081f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      && (mSTM->getDeviceName() == "cayman")) {
1082f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
1083f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
1084f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  return CI->getOperand(CI->getNumOperands() - 1)->getName().substr(0, 20)
1085f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      == "__amdil_improved_div";
1086f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1087f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1088f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
1089f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::expandAccurateDivide(CallInst *CI)
1090f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1091f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  assert(convertAccurateDivide(CI)
1092f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         && "expanding accurate divide can only happen if it is expandable!");
1093f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  BinaryOperator *divOp =
1094f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    BinaryOperator::Create(Instruction::FDiv, CI->getOperand(0),
1095f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                           CI->getOperand(1), "fdiv32", CI);
1096f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  CI->replaceAllUsesWith(divOp);
1097f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1098f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1099f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
1100f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::propagateSamplerInst(CallInst *CI)
1101f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1102f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (optLevel != CodeGenOpt::None) {
1103f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
1104f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
1105f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1106f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (!CI) {
1107f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
1108f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
1109f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1110f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  unsigned funcNameIdx = 0;
1111f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  funcNameIdx = CI->getNumOperands() - 1;
1112f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  StringRef calleeName = CI->getOperand(funcNameIdx)->getName();
1113f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (calleeName != "__amdil_image2d_read_norm"
1114f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   && calleeName != "__amdil_image2d_read_unnorm"
1115f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   && calleeName != "__amdil_image3d_read_norm"
1116f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   && calleeName != "__amdil_image3d_read_unnorm") {
1117f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
1118f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
1119f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1120f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  unsigned samplerIdx = 2;
1121f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  samplerIdx = 1;
1122f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  Value *sampler = CI->getOperand(samplerIdx);
1123f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  LoadInst *lInst = dyn_cast<LoadInst>(sampler);
1124f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (!lInst) {
1125f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
1126f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
1127f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1128f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (lInst->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1129f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
1130f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
1131f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1132f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  GlobalVariable *gv = dyn_cast<GlobalVariable>(lInst->getPointerOperand());
1133f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // If we are loading from what is not a global value, then we
1134f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // fail and return.
1135f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (!gv) {
1136f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return false;
1137f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
1138f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1139f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // If we don't have an initializer or we have an initializer and
1140f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // the initializer is not a 32bit integer, we fail.
1141f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (!gv->hasInitializer()
1142f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      || !gv->getInitializer()->getType()->isIntegerTy(32)) {
1143f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return false;
1144f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
1145f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1146f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // Now that we have the global variable initializer, lets replace
1147f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // all uses of the load instruction with the samplerVal and
1148f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  // reparse the __amdil_is_constant() function.
1149f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  Constant *samplerVal = gv->getInitializer();
1150f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  lInst->replaceAllUsesWith(samplerVal);
1151f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  return true;
1152f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1153f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1154f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
1155f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::doInitialization(Module &M)
1156f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1157f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  return false;
1158f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1159f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1160f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
1161f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::doFinalization(Module &M)
1162f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1163f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  return false;
1164f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1165f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1166f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
1167f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::getAnalysisUsage(AnalysisUsage &AU) const
1168f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1169f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  AU.addRequired<MachineFunctionAnalysis>();
1170f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  FunctionPass::getAnalysisUsage(AU);
1171f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  AU.setPreservesAll();
1172f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1173f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1174f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgsize_t AMDGPUPeepholeOpt::getTypeSize(Type * const T, bool dereferencePtr) {
1175f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  size_t size = 0;
1176f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (!T) {
1177f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return size;
1178f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
1179f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  switch (T->getTypeID()) {
1180f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  case Type::X86_FP80TyID:
1181f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  case Type::FP128TyID:
1182f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  case Type::PPC_FP128TyID:
1183f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  case Type::LabelTyID:
1184f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    assert(0 && "These types are not supported by this backend");
1185f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  default:
1186f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  case Type::FloatTyID:
1187f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  case Type::DoubleTyID:
1188f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    size = T->getPrimitiveSizeInBits() >> 3;
1189f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    break;
1190f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  case Type::PointerTyID:
1191f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    size = getTypeSize(dyn_cast<PointerType>(T), dereferencePtr);
1192f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    break;
1193f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  case Type::IntegerTyID:
1194f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    size = getTypeSize(dyn_cast<IntegerType>(T), dereferencePtr);
1195f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    break;
1196f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  case Type::StructTyID:
1197f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    size = getTypeSize(dyn_cast<StructType>(T), dereferencePtr);
1198f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    break;
1199f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  case Type::ArrayTyID:
1200f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    size = getTypeSize(dyn_cast<ArrayType>(T), dereferencePtr);
1201f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    break;
1202f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  case Type::FunctionTyID:
1203f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    size = getTypeSize(dyn_cast<FunctionType>(T), dereferencePtr);
1204f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    break;
1205f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  case Type::VectorTyID:
1206f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    size = getTypeSize(dyn_cast<VectorType>(T), dereferencePtr);
1207f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    break;
1208f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  };
1209f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  return size;
1210f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1211f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1212f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgsize_t AMDGPUPeepholeOpt::getTypeSize(StructType * const ST,
1213f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    bool dereferencePtr) {
1214f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  size_t size = 0;
1215f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (!ST) {
1216f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return size;
1217f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
1218f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  Type *curType;
1219f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  StructType::element_iterator eib;
1220f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  StructType::element_iterator eie;
1221f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  for (eib = ST->element_begin(), eie = ST->element_end(); eib != eie; ++eib) {
1222f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    curType = *eib;
1223f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    size += getTypeSize(curType, dereferencePtr);
1224f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
1225f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  return size;
1226f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1227f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1228f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgsize_t AMDGPUPeepholeOpt::getTypeSize(IntegerType * const IT,
1229f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    bool dereferencePtr) {
1230f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  return IT ? (IT->getBitWidth() >> 3) : 0;
1231f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1232f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1233f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgsize_t AMDGPUPeepholeOpt::getTypeSize(FunctionType * const FT,
1234f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    bool dereferencePtr) {
1235f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    assert(0 && "Should not be able to calculate the size of an function type");
1236f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return 0;
1237f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1238f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1239f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgsize_t AMDGPUPeepholeOpt::getTypeSize(ArrayType * const AT,
1240f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    bool dereferencePtr) {
1241f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  return (size_t)(AT ? (getTypeSize(AT->getElementType(),
1242f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                                    dereferencePtr) * AT->getNumElements())
1243f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                     : 0);
1244f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1245f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1246f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgsize_t AMDGPUPeepholeOpt::getTypeSize(VectorType * const VT,
1247f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    bool dereferencePtr) {
1248f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  return VT ? (VT->getBitWidth() >> 3) : 0;
1249f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1250f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1251f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgsize_t AMDGPUPeepholeOpt::getTypeSize(PointerType * const PT,
1252f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    bool dereferencePtr) {
1253f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (!PT) {
1254f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return 0;
1255f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
1256f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  Type *CT = PT->getElementType();
1257f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  if (CT->getTypeID() == Type::StructTyID &&
1258f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
1259f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return getTypeSize(dyn_cast<StructType>(CT));
1260f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  } else if (dereferencePtr) {
1261f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    size_t size = 0;
1262f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    for (size_t x = 0, y = PT->getNumContainedTypes(); x < y; ++x) {
1263f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      size += getTypeSize(PT->getContainedType(x), dereferencePtr);
1264f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    }
1265f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return size;
1266f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  } else {
1267f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return 4;
1268f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
1269f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1270f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1271f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgsize_t AMDGPUPeepholeOpt::getTypeSize(OpaqueType * const OT,
1272f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    bool dereferencePtr) {
1273f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  //assert(0 && "Should not be able to calculate the size of an opaque type");
1274f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  return 4;
1275f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1276