1//===-- ARMGlobalMerge.cpp - Internal globals merging  --------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
// This pass merges globals with internal linkage into one. This way all the
// globals which were merged into the bigger one can be addressed using offsets
// from the same base pointer (no need for a separate base pointer for each of
// the globals). Such a transformation can significantly reduce the register
// pressure when many globals are involved.
14//
15// For example, consider the code which touches several global variables at
16// once:
17//
18// static int foo[N], bar[N], baz[N];
19//
20// for (i = 0; i < N; ++i) {
21//    foo[i] = bar[i] * baz[i];
22// }
23//
//  On ARM the addresses of the 3 arrays have to be kept in registers, so
//  this code has fairly high register pressure (loop body):
26//
27//  ldr     r1, [r5], #4
28//  ldr     r2, [r6], #4
29//  mul     r1, r2, r1
30//  str     r1, [r0], #4
31//
32//  Pass converts the code to something like:
33//
34//  static struct {
35//    int foo[N];
36//    int bar[N];
37//    int baz[N];
38//  } merged;
39//
40//  for (i = 0; i < N; ++i) {
41//    merged.foo[i] = merged.bar[i] * merged.baz[i];
42//  }
43//
44//  and in ARM code this becomes:
45//
46//  ldr     r0, [r5, #40]
47//  ldr     r1, [r5, #80]
48//  mul     r0, r1, r0
49//  str     r0, [r5], #4
50//
//  Note that we saved 2 registers here almost "for free".
52// ===---------------------------------------------------------------------===//
53
54#define DEBUG_TYPE "arm-global-merge"
55#include "ARM.h"
56#include "llvm/CodeGen/Passes.h"
57#include "llvm/Attributes.h"
58#include "llvm/Constants.h"
59#include "llvm/DerivedTypes.h"
60#include "llvm/Function.h"
61#include "llvm/GlobalVariable.h"
62#include "llvm/Instructions.h"
63#include "llvm/Intrinsics.h"
64#include "llvm/Module.h"
65#include "llvm/Pass.h"
66#include "llvm/Target/TargetData.h"
67#include "llvm/Target/TargetLowering.h"
68#include "llvm/Target/TargetLoweringObjectFile.h"
69using namespace llvm;
70
71namespace {
72  class ARMGlobalMerge : public FunctionPass {
73    /// TLI - Keep a pointer of a TargetLowering to consult for determining
74    /// target type sizes.
75    const TargetLowering *TLI;
76
77    bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
78                 Module &M, bool isConst) const;
79
80  public:
81    static char ID;             // Pass identification, replacement for typeid.
82    explicit ARMGlobalMerge(const TargetLowering *tli)
83      : FunctionPass(ID), TLI(tli) {}
84
85    virtual bool doInitialization(Module &M);
86    virtual bool runOnFunction(Function &F);
87
88    const char *getPassName() const {
89      return "Merge internal globals";
90    }
91
92    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
93      AU.setPreservesCFG();
94      FunctionPass::getAnalysisUsage(AU);
95    }
96
97    struct GlobalCmp {
98      const TargetData *TD;
99
100      GlobalCmp(const TargetData *td) : TD(td) { }
101
102      bool operator()(const GlobalVariable *GV1, const GlobalVariable *GV2) {
103        Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType();
104        Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType();
105
106        return (TD->getTypeAllocSize(Ty1) < TD->getTypeAllocSize(Ty2));
107      }
108    };
109  };
110} // end anonymous namespace
111
// Out-of-line definition of the pass identification member; the constructor
// hands it to FunctionPass.  Presumably only its address matters, not the
// stored value -- confirm against the pass infrastructure.
char ARMGlobalMerge::ID = 0;
113
114bool ARMGlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
115                             Module &M, bool isConst) const {
116  const TargetData *TD = TLI->getTargetData();
117
118  // FIXME: Infer the maximum possible offset depending on the actual users
119  // (these max offsets are different for the users inside Thumb or ARM
120  // functions)
121  unsigned MaxOffset = TLI->getMaximalGlobalOffset();
122
123  // FIXME: Find better heuristics
124  std::stable_sort(Globals.begin(), Globals.end(), GlobalCmp(TD));
125
126  Type *Int32Ty = Type::getInt32Ty(M.getContext());
127
128  for (size_t i = 0, e = Globals.size(); i != e; ) {
129    size_t j = 0;
130    uint64_t MergedSize = 0;
131    std::vector<Type*> Tys;
132    std::vector<Constant*> Inits;
133    for (j = i; j != e; ++j) {
134      Type *Ty = Globals[j]->getType()->getElementType();
135      MergedSize += TD->getTypeAllocSize(Ty);
136      if (MergedSize > MaxOffset) {
137        break;
138      }
139      Tys.push_back(Ty);
140      Inits.push_back(Globals[j]->getInitializer());
141    }
142
143    StructType *MergedTy = StructType::get(M.getContext(), Tys);
144    Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
145    GlobalVariable *MergedGV = new GlobalVariable(M, MergedTy, isConst,
146                                                  GlobalValue::InternalLinkage,
147                                                  MergedInit, "_MergedGlobals");
148    for (size_t k = i; k < j; ++k) {
149      Constant *Idx[2] = {
150        ConstantInt::get(Int32Ty, 0),
151        ConstantInt::get(Int32Ty, k-i)
152      };
153      Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx);
154      Globals[k]->replaceAllUsesWith(GEP);
155      Globals[k]->eraseFromParent();
156    }
157    i = j;
158  }
159
160  return true;
161}
162
163
164bool ARMGlobalMerge::doInitialization(Module &M) {
165  SmallVector<GlobalVariable*, 16> Globals, ConstGlobals, BSSGlobals;
166  const TargetData *TD = TLI->getTargetData();
167  unsigned MaxOffset = TLI->getMaximalGlobalOffset();
168  bool Changed = false;
169
170  // Grab all non-const globals.
171  for (Module::global_iterator I = M.global_begin(),
172         E = M.global_end(); I != E; ++I) {
173    // Merge is safe for "normal" internal globals only
174    if (!I->hasLocalLinkage() || I->isThreadLocal() || I->hasSection())
175      continue;
176
177    // Ignore fancy-aligned globals for now.
178    unsigned Alignment = I->getAlignment();
179    Type *Ty = I->getType()->getElementType();
180    if (Alignment > TD->getABITypeAlignment(Ty))
181      continue;
182
183    // Ignore all 'special' globals.
184    if (I->getName().startswith("llvm.") ||
185        I->getName().startswith(".llvm."))
186      continue;
187
188    if (TD->getTypeAllocSize(Ty) < MaxOffset) {
189      const TargetLoweringObjectFile &TLOF = TLI->getObjFileLowering();
190      if (TLOF.getKindForGlobal(I, TLI->getTargetMachine()).isBSSLocal())
191        BSSGlobals.push_back(I);
192      else if (I->isConstant())
193        ConstGlobals.push_back(I);
194      else
195        Globals.push_back(I);
196    }
197  }
198
199  if (Globals.size() > 1)
200    Changed |= doMerge(Globals, M, false);
201  if (BSSGlobals.size() > 1)
202    Changed |= doMerge(BSSGlobals, M, false);
203
204  // FIXME: This currently breaks the EH processing due to way how the
205  // typeinfo detection works. We might want to detect the TIs and ignore
206  // them in the future.
207  // if (ConstGlobals.size() > 1)
208  //  Changed |= doMerge(ConstGlobals, M, true);
209
210  return Changed;
211}
212
/// runOnFunction - Per-function hook of the pass.  It performs no work and
/// always reports \p F as unmodified; the merging is driven from
/// doInitialization().
bool ARMGlobalMerge::runOnFunction(Function &F) {
  return false;
}
216
/// createARMGlobalMergePass - Allocate a new ARMGlobalMerge pass that consults
/// \p tli for target information.  The object is heap-allocated and returned
/// as a raw pointer; presumably the pass manager it is added to takes
/// ownership -- confirm at the call site.
FunctionPass *llvm::createARMGlobalMergePass(const TargetLowering *tli) {
  return new ARMGlobalMerge(tli);
}
220