RegionInfo.h revision f96b0063674e6bf72da5429bd49097e33c2325c7
1//===- RegionInfo.h - SESE region analysis ----------------------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Calculate a program structure tree built out of single entry single exit
11// regions.
12// The basic ideas are taken from "The Program Structure Tree - Richard Johnson,
13// David Pearson, Keshav Pingali - 1994", however enriched with ideas from "The
14// Refined Process Structure Tree - Jussi Vanhatalo, Hagen Voelyer, Jana
15// Koehler - 2009".
16// The algorithm to calculate these data structures however is completely
17// different, as it takes advantage of existing information already available
18// in (Post)dominace tree and dominance frontier passes. This leads to a simpler
19// and in practice hopefully better performing algorithm. The runtime of the
20// algorithms described in the papers above are both linear in graph size,
21// O(V+E), whereas this algorithm is not, as the dominance frontier information
22// itself is not, but in practice runtime seems to be in the order of magnitude
23// of dominance tree calculation.
24//
25//===----------------------------------------------------------------------===//
26
27#ifndef LLVM_ANALYSIS_REGION_INFO_H
28#define LLVM_ANALYSIS_REGION_INFO_H
29
30#include "llvm/ADT/PointerIntPair.h"
31#include "llvm/Analysis/Dominators.h"
32#include "llvm/Analysis/PostDominators.h"
33#include "llvm/Support/Allocator.h"
34
35namespace llvm {
36
37class Region;
38class RegionInfo;
39class raw_ostream;
40
41/// @brief Marker class to iterate over the elements of a Region in flat mode.
42///
43/// The class is used to either iterate in Flat mode or by not using it to not
44/// iterate in Flat mode.  During a Flat mode iteration all Regions are entered
45/// and the iteration returns every BasicBlock.  If the Flat mode is not
46/// selected for SubRegions just one RegionNode containing the subregion is
47/// returned.
48template <class GraphType>
49class FlatIt {};
50
51/// @brief A RegionNode represents a subregion or a BasicBlock that is part of a
52/// Region.
53class RegionNode {
54  // DO NOT IMPLEMENT
55  RegionNode(const RegionNode &);
56  // DO NOT IMPLEMENT
57  const RegionNode &operator=(const RegionNode &);
58
59  /// This is the entry basic block that starts this region node.  If this is a
60  /// BasicBlock RegionNode, then entry is just the basic block, that this
61  /// RegionNode represents.  Otherwise it is the entry of this (Sub)RegionNode.
62  ///
63  /// In the BBtoRegionNode map of the parent of this node, BB will always map
64  /// to this node no matter which kind of node this one is.
65  ///
66  /// The node can hold either a Region or a BasicBlock.
67  /// Use one bit to save, if this RegionNode is a subregion or BasicBlock
68  /// RegionNode.
69  PointerIntPair<BasicBlock*, 1, bool> entry;
70
71protected:
72  /// @brief The parent Region of this RegionNode.
73  /// @see getParent()
74  Region* parent;
75
76public:
77  /// @brief Create a RegionNode.
78  ///
79  /// @param Parent      The parent of this RegionNode.
80  /// @param Entry       The entry BasicBlock of the RegionNode.  If this
81  ///                    RegionNode represents a BasicBlock, this is the
82  ///                    BasicBlock itself.  If it represents a subregion, this
83  ///                    is the entry BasicBlock of the subregion.
84  /// @param isSubRegion If this RegionNode represents a SubRegion.
85  inline RegionNode(Region* Parent, BasicBlock* Entry, bool isSubRegion = 0)
86    : entry(Entry, isSubRegion), parent(Parent) {}
87
88  /// @brief Get the parent Region of this RegionNode.
89  ///
90  /// The parent Region is the Region this RegionNode belongs to. If for
91  /// example a BasicBlock is element of two Regions, there exist two
92  /// RegionNodes for this BasicBlock. Each with the getParent() function
93  /// pointing to the Region this RegionNode belongs to.
94  ///
95  /// @return Get the parent Region of this RegionNode.
96  inline Region* getParent() const { return parent; }
97
98  /// @brief Get the entry BasicBlock of this RegionNode.
99  ///
100  /// If this RegionNode represents a BasicBlock this is just the BasicBlock
101  /// itself, otherwise we return the entry BasicBlock of the Subregion
102  ///
103  /// @return The entry BasicBlock of this RegionNode.
104  inline BasicBlock* getEntry() const { return entry.getPointer(); }
105
106  /// @brief Get the content of this RegionNode.
107  ///
108  /// This can be either a BasicBlock or a subregion. Before calling getNodeAs()
109  /// check the type of the content with the isSubRegion() function call.
110  ///
111  /// @return The content of this RegionNode.
112  template<class T>
113  inline T* getNodeAs() const;
114
115  /// @brief Is this RegionNode a subregion?
116  ///
117  /// @return True if it contains a subregion. False if it contains a
118  ///         BasicBlock.
119  inline bool isSubRegion() const {
120    return entry.getInt();
121  }
122};
123
124/// Print a RegionNode.
125inline raw_ostream &operator<<(raw_ostream &OS, const RegionNode &Node);
126
127template<>
128inline BasicBlock* RegionNode::getNodeAs<BasicBlock>() const {
129  assert(!isSubRegion() && "This is not a BasicBlock RegionNode!");
130  return getEntry();
131}
132
133template<>
134inline Region* RegionNode::getNodeAs<Region>() const {
135  assert(isSubRegion() && "This is not a subregion RegionNode!");
136  return reinterpret_cast<Region*>(const_cast<RegionNode*>(this));
137}
138
139//===----------------------------------------------------------------------===//
140/// @brief A single entry single exit Region.
141///
142/// A Region is a connected subgraph of a control flow graph that has exactly
143/// two connections to the remaining graph. It can be used to analyze or
144/// optimize parts of the control flow graph.
145///
146/// A <em> simple Region </em> is connected to the remaing graph by just two
147/// edges. One edge entering the Region and another one leaving the Region.
148///
149/// An <em> extended Region </em> (or just Region) is a subgraph that can be
150/// transform into a simple Region. The transformation is done by adding
151/// BasicBlocks that merge several entry or exit edges so that after the merge
152/// just one entry and one exit edge exists.
153///
154/// The \e Entry of a Region is the first BasicBlock that is passed after
155/// entering the Region. It is an element of the Region. The entry BasicBlock
156/// dominates all BasicBlocks in the Region.
157///
158/// The \e Exit of a Region is the first BasicBlock that is passed after
159/// leaving the Region. It is not an element of the Region. The exit BasicBlock,
160/// postdominates all BasicBlocks in the Region.
161///
162/// A <em> canonical Region </em> cannot be constructed by combining smaller
163/// Regions.
164///
165/// Region A is the \e parent of Region B, if B is completely contained in A.
166///
167/// Two canonical Regions either do not intersect at all or one is
168/// the parent of the other.
169///
170/// The <em> Program Structure Tree</em> is a graph (V, E) where V is the set of
171/// Regions in the control flow graph and E is the \e parent relation of these
172/// Regions.
173///
174/// Example:
175///
176/// \verbatim
177/// A simple control flow graph, that contains two regions.
178///
179///        1
180///       / |
181///      2   |
182///     / \   3
183///    4   5  |
184///    |   |  |
185///    6   7  8
186///     \  | /
187///      \ |/       Region A: 1 -> 9 {1,2,3,4,5,6,7,8}
188///        9        Region B: 2 -> 9 {2,4,5,6,7}
189/// \endverbatim
190///
191/// You can obtain more examples by either calling
192///
193/// <tt> "opt -regions -analyze anyprogram.ll" </tt>
194/// or
195/// <tt> "opt -view-regions-only anyprogram.ll" </tt>
196///
197/// on any LLVM file you are interested in.
198///
199/// The first call returns a textual representation of the program structure
200/// tree, the second one creates a graphical representation using graphviz.
201class Region : public RegionNode {
202  friend class RegionInfo;
203  // DO NOT IMPLEMENT
204  Region(const Region &);
205  // DO NOT IMPLEMENT
206  const Region &operator=(const Region &);
207
208  // Information necessary to manage this Region.
209  RegionInfo* RI;
210  DominatorTree *DT;
211
212  // The exit BasicBlock of this region.
213  // (The entry BasicBlock is part of RegionNode)
214  BasicBlock *exit;
215
216  typedef std::vector<Region*> RegionSet;
217
218  // The subregions of this region.
219  RegionSet children;
220
221  typedef std::map<BasicBlock*, RegionNode*> BBNodeMapT;
222
223  // Save the BasicBlock RegionNodes that are element of this Region.
224  mutable BBNodeMapT BBNodeMap;
225
226  /// verifyBBInRegion - Check if a BB is in this Region. This check also works
227  /// if the region is incorrectly built. (EXPENSIVE!)
228  void verifyBBInRegion(BasicBlock* BB) const;
229
230  /// verifyWalk - Walk over all the BBs of the region starting from BB and
231  /// verify that all reachable basic blocks are elements of the region.
232  /// (EXPENSIVE!)
233  void verifyWalk(BasicBlock* BB, std::set<BasicBlock*>* visitedBB) const;
234
235  /// verifyRegionNest - Verify if the region and its children are valid
236  /// regions (EXPENSIVE!)
237  void verifyRegionNest() const;
238
239public:
240  /// @brief Create a new region.
241  ///
242  /// @param Entry  The entry basic block of the region.
243  /// @param Exit   The exit basic block of the region.
244  /// @param RI     The region info object that is managing this region.
245  /// @param DT     The dominator tree of the current function.
246  /// @param Parent The surrounding region or NULL if this is a top level
247  ///               region.
248  Region(BasicBlock *Entry, BasicBlock *Exit, RegionInfo* RI,
249         DominatorTree *DT, Region *Parent = 0);
250
251  /// Delete the Region and all its subregions.
252  ~Region();
253
254  /// @brief Get the entry BasicBlock of the Region.
255  /// @return The entry BasicBlock of the region.
256  BasicBlock *getEntry() const { return RegionNode::getEntry(); }
257
258  /// @brief Get the exit BasicBlock of the Region.
259  /// @return The exit BasicBlock of the Region, NULL if this is the TopLevel
260  ///         Region.
261  BasicBlock *getExit() const { return exit; }
262
263  /// @brief Get the parent of the Region.
264  /// @return The parent of the Region or NULL if this is a top level
265  ///         Region.
266  Region *getParent() const { return RegionNode::getParent(); }
267
268  /// @brief Get the RegionNode representing the current Region.
269  /// @return The RegionNode representing the current Region.
270  RegionNode* getNode() const {
271    return const_cast<RegionNode*>(reinterpret_cast<const RegionNode*>(this));
272  }
273
274  /// @brief Get the nesting level of this Region.
275  ///
276  /// An toplevel Region has depth 0.
277  ///
278  /// @return The depth of the region.
279  unsigned getDepth() const;
280
281  /// @brief Is this a simple region?
282  ///
283  /// A region is simple if it has exactly one exit and one entry edge.
284  ///
285  /// @return True if the Region is simple.
286  bool isSimple() const;
287
288  /// @brief Returns the name of the Region.
289  /// @return The Name of the Region.
290  std::string getNameStr() const {
291    std::string exitName;
292
293    if (getExit())
294      exitName = getExit()->getNameStr();
295    else
296      exitName = "<Function Return>";
297
298    return getEntry()->getNameStr() + " => " + exitName;
299  }
300
301  /// @brief Return the RegionInfo object, that belongs to this Region.
302  RegionInfo *getRegionInfo() const {
303    return RI;
304  }
305
306  /// @brief Print the region.
307  ///
308  /// @param OS The output stream the Region is printed to.
309  /// @param printTree Print also the tree of subregions.
310  /// @param level The indentation level used for printing.
311  void print(raw_ostream& OS, bool printTree = true, unsigned level = 0) const;
312
313  /// @brief Print the region to stderr.
314  void dump() const;
315
316  /// @brief Check if the region contains a BasicBlock.
317  ///
318  /// @param BB The BasicBlock that might be contained in this Region.
319  /// @return True if the block is contained in the region otherwise false.
320  bool contains(const BasicBlock *BB) const;
321
322  /// @brief Check if the region contains another region.
323  ///
324  /// @param SubRegion The region that might be contained in this Region.
325  /// @return True if SubRegion is contained in the region otherwise false.
326  bool contains(const Region *SubRegion) const {
327    // Toplevel Region.
328    if (!getExit())
329      return true;
330
331    return contains(SubRegion->getEntry())
332      && (contains(SubRegion->getExit()) || SubRegion->getExit() == getExit());
333  }
334
335  /// @brief Check if the region contains an Instruction.
336  ///
337  /// @param Inst The Instruction that might be contained in this region.
338  /// @return True if the Instruction is contained in the region otherwise false.
339  bool contains(const Instruction *Inst) const {
340    return contains(Inst->getParent());
341  }
342
343  /// @brief Get the subregion that starts at a BasicBlock
344  ///
345  /// @param BB The BasicBlock the subregion should start.
346  /// @return The Subregion if available, otherwise NULL.
347  Region* getSubRegionNode(BasicBlock *BB) const;
348
349  /// @brief Get the RegionNode for a BasicBlock
350  ///
351  /// @param BB The BasicBlock at which the RegionNode should start.
352  /// @return If available, the RegionNode that represents the subregion
353  ///         starting at BB. If no subregion starts at BB, the RegionNode
354  ///         representing BB.
355  RegionNode* getNode(BasicBlock *BB) const;
356
357  /// @brief Get the BasicBlock RegionNode for a BasicBlock
358  ///
359  /// @param BB The BasicBlock for which the RegionNode is requested.
360  /// @return The RegionNode representing the BB.
361  RegionNode* getBBNode(BasicBlock *BB) const;
362
363  /// @brief Add a new subregion to this Region.
364  ///
365  /// @param SubRegion The new subregion that will be added.
366  void addSubRegion(Region *SubRegion);
367
368  /// @brief Remove a subregion from this Region.
369  ///
370  /// The subregion is not deleted, as it will probably be inserted into another
371  /// region.
372  /// @param SubRegion The SubRegion that will be removed.
373  Region *removeSubRegion(Region *SubRegion);
374
375  /// @brief Move all direct child nodes of this Region to another Region.
376  ///
377  /// @param To The Region the child nodes will be transfered to.
378  void transferChildrenTo(Region *To);
379
380  /// @brief Verify if the region is a correct region.
381  ///
382  /// Check if this is a correctly build Region. This is an expensive check, as
383  /// the complete CFG of the Region will be walked.
384  void verifyRegion() const;
385
386  /// @brief Clear the cache for BB RegionNodes.
387  ///
388  /// After calling this function the BasicBlock RegionNodes will be stored at
389  /// different memory locations. RegionNodes obtained before this function is
390  /// called are therefore not comparable to RegionNodes abtained afterwords.
391  void clearNodeCache();
392
393  /// @name Subregion Iterators
394  ///
395  /// These iterators iterator over all subregions of this Region.
396  //@{
397  typedef RegionSet::iterator iterator;
398  typedef RegionSet::const_iterator const_iterator;
399
400  iterator begin() { return children.begin(); }
401  iterator end() { return children.end(); }
402
403  const_iterator begin() const { return children.begin(); }
404  const_iterator end() const { return children.end(); }
405  //@}
406
407  /// @name BasicBlock Iterators
408  ///
409  /// These iterators iterate over all BasicBlock RegionNodes that are
410  /// contained in this Region. The iterator also iterates over BasicBlocks
411  /// that are elements of a subregion of this Region. It is therefore called a
412  /// flat iterator.
413  //@{
414  typedef df_iterator<RegionNode*, SmallPtrSet<RegionNode*, 8>, false,
415                      GraphTraits<FlatIt<RegionNode*> > > block_iterator;
416
417  typedef df_iterator<const RegionNode*, SmallPtrSet<const RegionNode*, 8>,
418                      false, GraphTraits<FlatIt<const RegionNode*> > >
419            const_block_iterator;
420
421  block_iterator block_begin();
422  block_iterator block_end();
423
424  const_block_iterator block_begin() const;
425  const_block_iterator block_end() const;
426  //@}
427
428  /// @name Element Iterators
429  ///
430  /// These iterators iterate over all BasicBlock and subregion RegionNodes that
431  /// are direct children of this Region. It does not iterate over any
432  /// RegionNodes that are also element of a subregion of this Region.
433  //@{
434  typedef df_iterator<RegionNode*, SmallPtrSet<RegionNode*, 8>, false,
435                      GraphTraits<RegionNode*> > element_iterator;
436
437  typedef df_iterator<const RegionNode*, SmallPtrSet<const RegionNode*, 8>,
438                      false, GraphTraits<const RegionNode*> >
439            const_element_iterator;
440
441  element_iterator element_begin();
442  element_iterator element_end();
443
444  const_element_iterator element_begin() const;
445  const_element_iterator element_end() const;
446  //@}
447};
448
449//===----------------------------------------------------------------------===//
450/// @brief Analysis that detects all canonical Regions.
451///
452/// The RegionInfo pass detects all canonical regions in a function. The Regions
453/// are connected using the parent relation. This builds a Program Structure
454/// Tree.
455class RegionInfo : public FunctionPass {
456  typedef DenseMap<BasicBlock*,BasicBlock*> BBtoBBMap;
457  typedef DenseMap<BasicBlock*, Region*> BBtoRegionMap;
458  typedef SmallPtrSet<Region*, 4> RegionSet;
459
460  // DO NOT IMPLEMENT
461  RegionInfo(const RegionInfo &);
462  // DO NOT IMPLEMENT
463  const RegionInfo &operator=(const RegionInfo &);
464
465  DominatorTree *DT;
466  PostDominatorTree *PDT;
467  DominanceFrontier *DF;
468
469  /// The top level region.
470  Region *TopLevelRegion;
471
472  /// Map every BB to the smallest region, that contains BB.
473  BBtoRegionMap BBtoRegion;
474
475  // isCommonDomFrontier - Returns true if BB is in the dominance frontier of
476  // entry, because it was inherited from exit. In the other case there is an
477  // edge going from entry to BB without passing exit.
478  bool isCommonDomFrontier(BasicBlock* BB, BasicBlock* entry,
479                           BasicBlock* exit) const;
480
481  // isRegion - Check if entry and exit surround a valid region, based on
482  // dominance tree and dominance frontier.
483  bool isRegion(BasicBlock* entry, BasicBlock* exit) const;
484
485  // insertShortCut - Saves a shortcut pointing from entry to exit.
486  // This function may extend this shortcut if possible.
487  void insertShortCut(BasicBlock* entry, BasicBlock* exit,
488                      BBtoBBMap* ShortCut) const;
489
490  // getNextPostDom - Returns the next BB that postdominates N, while skipping
491  // all post dominators that cannot finish a canonical region.
492  DomTreeNode *getNextPostDom(DomTreeNode* N, BBtoBBMap *ShortCut) const;
493
494  // isTrivialRegion - A region is trivial, if it contains only one BB.
495  bool isTrivialRegion(BasicBlock *entry, BasicBlock *exit) const;
496
497  // createRegion - Creates a single entry single exit region.
498  Region *createRegion(BasicBlock *entry, BasicBlock *exit);
499
500  // findRegionsWithEntry - Detect all regions starting with bb 'entry'.
501  void findRegionsWithEntry(BasicBlock *entry, BBtoBBMap *ShortCut);
502
503  // scanForRegions - Detects regions in F.
504  void scanForRegions(Function &F, BBtoBBMap *ShortCut);
505
506  // getTopMostParent - Get the top most parent with the same entry block.
507  Region *getTopMostParent(Region *region);
508
509  // buildRegionsTree - build the region hierarchy after all region detected.
510  void buildRegionsTree(DomTreeNode *N, Region *region);
511
512  // Calculate - detecte all regions in function and build the region tree.
513  void Calculate(Function& F);
514
515  void releaseMemory();
516
517  // updateStatistics - Update statistic about created regions.
518  void updateStatistics(Region *R);
519
520  // isSimple - Check if a region is a simple region with exactly one entry
521  // edge and exactly one exit edge.
522  bool isSimple(Region* R) const;
523
524public:
525  static char ID;
526  explicit RegionInfo();
527
528  ~RegionInfo();
529
530  /// @name FunctionPass interface
531  //@{
532  virtual bool runOnFunction(Function &F);
533  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
534  virtual void print(raw_ostream &OS, const Module *) const;
535  virtual void verifyAnalysis() const;
536  //@}
537
538  /// @brief Get the smallest region that contains a BasicBlock.
539  ///
540  /// @param BB The basic block.
541  /// @return The smallest region, that contains BB or NULL, if there is no
542  /// region containing BB.
543  Region *getRegionFor(BasicBlock *BB) const;
544
545  /// @brief A shortcut for getRegionFor().
546  ///
547  /// @param BB The basic block.
548  /// @return The smallest region, that contains BB or NULL, if there is no
549  /// region containing BB.
550  Region *operator[](BasicBlock *BB) const;
551
552  /// @brief Find the smallest region that contains two regions.
553  ///
554  /// @param A The first region.
555  /// @param B The second region.
556  /// @return The smallest region containing A and B.
557  Region *getCommonRegion(Region* A, Region *B) const;
558
559  /// @brief Find the smallest region that contains two basic blocks.
560  ///
561  /// @param A The first basic block.
562  /// @param B The second basic block.
563  /// @return The smallest region that contains A and B.
564  Region* getCommonRegion(BasicBlock* A, BasicBlock *B) const {
565    return getCommonRegion(getRegionFor(A), getRegionFor(B));
566  }
567
568  /// @brief Find the smallest region that contains a set of regions.
569  ///
570  /// @param Regions A vector of regions.
571  /// @return The smallest region that contains all regions in Regions.
572  Region* getCommonRegion(SmallVectorImpl<Region*> &Regions) const;
573
574  /// @brief Find the smallest region that contains a set of basic blocks.
575  ///
576  /// @param BBs A vector of basic blocks.
577  /// @return The smallest region that contains all basic blocks in BBS.
578  Region* getCommonRegion(SmallVectorImpl<BasicBlock*> &BBs) const;
579
580  Region *getTopLevelRegion() const {
581    return TopLevelRegion;
582  }
583
584  /// @brief Clear the Node Cache for all Regions.
585  ///
586  /// @see Region::clearNodeCache()
587  void clearNodeCache() {
588    if (TopLevelRegion)
589      TopLevelRegion->clearNodeCache();
590  }
591};
592
593inline raw_ostream &operator<<(raw_ostream &OS, const RegionNode &Node) {
594  if (Node.isSubRegion())
595    return OS << Node.getNodeAs<Region>()->getNameStr();
596  else
597    return OS << Node.getNodeAs<BasicBlock>()->getNameStr();
598}
599} // End llvm namespace
600#endif
601
602