1//===- MergeFunctions.cpp - Merge identical functions ---------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass looks for equivalent functions that are mergable and folds them.
11//
12// Order relation is defined on set of functions. It was made through
13// special function comparison procedure that returns
14// 0 when functions are equal,
15// -1 when Left function is less than right function, and
16// 1 for opposite case. We need total-ordering, so we need to maintain
17// four properties on the functions set:
18// a <= a (reflexivity)
19// if a <= b and b <= a then a = b (antisymmetry)
20// if a <= b and b <= c then a <= c (transitivity).
21// for all a and b: a <= b or b <= a (totality).
22//
23// Comparison iterates through each instruction in each basic block.
24// Functions are kept on binary tree. For each new function F we perform
25// lookup in binary tree.
26// In practice it works the following way:
27// -- We define Function* container class with custom "operator<" (FunctionPtr).
28// -- "FunctionPtr" instances are stored in std::set collection, so every
29//    std::set::insert operation will give you result in log(N) time.
30//
31// As an optimization, a hash of the function structure is calculated first, and
32// two functions are only compared if they have the same hash. This hash is
33// cheap to compute, and has the property that if function F == G according to
34// the comparison function, then hash(F) == hash(G). This consistency property
35// is critical to ensuring all possible merging opportunities are exploited.
36// Collisions in the hash affect the speed of the pass but not the correctness
37// or determinism of the resulting transformation.
38//
39// When a match is found the functions are folded. If both functions are
40// overridable, we move the functionality into a new internal function and
41// leave two overridable thunks to it.
42//
43//===----------------------------------------------------------------------===//
44//
45// Future work:
46//
47// * virtual functions.
48//
49// Many functions have their address taken by the virtual function table for
50// the object they belong to. However, as long as it's only used for a lookup
51// and call, this is irrelevant, and we'd like to fold such functions.
52//
53// * be smarter about bitcasts.
54//
55// In order to fold functions, we will sometimes add either bitcast instructions
56// or bitcast constant expressions. Unfortunately, this can confound further
57// analysis since the two functions differ where one has a bitcast and the
58// other doesn't. We should learn to look through bitcasts.
59//
60// * Compare complex types with pointer types inside.
61// * Compare cross-reference cases.
62// * Compare complex expressions.
63//
64// All the three issues above could be described as ability to prove that
65// fA == fB == fC == fE == fF == fG in example below:
66//
67//  void fA() {
68//    fB();
69//  }
70//  void fB() {
71//    fA();
72//  }
73//
74//  void fE() {
75//    fF();
76//  }
77//  void fF() {
78//    fG();
79//  }
80//  void fG() {
81//    fE();
82//  }
83//
84// Simplest cross-reference case (fA <--> fB) was implemented in previous
85// versions of MergeFunctions, though it presented only in two function pairs
86// in test-suite (that counts >50k functions)
87// Though possibility to detect complex cross-referencing (e.g.: A->B->C->D->A)
88// could cover much more cases.
89//
90//===----------------------------------------------------------------------===//
91
92#include "llvm/Transforms/IPO.h"
93#include "llvm/ADT/DenseSet.h"
94#include "llvm/ADT/FoldingSet.h"
95#include "llvm/ADT/STLExtras.h"
96#include "llvm/ADT/SmallSet.h"
97#include "llvm/ADT/Statistic.h"
98#include "llvm/ADT/Hashing.h"
99#include "llvm/IR/CallSite.h"
100#include "llvm/IR/Constants.h"
101#include "llvm/IR/DataLayout.h"
102#include "llvm/IR/IRBuilder.h"
103#include "llvm/IR/InlineAsm.h"
104#include "llvm/IR/Instructions.h"
105#include "llvm/IR/LLVMContext.h"
106#include "llvm/IR/Module.h"
107#include "llvm/IR/Operator.h"
108#include "llvm/IR/ValueHandle.h"
109#include "llvm/IR/ValueMap.h"
110#include "llvm/Pass.h"
111#include "llvm/Support/CommandLine.h"
112#include "llvm/Support/Debug.h"
113#include "llvm/Support/ErrorHandling.h"
114#include "llvm/Support/raw_ostream.h"
115#include <vector>
116
117using namespace llvm;
118
119#define DEBUG_TYPE "mergefunc"
120
121STATISTIC(NumFunctionsMerged, "Number of functions merged");
122STATISTIC(NumThunksWritten, "Number of thunks generated");
123STATISTIC(NumAliasesWritten, "Number of aliases generated");
124STATISTIC(NumDoubleWeak, "Number of new functions created");
125
126static cl::opt<unsigned> NumFunctionsForSanityCheck(
127    "mergefunc-sanity",
128    cl::desc("How many functions in module could be used for "
129             "MergeFunctions pass sanity check. "
130             "'0' disables this check. Works only with '-debug' key."),
131    cl::init(0), cl::Hidden);
132
133namespace {
134
135/// GlobalNumberState assigns an integer to each global value in the program,
136/// which is used by the comparison routine to order references to globals. This
137/// state must be preserved throughout the pass, because Functions and other
138/// globals need to maintain their relative order. Globals are assigned a number
139/// when they are first visited. This order is deterministic, and so the
140/// assigned numbers are as well. When two functions are merged, neither number
141/// is updated. If the symbols are weak, this would be incorrect. If they are
142/// strong, then one will be replaced at all references to the other, and so
143/// direct callsites will now see one or the other symbol, and no update is
144/// necessary. Note that if we were guaranteed unique names, we could just
145/// compare those, but this would not work for stripped bitcodes or for those
146/// few symbols without a name.
147class GlobalNumberState {
148  struct Config : ValueMapConfig<GlobalValue*> {
149    enum { FollowRAUW = false };
150  };
151  // Each GlobalValue is mapped to an identifier. The Config ensures when RAUW
152  // occurs, the mapping does not change. Tracking changes is unnecessary, and
153  // also problematic for weak symbols (which may be overwritten).
154  typedef ValueMap<GlobalValue *, uint64_t, Config> ValueNumberMap;
155  ValueNumberMap GlobalNumbers;
156  // The next unused serial number to assign to a global.
157  uint64_t NextNumber;
158  public:
159    GlobalNumberState() : GlobalNumbers(), NextNumber(0) {}
160    uint64_t getNumber(GlobalValue* Global) {
161      ValueNumberMap::iterator MapIter;
162      bool Inserted;
163      std::tie(MapIter, Inserted) = GlobalNumbers.insert({Global, NextNumber});
164      if (Inserted)
165        NextNumber++;
166      return MapIter->second;
167    }
168    void clear() {
169      GlobalNumbers.clear();
170    }
171};
172
173/// FunctionComparator - Compares two functions to determine whether or not
174/// they will generate machine code with the same behaviour. DataLayout is
175/// used if available. The comparator always fails conservatively (erring on the
176/// side of claiming that two functions are different).
177class FunctionComparator {
178public:
179  FunctionComparator(const Function *F1, const Function *F2,
180                     GlobalNumberState* GN)
181      : FnL(F1), FnR(F2), GlobalNumbers(GN) {}
182
183  /// Test whether the two functions have equivalent behaviour.
184  int compare();
185  /// Hash a function. Equivalent functions will have the same hash, and unequal
186  /// functions will have different hashes with high probability.
187  typedef uint64_t FunctionHash;
188  static FunctionHash functionHash(Function &);
189
190private:
191  /// Test whether two basic blocks have equivalent behaviour.
192  int cmpBasicBlocks(const BasicBlock *BBL, const BasicBlock *BBR);
193
194  /// Constants comparison.
195  /// Its analog to lexicographical comparison between hypothetical numbers
196  /// of next format:
197  /// <bitcastability-trait><raw-bit-contents>
198  ///
199  /// 1. Bitcastability.
200  /// Check whether L's type could be losslessly bitcasted to R's type.
201  /// On this stage method, in case when lossless bitcast is not possible
202  /// method returns -1 or 1, thus also defining which type is greater in
203  /// context of bitcastability.
204  /// Stage 0: If types are equal in terms of cmpTypes, then we can go straight
205  ///          to the contents comparison.
206  ///          If types differ, remember types comparison result and check
207  ///          whether we still can bitcast types.
208  /// Stage 1: Types that satisfies isFirstClassType conditions are always
209  ///          greater then others.
210  /// Stage 2: Vector is greater then non-vector.
211  ///          If both types are vectors, then vector with greater bitwidth is
212  ///          greater.
213  ///          If both types are vectors with the same bitwidth, then types
214  ///          are bitcastable, and we can skip other stages, and go to contents
215  ///          comparison.
216  /// Stage 3: Pointer types are greater than non-pointers. If both types are
217  ///          pointers of the same address space - go to contents comparison.
218  ///          Different address spaces: pointer with greater address space is
219  ///          greater.
220  /// Stage 4: Types are neither vectors, nor pointers. And they differ.
221  ///          We don't know how to bitcast them. So, we better don't do it,
222  ///          and return types comparison result (so it determines the
223  ///          relationship among constants we don't know how to bitcast).
224  ///
225  /// Just for clearance, let's see how the set of constants could look
226  /// on single dimension axis:
227  ///
228  /// [NFCT], [FCT, "others"], [FCT, pointers], [FCT, vectors]
229  /// Where: NFCT - Not a FirstClassType
230  ///        FCT - FirstClassTyp:
231  ///
232  /// 2. Compare raw contents.
233  /// It ignores types on this stage and only compares bits from L and R.
234  /// Returns 0, if L and R has equivalent contents.
235  /// -1 or 1 if values are different.
236  /// Pretty trivial:
237  /// 2.1. If contents are numbers, compare numbers.
238  ///    Ints with greater bitwidth are greater. Ints with same bitwidths
239  ///    compared by their contents.
240  /// 2.2. "And so on". Just to avoid discrepancies with comments
241  /// perhaps it would be better to read the implementation itself.
242  /// 3. And again about overall picture. Let's look back at how the ordered set
243  /// of constants will look like:
244  /// [NFCT], [FCT, "others"], [FCT, pointers], [FCT, vectors]
245  ///
246  /// Now look, what could be inside [FCT, "others"], for example:
247  /// [FCT, "others"] =
248  /// [
249  ///   [double 0.1], [double 1.23],
250  ///   [i32 1], [i32 2],
251  ///   { double 1.0 },       ; StructTyID, NumElements = 1
252  ///   { i32 1 },            ; StructTyID, NumElements = 1
253  ///   { double 1, i32 1 },  ; StructTyID, NumElements = 2
254  ///   { i32 1, double 1 }   ; StructTyID, NumElements = 2
255  /// ]
256  ///
257  /// Let's explain the order. Float numbers will be less than integers, just
258  /// because of cmpType terms: FloatTyID < IntegerTyID.
259  /// Floats (with same fltSemantics) are sorted according to their value.
260  /// Then you can see integers, and they are, like a floats,
261  /// could be easy sorted among each others.
262  /// The structures. Structures are grouped at the tail, again because of their
263  /// TypeID: StructTyID > IntegerTyID > FloatTyID.
264  /// Structures with greater number of elements are greater. Structures with
265  /// greater elements going first are greater.
266  /// The same logic with vectors, arrays and other possible complex types.
267  ///
268  /// Bitcastable constants.
269  /// Let's assume, that some constant, belongs to some group of
270  /// "so-called-equal" values with different types, and at the same time
271  /// belongs to another group of constants with equal types
272  /// and "really" equal values.
273  ///
274  /// Now, prove that this is impossible:
275  ///
276  /// If constant A with type TyA is bitcastable to B with type TyB, then:
277  /// 1. All constants with equal types to TyA, are bitcastable to B. Since
278  ///    those should be vectors (if TyA is vector), pointers
279  ///    (if TyA is pointer), or else (if TyA equal to TyB), those types should
280  ///    be equal to TyB.
281  /// 2. All constants with non-equal, but bitcastable types to TyA, are
282  ///    bitcastable to B.
283  ///    Once again, just because we allow it to vectors and pointers only.
284  ///    This statement could be expanded as below:
285  /// 2.1. All vectors with equal bitwidth to vector A, has equal bitwidth to
286  ///      vector B, and thus bitcastable to B as well.
287  /// 2.2. All pointers of the same address space, no matter what they point to,
288  ///      bitcastable. So if C is pointer, it could be bitcasted to A and to B.
289  /// So any constant equal or bitcastable to A is equal or bitcastable to B.
290  /// QED.
291  ///
292  /// In another words, for pointers and vectors, we ignore top-level type and
293  /// look at their particular properties (bit-width for vectors, and
294  /// address space for pointers).
295  /// If these properties are equal - compare their contents.
296  int cmpConstants(const Constant *L, const Constant *R);
297
298  /// Compares two global values by number. Uses the GlobalNumbersState to
299  /// identify the same gobals across function calls.
300  int cmpGlobalValues(GlobalValue *L, GlobalValue *R);
301
302  /// Assign or look up previously assigned numbers for the two values, and
303  /// return whether the numbers are equal. Numbers are assigned in the order
304  /// visited.
305  /// Comparison order:
306  /// Stage 0: Value that is function itself is always greater then others.
307  ///          If left and right values are references to their functions, then
308  ///          they are equal.
309  /// Stage 1: Constants are greater than non-constants.
310  ///          If both left and right are constants, then the result of
311  ///          cmpConstants is used as cmpValues result.
312  /// Stage 2: InlineAsm instances are greater than others. If both left and
313  ///          right are InlineAsm instances, InlineAsm* pointers casted to
314  ///          integers and compared as numbers.
315  /// Stage 3: For all other cases we compare order we meet these values in
316  ///          their functions. If right value was met first during scanning,
317  ///          then left value is greater.
318  ///          In another words, we compare serial numbers, for more details
319  ///          see comments for sn_mapL and sn_mapR.
320  int cmpValues(const Value *L, const Value *R);
321
322  /// Compare two Instructions for equivalence, similar to
323  /// Instruction::isSameOperationAs but with modifications to the type
324  /// comparison.
325  /// Stages are listed in "most significant stage first" order:
326  /// On each stage below, we do comparison between some left and right
327  /// operation parts. If parts are non-equal, we assign parts comparison
328  /// result to the operation comparison result and exit from method.
329  /// Otherwise we proceed to the next stage.
330  /// Stages:
331  /// 1. Operations opcodes. Compared as numbers.
332  /// 2. Number of operands.
333  /// 3. Operation types. Compared with cmpType method.
334  /// 4. Compare operation subclass optional data as stream of bytes:
335  /// just convert it to integers and call cmpNumbers.
336  /// 5. Compare in operation operand types with cmpType in
337  /// most significant operand first order.
338  /// 6. Last stage. Check operations for some specific attributes.
339  /// For example, for Load it would be:
340  /// 6.1.Load: volatile (as boolean flag)
341  /// 6.2.Load: alignment (as integer numbers)
342  /// 6.3.Load: synch-scope (as integer numbers)
343  /// 6.4.Load: range metadata (as integer numbers)
344  /// On this stage its better to see the code, since its not more than 10-15
345  /// strings for particular instruction, and could change sometimes.
346  int cmpOperations(const Instruction *L, const Instruction *R) const;
347
348  /// Compare two GEPs for equivalent pointer arithmetic.
349  /// Parts to be compared for each comparison stage,
350  /// most significant stage first:
351  /// 1. Address space. As numbers.
352  /// 2. Constant offset, (using GEPOperator::accumulateConstantOffset method).
353  /// 3. Pointer operand type (using cmpType method).
354  /// 4. Number of operands.
355  /// 5. Compare operands, using cmpValues method.
356  int cmpGEPs(const GEPOperator *GEPL, const GEPOperator *GEPR);
357  int cmpGEPs(const GetElementPtrInst *GEPL, const GetElementPtrInst *GEPR) {
358    return cmpGEPs(cast<GEPOperator>(GEPL), cast<GEPOperator>(GEPR));
359  }
360
361  /// cmpType - compares two types,
362  /// defines total ordering among the types set.
363  ///
364  /// Return values:
365  /// 0 if types are equal,
366  /// -1 if Left is less than Right,
367  /// +1 if Left is greater than Right.
368  ///
369  /// Description:
370  /// Comparison is broken onto stages. Like in lexicographical comparison
371  /// stage coming first has higher priority.
372  /// On each explanation stage keep in mind total ordering properties.
373  ///
374  /// 0. Before comparison we coerce pointer types of 0 address space to
375  /// integer.
376  /// We also don't bother with same type at left and right, so
377  /// just return 0 in this case.
378  ///
379  /// 1. If types are of different kind (different type IDs).
380  ///    Return result of type IDs comparison, treating them as numbers.
381  /// 2. If types are integers, check that they have the same width. If they
382  /// are vectors, check that they have the same count and subtype.
383  /// 3. Types have the same ID, so check whether they are one of:
384  /// * Void
385  /// * Float
386  /// * Double
387  /// * X86_FP80
388  /// * FP128
389  /// * PPC_FP128
390  /// * Label
391  /// * Metadata
392  /// We can treat these types as equal whenever their IDs are same.
393  /// 4. If Left and Right are pointers, return result of address space
394  /// comparison (numbers comparison). We can treat pointer types of same
395  /// address space as equal.
396  /// 5. If types are complex.
397  /// Then both Left and Right are to be expanded and their element types will
398  /// be checked with the same way. If we get Res != 0 on some stage, return it.
399  /// Otherwise return 0.
400  /// 6. For all other cases put llvm_unreachable.
401  int cmpTypes(Type *TyL, Type *TyR) const;
402
403  int cmpNumbers(uint64_t L, uint64_t R) const;
404  int cmpAPInts(const APInt &L, const APInt &R) const;
405  int cmpAPFloats(const APFloat &L, const APFloat &R) const;
406  int cmpInlineAsm(const InlineAsm *L, const InlineAsm *R) const;
407  int cmpMem(StringRef L, StringRef R) const;
408  int cmpAttrs(const AttributeSet L, const AttributeSet R) const;
409  int cmpRangeMetadata(const MDNode* L, const MDNode* R) const;
410  int cmpOperandBundlesSchema(const Instruction *L, const Instruction *R) const;
411
412  // The two functions undergoing comparison.
413  const Function *FnL, *FnR;
414
415  /// Assign serial numbers to values from left function, and values from
416  /// right function.
417  /// Explanation:
418  /// Being comparing functions we need to compare values we meet at left and
419  /// right sides.
420  /// Its easy to sort things out for external values. It just should be
421  /// the same value at left and right.
422  /// But for local values (those were introduced inside function body)
423  /// we have to ensure they were introduced at exactly the same place,
424  /// and plays the same role.
425  /// Let's assign serial number to each value when we meet it first time.
426  /// Values that were met at same place will be with same serial numbers.
427  /// In this case it would be good to explain few points about values assigned
428  /// to BBs and other ways of implementation (see below).
429  ///
430  /// 1. Safety of BB reordering.
431  /// It's safe to change the order of BasicBlocks in function.
432  /// Relationship with other functions and serial numbering will not be
433  /// changed in this case.
434  /// As follows from FunctionComparator::compare(), we do CFG walk: we start
435  /// from the entry, and then take each terminator. So it doesn't matter how in
436  /// fact BBs are ordered in function. And since cmpValues are called during
437  /// this walk, the numbering depends only on how BBs located inside the CFG.
438  /// So the answer is - yes. We will get the same numbering.
439  ///
440  /// 2. Impossibility to use dominance properties of values.
441  /// If we compare two instruction operands: first is usage of local
442  /// variable AL from function FL, and second is usage of local variable AR
443  /// from FR, we could compare their origins and check whether they are
444  /// defined at the same place.
445  /// But, we are still not able to compare operands of PHI nodes, since those
446  /// could be operands from further BBs we didn't scan yet.
447  /// So it's impossible to use dominance properties in general.
448  DenseMap<const Value*, int> sn_mapL, sn_mapR;
449
450  // The global state we will use
451  GlobalNumberState* GlobalNumbers;
452};
453
454class FunctionNode {
455  mutable AssertingVH<Function> F;
456  FunctionComparator::FunctionHash Hash;
457public:
458  // Note the hash is recalculated potentially multiple times, but it is cheap.
459  FunctionNode(Function *F)
460    : F(F), Hash(FunctionComparator::functionHash(*F))  {}
461  Function *getFunc() const { return F; }
462  FunctionComparator::FunctionHash getHash() const { return Hash; }
463
464  /// Replace the reference to the function F by the function G, assuming their
465  /// implementations are equal.
466  void replaceBy(Function *G) const {
467    F = G;
468  }
469
470  void release() { F = nullptr; }
471};
472} // end anonymous namespace
473
474int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const {
475  if (L < R) return -1;
476  if (L > R) return 1;
477  return 0;
478}
479
480int FunctionComparator::cmpAPInts(const APInt &L, const APInt &R) const {
481  if (int Res = cmpNumbers(L.getBitWidth(), R.getBitWidth()))
482    return Res;
483  if (L.ugt(R)) return 1;
484  if (R.ugt(L)) return -1;
485  return 0;
486}
487
488int FunctionComparator::cmpAPFloats(const APFloat &L, const APFloat &R) const {
489  // Floats are ordered first by semantics (i.e. float, double, half, etc.),
490  // then by value interpreted as a bitstring (aka APInt).
491  const fltSemantics &SL = L.getSemantics(), &SR = R.getSemantics();
492  if (int Res = cmpNumbers(APFloat::semanticsPrecision(SL),
493                           APFloat::semanticsPrecision(SR)))
494    return Res;
495  if (int Res = cmpNumbers(APFloat::semanticsMaxExponent(SL),
496                           APFloat::semanticsMaxExponent(SR)))
497    return Res;
498  if (int Res = cmpNumbers(APFloat::semanticsMinExponent(SL),
499                           APFloat::semanticsMinExponent(SR)))
500    return Res;
501  if (int Res = cmpNumbers(APFloat::semanticsSizeInBits(SL),
502                           APFloat::semanticsSizeInBits(SR)))
503    return Res;
504  return cmpAPInts(L.bitcastToAPInt(), R.bitcastToAPInt());
505}
506
507int FunctionComparator::cmpMem(StringRef L, StringRef R) const {
508  // Prevent heavy comparison, compare sizes first.
509  if (int Res = cmpNumbers(L.size(), R.size()))
510    return Res;
511
512  // Compare strings lexicographically only when it is necessary: only when
513  // strings are equal in size.
514  return L.compare(R);
515}
516
517int FunctionComparator::cmpAttrs(const AttributeSet L,
518                                 const AttributeSet R) const {
519  if (int Res = cmpNumbers(L.getNumSlots(), R.getNumSlots()))
520    return Res;
521
522  for (unsigned i = 0, e = L.getNumSlots(); i != e; ++i) {
523    AttributeSet::iterator LI = L.begin(i), LE = L.end(i), RI = R.begin(i),
524                           RE = R.end(i);
525    for (; LI != LE && RI != RE; ++LI, ++RI) {
526      Attribute LA = *LI;
527      Attribute RA = *RI;
528      if (LA < RA)
529        return -1;
530      if (RA < LA)
531        return 1;
532    }
533    if (LI != LE)
534      return 1;
535    if (RI != RE)
536      return -1;
537  }
538  return 0;
539}
540
541int FunctionComparator::cmpRangeMetadata(const MDNode* L,
542                                         const MDNode* R) const {
543  if (L == R)
544    return 0;
545  if (!L)
546    return -1;
547  if (!R)
548    return 1;
549  // Range metadata is a sequence of numbers. Make sure they are the same
550  // sequence.
551  // TODO: Note that as this is metadata, it is possible to drop and/or merge
552  // this data when considering functions to merge. Thus this comparison would
553  // return 0 (i.e. equivalent), but merging would become more complicated
554  // because the ranges would need to be unioned. It is not likely that
555  // functions differ ONLY in this metadata if they are actually the same
556  // function semantically.
557  if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
558    return Res;
559  for (size_t I = 0; I < L->getNumOperands(); ++I) {
560    ConstantInt* LLow = mdconst::extract<ConstantInt>(L->getOperand(I));
561    ConstantInt* RLow = mdconst::extract<ConstantInt>(R->getOperand(I));
562    if (int Res = cmpAPInts(LLow->getValue(), RLow->getValue()))
563      return Res;
564  }
565  return 0;
566}
567
568int FunctionComparator::cmpOperandBundlesSchema(const Instruction *L,
569                                                const Instruction *R) const {
570  ImmutableCallSite LCS(L);
571  ImmutableCallSite RCS(R);
572
573  assert(LCS && RCS && "Must be calls or invokes!");
574  assert(LCS.isCall() == RCS.isCall() && "Can't compare otherwise!");
575
576  if (int Res =
577          cmpNumbers(LCS.getNumOperandBundles(), RCS.getNumOperandBundles()))
578    return Res;
579
580  for (unsigned i = 0, e = LCS.getNumOperandBundles(); i != e; ++i) {
581    auto OBL = LCS.getOperandBundleAt(i);
582    auto OBR = RCS.getOperandBundleAt(i);
583
584    if (int Res = OBL.getTagName().compare(OBR.getTagName()))
585      return Res;
586
587    if (int Res = cmpNumbers(OBL.Inputs.size(), OBR.Inputs.size()))
588      return Res;
589  }
590
591  return 0;
592}
593
594/// Constants comparison:
595/// 1. Check whether type of L constant could be losslessly bitcasted to R
596/// type.
597/// 2. Compare constant contents.
598/// For more details see declaration comments.
599int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) {
600
601  Type *TyL = L->getType();
602  Type *TyR = R->getType();
603
604  // Check whether types are bitcastable. This part is just re-factored
605  // Type::canLosslesslyBitCastTo method, but instead of returning true/false,
606  // we also pack into result which type is "less" for us.
607  int TypesRes = cmpTypes(TyL, TyR);
608  if (TypesRes != 0) {
609    // Types are different, but check whether we can bitcast them.
610    if (!TyL->isFirstClassType()) {
611      if (TyR->isFirstClassType())
612        return -1;
613      // Neither TyL nor TyR are values of first class type. Return the result
614      // of comparing the types
615      return TypesRes;
616    }
617    if (!TyR->isFirstClassType()) {
618      if (TyL->isFirstClassType())
619        return 1;
620      return TypesRes;
621    }
622
623    // Vector -> Vector conversions are always lossless if the two vector types
624    // have the same size, otherwise not.
625    unsigned TyLWidth = 0;
626    unsigned TyRWidth = 0;
627
628    if (auto *VecTyL = dyn_cast<VectorType>(TyL))
629      TyLWidth = VecTyL->getBitWidth();
630    if (auto *VecTyR = dyn_cast<VectorType>(TyR))
631      TyRWidth = VecTyR->getBitWidth();
632
633    if (TyLWidth != TyRWidth)
634      return cmpNumbers(TyLWidth, TyRWidth);
635
636    // Zero bit-width means neither TyL nor TyR are vectors.
637    if (!TyLWidth) {
638      PointerType *PTyL = dyn_cast<PointerType>(TyL);
639      PointerType *PTyR = dyn_cast<PointerType>(TyR);
640      if (PTyL && PTyR) {
641        unsigned AddrSpaceL = PTyL->getAddressSpace();
642        unsigned AddrSpaceR = PTyR->getAddressSpace();
643        if (int Res = cmpNumbers(AddrSpaceL, AddrSpaceR))
644          return Res;
645      }
646      if (PTyL)
647        return 1;
648      if (PTyR)
649        return -1;
650
651      // TyL and TyR aren't vectors, nor pointers. We don't know how to
652      // bitcast them.
653      return TypesRes;
654    }
655  }
656
657  // OK, types are bitcastable, now check constant contents.
658
659  if (L->isNullValue() && R->isNullValue())
660    return TypesRes;
661  if (L->isNullValue() && !R->isNullValue())
662    return 1;
663  if (!L->isNullValue() && R->isNullValue())
664    return -1;
665
666  auto GlobalValueL = const_cast<GlobalValue*>(dyn_cast<GlobalValue>(L));
667  auto GlobalValueR = const_cast<GlobalValue*>(dyn_cast<GlobalValue>(R));
668  if (GlobalValueL && GlobalValueR) {
669    return cmpGlobalValues(GlobalValueL, GlobalValueR);
670  }
671
672  if (int Res = cmpNumbers(L->getValueID(), R->getValueID()))
673    return Res;
674
675  if (const auto *SeqL = dyn_cast<ConstantDataSequential>(L)) {
676    const auto *SeqR = cast<ConstantDataSequential>(R);
677    // This handles ConstantDataArray and ConstantDataVector. Note that we
678    // compare the two raw data arrays, which might differ depending on the host
679    // endianness. This isn't a problem though, because the endiness of a module
680    // will affect the order of the constants, but this order is the same
681    // for a given input module and host platform.
682    return cmpMem(SeqL->getRawDataValues(), SeqR->getRawDataValues());
683  }
684
685  switch (L->getValueID()) {
686  case Value::UndefValueVal:
687  case Value::ConstantTokenNoneVal:
688    return TypesRes;
689  case Value::ConstantIntVal: {
690    const APInt &LInt = cast<ConstantInt>(L)->getValue();
691    const APInt &RInt = cast<ConstantInt>(R)->getValue();
692    return cmpAPInts(LInt, RInt);
693  }
694  case Value::ConstantFPVal: {
695    const APFloat &LAPF = cast<ConstantFP>(L)->getValueAPF();
696    const APFloat &RAPF = cast<ConstantFP>(R)->getValueAPF();
697    return cmpAPFloats(LAPF, RAPF);
698  }
699  case Value::ConstantArrayVal: {
700    const ConstantArray *LA = cast<ConstantArray>(L);
701    const ConstantArray *RA = cast<ConstantArray>(R);
702    uint64_t NumElementsL = cast<ArrayType>(TyL)->getNumElements();
703    uint64_t NumElementsR = cast<ArrayType>(TyR)->getNumElements();
704    if (int Res = cmpNumbers(NumElementsL, NumElementsR))
705      return Res;
706    for (uint64_t i = 0; i < NumElementsL; ++i) {
707      if (int Res = cmpConstants(cast<Constant>(LA->getOperand(i)),
708                                 cast<Constant>(RA->getOperand(i))))
709        return Res;
710    }
711    return 0;
712  }
713  case Value::ConstantStructVal: {
714    const ConstantStruct *LS = cast<ConstantStruct>(L);
715    const ConstantStruct *RS = cast<ConstantStruct>(R);
716    unsigned NumElementsL = cast<StructType>(TyL)->getNumElements();
717    unsigned NumElementsR = cast<StructType>(TyR)->getNumElements();
718    if (int Res = cmpNumbers(NumElementsL, NumElementsR))
719      return Res;
720    for (unsigned i = 0; i != NumElementsL; ++i) {
721      if (int Res = cmpConstants(cast<Constant>(LS->getOperand(i)),
722                                 cast<Constant>(RS->getOperand(i))))
723        return Res;
724    }
725    return 0;
726  }
727  case Value::ConstantVectorVal: {
728    const ConstantVector *LV = cast<ConstantVector>(L);
729    const ConstantVector *RV = cast<ConstantVector>(R);
730    unsigned NumElementsL = cast<VectorType>(TyL)->getNumElements();
731    unsigned NumElementsR = cast<VectorType>(TyR)->getNumElements();
732    if (int Res = cmpNumbers(NumElementsL, NumElementsR))
733      return Res;
734    for (uint64_t i = 0; i < NumElementsL; ++i) {
735      if (int Res = cmpConstants(cast<Constant>(LV->getOperand(i)),
736                                 cast<Constant>(RV->getOperand(i))))
737        return Res;
738    }
739    return 0;
740  }
741  case Value::ConstantExprVal: {
742    const ConstantExpr *LE = cast<ConstantExpr>(L);
743    const ConstantExpr *RE = cast<ConstantExpr>(R);
744    unsigned NumOperandsL = LE->getNumOperands();
745    unsigned NumOperandsR = RE->getNumOperands();
746    if (int Res = cmpNumbers(NumOperandsL, NumOperandsR))
747      return Res;
748    for (unsigned i = 0; i < NumOperandsL; ++i) {
749      if (int Res = cmpConstants(cast<Constant>(LE->getOperand(i)),
750                                 cast<Constant>(RE->getOperand(i))))
751        return Res;
752    }
753    return 0;
754  }
755  case Value::BlockAddressVal: {
756    const BlockAddress *LBA = cast<BlockAddress>(L);
757    const BlockAddress *RBA = cast<BlockAddress>(R);
758    if (int Res = cmpValues(LBA->getFunction(), RBA->getFunction()))
759      return Res;
760    if (LBA->getFunction() == RBA->getFunction()) {
761      // They are BBs in the same function. Order by which comes first in the
762      // BB order of the function. This order is deterministic.
763      Function* F = LBA->getFunction();
764      BasicBlock *LBB = LBA->getBasicBlock();
765      BasicBlock *RBB = RBA->getBasicBlock();
766      if (LBB == RBB)
767        return 0;
768      for(BasicBlock &BB : F->getBasicBlockList()) {
769        if (&BB == LBB) {
770          assert(&BB != RBB);
771          return -1;
772        }
773        if (&BB == RBB)
774          return 1;
775      }
776      llvm_unreachable("Basic Block Address does not point to a basic block in "
777                       "its function.");
778      return -1;
779    } else {
780      // cmpValues said the functions are the same. So because they aren't
781      // literally the same pointer, they must respectively be the left and
782      // right functions.
783      assert(LBA->getFunction() == FnL && RBA->getFunction() == FnR);
784      // cmpValues will tell us if these are equivalent BasicBlocks, in the
785      // context of their respective functions.
786      return cmpValues(LBA->getBasicBlock(), RBA->getBasicBlock());
787    }
788  }
789  default: // Unknown constant, abort.
790    DEBUG(dbgs() << "Looking at valueID " << L->getValueID() << "\n");
791    llvm_unreachable("Constant ValueID not recognized.");
792    return -1;
793  }
794}
795
796int FunctionComparator::cmpGlobalValues(GlobalValue *L, GlobalValue* R) {
797  return cmpNumbers(GlobalNumbers->getNumber(L), GlobalNumbers->getNumber(R));
798}
799
800/// cmpType - compares two types,
801/// defines total ordering among the types set.
802/// See method declaration comments for more details.
803int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
804  PointerType *PTyL = dyn_cast<PointerType>(TyL);
805  PointerType *PTyR = dyn_cast<PointerType>(TyR);
806
807  const DataLayout &DL = FnL->getParent()->getDataLayout();
808  if (PTyL && PTyL->getAddressSpace() == 0)
809    TyL = DL.getIntPtrType(TyL);
810  if (PTyR && PTyR->getAddressSpace() == 0)
811    TyR = DL.getIntPtrType(TyR);
812
813  if (TyL == TyR)
814    return 0;
815
816  if (int Res = cmpNumbers(TyL->getTypeID(), TyR->getTypeID()))
817    return Res;
818
819  switch (TyL->getTypeID()) {
820  default:
821    llvm_unreachable("Unknown type!");
822    // Fall through in Release mode.
823  case Type::IntegerTyID:
824    return cmpNumbers(cast<IntegerType>(TyL)->getBitWidth(),
825                      cast<IntegerType>(TyR)->getBitWidth());
826  case Type::VectorTyID: {
827    VectorType *VTyL = cast<VectorType>(TyL), *VTyR = cast<VectorType>(TyR);
828    if (int Res = cmpNumbers(VTyL->getNumElements(), VTyR->getNumElements()))
829      return Res;
830    return cmpTypes(VTyL->getElementType(), VTyR->getElementType());
831  }
832  // TyL == TyR would have returned true earlier, because types are uniqued.
833  case Type::VoidTyID:
834  case Type::FloatTyID:
835  case Type::DoubleTyID:
836  case Type::X86_FP80TyID:
837  case Type::FP128TyID:
838  case Type::PPC_FP128TyID:
839  case Type::LabelTyID:
840  case Type::MetadataTyID:
841  case Type::TokenTyID:
842    return 0;
843
844  case Type::PointerTyID: {
845    assert(PTyL && PTyR && "Both types must be pointers here.");
846    return cmpNumbers(PTyL->getAddressSpace(), PTyR->getAddressSpace());
847  }
848
849  case Type::StructTyID: {
850    StructType *STyL = cast<StructType>(TyL);
851    StructType *STyR = cast<StructType>(TyR);
852    if (STyL->getNumElements() != STyR->getNumElements())
853      return cmpNumbers(STyL->getNumElements(), STyR->getNumElements());
854
855    if (STyL->isPacked() != STyR->isPacked())
856      return cmpNumbers(STyL->isPacked(), STyR->isPacked());
857
858    for (unsigned i = 0, e = STyL->getNumElements(); i != e; ++i) {
859      if (int Res = cmpTypes(STyL->getElementType(i), STyR->getElementType(i)))
860        return Res;
861    }
862    return 0;
863  }
864
865  case Type::FunctionTyID: {
866    FunctionType *FTyL = cast<FunctionType>(TyL);
867    FunctionType *FTyR = cast<FunctionType>(TyR);
868    if (FTyL->getNumParams() != FTyR->getNumParams())
869      return cmpNumbers(FTyL->getNumParams(), FTyR->getNumParams());
870
871    if (FTyL->isVarArg() != FTyR->isVarArg())
872      return cmpNumbers(FTyL->isVarArg(), FTyR->isVarArg());
873
874    if (int Res = cmpTypes(FTyL->getReturnType(), FTyR->getReturnType()))
875      return Res;
876
877    for (unsigned i = 0, e = FTyL->getNumParams(); i != e; ++i) {
878      if (int Res = cmpTypes(FTyL->getParamType(i), FTyR->getParamType(i)))
879        return Res;
880    }
881    return 0;
882  }
883
884  case Type::ArrayTyID: {
885    ArrayType *ATyL = cast<ArrayType>(TyL);
886    ArrayType *ATyR = cast<ArrayType>(TyR);
887    if (ATyL->getNumElements() != ATyR->getNumElements())
888      return cmpNumbers(ATyL->getNumElements(), ATyR->getNumElements());
889    return cmpTypes(ATyL->getElementType(), ATyR->getElementType());
890  }
891  }
892}
893
894// Determine whether the two operations are the same except that pointer-to-A
895// and pointer-to-B are equivalent. This should be kept in sync with
896// Instruction::isSameOperationAs.
897// Read method declaration comments for more details.
898int FunctionComparator::cmpOperations(const Instruction *L,
899                                      const Instruction *R) const {
900  // Differences from Instruction::isSameOperationAs:
901  //  * replace type comparison with calls to isEquivalentType.
902  //  * we test for I->hasSameSubclassOptionalData (nuw/nsw/tail) at the top
903  //  * because of the above, we don't test for the tail bit on calls later on
904  if (int Res = cmpNumbers(L->getOpcode(), R->getOpcode()))
905    return Res;
906
907  if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
908    return Res;
909
910  if (int Res = cmpTypes(L->getType(), R->getType()))
911    return Res;
912
913  if (int Res = cmpNumbers(L->getRawSubclassOptionalData(),
914                           R->getRawSubclassOptionalData()))
915    return Res;
916
917  if (const AllocaInst *AI = dyn_cast<AllocaInst>(L)) {
918    if (int Res = cmpTypes(AI->getAllocatedType(),
919                           cast<AllocaInst>(R)->getAllocatedType()))
920      return Res;
921    if (int Res =
922            cmpNumbers(AI->getAlignment(), cast<AllocaInst>(R)->getAlignment()))
923      return Res;
924  }
925
926  // We have two instructions of identical opcode and #operands.  Check to see
927  // if all operands are the same type
928  for (unsigned i = 0, e = L->getNumOperands(); i != e; ++i) {
929    if (int Res =
930            cmpTypes(L->getOperand(i)->getType(), R->getOperand(i)->getType()))
931      return Res;
932  }
933
934  // Check special state that is a part of some instructions.
935  if (const LoadInst *LI = dyn_cast<LoadInst>(L)) {
936    if (int Res = cmpNumbers(LI->isVolatile(), cast<LoadInst>(R)->isVolatile()))
937      return Res;
938    if (int Res =
939            cmpNumbers(LI->getAlignment(), cast<LoadInst>(R)->getAlignment()))
940      return Res;
941    if (int Res =
942            cmpNumbers(LI->getOrdering(), cast<LoadInst>(R)->getOrdering()))
943      return Res;
944    if (int Res =
945            cmpNumbers(LI->getSynchScope(), cast<LoadInst>(R)->getSynchScope()))
946      return Res;
947    return cmpRangeMetadata(LI->getMetadata(LLVMContext::MD_range),
948        cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range));
949  }
950  if (const StoreInst *SI = dyn_cast<StoreInst>(L)) {
951    if (int Res =
952            cmpNumbers(SI->isVolatile(), cast<StoreInst>(R)->isVolatile()))
953      return Res;
954    if (int Res =
955            cmpNumbers(SI->getAlignment(), cast<StoreInst>(R)->getAlignment()))
956      return Res;
957    if (int Res =
958            cmpNumbers(SI->getOrdering(), cast<StoreInst>(R)->getOrdering()))
959      return Res;
960    return cmpNumbers(SI->getSynchScope(), cast<StoreInst>(R)->getSynchScope());
961  }
962  if (const CmpInst *CI = dyn_cast<CmpInst>(L))
963    return cmpNumbers(CI->getPredicate(), cast<CmpInst>(R)->getPredicate());
964  if (const CallInst *CI = dyn_cast<CallInst>(L)) {
965    if (int Res = cmpNumbers(CI->getCallingConv(),
966                             cast<CallInst>(R)->getCallingConv()))
967      return Res;
968    if (int Res =
969            cmpAttrs(CI->getAttributes(), cast<CallInst>(R)->getAttributes()))
970      return Res;
971    if (int Res = cmpOperandBundlesSchema(CI, R))
972      return Res;
973    return cmpRangeMetadata(
974        CI->getMetadata(LLVMContext::MD_range),
975        cast<CallInst>(R)->getMetadata(LLVMContext::MD_range));
976  }
977  if (const InvokeInst *II = dyn_cast<InvokeInst>(L)) {
978    if (int Res = cmpNumbers(II->getCallingConv(),
979                             cast<InvokeInst>(R)->getCallingConv()))
980      return Res;
981    if (int Res =
982            cmpAttrs(II->getAttributes(), cast<InvokeInst>(R)->getAttributes()))
983      return Res;
984    if (int Res = cmpOperandBundlesSchema(II, R))
985      return Res;
986    return cmpRangeMetadata(
987        II->getMetadata(LLVMContext::MD_range),
988        cast<InvokeInst>(R)->getMetadata(LLVMContext::MD_range));
989  }
990  if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) {
991    ArrayRef<unsigned> LIndices = IVI->getIndices();
992    ArrayRef<unsigned> RIndices = cast<InsertValueInst>(R)->getIndices();
993    if (int Res = cmpNumbers(LIndices.size(), RIndices.size()))
994      return Res;
995    for (size_t i = 0, e = LIndices.size(); i != e; ++i) {
996      if (int Res = cmpNumbers(LIndices[i], RIndices[i]))
997        return Res;
998    }
999  }
1000  if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(L)) {
1001    ArrayRef<unsigned> LIndices = EVI->getIndices();
1002    ArrayRef<unsigned> RIndices = cast<ExtractValueInst>(R)->getIndices();
1003    if (int Res = cmpNumbers(LIndices.size(), RIndices.size()))
1004      return Res;
1005    for (size_t i = 0, e = LIndices.size(); i != e; ++i) {
1006      if (int Res = cmpNumbers(LIndices[i], RIndices[i]))
1007        return Res;
1008    }
1009  }
1010  if (const FenceInst *FI = dyn_cast<FenceInst>(L)) {
1011    if (int Res =
1012            cmpNumbers(FI->getOrdering(), cast<FenceInst>(R)->getOrdering()))
1013      return Res;
1014    return cmpNumbers(FI->getSynchScope(), cast<FenceInst>(R)->getSynchScope());
1015  }
1016
1017  if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(L)) {
1018    if (int Res = cmpNumbers(CXI->isVolatile(),
1019                             cast<AtomicCmpXchgInst>(R)->isVolatile()))
1020      return Res;
1021    if (int Res = cmpNumbers(CXI->isWeak(),
1022                             cast<AtomicCmpXchgInst>(R)->isWeak()))
1023      return Res;
1024    if (int Res = cmpNumbers(CXI->getSuccessOrdering(),
1025                             cast<AtomicCmpXchgInst>(R)->getSuccessOrdering()))
1026      return Res;
1027    if (int Res = cmpNumbers(CXI->getFailureOrdering(),
1028                             cast<AtomicCmpXchgInst>(R)->getFailureOrdering()))
1029      return Res;
1030    return cmpNumbers(CXI->getSynchScope(),
1031                      cast<AtomicCmpXchgInst>(R)->getSynchScope());
1032  }
1033  if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(L)) {
1034    if (int Res = cmpNumbers(RMWI->getOperation(),
1035                             cast<AtomicRMWInst>(R)->getOperation()))
1036      return Res;
1037    if (int Res = cmpNumbers(RMWI->isVolatile(),
1038                             cast<AtomicRMWInst>(R)->isVolatile()))
1039      return Res;
1040    if (int Res = cmpNumbers(RMWI->getOrdering(),
1041                             cast<AtomicRMWInst>(R)->getOrdering()))
1042      return Res;
1043    return cmpNumbers(RMWI->getSynchScope(),
1044                      cast<AtomicRMWInst>(R)->getSynchScope());
1045  }
1046  return 0;
1047}
1048
1049// Determine whether two GEP operations perform the same underlying arithmetic.
1050// Read method declaration comments for more details.
1051int FunctionComparator::cmpGEPs(const GEPOperator *GEPL,
1052                               const GEPOperator *GEPR) {
1053
1054  unsigned int ASL = GEPL->getPointerAddressSpace();
1055  unsigned int ASR = GEPR->getPointerAddressSpace();
1056
1057  if (int Res = cmpNumbers(ASL, ASR))
1058    return Res;
1059
1060  // When we have target data, we can reduce the GEP down to the value in bytes
1061  // added to the address.
1062  const DataLayout &DL = FnL->getParent()->getDataLayout();
1063  unsigned BitWidth = DL.getPointerSizeInBits(ASL);
1064  APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0);
1065  if (GEPL->accumulateConstantOffset(DL, OffsetL) &&
1066      GEPR->accumulateConstantOffset(DL, OffsetR))
1067    return cmpAPInts(OffsetL, OffsetR);
1068  if (int Res = cmpTypes(GEPL->getSourceElementType(),
1069                         GEPR->getSourceElementType()))
1070    return Res;
1071
1072  if (int Res = cmpNumbers(GEPL->getNumOperands(), GEPR->getNumOperands()))
1073    return Res;
1074
1075  for (unsigned i = 0, e = GEPL->getNumOperands(); i != e; ++i) {
1076    if (int Res = cmpValues(GEPL->getOperand(i), GEPR->getOperand(i)))
1077      return Res;
1078  }
1079
1080  return 0;
1081}
1082
1083int FunctionComparator::cmpInlineAsm(const InlineAsm *L,
1084                                     const InlineAsm *R) const {
1085  // InlineAsm's are uniqued. If they are the same pointer, obviously they are
1086  // the same, otherwise compare the fields.
1087  if (L == R)
1088    return 0;
1089  if (int Res = cmpTypes(L->getFunctionType(), R->getFunctionType()))
1090    return Res;
1091  if (int Res = cmpMem(L->getAsmString(), R->getAsmString()))
1092    return Res;
1093  if (int Res = cmpMem(L->getConstraintString(), R->getConstraintString()))
1094    return Res;
1095  if (int Res = cmpNumbers(L->hasSideEffects(), R->hasSideEffects()))
1096    return Res;
1097  if (int Res = cmpNumbers(L->isAlignStack(), R->isAlignStack()))
1098    return Res;
1099  if (int Res = cmpNumbers(L->getDialect(), R->getDialect()))
1100    return Res;
1101  llvm_unreachable("InlineAsm blocks were not uniqued.");
1102  return 0;
1103}
1104
1105/// Compare two values used by the two functions under pair-wise comparison. If
1106/// this is the first time the values are seen, they're added to the mapping so
1107/// that we will detect mismatches on next use.
1108/// See comments in declaration for more details.
1109int FunctionComparator::cmpValues(const Value *L, const Value *R) {
1110  // Catch self-reference case.
1111  if (L == FnL) {
1112    if (R == FnR)
1113      return 0;
1114    return -1;
1115  }
1116  if (R == FnR) {
1117    if (L == FnL)
1118      return 0;
1119    return 1;
1120  }
1121
1122  const Constant *ConstL = dyn_cast<Constant>(L);
1123  const Constant *ConstR = dyn_cast<Constant>(R);
1124  if (ConstL && ConstR) {
1125    if (L == R)
1126      return 0;
1127    return cmpConstants(ConstL, ConstR);
1128  }
1129
1130  if (ConstL)
1131    return 1;
1132  if (ConstR)
1133    return -1;
1134
1135  const InlineAsm *InlineAsmL = dyn_cast<InlineAsm>(L);
1136  const InlineAsm *InlineAsmR = dyn_cast<InlineAsm>(R);
1137
1138  if (InlineAsmL && InlineAsmR)
1139    return cmpInlineAsm(InlineAsmL, InlineAsmR);
1140  if (InlineAsmL)
1141    return 1;
1142  if (InlineAsmR)
1143    return -1;
1144
1145  auto LeftSN = sn_mapL.insert(std::make_pair(L, sn_mapL.size())),
1146       RightSN = sn_mapR.insert(std::make_pair(R, sn_mapR.size()));
1147
1148  return cmpNumbers(LeftSN.first->second, RightSN.first->second);
1149}
1150// Test whether two basic blocks have equivalent behaviour.
1151int FunctionComparator::cmpBasicBlocks(const BasicBlock *BBL,
1152                                       const BasicBlock *BBR) {
1153  BasicBlock::const_iterator InstL = BBL->begin(), InstLE = BBL->end();
1154  BasicBlock::const_iterator InstR = BBR->begin(), InstRE = BBR->end();
1155
1156  do {
1157    if (int Res = cmpValues(&*InstL, &*InstR))
1158      return Res;
1159
1160    const GetElementPtrInst *GEPL = dyn_cast<GetElementPtrInst>(InstL);
1161    const GetElementPtrInst *GEPR = dyn_cast<GetElementPtrInst>(InstR);
1162
1163    if (GEPL && !GEPR)
1164      return 1;
1165    if (GEPR && !GEPL)
1166      return -1;
1167
1168    if (GEPL && GEPR) {
1169      if (int Res =
1170              cmpValues(GEPL->getPointerOperand(), GEPR->getPointerOperand()))
1171        return Res;
1172      if (int Res = cmpGEPs(GEPL, GEPR))
1173        return Res;
1174    } else {
1175      if (int Res = cmpOperations(&*InstL, &*InstR))
1176        return Res;
1177      assert(InstL->getNumOperands() == InstR->getNumOperands());
1178
1179      for (unsigned i = 0, e = InstL->getNumOperands(); i != e; ++i) {
1180        Value *OpL = InstL->getOperand(i);
1181        Value *OpR = InstR->getOperand(i);
1182        if (int Res = cmpValues(OpL, OpR))
1183          return Res;
1184        // cmpValues should ensure this is true.
1185        assert(cmpTypes(OpL->getType(), OpR->getType()) == 0);
1186      }
1187    }
1188
1189    ++InstL, ++InstR;
1190  } while (InstL != InstLE && InstR != InstRE);
1191
1192  if (InstL != InstLE && InstR == InstRE)
1193    return 1;
1194  if (InstL == InstLE && InstR != InstRE)
1195    return -1;
1196  return 0;
1197}
1198
1199// Test whether the two functions have equivalent behaviour.
1200int FunctionComparator::compare() {
1201  sn_mapL.clear();
1202  sn_mapR.clear();
1203
1204  if (int Res = cmpAttrs(FnL->getAttributes(), FnR->getAttributes()))
1205    return Res;
1206
1207  if (int Res = cmpNumbers(FnL->hasGC(), FnR->hasGC()))
1208    return Res;
1209
1210  if (FnL->hasGC()) {
1211    if (int Res = cmpMem(FnL->getGC(), FnR->getGC()))
1212      return Res;
1213  }
1214
1215  if (int Res = cmpNumbers(FnL->hasSection(), FnR->hasSection()))
1216    return Res;
1217
1218  if (FnL->hasSection()) {
1219    if (int Res = cmpMem(FnL->getSection(), FnR->getSection()))
1220      return Res;
1221  }
1222
1223  if (int Res = cmpNumbers(FnL->isVarArg(), FnR->isVarArg()))
1224    return Res;
1225
1226  // TODO: if it's internal and only used in direct calls, we could handle this
1227  // case too.
1228  if (int Res = cmpNumbers(FnL->getCallingConv(), FnR->getCallingConv()))
1229    return Res;
1230
1231  if (int Res = cmpTypes(FnL->getFunctionType(), FnR->getFunctionType()))
1232    return Res;
1233
1234  assert(FnL->arg_size() == FnR->arg_size() &&
1235         "Identically typed functions have different numbers of args!");
1236
1237  // Visit the arguments so that they get enumerated in the order they're
1238  // passed in.
1239  for (Function::const_arg_iterator ArgLI = FnL->arg_begin(),
1240                                    ArgRI = FnR->arg_begin(),
1241                                    ArgLE = FnL->arg_end();
1242       ArgLI != ArgLE; ++ArgLI, ++ArgRI) {
1243    if (cmpValues(&*ArgLI, &*ArgRI) != 0)
1244      llvm_unreachable("Arguments repeat!");
1245  }
1246
1247  // We do a CFG-ordered walk since the actual ordering of the blocks in the
1248  // linked list is immaterial. Our walk starts at the entry block for both
1249  // functions, then takes each block from each terminator in order. As an
1250  // artifact, this also means that unreachable blocks are ignored.
1251  SmallVector<const BasicBlock *, 8> FnLBBs, FnRBBs;
1252  SmallSet<const BasicBlock *, 128> VisitedBBs; // in terms of F1.
1253
1254  FnLBBs.push_back(&FnL->getEntryBlock());
1255  FnRBBs.push_back(&FnR->getEntryBlock());
1256
1257  VisitedBBs.insert(FnLBBs[0]);
1258  while (!FnLBBs.empty()) {
1259    const BasicBlock *BBL = FnLBBs.pop_back_val();
1260    const BasicBlock *BBR = FnRBBs.pop_back_val();
1261
1262    if (int Res = cmpValues(BBL, BBR))
1263      return Res;
1264
1265    if (int Res = cmpBasicBlocks(BBL, BBR))
1266      return Res;
1267
1268    const TerminatorInst *TermL = BBL->getTerminator();
1269    const TerminatorInst *TermR = BBR->getTerminator();
1270
1271    assert(TermL->getNumSuccessors() == TermR->getNumSuccessors());
1272    for (unsigned i = 0, e = TermL->getNumSuccessors(); i != e; ++i) {
1273      if (!VisitedBBs.insert(TermL->getSuccessor(i)).second)
1274        continue;
1275
1276      FnLBBs.push_back(TermL->getSuccessor(i));
1277      FnRBBs.push_back(TermR->getSuccessor(i));
1278    }
1279  }
1280  return 0;
1281}
1282
1283namespace {
1284// Accumulate the hash of a sequence of 64-bit integers. This is similar to a
1285// hash of a sequence of 64bit ints, but the entire input does not need to be
1286// available at once. This interface is necessary for functionHash because it
1287// needs to accumulate the hash as the structure of the function is traversed
1288// without saving these values to an intermediate buffer. This form of hashing
1289// is not often needed, as usually the object to hash is just read from a
1290// buffer.
1291class HashAccumulator64 {
1292  uint64_t Hash;
1293public:
1294  // Initialize to random constant, so the state isn't zero.
1295  HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; }
1296  void add(uint64_t V) {
1297     Hash = llvm::hashing::detail::hash_16_bytes(Hash, V);
1298  }
1299  // No finishing is required, because the entire hash value is used.
1300  uint64_t getHash() { return Hash; }
1301};
1302} // end anonymous namespace
1303
1304// A function hash is calculated by considering only the number of arguments and
1305// whether a function is varargs, the order of basic blocks (given by the
1306// successors of each basic block in depth first order), and the order of
1307// opcodes of each instruction within each of these basic blocks. This mirrors
1308// the strategy compare() uses to compare functions by walking the BBs in depth
1309// first order and comparing each instruction in sequence. Because this hash
1310// does not look at the operands, it is insensitive to things such as the
1311// target of calls and the constants used in the function, which makes it useful
1312// when possibly merging functions which are the same modulo constants and call
1313// targets.
1314FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) {
1315  HashAccumulator64 H;
1316  H.add(F.isVarArg());
1317  H.add(F.arg_size());
1318
1319  SmallVector<const BasicBlock *, 8> BBs;
1320  SmallSet<const BasicBlock *, 16> VisitedBBs;
1321
1322  // Walk the blocks in the same order as FunctionComparator::cmpBasicBlocks(),
1323  // accumulating the hash of the function "structure." (BB and opcode sequence)
1324  BBs.push_back(&F.getEntryBlock());
1325  VisitedBBs.insert(BBs[0]);
1326  while (!BBs.empty()) {
1327    const BasicBlock *BB = BBs.pop_back_val();
1328    // This random value acts as a block header, as otherwise the partition of
1329    // opcodes into BBs wouldn't affect the hash, only the order of the opcodes
1330    H.add(45798);
1331    for (auto &Inst : *BB) {
1332      H.add(Inst.getOpcode());
1333    }
1334    const TerminatorInst *Term = BB->getTerminator();
1335    for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
1336      if (!VisitedBBs.insert(Term->getSuccessor(i)).second)
1337        continue;
1338      BBs.push_back(Term->getSuccessor(i));
1339    }
1340  }
1341  return H.getHash();
1342}
1343
1344
1345namespace {
1346
1347/// MergeFunctions finds functions which will generate identical machine code,
1348/// by considering all pointer types to be equivalent. Once identified,
1349/// MergeFunctions will fold them by replacing a call to one to a call to a
1350/// bitcast of the other.
1351///
1352class MergeFunctions : public ModulePass {
1353public:
1354  static char ID;
1355  MergeFunctions()
1356    : ModulePass(ID), FnTree(FunctionNodeCmp(&GlobalNumbers)), FNodesInTree(),
1357      HasGlobalAliases(false) {
1358    initializeMergeFunctionsPass(*PassRegistry::getPassRegistry());
1359  }
1360
1361  bool runOnModule(Module &M) override;
1362
1363private:
1364  // The function comparison operator is provided here so that FunctionNodes do
1365  // not need to become larger with another pointer.
1366  class FunctionNodeCmp {
1367    GlobalNumberState* GlobalNumbers;
1368  public:
1369    FunctionNodeCmp(GlobalNumberState* GN) : GlobalNumbers(GN) {}
1370    bool operator()(const FunctionNode &LHS, const FunctionNode &RHS) const {
1371      // Order first by hashes, then full function comparison.
1372      if (LHS.getHash() != RHS.getHash())
1373        return LHS.getHash() < RHS.getHash();
1374      FunctionComparator FCmp(LHS.getFunc(), RHS.getFunc(), GlobalNumbers);
1375      return FCmp.compare() == -1;
1376    }
1377  };
1378  typedef std::set<FunctionNode, FunctionNodeCmp> FnTreeType;
1379
1380  GlobalNumberState GlobalNumbers;
1381
1382  /// A work queue of functions that may have been modified and should be
1383  /// analyzed again.
1384  std::vector<WeakVH> Deferred;
1385
1386  /// Checks the rules of order relation introduced among functions set.
1387  /// Returns true, if sanity check has been passed, and false if failed.
1388  bool doSanityCheck(std::vector<WeakVH> &Worklist);
1389
1390  /// Insert a ComparableFunction into the FnTree, or merge it away if it's
1391  /// equal to one that's already present.
1392  bool insert(Function *NewFunction);
1393
1394  /// Remove a Function from the FnTree and queue it up for a second sweep of
1395  /// analysis.
1396  void remove(Function *F);
1397
1398  /// Find the functions that use this Value and remove them from FnTree and
1399  /// queue the functions.
1400  void removeUsers(Value *V);
1401
1402  /// Replace all direct calls of Old with calls of New. Will bitcast New if
1403  /// necessary to make types match.
1404  void replaceDirectCallers(Function *Old, Function *New);
1405
1406  /// Merge two equivalent functions. Upon completion, G may be deleted, or may
1407  /// be converted into a thunk. In either case, it should never be visited
1408  /// again.
1409  void mergeTwoFunctions(Function *F, Function *G);
1410
1411  /// Replace G with a thunk or an alias to F. Deletes G.
1412  void writeThunkOrAlias(Function *F, Function *G);
1413
1414  /// Replace G with a simple tail call to bitcast(F). Also replace direct uses
1415  /// of G with bitcast(F). Deletes G.
1416  void writeThunk(Function *F, Function *G);
1417
1418  /// Replace G with an alias to F. Deletes G.
1419  void writeAlias(Function *F, Function *G);
1420
1421  /// Replace function F with function G in the function tree.
1422  void replaceFunctionInTree(const FunctionNode &FN, Function *G);
1423
1424  /// The set of all distinct functions. Use the insert() and remove() methods
1425  /// to modify it. The map allows efficient lookup and deferring of Functions.
1426  FnTreeType FnTree;
1427  // Map functions to the iterators of the FunctionNode which contains them
1428  // in the FnTree. This must be updated carefully whenever the FnTree is
1429  // modified, i.e. in insert(), remove(), and replaceFunctionInTree(), to avoid
1430  // dangling iterators into FnTree. The invariant that preserves this is that
1431  // there is exactly one mapping F -> FN for each FunctionNode FN in FnTree.
1432  ValueMap<Function*, FnTreeType::iterator> FNodesInTree;
1433
1434  /// Whether or not the target supports global aliases.
1435  bool HasGlobalAliases;
1436};
1437
1438} // end anonymous namespace
1439
1440char MergeFunctions::ID = 0;
1441INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false)
1442
1443ModulePass *llvm::createMergeFunctionsPass() {
1444  return new MergeFunctions();
1445}
1446
1447bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
1448  if (const unsigned Max = NumFunctionsForSanityCheck) {
1449    unsigned TripleNumber = 0;
1450    bool Valid = true;
1451
1452    dbgs() << "MERGEFUNC-SANITY: Started for first " << Max << " functions.\n";
1453
1454    unsigned i = 0;
1455    for (std::vector<WeakVH>::iterator I = Worklist.begin(), E = Worklist.end();
1456         I != E && i < Max; ++I, ++i) {
1457      unsigned j = i;
1458      for (std::vector<WeakVH>::iterator J = I; J != E && j < Max; ++J, ++j) {
1459        Function *F1 = cast<Function>(*I);
1460        Function *F2 = cast<Function>(*J);
1461        int Res1 = FunctionComparator(F1, F2, &GlobalNumbers).compare();
1462        int Res2 = FunctionComparator(F2, F1, &GlobalNumbers).compare();
1463
1464        // If F1 <= F2, then F2 >= F1, otherwise report failure.
1465        if (Res1 != -Res2) {
1466          dbgs() << "MERGEFUNC-SANITY: Non-symmetric; triple: " << TripleNumber
1467                 << "\n";
1468          F1->dump();
1469          F2->dump();
1470          Valid = false;
1471        }
1472
1473        if (Res1 == 0)
1474          continue;
1475
1476        unsigned k = j;
1477        for (std::vector<WeakVH>::iterator K = J; K != E && k < Max;
1478             ++k, ++K, ++TripleNumber) {
1479          if (K == J)
1480            continue;
1481
1482          Function *F3 = cast<Function>(*K);
1483          int Res3 = FunctionComparator(F1, F3, &GlobalNumbers).compare();
1484          int Res4 = FunctionComparator(F2, F3, &GlobalNumbers).compare();
1485
1486          bool Transitive = true;
1487
1488          if (Res1 != 0 && Res1 == Res4) {
1489            // F1 > F2, F2 > F3 => F1 > F3
1490            Transitive = Res3 == Res1;
1491          } else if (Res3 != 0 && Res3 == -Res4) {
1492            // F1 > F3, F3 > F2 => F1 > F2
1493            Transitive = Res3 == Res1;
1494          } else if (Res4 != 0 && -Res3 == Res4) {
1495            // F2 > F3, F3 > F1 => F2 > F1
1496            Transitive = Res4 == -Res1;
1497          }
1498
1499          if (!Transitive) {
1500            dbgs() << "MERGEFUNC-SANITY: Non-transitive; triple: "
1501                   << TripleNumber << "\n";
1502            dbgs() << "Res1, Res3, Res4: " << Res1 << ", " << Res3 << ", "
1503                   << Res4 << "\n";
1504            F1->dump();
1505            F2->dump();
1506            F3->dump();
1507            Valid = false;
1508          }
1509        }
1510      }
1511    }
1512
1513    dbgs() << "MERGEFUNC-SANITY: " << (Valid ? "Passed." : "Failed.") << "\n";
1514    return Valid;
1515  }
1516  return true;
1517}
1518
1519bool MergeFunctions::runOnModule(Module &M) {
1520  bool Changed = false;
1521
1522  // All functions in the module, ordered by hash. Functions with a unique
1523  // hash value are easily eliminated.
1524  std::vector<std::pair<FunctionComparator::FunctionHash, Function *>>
1525    HashedFuncs;
1526  for (Function &Func : M) {
1527    if (!Func.isDeclaration() && !Func.hasAvailableExternallyLinkage()) {
1528      HashedFuncs.push_back({FunctionComparator::functionHash(Func), &Func});
1529    }
1530  }
1531
1532  std::stable_sort(
1533      HashedFuncs.begin(), HashedFuncs.end(),
1534      [](const std::pair<FunctionComparator::FunctionHash, Function *> &a,
1535         const std::pair<FunctionComparator::FunctionHash, Function *> &b) {
1536        return a.first < b.first;
1537      });
1538
1539  auto S = HashedFuncs.begin();
1540  for (auto I = HashedFuncs.begin(), IE = HashedFuncs.end(); I != IE; ++I) {
1541    // If the hash value matches the previous value or the next one, we must
1542    // consider merging it. Otherwise it is dropped and never considered again.
1543    if ((I != S && std::prev(I)->first == I->first) ||
1544        (std::next(I) != IE && std::next(I)->first == I->first) ) {
1545      Deferred.push_back(WeakVH(I->second));
1546    }
1547  }
1548
1549  do {
1550    std::vector<WeakVH> Worklist;
1551    Deferred.swap(Worklist);
1552
1553    DEBUG(doSanityCheck(Worklist));
1554
1555    DEBUG(dbgs() << "size of module: " << M.size() << '\n');
1556    DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n');
1557
1558    // Insert only strong functions and merge them. Strong function merging
1559    // always deletes one of them.
1560    for (std::vector<WeakVH>::iterator I = Worklist.begin(),
1561           E = Worklist.end(); I != E; ++I) {
1562      if (!*I) continue;
1563      Function *F = cast<Function>(*I);
1564      if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() &&
1565          !F->mayBeOverridden()) {
1566        Changed |= insert(F);
1567      }
1568    }
1569
1570    // Insert only weak functions and merge them. By doing these second we
1571    // create thunks to the strong function when possible. When two weak
1572    // functions are identical, we create a new strong function with two weak
1573    // weak thunks to it which are identical but not mergable.
1574    for (std::vector<WeakVH>::iterator I = Worklist.begin(),
1575           E = Worklist.end(); I != E; ++I) {
1576      if (!*I) continue;
1577      Function *F = cast<Function>(*I);
1578      if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() &&
1579          F->mayBeOverridden()) {
1580        Changed |= insert(F);
1581      }
1582    }
1583    DEBUG(dbgs() << "size of FnTree: " << FnTree.size() << '\n');
1584  } while (!Deferred.empty());
1585
1586  FnTree.clear();
1587  GlobalNumbers.clear();
1588
1589  return Changed;
1590}
1591
1592// Replace direct callers of Old with New.
1593void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) {
1594  Constant *BitcastNew = ConstantExpr::getBitCast(New, Old->getType());
1595  for (auto UI = Old->use_begin(), UE = Old->use_end(); UI != UE;) {
1596    Use *U = &*UI;
1597    ++UI;
1598    CallSite CS(U->getUser());
1599    if (CS && CS.isCallee(U)) {
1600      // Transfer the called function's attributes to the call site. Due to the
1601      // bitcast we will 'lose' ABI changing attributes because the 'called
1602      // function' is no longer a Function* but the bitcast. Code that looks up
1603      // the attributes from the called function will fail.
1604
1605      // FIXME: This is not actually true, at least not anymore. The callsite
1606      // will always have the same ABI affecting attributes as the callee,
1607      // because otherwise the original input has UB. Note that Old and New
1608      // always have matching ABI, so no attributes need to be changed.
1609      // Transferring other attributes may help other optimizations, but that
1610      // should be done uniformly and not in this ad-hoc way.
1611      auto &Context = New->getContext();
1612      auto NewFuncAttrs = New->getAttributes();
1613      auto CallSiteAttrs = CS.getAttributes();
1614
1615      CallSiteAttrs = CallSiteAttrs.addAttributes(
1616          Context, AttributeSet::ReturnIndex, NewFuncAttrs.getRetAttributes());
1617
1618      for (unsigned argIdx = 0; argIdx < CS.arg_size(); argIdx++) {
1619        AttributeSet Attrs = NewFuncAttrs.getParamAttributes(argIdx);
1620        if (Attrs.getNumSlots())
1621          CallSiteAttrs = CallSiteAttrs.addAttributes(Context, argIdx, Attrs);
1622      }
1623
1624      CS.setAttributes(CallSiteAttrs);
1625
1626      remove(CS.getInstruction()->getParent()->getParent());
1627      U->set(BitcastNew);
1628    }
1629  }
1630}
1631
1632// Replace G with an alias to F if possible, or else a thunk to F. Deletes G.
1633void MergeFunctions::writeThunkOrAlias(Function *F, Function *G) {
1634  if (HasGlobalAliases && G->hasUnnamedAddr()) {
1635    if (G->hasExternalLinkage() || G->hasLocalLinkage() ||
1636        G->hasWeakLinkage()) {
1637      writeAlias(F, G);
1638      return;
1639    }
1640  }
1641
1642  writeThunk(F, G);
1643}
1644
1645// Helper for writeThunk,
1646// Selects proper bitcast operation,
1647// but a bit simpler then CastInst::getCastOpcode.
1648static Value *createCast(IRBuilder<false> &Builder, Value *V, Type *DestTy) {
1649  Type *SrcTy = V->getType();
1650  if (SrcTy->isStructTy()) {
1651    assert(DestTy->isStructTy());
1652    assert(SrcTy->getStructNumElements() == DestTy->getStructNumElements());
1653    Value *Result = UndefValue::get(DestTy);
1654    for (unsigned int I = 0, E = SrcTy->getStructNumElements(); I < E; ++I) {
1655      Value *Element = createCast(
1656          Builder, Builder.CreateExtractValue(V, makeArrayRef(I)),
1657          DestTy->getStructElementType(I));
1658
1659      Result =
1660          Builder.CreateInsertValue(Result, Element, makeArrayRef(I));
1661    }
1662    return Result;
1663  }
1664  assert(!DestTy->isStructTy());
1665  if (SrcTy->isIntegerTy() && DestTy->isPointerTy())
1666    return Builder.CreateIntToPtr(V, DestTy);
1667  else if (SrcTy->isPointerTy() && DestTy->isIntegerTy())
1668    return Builder.CreatePtrToInt(V, DestTy);
1669  else
1670    return Builder.CreateBitCast(V, DestTy);
1671}
1672
1673// Replace G with a simple tail call to bitcast(F). Also replace direct uses
1674// of G with bitcast(F). Deletes G.
1675void MergeFunctions::writeThunk(Function *F, Function *G) {
1676  if (!G->mayBeOverridden()) {
1677    // Redirect direct callers of G to F.
1678    replaceDirectCallers(G, F);
1679  }
1680
1681  // If G was internal then we may have replaced all uses of G with F. If so,
1682  // stop here and delete G. There's no need for a thunk.
1683  if (G->hasLocalLinkage() && G->use_empty()) {
1684    G->eraseFromParent();
1685    return;
1686  }
1687
1688  Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "",
1689                                    G->getParent());
1690  BasicBlock *BB = BasicBlock::Create(F->getContext(), "", NewG);
1691  IRBuilder<false> Builder(BB);
1692
1693  SmallVector<Value *, 16> Args;
1694  unsigned i = 0;
1695  FunctionType *FFTy = F->getFunctionType();
1696  for (Argument & AI : NewG->args()) {
1697    Args.push_back(createCast(Builder, &AI, FFTy->getParamType(i)));
1698    ++i;
1699  }
1700
1701  CallInst *CI = Builder.CreateCall(F, Args);
1702  CI->setTailCall();
1703  CI->setCallingConv(F->getCallingConv());
1704  CI->setAttributes(F->getAttributes());
1705  if (NewG->getReturnType()->isVoidTy()) {
1706    Builder.CreateRetVoid();
1707  } else {
1708    Builder.CreateRet(createCast(Builder, CI, NewG->getReturnType()));
1709  }
1710
1711  NewG->copyAttributesFrom(G);
1712  NewG->takeName(G);
1713  removeUsers(G);
1714  G->replaceAllUsesWith(NewG);
1715  G->eraseFromParent();
1716
1717  DEBUG(dbgs() << "writeThunk: " << NewG->getName() << '\n');
1718  ++NumThunksWritten;
1719}
1720
1721// Replace G with an alias to F and delete G.
1722void MergeFunctions::writeAlias(Function *F, Function *G) {
1723  auto *GA = GlobalAlias::create(G->getLinkage(), "", F);
1724  F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
1725  GA->takeName(G);
1726  GA->setVisibility(G->getVisibility());
1727  removeUsers(G);
1728  G->replaceAllUsesWith(GA);
1729  G->eraseFromParent();
1730
1731  DEBUG(dbgs() << "writeAlias: " << GA->getName() << '\n');
1732  ++NumAliasesWritten;
1733}
1734
1735// Merge two equivalent functions. Upon completion, Function G is deleted.
1736void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
1737  if (F->mayBeOverridden()) {
1738    assert(G->mayBeOverridden());
1739
1740    // Make them both thunks to the same internal function.
1741    Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "",
1742                                   F->getParent());
1743    H->copyAttributesFrom(F);
1744    H->takeName(F);
1745    removeUsers(F);
1746    F->replaceAllUsesWith(H);
1747
1748    unsigned MaxAlignment = std::max(G->getAlignment(), H->getAlignment());
1749
1750    if (HasGlobalAliases) {
1751      writeAlias(F, G);
1752      writeAlias(F, H);
1753    } else {
1754      writeThunk(F, G);
1755      writeThunk(F, H);
1756    }
1757
1758    F->setAlignment(MaxAlignment);
1759    F->setLinkage(GlobalValue::PrivateLinkage);
1760    ++NumDoubleWeak;
1761  } else {
1762    writeThunkOrAlias(F, G);
1763  }
1764
1765  ++NumFunctionsMerged;
1766}
1767
1768/// Replace function F by function G.
1769void MergeFunctions::replaceFunctionInTree(const FunctionNode &FN,
1770                                           Function *G) {
1771  Function *F = FN.getFunc();
1772  assert(FunctionComparator(F, G, &GlobalNumbers).compare() == 0 &&
1773         "The two functions must be equal");
1774
1775  auto I = FNodesInTree.find(F);
1776  assert(I != FNodesInTree.end() && "F should be in FNodesInTree");
1777  assert(FNodesInTree.count(G) == 0 && "FNodesInTree should not contain G");
1778
1779  FnTreeType::iterator IterToFNInFnTree = I->second;
1780  assert(&(*IterToFNInFnTree) == &FN && "F should map to FN in FNodesInTree.");
1781  // Remove F -> FN and insert G -> FN
1782  FNodesInTree.erase(I);
1783  FNodesInTree.insert({G, IterToFNInFnTree});
1784  // Replace F with G in FN, which is stored inside the FnTree.
1785  FN.replaceBy(G);
1786}
1787
1788// Insert a ComparableFunction into the FnTree, or merge it away if equal to one
1789// that was already inserted.
1790bool MergeFunctions::insert(Function *NewFunction) {
1791  std::pair<FnTreeType::iterator, bool> Result =
1792      FnTree.insert(FunctionNode(NewFunction));
1793
1794  if (Result.second) {
1795    assert(FNodesInTree.count(NewFunction) == 0);
1796    FNodesInTree.insert({NewFunction, Result.first});
1797    DEBUG(dbgs() << "Inserting as unique: " << NewFunction->getName() << '\n');
1798    return false;
1799  }
1800
1801  const FunctionNode &OldF = *Result.first;
1802
1803  // Don't merge tiny functions, since it can just end up making the function
1804  // larger.
1805  // FIXME: Should still merge them if they are unnamed_addr and produce an
1806  // alias.
1807  if (NewFunction->size() == 1) {
1808    if (NewFunction->front().size() <= 2) {
1809      DEBUG(dbgs() << NewFunction->getName()
1810                   << " is to small to bother merging\n");
1811      return false;
1812    }
1813  }
1814
1815  // Impose a total order (by name) on the replacement of functions. This is
1816  // important when operating on more than one module independently to prevent
1817  // cycles of thunks calling each other when the modules are linked together.
1818  //
1819  // When one function is weak and the other is strong there is an order imposed
1820  // already. We process strong functions before weak functions.
1821  if ((OldF.getFunc()->mayBeOverridden() && NewFunction->mayBeOverridden()) ||
1822      (!OldF.getFunc()->mayBeOverridden() && !NewFunction->mayBeOverridden()))
1823    if (OldF.getFunc()->getName() > NewFunction->getName()) {
1824      // Swap the two functions.
1825      Function *F = OldF.getFunc();
1826      replaceFunctionInTree(*Result.first, NewFunction);
1827      NewFunction = F;
1828      assert(OldF.getFunc() != F && "Must have swapped the functions.");
1829    }
1830
1831  // Never thunk a strong function to a weak function.
1832  assert(!OldF.getFunc()->mayBeOverridden() || NewFunction->mayBeOverridden());
1833
1834  DEBUG(dbgs() << "  " << OldF.getFunc()->getName()
1835               << " == " << NewFunction->getName() << '\n');
1836
1837  Function *DeleteF = NewFunction;
1838  mergeTwoFunctions(OldF.getFunc(), DeleteF);
1839  return true;
1840}
1841
1842// Remove a function from FnTree. If it was already in FnTree, add
1843// it to Deferred so that we'll look at it in the next round.
1844void MergeFunctions::remove(Function *F) {
1845  auto I = FNodesInTree.find(F);
1846  if (I != FNodesInTree.end()) {
1847    DEBUG(dbgs() << "Deferred " << F->getName()<< ".\n");
1848    FnTree.erase(I->second);
1849    // I->second has been invalidated, remove it from the FNodesInTree map to
1850    // preserve the invariant.
1851    FNodesInTree.erase(I);
1852    Deferred.emplace_back(F);
1853  }
1854}
1855
1856// For each instruction used by the value, remove() the function that contains
1857// the instruction. This should happen right before a call to RAUW.
1858void MergeFunctions::removeUsers(Value *V) {
1859  std::vector<Value *> Worklist;
1860  Worklist.push_back(V);
1861  SmallSet<Value*, 8> Visited;
1862  Visited.insert(V);
1863  while (!Worklist.empty()) {
1864    Value *V = Worklist.back();
1865    Worklist.pop_back();
1866
1867    for (User *U : V->users()) {
1868      if (Instruction *I = dyn_cast<Instruction>(U)) {
1869        remove(I->getParent()->getParent());
1870      } else if (isa<GlobalValue>(U)) {
1871        // do nothing
1872      } else if (Constant *C = dyn_cast<Constant>(U)) {
1873        for (User *UU : C->users()) {
1874          if (!Visited.insert(UU).second)
1875            Worklist.push_back(UU);
1876        }
1877      }
1878    }
1879  }
1880}
1881