//===- IdenticalCodeFolding.cpp -------------------------------------------===//
2//
3//                     The MCLinker Project
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9#include "mcld/LD/IdenticalCodeFolding.h"
10
11#include "mcld/GeneralOptions.h"
12#include "mcld/Module.h"
13#include "mcld/Fragment/RegionFragment.h"
14#include "mcld/LD/LDContext.h"
15#include "mcld/LD/LDSection.h"
16#include "mcld/LD/RelocData.h"
17#include "mcld/LD/Relocator.h"
18#include "mcld/LD/ResolveInfo.h"
19#include "mcld/LD/SectionData.h"
20#include "mcld/LinkerConfig.h"
21#include "mcld/MC/Input.h"
22#include "mcld/Support/Demangle.h"
23#include "mcld/Support/MsgHandling.h"
24#include "mcld/Target/GNULDBackend.h"
25
26#include <llvm/ADT/StringRef.h>
27#include <llvm/Support/Casting.h>
28#include <llvm/Support/Format.h>
29
30#include <cassert>
31#include <map>
32#include <set>
33
34#include <zlib.h>
35
36namespace mcld {
37
38static bool isSymCtorOrDtor(const ResolveInfo& pSym) {
39  // We can always fold ctors and dtors since accessing function pointer in C++
40  // is forbidden.
41  llvm::StringRef name(pSym.name(), pSym.nameSize());
42  if (!name.startswith("_ZZ") && !name.startswith("_ZN")) {
43    return false;
44  }
45  return isCtorOrDtor(pSym.name(), pSym.nameSize());
46}
47
48IdenticalCodeFolding::IdenticalCodeFolding(const LinkerConfig& pConfig,
49                                           const TargetLDBackend& pBackend,
50                                           Module& pModule)
51    : m_Config(pConfig), m_Backend(pBackend), m_Module(pModule) {
52}
53
54void IdenticalCodeFolding::foldIdenticalCode() {
55  // 1. Find folding candidates.
56  FoldingCandidates candidate_list;
57  findCandidates(candidate_list);
58
59  // 2. Initialize constant section content
60  for (size_t i = 0; i < candidate_list.size(); ++i) {
61    candidate_list[i].initConstantContent(m_Backend, m_KeptSections);
62  }
63
64  // 3. Find identical code until convergence
65  bool converged = false;
66  size_t iterations = 0;
67  while (!converged && (iterations < m_Config.options().getICFIterations())) {
68    converged = matchCandidates(candidate_list);
69    ++iterations;
70  }
71  if (m_Config.options().printICFSections()) {
72    debug(diag::debug_icf_iterations) << iterations;
73  }
74
75  // 4. Fold the identical code
76  typedef std::set<Input*> FoldedObjects;
77  FoldedObjects folded_objs;
78  KeptSections::iterator kept, keptEnd = m_KeptSections.end();
79  size_t index = 0;
80  for (kept = m_KeptSections.begin(); kept != keptEnd; ++kept, ++index) {
81    LDSection* sect = (*kept).first;
82    Input* obj = (*kept).second.first;
83    size_t kept_index = (*kept).second.second;
84    if (index != kept_index) {
85      sect->setKind(LDFileFormat::Folded);
86      folded_objs.insert(obj);
87
88      if (m_Config.options().printICFSections()) {
89        KeptSections::iterator it = m_KeptSections.begin() + kept_index;
90        LDSection* kept_sect = (*it).first;
91        Input* kept_obj = (*it).second.first;
92        debug(diag::debug_icf_folded_section) << sect->name() << obj->name()
93                                              << kept_sect->name()
94                                              << kept_obj->name();
95      }
96    }
97  }
98
99  // Adjust the fragment reference of the folded symbols.
100  FoldedObjects::iterator fobj, fobjEnd = folded_objs.end();
101  for (fobj = folded_objs.begin(); fobj != fobjEnd; ++fobj) {
102    LDContext::sym_iterator sym, symEnd = (*fobj)->context()->symTabEnd();
103    for (sym = (*fobj)->context()->symTabBegin(); sym != symEnd; ++sym) {
104      if ((*sym)->hasFragRef() && ((*sym)->type() == ResolveInfo::Function)) {
105        LDSymbol* out_sym = (*sym)->resolveInfo()->outSymbol();
106        FragmentRef* frag_ref = out_sym->fragRef();
107        LDSection* sect = &(frag_ref->frag()->getParent()->getSection());
108        if (sect->kind() == LDFileFormat::Folded) {
109          size_t kept_index = m_KeptSections[sect].second;
110          LDSection* kept_sect = (*(m_KeptSections.begin() + kept_index)).first;
111          frag_ref->assign(kept_sect->getSectionData()->front(),
112                           frag_ref->offset());
113        }
114      }
115    }  // for each symbol
116  }    // for each folded object
117}
118
119void IdenticalCodeFolding::findCandidates(FoldingCandidates& pCandidateList) {
120  Module::obj_iterator obj, objEnd = m_Module.obj_end();
121  for (obj = m_Module.obj_begin(); obj != objEnd; ++obj) {
122    std::set<const LDSection*> funcptr_access_set;
123    typedef std::map<LDSection*, LDSection*> CandidateMap;
124    CandidateMap candidate_map;
125    LDContext::sect_iterator sect, sectEnd = (*obj)->context()->sectEnd();
126    for (sect = (*obj)->context()->sectBegin(); sect != sectEnd; ++sect) {
127      switch ((*sect)->kind()) {
128        case LDFileFormat::TEXT: {
129          candidate_map.insert(
130              std::make_pair(*sect, reinterpret_cast<LDSection*>(NULL)));
131          break;
132        }
133        case LDFileFormat::Relocation: {
134          LDSection* target = (*sect)->getLink();
135          if (target->kind() == LDFileFormat::TEXT) {
136            candidate_map[target] = *sect;
137          }
138
139          // Safe icf
140          if (m_Config.options().getICFMode() == GeneralOptions::ICF::Safe) {
141            RelocData::iterator rel, relEnd = (*sect)->getRelocData()->end();
142            for (rel = (*sect)->getRelocData()->begin(); rel != relEnd; ++rel) {
143              LDSymbol* sym = rel->symInfo()->outSymbol();
144              if (sym->hasFragRef() && (sym->type() == ResolveInfo::Function)) {
145                const LDSection* def =
146                    &sym->fragRef()->frag()->getParent()->getSection();
147                if (!isSymCtorOrDtor(*rel->symInfo()) &&
148                    m_Backend.mayHaveUnsafeFunctionPointerAccess(*target) &&
149                    m_Backend.getRelocator()
150                        ->mayHaveFunctionPointerAccess(*rel)) {
151                  funcptr_access_set.insert(def);
152                }
153              }
154            }  // for each reloc
155          }
156
157          break;
158        }
159        default: {
160          // skip
161          break;
162        }
163      }  // end of switch
164    }    // for each section
165
166    CandidateMap::iterator candidate, candidateEnd = candidate_map.end();
167    for (candidate = candidate_map.begin(); candidate != candidateEnd;
168         ++candidate) {
169      if ((m_Config.options().getICFMode() == GeneralOptions::ICF::All) ||
170          (funcptr_access_set.count(candidate->first) == 0)) {
171        size_t index = m_KeptSections.size();
172        m_KeptSections[candidate->first] = ObjectAndId(*obj, index);
173        pCandidateList.push_back(
174            FoldingCandidate(candidate->first, candidate->second, *obj));
175      }
176    }  // for each possible candidate
177  }  // for each obj
178}
179
180bool IdenticalCodeFolding::matchCandidates(FoldingCandidates& pCandidateList) {
181  typedef std::multimap<uint32_t, size_t> ChecksumMap;
182  ChecksumMap checksum_map;
183  std::vector<std::string> contents(pCandidateList.size());
184  bool converged = true;
185
186  for (size_t index = 0; index < pCandidateList.size(); ++index) {
187    contents[index] = pCandidateList[index].getContentWithVariables(
188        m_Backend, m_KeptSections);
189    uint32_t checksum = ::crc32(0xFFFFFFFF,
190                                (const uint8_t*)contents[index].c_str(),
191                                contents[index].length());
192
193    size_t count = checksum_map.count(checksum);
194    if (count == 0) {
195      checksum_map.insert(std::make_pair(checksum, index));
196    } else {
197      std::pair<ChecksumMap::iterator, ChecksumMap::iterator> ret =
198          checksum_map.equal_range(checksum);
199      for (ChecksumMap::iterator it = ret.first; it != ret.second; ++it) {
200        size_t kept_index = (*it).second;
201        if (contents[index].compare(contents[kept_index]) == 0) {
202          m_KeptSections[pCandidateList[index].sect].second = kept_index;
203          converged = false;
204          break;
205        }
206      }
207    }
208  }
209
210  return converged;
211}
212
213void IdenticalCodeFolding::FoldingCandidate::initConstantContent(
214    const TargetLDBackend& pBackend,
215    const IdenticalCodeFolding::KeptSections& pKeptSections) {
216  // Get the static content from text.
217  assert(sect != NULL && sect->hasSectionData());
218  SectionData::const_iterator frag, fragEnd = sect->getSectionData()->end();
219  for (frag = sect->getSectionData()->begin(); frag != fragEnd; ++frag) {
220    switch (frag->getKind()) {
221      case Fragment::Region: {
222        const RegionFragment& region = llvm::cast<RegionFragment>(*frag);
223        content.append(region.getRegion().begin(), region.size());
224        break;
225      }
226      default: {
227        // FIXME: Currently we only take care of RegionFragment.
228        break;
229      }
230    }
231  }
232
233  // Get the static content from relocs.
234  if (reloc_sect != NULL && reloc_sect->hasRelocData()) {
235    for (Relocation& rel : *reloc_sect->getRelocData()) {
236      llvm::format_object<Relocation::Type,
237                          Relocation::Address,
238                          Relocation::Address,
239                          Relocation::Address> rel_info("%x%llx%llx%llx",
240                                                        rel.type(),
241                                                        rel.symValue(),
242                                                        rel.addend(),
243                                                        rel.place());
244      char rel_str[48];
245      rel_info.print(rel_str, sizeof(rel_str));
246      content.append(rel_str);
247
248      // Handle the recursive call.
249      LDSymbol* sym = rel.symInfo()->outSymbol();
250      if ((sym->type() == ResolveInfo::Function) && sym->hasFragRef()) {
251        LDSection* def = &sym->fragRef()->frag()->getParent()->getSection();
252        if (def == sect) {
253          continue;
254        }
255      }
256
257      if (!pBackend.isSymbolPreemptible(*rel.symInfo()) && sym->hasFragRef() &&
258          (pKeptSections.find(
259               &sym->fragRef()->frag()->getParent()->getSection()) !=
260           pKeptSections.end())) {
261        // Mark this reloc as a variable.
262        variable_relocs.push_back(&rel);
263      } else {
264        // TODO: Support inlining merge sections if possible (target-dependent).
265        if ((sym->binding() == ResolveInfo::Local) ||
266            (sym->binding() == ResolveInfo::Absolute)) {
267          // ABS or Local symbols.
268          content.append(sym->name()).append(obj->name()).append(
269              obj->path().native());
270        } else {
271          content.append(sym->name());
272        }
273      }
274    }
275  }
276}
277
278std::string IdenticalCodeFolding::FoldingCandidate::getContentWithVariables(
279    const TargetLDBackend& pBackend,
280    const IdenticalCodeFolding::KeptSections& pKeptSections) {
281  std::string result(content);
282  // Compute the variable content from relocs.
283  std::vector<Relocation*>::const_iterator rel, relEnd = variable_relocs.end();
284  for (rel = variable_relocs.begin(); rel != relEnd; ++rel) {
285    LDSymbol* sym = (*rel)->symInfo()->outSymbol();
286    LDSection* def = &sym->fragRef()->frag()->getParent()->getSection();
287    // Use the kept section index.
288    KeptSections::const_iterator it = pKeptSections.find(def);
289    llvm::format_object<size_t> kept_info("%x", (*it).second.second);
290    char kept_str[8];
291    kept_info.print(kept_str, sizeof(kept_str));
292    result.append(kept_str);
293  }
294
295  return result;
296}
297
298}  // namespace mcld
299