1//===- GarbageCollection.cpp ----------------------------------------------===//
2//
3//                     The MCLinker Project
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9#include "mcld/LD/GarbageCollection.h"
10
11#include "mcld/Fragment/Fragment.h"
12#include "mcld/Fragment/Relocation.h"
13#include "mcld/LD/LDContext.h"
14#include "mcld/LD/LDFileFormat.h"
15#include "mcld/LD/LDSection.h"
16#include "mcld/LD/LDSymbol.h"
17#include "mcld/LD/SectionData.h"
18#include "mcld/LD/RelocData.h"
19#include "mcld/LinkerConfig.h"
20#include "mcld/LinkerScript.h"
21#include "mcld/Module.h"
22#include "mcld/Support/MsgHandling.h"
23#include "mcld/Target/TargetLDBackend.h"
24
25#include <llvm/Support/Casting.h>
26
27#include <queue>
28#if !defined(MCLD_ON_WIN32)
29#include <fnmatch.h>
30#define fnmatch0(pattern, string) (fnmatch(pattern, string, 0) == 0)
31#else
32#include <windows.h>
33#include <shlwapi.h>
34#define fnmatch0(pattern, string) (PathMatchSpec(string, pattern) == true)
35#endif
36
37namespace mcld {
38
39//===----------------------------------------------------------------------===//
40// Non-member functions
41//===----------------------------------------------------------------------===//
42// FIXME: these rules should be added into SectionMap, while currently adding to
43// SectionMap will cause the output order change in .text section and leads to
44// the .ARM.exidx order incorrect. We should sort the .ARM.exidx.
45static const char* pattern_to_keep[] = {".text*personality*",
46                                        ".data*personality*",
47                                        ".gnu.linkonce.d*personality*",
48                                        ".sdata*personality*"};
49
50/// shouldKeep - check the section name for the keep sections
51static bool shouldKeep(const std::string& pName) {
52  static const unsigned int pattern_size =
53      sizeof(pattern_to_keep) / sizeof(pattern_to_keep[0]);
54  for (unsigned int i = 0; i < pattern_size; ++i) {
55    if (fnmatch0(pattern_to_keep[i], pName.c_str()))
56      return true;
57  }
58  return false;
59}
60
61/// shouldProcessGC - check if the section kind is handled in GC
62static bool mayProcessGC(const LDSection& pSection) {
63  if (pSection.kind() == LDFileFormat::TEXT ||
64      pSection.kind() == LDFileFormat::DATA ||
65      pSection.kind() == LDFileFormat::BSS ||
66      pSection.kind() == LDFileFormat::GCCExceptTable)
67    return true;
68  return false;
69}
70
71//===----------------------------------------------------------------------===//
72// GarbageCollection::SectionReachedListMap
73//===----------------------------------------------------------------------===//
74void GarbageCollection::SectionReachedListMap::addReference(
75    const LDSection& pFrom,
76    const LDSection& pTo) {
77  m_ReachedSections[&pFrom].insert(&pTo);
78}
79
80GarbageCollection::SectionListTy&
81GarbageCollection::SectionReachedListMap::getReachedList(
82    const LDSection& pSection) {
83  return m_ReachedSections[&pSection];
84}
85
86GarbageCollection::SectionListTy*
87GarbageCollection::SectionReachedListMap::findReachedList(
88    const LDSection& pSection) {
89  ReachedSectionsTy::iterator it = m_ReachedSections.find(&pSection);
90  if (it == m_ReachedSections.end())
91    return NULL;
92  return &it->second;
93}
94
95//===----------------------------------------------------------------------===//
96// GarbageCollection
97//===----------------------------------------------------------------------===//
98GarbageCollection::GarbageCollection(const LinkerConfig& pConfig,
99                                     const TargetLDBackend& pBackend,
100                                     Module& pModule)
101    : m_Config(pConfig), m_Backend(pBackend), m_Module(pModule) {
102}
103
104GarbageCollection::~GarbageCollection() {
105}
106
107bool GarbageCollection::run() {
108  // 1. traverse all the relocations to set up the reached sections of each
109  // section
110  setUpReachedSections();
111  m_Backend.setUpReachedSectionsForGC(m_Module, m_SectionReachedListMap);
112
113  // 2. get all sections defined the entry point
114  SectionVecTy entry;
115  getEntrySections(entry);
116
117  // 3. find all the referenced sections those can be reached by entry
118  findReferencedSections(entry);
119
120  // 4. stripSections - set the unreached sections to Ignore
121  stripSections();
122  return true;
123}
124
125void GarbageCollection::setUpReachedSections() {
126  // traverse all the input relocations to setup the reached sections
127  Module::obj_iterator input, inEnd = m_Module.obj_end();
128  for (input = m_Module.obj_begin(); input != inEnd; ++input) {
129    LDContext::sect_iterator rs, rsEnd = (*input)->context()->relocSectEnd();
130    for (rs = (*input)->context()->relocSectBegin(); rs != rsEnd; ++rs) {
131      // bypass the discarded relocation section
132      // 1. its section kind is changed to Ignore. (The target section is a
133      // discarded group section.)
134      // 2. it has no reloc data. (All symbols in the input relocs are in the
135      // discarded group sections)
136      LDSection* reloc_sect = *rs;
137      LDSection* apply_sect = reloc_sect->getLink();
138      if ((LDFileFormat::Ignore == reloc_sect->kind()) ||
139          (!reloc_sect->hasRelocData()))
140        continue;
141
142      // bypass the apply target sections which are not handled by gc
143      if (!mayProcessGC(*apply_sect))
144        continue;
145
146      bool add_first = false;
147      SectionListTy* reached_sects = NULL;
148      RelocData::iterator reloc_it, rEnd = reloc_sect->getRelocData()->end();
149      for (reloc_it = reloc_sect->getRelocData()->begin(); reloc_it != rEnd;
150           ++reloc_it) {
151        Relocation* reloc = llvm::cast<Relocation>(reloc_it);
152        ResolveInfo* sym = reloc->symInfo();
153        // only the target symbols defined in the input fragments can make the
154        // reference
155        if (sym == NULL)
156          continue;
157        if (!sym->isDefine() || !sym->outSymbol()->hasFragRef())
158          continue;
159
160        // only the target symbols defined in the concerned sections can make
161        // the reference
162        const LDSection* target_sect =
163            &sym->outSymbol()->fragRef()->frag()->getParent()->getSection();
164        if (!mayProcessGC(*target_sect))
165          continue;
166
167        // setup the reached list, if we first add the element to reached list
168        // of this section, create an entry in ReachedSections map
169        if (!add_first) {
170          reached_sects = &m_SectionReachedListMap.getReachedList(*apply_sect);
171          add_first = true;
172        }
173        reached_sects->insert(target_sect);
174      }
175      reached_sects = NULL;
176      add_first = false;
177    }
178  }
179}
180
181void GarbageCollection::getEntrySections(SectionVecTy& pEntry) {
182  // all the KEEP sections defined in ldscript are entries, traverse all the
183  // input sections and check the SectionMap to find the KEEP sections
184  Module::obj_iterator obj, objEnd = m_Module.obj_end();
185  SectionMap& sect_map = m_Module.getScript().sectionMap();
186  for (obj = m_Module.obj_begin(); obj != objEnd; ++obj) {
187    const std::string input_name = (*obj)->name();
188    LDContext::sect_iterator sect, sectEnd = (*obj)->context()->sectEnd();
189    for (sect = (*obj)->context()->sectBegin(); sect != sectEnd; ++sect) {
190      LDSection* section = *sect;
191      if (!mayProcessGC(*section))
192        continue;
193
194      SectionMap::Input* sm_input =
195          sect_map.find(input_name, section->name()).second;
196      if (((sm_input != NULL) && (InputSectDesc::Keep == sm_input->policy())) ||
197          shouldKeep(section->name()))
198        pEntry.push_back(section);
199    }
200  }
201
202  // when building shared object or the --export-dynamic has been given, the
203  // global define symbols are entries
204  if (LinkerConfig::DynObj == m_Config.codeGenType() ||
205      m_Config.options().exportDynamic()) {
206    NamePool::syminfo_iterator info_it,
207        info_end = m_Module.getNamePool().syminfo_end();
208    for (info_it = m_Module.getNamePool().syminfo_begin(); info_it != info_end;
209         ++info_it) {
210      ResolveInfo* info = info_it.getEntry();
211      if (!info->isDefine() || info->isLocal() ||
212          info->shouldForceLocal(m_Config))
213        continue;
214      LDSymbol* sym = info->outSymbol();
215      if (sym == NULL || !sym->hasFragRef())
216        continue;
217
218      // only the target symbols defined in the concerned sections can be
219      // entries
220      const LDSection* sect =
221          &sym->fragRef()->frag()->getParent()->getSection();
222      if (!mayProcessGC(*sect))
223        continue;
224      pEntry.push_back(sect);
225    }
226  }
227
228  // when building executable or PIE
229  if (LinkerConfig::Exec == m_Config.codeGenType() ||
230      m_Config.options().isPIE()) {
231    // 1. the entry symbol is the entry
232    LDSymbol* entry_sym =
233        m_Module.getNamePool().findSymbol(m_Backend.getEntry(m_Module));
234    assert(entry_sym != NULL);
235    pEntry.push_back(&entry_sym->fragRef()->frag()->getParent()->getSection());
236
237    // 2. the symbols have been seen in dynamic objects are entries. If
238    // --export-dynamic is set, then these sections already been added. No need
239    // to add them again
240    if (!m_Config.options().exportDynamic()) {
241      NamePool::syminfo_iterator info_it,
242          info_end = m_Module.getNamePool().syminfo_end();
243      for (info_it = m_Module.getNamePool().syminfo_begin();
244           info_it != info_end; ++info_it) {
245        ResolveInfo* info = info_it.getEntry();
246        if (!info->isDefine() || info->isLocal())
247          continue;
248
249        if (!info->isInDyn())
250          continue;
251
252        LDSymbol* sym = info->outSymbol();
253        if (sym == NULL || !sym->hasFragRef())
254          continue;
255
256        // only the target symbols defined in the concerned sections can be
257        // entries
258        const LDSection* sect =
259            &sym->fragRef()->frag()->getParent()->getSection();
260        if (!mayProcessGC(*sect))
261          continue;
262
263        pEntry.push_back(sect);
264      }
265    }
266  }
267
268  // symbols set by -u should not be garbage collected. Set them entries.
269  GeneralOptions::const_undef_sym_iterator usym;
270  GeneralOptions::const_undef_sym_iterator usymEnd =
271      m_Config.options().undef_sym_end();
272  for (usym = m_Config.options().undef_sym_begin(); usym != usymEnd; ++usym) {
273    LDSymbol* sym = m_Module.getNamePool().findSymbol(*usym);
274    assert(sym);
275    ResolveInfo* info = sym->resolveInfo();
276    assert(info);
277    if (!info->isDefine() || !sym->hasFragRef())
278      continue;
279    // only the symbols defined in the concerned sections can be entries
280    const LDSection* sect = &sym->fragRef()->frag()->getParent()->getSection();
281    if (!mayProcessGC(*sect))
282      continue;
283    pEntry.push_back(sect);
284  }
285}
286
287void GarbageCollection::findReferencedSections(SectionVecTy& pEntry) {
288  // list of sections waiting to be processed
289  typedef std::queue<const LDSection*> WorkListTy;
290  WorkListTy work_list;
291  // start from each entry, resolve the transitive closure
292  SectionVecTy::iterator entry_it, entry_end = pEntry.end();
293  for (entry_it = pEntry.begin(); entry_it != entry_end; ++entry_it) {
294    // add entry point to work list
295    work_list.push(*entry_it);
296
297    // add section from the work_list to the referencedSections until every
298    // reached sections are added
299    while (!work_list.empty()) {
300      const LDSection* sect = work_list.front();
301      work_list.pop();
302      // add section to the ReferencedSections, if the section has been put into
303      // referencedSections, skip this section
304      if (!m_ReferencedSections.insert(sect).second)
305        continue;
306
307      // get the section reached list, if the section do not has one, which
308      // means no referenced between it and other sections, then skip it
309      SectionListTy* reach_list =
310          m_SectionReachedListMap.findReachedList(*sect);
311      if (reach_list == NULL)
312        continue;
313
314      // put the reached sections to work list, skip the one already be in
315      // referencedSections
316      SectionListTy::iterator it, end = reach_list->end();
317      for (it = reach_list->begin(); it != end; ++it) {
318        if (m_ReferencedSections.find(*it) == m_ReferencedSections.end())
319          work_list.push(*it);
320      }
321    }
322  }
323}
324
325void GarbageCollection::stripSections() {
326  // Traverse all the input Regular and BSS sections, if a section is not found
327  // in the ReferencedSections, then it should be garbage collected
328  Module::obj_iterator obj, objEnd = m_Module.obj_end();
329  for (obj = m_Module.obj_begin(); obj != objEnd; ++obj) {
330    LDContext::sect_iterator sect, sectEnd = (*obj)->context()->sectEnd();
331    for (sect = (*obj)->context()->sectBegin(); sect != sectEnd; ++sect) {
332      LDSection* section = *sect;
333      if (!mayProcessGC(*section))
334        continue;
335
336      if (m_ReferencedSections.find(section) == m_ReferencedSections.end()) {
337        section->setKind(LDFileFormat::Ignore);
338        debug(diag::debug_print_gc_sections) << section->name()
339                                             << (*obj)->name();
340      }
341    }
342  }
343
344  // Traverse all the relocation sections, if its target section is set to
345  // Ignore, then set the relocation section to Ignore as well
346  Module::obj_iterator input, inEnd = m_Module.obj_end();
347  for (input = m_Module.obj_begin(); input != inEnd; ++input) {
348    LDContext::sect_iterator rs, rsEnd = (*input)->context()->relocSectEnd();
349    for (rs = (*input)->context()->relocSectBegin(); rs != rsEnd; ++rs) {
350      LDSection* reloc_sect = *rs;
351      if (LDFileFormat::Ignore == reloc_sect->getLink()->kind())
352        reloc_sect->setKind(LDFileFormat::Ignore);
353    }
354  }
355}
356
357}  // namespace mcld
358