GNUArchiveReader.cpp revision cedee4b38f4786845183be7f5916dd520a170ae0
1//===- GNUArchiveReader.cpp -----------------------------------------------===//
2//
3//                     The MCLinker Project
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9#include <mcld/MC/MCLDInfo.h>
10#include <mcld/MC/MCLDInput.h>
11#include <mcld/MC/InputTree.h>
12#include <mcld/LD/GNUArchiveReader.h>
13#include <mcld/LD/ResolveInfo.h>
14#include <mcld/LD/ELFObjectReader.h>
15#include <mcld/Support/FileSystem.h>
16#include <mcld/Support/FileHandle.h>
17#include <mcld/Support/MemoryArea.h>
18#include <mcld/Support/MemoryRegion.h>
19#include <mcld/Support/MemoryAreaFactory.h>
20#include <mcld/Support/MsgHandling.h>
21#include <mcld/Support/Path.h>
22#include <mcld/ADT/SizeTraits.h>
23
24#include <llvm/ADT/StringRef.h>
25#include <llvm/Support/Host.h>
26
27#include <cstring>
28#include <cstdlib>
29
30using namespace mcld;
31
32GNUArchiveReader::GNUArchiveReader(MCLDInfo& pLDInfo,
33                                   MemoryAreaFactory& pMemAreaFactory,
34                                   ELFObjectReader& pELFObjectReader)
35 : m_LDInfo(pLDInfo),
36   m_MemAreaFactory(pMemAreaFactory),
37   m_ELFObjectReader(pELFObjectReader)
38{
39}
40
41GNUArchiveReader::~GNUArchiveReader()
42{
43}
44
45/// isMyFormat
46bool GNUArchiveReader::isMyFormat(Input& pInput) const
47{
48  assert(pInput.hasMemArea());
49  MemoryRegion* region = pInput.memArea()->request(pInput.fileOffset(),
50                                                   Archive::MAGIC_LEN);
51  const char* str = reinterpret_cast<const char*>(region->getBuffer());
52
53  bool result = false;
54  assert(NULL != str);
55  if (isArchive(str) || isThinArchive(str))
56    result = true;
57
58  pInput.memArea()->release(region);
59  return result;
60}
61
62/// isArchive
63bool GNUArchiveReader::isArchive(const char* pStr) const
64{
65  return (0 == memcmp(pStr, Archive::MAGIC, Archive::MAGIC_LEN));
66}
67
68/// isThinArchive
69bool GNUArchiveReader::isThinArchive(const char* pStr) const
70{
71  return (0 == memcmp(pStr, Archive::THIN_MAGIC, Archive::MAGIC_LEN));
72}
73
74/// isThinArchive
75bool GNUArchiveReader::isThinArchive(Input& pInput) const
76{
77  assert(pInput.hasMemArea());
78  MemoryRegion* region = pInput.memArea()->request(pInput.fileOffset(),
79                                                   Archive::MAGIC_LEN);
80  const char* str = reinterpret_cast<const char*>(region->getBuffer());
81
82  bool result = false;
83  assert(NULL != str);
84  if (isThinArchive(str))
85    result = true;
86
87  pInput.memArea()->release(region);
88  return result;
89}
90
91bool GNUArchiveReader::readArchive(Archive& pArchive)
92{
93  // read the symtab of the archive
94  readSymbolTable(pArchive);
95
96  // read the strtab of the archive
97  readStringTable(pArchive);
98
99  // add root archive to ArchiveMemberMap
100  pArchive.addArchiveMember(pArchive.getARFile().name(),
101                            pArchive.inputs().root(),
102                            &InputTree::Downward);
103
104  // include the needed members in the archive and build up the input tree
105  bool willSymResolved;
106  do {
107    willSymResolved = false;
108    for (size_t idx = 0; idx < pArchive.numOfSymbols(); ++idx) {
109      // bypass if we already decided to include this symbol or not
110      if (Archive::Symbol::Unknown != pArchive.getSymbolStatus(idx))
111        continue;
112
113      // bypass if another symbol with the same object file offset is included
114      if (pArchive.hasObjectMember(pArchive.getObjFileOffset(idx))) {
115        pArchive.setSymbolStatus(idx, Archive::Symbol::Include);
116        continue;
117      }
118
119      // check if we should include this defined symbol
120      Archive::Symbol::Status status =
121        shouldIncludeSymbol(pArchive.getSymbolName(idx));
122      if (Archive::Symbol::Unknown != status)
123        pArchive.setSymbolStatus(idx, status);
124
125      if (Archive::Symbol::Include == status) {
126        Input* cur_archive = &(pArchive.getARFile());
127        Input* member = cur_archive;
128        uint32_t file_offset = pArchive.getObjFileOffset(idx);
129        while ((member != NULL) && (Input::Object != member->type())) {
130          uint32_t nested_offset = 0;
131          // use the file offset in current archive to find out the member we
132          // want to include
133          member = readMemberHeader(pArchive,
134                                    *cur_archive,
135                                    file_offset,
136                                    nested_offset);
137          assert(member != NULL);
138          // bypass if we get an archive that is already in the map
139          if (Input::Archive == member->type()) {
140              cur_archive = member;
141              file_offset = nested_offset;
142              continue;
143          }
144
145          // insert a node into the subtree of current archive.
146          Archive::ArchiveMember* parent =
147            pArchive.getArchiveMember(cur_archive->name());
148
149          assert(NULL != parent);
150          pArchive.inputs().insert(parent->lastPos, *(parent->move), *member);
151
152          // move the iterator to new created node, and also adjust the
153          // direction to Afterward for next insertion in this subtree
154          parent->move->move(parent->lastPos);
155          parent->move = &InputTree::Afterward;
156
157          if (m_ELFObjectReader.isMyFormat(*member)) {
158            member->setType(Input::Object);
159            pArchive.addObjectMember(pArchive.getObjFileOffset(idx),
160                                     parent->lastPos);
161            m_ELFObjectReader.readObject(*member);
162            m_ELFObjectReader.readSections(*member);
163            m_ELFObjectReader.readSymbols(*member);
164          }
165          else if (isMyFormat(*member)) {
166            member->setType(Input::Archive);
167            // when adding a new archive node, set the iterator to archive
168            // itself, and set the direction to Downward
169            pArchive.addArchiveMember(member->name(),
170                                      parent->lastPos,
171                                      &InputTree::Downward);
172            cur_archive = member;
173            file_offset = nested_offset;
174          }
175        } // end of while
176        willSymResolved = true;
177      } // end of if
178    } // end of for
179  } while (willSymResolved);
180
181  return true;
182}
183
184/// readMemberHeader - read the header of a member in a archive file and then
185/// return the corresponding archive member (it may be an input object or
186/// another archive)
187/// @param pArchiveRoot  - the archive root that holds the strtab (extended
188///                        name table)
189/// @param pArchiveFile  - the archive that contains the needed object
190/// @param pFileOffset   - file offset of the member header in the archive
191/// @param pNestedOffset - used when we find a nested archive
192Input* GNUArchiveReader::readMemberHeader(Archive& pArchiveRoot,
193                                          Input& pArchiveFile,
194                                          uint32_t pFileOffset,
195                                          uint32_t& pNestedOffset)
196{
197  assert(pArchiveFile.hasMemArea());
198
199  MemoryRegion* header_region =
200    pArchiveFile.memArea()->request((pArchiveFile.fileOffset() + pFileOffset),
201                                    sizeof(Archive::MemberHeader));
202  const Archive::MemberHeader* header =
203    reinterpret_cast<const Archive::MemberHeader*>(header_region->getBuffer());
204
205  assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, 2));
206
207  // int size = atoi(header->size);
208
209  // parse the member name and nested offset if any
210  std::string member_name;
211  llvm::StringRef name_field(header->name, 16);
212  if ('/' != header->name[0]) {
213    // this is an object file in an archive
214    size_t pos = name_field.find_first_of('/');
215    member_name.assign(name_field.substr(0, pos).str());
216  }
217  else {
218    // this is an object/archive file in a thin archive
219    size_t begin = 1;
220    size_t end = name_field.find_first_of(" :");
221    uint32_t name_offset = 0;
222    // parse the name offset
223    name_field.substr(begin, end - begin).getAsInteger(10, name_offset);
224
225    if (':' == name_field[end]) {
226      // there is a nested offset
227      begin = end + 1;
228      end = name_field.find_first_of(' ', begin);
229      name_field.substr(begin, end - begin).getAsInteger(10, pNestedOffset);
230    }
231
232    // get the member name from the extended name table
233    begin = name_offset;
234    end = pArchiveRoot.getStrTable().find_first_of('\n', begin);
235    member_name.assign(pArchiveRoot.getStrTable().substr(begin, end - begin -1));
236  }
237
238  Input* member = NULL;
239  if (!isThinArchive(pArchiveFile)) {
240    // this is an object file in an archive
241    member =
242      m_LDInfo.inputFactory().produce(member_name,
243                                      pArchiveFile.path(),
244                                      Input::Unknown,
245                                      (pFileOffset +
246                                       sizeof(Archive::MemberHeader)));
247    assert(member != NULL);
248    member->setMemArea(pArchiveFile.memArea());
249    LDContext *input_context = m_LDInfo.contextFactory().produce();
250    member->setContext(input_context);
251  }
252  else {
253    // this is a member in a thin archive
254    // try to find if this is a archive already in the map first
255    Archive::ArchiveMember* ar_member =
256      pArchiveRoot.getArchiveMember(member_name);
257    if (NULL != ar_member) {
258      return ar_member->file;
259    }
260
261    // get nested file path, the nested file's member name is the relative
262    // path to the archive containing it.
263    sys::fs::Path input_path(pArchiveFile.path().parent_path());
264    if (!input_path.empty())
265      input_path.append(member_name);
266    else
267      input_path.assign(member_name);
268    member =
269      m_LDInfo.inputFactory().produce(member_name, input_path, Input::Unknown);
270
271    assert(member != NULL);
272    MemoryArea* input_memory =
273      m_MemAreaFactory.produce(member->path(), FileHandle::ReadOnly);
274    if (input_memory->handler()->isGood()) {
275      member->setMemArea(input_memory);
276    }
277    else {
278      error(diag::err_cannot_open_input) << member->name() << member->path();
279      return NULL;
280    }
281    LDContext *input_context = m_LDInfo.contextFactory().produce(input_path);
282    member->setContext(input_context);
283  }
284
285  pArchiveFile.memArea()->release(header_region);
286  return member;
287}
288
289/// readSymbolTable - read the archive symbol map (armap)
290bool GNUArchiveReader::readSymbolTable(Archive& pArchive)
291{
292  assert(pArchive.getARFile().hasMemArea());
293
294  MemoryRegion* header_region =
295    pArchive.getARFile().memArea()->request((pArchive.getARFile().fileOffset() +
296                                             Archive::MAGIC_LEN),
297                                            sizeof(Archive::MemberHeader));
298  const Archive::MemberHeader* header =
299    reinterpret_cast<const Archive::MemberHeader*>(header_region->getBuffer());
300  assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, 2));
301
302  int symtab_size = atoi(header->size);
303  pArchive.setSymTabSize(symtab_size);
304
305  MemoryRegion* symtab_region =
306    pArchive.getARFile().memArea()->request((pArchive.getARFile().fileOffset() +
307                                             Archive::MAGIC_LEN +
308                                             sizeof(Archive::MemberHeader)),
309                                            symtab_size);
310  const uint32_t* data =
311    reinterpret_cast<const uint32_t*>(symtab_region->getBuffer());
312
313  // read the number of symbols
314  uint32_t number = 0;
315  if (llvm::sys::isLittleEndianHost())
316    number = bswap32(*data);
317  else
318    number = *data;
319
320  // set up the pointers for file offset and name offset
321  ++data;
322  const char* name = reinterpret_cast<const char*>(data + number);
323
324  // add the archive symbols
325  for (uint32_t i = 0; i < number; ++i) {
326    if (llvm::sys::isLittleEndianHost())
327      pArchive.addSymbol(name, bswap32(*data));
328    else
329      pArchive.addSymbol(name, *data);
330    name += strlen(name) + 1;
331    ++data;
332  }
333
334  pArchive.getARFile().memArea()->release(header_region);
335  pArchive.getARFile().memArea()->release(symtab_region);
336  return true;
337}
338
339/// readStringTable - read the strtab for long file name of the archive
340bool GNUArchiveReader::readStringTable(Archive& pArchive)
341{
342  size_t offset = Archive::MAGIC_LEN +
343                  sizeof(Archive::MemberHeader) +
344                  pArchive.getSymTabSize();
345
346  if (0x0 != (offset & 1))
347    ++offset;
348
349  assert(pArchive.getARFile().hasMemArea());
350
351  MemoryRegion* header_region =
352    pArchive.getARFile().memArea()->request((pArchive.getARFile().fileOffset() +
353                                             offset),
354                                            sizeof(Archive::MemberHeader));
355  const Archive::MemberHeader* header =
356    reinterpret_cast<const Archive::MemberHeader*>(header_region->getBuffer());
357
358  assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, 2));
359
360  int strtab_size = atoi(header->size);
361
362  MemoryRegion* strtab_region =
363    pArchive.getARFile().memArea()->request((pArchive.getARFile().fileOffset() +
364                                             offset +
365                                             sizeof(Archive::MemberHeader)),
366                                            strtab_size);
367  const char* strtab =
368    reinterpret_cast<const char*>(strtab_region->getBuffer());
369
370  pArchive.getStrTable().assign(strtab, strtab_size);
371
372  pArchive.getARFile().memArea()->release(header_region);
373  pArchive.getARFile().memArea()->release(strtab_region);
374  return true;
375}
376
377/// shouldIncludeStatus - given a sym name from armap and check if including
378/// the corresponding archive member, and then return the decision
379enum Archive::Symbol::Status
380GNUArchiveReader::shouldIncludeSymbol(const llvm::StringRef& pSymName) const
381{
382  // TODO: handle symbol version issue and user defined symbols
383  ResolveInfo* info = m_LDInfo.getNamePool().findInfo(pSymName);
384  if (NULL != info) {
385    if (!info->isUndef())
386      return Archive::Symbol::Exclude;
387    if (info->isWeak())
388      return Archive::Symbol::Unknown;
389    return Archive::Symbol::Include;
390  }
391  return Archive::Symbol::Unknown;
392}
393
394