GNUArchiveReader.cpp revision 22add6ff3426df1a85089fe6a6e1597ee3b6f300
1//===- GNUArchiveReader.cpp -----------------------------------------------===//
2//
3//                     The MCLinker Project
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9#include <mcld/LD/GNUArchiveReader.h>
10
11#include <mcld/Module.h>
12#include <mcld/InputTree.h>
13#include <mcld/MC/Attribute.h>
14#include <mcld/MC/MCLDInput.h>
15#include <mcld/LD/ResolveInfo.h>
16#include <mcld/LD/ELFObjectReader.h>
17#include <mcld/Support/FileSystem.h>
18#include <mcld/Support/FileHandle.h>
19#include <mcld/Support/MemoryArea.h>
20#include <mcld/Support/MemoryRegion.h>
21#include <mcld/Support/MsgHandling.h>
22#include <mcld/Support/Path.h>
23#include <mcld/ADT/SizeTraits.h>
24
25#include <llvm/ADT/StringRef.h>
26#include <llvm/Support/Host.h>
27
28#include <cstring>
29#include <cstdlib>
30
31using namespace mcld;
32
33GNUArchiveReader::GNUArchiveReader(Module& pModule,
34                                   ELFObjectReader& pELFObjectReader)
35 : m_Module(pModule),
36   m_ELFObjectReader(pELFObjectReader)
37{
38}
39
40GNUArchiveReader::~GNUArchiveReader()
41{
42}
43
44/// isMyFormat
45bool GNUArchiveReader::isMyFormat(Input& pInput) const
46{
47  assert(pInput.hasMemArea());
48  MemoryRegion* region = pInput.memArea()->request(pInput.fileOffset(),
49                                                   Archive::MAGIC_LEN);
50  const char* str = reinterpret_cast<const char*>(region->getBuffer());
51
52  bool result = false;
53  assert(NULL != str);
54  if (isArchive(str) || isThinArchive(str))
55    result = true;
56
57  pInput.memArea()->release(region);
58  return result;
59}
60
61/// isArchive
62bool GNUArchiveReader::isArchive(const char* pStr) const
63{
64  return (0 == memcmp(pStr, Archive::MAGIC, Archive::MAGIC_LEN));
65}
66
67/// isThinArchive
68bool GNUArchiveReader::isThinArchive(const char* pStr) const
69{
70  return (0 == memcmp(pStr, Archive::THIN_MAGIC, Archive::MAGIC_LEN));
71}
72
73/// isThinArchive
74bool GNUArchiveReader::isThinArchive(Input& pInput) const
75{
76  assert(pInput.hasMemArea());
77  MemoryRegion* region = pInput.memArea()->request(pInput.fileOffset(),
78                                                   Archive::MAGIC_LEN);
79  const char* str = reinterpret_cast<const char*>(region->getBuffer());
80
81  bool result = false;
82  assert(NULL != str);
83  if (isThinArchive(str))
84    result = true;
85
86  pInput.memArea()->release(region);
87  return result;
88}
89
90bool GNUArchiveReader::readArchive(Archive& pArchive)
91{
92  if (pArchive.getARFile().attribute()->isWholeArchive())
93    return includeAllMembers(pArchive);
94
95  // if this is the first time read this archive, setup symtab and strtab
96  if (pArchive.getSymbolTable().empty()) {
97  // read the symtab of the archive
98  readSymbolTable(pArchive);
99
100  // read the strtab of the archive
101  readStringTable(pArchive);
102
103  // add root archive to ArchiveMemberMap
104  pArchive.addArchiveMember(pArchive.getARFile().name(),
105                            pArchive.inputs().root(),
106                            &InputTree::Downward);
107  }
108
109  // include the needed members in the archive and build up the input tree
110  bool willSymResolved;
111  do {
112    willSymResolved = false;
113    for (size_t idx = 0; idx < pArchive.numOfSymbols(); ++idx) {
114      // bypass if we already decided to include this symbol or not
115      if (Archive::Symbol::Unknown != pArchive.getSymbolStatus(idx))
116        continue;
117
118      // bypass if another symbol with the same object file offset is included
119      if (pArchive.hasObjectMember(pArchive.getObjFileOffset(idx))) {
120        pArchive.setSymbolStatus(idx, Archive::Symbol::Include);
121        continue;
122      }
123
124      // check if we should include this defined symbol
125      Archive::Symbol::Status status =
126        shouldIncludeSymbol(pArchive.getSymbolName(idx));
127      if (Archive::Symbol::Unknown != status)
128        pArchive.setSymbolStatus(idx, status);
129
130      if (Archive::Symbol::Include == status) {
131        // include the object member from the given offset
132        includeMember(pArchive, pArchive.getObjFileOffset(idx));
133        willSymResolved = true;
134      } // end of if
135    } // end of for
136  } while (willSymResolved);
137
138  return true;
139}
140
141/// readMemberHeader - read the header of a member in a archive file and then
142/// return the corresponding archive member (it may be an input object or
143/// another archive)
144/// @param pArchiveRoot  - the archive root that holds the strtab (extended
145///                        name table)
146/// @param pArchiveFile  - the archive that contains the needed object
147/// @param pFileOffset   - file offset of the member header in the archive
148/// @param pNestedOffset - used when we find a nested archive
149/// @param pMemberSize   - the file size of this member
150Input* GNUArchiveReader::readMemberHeader(Archive& pArchiveRoot,
151                                          Input& pArchiveFile,
152                                          uint32_t pFileOffset,
153                                          uint32_t& pNestedOffset,
154                                          size_t& pMemberSize)
155{
156  assert(pArchiveFile.hasMemArea());
157
158  MemoryRegion* header_region =
159    pArchiveFile.memArea()->request((pArchiveFile.fileOffset() + pFileOffset),
160                                    sizeof(Archive::MemberHeader));
161  const Archive::MemberHeader* header =
162    reinterpret_cast<const Archive::MemberHeader*>(header_region->getBuffer());
163
164  assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)));
165
166  pMemberSize = atoi(header->size);
167
168  // parse the member name and nested offset if any
169  std::string member_name;
170  llvm::StringRef name_field(header->name, sizeof(header->name));
171  if ('/' != header->name[0]) {
172    // this is an object file in an archive
173    size_t pos = name_field.find_first_of('/');
174    member_name.assign(name_field.substr(0, pos).str());
175  }
176  else {
177    // this is an object/archive file in a thin archive
178    size_t begin = 1;
179    size_t end = name_field.find_first_of(" :");
180    uint32_t name_offset = 0;
181    // parse the name offset
182    name_field.substr(begin, end - begin).getAsInteger(10, name_offset);
183
184    if (':' == name_field[end]) {
185      // there is a nested offset
186      begin = end + 1;
187      end = name_field.find_first_of(' ', begin);
188      name_field.substr(begin, end - begin).getAsInteger(10, pNestedOffset);
189    }
190
191    // get the member name from the extended name table
192    assert(pArchiveRoot.hasStrTable());
193    begin = name_offset;
194    end = pArchiveRoot.getStrTable().find_first_of('\n', begin);
195    member_name.assign(pArchiveRoot.getStrTable().substr(begin, end - begin -1));
196  }
197
198  Input* member = NULL;
199  bool isThinAR = isThinArchive(pArchiveFile);
200  if (!isThinAR) {
201    // this is an object file in an archive
202    member = pArchiveRoot.getMemberFile(pArchiveFile,
203                                        isThinAR,
204                                        member_name,
205                                        pArchiveFile.path(),
206                                        (pFileOffset +
207                                         sizeof(Archive::MemberHeader)));
208  }
209  else {
210    // this is a member in a thin archive
211    // try to find if this is a archive already in the map first
212    Archive::ArchiveMember* ar_member =
213      pArchiveRoot.getArchiveMember(member_name);
214    if (NULL != ar_member) {
215      return ar_member->file;
216    }
217
218    // get nested file path, the nested file's member name is the relative
219    // path to the archive containing it.
220    sys::fs::Path input_path(pArchiveFile.path().parent_path());
221    if (!input_path.empty())
222      input_path.append(member_name);
223    else
224      input_path.assign(member_name);
225
226    member = pArchiveRoot.getMemberFile(pArchiveFile,
227                                        isThinAR,
228                                        member_name,
229                                        input_path);
230  }
231
232  pArchiveFile.memArea()->release(header_region);
233  return member;
234}
235
236/// readSymbolTable - read the archive symbol map (armap)
237bool GNUArchiveReader::readSymbolTable(Archive& pArchive)
238{
239  assert(pArchive.getARFile().hasMemArea());
240
241  MemoryRegion* header_region =
242    pArchive.getARFile().memArea()->request((pArchive.getARFile().fileOffset() +
243                                             Archive::MAGIC_LEN),
244                                            sizeof(Archive::MemberHeader));
245  const Archive::MemberHeader* header =
246    reinterpret_cast<const Archive::MemberHeader*>(header_region->getBuffer());
247  assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)));
248
249  int symtab_size = atoi(header->size);
250  pArchive.setSymTabSize(symtab_size);
251
252  if (!pArchive.getARFile().attribute()->isWholeArchive()) {
253    MemoryRegion* symtab_region =
254      pArchive.getARFile().memArea()->request(
255                                            (pArchive.getARFile().fileOffset() +
256                                             Archive::MAGIC_LEN +
257                                             sizeof(Archive::MemberHeader)),
258                                            symtab_size);
259    const uint32_t* data =
260      reinterpret_cast<const uint32_t*>(symtab_region->getBuffer());
261
262    // read the number of symbols
263    uint32_t number = 0;
264    if (llvm::sys::isLittleEndianHost())
265      number = bswap32(*data);
266    else
267      number = *data;
268
269    // set up the pointers for file offset and name offset
270    ++data;
271    const char* name = reinterpret_cast<const char*>(data + number);
272
273    // add the archive symbols
274    for (uint32_t i = 0; i < number; ++i) {
275      if (llvm::sys::isLittleEndianHost())
276        pArchive.addSymbol(name, bswap32(*data));
277      else
278        pArchive.addSymbol(name, *data);
279      name += strlen(name) + 1;
280      ++data;
281    }
282    pArchive.getARFile().memArea()->release(symtab_region);
283  }
284  pArchive.getARFile().memArea()->release(header_region);
285  return true;
286}
287
288/// readStringTable - read the strtab for long file name of the archive
289bool GNUArchiveReader::readStringTable(Archive& pArchive)
290{
291  size_t offset = Archive::MAGIC_LEN +
292                  sizeof(Archive::MemberHeader) +
293                  pArchive.getSymTabSize();
294
295  if (0x0 != (offset & 1))
296    ++offset;
297
298  assert(pArchive.getARFile().hasMemArea());
299
300  MemoryRegion* header_region =
301    pArchive.getARFile().memArea()->request((pArchive.getARFile().fileOffset() +
302                                             offset),
303                                            sizeof(Archive::MemberHeader));
304  const Archive::MemberHeader* header =
305    reinterpret_cast<const Archive::MemberHeader*>(header_region->getBuffer());
306
307  assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)));
308
309  if (0 == memcmp(header->name, Archive::STRTAB_NAME, sizeof(header->name))) {
310    // read the extended name table
311    int strtab_size = atoi(header->size);
312    MemoryRegion* strtab_region =
313      pArchive.getARFile().memArea()->request(
314                                   (pArchive.getARFile().fileOffset() +
315                                    offset + sizeof(Archive::MemberHeader)),
316                                   strtab_size);
317    const char* strtab =
318      reinterpret_cast<const char*>(strtab_region->getBuffer());
319    pArchive.getStrTable().assign(strtab, strtab_size);
320    pArchive.getARFile().memArea()->release(strtab_region);
321  }
322  pArchive.getARFile().memArea()->release(header_region);
323  return true;
324}
325
326/// shouldIncludeStatus - given a sym name from armap and check if including
327/// the corresponding archive member, and then return the decision
328enum Archive::Symbol::Status
329GNUArchiveReader::shouldIncludeSymbol(const llvm::StringRef& pSymName) const
330{
331  // TODO: handle symbol version issue and user defined symbols
332  const ResolveInfo* info = m_Module.getNamePool().findInfo(pSymName);
333  if (NULL != info) {
334    if (!info->isUndef())
335      return Archive::Symbol::Exclude;
336    if (info->isWeak())
337      return Archive::Symbol::Unknown;
338    return Archive::Symbol::Include;
339  }
340  return Archive::Symbol::Unknown;
341}
342
343/// includeMember - include the object member in the given file offset, and
344/// return the size of the object
345/// @param pArchiveRoot - the archive root
346/// @param pFileOffset  - file offset of the member header in the archive
347size_t GNUArchiveReader::includeMember(Archive& pArchive, uint32_t pFileOffset)
348{
349  Input* cur_archive = &(pArchive.getARFile());
350  Input* member = NULL;
351  uint32_t file_offset = pFileOffset;
352  size_t size = 0;
353  do {
354    uint32_t nested_offset = 0;
355    // use the file offset in current archive to find out the member we
356    // want to include
357    member = readMemberHeader(pArchive,
358                              *cur_archive,
359                              file_offset,
360                              nested_offset,
361                              size);
362    assert(member != NULL);
363    // bypass if we get an archive that is already in the map
364    if (Input::Archive == member->type()) {
365        cur_archive = member;
366        file_offset = nested_offset;
367        continue;
368    }
369
370    // insert a node into the subtree of current archive.
371    Archive::ArchiveMember* parent =
372      pArchive.getArchiveMember(cur_archive->name());
373
374    assert(NULL != parent);
375    pArchive.inputs().insert(parent->lastPos, *(parent->move), *member);
376
377    // move the iterator to new created node, and also adjust the
378    // direction to Afterward for next insertion in this subtree
379    parent->move->move(parent->lastPos);
380    parent->move = &InputTree::Afterward;
381
382    if (m_ELFObjectReader.isMyFormat(*member)) {
383      member->setType(Input::Object);
384      pArchive.addObjectMember(pFileOffset, parent->lastPos);
385      m_ELFObjectReader.readHeader(*member);
386      m_ELFObjectReader.readSections(*member);
387      m_ELFObjectReader.readSymbols(*member);
388      m_Module.getObjectList().push_back(member);
389    }
390    else if (isMyFormat(*member)) {
391      member->setType(Input::Archive);
392      // when adding a new archive node, set the iterator to archive
393      // itself, and set the direction to Downward
394      pArchive.addArchiveMember(member->name(),
395                                parent->lastPos,
396                                &InputTree::Downward);
397      cur_archive = member;
398      file_offset = nested_offset;
399    }
400  } while (Input::Object != member->type());
401  return size;
402}
403
404/// includeAllMembers - include all object members. This is called if
405/// --whole-archive is the attribute for this archive file.
406bool GNUArchiveReader::includeAllMembers(Archive& pArchive)
407{
408  // read the symtab of the archive
409  readSymbolTable(pArchive);
410
411  // read the strtab of the archive
412  readStringTable(pArchive);
413
414  // add root archive to ArchiveMemberMap
415  pArchive.addArchiveMember(pArchive.getARFile().name(),
416                            pArchive.inputs().root(),
417                            &InputTree::Downward);
418
419  bool isThinAR = isThinArchive(pArchive.getARFile());
420  uint32_t begin_offset = pArchive.getARFile().fileOffset() +
421                          Archive::MAGIC_LEN +
422                          sizeof(Archive::MemberHeader) +
423                          pArchive.getSymTabSize();
424  if (pArchive.hasStrTable()) {
425    if (0x0 != (begin_offset & 1))
426      ++begin_offset;
427    begin_offset += sizeof(Archive::MemberHeader) +
428                    pArchive.getStrTable().size();
429  }
430  uint32_t end_offset = pArchive.getARFile().memArea()->handler()->size();
431  for (uint32_t offset = begin_offset;
432       offset < end_offset;
433       offset += sizeof(Archive::MemberHeader)) {
434
435    size_t size = includeMember(pArchive, offset);
436
437    if (!isThinAR) {
438      offset += size;
439    }
440
441    if (0x0 != (offset & 1))
442      ++offset;
443  }
444  return true;
445}
446
447