1//===- GNUArchiveReader.cpp -----------------------------------------------===//
2//
3//                     The MCLinker Project
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9#include <mcld/LD/GNUArchiveReader.h>
10
11#include <mcld/Module.h>
12#include <mcld/InputTree.h>
13#include <mcld/LinkerConfig.h>
14#include <mcld/MC/Attribute.h>
15#include <mcld/MC/Input.h>
16#include <mcld/LD/ResolveInfo.h>
17#include <mcld/LD/ELFObjectReader.h>
18#include <mcld/Support/FileSystem.h>
19#include <mcld/Support/FileHandle.h>
20#include <mcld/Support/MemoryArea.h>
21#include <mcld/Support/MsgHandling.h>
22#include <mcld/Support/Path.h>
23#include <mcld/ADT/SizeTraits.h>
24
25#include <llvm/ADT/StringRef.h>
26#include <llvm/Support/Host.h>
27
28#include <cstring>
29#include <cstdlib>
30
31using namespace mcld;
32
33GNUArchiveReader::GNUArchiveReader(Module& pModule,
34                                   ELFObjectReader& pELFObjectReader)
35 : m_Module(pModule),
36   m_ELFObjectReader(pELFObjectReader)
37{
38}
39
40GNUArchiveReader::~GNUArchiveReader()
41{
42}
43
44/// isMyFormat
45bool GNUArchiveReader::isMyFormat(Input& pInput, bool &pContinue) const
46{
47  assert(pInput.hasMemArea());
48  if (pInput.memArea()->size() < Archive::MAGIC_LEN)
49    return false;
50
51  llvm::StringRef region =
52      pInput.memArea()->request(pInput.fileOffset(), Archive::MAGIC_LEN);
53  const char* str = region.begin();
54
55  bool result = false;
56  assert(NULL != str);
57  pContinue = true;
58  if (isArchive(str) || isThinArchive(str))
59    result = true;
60
61  return result;
62}
63
64/// isArchive
65bool GNUArchiveReader::isArchive(const char* pStr) const
66{
67  return (0 == memcmp(pStr, Archive::MAGIC, Archive::MAGIC_LEN));
68}
69
70/// isThinArchive
71bool GNUArchiveReader::isThinArchive(const char* pStr) const
72{
73  return (0 == memcmp(pStr, Archive::THIN_MAGIC, Archive::MAGIC_LEN));
74}
75
76/// isThinArchive
77bool GNUArchiveReader::isThinArchive(Input& pInput) const
78{
79  assert(pInput.hasMemArea());
80  llvm::StringRef region =
81      pInput.memArea()->request(pInput.fileOffset(), Archive::MAGIC_LEN);
82  const char* str = region.begin();
83
84  bool result = false;
85  assert(NULL != str);
86  if (isThinArchive(str))
87    result = true;
88
89  return result;
90}
91
92bool GNUArchiveReader::readArchive(const LinkerConfig& pConfig,
93                                   Archive& pArchive)
94{
95  // bypass the empty archive
96  if (Archive::MAGIC_LEN == pArchive.getARFile().memArea()->size())
97    return true;
98
99  if (pArchive.getARFile().attribute()->isWholeArchive())
100    return includeAllMembers(pConfig, pArchive);
101
102  // if this is the first time read this archive, setup symtab and strtab
103  if (pArchive.getSymbolTable().empty()) {
104  // read the symtab of the archive
105  readSymbolTable(pArchive);
106
107  // read the strtab of the archive
108  readStringTable(pArchive);
109
110  // add root archive to ArchiveMemberMap
111  pArchive.addArchiveMember(pArchive.getARFile().name(),
112                            pArchive.inputs().root(),
113                            &InputTree::Downward);
114  }
115
116  // include the needed members in the archive and build up the input tree
117  bool willSymResolved;
118  do {
119    willSymResolved = false;
120    for (size_t idx = 0; idx < pArchive.numOfSymbols(); ++idx) {
121      // bypass if we already decided to include this symbol or not
122      if (Archive::Symbol::Unknown != pArchive.getSymbolStatus(idx))
123        continue;
124
125      // bypass if another symbol with the same object file offset is included
126      if (pArchive.hasObjectMember(pArchive.getObjFileOffset(idx))) {
127        pArchive.setSymbolStatus(idx, Archive::Symbol::Include);
128        continue;
129      }
130
131      // check if we should include this defined symbol
132      Archive::Symbol::Status status =
133        shouldIncludeSymbol(pArchive.getSymbolName(idx));
134      if (Archive::Symbol::Unknown != status)
135        pArchive.setSymbolStatus(idx, status);
136
137      if (Archive::Symbol::Include == status) {
138        // include the object member from the given offset
139        includeMember(pConfig, pArchive, pArchive.getObjFileOffset(idx));
140        willSymResolved = true;
141      } // end of if
142    } // end of for
143  } while (willSymResolved);
144
145  return true;
146}
147
148/// readMemberHeader - read the header of a member in a archive file and then
149/// return the corresponding archive member (it may be an input object or
150/// another archive)
151/// @param pArchiveRoot  - the archive root that holds the strtab (extended
152///                        name table)
153/// @param pArchiveFile  - the archive that contains the needed object
154/// @param pFileOffset   - file offset of the member header in the archive
155/// @param pNestedOffset - used when we find a nested archive
156/// @param pMemberSize   - the file size of this member
157Input* GNUArchiveReader::readMemberHeader(Archive& pArchiveRoot,
158                                          Input& pArchiveFile,
159                                          uint32_t pFileOffset,
160                                          uint32_t& pNestedOffset,
161                                          size_t& pMemberSize)
162{
163  assert(pArchiveFile.hasMemArea());
164
165  llvm::StringRef header_region =
166    pArchiveFile.memArea()->request((pArchiveFile.fileOffset() + pFileOffset),
167                                    sizeof(Archive::MemberHeader));
168  const Archive::MemberHeader* header =
169    reinterpret_cast<const Archive::MemberHeader*>(header_region.begin());
170
171  assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)));
172
173  pMemberSize = atoi(header->size);
174
175  // parse the member name and nested offset if any
176  std::string member_name;
177  llvm::StringRef name_field(header->name, sizeof(header->name));
178  if ('/' != header->name[0]) {
179    // this is an object file in an archive
180    size_t pos = name_field.find_first_of('/');
181    member_name.assign(name_field.substr(0, pos).str());
182  }
183  else {
184    // this is an object/archive file in a thin archive
185    size_t begin = 1;
186    size_t end = name_field.find_first_of(" :");
187    uint32_t name_offset = 0;
188    // parse the name offset
189    name_field.substr(begin, end - begin).getAsInteger(10, name_offset);
190
191    if (':' == name_field[end]) {
192      // there is a nested offset
193      begin = end + 1;
194      end = name_field.find_first_of(' ', begin);
195      name_field.substr(begin, end - begin).getAsInteger(10, pNestedOffset);
196    }
197
198    // get the member name from the extended name table
199    assert(pArchiveRoot.hasStrTable());
200    begin = name_offset;
201    end = pArchiveRoot.getStrTable().find_first_of('\n', begin);
202    member_name.assign(pArchiveRoot.getStrTable().substr(begin, end - begin -1));
203  }
204
205  Input* member = NULL;
206  bool isThinAR = isThinArchive(pArchiveFile);
207  if (!isThinAR) {
208    // this is an object file in an archive
209    member = pArchiveRoot.getMemberFile(pArchiveFile,
210                                        isThinAR,
211                                        member_name,
212                                        pArchiveFile.path(),
213                                        (pFileOffset +
214                                         sizeof(Archive::MemberHeader)));
215  }
216  else {
217    // this is a member in a thin archive
218    // try to find if this is a archive already in the map first
219    Archive::ArchiveMember* ar_member =
220      pArchiveRoot.getArchiveMember(member_name);
221    if (NULL != ar_member) {
222      return ar_member->file;
223    }
224
225    // get nested file path, the nested file's member name is the relative
226    // path to the archive containing it.
227    sys::fs::Path input_path(pArchiveFile.path().parent_path());
228    if (!input_path.empty())
229      input_path.append(member_name);
230    else
231      input_path.assign(member_name);
232
233    member = pArchiveRoot.getMemberFile(pArchiveFile,
234                                        isThinAR,
235                                        member_name,
236                                        input_path);
237  }
238
239  return member;
240}
241
242template <size_t SIZE>
243static void readSymbolTableEntries(Archive& pArchive, llvm::StringRef pMemRegion)
244{
245  typedef typename SizeTraits<SIZE>::Offset Offset;
246
247  const Offset* data = reinterpret_cast<const Offset*>(pMemRegion.begin());
248
249  // read the number of symbols
250  Offset number = 0;
251  if (llvm::sys::IsLittleEndianHost)
252    number = mcld::bswap<SIZE>(*data);
253  else
254    number = *data;
255
256  // set up the pointers for file offset and name offset
257  ++data;
258  const char* name = reinterpret_cast<const char*>(data + number);
259
260  // add the archive symbols
261  for (Offset i = 0; i < number; ++i) {
262    if (llvm::sys::IsLittleEndianHost)
263      pArchive.addSymbol(name, mcld::bswap<SIZE>(*data));
264    else
265      pArchive.addSymbol(name, *data);
266    name += strlen(name) + 1;
267    ++data;
268  }
269}
270
271/// readSymbolTable - read the archive symbol map (armap)
272bool GNUArchiveReader::readSymbolTable(Archive& pArchive)
273{
274  assert(pArchive.getARFile().hasMemArea());
275
276  llvm::StringRef header_region =
277    pArchive.getARFile().memArea()->request((pArchive.getARFile().fileOffset() +
278                                             Archive::MAGIC_LEN),
279                                            sizeof(Archive::MemberHeader));
280  const Archive::MemberHeader* header =
281    reinterpret_cast<const Archive::MemberHeader*>(header_region.begin());
282  assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)));
283
284  int symtab_size = atoi(header->size);
285  pArchive.setSymTabSize(symtab_size);
286
287  if (!pArchive.getARFile().attribute()->isWholeArchive()) {
288    llvm::StringRef symtab_region = pArchive.getARFile().memArea()->request(
289        (pArchive.getARFile().fileOffset() +
290         Archive::MAGIC_LEN +
291         sizeof(Archive::MemberHeader)),
292        symtab_size);
293
294    if (0 == strncmp(header->name, Archive::SVR4_SYMTAB_NAME,
295                                   strlen(Archive::SVR4_SYMTAB_NAME)))
296      readSymbolTableEntries<32>(pArchive, symtab_region);
297    else if (0 == strncmp(header->name, Archive::IRIX6_SYMTAB_NAME,
298                                        strlen(Archive::IRIX6_SYMTAB_NAME)))
299      readSymbolTableEntries<64>(pArchive, symtab_region);
300    else
301      unreachable(diag::err_unsupported_archive);
302
303  }
304  return true;
305}
306
307/// readStringTable - read the strtab for long file name of the archive
308bool GNUArchiveReader::readStringTable(Archive& pArchive)
309{
310  size_t offset = Archive::MAGIC_LEN +
311                  sizeof(Archive::MemberHeader) +
312                  pArchive.getSymTabSize();
313
314  if (0x0 != (offset & 1))
315    ++offset;
316
317  assert(pArchive.getARFile().hasMemArea());
318
319  llvm::StringRef header_region =
320    pArchive.getARFile().memArea()->request((pArchive.getARFile().fileOffset() +
321                                             offset),
322                                            sizeof(Archive::MemberHeader));
323  const Archive::MemberHeader* header =
324    reinterpret_cast<const Archive::MemberHeader*>(header_region.begin());
325
326  assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)));
327
328  if (0 == memcmp(header->name, Archive::STRTAB_NAME, sizeof(header->name))) {
329    // read the extended name table
330    int strtab_size = atoi(header->size);
331    llvm::StringRef strtab_region =
332      pArchive.getARFile().memArea()->request(
333                                   (pArchive.getARFile().fileOffset() +
334                                    offset + sizeof(Archive::MemberHeader)),
335                                   strtab_size);
336    const char* strtab = strtab_region.begin();
337    pArchive.getStrTable().assign(strtab, strtab_size);
338  }
339  return true;
340}
341
342/// shouldIncludeStatus - given a sym name from armap and check if including
343/// the corresponding archive member, and then return the decision
344enum Archive::Symbol::Status
345GNUArchiveReader::shouldIncludeSymbol(const llvm::StringRef& pSymName) const
346{
347  // TODO: handle symbol version issue and user defined symbols
348  const ResolveInfo* info = m_Module.getNamePool().findInfo(pSymName);
349  if (NULL != info) {
350    if (!info->isUndef())
351      return Archive::Symbol::Exclude;
352    if (info->isWeak())
353      return Archive::Symbol::Unknown;
354    return Archive::Symbol::Include;
355  }
356  return Archive::Symbol::Unknown;
357}
358
359/// includeMember - include the object member in the given file offset, and
360/// return the size of the object
361/// @param pConfig - LinkerConfig
362/// @param pArchiveRoot - the archive root
363/// @param pFileOffset  - file offset of the member header in the archive
364size_t GNUArchiveReader::includeMember(const LinkerConfig& pConfig,
365                                       Archive& pArchive,
366                                       uint32_t pFileOffset)
367{
368  Input* cur_archive = &(pArchive.getARFile());
369  Input* member = NULL;
370  uint32_t file_offset = pFileOffset;
371  size_t size = 0;
372  do {
373    uint32_t nested_offset = 0;
374    // use the file offset in current archive to find out the member we
375    // want to include
376    member = readMemberHeader(pArchive,
377                              *cur_archive,
378                              file_offset,
379                              nested_offset,
380                              size);
381    assert(member != NULL);
382    // bypass if we get an archive that is already in the map
383    if (Input::Archive == member->type()) {
384        cur_archive = member;
385        file_offset = nested_offset;
386        continue;
387    }
388
389    // insert a node into the subtree of current archive.
390    Archive::ArchiveMember* parent =
391      pArchive.getArchiveMember(cur_archive->name());
392
393    assert(NULL != parent);
394    pArchive.inputs().insert(parent->lastPos, *(parent->move), *member);
395
396    // move the iterator to new created node, and also adjust the
397    // direction to Afterward for next insertion in this subtree
398    parent->move->move(parent->lastPos);
399    parent->move = &InputTree::Afterward;
400    bool doContinue = false;
401
402    if (m_ELFObjectReader.isMyFormat(*member, doContinue)) {
403      member->setType(Input::Object);
404      // Set this object as no export if the archive is in the exclude libs.
405      if (pArchive.getARFile().noExport()) {
406        member->setNoExport();
407      }
408      pArchive.addObjectMember(pFileOffset, parent->lastPos);
409      m_ELFObjectReader.readHeader(*member);
410      m_ELFObjectReader.readSections(*member);
411      m_ELFObjectReader.readSymbols(*member);
412      m_Module.getObjectList().push_back(member);
413    }
414    else if (doContinue && isMyFormat(*member, doContinue)) {
415      member->setType(Input::Archive);
416      // when adding a new archive node, set the iterator to archive
417      // itself, and set the direction to Downward
418      pArchive.addArchiveMember(member->name(),
419                                parent->lastPos,
420                                &InputTree::Downward);
421      cur_archive = member;
422      file_offset = nested_offset;
423    }
424    else {
425      warning(diag::warn_unrecognized_input_file) << member->path()
426        << pConfig.targets().triple().str();
427    }
428  } while (Input::Object != member->type());
429  return size;
430}
431
432/// includeAllMembers - include all object members. This is called if
433/// --whole-archive is the attribute for this archive file.
434bool GNUArchiveReader::includeAllMembers(const LinkerConfig& pConfig,
435                                         Archive& pArchive)
436{
437  // read the symtab of the archive
438  readSymbolTable(pArchive);
439
440  // read the strtab of the archive
441  readStringTable(pArchive);
442
443  // add root archive to ArchiveMemberMap
444  pArchive.addArchiveMember(pArchive.getARFile().name(),
445                            pArchive.inputs().root(),
446                            &InputTree::Downward);
447
448  bool isThinAR = isThinArchive(pArchive.getARFile());
449  uint32_t begin_offset = pArchive.getARFile().fileOffset() +
450                          Archive::MAGIC_LEN +
451                          sizeof(Archive::MemberHeader) +
452                          pArchive.getSymTabSize();
453  if (pArchive.hasStrTable()) {
454    if (0x0 != (begin_offset & 1))
455      ++begin_offset;
456    begin_offset += sizeof(Archive::MemberHeader) +
457                    pArchive.getStrTable().size();
458  }
459  uint32_t end_offset = pArchive.getARFile().memArea()->size();
460  for (uint32_t offset = begin_offset;
461       offset < end_offset;
462       offset += sizeof(Archive::MemberHeader)) {
463
464    size_t size = includeMember(pConfig, pArchive, offset);
465
466    if (!isThinAR) {
467      offset += size;
468    }
469
470    if (0x0 != (offset & 1))
471      ++offset;
472  }
473  return true;
474}
475