1//===- GNUArchiveReader.cpp -----------------------------------------------===//
2//
3//                     The MCLinker Project
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9#include "mcld/LD/GNUArchiveReader.h"
10
11#include "mcld/InputTree.h"
12#include "mcld/LinkerConfig.h"
13#include "mcld/Module.h"
14#include "mcld/ADT/SizeTraits.h"
15#include "mcld/MC/Attribute.h"
16#include "mcld/MC/Input.h"
17#include "mcld/LD/ELFObjectReader.h"
18#include "mcld/LD/ResolveInfo.h"
19#include "mcld/Support/FileHandle.h"
20#include "mcld/Support/FileSystem.h"
21#include "mcld/Support/MemoryArea.h"
22#include "mcld/Support/MsgHandling.h"
23#include "mcld/Support/Path.h"
24
25#include <llvm/ADT/StringRef.h>
26#include <llvm/Support/Host.h>
27
28#include <cstdlib>
29#include <cstring>
30
31namespace mcld {
32
33GNUArchiveReader::GNUArchiveReader(Module& pModule,
34                                   ELFObjectReader& pELFObjectReader)
35    : m_Module(pModule), m_ELFObjectReader(pELFObjectReader) {
36}
37
38GNUArchiveReader::~GNUArchiveReader() {
39}
40
41/// isMyFormat
42bool GNUArchiveReader::isMyFormat(Input& pInput, bool& pContinue) const {
43  assert(pInput.hasMemArea());
44  if (pInput.memArea()->size() < Archive::MAGIC_LEN)
45    return false;
46
47  llvm::StringRef region =
48      pInput.memArea()->request(pInput.fileOffset(), Archive::MAGIC_LEN);
49  const char* str = region.begin();
50
51  bool result = false;
52  assert(str != NULL);
53  pContinue = true;
54  if (isArchive(str) || isThinArchive(str))
55    result = true;
56
57  return result;
58}
59
60/// isArchive
61bool GNUArchiveReader::isArchive(const char* pStr) const {
62  return (memcmp(pStr, Archive::MAGIC, Archive::MAGIC_LEN) == 0);
63}
64
65/// isThinArchive
66bool GNUArchiveReader::isThinArchive(const char* pStr) const {
67  return (memcmp(pStr, Archive::THIN_MAGIC, Archive::MAGIC_LEN) == 0);
68}
69
70/// isThinArchive
71bool GNUArchiveReader::isThinArchive(Input& pInput) const {
72  assert(pInput.hasMemArea());
73  llvm::StringRef region =
74      pInput.memArea()->request(pInput.fileOffset(), Archive::MAGIC_LEN);
75  const char* str = region.begin();
76
77  bool result = false;
78  assert(str != NULL);
79  if (isThinArchive(str))
80    result = true;
81
82  return result;
83}
84
85bool GNUArchiveReader::readArchive(const LinkerConfig& pConfig,
86                                   Archive& pArchive) {
87  // bypass the empty archive
88  if (Archive::MAGIC_LEN == pArchive.getARFile().memArea()->size())
89    return true;
90
91  if (pArchive.getARFile().attribute()->isWholeArchive())
92    return includeAllMembers(pConfig, pArchive);
93
94  // if this is the first time read this archive, setup symtab and strtab
95  if (pArchive.getSymbolTable().empty()) {
96    // read the symtab of the archive
97    readSymbolTable(pArchive);
98
99    // read the strtab of the archive
100    readStringTable(pArchive);
101
102    // add root archive to ArchiveMemberMap
103    pArchive.addArchiveMember(pArchive.getARFile().name(),
104                              pArchive.inputs().root(),
105                              &InputTree::Downward);
106  }
107
108  // include the needed members in the archive and build up the input tree
109  bool willSymResolved;
110  do {
111    willSymResolved = false;
112    for (size_t idx = 0; idx < pArchive.numOfSymbols(); ++idx) {
113      // bypass if we already decided to include this symbol or not
114      if (Archive::Symbol::Unknown != pArchive.getSymbolStatus(idx))
115        continue;
116
117      // bypass if another symbol with the same object file offset is included
118      if (pArchive.hasObjectMember(pArchive.getObjFileOffset(idx))) {
119        pArchive.setSymbolStatus(idx, Archive::Symbol::Include);
120        continue;
121      }
122
123      // check if we should include this defined symbol
124      Archive::Symbol::Status status =
125          shouldIncludeSymbol(pArchive.getSymbolName(idx));
126      if (Archive::Symbol::Unknown != status)
127        pArchive.setSymbolStatus(idx, status);
128
129      if (Archive::Symbol::Include == status) {
130        // include the object member from the given offset
131        includeMember(pConfig, pArchive, pArchive.getObjFileOffset(idx));
132        willSymResolved = true;
133      }  // end of if
134    }    // end of for
135  } while (willSymResolved);
136
137  return true;
138}
139
140/// readMemberHeader - read the header of a member in a archive file and then
141/// return the corresponding archive member (it may be an input object or
142/// another archive)
143/// @param pArchiveRoot  - the archive root that holds the strtab (extended
144///                        name table)
145/// @param pArchiveFile  - the archive that contains the needed object
146/// @param pFileOffset   - file offset of the member header in the archive
147/// @param pNestedOffset - used when we find a nested archive
148/// @param pMemberSize   - the file size of this member
149Input* GNUArchiveReader::readMemberHeader(Archive& pArchiveRoot,
150                                          Input& pArchiveFile,
151                                          uint32_t pFileOffset,
152                                          uint32_t& pNestedOffset,
153                                          size_t& pMemberSize) {
154  assert(pArchiveFile.hasMemArea());
155
156  llvm::StringRef header_region = pArchiveFile.memArea()->request(
157      (pArchiveFile.fileOffset() + pFileOffset), sizeof(Archive::MemberHeader));
158  const Archive::MemberHeader* header =
159      reinterpret_cast<const Archive::MemberHeader*>(header_region.begin());
160
161  assert(memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)) ==
162         0);
163
164  pMemberSize = atoi(header->size);
165
166  // parse the member name and nested offset if any
167  std::string member_name;
168  llvm::StringRef name_field(header->name, sizeof(header->name));
169  if (header->name[0] != '/') {
170    // this is an object file in an archive
171    size_t pos = name_field.find_first_of('/');
172    member_name.assign(name_field.substr(0, pos).str());
173  } else {
174    // this is an object/archive file in a thin archive
175    size_t begin = 1;
176    size_t end = name_field.find_first_of(" :");
177    uint32_t name_offset = 0;
178    // parse the name offset
179    name_field.substr(begin, end - begin).getAsInteger(10, name_offset);
180
181    if (name_field[end] == ':') {
182      // there is a nested offset
183      begin = end + 1;
184      end = name_field.find_first_of(' ', begin);
185      name_field.substr(begin, end - begin).getAsInteger(10, pNestedOffset);
186    }
187
188    // get the member name from the extended name table
189    assert(pArchiveRoot.hasStrTable());
190    begin = name_offset;
191    end = pArchiveRoot.getStrTable().find_first_of('\n', begin);
192    member_name.assign(
193        pArchiveRoot.getStrTable().substr(begin, end - begin - 1));
194  }
195
196  Input* member = NULL;
197  bool isThinAR = isThinArchive(pArchiveFile);
198  if (!isThinAR) {
199    // this is an object file in an archive
200    member = pArchiveRoot.getMemberFile(
201        pArchiveFile,
202        isThinAR,
203        member_name,
204        pArchiveFile.path(),
205        (pFileOffset + sizeof(Archive::MemberHeader)));
206  } else {
207    // this is a member in a thin archive
208    // try to find if this is a archive already in the map first
209    Archive::ArchiveMember* ar_member =
210        pArchiveRoot.getArchiveMember(member_name);
211    if (ar_member != NULL) {
212      return ar_member->file;
213    }
214
215    // get nested file path, the nested file's member name is the relative
216    // path to the archive containing it.
217    sys::fs::Path input_path(pArchiveFile.path().parent_path());
218    if (!input_path.empty())
219      input_path.append(sys::fs::Path(member_name));
220    else
221      input_path.assign(member_name);
222
223    member = pArchiveRoot.getMemberFile(
224        pArchiveFile, isThinAR, member_name, input_path);
225  }
226
227  return member;
228}
229
230template <size_t SIZE>
231static void readSymbolTableEntries(Archive& pArchive,
232                                   llvm::StringRef pMemRegion) {
233  typedef typename SizeTraits<SIZE>::Offset Offset;
234
235  const Offset* data = reinterpret_cast<const Offset*>(pMemRegion.begin());
236
237  // read the number of symbols
238  Offset number = 0;
239  if (llvm::sys::IsLittleEndianHost)
240    number = mcld::bswap<SIZE>(*data);
241  else
242    number = *data;
243
244  // set up the pointers for file offset and name offset
245  ++data;
246  const char* name = reinterpret_cast<const char*>(data + number);
247
248  // add the archive symbols
249  for (Offset i = 0; i < number; ++i) {
250    if (llvm::sys::IsLittleEndianHost)
251      pArchive.addSymbol(name, mcld::bswap<SIZE>(*data));
252    else
253      pArchive.addSymbol(name, *data);
254    name += strlen(name) + 1;
255    ++data;
256  }
257}
258
259/// readSymbolTable - read the archive symbol map (armap)
260bool GNUArchiveReader::readSymbolTable(Archive& pArchive) {
261  assert(pArchive.getARFile().hasMemArea());
262  MemoryArea* memory_area = pArchive.getARFile().memArea();
263
264  llvm::StringRef header_region = memory_area->request(
265      (pArchive.getARFile().fileOffset() + Archive::MAGIC_LEN),
266      sizeof(Archive::MemberHeader));
267  const Archive::MemberHeader* header =
268      reinterpret_cast<const Archive::MemberHeader*>(header_region.begin());
269  assert(memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)) ==
270         0);
271
272  int symtab_size = atoi(header->size);
273  pArchive.setSymTabSize(symtab_size);
274
275  if (!pArchive.getARFile().attribute()->isWholeArchive()) {
276    llvm::StringRef symtab_region = memory_area->request(
277        (pArchive.getARFile().fileOffset() + Archive::MAGIC_LEN +
278         sizeof(Archive::MemberHeader)),
279        symtab_size);
280
281    if (strncmp(header->name,
282                Archive::SVR4_SYMTAB_NAME,
283                strlen(Archive::SVR4_SYMTAB_NAME)) == 0)
284      readSymbolTableEntries<32>(pArchive, symtab_region);
285    else if (strncmp(header->name,
286                     Archive::IRIX6_SYMTAB_NAME,
287                     strlen(Archive::IRIX6_SYMTAB_NAME)) == 0)
288      readSymbolTableEntries<64>(pArchive, symtab_region);
289    else
290      unreachable(diag::err_unsupported_archive);
291  }
292  return true;
293}
294
295/// readStringTable - read the strtab for long file name of the archive
296bool GNUArchiveReader::readStringTable(Archive& pArchive) {
297  size_t offset = Archive::MAGIC_LEN + sizeof(Archive::MemberHeader) +
298                  pArchive.getSymTabSize();
299
300  if ((offset & 1) != 0x0)
301    ++offset;
302
303  assert(pArchive.getARFile().hasMemArea());
304  MemoryArea* memory_area = pArchive.getARFile().memArea();
305
306  llvm::StringRef header_region =
307      memory_area->request((pArchive.getARFile().fileOffset() + offset),
308                           sizeof(Archive::MemberHeader));
309  const Archive::MemberHeader* header =
310      reinterpret_cast<const Archive::MemberHeader*>(header_region.begin());
311
312  assert(memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)) ==
313         0);
314
315  if (memcmp(header->name, Archive::STRTAB_NAME, sizeof(header->name)) == 0) {
316    // read the extended name table
317    int strtab_size = atoi(header->size);
318    llvm::StringRef strtab_region =
319        memory_area->request((pArchive.getARFile().fileOffset() + offset +
320                              sizeof(Archive::MemberHeader)),
321                             strtab_size);
322    const char* strtab = strtab_region.begin();
323    pArchive.getStrTable().assign(strtab, strtab_size);
324  }
325  return true;
326}
327
328/// shouldIncludeStatus - given a sym name from armap and check if including
329/// the corresponding archive member, and then return the decision
330enum Archive::Symbol::Status GNUArchiveReader::shouldIncludeSymbol(
331    const llvm::StringRef& pSymName) const {
332  // TODO: handle symbol version issue and user defined symbols
333  const ResolveInfo* info = m_Module.getNamePool().findInfo(pSymName);
334  if (info != NULL) {
335    if (!info->isUndef())
336      return Archive::Symbol::Exclude;
337    if (info->isWeak())
338      return Archive::Symbol::Unknown;
339    return Archive::Symbol::Include;
340  }
341  return Archive::Symbol::Unknown;
342}
343
344/// includeMember - include the object member in the given file offset, and
345/// return the size of the object
346/// @param pConfig - LinkerConfig
347/// @param pArchiveRoot - the archive root
348/// @param pFileOffset  - file offset of the member header in the archive
349size_t GNUArchiveReader::includeMember(const LinkerConfig& pConfig,
350                                       Archive& pArchive,
351                                       uint32_t pFileOffset) {
352  Input* cur_archive = &(pArchive.getARFile());
353  Input* member = NULL;
354  uint32_t file_offset = pFileOffset;
355  size_t size = 0;
356  do {
357    uint32_t nested_offset = 0;
358    // use the file offset in current archive to find out the member we
359    // want to include
360    member = readMemberHeader(
361        pArchive, *cur_archive, file_offset, nested_offset, size);
362    assert(member != NULL);
363    // bypass if we get an archive that is already in the map
364    if (Input::Archive == member->type()) {
365      cur_archive = member;
366      file_offset = nested_offset;
367      continue;
368    }
369
370    // insert a node into the subtree of current archive.
371    Archive::ArchiveMember* parent =
372        pArchive.getArchiveMember(cur_archive->name());
373
374    assert(parent != NULL);
375    pArchive.inputs().insert(parent->lastPos, *(parent->move), *member);
376
377    // move the iterator to new created node, and also adjust the
378    // direction to Afterward for next insertion in this subtree
379    parent->move->move(parent->lastPos);
380    parent->move = &InputTree::Afterward;
381    bool doContinue = false;
382
383    if (m_ELFObjectReader.isMyFormat(*member, doContinue)) {
384      member->setType(Input::Object);
385      // Set this object as no export if the archive is in the exclude libs.
386      if (pArchive.getARFile().noExport()) {
387        member->setNoExport();
388      }
389      pArchive.addObjectMember(pFileOffset, parent->lastPos);
390      m_ELFObjectReader.readHeader(*member);
391      m_ELFObjectReader.readSections(*member);
392      m_ELFObjectReader.readSymbols(*member);
393      m_Module.getObjectList().push_back(member);
394    } else if (doContinue && isMyFormat(*member, doContinue)) {
395      member->setType(Input::Archive);
396      // when adding a new archive node, set the iterator to archive
397      // itself, and set the direction to Downward
398      pArchive.addArchiveMember(
399          member->name(), parent->lastPos, &InputTree::Downward);
400      cur_archive = member;
401      file_offset = nested_offset;
402    } else {
403      warning(diag::warn_unrecognized_input_file)
404          << member->path() << pConfig.targets().triple().str();
405    }
406  } while (Input::Object != member->type());
407  return size;
408}
409
410/// includeAllMembers - include all object members. This is called if
411/// --whole-archive is the attribute for this archive file.
412bool GNUArchiveReader::includeAllMembers(const LinkerConfig& pConfig,
413                                         Archive& pArchive) {
414  // read the symtab of the archive
415  readSymbolTable(pArchive);
416
417  // read the strtab of the archive
418  readStringTable(pArchive);
419
420  // add root archive to ArchiveMemberMap
421  pArchive.addArchiveMember(pArchive.getARFile().name(),
422                            pArchive.inputs().root(),
423                            &InputTree::Downward);
424
425  bool isThinAR = isThinArchive(pArchive.getARFile());
426  uint32_t begin_offset = pArchive.getARFile().fileOffset() +
427                          Archive::MAGIC_LEN + sizeof(Archive::MemberHeader) +
428                          pArchive.getSymTabSize();
429  if (pArchive.hasStrTable()) {
430    if ((begin_offset & 1) != 0x0)
431      ++begin_offset;
432    begin_offset +=
433        sizeof(Archive::MemberHeader) + pArchive.getStrTable().size();
434  }
435  uint32_t end_offset = pArchive.getARFile().memArea()->size();
436  for (uint32_t offset = begin_offset; offset < end_offset;
437       offset += sizeof(Archive::MemberHeader)) {
438    size_t size = includeMember(pConfig, pArchive, offset);
439
440    if (!isThinAR) {
441      offset += size;
442    }
443
444    if ((offset & 1) != 0x0)
445      ++offset;
446  }
447  return true;
448}
449
450}  // namespace mcld
451