GNUArchiveReader.cpp revision 21433dddd6366055d6b305675f4afca0b4592dcd
1//===- GNUArchiveReader.cpp -----------------------------------------------===//
2//
3//                     The MCLinker Project
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9#include <mcld/LD/GNUArchiveReader.h>
10
11#include <mcld/Module.h>
12#include <mcld/InputTree.h>
13#include <mcld/MC/Attribute.h>
14#include <mcld/MC/MCLDInput.h>
15#include <mcld/LD/ResolveInfo.h>
16#include <mcld/LD/ELFObjectReader.h>
17#include <mcld/Support/FileSystem.h>
18#include <mcld/Support/FileHandle.h>
19#include <mcld/Support/MemoryArea.h>
20#include <mcld/Support/MemoryRegion.h>
21#include <mcld/Support/MsgHandling.h>
22#include <mcld/Support/Path.h>
23#include <mcld/ADT/SizeTraits.h>
24
25#include <llvm/ADT/StringRef.h>
26#include <llvm/Support/Host.h>
27
28#include <cstring>
29#include <cstdlib>
30
31using namespace mcld;
32
33GNUArchiveReader::GNUArchiveReader(Module& pModule,
34                                   ELFObjectReader& pELFObjectReader)
35 : m_Module(pModule),
36   m_ELFObjectReader(pELFObjectReader)
37{
38}
39
40GNUArchiveReader::~GNUArchiveReader()
41{
42}
43
44/// isMyFormat
45bool GNUArchiveReader::isMyFormat(Input& pInput) const
46{
47  assert(pInput.hasMemArea());
48  MemoryRegion* region = pInput.memArea()->request(pInput.fileOffset(),
49                                                   Archive::MAGIC_LEN);
50  const char* str = reinterpret_cast<const char*>(region->getBuffer());
51
52  bool result = false;
53  assert(NULL != str);
54  if (isArchive(str) || isThinArchive(str))
55    result = true;
56
57  pInput.memArea()->release(region);
58  return result;
59}
60
61/// isArchive
62bool GNUArchiveReader::isArchive(const char* pStr) const
63{
64  return (0 == memcmp(pStr, Archive::MAGIC, Archive::MAGIC_LEN));
65}
66
67/// isThinArchive
68bool GNUArchiveReader::isThinArchive(const char* pStr) const
69{
70  return (0 == memcmp(pStr, Archive::THIN_MAGIC, Archive::MAGIC_LEN));
71}
72
73/// isThinArchive
74bool GNUArchiveReader::isThinArchive(Input& pInput) const
75{
76  assert(pInput.hasMemArea());
77  MemoryRegion* region = pInput.memArea()->request(pInput.fileOffset(),
78                                                   Archive::MAGIC_LEN);
79  const char* str = reinterpret_cast<const char*>(region->getBuffer());
80
81  bool result = false;
82  assert(NULL != str);
83  if (isThinArchive(str))
84    result = true;
85
86  pInput.memArea()->release(region);
87  return result;
88}
89
90bool GNUArchiveReader::readArchive(Archive& pArchive)
91{
92  // bypass the empty archive
93  if (Archive::MAGIC_LEN == pArchive.getARFile().memArea()->handler()->size())
94    return true;
95
96  if (pArchive.getARFile().attribute()->isWholeArchive())
97    return includeAllMembers(pArchive);
98
99  // if this is the first time read this archive, setup symtab and strtab
100  if (pArchive.getSymbolTable().empty()) {
101  // read the symtab of the archive
102  readSymbolTable(pArchive);
103
104  // read the strtab of the archive
105  readStringTable(pArchive);
106
107  // add root archive to ArchiveMemberMap
108  pArchive.addArchiveMember(pArchive.getARFile().name(),
109                            pArchive.inputs().root(),
110                            &InputTree::Downward);
111  }
112
113  // include the needed members in the archive and build up the input tree
114  bool willSymResolved;
115  do {
116    willSymResolved = false;
117    for (size_t idx = 0; idx < pArchive.numOfSymbols(); ++idx) {
118      // bypass if we already decided to include this symbol or not
119      if (Archive::Symbol::Unknown != pArchive.getSymbolStatus(idx))
120        continue;
121
122      // bypass if another symbol with the same object file offset is included
123      if (pArchive.hasObjectMember(pArchive.getObjFileOffset(idx))) {
124        pArchive.setSymbolStatus(idx, Archive::Symbol::Include);
125        continue;
126      }
127
128      // check if we should include this defined symbol
129      Archive::Symbol::Status status =
130        shouldIncludeSymbol(pArchive.getSymbolName(idx));
131      if (Archive::Symbol::Unknown != status)
132        pArchive.setSymbolStatus(idx, status);
133
134      if (Archive::Symbol::Include == status) {
135        // include the object member from the given offset
136        includeMember(pArchive, pArchive.getObjFileOffset(idx));
137        willSymResolved = true;
138      } // end of if
139    } // end of for
140  } while (willSymResolved);
141
142  return true;
143}
144
145/// readMemberHeader - read the header of a member in a archive file and then
146/// return the corresponding archive member (it may be an input object or
147/// another archive)
148/// @param pArchiveRoot  - the archive root that holds the strtab (extended
149///                        name table)
150/// @param pArchiveFile  - the archive that contains the needed object
151/// @param pFileOffset   - file offset of the member header in the archive
152/// @param pNestedOffset - used when we find a nested archive
153/// @param pMemberSize   - the file size of this member
154Input* GNUArchiveReader::readMemberHeader(Archive& pArchiveRoot,
155                                          Input& pArchiveFile,
156                                          uint32_t pFileOffset,
157                                          uint32_t& pNestedOffset,
158                                          size_t& pMemberSize)
159{
160  assert(pArchiveFile.hasMemArea());
161
162  MemoryRegion* header_region =
163    pArchiveFile.memArea()->request((pArchiveFile.fileOffset() + pFileOffset),
164                                    sizeof(Archive::MemberHeader));
165  const Archive::MemberHeader* header =
166    reinterpret_cast<const Archive::MemberHeader*>(header_region->getBuffer());
167
168  assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)));
169
170  pMemberSize = atoi(header->size);
171
172  // parse the member name and nested offset if any
173  std::string member_name;
174  llvm::StringRef name_field(header->name, sizeof(header->name));
175  if ('/' != header->name[0]) {
176    // this is an object file in an archive
177    size_t pos = name_field.find_first_of('/');
178    member_name.assign(name_field.substr(0, pos).str());
179  }
180  else {
181    // this is an object/archive file in a thin archive
182    size_t begin = 1;
183    size_t end = name_field.find_first_of(" :");
184    uint32_t name_offset = 0;
185    // parse the name offset
186    name_field.substr(begin, end - begin).getAsInteger(10, name_offset);
187
188    if (':' == name_field[end]) {
189      // there is a nested offset
190      begin = end + 1;
191      end = name_field.find_first_of(' ', begin);
192      name_field.substr(begin, end - begin).getAsInteger(10, pNestedOffset);
193    }
194
195    // get the member name from the extended name table
196    assert(pArchiveRoot.hasStrTable());
197    begin = name_offset;
198    end = pArchiveRoot.getStrTable().find_first_of('\n', begin);
199    member_name.assign(pArchiveRoot.getStrTable().substr(begin, end - begin -1));
200  }
201
202  Input* member = NULL;
203  bool isThinAR = isThinArchive(pArchiveFile);
204  if (!isThinAR) {
205    // this is an object file in an archive
206    member = pArchiveRoot.getMemberFile(pArchiveFile,
207                                        isThinAR,
208                                        member_name,
209                                        pArchiveFile.path(),
210                                        (pFileOffset +
211                                         sizeof(Archive::MemberHeader)));
212  }
213  else {
214    // this is a member in a thin archive
215    // try to find if this is a archive already in the map first
216    Archive::ArchiveMember* ar_member =
217      pArchiveRoot.getArchiveMember(member_name);
218    if (NULL != ar_member) {
219      return ar_member->file;
220    }
221
222    // get nested file path, the nested file's member name is the relative
223    // path to the archive containing it.
224    sys::fs::Path input_path(pArchiveFile.path().parent_path());
225    if (!input_path.empty())
226      input_path.append(member_name);
227    else
228      input_path.assign(member_name);
229
230    member = pArchiveRoot.getMemberFile(pArchiveFile,
231                                        isThinAR,
232                                        member_name,
233                                        input_path);
234  }
235
236  pArchiveFile.memArea()->release(header_region);
237  return member;
238}
239
240/// readSymbolTable - read the archive symbol map (armap)
241bool GNUArchiveReader::readSymbolTable(Archive& pArchive)
242{
243  assert(pArchive.getARFile().hasMemArea());
244
245  MemoryRegion* header_region =
246    pArchive.getARFile().memArea()->request((pArchive.getARFile().fileOffset() +
247                                             Archive::MAGIC_LEN),
248                                            sizeof(Archive::MemberHeader));
249  const Archive::MemberHeader* header =
250    reinterpret_cast<const Archive::MemberHeader*>(header_region->getBuffer());
251  assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)));
252
253  int symtab_size = atoi(header->size);
254  pArchive.setSymTabSize(symtab_size);
255
256  if (!pArchive.getARFile().attribute()->isWholeArchive()) {
257    MemoryRegion* symtab_region =
258      pArchive.getARFile().memArea()->request(
259                                            (pArchive.getARFile().fileOffset() +
260                                             Archive::MAGIC_LEN +
261                                             sizeof(Archive::MemberHeader)),
262                                            symtab_size);
263    const uint32_t* data =
264      reinterpret_cast<const uint32_t*>(symtab_region->getBuffer());
265
266    // read the number of symbols
267    uint32_t number = 0;
268    if (llvm::sys::IsLittleEndianHost)
269      number = mcld::bswap32(*data);
270    else
271      number = *data;
272
273    // set up the pointers for file offset and name offset
274    ++data;
275    const char* name = reinterpret_cast<const char*>(data + number);
276
277    // add the archive symbols
278    for (uint32_t i = 0; i < number; ++i) {
279      if (llvm::sys::IsLittleEndianHost)
280        pArchive.addSymbol(name, mcld::bswap32(*data));
281      else
282        pArchive.addSymbol(name, *data);
283      name += strlen(name) + 1;
284      ++data;
285    }
286    pArchive.getARFile().memArea()->release(symtab_region);
287  }
288  pArchive.getARFile().memArea()->release(header_region);
289  return true;
290}
291
292/// readStringTable - read the strtab for long file name of the archive
293bool GNUArchiveReader::readStringTable(Archive& pArchive)
294{
295  size_t offset = Archive::MAGIC_LEN +
296                  sizeof(Archive::MemberHeader) +
297                  pArchive.getSymTabSize();
298
299  if (0x0 != (offset & 1))
300    ++offset;
301
302  assert(pArchive.getARFile().hasMemArea());
303
304  MemoryRegion* header_region =
305    pArchive.getARFile().memArea()->request((pArchive.getARFile().fileOffset() +
306                                             offset),
307                                            sizeof(Archive::MemberHeader));
308  const Archive::MemberHeader* header =
309    reinterpret_cast<const Archive::MemberHeader*>(header_region->getBuffer());
310
311  assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)));
312
313  if (0 == memcmp(header->name, Archive::STRTAB_NAME, sizeof(header->name))) {
314    // read the extended name table
315    int strtab_size = atoi(header->size);
316    MemoryRegion* strtab_region =
317      pArchive.getARFile().memArea()->request(
318                                   (pArchive.getARFile().fileOffset() +
319                                    offset + sizeof(Archive::MemberHeader)),
320                                   strtab_size);
321    const char* strtab =
322      reinterpret_cast<const char*>(strtab_region->getBuffer());
323    pArchive.getStrTable().assign(strtab, strtab_size);
324    pArchive.getARFile().memArea()->release(strtab_region);
325  }
326  pArchive.getARFile().memArea()->release(header_region);
327  return true;
328}
329
330/// shouldIncludeStatus - given a sym name from armap and check if including
331/// the corresponding archive member, and then return the decision
332enum Archive::Symbol::Status
333GNUArchiveReader::shouldIncludeSymbol(const llvm::StringRef& pSymName) const
334{
335  // TODO: handle symbol version issue and user defined symbols
336  const ResolveInfo* info = m_Module.getNamePool().findInfo(pSymName);
337  if (NULL != info) {
338    if (!info->isUndef())
339      return Archive::Symbol::Exclude;
340    if (info->isWeak())
341      return Archive::Symbol::Unknown;
342    return Archive::Symbol::Include;
343  }
344  return Archive::Symbol::Unknown;
345}
346
347/// includeMember - include the object member in the given file offset, and
348/// return the size of the object
349/// @param pArchiveRoot - the archive root
350/// @param pFileOffset  - file offset of the member header in the archive
351size_t GNUArchiveReader::includeMember(Archive& pArchive, uint32_t pFileOffset)
352{
353  Input* cur_archive = &(pArchive.getARFile());
354  Input* member = NULL;
355  uint32_t file_offset = pFileOffset;
356  size_t size = 0;
357  do {
358    uint32_t nested_offset = 0;
359    // use the file offset in current archive to find out the member we
360    // want to include
361    member = readMemberHeader(pArchive,
362                              *cur_archive,
363                              file_offset,
364                              nested_offset,
365                              size);
366    assert(member != NULL);
367    // bypass if we get an archive that is already in the map
368    if (Input::Archive == member->type()) {
369        cur_archive = member;
370        file_offset = nested_offset;
371        continue;
372    }
373
374    // insert a node into the subtree of current archive.
375    Archive::ArchiveMember* parent =
376      pArchive.getArchiveMember(cur_archive->name());
377
378    assert(NULL != parent);
379    pArchive.inputs().insert(parent->lastPos, *(parent->move), *member);
380
381    // move the iterator to new created node, and also adjust the
382    // direction to Afterward for next insertion in this subtree
383    parent->move->move(parent->lastPos);
384    parent->move = &InputTree::Afterward;
385
386    if (m_ELFObjectReader.isMyFormat(*member)) {
387      member->setType(Input::Object);
388      pArchive.addObjectMember(pFileOffset, parent->lastPos);
389      m_ELFObjectReader.readHeader(*member);
390      m_ELFObjectReader.readSections(*member);
391      m_ELFObjectReader.readSymbols(*member);
392      m_Module.getObjectList().push_back(member);
393    }
394    else if (isMyFormat(*member)) {
395      member->setType(Input::Archive);
396      // when adding a new archive node, set the iterator to archive
397      // itself, and set the direction to Downward
398      pArchive.addArchiveMember(member->name(),
399                                parent->lastPos,
400                                &InputTree::Downward);
401      cur_archive = member;
402      file_offset = nested_offset;
403    }
404  } while (Input::Object != member->type());
405  return size;
406}
407
408/// includeAllMembers - include all object members. This is called if
409/// --whole-archive is the attribute for this archive file.
410bool GNUArchiveReader::includeAllMembers(Archive& pArchive)
411{
412  // read the symtab of the archive
413  readSymbolTable(pArchive);
414
415  // read the strtab of the archive
416  readStringTable(pArchive);
417
418  // add root archive to ArchiveMemberMap
419  pArchive.addArchiveMember(pArchive.getARFile().name(),
420                            pArchive.inputs().root(),
421                            &InputTree::Downward);
422
423  bool isThinAR = isThinArchive(pArchive.getARFile());
424  uint32_t begin_offset = pArchive.getARFile().fileOffset() +
425                          Archive::MAGIC_LEN +
426                          sizeof(Archive::MemberHeader) +
427                          pArchive.getSymTabSize();
428  if (pArchive.hasStrTable()) {
429    if (0x0 != (begin_offset & 1))
430      ++begin_offset;
431    begin_offset += sizeof(Archive::MemberHeader) +
432                    pArchive.getStrTable().size();
433  }
434  uint32_t end_offset = pArchive.getARFile().memArea()->handler()->size();
435  for (uint32_t offset = begin_offset;
436       offset < end_offset;
437       offset += sizeof(Archive::MemberHeader)) {
438
439    size_t size = includeMember(pArchive, offset);
440
441    if (!isThinAR) {
442      offset += size;
443    }
444
445    if (0x0 != (offset & 1))
446      ++offset;
447  }
448  return true;
449}
450
451