ArchiveReader.cpp revision 551ccae044b0ff658fe629dd67edd5ffe75d10e8
1//===- ArchiveReader.cpp - Code to read LLVM bytecode from .a files -------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file was developed by the LLVM research group and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// This file implements the ReadArchiveFile interface, which allows a linker to
// read all of the LLVM bytecode files contained in a .a file.  This file
// understands the standard system .a file format.  This can only handle the .a
// variant prevalent on Linux systems so far, but may be extended.  See the
// following source file for more information:
//   http://sources.redhat.com/cgi-bin/cvsweb.cgi/src/bfd/archive.c?cvsroot=src
16//
17//===----------------------------------------------------------------------===//
18
#include "llvm/Bytecode/Reader.h"
#include "llvm/Module.h"
#include "llvm/Support/FileUtilities.h"
#include <cassert>
#include <cctype>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>
using namespace llvm;
25
namespace {
  /// ObjectType - Classification that getObjectType assigns to each archive
  /// member so that ReadArchiveBuffer knows how to treat it.
  enum ObjectType {
    UserObject,            // A user .o/.bc file
    Unknown,               // Unknown file, just ignore it
    SVR4LongFilename,      // a "//" section used for long file names
    ArchiveSymbolTable     // Symbol table produced by ranlib.
  };

  /// ar_hdr - Fixed-size header that is overlaid on the raw archive bytes in
  /// front of every member.  Every field is a fixed-width character array;
  /// none of them is NUL terminated.  Only the name and size fields are read
  /// by the code in this file.
  struct ar_hdr {
    char name[16];         // Member name (or a long-filename reference)
    char date[12];
    char uid[6];
    char gid[6];
    char mode[8];
    char size[10];         // ASCII decimal size of the member data
    char fmag[2];          // Always equal to '`\n'
  };
}
44
45/// getObjectType - Determine the type of object that this header represents.
46/// This is capable of parsing the variety of special sections used for various
47/// purposes.
48///
49static enum ObjectType getObjectType(ar_hdr *H, std::string MemberName,
50                                     unsigned char *MemberData, unsigned Size) {
51  // Check for sections with special names...
52  if (MemberName == "__.SYMDEF       " || MemberName == "__.SYMDEF SORTED")
53    return ArchiveSymbolTable;
54  else if (MemberName == "//              ")
55    return SVR4LongFilename;
56
57  // Check to see if it looks like an llvm object file...
58  if (Size >= 4 && !memcmp(MemberData, "llvm", 4))
59    return UserObject;
60
61  return Unknown;
62}
63
/// Error - Store Message into *ErrorStr when the caller supplied a string to
/// receive it, then report failure.  Always returns true so that callers can
/// simply write "return Error(...)".
static inline bool Error(std::string *ErrorStr, const char *Message) {
  if (ErrorStr != 0)
    ErrorStr->assign(Message);
  return true;
}
68
/// ParseSymbolTableSection - Placeholder for reading an archive symbol table.
/// The arguments are ignored and nothing is written through S; the function
/// always returns false (success).
static bool ParseSymbolTableSection(unsigned char *Buffer, unsigned Size,
                                    std::string *S) {
  // Currently not supported (succeeds without doing anything)
  (void)Buffer;
  (void)Size;
  (void)S;
  const bool Failed = false;
  return Failed;
}
74
75static bool ReadArchiveBuffer(const std::string &ArchiveName,
76                              unsigned char *Buffer, unsigned Length,
77                              std::vector<Module*> &Objects,
78                              std::string *ErrorStr) {
79  if (Length < 8 || memcmp(Buffer, "!<arch>\n", 8))
80    return Error(ErrorStr, "signature incorrect for an archive file!");
81  Buffer += 8;  Length -= 8; // Skip the magic string.
82
83  std::vector<char> LongFilenames;
84
85  while (Length >= sizeof(ar_hdr)) {
86    ar_hdr *Hdr = (ar_hdr*)Buffer;
87    unsigned SizeFromHeader = atoi(Hdr->size);
88    if (SizeFromHeader + sizeof(ar_hdr) > Length)
89      return Error(ErrorStr, "invalid record length in archive file!");
90
91    unsigned char *MemberData = Buffer + sizeof(ar_hdr);
92    unsigned MemberSize = SizeFromHeader;
93    // Get name of archive member.
94    char *startp = Hdr->name;
95    char *endp = (char *) memchr (startp, '/', sizeof(ar_hdr));
96    if (memcmp (Hdr->name, "#1/", 3) == 0) {
97      // 4.4BSD/MacOSX long filenames are abbreviated as "#1/L", where L is an
98      // ASCII-coded decimal number representing the length of the name buffer,
99      // which is prepended to the archive member's contents.
100      unsigned NameLength = atoi (&Hdr->name[3]);
101      startp = (char *) MemberData;
102      endp = startp + NameLength;
103      MemberData += NameLength;
104      MemberSize -= NameLength;
105    } else if (startp == endp && isdigit (Hdr->name[1])) {
106      // SVR4 long filenames are abbreviated as "/I", where I is
107      // an ASCII-coded decimal index into the LongFilenames vector.
108      unsigned NameIndex = atoi (&Hdr->name[1]);
109      assert (LongFilenames.size () > NameIndex
110              && "SVR4-style long filename for archive member not found");
111      startp = &LongFilenames[NameIndex];
112      endp = strchr (startp, '/');
113    } else if (startp == endp && Hdr->name[1] == '/') {
114      // This is for the SVR4 long filename table (there might be other
115      // names starting with // but I don't know about them). Make sure that
116      // getObjectType sees it.
117      endp = &Hdr->name[sizeof (Hdr->name)];
118    }
119    if (!endp) {
120      // 4.4BSD/MacOSX *short* filenames are not guaranteed to have a
121      // terminator. Start at the end of the field and backtrack over spaces.
122      endp = startp + sizeof(Hdr->name);
123      while (endp[-1] == ' ')
124        --endp;
125    }
126    std::string MemberName (startp, endp);
127    std::string FullMemberName = ArchiveName + "(" + MemberName + ")";
128
129    switch (getObjectType(Hdr, MemberName, MemberData, MemberSize)) {
130    case SVR4LongFilename:
131      // If this is a long filename section, read all of the file names into the
132      // LongFilenames vector.
133      LongFilenames.assign (MemberData, MemberData + MemberSize);
134      break;
135    case UserObject: {
136      Module *M = ParseBytecodeBuffer(MemberData, MemberSize,
137                                      FullMemberName, ErrorStr);
138      if (!M) return true;
139      Objects.push_back(M);
140      break;
141    }
142    case ArchiveSymbolTable:
143      if (ParseSymbolTableSection(MemberData, MemberSize, ErrorStr))
144        return true;
145      break;
146    default:
147      std::cerr << "ReadArchiveBuffer: WARNING: Skipping unknown file: "
148                << FullMemberName << "\n";
149      break;   // Just ignore unknown files.
150    }
151
152    // Round SizeFromHeader up to an even number...
153    SizeFromHeader = (SizeFromHeader+1)/2*2;
154    Buffer += sizeof(ar_hdr)+SizeFromHeader;   // Move to the next entry
155    Length -= sizeof(ar_hdr)+SizeFromHeader;
156  }
157
158  return Length != 0;
159}
160
161
162// ReadArchiveFile - Read bytecode files from the specified .a file, returning
163// true on error, or false on success.  This does not support reading files from
164// standard input.
165//
166bool llvm::ReadArchiveFile(const std::string &Filename,
167                           std::vector<Module*> &Objects,std::string *ErrorStr){
168  unsigned Length;
169
170    // mmap in the file all at once...
171  unsigned char *Buffer =
172     (unsigned char*)ReadFileIntoAddressSpace(Filename, Length);
173  if (Buffer == 0) {
174    if (ErrorStr) *ErrorStr = "Error reading file '" + Filename + "'!";
175    return true;
176  }
177
178  // Parse the archive files we mmap'ped in
179  bool Result = ReadArchiveBuffer(Filename, Buffer, Length, Objects, ErrorStr);
180
181  // Unmmap the archive...
182  UnmapFileFromAddressSpace(Buffer, Length);
183
184  if (Result)    // Free any loaded objects
185    while (!Objects.empty()) {
186      delete Objects.back();
187      Objects.pop_back();
188    }
189
190  return Result;
191}
192