ArchiveReader.cpp revision b70abe1c5adaf26e8d73d9aa4e5c76ed830cc94e
1//===- ArchiveReader.cpp - Code to read LLVM bytecode from .a files -------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file was developed by the LLVM research group and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the ReadArchiveFile interface, which allows a linker to
11// read all of the LLVM bytecode files contained in a .a file.  This file
12// understands the standard system .a file format.  This can only handle the .a
// variant prevalent on Linux systems so far, but may be extended.  See the
// BFD archive implementation for more information about the format:
15//   http://sources.redhat.com/cgi-bin/cvsweb.cgi/src/bfd/archive.c?cvsroot=src
16//
17//===----------------------------------------------------------------------===//
18
#include "llvm/Bytecode/Reader.h"
#include "llvm/Module.h"
#include "Support/FileUtilities.h"
#include "Config/sys/mman.h"
#include "Config/fcntl.h"
#include <cstdlib>
#include <unistd.h>
25
26namespace llvm {
27
28namespace {
29  struct ar_hdr {
30    char name[16];
31    char date[12];
32    char uid[6];
33    char gid[6];
34    char mode[8];
35    char size[10];
36    char fmag[2];          // Always equal to '`\n'
37  };
38
39  enum ObjectType {
40    UserObject,            // A user .o/.bc file
41    Unknown,               // Unknown file, just ignore it
42    SVR4LongFilename,      // a "//" section used for long file names
43    ArchiveSymbolTable,    // Symbol table produced by ranlib.
44  };
45}
46
47/// getObjectType - Determine the type of object that this header represents.
48/// This is capable of parsing the variety of special sections used for various
49/// purposes.
50///
51static enum ObjectType getObjectType(ar_hdr *H, unsigned char *MemberData,
52                                     unsigned Size) {
53  // Check for sections with special names...
54  if (!memcmp(H->name, "__.SYMDEF       ", 16))
55    return ArchiveSymbolTable;
56  if (!memcmp(H->name, "__.SYMDEF SORTED", 16))
57    return ArchiveSymbolTable;
58  if (!memcmp(H->name, "//              ", 16))
59    return SVR4LongFilename;
60
61  // Check to see if it looks like an llvm object file...
62  if (Size >= 4 && !memcmp(MemberData, "llvm", 4))
63    return UserObject;
64
65  return Unknown;
66}
67
/// Error - Record Message in *ErrorStr when the caller supplied a string to
/// receive it, and return true so that callers can simply write
/// "return Error(...)" from functions that return true on failure.
///
static inline bool Error(std::string *ErrorStr, const char *Message) {
  if (ErrorStr != 0)
    ErrorStr->assign(Message);
  return true;
}
72
/// ParseSymbolTableSection - Placeholder for parsing an archive symbol table
/// member.  Symbol tables are not interpreted yet: the arguments are ignored
/// and the function always reports success (false).
///
static bool ParseSymbolTableSection(unsigned char *Buffer, unsigned Size,
                                    std::string *S) {
  // Nothing is read from the section or written to the error string.
  (void)Buffer;
  (void)Size;
  (void)S;
  return false;
}
78
79static bool ReadArchiveBuffer(const std::string &ArchiveName,
80                              unsigned char *Buffer, unsigned Length,
81                              std::vector<Module*> &Objects,
82                              std::string *ErrorStr) {
83  if (Length < 8 || memcmp(Buffer, "!<arch>\n", 8))
84    return Error(ErrorStr, "signature incorrect for an archive file!");
85  Buffer += 8;  Length -= 8; // Skip the magic string.
86
87  std::vector<char> LongFilenames;
88
89  while (Length >= sizeof(ar_hdr)) {
90    ar_hdr *Hdr = (ar_hdr*)Buffer;
91    unsigned SizeFromHeader = atoi(Hdr->size);
92    if (SizeFromHeader + sizeof(ar_hdr) > Length)
93      return Error(ErrorStr, "invalid record length in archive file!");
94
95    unsigned char *MemberData = Buffer + sizeof(ar_hdr);
96    unsigned MemberSize = SizeFromHeader;
97    // Get name of archive member.
98    char *startp = Hdr->name;
99    char *endp = (char *) memchr (startp, '/', sizeof(ar_hdr));
100    if (memcmp (Hdr->name, "#1/", 3) == 0) {
101      // 4.4BSD/MacOSX long filenames are abbreviated as "#1/L", where L is an
102      // ASCII-coded decimal number representing the length of the name buffer,
103      // which is prepended to the archive member's contents.
104      unsigned NameLength = atoi (&Hdr->name[3]);
105      startp = (char *) MemberData;
106      endp = startp + NameLength;
107      MemberData += NameLength;
108      MemberSize -= NameLength;
109    } else if (startp == endp && isdigit (Hdr->name[1])) {
110      // SVR4 long filenames are abbreviated as "/I", where I is
111      // an ASCII-coded decimal index into the LongFilenames vector.
112      unsigned NameIndex = atoi (&Hdr->name[1]);
113      assert (LongFilenames.size () > NameIndex
114              && "SVR4-style long filename for archive member not found");
115      startp = &LongFilenames[NameIndex];
116      endp = strchr (startp, '/');
117    }
118    if (!endp) {
119      // 4.4BSD/MacOSX *short* filenames are not guaranteed to have a
120      // terminator. Start at the end of the field and backtrack over spaces.
121      endp = startp + sizeof(Hdr->name);
122      while (endp[-1] == ' ')
123        --endp;
124    }
125    std::string MemberName (startp, endp);
126    std::string FullMemberName = ArchiveName + "(" + MemberName + ")";
127
128    switch (getObjectType(Hdr, MemberData, MemberSize)) {
129    case SVR4LongFilename:
130      // If this is a long filename section, read all of the file names into the
131      // LongFilenames vector.
132      LongFilenames.assign (MemberData, MemberData + MemberSize);
133      break;
134    case UserObject: {
135      Module *M = ParseBytecodeBuffer(MemberData, MemberSize,
136                                      FullMemberName, ErrorStr);
137      if (!M) return true;
138      Objects.push_back(M);
139      break;
140    }
141    case ArchiveSymbolTable:
142      if (ParseSymbolTableSection(MemberData, MemberSize, ErrorStr))
143        return true;
144      break;
145    default:
146      std::cerr << "ReadArchiveBuffer: WARNING: Skipping unknown file: "
147                << FullMemberName << "\n";
148      break;   // Just ignore unknown files.
149    }
150
151    // Round SizeFromHeader up to an even number...
152    SizeFromHeader = (SizeFromHeader+1)/2*2;
153    Buffer += sizeof(ar_hdr)+SizeFromHeader;   // Move to the next entry
154    Length -= sizeof(ar_hdr)+SizeFromHeader;
155  }
156
157  return Length != 0;
158}
159
160
161// ReadArchiveFile - Read bytecode files from the specified .a file, returning
162// true on error, or false on success.  This does not support reading files from
163// standard input.
164//
165bool ReadArchiveFile(const std::string &Filename, std::vector<Module*> &Objects,
166                     std::string *ErrorStr) {
167  int Length = getFileSize(Filename);
168  if (Length == -1)
169    return Error(ErrorStr, "Error getting file length!");
170
171  int FD = open(Filename.c_str(), O_RDONLY);
172  if (FD == -1)
173    return Error(ErrorStr, "Error opening file!");
174
175    // mmap in the file all at once...
176  unsigned char *Buffer = (unsigned char*)mmap(0, Length, PROT_READ,
177                                               MAP_PRIVATE, FD, 0);
178  if (Buffer == (unsigned char*)MAP_FAILED)
179    return Error(ErrorStr, "Error mmapping file!");
180
181  // Parse the archive files we mmap'ped in
182  bool Result = ReadArchiveBuffer(Filename, Buffer, Length, Objects, ErrorStr);
183
184  // Unmmap the archive...
185  munmap((char*)Buffer, Length);
186
187  if (Result)    // Free any loaded objects
188    while (!Objects.empty()) {
189      delete Objects.back();
190      Objects.pop_back();
191    }
192
193  return Result;
194}
195
196} // End llvm namespace
197