gold-plugin.cpp revision ea97aa6129fc89292e215d01fa66504195f1a969
1//===-- gold-plugin.cpp - Plugin to gold for Link Time Optimization  ------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This is a gold plugin for LLVM. It provides an LLVM implementation of the
11// interface described in http://gcc.gnu.org/wiki/whopr/driver .
12//
13//===----------------------------------------------------------------------===//
14
#include "plugin-api.h"

#include "llvm-c/lto.h"

#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Path.h"

#include <cerrno>
#include <cstdlib>
#include <cstring>
#include <list>
#include <string>
#include <vector>

#include <unistd.h>
27
28using namespace llvm;
29
30namespace {
31  ld_plugin_status discard_message(int level, const char *format, ...) {
32    // Die loudly. Recent versions of Gold pass ld_plugin_message as the first
33    // callback in the transfer vector. This should never be called.
34    abort();
35  }
36
37  ld_plugin_add_symbols add_symbols = NULL;
38  ld_plugin_get_symbols get_symbols = NULL;
39  ld_plugin_add_input_file add_input_file = NULL;
40  ld_plugin_message message = discard_message;
41
42  int api_version = 0;
43  int gold_version = 0;
44
45  struct claimed_file {
46    lto_module_t M;
47    void *handle;
48    std::vector<ld_plugin_symbol> syms;
49  };
50
51  lto_codegen_model output_type = LTO_CODEGEN_PIC_MODEL_STATIC;
52  std::list<claimed_file> Modules;
53  std::vector<sys::Path> Cleanup;
54}
55
56ld_plugin_status claim_file_hook(const ld_plugin_input_file *file,
57                                 int *claimed);
58ld_plugin_status all_symbols_read_hook(void);
59ld_plugin_status cleanup_hook(void);
60
61extern "C" ld_plugin_status onload(ld_plugin_tv *tv);
62ld_plugin_status onload(ld_plugin_tv *tv) {
63  // We're given a pointer to the first transfer vector. We read through them
64  // until we find one where tv_tag == LDPT_NULL. The REGISTER_* tagged values
65  // contain pointers to functions that we need to call to register our own
66  // hooks. The others are addresses of functions we can use to call into gold
67  // for services.
68
69  bool registeredClaimFile = false;
70  bool registeredAllSymbolsRead = false;
71  bool registeredCleanup = false;
72
73  for (; tv->tv_tag != LDPT_NULL; ++tv) {
74    switch (tv->tv_tag) {
75      case LDPT_API_VERSION:
76        api_version = tv->tv_u.tv_val;
77        break;
78      case LDPT_GOLD_VERSION:  // major * 100 + minor
79        gold_version = tv->tv_u.tv_val;
80        break;
81      case LDPT_LINKER_OUTPUT:
82        switch (tv->tv_u.tv_val) {
83          case LDPO_REL:  // .o
84          case LDPO_DYN:  // .so
85            output_type = LTO_CODEGEN_PIC_MODEL_DYNAMIC;
86            break;
87          case LDPO_EXEC:  // .exe
88            output_type = LTO_CODEGEN_PIC_MODEL_STATIC;
89            break;
90          default:
91            (*message)(LDPL_ERROR, "Unknown output file type %d",
92                       tv->tv_u.tv_val);
93            return LDPS_ERR;
94        }
95        // TODO: add an option to disable PIC.
96        //output_type = LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC;
97        break;
98      case LDPT_OPTION:
99        (*message)(LDPL_WARNING, "Ignoring flag %s", tv->tv_u.tv_string);
100        break;
101      case LDPT_REGISTER_CLAIM_FILE_HOOK: {
102        ld_plugin_register_claim_file callback;
103        callback = tv->tv_u.tv_register_claim_file;
104
105        if ((*callback)(claim_file_hook) != LDPS_OK)
106          return LDPS_ERR;
107
108        registeredClaimFile = true;
109      } break;
110      case LDPT_REGISTER_ALL_SYMBOLS_READ_HOOK: {
111        ld_plugin_register_all_symbols_read callback;
112        callback = tv->tv_u.tv_register_all_symbols_read;
113
114        if ((*callback)(all_symbols_read_hook) != LDPS_OK)
115          return LDPS_ERR;
116
117        registeredAllSymbolsRead = true;
118      } break;
119      case LDPT_REGISTER_CLEANUP_HOOK: {
120        ld_plugin_register_cleanup callback;
121        callback = tv->tv_u.tv_register_cleanup;
122
123        if ((*callback)(cleanup_hook) != LDPS_OK)
124          return LDPS_ERR;
125
126        registeredCleanup = true;
127      } break;
128      case LDPT_ADD_SYMBOLS:
129        add_symbols = tv->tv_u.tv_add_symbols;
130        break;
131      case LDPT_GET_SYMBOLS:
132        get_symbols = tv->tv_u.tv_get_symbols;
133        break;
134      case LDPT_ADD_INPUT_FILE:
135        add_input_file = tv->tv_u.tv_add_input_file;
136        break;
137      case LDPT_MESSAGE:
138        message = tv->tv_u.tv_message;
139        break;
140      default:
141        break;
142    }
143  }
144
145  if (!registeredClaimFile || !registeredAllSymbolsRead || !registeredCleanup ||
146      !add_symbols || !get_symbols || !add_input_file) {
147    (*message)(LDPL_ERROR, "Not all hooks registered for LLVMgold.");
148    return LDPS_ERR;
149  }
150
151  return LDPS_OK;
152}
153
154/// claim_file_hook - called by gold to see whether this file is one that
155/// our plugin can handle. We'll try to open it and register all the symbols
156/// with add_symbol if possible.
157ld_plugin_status claim_file_hook(const ld_plugin_input_file *file,
158                                 int *claimed) {
159  void *buf = NULL;
160  if (file->offset) {
161    // Gold has found what might be IR part-way inside of a file, such as
162    // an .a archive.
163    if (lseek(file->fd, file->offset, SEEK_SET) == -1) {
164      (*message)(LDPL_ERROR,
165                 "Failed to seek to archive member of %s at offset %d: %s\n",
166                 file->name,
167                 file->offset, strerror(errno));
168      return LDPS_ERR;
169    }
170    buf = malloc(file->filesize);
171    if (!buf) {
172      (*message)(LDPL_ERROR,
173                 "Failed to allocate buffer for archive member of size: %d\n",
174                 file->filesize);
175      return LDPS_ERR;
176    }
177    if (read(file->fd, buf, file->filesize) != file->filesize) {
178      (*message)(LDPL_ERROR,
179                 "Failed to read archive member of %s at offset %d: %s\n",
180                 file->name,
181                 file->offset,
182                 strerror(errno));
183      return LDPS_ERR;
184    }
185    if (!lto_module_is_object_file_in_memory(buf, file->filesize))
186      return LDPS_OK;
187  } else if (!lto_module_is_object_file(file->name))
188    return LDPS_OK;
189
190  *claimed = 1;
191  Modules.resize(Modules.size() + 1);
192  claimed_file &cf = Modules.back();
193
194  cf.M = buf ? lto_module_create_from_memory(buf, file->filesize) :
195               lto_module_create(file->name);
196  free(buf);
197  if (!cf.M) {
198    (*message)(LDPL_ERROR, "Failed to create LLVM module: %s",
199               lto_get_error_message());
200    return LDPS_ERR;
201  }
202  cf.handle = file->handle;
203  unsigned sym_count = lto_module_get_num_symbols(cf.M);
204  cf.syms.reserve(sym_count);
205
206  for (unsigned i = 0; i != sym_count; ++i) {
207    lto_symbol_attributes attrs = lto_module_get_symbol_attribute(cf.M, i);
208    if ((attrs & LTO_SYMBOL_SCOPE_MASK) == LTO_SYMBOL_SCOPE_INTERNAL)
209      continue;
210
211    cf.syms.push_back(ld_plugin_symbol());
212    ld_plugin_symbol &sym = cf.syms.back();
213    sym.name = const_cast<char *>(lto_module_get_symbol_name(cf.M, i));
214    sym.version = NULL;
215
216    int scope = attrs & LTO_SYMBOL_SCOPE_MASK;
217    switch (scope) {
218      case LTO_SYMBOL_SCOPE_HIDDEN:
219        sym.visibility = LDPV_HIDDEN;
220        break;
221      case LTO_SYMBOL_SCOPE_PROTECTED:
222        sym.visibility = LDPV_PROTECTED;
223        break;
224      case 0: // extern
225      case LTO_SYMBOL_SCOPE_DEFAULT:
226        sym.visibility = LDPV_DEFAULT;
227        break;
228      default:
229        (*message)(LDPL_ERROR, "Unknown scope attribute: %d", scope);
230        return LDPS_ERR;
231    }
232
233    int definition = attrs & LTO_SYMBOL_DEFINITION_MASK;
234    switch (definition) {
235      case LTO_SYMBOL_DEFINITION_REGULAR:
236        sym.def = LDPK_DEF;
237        break;
238      case LTO_SYMBOL_DEFINITION_UNDEFINED:
239        sym.def = LDPK_UNDEF;
240        break;
241      case LTO_SYMBOL_DEFINITION_TENTATIVE:
242        sym.def = LDPK_COMMON;
243        break;
244      case LTO_SYMBOL_DEFINITION_WEAK:
245        sym.def = LDPK_WEAKDEF;
246        break;
247      default:
248        (*message)(LDPL_ERROR, "Unknown definition attribute: %d", definition);
249        return LDPS_ERR;
250    }
251
252    // LLVM never emits COMDAT.
253    sym.size = 0;
254    sym.comdat_key = NULL;
255
256    sym.resolution = LDPR_UNKNOWN;
257  }
258
259  cf.syms.reserve(cf.syms.size());
260
261  if (!cf.syms.empty()) {
262    if ((*add_symbols)(cf.handle, cf.syms.size(), &cf.syms[0]) != LDPS_OK) {
263      (*message)(LDPL_ERROR, "Unable to add symbols!");
264      return LDPS_ERR;
265    }
266  }
267
268  return LDPS_OK;
269}
270
271/// all_symbols_read_hook - gold informs us that all symbols have been read.
272/// At this point, we use get_symbols to see if any of our definitions have
273/// been overridden by a native object file. Then, perform optimization and
274/// codegen.
275ld_plugin_status all_symbols_read_hook(void) {
276  lto_code_gen_t cg = lto_codegen_create();
277
278  for (std::list<claimed_file>::iterator I = Modules.begin(),
279       E = Modules.end(); I != E; ++I)
280    lto_codegen_add_module(cg, I->M);
281
282  // If we don't preserve any symbols, libLTO will assume that all symbols are
283  // needed. Keep all symbols unless we're producing a final executable.
284  if (output_type == LTO_CODEGEN_PIC_MODEL_STATIC) {
285    bool anySymbolsPreserved = false;
286    for (std::list<claimed_file>::iterator I = Modules.begin(),
287         E = Modules.end(); I != E; ++I) {
288      (*get_symbols)(I->handle, I->syms.size(), &I->syms[0]);
289      for (unsigned i = 0, e = I->syms.size(); i != e; i++) {
290        if (I->syms[i].resolution == LDPR_PREVAILING_DEF) {
291          lto_codegen_add_must_preserve_symbol(cg, I->syms[i].name);
292          anySymbolsPreserved = true;
293        }
294      }
295    }
296
297    if (!anySymbolsPreserved) {
298      // This entire file is unnecessary!
299      lto_codegen_dispose(cg);
300      return LDPS_OK;
301    }
302  }
303
304  lto_codegen_set_pic_model(cg, output_type);
305  lto_codegen_set_debug_model(cg, LTO_DEBUG_MODEL_DWARF);
306
307  size_t bufsize = 0;
308  const char *buffer = static_cast<const char *>(lto_codegen_compile(cg,
309                                                                     &bufsize));
310
311  std::string ErrMsg;
312
313  sys::Path uniqueObjPath("/tmp/llvmgold.o");
314  if (uniqueObjPath.createTemporaryFileOnDisk(true, &ErrMsg)) {
315    (*message)(LDPL_ERROR, "%s", ErrMsg.c_str());
316    return LDPS_ERR;
317  }
318  raw_fd_ostream *objFile = new raw_fd_ostream(uniqueObjPath.c_str(), true,
319                                               ErrMsg);
320  if (!ErrMsg.empty()) {
321    delete objFile;
322    (*message)(LDPL_ERROR, "%s", ErrMsg.c_str());
323    return LDPS_ERR;
324  }
325
326  objFile->write(buffer, bufsize);
327  objFile->close();
328
329  lto_codegen_dispose(cg);
330
331  if ((*add_input_file)(const_cast<char*>(uniqueObjPath.c_str())) != LDPS_OK) {
332    (*message)(LDPL_ERROR, "Unable to add .o file to the link.");
333    (*message)(LDPL_ERROR, "File left behind in: %s", uniqueObjPath.c_str());
334    return LDPS_ERR;
335  }
336
337  Cleanup.push_back(uniqueObjPath);
338
339  return LDPS_OK;
340}
341
342ld_plugin_status cleanup_hook(void) {
343  std::string ErrMsg;
344
345  for (int i = 0, e = Cleanup.size(); i != e; ++i)
346    if (Cleanup[i].eraseFromDisk(false, &ErrMsg))
347      (*message)(LDPL_ERROR, "Failed to delete '%s': %s", Cleanup[i].c_str(),
348                 ErrMsg.c_str());
349
350  return LDPS_OK;
351}
352