gold-plugin.cpp revision 6c8099243a0d8ff710e8f657628a8bea99b5dd07
1//===-- gold-plugin.cpp - Plugin to gold for Link Time Optimization  ------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This is a gold plugin for LLVM. It provides an LLVM implementation of the
11// interface described in http://gcc.gnu.org/wiki/whopr/driver .
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/Config/config.h"
16#include "plugin-api.h"
17
18#include "llvm-c/lto.h"
19
20#include "llvm/Support/raw_ostream.h"
21#include "llvm/System/Errno.h"
22#include "llvm/System/Path.h"
23#include "llvm/System/Program.h"
24
25#include <cerrno>
26#include <cstdlib>
27#include <cstring>
28#include <fstream>
29#include <list>
30#include <vector>
31
32using namespace llvm;
33
34namespace {
35  ld_plugin_status discard_message(int level, const char *format, ...) {
36    // Die loudly. Recent versions of Gold pass ld_plugin_message as the first
37    // callback in the transfer vector. This should never be called.
38    abort();
39  }
40
41  ld_plugin_add_symbols add_symbols = NULL;
42  ld_plugin_get_symbols get_symbols = NULL;
43  ld_plugin_add_input_file add_input_file = NULL;
44  ld_plugin_message message = discard_message;
45
46  int api_version = 0;
47  int gold_version = 0;
48
49  struct claimed_file {
50    lto_module_t M;
51    void *handle;
52    std::vector<ld_plugin_symbol> syms;
53  };
54
55  lto_codegen_model output_type = LTO_CODEGEN_PIC_MODEL_STATIC;
56  std::string output_name = "";
57  std::list<claimed_file> Modules;
58  std::vector<sys::Path> Cleanup;
59}
60
61namespace options {
62  enum generate_bc { BC_NO, BC_ALSO, BC_ONLY };
63  static bool generate_api_file = false;
64  static generate_bc generate_bc_file = BC_NO;
65  static std::string bc_path;
66  static std::string as_path;
67  // Additional options to pass into the code generator.
68  // Note: This array will contain all plugin options which are not claimed
69  // as plugin exclusive to pass to the code generator.
70  // For example, "generate-api-file" and "as"options are for the plugin
71  // use only and will not be passed.
72  static std::vector<std::string> extra;
73
74  static void process_plugin_option(const char* opt_)
75  {
76    if (opt_ == NULL)
77      return;
78    llvm::StringRef opt = opt_;
79
80    if (opt == "generate-api-file") {
81      generate_api_file = true;
82    } else if (opt.startswith("as=")) {
83      if (!as_path.empty()) {
84        (*message)(LDPL_WARNING, "Path to as specified twice. "
85                   "Discarding %s", opt_);
86      } else {
87        as_path = opt.substr(strlen("as="));
88      }
89    } else if (opt == "emit-llvm") {
90      generate_bc_file = BC_ONLY;
91    } else if (opt == "also-emit-llvm") {
92      generate_bc_file = BC_ALSO;
93    } else if (opt.startswith("also-emit-llvm=")) {
94      llvm::StringRef path = opt.substr(strlen("also-emit-llvm="));
95      generate_bc_file = BC_ALSO;
96      if (!bc_path.empty()) {
97        (*message)(LDPL_WARNING, "Path to the output IL file specified twice. "
98                   "Discarding %s", opt_);
99      } else {
100        bc_path = path;
101      }
102    } else {
103      // Save this option to pass to the code generator.
104      extra.push_back(opt);
105    }
106  }
107}
108
109static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file,
110                                        int *claimed);
111static ld_plugin_status all_symbols_read_hook(void);
112static ld_plugin_status cleanup_hook(void);
113
114extern "C" ld_plugin_status onload(ld_plugin_tv *tv);
115ld_plugin_status onload(ld_plugin_tv *tv) {
116  // We're given a pointer to the first transfer vector. We read through them
117  // until we find one where tv_tag == LDPT_NULL. The REGISTER_* tagged values
118  // contain pointers to functions that we need to call to register our own
119  // hooks. The others are addresses of functions we can use to call into gold
120  // for services.
121
122  bool registeredClaimFile = false;
123  bool registeredAllSymbolsRead = false;
124  bool registeredCleanup = false;
125
126  for (; tv->tv_tag != LDPT_NULL; ++tv) {
127    switch (tv->tv_tag) {
128      case LDPT_API_VERSION:
129        api_version = tv->tv_u.tv_val;
130        break;
131      case LDPT_GOLD_VERSION:  // major * 100 + minor
132        gold_version = tv->tv_u.tv_val;
133        break;
134      case LDPT_OUTPUT_NAME:
135        output_name = tv->tv_u.tv_string;
136        break;
137      case LDPT_LINKER_OUTPUT:
138        switch (tv->tv_u.tv_val) {
139          case LDPO_REL:  // .o
140          case LDPO_DYN:  // .so
141            output_type = LTO_CODEGEN_PIC_MODEL_DYNAMIC;
142            break;
143          case LDPO_EXEC:  // .exe
144            output_type = LTO_CODEGEN_PIC_MODEL_STATIC;
145            break;
146          default:
147            (*message)(LDPL_ERROR, "Unknown output file type %d",
148                       tv->tv_u.tv_val);
149            return LDPS_ERR;
150        }
151        // TODO: add an option to disable PIC.
152        //output_type = LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC;
153        break;
154      case LDPT_OPTION:
155        options::process_plugin_option(tv->tv_u.tv_string);
156        break;
157      case LDPT_REGISTER_CLAIM_FILE_HOOK: {
158        ld_plugin_register_claim_file callback;
159        callback = tv->tv_u.tv_register_claim_file;
160
161        if ((*callback)(claim_file_hook) != LDPS_OK)
162          return LDPS_ERR;
163
164        registeredClaimFile = true;
165      } break;
166      case LDPT_REGISTER_ALL_SYMBOLS_READ_HOOK: {
167        ld_plugin_register_all_symbols_read callback;
168        callback = tv->tv_u.tv_register_all_symbols_read;
169
170        if ((*callback)(all_symbols_read_hook) != LDPS_OK)
171          return LDPS_ERR;
172
173        registeredAllSymbolsRead = true;
174      } break;
175      case LDPT_REGISTER_CLEANUP_HOOK: {
176        ld_plugin_register_cleanup callback;
177        callback = tv->tv_u.tv_register_cleanup;
178
179        if ((*callback)(cleanup_hook) != LDPS_OK)
180          return LDPS_ERR;
181
182        registeredCleanup = true;
183      } break;
184      case LDPT_ADD_SYMBOLS:
185        add_symbols = tv->tv_u.tv_add_symbols;
186        break;
187      case LDPT_GET_SYMBOLS:
188        get_symbols = tv->tv_u.tv_get_symbols;
189        break;
190      case LDPT_ADD_INPUT_FILE:
191        add_input_file = tv->tv_u.tv_add_input_file;
192        break;
193      case LDPT_MESSAGE:
194        message = tv->tv_u.tv_message;
195        break;
196      default:
197        break;
198    }
199  }
200
201  if (!registeredClaimFile) {
202    (*message)(LDPL_ERROR, "register_claim_file not passed to LLVMgold.");
203    return LDPS_ERR;
204  }
205  if (!add_symbols) {
206    (*message)(LDPL_ERROR, "add_symbols not passed to LLVMgold.");
207    return LDPS_ERR;
208  }
209
210  return LDPS_OK;
211}
212
213/// claim_file_hook - called by gold to see whether this file is one that
214/// our plugin can handle. We'll try to open it and register all the symbols
215/// with add_symbol if possible.
216static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file,
217                                        int *claimed) {
218  void *buf = NULL;
219  if (file->offset) {
220    // Gold has found what might be IR part-way inside of a file, such as
221    // an .a archive.
222    if (lseek(file->fd, file->offset, SEEK_SET) == -1) {
223      (*message)(LDPL_ERROR,
224                 "Failed to seek to archive member of %s at offset %d: %s\n",
225                 file->name,
226                 file->offset, sys::StrError(errno).c_str());
227      return LDPS_ERR;
228    }
229    buf = malloc(file->filesize);
230    if (!buf) {
231      (*message)(LDPL_ERROR,
232                 "Failed to allocate buffer for archive member of size: %d\n",
233                 file->filesize);
234      return LDPS_ERR;
235    }
236    if (read(file->fd, buf, file->filesize) != file->filesize) {
237      (*message)(LDPL_ERROR,
238                 "Failed to read archive member of %s at offset %d: %s\n",
239                 file->name,
240                 file->offset,
241                 sys::StrError(errno).c_str());
242      free(buf);
243      return LDPS_ERR;
244    }
245    if (!lto_module_is_object_file_in_memory(buf, file->filesize)) {
246      free(buf);
247      return LDPS_OK;
248    }
249  } else if (!lto_module_is_object_file(file->name))
250    return LDPS_OK;
251
252  *claimed = 1;
253  Modules.resize(Modules.size() + 1);
254  claimed_file &cf = Modules.back();
255
256  cf.M = buf ? lto_module_create_from_memory(buf, file->filesize) :
257               lto_module_create(file->name);
258  free(buf);
259  if (!cf.M) {
260    (*message)(LDPL_ERROR, "Failed to create LLVM module: %s",
261               lto_get_error_message());
262    return LDPS_ERR;
263  }
264  cf.handle = file->handle;
265  unsigned sym_count = lto_module_get_num_symbols(cf.M);
266  cf.syms.reserve(sym_count);
267
268  for (unsigned i = 0; i != sym_count; ++i) {
269    lto_symbol_attributes attrs = lto_module_get_symbol_attribute(cf.M, i);
270    if ((attrs & LTO_SYMBOL_SCOPE_MASK) == LTO_SYMBOL_SCOPE_INTERNAL)
271      continue;
272
273    cf.syms.push_back(ld_plugin_symbol());
274    ld_plugin_symbol &sym = cf.syms.back();
275    sym.name = const_cast<char *>(lto_module_get_symbol_name(cf.M, i));
276    sym.version = NULL;
277
278    int scope = attrs & LTO_SYMBOL_SCOPE_MASK;
279    switch (scope) {
280      case LTO_SYMBOL_SCOPE_HIDDEN:
281        sym.visibility = LDPV_HIDDEN;
282        break;
283      case LTO_SYMBOL_SCOPE_PROTECTED:
284        sym.visibility = LDPV_PROTECTED;
285        break;
286      case 0: // extern
287      case LTO_SYMBOL_SCOPE_DEFAULT:
288        sym.visibility = LDPV_DEFAULT;
289        break;
290      default:
291        (*message)(LDPL_ERROR, "Unknown scope attribute: %d", scope);
292        return LDPS_ERR;
293    }
294
295    int definition = attrs & LTO_SYMBOL_DEFINITION_MASK;
296    switch (definition) {
297      case LTO_SYMBOL_DEFINITION_REGULAR:
298        sym.def = LDPK_DEF;
299        break;
300      case LTO_SYMBOL_DEFINITION_UNDEFINED:
301        sym.def = LDPK_UNDEF;
302        break;
303      case LTO_SYMBOL_DEFINITION_TENTATIVE:
304        sym.def = LDPK_COMMON;
305        break;
306      case LTO_SYMBOL_DEFINITION_WEAK:
307        sym.def = LDPK_WEAKDEF;
308        break;
309      case LTO_SYMBOL_DEFINITION_WEAKUNDEF:
310        sym.def = LDPK_WEAKUNDEF;
311        break;
312      default:
313        (*message)(LDPL_ERROR, "Unknown definition attribute: %d", definition);
314        return LDPS_ERR;
315    }
316
317    // LLVM never emits COMDAT.
318    sym.size = 0;
319    sym.comdat_key = NULL;
320
321    sym.resolution = LDPR_UNKNOWN;
322  }
323
324  cf.syms.reserve(cf.syms.size());
325
326  if (!cf.syms.empty()) {
327    if ((*add_symbols)(cf.handle, cf.syms.size(), &cf.syms[0]) != LDPS_OK) {
328      (*message)(LDPL_ERROR, "Unable to add symbols!");
329      return LDPS_ERR;
330    }
331  }
332
333  return LDPS_OK;
334}
335
336/// all_symbols_read_hook - gold informs us that all symbols have been read.
337/// At this point, we use get_symbols to see if any of our definitions have
338/// been overridden by a native object file. Then, perform optimization and
339/// codegen.
340static ld_plugin_status all_symbols_read_hook(void) {
341  lto_code_gen_t cg = lto_codegen_create();
342
343  for (std::list<claimed_file>::iterator I = Modules.begin(),
344       E = Modules.end(); I != E; ++I)
345    lto_codegen_add_module(cg, I->M);
346
347  std::ofstream api_file;
348  if (options::generate_api_file) {
349    api_file.open("apifile.txt", std::ofstream::out | std::ofstream::trunc);
350    if (!api_file.is_open()) {
351      (*message)(LDPL_FATAL, "Unable to open apifile.txt for writing.");
352      abort();
353    }
354  }
355
356  // If we don't preserve any symbols, libLTO will assume that all symbols are
357  // needed. Keep all symbols unless we're producing a final executable.
358  bool anySymbolsPreserved = false;
359  for (std::list<claimed_file>::iterator I = Modules.begin(),
360         E = Modules.end(); I != E; ++I) {
361    (*get_symbols)(I->handle, I->syms.size(), &I->syms[0]);
362    for (unsigned i = 0, e = I->syms.size(); i != e; i++) {
363      if (I->syms[i].resolution == LDPR_PREVAILING_DEF) {
364        lto_codegen_add_must_preserve_symbol(cg, I->syms[i].name);
365        anySymbolsPreserved = true;
366
367        if (options::generate_api_file)
368          api_file << I->syms[i].name << "\n";
369      }
370    }
371
372    if (options::generate_api_file)
373      api_file.close();
374
375    if (!anySymbolsPreserved) {
376      // This entire file is unnecessary!
377      lto_codegen_dispose(cg);
378      return LDPS_OK;
379    }
380  }
381
382  lto_codegen_set_pic_model(cg, output_type);
383  lto_codegen_set_debug_model(cg, LTO_DEBUG_MODEL_DWARF);
384  if (!options::as_path.empty()) {
385    sys::Path p = sys::Program::FindProgramByName(options::as_path);
386    lto_codegen_set_assembler_path(cg, p.c_str());
387  }
388  // Pass through extra options to the code generator.
389  if (!options::extra.empty()) {
390    for (std::vector<std::string>::iterator it = options::extra.begin();
391         it != options::extra.end(); ++it) {
392      lto_codegen_debug_options(cg, (*it).c_str());
393    }
394  }
395
396
397  if (options::generate_bc_file != options::BC_NO) {
398    std::string path;
399    if (options::generate_bc_file == options::BC_ONLY)
400      path = output_name;
401    else if (!options::bc_path.empty())
402      path = options::bc_path;
403    else
404      path = output_name + ".bc";
405    bool err = lto_codegen_write_merged_modules(cg, path.c_str());
406    if (err)
407      (*message)(LDPL_FATAL, "Failed to write the output file.");
408    if (options::generate_bc_file == options::BC_ONLY)
409      exit(0);
410  }
411  size_t bufsize = 0;
412  const char *buffer = static_cast<const char *>(lto_codegen_compile(cg,
413                                                                     &bufsize));
414
415  std::string ErrMsg;
416
417  sys::Path uniqueObjPath("/tmp/llvmgold.o");
418  if (uniqueObjPath.createTemporaryFileOnDisk(true, &ErrMsg)) {
419    (*message)(LDPL_ERROR, "%s", ErrMsg.c_str());
420    return LDPS_ERR;
421  }
422  raw_fd_ostream *objFile =
423    new raw_fd_ostream(uniqueObjPath.c_str(), ErrMsg,
424                       raw_fd_ostream::F_Binary);
425  if (!ErrMsg.empty()) {
426    delete objFile;
427    (*message)(LDPL_ERROR, "%s", ErrMsg.c_str());
428    return LDPS_ERR;
429  }
430
431  objFile->write(buffer, bufsize);
432  objFile->close();
433
434  lto_codegen_dispose(cg);
435
436  if ((*add_input_file)(const_cast<char*>(uniqueObjPath.c_str())) != LDPS_OK) {
437    (*message)(LDPL_ERROR, "Unable to add .o file to the link.");
438    (*message)(LDPL_ERROR, "File left behind in: %s", uniqueObjPath.c_str());
439    return LDPS_ERR;
440  }
441
442  Cleanup.push_back(uniqueObjPath);
443
444  return LDPS_OK;
445}
446
447static ld_plugin_status cleanup_hook(void) {
448  std::string ErrMsg;
449
450  for (int i = 0, e = Cleanup.size(); i != e; ++i)
451    if (Cleanup[i].eraseFromDisk(false, &ErrMsg))
452      (*message)(LDPL_ERROR, "Failed to delete '%s': %s", Cleanup[i].c_str(),
453                 ErrMsg.c_str());
454
455  return LDPS_OK;
456}
457