1//===-- LTOModule.cpp - LLVM Link Time Optimizer --------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// This file implements the Link Time Optimization library. This library is
// intended to be used by the linker to optimize code at link time.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/LTO/LTOModule.h"
16#include "llvm/ADT/Triple.h"
17#include "llvm/Bitcode/ReaderWriter.h"
18#include "llvm/IR/Constants.h"
19#include "llvm/IR/LLVMContext.h"
20#include "llvm/IR/Metadata.h"
21#include "llvm/IR/Module.h"
22#include "llvm/MC/MCExpr.h"
23#include "llvm/MC/MCInst.h"
24#include "llvm/MC/MCInstrInfo.h"
25#include "llvm/MC/MCParser/MCAsmParser.h"
26#include "llvm/MC/MCSection.h"
27#include "llvm/MC/MCSubtargetInfo.h"
28#include "llvm/MC/MCSymbol.h"
29#include "llvm/MC/MCTargetAsmParser.h"
30#include "llvm/MC/SubtargetFeature.h"
31#include "llvm/Support/CommandLine.h"
32#include "llvm/Support/FileSystem.h"
33#include "llvm/Support/Host.h"
34#include "llvm/Support/MemoryBuffer.h"
35#include "llvm/Support/Path.h"
36#include "llvm/Support/SourceMgr.h"
37#include "llvm/Support/TargetRegistry.h"
38#include "llvm/Support/TargetSelect.h"
39#include "llvm/Target/TargetLowering.h"
40#include "llvm/Target/TargetLoweringObjectFile.h"
41#include "llvm/Target/TargetRegisterInfo.h"
42#include "llvm/Transforms/Utils/GlobalStatus.h"
43#include <system_error>
44using namespace llvm;
45
46LTOModule::LTOModule(std::unique_ptr<object::IRObjectFile> Obj,
47                     llvm::TargetMachine *TM)
48    : IRFile(std::move(Obj)), _target(TM) {}
49
50/// isBitcodeFile - Returns 'true' if the file (or memory contents) is LLVM
51/// bitcode.
52bool LTOModule::isBitcodeFile(const void *mem, size_t length) {
53  return sys::fs::identify_magic(StringRef((const char *)mem, length)) ==
54         sys::fs::file_magic::bitcode;
55}
56
57bool LTOModule::isBitcodeFile(const char *path) {
58  sys::fs::file_magic type;
59  if (sys::fs::identify_magic(path, type))
60    return false;
61  return type == sys::fs::file_magic::bitcode;
62}
63
64bool LTOModule::isBitcodeForTarget(MemoryBuffer *buffer,
65                                   StringRef triplePrefix) {
66  std::string Triple = getBitcodeTargetTriple(buffer, getGlobalContext());
67  return StringRef(Triple).startswith(triplePrefix);
68}
69
70LTOModule *LTOModule::createFromFile(const char *path, TargetOptions options,
71                                     std::string &errMsg) {
72  ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
73      MemoryBuffer::getFile(path);
74  if (std::error_code EC = BufferOrErr.getError()) {
75    errMsg = EC.message();
76    return nullptr;
77  }
78  return makeLTOModule(std::move(BufferOrErr.get()), options, errMsg);
79}
80
81LTOModule *LTOModule::createFromOpenFile(int fd, const char *path, size_t size,
82                                         TargetOptions options,
83                                         std::string &errMsg) {
84  return createFromOpenFileSlice(fd, path, size, 0, options, errMsg);
85}
86
87LTOModule *LTOModule::createFromOpenFileSlice(int fd, const char *path,
88                                              size_t map_size, off_t offset,
89                                              TargetOptions options,
90                                              std::string &errMsg) {
91  ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
92      MemoryBuffer::getOpenFileSlice(fd, path, map_size, offset);
93  if (std::error_code EC = BufferOrErr.getError()) {
94    errMsg = EC.message();
95    return nullptr;
96  }
97  return makeLTOModule(std::move(BufferOrErr.get()), options, errMsg);
98}
99
100LTOModule *LTOModule::createFromBuffer(const void *mem, size_t length,
101                                       TargetOptions options,
102                                       std::string &errMsg, StringRef path) {
103  std::unique_ptr<MemoryBuffer> buffer(makeBuffer(mem, length, path));
104  if (!buffer)
105    return nullptr;
106  return makeLTOModule(std::move(buffer), options, errMsg);
107}
108
109LTOModule *LTOModule::makeLTOModule(std::unique_ptr<MemoryBuffer> Buffer,
110                                    TargetOptions options,
111                                    std::string &errMsg) {
112  ErrorOr<Module *> MOrErr =
113      getLazyBitcodeModule(Buffer.get(), getGlobalContext());
114  if (std::error_code EC = MOrErr.getError()) {
115    errMsg = EC.message();
116    return nullptr;
117  }
118  std::unique_ptr<Module> M(MOrErr.get());
119
120  std::string TripleStr = M->getTargetTriple();
121  if (TripleStr.empty())
122    TripleStr = sys::getDefaultTargetTriple();
123  llvm::Triple Triple(TripleStr);
124
125  // find machine architecture for this module
126  const Target *march = TargetRegistry::lookupTarget(TripleStr, errMsg);
127  if (!march)
128    return nullptr;
129
130  // construct LTOModule, hand over ownership of module and target
131  SubtargetFeatures Features;
132  Features.getDefaultSubtargetFeatures(Triple);
133  std::string FeatureStr = Features.getString();
134  // Set a default CPU for Darwin triples.
135  std::string CPU;
136  if (Triple.isOSDarwin()) {
137    if (Triple.getArch() == llvm::Triple::x86_64)
138      CPU = "core2";
139    else if (Triple.getArch() == llvm::Triple::x86)
140      CPU = "yonah";
141    else if (Triple.getArch() == llvm::Triple::arm64 ||
142             Triple.getArch() == llvm::Triple::aarch64)
143      CPU = "cyclone";
144  }
145
146  TargetMachine *target = march->createTargetMachine(TripleStr, CPU, FeatureStr,
147                                                     options);
148  M->materializeAllPermanently(true);
149  M->setDataLayout(target->getDataLayout());
150
151  std::unique_ptr<object::IRObjectFile> IRObj(
152      new object::IRObjectFile(std::move(Buffer), std::move(M)));
153
154  LTOModule *Ret = new LTOModule(std::move(IRObj), target);
155
156  if (Ret->parseSymbols(errMsg)) {
157    delete Ret;
158    return nullptr;
159  }
160
161  Ret->parseMetadata();
162
163  return Ret;
164}
165
166/// Create a MemoryBuffer from a memory range with an optional name.
167MemoryBuffer *LTOModule::makeBuffer(const void *mem, size_t length,
168                                    StringRef name) {
169  const char *startPtr = (const char*)mem;
170  return MemoryBuffer::getMemBuffer(StringRef(startPtr, length), name, false);
171}
172
173/// objcClassNameFromExpression - Get string that the data pointer points to.
174bool
175LTOModule::objcClassNameFromExpression(const Constant *c, std::string &name) {
176  if (const ConstantExpr *ce = dyn_cast<ConstantExpr>(c)) {
177    Constant *op = ce->getOperand(0);
178    if (GlobalVariable *gvn = dyn_cast<GlobalVariable>(op)) {
179      Constant *cn = gvn->getInitializer();
180      if (ConstantDataArray *ca = dyn_cast<ConstantDataArray>(cn)) {
181        if (ca->isCString()) {
182          name = ".objc_class_name_" + ca->getAsCString().str();
183          return true;
184        }
185      }
186    }
187  }
188  return false;
189}
190
191/// addObjCClass - Parse i386/ppc ObjC class data structure.
192void LTOModule::addObjCClass(const GlobalVariable *clgv) {
193  const ConstantStruct *c = dyn_cast<ConstantStruct>(clgv->getInitializer());
194  if (!c) return;
195
196  // second slot in __OBJC,__class is pointer to superclass name
197  std::string superclassName;
198  if (objcClassNameFromExpression(c->getOperand(1), superclassName)) {
199    NameAndAttributes info;
200    StringMap<NameAndAttributes>::value_type &entry =
201      _undefines.GetOrCreateValue(superclassName);
202    if (!entry.getValue().name) {
203      const char *symbolName = entry.getKey().data();
204      info.name = symbolName;
205      info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
206      info.isFunction = false;
207      info.symbol = clgv;
208      entry.setValue(info);
209    }
210  }
211
212  // third slot in __OBJC,__class is pointer to class name
213  std::string className;
214  if (objcClassNameFromExpression(c->getOperand(2), className)) {
215    StringSet::value_type &entry = _defines.GetOrCreateValue(className);
216    entry.setValue(1);
217
218    NameAndAttributes info;
219    info.name = entry.getKey().data();
220    info.attributes = LTO_SYMBOL_PERMISSIONS_DATA |
221      LTO_SYMBOL_DEFINITION_REGULAR | LTO_SYMBOL_SCOPE_DEFAULT;
222    info.isFunction = false;
223    info.symbol = clgv;
224    _symbols.push_back(info);
225  }
226}
227
228/// addObjCCategory - Parse i386/ppc ObjC category data structure.
229void LTOModule::addObjCCategory(const GlobalVariable *clgv) {
230  const ConstantStruct *c = dyn_cast<ConstantStruct>(clgv->getInitializer());
231  if (!c) return;
232
233  // second slot in __OBJC,__category is pointer to target class name
234  std::string targetclassName;
235  if (!objcClassNameFromExpression(c->getOperand(1), targetclassName))
236    return;
237
238  NameAndAttributes info;
239  StringMap<NameAndAttributes>::value_type &entry =
240    _undefines.GetOrCreateValue(targetclassName);
241
242  if (entry.getValue().name)
243    return;
244
245  const char *symbolName = entry.getKey().data();
246  info.name = symbolName;
247  info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
248  info.isFunction = false;
249  info.symbol = clgv;
250  entry.setValue(info);
251}
252
253/// addObjCClassRef - Parse i386/ppc ObjC class list data structure.
254void LTOModule::addObjCClassRef(const GlobalVariable *clgv) {
255  std::string targetclassName;
256  if (!objcClassNameFromExpression(clgv->getInitializer(), targetclassName))
257    return;
258
259  NameAndAttributes info;
260  StringMap<NameAndAttributes>::value_type &entry =
261    _undefines.GetOrCreateValue(targetclassName);
262  if (entry.getValue().name)
263    return;
264
265  const char *symbolName = entry.getKey().data();
266  info.name = symbolName;
267  info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
268  info.isFunction = false;
269  info.symbol = clgv;
270  entry.setValue(info);
271}
272
273void LTOModule::addDefinedDataSymbol(const object::BasicSymbolRef &Sym) {
274  SmallString<64> Buffer;
275  {
276    raw_svector_ostream OS(Buffer);
277    Sym.printName(OS);
278  }
279
280  const GlobalValue *V = IRFile->getSymbolGV(Sym.getRawDataRefImpl());
281  addDefinedDataSymbol(Buffer.c_str(), V);
282}
283
284void LTOModule::addDefinedDataSymbol(const char *Name, const GlobalValue *v) {
285  // Add to list of defined symbols.
286  addDefinedSymbol(Name, v, false);
287
288  if (!v->hasSection() /* || !isTargetDarwin */)
289    return;
290
291  // Special case i386/ppc ObjC data structures in magic sections:
292  // The issue is that the old ObjC object format did some strange
293  // contortions to avoid real linker symbols.  For instance, the
294  // ObjC class data structure is allocated statically in the executable
295  // that defines that class.  That data structures contains a pointer to
296  // its superclass.  But instead of just initializing that part of the
297  // struct to the address of its superclass, and letting the static and
298  // dynamic linkers do the rest, the runtime works by having that field
299  // instead point to a C-string that is the name of the superclass.
300  // At runtime the objc initialization updates that pointer and sets
301  // it to point to the actual super class.  As far as the linker
302  // knows it is just a pointer to a string.  But then someone wanted the
303  // linker to issue errors at build time if the superclass was not found.
304  // So they figured out a way in mach-o object format to use an absolute
305  // symbols (.objc_class_name_Foo = 0) and a floating reference
306  // (.reference .objc_class_name_Bar) to cause the linker into erroring when
307  // a class was missing.
308  // The following synthesizes the implicit .objc_* symbols for the linker
309  // from the ObjC data structures generated by the front end.
310
311  // special case if this data blob is an ObjC class definition
312  std::string Section = v->getSection();
313  if (Section.compare(0, 15, "__OBJC,__class,") == 0) {
314    if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
315      addObjCClass(gv);
316    }
317  }
318
319  // special case if this data blob is an ObjC category definition
320  else if (Section.compare(0, 18, "__OBJC,__category,") == 0) {
321    if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
322      addObjCCategory(gv);
323    }
324  }
325
326  // special case if this data blob is the list of referenced classes
327  else if (Section.compare(0, 18, "__OBJC,__cls_refs,") == 0) {
328    if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
329      addObjCClassRef(gv);
330    }
331  }
332}
333
334void LTOModule::addDefinedFunctionSymbol(const object::BasicSymbolRef &Sym) {
335  SmallString<64> Buffer;
336  {
337    raw_svector_ostream OS(Buffer);
338    Sym.printName(OS);
339  }
340
341  const Function *F =
342      cast<Function>(IRFile->getSymbolGV(Sym.getRawDataRefImpl()));
343  addDefinedFunctionSymbol(Buffer.c_str(), F);
344}
345
346void LTOModule::addDefinedFunctionSymbol(const char *Name, const Function *F) {
347  // add to list of defined symbols
348  addDefinedSymbol(Name, F, true);
349}
350
351static bool canBeHidden(const GlobalValue *GV) {
352  // FIXME: this is duplicated with another static function in AsmPrinter.cpp
353  GlobalValue::LinkageTypes L = GV->getLinkage();
354
355  if (L != GlobalValue::LinkOnceODRLinkage)
356    return false;
357
358  if (GV->hasUnnamedAddr())
359    return true;
360
361  // If it is a non constant variable, it needs to be uniqued across shared
362  // objects.
363  if (const GlobalVariable *Var = dyn_cast<GlobalVariable>(GV)) {
364    if (!Var->isConstant())
365      return false;
366  }
367
368  GlobalStatus GS;
369  if (GlobalStatus::analyzeGlobal(GV, GS))
370    return false;
371
372  return !GS.IsCompared;
373}
374
375void LTOModule::addDefinedSymbol(const char *Name, const GlobalValue *def,
376                                 bool isFunction) {
377  // set alignment part log2() can have rounding errors
378  uint32_t align = def->getAlignment();
379  uint32_t attr = align ? countTrailingZeros(align) : 0;
380
381  // set permissions part
382  if (isFunction) {
383    attr |= LTO_SYMBOL_PERMISSIONS_CODE;
384  } else {
385    const GlobalVariable *gv = dyn_cast<GlobalVariable>(def);
386    if (gv && gv->isConstant())
387      attr |= LTO_SYMBOL_PERMISSIONS_RODATA;
388    else
389      attr |= LTO_SYMBOL_PERMISSIONS_DATA;
390  }
391
392  // set definition part
393  if (def->hasWeakLinkage() || def->hasLinkOnceLinkage())
394    attr |= LTO_SYMBOL_DEFINITION_WEAK;
395  else if (def->hasCommonLinkage())
396    attr |= LTO_SYMBOL_DEFINITION_TENTATIVE;
397  else
398    attr |= LTO_SYMBOL_DEFINITION_REGULAR;
399
400  // set scope part
401  if (def->hasLocalLinkage())
402    // Ignore visibility if linkage is local.
403    attr |= LTO_SYMBOL_SCOPE_INTERNAL;
404  else if (def->hasHiddenVisibility())
405    attr |= LTO_SYMBOL_SCOPE_HIDDEN;
406  else if (def->hasProtectedVisibility())
407    attr |= LTO_SYMBOL_SCOPE_PROTECTED;
408  else if (canBeHidden(def))
409    attr |= LTO_SYMBOL_SCOPE_DEFAULT_CAN_BE_HIDDEN;
410  else
411    attr |= LTO_SYMBOL_SCOPE_DEFAULT;
412
413  StringSet::value_type &entry = _defines.GetOrCreateValue(Name);
414  entry.setValue(1);
415
416  // fill information structure
417  NameAndAttributes info;
418  StringRef NameRef = entry.getKey();
419  info.name = NameRef.data();
420  assert(info.name[NameRef.size()] == '\0');
421  info.attributes = attr;
422  info.isFunction = isFunction;
423  info.symbol = def;
424
425  // add to table of symbols
426  _symbols.push_back(info);
427}
428
429/// addAsmGlobalSymbol - Add a global symbol from module-level ASM to the
430/// defined list.
431void LTOModule::addAsmGlobalSymbol(const char *name,
432                                   lto_symbol_attributes scope) {
433  StringSet::value_type &entry = _defines.GetOrCreateValue(name);
434
435  // only add new define if not already defined
436  if (entry.getValue())
437    return;
438
439  entry.setValue(1);
440
441  NameAndAttributes &info = _undefines[entry.getKey().data()];
442
443  if (info.symbol == nullptr) {
444    // FIXME: This is trying to take care of module ASM like this:
445    //
446    //   module asm ".zerofill __FOO, __foo, _bar_baz_qux, 0"
447    //
448    // but is gross and its mother dresses it funny. Have the ASM parser give us
449    // more details for this type of situation so that we're not guessing so
450    // much.
451
452    // fill information structure
453    info.name = entry.getKey().data();
454    info.attributes =
455      LTO_SYMBOL_PERMISSIONS_DATA | LTO_SYMBOL_DEFINITION_REGULAR | scope;
456    info.isFunction = false;
457    info.symbol = nullptr;
458
459    // add to table of symbols
460    _symbols.push_back(info);
461    return;
462  }
463
464  if (info.isFunction)
465    addDefinedFunctionSymbol(info.name, cast<Function>(info.symbol));
466  else
467    addDefinedDataSymbol(info.name, info.symbol);
468
469  _symbols.back().attributes &= ~LTO_SYMBOL_SCOPE_MASK;
470  _symbols.back().attributes |= scope;
471}
472
473/// addAsmGlobalSymbolUndef - Add a global symbol from module-level ASM to the
474/// undefined list.
475void LTOModule::addAsmGlobalSymbolUndef(const char *name) {
476  StringMap<NameAndAttributes>::value_type &entry =
477    _undefines.GetOrCreateValue(name);
478
479  _asm_undefines.push_back(entry.getKey().data());
480
481  // we already have the symbol
482  if (entry.getValue().name)
483    return;
484
485  uint32_t attr = LTO_SYMBOL_DEFINITION_UNDEFINED;
486  attr |= LTO_SYMBOL_SCOPE_DEFAULT;
487  NameAndAttributes info;
488  info.name = entry.getKey().data();
489  info.attributes = attr;
490  info.isFunction = false;
491  info.symbol = nullptr;
492
493  entry.setValue(info);
494}
495
496/// Add a symbol which isn't defined just yet to a list to be resolved later.
497void LTOModule::addPotentialUndefinedSymbol(const object::BasicSymbolRef &Sym,
498                                            bool isFunc) {
499  SmallString<64> name;
500  {
501    raw_svector_ostream OS(name);
502    Sym.printName(OS);
503  }
504
505  StringMap<NameAndAttributes>::value_type &entry =
506    _undefines.GetOrCreateValue(name);
507
508  // we already have the symbol
509  if (entry.getValue().name)
510    return;
511
512  NameAndAttributes info;
513
514  info.name = entry.getKey().data();
515
516  const GlobalValue *decl = IRFile->getSymbolGV(Sym.getRawDataRefImpl());
517
518  if (decl->hasExternalWeakLinkage())
519    info.attributes = LTO_SYMBOL_DEFINITION_WEAKUNDEF;
520  else
521    info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
522
523  info.isFunction = isFunc;
524  info.symbol = decl;
525
526  entry.setValue(info);
527}
528
529/// parseSymbols - Parse the symbols from the module and model-level ASM and add
530/// them to either the defined or undefined lists.
531bool LTOModule::parseSymbols(std::string &errMsg) {
532  for (auto &Sym : IRFile->symbols()) {
533    const GlobalValue *GV = IRFile->getSymbolGV(Sym.getRawDataRefImpl());
534    uint32_t Flags = Sym.getFlags();
535    if (Flags & object::BasicSymbolRef::SF_FormatSpecific)
536      continue;
537
538    bool IsUndefined = Flags & object::BasicSymbolRef::SF_Undefined;
539
540    if (!GV) {
541      SmallString<64> Buffer;
542      {
543        raw_svector_ostream OS(Buffer);
544        Sym.printName(OS);
545      }
546      const char *Name = Buffer.c_str();
547
548      if (IsUndefined)
549        addAsmGlobalSymbolUndef(Name);
550      else if (Flags & object::BasicSymbolRef::SF_Global)
551        addAsmGlobalSymbol(Name, LTO_SYMBOL_SCOPE_DEFAULT);
552      else
553        addAsmGlobalSymbol(Name, LTO_SYMBOL_SCOPE_INTERNAL);
554      continue;
555    }
556
557    auto *F = dyn_cast<Function>(GV);
558    if (IsUndefined) {
559      addPotentialUndefinedSymbol(Sym, F != nullptr);
560      continue;
561    }
562
563    if (F) {
564      addDefinedFunctionSymbol(Sym);
565      continue;
566    }
567
568    if (isa<GlobalVariable>(GV)) {
569      addDefinedDataSymbol(Sym);
570      continue;
571    }
572
573    assert(isa<GlobalAlias>(GV));
574    addDefinedDataSymbol(Sym);
575  }
576
577  // make symbols for all undefines
578  for (StringMap<NameAndAttributes>::iterator u =_undefines.begin(),
579         e = _undefines.end(); u != e; ++u) {
580    // If this symbol also has a definition, then don't make an undefine because
581    // it is a tentative definition.
582    if (_defines.count(u->getKey())) continue;
583    NameAndAttributes info = u->getValue();
584    _symbols.push_back(info);
585  }
586
587  return false;
588}
589
590/// parseMetadata - Parse metadata from the module
591void LTOModule::parseMetadata() {
592  // Linker Options
593  if (Value *Val = getModule().getModuleFlag("Linker Options")) {
594    MDNode *LinkerOptions = cast<MDNode>(Val);
595    for (unsigned i = 0, e = LinkerOptions->getNumOperands(); i != e; ++i) {
596      MDNode *MDOptions = cast<MDNode>(LinkerOptions->getOperand(i));
597      for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) {
598        MDString *MDOption = cast<MDString>(MDOptions->getOperand(ii));
599        StringRef Op = _linkeropt_strings.
600            GetOrCreateValue(MDOption->getString()).getKey();
601        StringRef DepLibName = _target->getTargetLowering()->
602            getObjFileLowering().getDepLibFromLinkerOpt(Op);
603        if (!DepLibName.empty())
604          _deplibs.push_back(DepLibName.data());
605        else if (!Op.empty())
606          _linkeropts.push_back(Op.data());
607      }
608    }
609  }
610
611  // Add other interesting metadata here.
612}
613