LTOModule.cpp revision 3eb445feb22647e867a339f4c59b0a716b03a21a
1//===-LTOModule.cpp - LLVM Link Time Optimizer ----------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the Link Time Optimization library. This library is
11// intended to be used by linker to optimize code at link time.
12//
13//===----------------------------------------------------------------------===//
14
15#include "LTOModule.h"
16
17#include "llvm/Constants.h"
18#include "llvm/Module.h"
19#include "llvm/ModuleProvider.h"
20#include "llvm/ADT/OwningPtr.h"
21#include "llvm/Bitcode/ReaderWriter.h"
22#include "llvm/Support/SystemUtils.h"
23#include "llvm/Support/Mangler.h"
24#include "llvm/Support/MemoryBuffer.h"
25#include "llvm/Support/MathExtras.h"
26#include "llvm/System/Path.h"
27#include "llvm/System/Process.h"
28#include "llvm/Target/SubtargetFeature.h"
29#include "llvm/Target/TargetMachine.h"
30#include "llvm/Target/TargetMachineRegistry.h"
31#include "llvm/Target/TargetAsmInfo.h"
32
33#include <fstream>
34
35using namespace llvm;
36
37bool LTOModule::isBitcodeFile(const void* mem, size_t length)
38{
39    return ( llvm::sys::IdentifyFileType((char*)mem, length)
40                                            == llvm::sys::Bitcode_FileType );
41}
42
43bool LTOModule::isBitcodeFile(const char* path)
44{
45    return llvm::sys::Path(path).isBitcodeFile();
46}
47
48bool LTOModule::isBitcodeFileForTarget(const void* mem, size_t length,
49                                       const char* triplePrefix)
50{
51    MemoryBuffer* buffer = makeBuffer(mem, length);
52    if ( buffer == NULL )
53        return false;
54    return isTargetMatch(buffer, triplePrefix);
55}
56
57
58bool LTOModule::isBitcodeFileForTarget(const char* path,
59                                       const char* triplePrefix)
60{
61    MemoryBuffer *buffer = MemoryBuffer::getFile(path);
62    if (buffer == NULL)
63        return false;
64    return isTargetMatch(buffer, triplePrefix);
65}
66
67// takes ownership of buffer
68bool LTOModule::isTargetMatch(MemoryBuffer* buffer, const char* triplePrefix)
69{
70    OwningPtr<ModuleProvider> mp(getBitcodeModuleProvider(buffer));
71    // on success, mp owns buffer and both are deleted at end of this method
72    if ( !mp ) {
73        delete buffer;
74        return false;
75    }
76    std::string actualTarget = mp->getModule()->getTargetTriple();
77    return ( strncmp(actualTarget.c_str(), triplePrefix,
78                    strlen(triplePrefix)) == 0);
79}
80
81
82LTOModule::LTOModule(Module* m, TargetMachine* t)
83 : _module(m), _target(t), _symbolsParsed(false)
84{
85}
86
87LTOModule* LTOModule::makeLTOModule(const char* path, std::string& errMsg)
88{
89    OwningPtr<MemoryBuffer> buffer(MemoryBuffer::getFile(path, &errMsg));
90    if ( !buffer )
91        return NULL;
92    return makeLTOModule(buffer.get(), errMsg);
93}
94
95/// makeBuffer - create a MemoryBuffer from a memory range.
96/// MemoryBuffer requires the byte past end of the buffer to be a zero.
97/// We might get lucky and already be that way, otherwise make a copy.
98/// Also if next byte is on a different page, don't assume it is readable.
99MemoryBuffer* LTOModule::makeBuffer(const void* mem, size_t length)
100{
101    const char* startPtr = (char*)mem;
102    const char* endPtr = startPtr+length;
103    if ( (((uintptr_t)endPtr & (sys::Process::GetPageSize()-1)) == 0)
104        || (*endPtr != 0) )
105        return MemoryBuffer::getMemBufferCopy(startPtr, endPtr);
106    else
107        return MemoryBuffer::getMemBuffer(startPtr, endPtr);
108}
109
110
111LTOModule* LTOModule::makeLTOModule(const void* mem, size_t length,
112                                    std::string& errMsg)
113{
114    OwningPtr<MemoryBuffer> buffer(makeBuffer(mem, length));
115    if ( !buffer )
116        return NULL;
117    return makeLTOModule(buffer.get(), errMsg);
118}
119
120/// getFeatureString - Return a string listing the features associated with the
121/// target triple.
122///
123/// FIXME: This is an inelegant way of specifying the features of a
124/// subtarget. It would be better if we could encode this information into the
125/// IR. See <rdar://5972456>.
126std::string getFeatureString(const char *TargetTriple) {
127  SubtargetFeatures Features;
128
129  if (strncmp(TargetTriple, "powerpc-apple-", 14) == 0) {
130    Features.AddFeature("altivec", true);
131  } else if (strncmp(TargetTriple, "powerpc64-apple-", 16) == 0) {
132    Features.AddFeature("64bit", true);
133    Features.AddFeature("altivec", true);
134  }
135
136  return Features.getString();
137}
138
139LTOModule* LTOModule::makeLTOModule(MemoryBuffer* buffer, std::string& errMsg)
140{
141    // parse bitcode buffer
142    OwningPtr<Module> m(ParseBitcodeFile(buffer, &errMsg));
143    if ( !m )
144        return NULL;
145    // find machine architecture for this module
146    const TargetMachineRegistry::entry* march =
147            TargetMachineRegistry::getClosestStaticTargetForModule(*m, errMsg);
148
149    if ( march == NULL )
150        return NULL;
151
152    // construct LTModule, hand over ownership of module and target
153    std::string FeatureStr = getFeatureString(m->getTargetTriple().c_str());
154    TargetMachine* target = march->CtorFn(*m, FeatureStr);
155    return new LTOModule(m.take(), target);
156}
157
158
159const char* LTOModule::getTargetTriple()
160{
161    return _module->getTargetTriple().c_str();
162}
163
164void LTOModule::addDefinedFunctionSymbol(Function* f, Mangler &mangler)
165{
166    // add to list of defined symbols
167    addDefinedSymbol(f, mangler, true);
168
169    // add external symbols referenced by this function.
170    for (Function::iterator b = f->begin(); b != f->end(); ++b) {
171        for (BasicBlock::iterator i = b->begin(); i != b->end(); ++i) {
172            for (unsigned count = 0, total = i->getNumOperands();
173                                        count != total; ++count) {
174                findExternalRefs(i->getOperand(count), mangler);
175            }
176        }
177    }
178}
179
180// get string that data pointer points to
181bool LTOModule::objcClassNameFromExpression(Constant* c, std::string& name)
182{
183    if (ConstantExpr* ce = dyn_cast<ConstantExpr>(c)) {
184        Constant* op = ce->getOperand(0);
185        if (GlobalVariable* gvn = dyn_cast<GlobalVariable>(op)) {
186            Constant* cn = gvn->getInitializer();
187            if (ConstantArray* ca = dyn_cast<ConstantArray>(cn)) {
188                if ( ca->isCString() ) {
189                    name = ".objc_class_name_" + ca->getAsString();
190                    return true;
191                }
192            }
193        }
194    }
195    return false;
196}
197
198// parse i386/ppc ObjC class data structure
199void LTOModule::addObjCClass(GlobalVariable* clgv)
200{
201    if (ConstantStruct* c = dyn_cast<ConstantStruct>(clgv->getInitializer())) {
202        // second slot in __OBJC,__class is pointer to superclass name
203        std::string superclassName;
204        if ( objcClassNameFromExpression(c->getOperand(1), superclassName) ) {
205            NameAndAttributes info;
206            if ( _undefines.find(superclassName.c_str()) == _undefines.end() ) {
207                const char* symbolName = ::strdup(superclassName.c_str());
208                info.name = ::strdup(symbolName);
209                info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
210                // string is owned by _undefines
211                _undefines[info.name] = info;
212            }
213        }
214        // third slot in __OBJC,__class is pointer to class name
215        std::string className;
216         if ( objcClassNameFromExpression(c->getOperand(2), className) ) {
217            const char* symbolName = ::strdup(className.c_str());
218            NameAndAttributes info;
219            info.name = symbolName;
220            info.attributes = (lto_symbol_attributes)
221                (LTO_SYMBOL_PERMISSIONS_DATA |
222                 LTO_SYMBOL_DEFINITION_REGULAR |
223                 LTO_SYMBOL_SCOPE_DEFAULT);
224            _symbols.push_back(info);
225            _defines[info.name] = 1;
226         }
227    }
228}
229
230
231// parse i386/ppc ObjC category data structure
232void LTOModule::addObjCCategory(GlobalVariable* clgv)
233{
234    if (ConstantStruct* c = dyn_cast<ConstantStruct>(clgv->getInitializer())) {
235        // second slot in __OBJC,__category is pointer to target class name
236        std::string targetclassName;
237        if ( objcClassNameFromExpression(c->getOperand(1), targetclassName) ) {
238            NameAndAttributes info;
239            if ( _undefines.find(targetclassName.c_str()) == _undefines.end() ){
240                const char* symbolName = ::strdup(targetclassName.c_str());
241                info.name = ::strdup(symbolName);
242                info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
243                // string is owned by _undefines
244               _undefines[info.name] = info;
245            }
246        }
247    }
248}
249
250
251// parse i386/ppc ObjC class list data structure
252void LTOModule::addObjCClassRef(GlobalVariable* clgv)
253{
254    std::string targetclassName;
255    if ( objcClassNameFromExpression(clgv->getInitializer(), targetclassName) ){
256        NameAndAttributes info;
257        if ( _undefines.find(targetclassName.c_str()) == _undefines.end() ) {
258            const char* symbolName = ::strdup(targetclassName.c_str());
259            info.name = ::strdup(symbolName);
260            info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
261            // string is owned by _undefines
262            _undefines[info.name] = info;
263        }
264    }
265}
266
267
268void LTOModule::addDefinedDataSymbol(GlobalValue* v, Mangler& mangler)
269{
270    // add to list of defined symbols
271    addDefinedSymbol(v, mangler, false);
272
273    // special case i386/ppc ObjC data structures in magic sections
274    if ( v->hasSection() ) {
275        // special case if this data blob is an ObjC class definition
276        if ( v->getSection().compare(0, 15, "__OBJC,__class,") == 0 ) {
277            if (GlobalVariable* gv = dyn_cast<GlobalVariable>(v)) {
278                addObjCClass(gv);
279            }
280        }
281
282        // special case if this data blob is an ObjC category definition
283        else if ( v->getSection().compare(0, 18, "__OBJC,__category,") == 0 ) {
284            if (GlobalVariable* gv = dyn_cast<GlobalVariable>(v)) {
285                addObjCCategory(gv);
286            }
287        }
288
289        // special case if this data blob is the list of referenced classes
290        else if ( v->getSection().compare(0, 18, "__OBJC,__cls_refs,") == 0 ) {
291            if (GlobalVariable* gv = dyn_cast<GlobalVariable>(v)) {
292                addObjCClassRef(gv);
293            }
294        }
295    }
296
297    // add external symbols referenced by this data.
298    for (unsigned count = 0, total = v->getNumOperands();
299                                                count != total; ++count) {
300        findExternalRefs(v->getOperand(count), mangler);
301    }
302}
303
304
305void LTOModule::addDefinedSymbol(GlobalValue* def, Mangler &mangler,
306                                bool isFunction)
307{
308    // ignore all llvm.* symbols
309    if ( strncmp(def->getNameStart(), "llvm.", 5) == 0 )
310        return;
311
312    // string is owned by _defines
313    const char* symbolName = ::strdup(mangler.getValueName(def).c_str());
314
315    // set alignment part log2() can have rounding errors
316    uint32_t align = def->getAlignment();
317    uint32_t attr = align ? CountTrailingZeros_32(def->getAlignment()) : 0;
318
319    // set permissions part
320    if ( isFunction )
321        attr |= LTO_SYMBOL_PERMISSIONS_CODE;
322    else {
323        GlobalVariable* gv = dyn_cast<GlobalVariable>(def);
324        if ( (gv != NULL) && gv->isConstant() )
325            attr |= LTO_SYMBOL_PERMISSIONS_RODATA;
326        else
327            attr |= LTO_SYMBOL_PERMISSIONS_DATA;
328    }
329
330    // set definition part
331    if ( def->hasWeakLinkage() || def->hasLinkOnceLinkage() ) {
332        attr |= LTO_SYMBOL_DEFINITION_WEAK;
333    }
334    else if ( def->hasCommonLinkage()) {
335        attr |= LTO_SYMBOL_DEFINITION_TENTATIVE;
336    }
337    else {
338        attr |= LTO_SYMBOL_DEFINITION_REGULAR;
339    }
340
341    // set scope part
342    if ( def->hasHiddenVisibility() )
343        attr |= LTO_SYMBOL_SCOPE_HIDDEN;
344    else if ( def->hasProtectedVisibility() )
345        attr |= LTO_SYMBOL_SCOPE_PROTECTED;
346    else if ( def->hasExternalLinkage() || def->hasWeakLinkage()
347              || def->hasLinkOnceLinkage() || def->hasCommonLinkage() )
348        attr |= LTO_SYMBOL_SCOPE_DEFAULT;
349    else
350        attr |= LTO_SYMBOL_SCOPE_INTERNAL;
351
352    // add to table of symbols
353    NameAndAttributes info;
354    info.name = symbolName;
355    info.attributes = (lto_symbol_attributes)attr;
356    _symbols.push_back(info);
357    _defines[info.name] = 1;
358}
359
360void LTOModule::addAsmGlobalSymbol(const char *name) {
361    // only add new define if not already defined
362    if ( _defines.count(name, &name[strlen(name)+1]) == 0 )
363        return;
364
365    // string is owned by _defines
366    const char *symbolName = ::strdup(name);
367    uint32_t attr = LTO_SYMBOL_DEFINITION_REGULAR;
368    attr |= LTO_SYMBOL_SCOPE_DEFAULT;
369    NameAndAttributes info;
370    info.name = symbolName;
371    info.attributes = (lto_symbol_attributes)attr;
372    _symbols.push_back(info);
373    _defines[info.name] = 1;
374}
375
376void LTOModule::addPotentialUndefinedSymbol(GlobalValue* decl, Mangler &mangler)
377{
378    // ignore all llvm.* symbols
379    if ( strncmp(decl->getNameStart(), "llvm.", 5) == 0 )
380        return;
381
382    const char* name = mangler.getValueName(decl).c_str();
383
384    // we already have the symbol
385    if (_undefines.find(name) != _undefines.end())
386      return;
387
388    NameAndAttributes info;
389    // string is owned by _undefines
390    info.name = ::strdup(name);
391    if (decl->hasExternalWeakLinkage())
392      info.attributes = LTO_SYMBOL_DEFINITION_WEAKUNDEF;
393    else
394      info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
395    _undefines[name] = info;
396}
397
398
399
400// Find exeternal symbols referenced by VALUE. This is a recursive function.
401void LTOModule::findExternalRefs(Value* value, Mangler &mangler) {
402
403    if (GlobalValue* gv = dyn_cast<GlobalValue>(value)) {
404        if ( !gv->hasExternalLinkage() )
405            addPotentialUndefinedSymbol(gv, mangler);
406        // If this is a variable definition, do not recursively process
407        // initializer.  It might contain a reference to this variable
408        // and cause an infinite loop.  The initializer will be
409        // processed in addDefinedDataSymbol().
410        return;
411    }
412
413    // GlobalValue, even with InternalLinkage type, may have operands with
414    // ExternalLinkage type. Do not ignore these operands.
415    if (Constant* c = dyn_cast<Constant>(value)) {
416        // Handle ConstantExpr, ConstantStruct, ConstantArry etc..
417        for (unsigned i = 0, e = c->getNumOperands(); i != e; ++i)
418            findExternalRefs(c->getOperand(i), mangler);
419    }
420}
421
422void LTOModule::lazyParseSymbols()
423{
424    if ( !_symbolsParsed ) {
425        _symbolsParsed = true;
426
427        // Use mangler to add GlobalPrefix to names to match linker names.
428        Mangler mangler(*_module, _target->getTargetAsmInfo()->getGlobalPrefix());
429        // add chars used in ObjC method names so method names aren't mangled
430        mangler.markCharAcceptable('[');
431        mangler.markCharAcceptable(']');
432        mangler.markCharAcceptable('(');
433        mangler.markCharAcceptable(')');
434        mangler.markCharAcceptable('-');
435        mangler.markCharAcceptable('+');
436        mangler.markCharAcceptable(' ');
437
438        // add functions
439        for (Module::iterator f = _module->begin(); f != _module->end(); ++f) {
440            if ( f->isDeclaration() )
441                addPotentialUndefinedSymbol(f, mangler);
442            else
443                addDefinedFunctionSymbol(f, mangler);
444        }
445
446        // add data
447        for (Module::global_iterator v = _module->global_begin(),
448                                    e = _module->global_end(); v !=  e; ++v) {
449            if ( v->isDeclaration() )
450                addPotentialUndefinedSymbol(v, mangler);
451            else
452                addDefinedDataSymbol(v, mangler);
453        }
454
455        // add asm globals
456        const std::string &inlineAsm = _module->getModuleInlineAsm();
457        const std::string glbl = ".globl";
458        std::string asmSymbolName;
459        std::string::size_type pos = inlineAsm.find(glbl, 0);
460        while (pos != std::string::npos) {
461          // eat .globl
462          pos = pos + 6;
463
464          // skip white space between .globl and symbol name
465          std::string::size_type pbegin = inlineAsm.find_first_not_of(' ', pos);
466          if (pbegin == std::string::npos)
467            break;
468
469          // find end-of-line
470          std::string::size_type pend = inlineAsm.find_first_of('\n', pbegin);
471          if (pend == std::string::npos)
472            break;
473
474          asmSymbolName.assign(inlineAsm, pbegin, pend - pbegin);
475          addAsmGlobalSymbol(asmSymbolName.c_str());
476
477          // search next .globl
478          pos = inlineAsm.find(glbl, pend);
479        }
480
481        // make symbols for all undefines
482        for (StringMap<NameAndAttributes>::iterator it=_undefines.begin();
483                                                it != _undefines.end(); ++it) {
484            // if this symbol also has a definition, then don't make an undefine
485            // because it is a tentative definition
486            if ( _defines.count(it->getKeyData(), it->getKeyData()+
487                                                  it->getKeyLength()) == 0 ) {
488              NameAndAttributes info = it->getValue();
489              _symbols.push_back(info);
490            }
491        }
492    }
493}
494
495
496uint32_t LTOModule::getSymbolCount()
497{
498    lazyParseSymbols();
499    return _symbols.size();
500}
501
502
503lto_symbol_attributes LTOModule::getSymbolAttributes(uint32_t index)
504{
505    lazyParseSymbols();
506    if ( index < _symbols.size() )
507        return _symbols[index].attributes;
508    else
509        return lto_symbol_attributes(0);
510}
511
512const char* LTOModule::getSymbolName(uint32_t index)
513{
514    lazyParseSymbols();
515    if ( index < _symbols.size() )
516        return _symbols[index].name;
517    else
518        return NULL;
519}
520
521