LTOModule.cpp revision 3eb445feb22647e867a339f4c59b0a716b03a21a
1//===-LTOModule.cpp - LLVM Link Time Optimizer ----------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements the Link Time Optimization library. This library is 11// intended to be used by linker to optimize code at link time. 12// 13//===----------------------------------------------------------------------===// 14 15#include "LTOModule.h" 16 17#include "llvm/Constants.h" 18#include "llvm/Module.h" 19#include "llvm/ModuleProvider.h" 20#include "llvm/ADT/OwningPtr.h" 21#include "llvm/Bitcode/ReaderWriter.h" 22#include "llvm/Support/SystemUtils.h" 23#include "llvm/Support/Mangler.h" 24#include "llvm/Support/MemoryBuffer.h" 25#include "llvm/Support/MathExtras.h" 26#include "llvm/System/Path.h" 27#include "llvm/System/Process.h" 28#include "llvm/Target/SubtargetFeature.h" 29#include "llvm/Target/TargetMachine.h" 30#include "llvm/Target/TargetMachineRegistry.h" 31#include "llvm/Target/TargetAsmInfo.h" 32 33#include <fstream> 34 35using namespace llvm; 36 37bool LTOModule::isBitcodeFile(const void* mem, size_t length) 38{ 39 return ( llvm::sys::IdentifyFileType((char*)mem, length) 40 == llvm::sys::Bitcode_FileType ); 41} 42 43bool LTOModule::isBitcodeFile(const char* path) 44{ 45 return llvm::sys::Path(path).isBitcodeFile(); 46} 47 48bool LTOModule::isBitcodeFileForTarget(const void* mem, size_t length, 49 const char* triplePrefix) 50{ 51 MemoryBuffer* buffer = makeBuffer(mem, length); 52 if ( buffer == NULL ) 53 return false; 54 return isTargetMatch(buffer, triplePrefix); 55} 56 57 58bool LTOModule::isBitcodeFileForTarget(const char* path, 59 const char* triplePrefix) 60{ 61 MemoryBuffer *buffer = MemoryBuffer::getFile(path); 62 if (buffer == NULL) 63 return false; 64 return isTargetMatch(buffer, triplePrefix); 65} 66 67// takes ownership of buffer 68bool LTOModule::isTargetMatch(MemoryBuffer* buffer, const char* triplePrefix) 69{ 70 OwningPtr<ModuleProvider> mp(getBitcodeModuleProvider(buffer)); 71 // on success, mp owns buffer and both are deleted at end of this method 72 if ( !mp ) { 73 delete buffer; 74 return false; 75 } 76 std::string actualTarget = mp->getModule()->getTargetTriple(); 77 return ( strncmp(actualTarget.c_str(), triplePrefix, 78 strlen(triplePrefix)) == 0); 79} 80 81 82LTOModule::LTOModule(Module* m, TargetMachine* t) 83 : _module(m), _target(t), _symbolsParsed(false) 84{ 85} 86 87LTOModule* LTOModule::makeLTOModule(const char* path, std::string& errMsg) 88{ 89 OwningPtr<MemoryBuffer> buffer(MemoryBuffer::getFile(path, &errMsg)); 90 if ( !buffer ) 91 return NULL; 92 return makeLTOModule(buffer.get(), errMsg); 93} 94 95/// makeBuffer - create a MemoryBuffer from a memory range. 96/// MemoryBuffer requires the byte past end of the buffer to be a zero. 97/// We might get lucky and already be that way, otherwise make a copy. 98/// Also if next byte is on a different page, don't assume it is readable. 99MemoryBuffer* LTOModule::makeBuffer(const void* mem, size_t length) 100{ 101 const char* startPtr = (char*)mem; 102 const char* endPtr = startPtr+length; 103 if ( (((uintptr_t)endPtr & (sys::Process::GetPageSize()-1)) == 0) 104 || (*endPtr != 0) ) 105 return MemoryBuffer::getMemBufferCopy(startPtr, endPtr); 106 else 107 return MemoryBuffer::getMemBuffer(startPtr, endPtr); 108} 109 110 111LTOModule* LTOModule::makeLTOModule(const void* mem, size_t length, 112 std::string& errMsg) 113{ 114 OwningPtr<MemoryBuffer> buffer(makeBuffer(mem, length)); 115 if ( !buffer ) 116 return NULL; 117 return makeLTOModule(buffer.get(), errMsg); 118} 119 120/// getFeatureString - Return a string listing the features associated with the 121/// target triple. 122/// 123/// FIXME: This is an inelegant way of specifying the features of a 124/// subtarget. It would be better if we could encode this information into the 125/// IR. See <rdar://5972456>. 126std::string getFeatureString(const char *TargetTriple) { 127 SubtargetFeatures Features; 128 129 if (strncmp(TargetTriple, "powerpc-apple-", 14) == 0) { 130 Features.AddFeature("altivec", true); 131 } else if (strncmp(TargetTriple, "powerpc64-apple-", 16) == 0) { 132 Features.AddFeature("64bit", true); 133 Features.AddFeature("altivec", true); 134 } 135 136 return Features.getString(); 137} 138 139LTOModule* LTOModule::makeLTOModule(MemoryBuffer* buffer, std::string& errMsg) 140{ 141 // parse bitcode buffer 142 OwningPtr<Module> m(ParseBitcodeFile(buffer, &errMsg)); 143 if ( !m ) 144 return NULL; 145 // find machine architecture for this module 146 const TargetMachineRegistry::entry* march = 147 TargetMachineRegistry::getClosestStaticTargetForModule(*m, errMsg); 148 149 if ( march == NULL ) 150 return NULL; 151 152 // construct LTModule, hand over ownership of module and target 153 std::string FeatureStr = getFeatureString(m->getTargetTriple().c_str()); 154 TargetMachine* target = march->CtorFn(*m, FeatureStr); 155 return new LTOModule(m.take(), target); 156} 157 158 159const char* LTOModule::getTargetTriple() 160{ 161 return _module->getTargetTriple().c_str(); 162} 163 164void LTOModule::addDefinedFunctionSymbol(Function* f, Mangler &mangler) 165{ 166 // add to list of defined symbols 167 addDefinedSymbol(f, mangler, true); 168 169 // add external symbols referenced by this function. 170 for (Function::iterator b = f->begin(); b != f->end(); ++b) { 171 for (BasicBlock::iterator i = b->begin(); i != b->end(); ++i) { 172 for (unsigned count = 0, total = i->getNumOperands(); 173 count != total; ++count) { 174 findExternalRefs(i->getOperand(count), mangler); 175 } 176 } 177 } 178} 179 180// get string that data pointer points to 181bool LTOModule::objcClassNameFromExpression(Constant* c, std::string& name) 182{ 183 if (ConstantExpr* ce = dyn_cast<ConstantExpr>(c)) { 184 Constant* op = ce->getOperand(0); 185 if (GlobalVariable* gvn = dyn_cast<GlobalVariable>(op)) { 186 Constant* cn = gvn->getInitializer(); 187 if (ConstantArray* ca = dyn_cast<ConstantArray>(cn)) { 188 if ( ca->isCString() ) { 189 name = ".objc_class_name_" + ca->getAsString(); 190 return true; 191 } 192 } 193 } 194 } 195 return false; 196} 197 198// parse i386/ppc ObjC class data structure 199void LTOModule::addObjCClass(GlobalVariable* clgv) 200{ 201 if (ConstantStruct* c = dyn_cast<ConstantStruct>(clgv->getInitializer())) { 202 // second slot in __OBJC,__class is pointer to superclass name 203 std::string superclassName; 204 if ( objcClassNameFromExpression(c->getOperand(1), superclassName) ) { 205 NameAndAttributes info; 206 if ( _undefines.find(superclassName.c_str()) == _undefines.end() ) { 207 const char* symbolName = ::strdup(superclassName.c_str()); 208 info.name = ::strdup(symbolName); 209 info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED; 210 // string is owned by _undefines 211 _undefines[info.name] = info; 212 } 213 } 214 // third slot in __OBJC,__class is pointer to class name 215 std::string className; 216 if ( objcClassNameFromExpression(c->getOperand(2), className) ) { 217 const char* symbolName = ::strdup(className.c_str()); 218 NameAndAttributes info; 219 info.name = symbolName; 220 info.attributes = (lto_symbol_attributes) 221 (LTO_SYMBOL_PERMISSIONS_DATA | 222 LTO_SYMBOL_DEFINITION_REGULAR | 223 LTO_SYMBOL_SCOPE_DEFAULT); 224 _symbols.push_back(info); 225 _defines[info.name] = 1; 226 } 227 } 228} 229 230 231// parse i386/ppc ObjC category data structure 232void LTOModule::addObjCCategory(GlobalVariable* clgv) 233{ 234 if (ConstantStruct* c = dyn_cast<ConstantStruct>(clgv->getInitializer())) { 235 // second slot in __OBJC,__category is pointer to target class name 236 std::string targetclassName; 237 if ( objcClassNameFromExpression(c->getOperand(1), targetclassName) ) { 238 NameAndAttributes info; 239 if ( _undefines.find(targetclassName.c_str()) == _undefines.end() ){ 240 const char* symbolName = ::strdup(targetclassName.c_str()); 241 info.name = ::strdup(symbolName); 242 info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED; 243 // string is owned by _undefines 244 _undefines[info.name] = info; 245 } 246 } 247 } 248} 249 250 251// parse i386/ppc ObjC class list data structure 252void LTOModule::addObjCClassRef(GlobalVariable* clgv) 253{ 254 std::string targetclassName; 255 if ( objcClassNameFromExpression(clgv->getInitializer(), targetclassName) ){ 256 NameAndAttributes info; 257 if ( _undefines.find(targetclassName.c_str()) == _undefines.end() ) { 258 const char* symbolName = ::strdup(targetclassName.c_str()); 259 info.name = ::strdup(symbolName); 260 info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED; 261 // string is owned by _undefines 262 _undefines[info.name] = info; 263 } 264 } 265} 266 267 268void LTOModule::addDefinedDataSymbol(GlobalValue* v, Mangler& mangler) 269{ 270 // add to list of defined symbols 271 addDefinedSymbol(v, mangler, false); 272 273 // special case i386/ppc ObjC data structures in magic sections 274 if ( v->hasSection() ) { 275 // special case if this data blob is an ObjC class definition 276 if ( v->getSection().compare(0, 15, "__OBJC,__class,") == 0 ) { 277 if (GlobalVariable* gv = dyn_cast<GlobalVariable>(v)) { 278 addObjCClass(gv); 279 } 280 } 281 282 // special case if this data blob is an ObjC category definition 283 else if ( v->getSection().compare(0, 18, "__OBJC,__category,") == 0 ) { 284 if (GlobalVariable* gv = dyn_cast<GlobalVariable>(v)) { 285 addObjCCategory(gv); 286 } 287 } 288 289 // special case if this data blob is the list of referenced classes 290 else if ( v->getSection().compare(0, 18, "__OBJC,__cls_refs,") == 0 ) { 291 if (GlobalVariable* gv = dyn_cast<GlobalVariable>(v)) { 292 addObjCClassRef(gv); 293 } 294 } 295 } 296 297 // add external symbols referenced by this data. 298 for (unsigned count = 0, total = v->getNumOperands(); 299 count != total; ++count) { 300 findExternalRefs(v->getOperand(count), mangler); 301 } 302} 303 304 305void LTOModule::addDefinedSymbol(GlobalValue* def, Mangler &mangler, 306 bool isFunction) 307{ 308 // ignore all llvm.* symbols 309 if ( strncmp(def->getNameStart(), "llvm.", 5) == 0 ) 310 return; 311 312 // string is owned by _defines 313 const char* symbolName = ::strdup(mangler.getValueName(def).c_str()); 314 315 // set alignment part log2() can have rounding errors 316 uint32_t align = def->getAlignment(); 317 uint32_t attr = align ? CountTrailingZeros_32(def->getAlignment()) : 0; 318 319 // set permissions part 320 if ( isFunction ) 321 attr |= LTO_SYMBOL_PERMISSIONS_CODE; 322 else { 323 GlobalVariable* gv = dyn_cast<GlobalVariable>(def); 324 if ( (gv != NULL) && gv->isConstant() ) 325 attr |= LTO_SYMBOL_PERMISSIONS_RODATA; 326 else 327 attr |= LTO_SYMBOL_PERMISSIONS_DATA; 328 } 329 330 // set definition part 331 if ( def->hasWeakLinkage() || def->hasLinkOnceLinkage() ) { 332 attr |= LTO_SYMBOL_DEFINITION_WEAK; 333 } 334 else if ( def->hasCommonLinkage()) { 335 attr |= LTO_SYMBOL_DEFINITION_TENTATIVE; 336 } 337 else { 338 attr |= LTO_SYMBOL_DEFINITION_REGULAR; 339 } 340 341 // set scope part 342 if ( def->hasHiddenVisibility() ) 343 attr |= LTO_SYMBOL_SCOPE_HIDDEN; 344 else if ( def->hasProtectedVisibility() ) 345 attr |= LTO_SYMBOL_SCOPE_PROTECTED; 346 else if ( def->hasExternalLinkage() || def->hasWeakLinkage() 347 || def->hasLinkOnceLinkage() || def->hasCommonLinkage() ) 348 attr |= LTO_SYMBOL_SCOPE_DEFAULT; 349 else 350 attr |= LTO_SYMBOL_SCOPE_INTERNAL; 351 352 // add to table of symbols 353 NameAndAttributes info; 354 info.name = symbolName; 355 info.attributes = (lto_symbol_attributes)attr; 356 _symbols.push_back(info); 357 _defines[info.name] = 1; 358} 359 360void LTOModule::addAsmGlobalSymbol(const char *name) { 361 // only add new define if not already defined 362 if ( _defines.count(name, &name[strlen(name)+1]) == 0 ) 363 return; 364 365 // string is owned by _defines 366 const char *symbolName = ::strdup(name); 367 uint32_t attr = LTO_SYMBOL_DEFINITION_REGULAR; 368 attr |= LTO_SYMBOL_SCOPE_DEFAULT; 369 NameAndAttributes info; 370 info.name = symbolName; 371 info.attributes = (lto_symbol_attributes)attr; 372 _symbols.push_back(info); 373 _defines[info.name] = 1; 374} 375 376void LTOModule::addPotentialUndefinedSymbol(GlobalValue* decl, Mangler &mangler) 377{ 378 // ignore all llvm.* symbols 379 if ( strncmp(decl->getNameStart(), "llvm.", 5) == 0 ) 380 return; 381 382 const char* name = mangler.getValueName(decl).c_str(); 383 384 // we already have the symbol 385 if (_undefines.find(name) != _undefines.end()) 386 return; 387 388 NameAndAttributes info; 389 // string is owned by _undefines 390 info.name = ::strdup(name); 391 if (decl->hasExternalWeakLinkage()) 392 info.attributes = LTO_SYMBOL_DEFINITION_WEAKUNDEF; 393 else 394 info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED; 395 _undefines[name] = info; 396} 397 398 399 400// Find exeternal symbols referenced by VALUE. This is a recursive function. 401void LTOModule::findExternalRefs(Value* value, Mangler &mangler) { 402 403 if (GlobalValue* gv = dyn_cast<GlobalValue>(value)) { 404 if ( !gv->hasExternalLinkage() ) 405 addPotentialUndefinedSymbol(gv, mangler); 406 // If this is a variable definition, do not recursively process 407 // initializer. It might contain a reference to this variable 408 // and cause an infinite loop. The initializer will be 409 // processed in addDefinedDataSymbol(). 410 return; 411 } 412 413 // GlobalValue, even with InternalLinkage type, may have operands with 414 // ExternalLinkage type. Do not ignore these operands. 415 if (Constant* c = dyn_cast<Constant>(value)) { 416 // Handle ConstantExpr, ConstantStruct, ConstantArry etc.. 417 for (unsigned i = 0, e = c->getNumOperands(); i != e; ++i) 418 findExternalRefs(c->getOperand(i), mangler); 419 } 420} 421 422void LTOModule::lazyParseSymbols() 423{ 424 if ( !_symbolsParsed ) { 425 _symbolsParsed = true; 426 427 // Use mangler to add GlobalPrefix to names to match linker names. 428 Mangler mangler(*_module, _target->getTargetAsmInfo()->getGlobalPrefix()); 429 // add chars used in ObjC method names so method names aren't mangled 430 mangler.markCharAcceptable('['); 431 mangler.markCharAcceptable(']'); 432 mangler.markCharAcceptable('('); 433 mangler.markCharAcceptable(')'); 434 mangler.markCharAcceptable('-'); 435 mangler.markCharAcceptable('+'); 436 mangler.markCharAcceptable(' '); 437 438 // add functions 439 for (Module::iterator f = _module->begin(); f != _module->end(); ++f) { 440 if ( f->isDeclaration() ) 441 addPotentialUndefinedSymbol(f, mangler); 442 else 443 addDefinedFunctionSymbol(f, mangler); 444 } 445 446 // add data 447 for (Module::global_iterator v = _module->global_begin(), 448 e = _module->global_end(); v != e; ++v) { 449 if ( v->isDeclaration() ) 450 addPotentialUndefinedSymbol(v, mangler); 451 else 452 addDefinedDataSymbol(v, mangler); 453 } 454 455 // add asm globals 456 const std::string &inlineAsm = _module->getModuleInlineAsm(); 457 const std::string glbl = ".globl"; 458 std::string asmSymbolName; 459 std::string::size_type pos = inlineAsm.find(glbl, 0); 460 while (pos != std::string::npos) { 461 // eat .globl 462 pos = pos + 6; 463 464 // skip white space between .globl and symbol name 465 std::string::size_type pbegin = inlineAsm.find_first_not_of(' ', pos); 466 if (pbegin == std::string::npos) 467 break; 468 469 // find end-of-line 470 std::string::size_type pend = inlineAsm.find_first_of('\n', pbegin); 471 if (pend == std::string::npos) 472 break; 473 474 asmSymbolName.assign(inlineAsm, pbegin, pend - pbegin); 475 addAsmGlobalSymbol(asmSymbolName.c_str()); 476 477 // search next .globl 478 pos = inlineAsm.find(glbl, pend); 479 } 480 481 // make symbols for all undefines 482 for (StringMap<NameAndAttributes>::iterator it=_undefines.begin(); 483 it != _undefines.end(); ++it) { 484 // if this symbol also has a definition, then don't make an undefine 485 // because it is a tentative definition 486 if ( _defines.count(it->getKeyData(), it->getKeyData()+ 487 it->getKeyLength()) == 0 ) { 488 NameAndAttributes info = it->getValue(); 489 _symbols.push_back(info); 490 } 491 } 492 } 493} 494 495 496uint32_t LTOModule::getSymbolCount() 497{ 498 lazyParseSymbols(); 499 return _symbols.size(); 500} 501 502 503lto_symbol_attributes LTOModule::getSymbolAttributes(uint32_t index) 504{ 505 lazyParseSymbols(); 506 if ( index < _symbols.size() ) 507 return _symbols[index].attributes; 508 else 509 return lto_symbol_attributes(0); 510} 511 512const char* LTOModule::getSymbolName(uint32_t index) 513{ 514 lazyParseSymbols(); 515 if ( index < _symbols.size() ) 516 return _symbols[index].name; 517 else 518 return NULL; 519} 520 521