ArchiveReader.cpp revision 94bc246a8b2fd0ef371c8f3846ac9a5a367ed9ab
1//===-- ArchiveReader.cpp - Read LLVM archive files -------------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Builds up standard unix archive files (.a) containing LLVM bitcode. 11// 12//===----------------------------------------------------------------------===// 13 14#include "ArchiveInternals.h" 15#include "llvm/ADT/SmallPtrSet.h" 16#include "llvm/Bitcode/ReaderWriter.h" 17#include "llvm/Support/MemoryBuffer.h" 18#include "llvm/Module.h" 19#include <cstdio> 20#include <cstdlib> 21#include <memory> 22using namespace llvm; 23 24/// Read a variable-bit-rate encoded unsigned integer 25static inline unsigned readInteger(const char*&At, const char*End) { 26 unsigned Shift = 0; 27 unsigned Result = 0; 28 29 do { 30 if (At == End) 31 return Result; 32 Result |= (unsigned)((*At++) & 0x7F) << Shift; 33 Shift += 7; 34 } while (At[-1] & 0x80); 35 return Result; 36} 37 38// Completely parse the Archive's symbol table and populate symTab member var. 39bool 40Archive::parseSymbolTable(const void* data, unsigned size, std::string* error) { 41 const char* At = (const char*) data; 42 const char* End = At + size; 43 while (At < End) { 44 unsigned offset = readInteger(At, End); 45 if (At == End) { 46 if (error) 47 *error = "Ran out of data reading vbr_uint for symtab offset!"; 48 return false; 49 } 50 unsigned length = readInteger(At, End); 51 if (At == End) { 52 if (error) 53 *error = "Ran out of data reading vbr_uint for symtab length!"; 54 return false; 55 } 56 if (At + length > End) { 57 if (error) 58 *error = "Malformed symbol table: length not consistent with size"; 59 return false; 60 } 61 // we don't care if it can't be inserted (duplicate entry) 62 symTab.insert(std::make_pair(std::string(At, length), offset)); 63 At += length; 64 } 65 symTabSize = size; 66 return true; 67} 68 69// This member parses an ArchiveMemberHeader that is presumed to be pointed to 70// by At. The At pointer is updated to the byte just after the header, which 71// can be variable in size. 72ArchiveMember* 73Archive::parseMemberHeader(const char*& At, const char* End, std::string* error) 74{ 75 if (At + sizeof(ArchiveMemberHeader) >= End) { 76 if (error) 77 *error = "Unexpected end of file"; 78 return 0; 79 } 80 81 // Cast archive member header 82 ArchiveMemberHeader* Hdr = (ArchiveMemberHeader*)At; 83 At += sizeof(ArchiveMemberHeader); 84 85 int flags = 0; 86 int MemberSize = atoi(Hdr->size); 87 assert(MemberSize >= 0); 88 89 // Check the size of the member for sanity 90 if (At + MemberSize > End) { 91 if (error) 92 *error = "invalid member length in archive file"; 93 return 0; 94 } 95 96 // Check the member signature 97 if (!Hdr->checkSignature()) { 98 if (error) 99 *error = "invalid file member signature"; 100 return 0; 101 } 102 103 // Convert and check the member name 104 // The empty name ( '/' and 15 blanks) is for a foreign (non-LLVM) symbol 105 // table. The special name "//" and 14 blanks is for a string table, used 106 // for long file names. This library doesn't generate either of those but 107 // it will accept them. If the name starts with #1/ and the remainder is 108 // digits, then those digits specify the length of the name that is 109 // stored immediately following the header. The special name 110 // __LLVM_SYM_TAB__ identifies the symbol table for LLVM bitcode. 111 // Anything else is a regular, short filename that is terminated with 112 // a '/' and blanks. 113 114 std::string pathname; 115 switch (Hdr->name[0]) { 116 case '#': 117 if (Hdr->name[1] == '1' && Hdr->name[2] == '/') { 118 if (isdigit(Hdr->name[3])) { 119 unsigned len = atoi(&Hdr->name[3]); 120 const char *nulp = (const char *)memchr(At, '\0', len); 121 pathname.assign(At, nulp != 0 ? (uintptr_t)(nulp - At) : len); 122 At += len; 123 MemberSize -= len; 124 flags |= ArchiveMember::HasLongFilenameFlag; 125 } else { 126 if (error) 127 *error = "invalid long filename"; 128 return 0; 129 } 130 } else if (Hdr->name[1] == '_' && 131 (0 == memcmp(Hdr->name, ARFILE_LLVM_SYMTAB_NAME, 16))) { 132 // The member is using a long file name (>15 chars) format. 133 // This format is standard for 4.4BSD and Mac OSX operating 134 // systems. LLVM uses it similarly. In this format, the 135 // remainder of the name field (after #1/) specifies the 136 // length of the file name which occupy the first bytes of 137 // the member's data. The pathname already has the #1/ stripped. 138 pathname.assign(ARFILE_LLVM_SYMTAB_NAME); 139 flags |= ArchiveMember::LLVMSymbolTableFlag; 140 } 141 break; 142 case '/': 143 if (Hdr->name[1]== '/') { 144 if (0 == memcmp(Hdr->name, ARFILE_STRTAB_NAME, 16)) { 145 pathname.assign(ARFILE_STRTAB_NAME); 146 flags |= ArchiveMember::StringTableFlag; 147 } else { 148 if (error) 149 *error = "invalid string table name"; 150 return 0; 151 } 152 } else if (Hdr->name[1] == ' ') { 153 if (0 == memcmp(Hdr->name, ARFILE_SVR4_SYMTAB_NAME, 16)) { 154 pathname.assign(ARFILE_SVR4_SYMTAB_NAME); 155 flags |= ArchiveMember::SVR4SymbolTableFlag; 156 } else { 157 if (error) 158 *error = "invalid SVR4 symbol table name"; 159 return 0; 160 } 161 } else if (isdigit(Hdr->name[1])) { 162 unsigned index = atoi(&Hdr->name[1]); 163 if (index < strtab.length()) { 164 const char* namep = strtab.c_str() + index; 165 const char* endp = strtab.c_str() + strtab.length(); 166 const char* p = namep; 167 const char* last_p = p; 168 while (p < endp) { 169 if (*p == '\n' && *last_p == '/') { 170 pathname.assign(namep, last_p - namep); 171 flags |= ArchiveMember::HasLongFilenameFlag; 172 break; 173 } 174 last_p = p; 175 p++; 176 } 177 if (p >= endp) { 178 if (error) 179 *error = "missing name termiantor in string table"; 180 return 0; 181 } 182 } else { 183 if (error) 184 *error = "name index beyond string table"; 185 return 0; 186 } 187 } 188 break; 189 case '_': 190 if (Hdr->name[1] == '_' && 191 (0 == memcmp(Hdr->name, ARFILE_BSD4_SYMTAB_NAME, 16))) { 192 pathname.assign(ARFILE_BSD4_SYMTAB_NAME); 193 flags |= ArchiveMember::BSD4SymbolTableFlag; 194 break; 195 } 196 /* FALL THROUGH */ 197 198 default: 199 char* slash = (char*) memchr(Hdr->name, '/', 16); 200 if (slash == 0) 201 slash = Hdr->name + 16; 202 pathname.assign(Hdr->name, slash - Hdr->name); 203 break; 204 } 205 206 // Determine if this is a bitcode file 207 switch (sys::IdentifyFileType(At, 4)) { 208 case sys::Bitcode_FileType: 209 flags |= ArchiveMember::BitcodeFlag; 210 break; 211 default: 212 flags &= ~ArchiveMember::BitcodeFlag; 213 break; 214 } 215 216 // Instantiate the ArchiveMember to be filled 217 ArchiveMember* member = new ArchiveMember(this); 218 219 // Fill in fields of the ArchiveMember 220 member->parent = this; 221 member->path.set(pathname); 222 member->info.fileSize = MemberSize; 223 member->info.modTime.fromEpochTime(atoi(Hdr->date)); 224 unsigned int mode; 225 sscanf(Hdr->mode, "%o", &mode); 226 member->info.mode = mode; 227 member->info.user = atoi(Hdr->uid); 228 member->info.group = atoi(Hdr->gid); 229 member->flags = flags; 230 member->data = At; 231 232 return member; 233} 234 235bool 236Archive::checkSignature(std::string* error) { 237 // Check the magic string at file's header 238 if (mapfile->getBufferSize() < 8 || memcmp(base, ARFILE_MAGIC, 8)) { 239 if (error) 240 *error = "invalid signature for an archive file"; 241 return false; 242 } 243 return true; 244} 245 246// This function loads the entire archive and fully populates its ilist with 247// the members of the archive file. This is typically used in preparation for 248// editing the contents of the archive. 249bool 250Archive::loadArchive(std::string* error) { 251 252 // Set up parsing 253 members.clear(); 254 symTab.clear(); 255 const char *At = base; 256 const char *End = mapfile->getBufferEnd(); 257 258 if (!checkSignature(error)) 259 return false; 260 261 At += 8; // Skip the magic string. 262 263 bool seenSymbolTable = false; 264 bool foundFirstFile = false; 265 while (At < End) { 266 // parse the member header 267 const char* Save = At; 268 ArchiveMember* mbr = parseMemberHeader(At, End, error); 269 if (!mbr) 270 return false; 271 272 // check if this is the foreign symbol table 273 if (mbr->isSVR4SymbolTable() || mbr->isBSD4SymbolTable()) { 274 // We just save this but don't do anything special 275 // with it. It doesn't count as the "first file". 276 if (foreignST) { 277 // What? Multiple foreign symbol tables? Just chuck it 278 // and retain the last one found. 279 delete foreignST; 280 } 281 foreignST = mbr; 282 At += mbr->getSize(); 283 if ((intptr_t(At) & 1) == 1) 284 At++; 285 } else if (mbr->isStringTable()) { 286 // Simply suck the entire string table into a string 287 // variable. This will be used to get the names of the 288 // members that use the "/ddd" format for their names 289 // (SVR4 style long names). 290 strtab.assign(At, mbr->getSize()); 291 At += mbr->getSize(); 292 if ((intptr_t(At) & 1) == 1) 293 At++; 294 delete mbr; 295 } else if (mbr->isLLVMSymbolTable()) { 296 // This is the LLVM symbol table for the archive. If we've seen it 297 // already, its an error. Otherwise, parse the symbol table and move on. 298 if (seenSymbolTable) { 299 if (error) 300 *error = "invalid archive: multiple symbol tables"; 301 return false; 302 } 303 if (!parseSymbolTable(mbr->getData(), mbr->getSize(), error)) 304 return false; 305 seenSymbolTable = true; 306 At += mbr->getSize(); 307 if ((intptr_t(At) & 1) == 1) 308 At++; 309 delete mbr; // We don't need this member in the list of members. 310 } else { 311 // This is just a regular file. If its the first one, save its offset. 312 // Otherwise just push it on the list and move on to the next file. 313 if (!foundFirstFile) { 314 firstFileOffset = Save - base; 315 foundFirstFile = true; 316 } 317 members.push_back(mbr); 318 At += mbr->getSize(); 319 if ((intptr_t(At) & 1) == 1) 320 At++; 321 } 322 } 323 return true; 324} 325 326// Open and completely load the archive file. 327Archive* 328Archive::OpenAndLoad(const sys::Path& file, LLVMContext& C, 329 std::string* ErrorMessage) { 330 std::auto_ptr<Archive> result ( new Archive(file, C)); 331 if (result->mapToMemory(ErrorMessage)) 332 return 0; 333 if (!result->loadArchive(ErrorMessage)) 334 return 0; 335 return result.release(); 336} 337 338// Get all the bitcode modules from the archive 339bool 340Archive::getAllModules(std::vector<Module*>& Modules, 341 std::string* ErrMessage) { 342 343 for (iterator I=begin(), E=end(); I != E; ++I) { 344 if (I->isBitcode()) { 345 std::string FullMemberName = archPath.str() + 346 "(" + I->getPath().str() + ")"; 347 MemoryBuffer *Buffer = 348 MemoryBuffer::getMemBufferCopy(StringRef(I->getData(), I->getSize()), 349 FullMemberName.c_str()); 350 351 Module *M = ParseBitcodeFile(Buffer, Context, ErrMessage); 352 delete Buffer; 353 if (!M) 354 return true; 355 356 Modules.push_back(M); 357 } 358 } 359 return false; 360} 361 362// Load just the symbol table from the archive file 363bool 364Archive::loadSymbolTable(std::string* ErrorMsg) { 365 366 // Set up parsing 367 members.clear(); 368 symTab.clear(); 369 const char *At = base; 370 const char *End = mapfile->getBufferEnd(); 371 372 // Make sure we're dealing with an archive 373 if (!checkSignature(ErrorMsg)) 374 return false; 375 376 At += 8; // Skip signature 377 378 // Parse the first file member header 379 const char* FirstFile = At; 380 ArchiveMember* mbr = parseMemberHeader(At, End, ErrorMsg); 381 if (!mbr) 382 return false; 383 384 if (mbr->isSVR4SymbolTable() || mbr->isBSD4SymbolTable()) { 385 // Skip the foreign symbol table, we don't do anything with it 386 At += mbr->getSize(); 387 if ((intptr_t(At) & 1) == 1) 388 At++; 389 delete mbr; 390 391 // Read the next one 392 FirstFile = At; 393 mbr = parseMemberHeader(At, End, ErrorMsg); 394 if (!mbr) { 395 delete mbr; 396 return false; 397 } 398 } 399 400 if (mbr->isStringTable()) { 401 // Process the string table entry 402 strtab.assign((const char*)mbr->getData(), mbr->getSize()); 403 At += mbr->getSize(); 404 if ((intptr_t(At) & 1) == 1) 405 At++; 406 delete mbr; 407 // Get the next one 408 FirstFile = At; 409 mbr = parseMemberHeader(At, End, ErrorMsg); 410 if (!mbr) { 411 delete mbr; 412 return false; 413 } 414 } 415 416 // See if its the symbol table 417 if (mbr->isLLVMSymbolTable()) { 418 if (!parseSymbolTable(mbr->getData(), mbr->getSize(), ErrorMsg)) { 419 delete mbr; 420 return false; 421 } 422 423 At += mbr->getSize(); 424 if ((intptr_t(At) & 1) == 1) 425 At++; 426 delete mbr; 427 // Can't be any more symtab headers so just advance 428 FirstFile = At; 429 } else { 430 // There's no symbol table in the file. We have to rebuild it from scratch 431 // because the intent of this method is to get the symbol table loaded so 432 // it can be searched efficiently. 433 // Add the member to the members list 434 members.push_back(mbr); 435 } 436 437 firstFileOffset = FirstFile - base; 438 return true; 439} 440 441// Open the archive and load just the symbol tables 442Archive* Archive::OpenAndLoadSymbols(const sys::Path& file, 443 LLVMContext& C, 444 std::string* ErrorMessage) { 445 std::auto_ptr<Archive> result ( new Archive(file, C) ); 446 if (result->mapToMemory(ErrorMessage)) 447 return 0; 448 if (!result->loadSymbolTable(ErrorMessage)) 449 return 0; 450 return result.release(); 451} 452 453// Look up one symbol in the symbol table and return the module that defines 454// that symbol. 455Module* 456Archive::findModuleDefiningSymbol(const std::string& symbol, 457 std::string* ErrMsg) { 458 SymTabType::iterator SI = symTab.find(symbol); 459 if (SI == symTab.end()) 460 return 0; 461 462 // The symbol table was previously constructed assuming that the members were 463 // written without the symbol table header. Because VBR encoding is used, the 464 // values could not be adjusted to account for the offset of the symbol table 465 // because that could affect the size of the symbol table due to VBR encoding. 466 // We now have to account for this by adjusting the offset by the size of the 467 // symbol table and its header. 468 unsigned fileOffset = 469 SI->second + // offset in symbol-table-less file 470 firstFileOffset; // add offset to first "real" file in archive 471 472 // See if the module is already loaded 473 ModuleMap::iterator MI = modules.find(fileOffset); 474 if (MI != modules.end()) 475 return MI->second.first; 476 477 // Module hasn't been loaded yet, we need to load it 478 const char* modptr = base + fileOffset; 479 ArchiveMember* mbr = parseMemberHeader(modptr, mapfile->getBufferEnd(), 480 ErrMsg); 481 if (!mbr) 482 return 0; 483 484 // Now, load the bitcode module to get the Module. 485 std::string FullMemberName = archPath.str() + "(" + 486 mbr->getPath().str() + ")"; 487 MemoryBuffer *Buffer = 488 MemoryBuffer::getMemBufferCopy(StringRef(mbr->getData(), mbr->getSize()), 489 FullMemberName.c_str()); 490 491 Module *m = getLazyBitcodeModule(Buffer, Context, ErrMsg); 492 if (!m) 493 return 0; 494 495 modules.insert(std::make_pair(fileOffset, std::make_pair(m, mbr))); 496 497 return m; 498} 499 500// Look up multiple symbols in the symbol table and return a set of 501// Modules that define those symbols. 502bool 503Archive::findModulesDefiningSymbols(std::set<std::string>& symbols, 504 SmallVectorImpl<Module*>& result, 505 std::string* error) { 506 if (!mapfile || !base) { 507 if (error) 508 *error = "Empty archive invalid for finding modules defining symbols"; 509 return false; 510 } 511 512 if (symTab.empty()) { 513 // We don't have a symbol table, so we must build it now but lets also 514 // make sure that we populate the modules table as we do this to ensure 515 // that we don't load them twice when findModuleDefiningSymbol is called 516 // below. 517 518 // Get a pointer to the first file 519 const char* At = base + firstFileOffset; 520 const char* End = mapfile->getBufferEnd(); 521 522 while ( At < End) { 523 // Compute the offset to be put in the symbol table 524 unsigned offset = At - base - firstFileOffset; 525 526 // Parse the file's header 527 ArchiveMember* mbr = parseMemberHeader(At, End, error); 528 if (!mbr) 529 return false; 530 531 // If it contains symbols 532 if (mbr->isBitcode()) { 533 // Get the symbols 534 std::vector<std::string> symbols; 535 std::string FullMemberName = archPath.str() + "(" + 536 mbr->getPath().str() + ")"; 537 Module* M = 538 GetBitcodeSymbols(At, mbr->getSize(), FullMemberName, Context, 539 symbols, error); 540 541 if (M) { 542 // Insert the module's symbols into the symbol table 543 for (std::vector<std::string>::iterator I = symbols.begin(), 544 E=symbols.end(); I != E; ++I ) { 545 symTab.insert(std::make_pair(*I, offset)); 546 } 547 // Insert the Module and the ArchiveMember into the table of 548 // modules. 549 modules.insert(std::make_pair(offset, std::make_pair(M, mbr))); 550 } else { 551 if (error) 552 *error = "Can't parse bitcode member: " + 553 mbr->getPath().str() + ": " + *error; 554 delete mbr; 555 return false; 556 } 557 } 558 559 // Go to the next file location 560 At += mbr->getSize(); 561 if ((intptr_t(At) & 1) == 1) 562 At++; 563 } 564 } 565 566 // At this point we have a valid symbol table (one way or another) so we 567 // just use it to quickly find the symbols requested. 568 569 SmallPtrSet<Module*, 16> Added; 570 for (std::set<std::string>::iterator I=symbols.begin(), 571 Next = I, 572 E=symbols.end(); I != E; I = Next) { 573 // Increment Next before we invalidate it. 574 ++Next; 575 576 // See if this symbol exists 577 Module* m = findModuleDefiningSymbol(*I,error); 578 if (!m) 579 continue; 580 bool NewMember = Added.insert(m); 581 if (!NewMember) 582 continue; 583 584 // The symbol exists, insert the Module into our result. 585 result.push_back(m); 586 587 // Remove the symbol now that its been resolved. 588 symbols.erase(I); 589 } 590 return true; 591} 592 593bool Archive::isBitcodeArchive() { 594 // Make sure the symTab has been loaded. In most cases this should have been 595 // done when the archive was constructed, but still, this is just in case. 596 if (symTab.empty()) 597 if (!loadSymbolTable(0)) 598 return false; 599 600 // Now that we know it's been loaded, return true 601 // if it has a size 602 if (symTab.size()) return true; 603 604 // We still can't be sure it isn't a bitcode archive 605 if (!loadArchive(0)) 606 return false; 607 608 std::vector<Module *> Modules; 609 std::string ErrorMessage; 610 611 // Scan the archive, trying to load a bitcode member. We only load one to 612 // see if this works. 613 for (iterator I = begin(), E = end(); I != E; ++I) { 614 if (!I->isBitcode()) 615 continue; 616 617 std::string FullMemberName = 618 archPath.str() + "(" + I->getPath().str() + ")"; 619 620 MemoryBuffer *Buffer = 621 MemoryBuffer::getMemBufferCopy(StringRef(I->getData(), I->getSize()), 622 FullMemberName.c_str()); 623 Module *M = ParseBitcodeFile(Buffer, Context); 624 delete Buffer; 625 if (!M) 626 return false; // Couldn't parse bitcode, not a bitcode archive. 627 delete M; 628 return true; 629 } 630 631 return false; 632} 633