ArchiveReader.cpp revision 7f6b4479044e7f6553f517737caa18e4e543697c
1//===-- ArchiveReader.cpp - Read LLVM archive files -------------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Builds up standard unix archive files (.a) containing LLVM bitcode. 11// 12//===----------------------------------------------------------------------===// 13 14#include "ArchiveInternals.h" 15#include "llvm/Bitcode/ReaderWriter.h" 16#include "llvm/Support/MemoryBuffer.h" 17#include "llvm/Module.h" 18#include <memory> 19using namespace llvm; 20 21/// Read a variable-bit-rate encoded unsigned integer 22inline unsigned readInteger(const char*&At, const char*End){ 23 unsigned Shift = 0; 24 unsigned Result = 0; 25 26 do { 27 if (At == End) 28 return Result; 29 Result |= (unsigned)((*At++) & 0x7F) << Shift; 30 Shift += 7; 31 } while (At[-1] & 0x80); 32 return Result; 33} 34 35// Completely parse the Archive's symbol table and populate symTab member var. 36bool 37Archive::parseSymbolTable(const void* data, unsigned size, std::string* error) { 38 const char* At = (const char*) data; 39 const char* End = At + size; 40 while (At < End) { 41 unsigned offset = readInteger(At, End); 42 if (At == End) { 43 if (error) 44 *error = "Ran out of data reading vbr_uint for symtab offset!"; 45 return false; 46 } 47 unsigned length = readInteger(At, End); 48 if (At == End) { 49 if (error) 50 *error = "Ran out of data reading vbr_uint for symtab length!"; 51 return false; 52 } 53 if (At + length > End) { 54 if (error) 55 *error = "Malformed symbol table: length not consistent with size"; 56 return false; 57 } 58 // we don't care if it can't be inserted (duplicate entry) 59 symTab.insert(std::make_pair(std::string(At, length), offset)); 60 At += length; 61 } 62 symTabSize = size; 63 return true; 64} 65 66// This member parses an ArchiveMemberHeader that is presumed to be pointed to 67// by At. The At pointer is updated to the byte just after the header, which 68// can be variable in size. 69ArchiveMember* 70Archive::parseMemberHeader(const char*& At, const char* End, std::string* error) 71{ 72 if (At + sizeof(ArchiveMemberHeader) >= End) { 73 if (error) 74 *error = "Unexpected end of file"; 75 return 0; 76 } 77 78 // Cast archive member header 79 ArchiveMemberHeader* Hdr = (ArchiveMemberHeader*)At; 80 At += sizeof(ArchiveMemberHeader); 81 82 // Extract the size and determine if the file is 83 // compressed or not (negative length). 84 int flags = 0; 85 int MemberSize = atoi(Hdr->size); 86 if (MemberSize < 0) { 87 flags |= ArchiveMember::CompressedFlag; 88 MemberSize = -MemberSize; 89 } 90 91 // Check the size of the member for sanity 92 if (At + MemberSize > End) { 93 if (error) 94 *error = "invalid member length in archive file"; 95 return 0; 96 } 97 98 // Check the member signature 99 if (!Hdr->checkSignature()) { 100 if (error) 101 *error = "invalid file member signature"; 102 return 0; 103 } 104 105 // Convert and check the member name 106 // The empty name ( '/' and 15 blanks) is for a foreign (non-LLVM) symbol 107 // table. The special name "//" and 14 blanks is for a string table, used 108 // for long file names. This library doesn't generate either of those but 109 // it will accept them. If the name starts with #1/ and the remainder is 110 // digits, then those digits specify the length of the name that is 111 // stored immediately following the header. The special name 112 // __LLVM_SYM_TAB__ identifies the symbol table for LLVM bitcode. 113 // Anything else is a regular, short filename that is terminated with 114 // a '/' and blanks. 115 116 std::string pathname; 117 switch (Hdr->name[0]) { 118 case '#': 119 if (Hdr->name[1] == '1' && Hdr->name[2] == '/') { 120 if (isdigit(Hdr->name[3])) { 121 unsigned len = atoi(&Hdr->name[3]); 122 pathname.assign(At, len); 123 At += len; 124 MemberSize -= len; 125 flags |= ArchiveMember::HasLongFilenameFlag; 126 } else { 127 if (error) 128 *error = "invalid long filename"; 129 return 0; 130 } 131 } else if (Hdr->name[1] == '_' && 132 (0 == memcmp(Hdr->name, ARFILE_LLVM_SYMTAB_NAME, 16))) { 133 // The member is using a long file name (>15 chars) format. 134 // This format is standard for 4.4BSD and Mac OSX operating 135 // systems. LLVM uses it similarly. In this format, the 136 // remainder of the name field (after #1/) specifies the 137 // length of the file name which occupy the first bytes of 138 // the member's data. The pathname already has the #1/ stripped. 139 pathname.assign(ARFILE_LLVM_SYMTAB_NAME); 140 flags |= ArchiveMember::LLVMSymbolTableFlag; 141 } 142 break; 143 case '/': 144 if (Hdr->name[1]== '/') { 145 if (0 == memcmp(Hdr->name, ARFILE_STRTAB_NAME, 16)) { 146 pathname.assign(ARFILE_STRTAB_NAME); 147 flags |= ArchiveMember::StringTableFlag; 148 } else { 149 if (error) 150 *error = "invalid string table name"; 151 return 0; 152 } 153 } else if (Hdr->name[1] == ' ') { 154 if (0 == memcmp(Hdr->name, ARFILE_SVR4_SYMTAB_NAME, 16)) { 155 pathname.assign(ARFILE_SVR4_SYMTAB_NAME); 156 flags |= ArchiveMember::SVR4SymbolTableFlag; 157 } else { 158 if (error) 159 *error = "invalid SVR4 symbol table name"; 160 return 0; 161 } 162 } else if (isdigit(Hdr->name[1])) { 163 unsigned index = atoi(&Hdr->name[1]); 164 if (index < strtab.length()) { 165 const char* namep = strtab.c_str() + index; 166 const char* endp = strtab.c_str() + strtab.length(); 167 const char* p = namep; 168 const char* last_p = p; 169 while (p < endp) { 170 if (*p == '\n' && *last_p == '/') { 171 pathname.assign(namep, last_p - namep); 172 flags |= ArchiveMember::HasLongFilenameFlag; 173 break; 174 } 175 last_p = p; 176 p++; 177 } 178 if (p >= endp) { 179 if (error) 180 *error = "missing name termiantor in string table"; 181 return 0; 182 } 183 } else { 184 if (error) 185 *error = "name index beyond string table"; 186 return 0; 187 } 188 } 189 break; 190 case '_': 191 if (Hdr->name[1] == '_' && 192 (0 == memcmp(Hdr->name, ARFILE_BSD4_SYMTAB_NAME, 16))) { 193 pathname.assign(ARFILE_BSD4_SYMTAB_NAME); 194 flags |= ArchiveMember::BSD4SymbolTableFlag; 195 break; 196 } 197 /* FALL THROUGH */ 198 199 default: 200 char* slash = (char*) memchr(Hdr->name, '/', 16); 201 if (slash == 0) 202 slash = Hdr->name + 16; 203 pathname.assign(Hdr->name, slash - Hdr->name); 204 break; 205 } 206 207 // Determine if this is a bitcode file 208 switch (sys::IdentifyFileType(At, 4)) { 209 case sys::Bitcode_FileType: 210 flags |= ArchiveMember::BitcodeFlag; 211 break; 212 default: 213 flags &= ~ArchiveMember::BitcodeFlag; 214 break; 215 } 216 217 // Instantiate the ArchiveMember to be filled 218 ArchiveMember* member = new ArchiveMember(this); 219 220 // Fill in fields of the ArchiveMember 221 member->next = 0; 222 member->prev = 0; 223 member->parent = this; 224 member->path.set(pathname); 225 member->info.fileSize = MemberSize; 226 member->info.modTime.fromEpochTime(atoi(Hdr->date)); 227 unsigned int mode; 228 sscanf(Hdr->mode, "%o", &mode); 229 member->info.mode = mode; 230 member->info.user = atoi(Hdr->uid); 231 member->info.group = atoi(Hdr->gid); 232 member->flags = flags; 233 member->data = At; 234 235 return member; 236} 237 238bool 239Archive::checkSignature(std::string* error) { 240 // Check the magic string at file's header 241 if (mapfile->getBufferSize() < 8 || memcmp(base, ARFILE_MAGIC, 8)) { 242 if (error) 243 *error = "invalid signature for an archive file"; 244 return false; 245 } 246 return true; 247} 248 249// This function loads the entire archive and fully populates its ilist with 250// the members of the archive file. This is typically used in preparation for 251// editing the contents of the archive. 252bool 253Archive::loadArchive(std::string* error) { 254 255 // Set up parsing 256 members.clear(); 257 symTab.clear(); 258 const char *At = base; 259 const char *End = mapfile->getBufferEnd(); 260 261 if (!checkSignature(error)) 262 return false; 263 264 At += 8; // Skip the magic string. 265 266 bool seenSymbolTable = false; 267 bool foundFirstFile = false; 268 while (At < End) { 269 // parse the member header 270 const char* Save = At; 271 ArchiveMember* mbr = parseMemberHeader(At, End, error); 272 if (!mbr) 273 return false; 274 275 // check if this is the foreign symbol table 276 if (mbr->isSVR4SymbolTable() || mbr->isBSD4SymbolTable()) { 277 // We just save this but don't do anything special 278 // with it. It doesn't count as the "first file". 279 if (foreignST) { 280 // What? Multiple foreign symbol tables? Just chuck it 281 // and retain the last one found. 282 delete foreignST; 283 } 284 foreignST = mbr; 285 At += mbr->getSize(); 286 if ((intptr_t(At) & 1) == 1) 287 At++; 288 } else if (mbr->isStringTable()) { 289 // Simply suck the entire string table into a string 290 // variable. This will be used to get the names of the 291 // members that use the "/ddd" format for their names 292 // (SVR4 style long names). 293 strtab.assign(At, mbr->getSize()); 294 At += mbr->getSize(); 295 if ((intptr_t(At) & 1) == 1) 296 At++; 297 delete mbr; 298 } else if (mbr->isLLVMSymbolTable()) { 299 // This is the LLVM symbol table for the archive. If we've seen it 300 // already, its an error. Otherwise, parse the symbol table and move on. 301 if (seenSymbolTable) { 302 if (error) 303 *error = "invalid archive: multiple symbol tables"; 304 return false; 305 } 306 if (!parseSymbolTable(mbr->getData(), mbr->getSize(), error)) 307 return false; 308 seenSymbolTable = true; 309 At += mbr->getSize(); 310 if ((intptr_t(At) & 1) == 1) 311 At++; 312 delete mbr; // We don't need this member in the list of members. 313 } else { 314 // This is just a regular file. If its the first one, save its offset. 315 // Otherwise just push it on the list and move on to the next file. 316 if (!foundFirstFile) { 317 firstFileOffset = Save - base; 318 foundFirstFile = true; 319 } 320 members.push_back(mbr); 321 At += mbr->getSize(); 322 if ((intptr_t(At) & 1) == 1) 323 At++; 324 } 325 } 326 return true; 327} 328 329// Open and completely load the archive file. 330Archive* 331Archive::OpenAndLoad(const sys::Path& file, std::string* ErrorMessage) 332{ 333 std::auto_ptr<Archive> result ( new Archive(file)); 334 if (result->mapToMemory(ErrorMessage)) 335 return 0; 336 if (!result->loadArchive(ErrorMessage)) 337 return 0; 338 return result.release(); 339} 340 341// Get all the bitcode modules from the archive 342bool 343Archive::getAllModules(std::vector<Module*>& Modules, std::string* ErrMessage) { 344 345 for (iterator I=begin(), E=end(); I != E; ++I) { 346 if (I->isBitcode()) { 347 std::string FullMemberName = archPath.toString() + 348 "(" + I->getPath().toString() + ")"; 349 MemoryBuffer *Buffer = 350 MemoryBuffer::getNewMemBuffer(I->getSize(), FullMemberName.c_str()); 351 memcpy((char*)Buffer->getBufferStart(), I->getData(), I->getSize()); 352 353 Module *M = ParseBitcodeFile(Buffer, ErrMessage); 354 delete Buffer; 355 if (!M) 356 return true; 357 358 Modules.push_back(M); 359 } 360 } 361 return false; 362} 363 364// Load just the symbol table from the archive file 365bool 366Archive::loadSymbolTable(std::string* ErrorMsg) { 367 368 // Set up parsing 369 members.clear(); 370 symTab.clear(); 371 const char *At = base; 372 const char *End = mapfile->getBufferEnd(); 373 374 // Make sure we're dealing with an archive 375 if (!checkSignature(ErrorMsg)) 376 return false; 377 378 At += 8; // Skip signature 379 380 // Parse the first file member header 381 const char* FirstFile = At; 382 ArchiveMember* mbr = parseMemberHeader(At, End, ErrorMsg); 383 if (!mbr) 384 return false; 385 386 if (mbr->isSVR4SymbolTable() || mbr->isBSD4SymbolTable()) { 387 // Skip the foreign symbol table, we don't do anything with it 388 At += mbr->getSize(); 389 if ((intptr_t(At) & 1) == 1) 390 At++; 391 delete mbr; 392 393 // Read the next one 394 FirstFile = At; 395 mbr = parseMemberHeader(At, End, ErrorMsg); 396 if (!mbr) { 397 delete mbr; 398 return false; 399 } 400 } 401 402 if (mbr->isStringTable()) { 403 // Process the string table entry 404 strtab.assign((const char*)mbr->getData(), mbr->getSize()); 405 At += mbr->getSize(); 406 if ((intptr_t(At) & 1) == 1) 407 At++; 408 delete mbr; 409 // Get the next one 410 FirstFile = At; 411 mbr = parseMemberHeader(At, End, ErrorMsg); 412 if (!mbr) { 413 delete mbr; 414 return false; 415 } 416 } 417 418 // See if its the symbol table 419 if (mbr->isLLVMSymbolTable()) { 420 if (!parseSymbolTable(mbr->getData(), mbr->getSize(), ErrorMsg)) { 421 delete mbr; 422 return false; 423 } 424 425 At += mbr->getSize(); 426 if ((intptr_t(At) & 1) == 1) 427 At++; 428 delete mbr; 429 // Can't be any more symtab headers so just advance 430 FirstFile = At; 431 } else { 432 // There's no symbol table in the file. We have to rebuild it from scratch 433 // because the intent of this method is to get the symbol table loaded so 434 // it can be searched efficiently. 435 // Add the member to the members list 436 members.push_back(mbr); 437 } 438 439 firstFileOffset = FirstFile - base; 440 return true; 441} 442 443// Open the archive and load just the symbol tables 444Archive* 445Archive::OpenAndLoadSymbols(const sys::Path& file, std::string* ErrorMessage) { 446 std::auto_ptr<Archive> result ( new Archive(file) ); 447 if (result->mapToMemory(ErrorMessage)) 448 return 0; 449 if (!result->loadSymbolTable(ErrorMessage)) 450 return 0; 451 return result.release(); 452} 453 454// Look up one symbol in the symbol table and return a ModuleProvider for the 455// module that defines that symbol. 456ModuleProvider* 457Archive::findModuleDefiningSymbol(const std::string& symbol, 458 std::string* ErrMsg) { 459 SymTabType::iterator SI = symTab.find(symbol); 460 if (SI == symTab.end()) 461 return 0; 462 463 // The symbol table was previously constructed assuming that the members were 464 // written without the symbol table header. Because VBR encoding is used, the 465 // values could not be adjusted to account for the offset of the symbol table 466 // because that could affect the size of the symbol table due to VBR encoding. 467 // We now have to account for this by adjusting the offset by the size of the 468 // symbol table and its header. 469 unsigned fileOffset = 470 SI->second + // offset in symbol-table-less file 471 firstFileOffset; // add offset to first "real" file in archive 472 473 // See if the module is already loaded 474 ModuleMap::iterator MI = modules.find(fileOffset); 475 if (MI != modules.end()) 476 return MI->second.first; 477 478 // Module hasn't been loaded yet, we need to load it 479 const char* modptr = base + fileOffset; 480 ArchiveMember* mbr = parseMemberHeader(modptr, mapfile->getBufferEnd(), 481 ErrMsg); 482 if (!mbr) 483 return 0; 484 485 // Now, load the bitcode module to get the ModuleProvider 486 std::string FullMemberName = archPath.toString() + "(" + 487 mbr->getPath().toString() + ")"; 488 MemoryBuffer *Buffer =MemoryBuffer::getNewMemBuffer(mbr->getSize(), 489 FullMemberName.c_str()); 490 memcpy((char*)Buffer->getBufferStart(), mbr->getData(), mbr->getSize()); 491 492 ModuleProvider *mp = getBitcodeModuleProvider(Buffer, ErrMsg); 493 if (!mp) 494 return 0; 495 496 modules.insert(std::make_pair(fileOffset, std::make_pair(mp, mbr))); 497 498 return mp; 499} 500 501// Look up multiple symbols in the symbol table and return a set of 502// ModuleProviders that define those symbols. 503bool 504Archive::findModulesDefiningSymbols(std::set<std::string>& symbols, 505 std::set<ModuleProvider*>& result, 506 std::string* error) { 507 if (!mapfile || !base) { 508 if (error) 509 *error = "Empty archive invalid for finding modules defining symbols"; 510 return false; 511 } 512 513 if (symTab.empty()) { 514 // We don't have a symbol table, so we must build it now but lets also 515 // make sure that we populate the modules table as we do this to ensure 516 // that we don't load them twice when findModuleDefiningSymbol is called 517 // below. 518 519 // Get a pointer to the first file 520 const char* At = base + firstFileOffset; 521 const char* End = mapfile->getBufferEnd(); 522 523 while ( At < End) { 524 // Compute the offset to be put in the symbol table 525 unsigned offset = At - base - firstFileOffset; 526 527 // Parse the file's header 528 ArchiveMember* mbr = parseMemberHeader(At, End, error); 529 if (!mbr) 530 return false; 531 532 // If it contains symbols 533 if (mbr->isBitcode()) { 534 // Get the symbols 535 std::vector<std::string> symbols; 536 std::string FullMemberName = archPath.toString() + "(" + 537 mbr->getPath().toString() + ")"; 538 ModuleProvider* MP = 539 GetBitcodeSymbols((const unsigned char*)At, mbr->getSize(), 540 FullMemberName, symbols, error); 541 542 if (MP) { 543 // Insert the module's symbols into the symbol table 544 for (std::vector<std::string>::iterator I = symbols.begin(), 545 E=symbols.end(); I != E; ++I ) { 546 symTab.insert(std::make_pair(*I, offset)); 547 } 548 // Insert the ModuleProvider and the ArchiveMember into the table of 549 // modules. 550 modules.insert(std::make_pair(offset, std::make_pair(MP, mbr))); 551 } else { 552 if (error) 553 *error = "Can't parse bitcode member: " + 554 mbr->getPath().toString() + ": " + *error; 555 delete mbr; 556 return false; 557 } 558 } 559 560 // Go to the next file location 561 At += mbr->getSize(); 562 if ((intptr_t(At) & 1) == 1) 563 At++; 564 } 565 } 566 567 // At this point we have a valid symbol table (one way or another) so we 568 // just use it to quickly find the symbols requested. 569 570 for (std::set<std::string>::iterator I=symbols.begin(), 571 E=symbols.end(); I != E;) { 572 // See if this symbol exists 573 ModuleProvider* mp = findModuleDefiningSymbol(*I,error); 574 if (mp) { 575 // The symbol exists, insert the ModuleProvider into our result, 576 // duplicates wil be ignored 577 result.insert(mp); 578 579 // Remove the symbol now that its been resolved, being careful to 580 // post-increment the iterator. 581 symbols.erase(I++); 582 } else { 583 ++I; 584 } 585 } 586 return true; 587} 588 589bool Archive::isBitcodeArchive() { 590 // Make sure the symTab has been loaded. In most cases this should have been 591 // done when the archive was constructed, but still, this is just in case. 592 if (symTab.empty()) 593 if (!loadSymbolTable(0)) 594 return false; 595 596 // Now that we know it's been loaded, return true 597 // if it has a size 598 if (symTab.size()) return true; 599 600 // We still can't be sure it isn't a bitcode archive 601 if (!loadArchive(0)) 602 return false; 603 604 std::vector<Module *> Modules; 605 std::string ErrorMessage; 606 607 // Scan the archive, trying to load a bitcode member. We only load one to 608 // see if this works. 609 for (iterator I = begin(), E = end(); I != E; ++I) { 610 if (!I->isBitcode()) 611 continue; 612 613 std::string FullMemberName = 614 archPath.toString() + "(" + I->getPath().toString() + ")"; 615 616 MemoryBuffer *Buffer = 617 MemoryBuffer::getNewMemBuffer(I->getSize(), FullMemberName.c_str()); 618 memcpy((char*)Buffer->getBufferStart(), I->getData(), I->getSize()); 619 Module *M = ParseBitcodeFile(Buffer); 620 delete Buffer; 621 if (!M) 622 return false; // Couldn't parse bitcode, not a bitcode archive. 623 delete M; 624 return true; 625 } 626 627 return false; 628} 629