ArchiveReader.cpp revision 303595942502f17c087fa28874c2b89117148c45
1//===-- ArchiveReader.cpp - Read LLVM archive files -------------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Builds up standard unix archive files (.a) containing LLVM bitcode. 11// 12//===----------------------------------------------------------------------===// 13 14#include "ArchiveInternals.h" 15#include "llvm/Bitcode/ReaderWriter.h" 16#include "llvm/Support/MemoryBuffer.h" 17#include "llvm/Module.h" 18#include <memory> 19using namespace llvm; 20 21/// Read a variable-bit-rate encoded unsigned integer 22inline unsigned readInteger(const char*&At, const char*End){ 23 unsigned Shift = 0; 24 unsigned Result = 0; 25 26 do { 27 if (At == End) 28 return Result; 29 Result |= (unsigned)((*At++) & 0x7F) << Shift; 30 Shift += 7; 31 } while (At[-1] & 0x80); 32 return Result; 33} 34 35// Completely parse the Archive's symbol table and populate symTab member var. 36bool 37Archive::parseSymbolTable(const void* data, unsigned size, std::string* error) { 38 const char* At = (const char*) data; 39 const char* End = At + size; 40 while (At < End) { 41 unsigned offset = readInteger(At, End); 42 if (At == End) { 43 if (error) 44 *error = "Ran out of data reading vbr_uint for symtab offset!"; 45 return false; 46 } 47 unsigned length = readInteger(At, End); 48 if (At == End) { 49 if (error) 50 *error = "Ran out of data reading vbr_uint for symtab length!"; 51 return false; 52 } 53 if (At + length > End) { 54 if (error) 55 *error = "Malformed symbol table: length not consistent with size"; 56 return false; 57 } 58 // we don't care if it can't be inserted (duplicate entry) 59 symTab.insert(std::make_pair(std::string(At, length), offset)); 60 At += length; 61 } 62 symTabSize = size; 63 return true; 64} 65 66// This member parses an ArchiveMemberHeader that is presumed to be pointed to 67// by At. The At pointer is updated to the byte just after the header, which 68// can be variable in size. 69ArchiveMember* 70Archive::parseMemberHeader(const char*& At, const char* End, std::string* error) 71{ 72 if (At + sizeof(ArchiveMemberHeader) >= End) { 73 if (error) 74 *error = "Unexpected end of file"; 75 return 0; 76 } 77 78 // Cast archive member header 79 ArchiveMemberHeader* Hdr = (ArchiveMemberHeader*)At; 80 At += sizeof(ArchiveMemberHeader); 81 82 // Extract the size and determine if the file is 83 // compressed or not (negative length). 84 int flags = 0; 85 int MemberSize = atoi(Hdr->size); 86 if (MemberSize < 0) { 87 flags |= ArchiveMember::CompressedFlag; 88 MemberSize = -MemberSize; 89 } 90 91 // Check the size of the member for sanity 92 if (At + MemberSize > End) { 93 if (error) 94 *error = "invalid member length in archive file"; 95 return 0; 96 } 97 98 // Check the member signature 99 if (!Hdr->checkSignature()) { 100 if (error) 101 *error = "invalid file member signature"; 102 return 0; 103 } 104 105 // Convert and check the member name 106 // The empty name ( '/' and 15 blanks) is for a foreign (non-LLVM) symbol 107 // table. The special name "//" and 14 blanks is for a string table, used 108 // for long file names. This library doesn't generate either of those but 109 // it will accept them. If the name starts with #1/ and the remainder is 110 // digits, then those digits specify the length of the name that is 111 // stored immediately following the header. The special name 112 // __LLVM_SYM_TAB__ identifies the symbol table for LLVM bitcode. 113 // Anything else is a regular, short filename that is terminated with 114 // a '/' and blanks. 115 116 std::string pathname; 117 switch (Hdr->name[0]) { 118 case '#': 119 if (Hdr->name[1] == '1' && Hdr->name[2] == '/') { 120 if (isdigit(Hdr->name[3])) { 121 unsigned len = atoi(&Hdr->name[3]); 122 pathname.assign(At, len); 123 At += len; 124 MemberSize -= len; 125 flags |= ArchiveMember::HasLongFilenameFlag; 126 } else { 127 if (error) 128 *error = "invalid long filename"; 129 return 0; 130 } 131 } else if (Hdr->name[1] == '_' && 132 (0 == memcmp(Hdr->name, ARFILE_LLVM_SYMTAB_NAME, 16))) { 133 // The member is using a long file name (>15 chars) format. 134 // This format is standard for 4.4BSD and Mac OSX operating 135 // systems. LLVM uses it similarly. In this format, the 136 // remainder of the name field (after #1/) specifies the 137 // length of the file name which occupy the first bytes of 138 // the member's data. The pathname already has the #1/ stripped. 139 pathname.assign(ARFILE_LLVM_SYMTAB_NAME); 140 flags |= ArchiveMember::LLVMSymbolTableFlag; 141 } 142 break; 143 case '/': 144 if (Hdr->name[1]== '/') { 145 if (0 == memcmp(Hdr->name, ARFILE_STRTAB_NAME, 16)) { 146 pathname.assign(ARFILE_STRTAB_NAME); 147 flags |= ArchiveMember::StringTableFlag; 148 } else { 149 if (error) 150 *error = "invalid string table name"; 151 return 0; 152 } 153 } else if (Hdr->name[1] == ' ') { 154 if (0 == memcmp(Hdr->name, ARFILE_SVR4_SYMTAB_NAME, 16)) { 155 pathname.assign(ARFILE_SVR4_SYMTAB_NAME); 156 flags |= ArchiveMember::SVR4SymbolTableFlag; 157 } else { 158 if (error) 159 *error = "invalid SVR4 symbol table name"; 160 return 0; 161 } 162 } else if (isdigit(Hdr->name[1])) { 163 unsigned index = atoi(&Hdr->name[1]); 164 if (index < strtab.length()) { 165 const char* namep = strtab.c_str() + index; 166 const char* endp = strtab.c_str() + strtab.length(); 167 const char* p = namep; 168 const char* last_p = p; 169 while (p < endp) { 170 if (*p == '\n' && *last_p == '/') { 171 pathname.assign(namep, last_p - namep); 172 flags |= ArchiveMember::HasLongFilenameFlag; 173 break; 174 } 175 last_p = p; 176 p++; 177 } 178 if (p >= endp) { 179 if (error) 180 *error = "missing name termiantor in string table"; 181 return 0; 182 } 183 } else { 184 if (error) 185 *error = "name index beyond string table"; 186 return 0; 187 } 188 } 189 break; 190 case '_': 191 if (Hdr->name[1] == '_' && 192 (0 == memcmp(Hdr->name, ARFILE_BSD4_SYMTAB_NAME, 16))) { 193 pathname.assign(ARFILE_BSD4_SYMTAB_NAME); 194 flags |= ArchiveMember::BSD4SymbolTableFlag; 195 break; 196 } 197 /* FALL THROUGH */ 198 199 default: 200 char* slash = (char*) memchr(Hdr->name, '/', 16); 201 if (slash == 0) 202 slash = Hdr->name + 16; 203 pathname.assign(Hdr->name, slash - Hdr->name); 204 break; 205 } 206 207 // Determine if this is a bitcode file 208 switch (sys::IdentifyFileType(At, 4)) { 209 case sys::Bitcode_FileType: 210 flags |= ArchiveMember::BitcodeFlag; 211 break; 212 default: 213 flags &= ~ArchiveMember::BitcodeFlag; 214 break; 215 } 216 217 // Instantiate the ArchiveMember to be filled 218 ArchiveMember* member = new ArchiveMember(this); 219 220 // Fill in fields of the ArchiveMember 221 member->next = 0; 222 member->prev = 0; 223 member->parent = this; 224 member->path.set(pathname); 225 member->info.fileSize = MemberSize; 226 member->info.modTime.fromEpochTime(atoi(Hdr->date)); 227 unsigned int mode; 228 sscanf(Hdr->mode, "%o", &mode); 229 member->info.mode = mode; 230 member->info.user = atoi(Hdr->uid); 231 member->info.group = atoi(Hdr->gid); 232 member->flags = flags; 233 member->data = At; 234 235 return member; 236} 237 238bool 239Archive::checkSignature(std::string* error) { 240 // Check the magic string at file's header 241 if (mapfile->size() < 8 || memcmp(base, ARFILE_MAGIC, 8)) { 242 if (error) 243 *error = "invalid signature for an archive file"; 244 return false; 245 } 246 return true; 247} 248 249// This function loads the entire archive and fully populates its ilist with 250// the members of the archive file. This is typically used in preparation for 251// editing the contents of the archive. 252bool 253Archive::loadArchive(std::string* error) { 254 255 // Set up parsing 256 members.clear(); 257 symTab.clear(); 258 const char *At = base; 259 const char *End = base + mapfile->size(); 260 261 if (!checkSignature(error)) 262 return false; 263 264 At += 8; // Skip the magic string. 265 266 bool seenSymbolTable = false; 267 bool foundFirstFile = false; 268 while (At < End) { 269 // parse the member header 270 const char* Save = At; 271 ArchiveMember* mbr = parseMemberHeader(At, End, error); 272 if (!mbr) 273 return false; 274 275 // check if this is the foreign symbol table 276 if (mbr->isSVR4SymbolTable() || mbr->isBSD4SymbolTable()) { 277 // We just save this but don't do anything special 278 // with it. It doesn't count as the "first file". 279 if (foreignST) { 280 // What? Multiple foreign symbol tables? Just chuck it 281 // and retain the last one found. 282 delete foreignST; 283 } 284 foreignST = mbr; 285 At += mbr->getSize(); 286 if ((intptr_t(At) & 1) == 1) 287 At++; 288 } else if (mbr->isStringTable()) { 289 // Simply suck the entire string table into a string 290 // variable. This will be used to get the names of the 291 // members that use the "/ddd" format for their names 292 // (SVR4 style long names). 293 strtab.assign(At, mbr->getSize()); 294 At += mbr->getSize(); 295 if ((intptr_t(At) & 1) == 1) 296 At++; 297 delete mbr; 298 } else if (mbr->isLLVMSymbolTable()) { 299 // This is the LLVM symbol table for the archive. If we've seen it 300 // already, its an error. Otherwise, parse the symbol table and move on. 301 if (seenSymbolTable) { 302 if (error) 303 *error = "invalid archive: multiple symbol tables"; 304 return false; 305 } 306 if (!parseSymbolTable(mbr->getData(), mbr->getSize(), error)) 307 return false; 308 seenSymbolTable = true; 309 At += mbr->getSize(); 310 if ((intptr_t(At) & 1) == 1) 311 At++; 312 delete mbr; // We don't need this member in the list of members. 313 } else { 314 // This is just a regular file. If its the first one, save its offset. 315 // Otherwise just push it on the list and move on to the next file. 316 if (!foundFirstFile) { 317 firstFileOffset = Save - base; 318 foundFirstFile = true; 319 } 320 members.push_back(mbr); 321 At += mbr->getSize(); 322 if ((intptr_t(At) & 1) == 1) 323 At++; 324 } 325 } 326 return true; 327} 328 329// Open and completely load the archive file. 330Archive* 331Archive::OpenAndLoad(const sys::Path& file, std::string* ErrorMessage) 332{ 333 std::auto_ptr<Archive> result ( new Archive(file)); 334 if (result->mapToMemory(ErrorMessage)) 335 return 0; 336 if (!result->loadArchive(ErrorMessage)) 337 return 0; 338 return result.release(); 339} 340 341// Get all the bitcode modules from the archive 342bool 343Archive::getAllModules(std::vector<Module*>& Modules, std::string* ErrMessage) { 344 345 for (iterator I=begin(), E=end(); I != E; ++I) { 346 if (I->isBitcode()) { 347 std::string FullMemberName = archPath.toString() + 348 "(" + I->getPath().toString() + ")"; 349 MemoryBuffer *Buffer = 350 MemoryBuffer::getNewMemBuffer(I->getSize(), FullMemberName.c_str()); 351 memcpy((char*)Buffer->getBufferStart(), I->getData(), I->getSize()); 352 353 Module *M = ParseBitcodeFile(Buffer, ErrMessage); 354 delete Buffer; 355 if (!M) 356 return true; 357 358 Modules.push_back(M); 359 } 360 } 361 return false; 362} 363 364// Load just the symbol table from the archive file 365bool 366Archive::loadSymbolTable(std::string* ErrorMsg) { 367 368 // Set up parsing 369 members.clear(); 370 symTab.clear(); 371 const char *At = base; 372 const char *End = base + mapfile->size(); 373 374 // Make sure we're dealing with an archive 375 if (!checkSignature(ErrorMsg)) 376 return false; 377 378 At += 8; // Skip signature 379 380 // Parse the first file member header 381 const char* FirstFile = At; 382 ArchiveMember* mbr = parseMemberHeader(At, End, ErrorMsg); 383 if (!mbr) 384 return false; 385 386 if (mbr->isSVR4SymbolTable() || mbr->isBSD4SymbolTable()) { 387 // Skip the foreign symbol table, we don't do anything with it 388 At += mbr->getSize(); 389 if ((intptr_t(At) & 1) == 1) 390 At++; 391 delete mbr; 392 393 // Read the next one 394 FirstFile = At; 395 mbr = parseMemberHeader(At, End, ErrorMsg); 396 if (!mbr) { 397 delete mbr; 398 return false; 399 } 400 } 401 402 if (mbr->isStringTable()) { 403 // Process the string table entry 404 strtab.assign((const char*)mbr->getData(), mbr->getSize()); 405 At += mbr->getSize(); 406 if ((intptr_t(At) & 1) == 1) 407 At++; 408 delete mbr; 409 // Get the next one 410 FirstFile = At; 411 mbr = parseMemberHeader(At, End, ErrorMsg); 412 if (!mbr) { 413 delete mbr; 414 return false; 415 } 416 } 417 418 // See if its the symbol table 419 if (mbr->isLLVMSymbolTable()) { 420 if (!parseSymbolTable(mbr->getData(), mbr->getSize(), ErrorMsg)) { 421 delete mbr; 422 return false; 423 } 424 425 At += mbr->getSize(); 426 if ((intptr_t(At) & 1) == 1) 427 At++; 428 delete mbr; 429 // Can't be any more symtab headers so just advance 430 FirstFile = At; 431 } else { 432 // There's no symbol table in the file. We have to rebuild it from scratch 433 // because the intent of this method is to get the symbol table loaded so 434 // it can be searched efficiently. 435 // Add the member to the members list 436 members.push_back(mbr); 437 } 438 439 firstFileOffset = FirstFile - base; 440 return true; 441} 442 443// Open the archive and load just the symbol tables 444Archive* 445Archive::OpenAndLoadSymbols(const sys::Path& file, std::string* ErrorMessage) { 446 std::auto_ptr<Archive> result ( new Archive(file) ); 447 if (result->mapToMemory(ErrorMessage)) 448 return 0; 449 if (!result->loadSymbolTable(ErrorMessage)) 450 return 0; 451 return result.release(); 452} 453 454// Look up one symbol in the symbol table and return a ModuleProvider for the 455// module that defines that symbol. 456ModuleProvider* 457Archive::findModuleDefiningSymbol(const std::string& symbol, 458 std::string* ErrMsg) { 459 SymTabType::iterator SI = symTab.find(symbol); 460 if (SI == symTab.end()) 461 return 0; 462 463 // The symbol table was previously constructed assuming that the members were 464 // written without the symbol table header. Because VBR encoding is used, the 465 // values could not be adjusted to account for the offset of the symbol table 466 // because that could affect the size of the symbol table due to VBR encoding. 467 // We now have to account for this by adjusting the offset by the size of the 468 // symbol table and its header. 469 unsigned fileOffset = 470 SI->second + // offset in symbol-table-less file 471 firstFileOffset; // add offset to first "real" file in archive 472 473 // See if the module is already loaded 474 ModuleMap::iterator MI = modules.find(fileOffset); 475 if (MI != modules.end()) 476 return MI->second.first; 477 478 // Module hasn't been loaded yet, we need to load it 479 const char* modptr = base + fileOffset; 480 ArchiveMember* mbr = parseMemberHeader(modptr, base + mapfile->size(),ErrMsg); 481 if (!mbr) 482 return 0; 483 484 // Now, load the bitcode module to get the ModuleProvider 485 std::string FullMemberName = archPath.toString() + "(" + 486 mbr->getPath().toString() + ")"; 487 MemoryBuffer *Buffer =MemoryBuffer::getNewMemBuffer(mbr->getSize(), 488 FullMemberName.c_str()); 489 memcpy((char*)Buffer->getBufferStart(), mbr->getData(), mbr->getSize()); 490 491 ModuleProvider *mp = getBitcodeModuleProvider(Buffer, ErrMsg); 492 if (!mp) 493 return 0; 494 495 modules.insert(std::make_pair(fileOffset, std::make_pair(mp, mbr))); 496 497 return mp; 498} 499 500// Look up multiple symbols in the symbol table and return a set of 501// ModuleProviders that define those symbols. 502bool 503Archive::findModulesDefiningSymbols(std::set<std::string>& symbols, 504 std::set<ModuleProvider*>& result, 505 std::string* error) { 506 if (!mapfile || !base) { 507 if (error) 508 *error = "Empty archive invalid for finding modules defining symbols"; 509 return false; 510 } 511 512 if (symTab.empty()) { 513 // We don't have a symbol table, so we must build it now but lets also 514 // make sure that we populate the modules table as we do this to ensure 515 // that we don't load them twice when findModuleDefiningSymbol is called 516 // below. 517 518 // Get a pointer to the first file 519 const char* At = ((const char*)base) + firstFileOffset; 520 const char* End = ((const char*)base) + mapfile->size(); 521 522 while ( At < End) { 523 // Compute the offset to be put in the symbol table 524 unsigned offset = At - base - firstFileOffset; 525 526 // Parse the file's header 527 ArchiveMember* mbr = parseMemberHeader(At, End, error); 528 if (!mbr) 529 return false; 530 531 // If it contains symbols 532 if (mbr->isBitcode()) { 533 // Get the symbols 534 std::vector<std::string> symbols; 535 std::string FullMemberName = archPath.toString() + "(" + 536 mbr->getPath().toString() + ")"; 537 ModuleProvider* MP = 538 GetBitcodeSymbols((const unsigned char*)At, mbr->getSize(), 539 FullMemberName, symbols, error); 540 541 if (MP) { 542 // Insert the module's symbols into the symbol table 543 for (std::vector<std::string>::iterator I = symbols.begin(), 544 E=symbols.end(); I != E; ++I ) { 545 symTab.insert(std::make_pair(*I, offset)); 546 } 547 // Insert the ModuleProvider and the ArchiveMember into the table of 548 // modules. 549 modules.insert(std::make_pair(offset, std::make_pair(MP, mbr))); 550 } else { 551 if (error) 552 *error = "Can't parse bitcode member: " + 553 mbr->getPath().toString() + ": " + *error; 554 delete mbr; 555 return false; 556 } 557 } 558 559 // Go to the next file location 560 At += mbr->getSize(); 561 if ((intptr_t(At) & 1) == 1) 562 At++; 563 } 564 } 565 566 // At this point we have a valid symbol table (one way or another) so we 567 // just use it to quickly find the symbols requested. 568 569 for (std::set<std::string>::iterator I=symbols.begin(), 570 E=symbols.end(); I != E;) { 571 // See if this symbol exists 572 ModuleProvider* mp = findModuleDefiningSymbol(*I,error); 573 if (mp) { 574 // The symbol exists, insert the ModuleProvider into our result, 575 // duplicates wil be ignored 576 result.insert(mp); 577 578 // Remove the symbol now that its been resolved, being careful to 579 // post-increment the iterator. 580 symbols.erase(I++); 581 } else { 582 ++I; 583 } 584 } 585 return true; 586} 587 588bool Archive::isBitcodeArchive() { 589 // Make sure the symTab has been loaded. In most cases this should have been 590 // done when the archive was constructed, but still, this is just in case. 591 if (symTab.empty()) 592 if (!loadSymbolTable(0)) 593 return false; 594 595 // Now that we know it's been loaded, return true 596 // if it has a size 597 if (symTab.size()) return true; 598 599 // We still can't be sure it isn't a bitcode archive 600 if (!loadArchive(0)) 601 return false; 602 603 std::vector<Module *> Modules; 604 std::string ErrorMessage; 605 606 // Scan the archive, trying to load a bitcode member. We only load one to 607 // see if this works. 608 for (iterator I = begin(), E = end(); I != E; ++I) { 609 if (!I->isBitcode()) 610 continue; 611 612 std::string FullMemberName = 613 archPath.toString() + "(" + I->getPath().toString() + ")"; 614 615 MemoryBuffer *Buffer = 616 MemoryBuffer::getNewMemBuffer(I->getSize(), FullMemberName.c_str()); 617 memcpy((char*)Buffer->getBufferStart(), I->getData(), I->getSize()); 618 Module *M = ParseBitcodeFile(Buffer); 619 delete Buffer; 620 if (!M) 621 return false; // Couldn't parse bitcode, not a bitcode archive. 622 delete M; 623 return true; 624 } 625 626 return false; 627} 628