Archive.cpp revision dce4a407a24b04eebc6a376f8e62b41aaa7b071f
1//===- Archive.cpp - ar File Format implementation --------------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines the ArchiveObjectFile class. 11// 12//===----------------------------------------------------------------------===// 13 14#include "llvm/Object/Archive.h" 15#include "llvm/ADT/APInt.h" 16#include "llvm/ADT/SmallString.h" 17#include "llvm/ADT/Twine.h" 18#include "llvm/Support/Endian.h" 19#include "llvm/Support/MemoryBuffer.h" 20 21using namespace llvm; 22using namespace object; 23 24static const char *const Magic = "!<arch>\n"; 25 26void Archive::anchor() { } 27 28StringRef ArchiveMemberHeader::getName() const { 29 char EndCond; 30 if (Name[0] == '/' || Name[0] == '#') 31 EndCond = ' '; 32 else 33 EndCond = '/'; 34 llvm::StringRef::size_type end = 35 llvm::StringRef(Name, sizeof(Name)).find(EndCond); 36 if (end == llvm::StringRef::npos) 37 end = sizeof(Name); 38 assert(end <= sizeof(Name) && end > 0); 39 // Don't include the EndCond if there is one. 40 return llvm::StringRef(Name, end); 41} 42 43uint32_t ArchiveMemberHeader::getSize() const { 44 uint32_t Ret; 45 if (llvm::StringRef(Size, sizeof(Size)).rtrim(" ").getAsInteger(10, Ret)) 46 llvm_unreachable("Size is not a decimal number."); 47 return Ret; 48} 49 50sys::fs::perms ArchiveMemberHeader::getAccessMode() const { 51 unsigned Ret; 52 if (StringRef(AccessMode, sizeof(AccessMode)).rtrim(" ").getAsInteger(8, Ret)) 53 llvm_unreachable("Access mode is not an octal number."); 54 return static_cast<sys::fs::perms>(Ret); 55} 56 57sys::TimeValue ArchiveMemberHeader::getLastModified() const { 58 unsigned Seconds; 59 if (StringRef(LastModified, sizeof(LastModified)).rtrim(" ") 60 .getAsInteger(10, Seconds)) 61 llvm_unreachable("Last modified time not a decimal number."); 62 63 sys::TimeValue Ret; 64 Ret.fromEpochTime(Seconds); 65 return Ret; 66} 67 68unsigned ArchiveMemberHeader::getUID() const { 69 unsigned Ret; 70 if (StringRef(UID, sizeof(UID)).rtrim(" ").getAsInteger(10, Ret)) 71 llvm_unreachable("UID time not a decimal number."); 72 return Ret; 73} 74 75unsigned ArchiveMemberHeader::getGID() const { 76 unsigned Ret; 77 if (StringRef(GID, sizeof(GID)).rtrim(" ").getAsInteger(10, Ret)) 78 llvm_unreachable("GID time not a decimal number."); 79 return Ret; 80} 81 82Archive::Child::Child(const Archive *Parent, const char *Start) 83 : Parent(Parent) { 84 if (!Start) 85 return; 86 87 const ArchiveMemberHeader *Header = 88 reinterpret_cast<const ArchiveMemberHeader *>(Start); 89 Data = StringRef(Start, sizeof(ArchiveMemberHeader) + Header->getSize()); 90 91 // Setup StartOfFile and PaddingBytes. 92 StartOfFile = sizeof(ArchiveMemberHeader); 93 // Don't include attached name. 94 StringRef Name = Header->getName(); 95 if (Name.startswith("#1/")) { 96 uint64_t NameSize; 97 if (Name.substr(3).rtrim(" ").getAsInteger(10, NameSize)) 98 llvm_unreachable("Long name length is not an integer"); 99 StartOfFile += NameSize; 100 } 101} 102 103Archive::Child Archive::Child::getNext() const { 104 size_t SpaceToSkip = Data.size(); 105 // If it's odd, add 1 to make it even. 106 if (SpaceToSkip & 1) 107 ++SpaceToSkip; 108 109 const char *NextLoc = Data.data() + SpaceToSkip; 110 111 // Check to see if this is past the end of the archive. 112 if (NextLoc >= Parent->Data->getBufferEnd()) 113 return Child(Parent, nullptr); 114 115 return Child(Parent, NextLoc); 116} 117 118error_code Archive::Child::getName(StringRef &Result) const { 119 StringRef name = getRawName(); 120 // Check if it's a special name. 121 if (name[0] == '/') { 122 if (name.size() == 1) { // Linker member. 123 Result = name; 124 return object_error::success; 125 } 126 if (name.size() == 2 && name[1] == '/') { // String table. 127 Result = name; 128 return object_error::success; 129 } 130 // It's a long name. 131 // Get the offset. 132 std::size_t offset; 133 if (name.substr(1).rtrim(" ").getAsInteger(10, offset)) 134 llvm_unreachable("Long name offset is not an integer"); 135 const char *addr = Parent->StringTable->Data.begin() 136 + sizeof(ArchiveMemberHeader) 137 + offset; 138 // Verify it. 139 if (Parent->StringTable == Parent->child_end() 140 || addr < (Parent->StringTable->Data.begin() 141 + sizeof(ArchiveMemberHeader)) 142 || addr > (Parent->StringTable->Data.begin() 143 + sizeof(ArchiveMemberHeader) 144 + Parent->StringTable->getSize())) 145 return object_error::parse_failed; 146 147 // GNU long file names end with a /. 148 if (Parent->kind() == K_GNU) { 149 StringRef::size_type End = StringRef(addr).find('/'); 150 Result = StringRef(addr, End); 151 } else { 152 Result = addr; 153 } 154 return object_error::success; 155 } else if (name.startswith("#1/")) { 156 uint64_t name_size; 157 if (name.substr(3).rtrim(" ").getAsInteger(10, name_size)) 158 llvm_unreachable("Long name length is not an ingeter"); 159 Result = Data.substr(sizeof(ArchiveMemberHeader), name_size) 160 .rtrim(StringRef("\0", 1)); 161 return object_error::success; 162 } 163 // It's a simple name. 164 if (name[name.size() - 1] == '/') 165 Result = name.substr(0, name.size() - 1); 166 else 167 Result = name; 168 return object_error::success; 169} 170 171error_code Archive::Child::getMemoryBuffer(std::unique_ptr<MemoryBuffer> &Result, 172 bool FullPath) const { 173 StringRef Name; 174 if (error_code ec = getName(Name)) 175 return ec; 176 SmallString<128> Path; 177 Result.reset(MemoryBuffer::getMemBuffer( 178 getBuffer(), FullPath ? (Twine(Parent->getFileName()) + "(" + Name + ")") 179 .toStringRef(Path) 180 : Name, 181 false)); 182 return error_code::success(); 183} 184 185error_code Archive::Child::getAsBinary(std::unique_ptr<Binary> &Result, 186 LLVMContext *Context) const { 187 std::unique_ptr<Binary> ret; 188 std::unique_ptr<MemoryBuffer> Buff; 189 if (error_code ec = getMemoryBuffer(Buff)) 190 return ec; 191 ErrorOr<Binary *> BinaryOrErr = createBinary(Buff.release(), Context); 192 if (error_code EC = BinaryOrErr.getError()) 193 return EC; 194 Result.reset(BinaryOrErr.get()); 195 return object_error::success; 196} 197 198ErrorOr<Archive*> Archive::create(MemoryBuffer *Source) { 199 error_code EC; 200 std::unique_ptr<Archive> Ret(new Archive(Source, EC)); 201 if (EC) 202 return EC; 203 return Ret.release(); 204} 205 206Archive::Archive(MemoryBuffer *source, error_code &ec) 207 : Binary(Binary::ID_Archive, source), SymbolTable(child_end()) { 208 // Check for sufficient magic. 209 assert(source); 210 if (source->getBufferSize() < 8 || 211 StringRef(source->getBufferStart(), 8) != Magic) { 212 ec = object_error::invalid_file_type; 213 return; 214 } 215 216 // Get the special members. 217 child_iterator i = child_begin(false); 218 child_iterator e = child_end(); 219 220 if (i == e) { 221 ec = object_error::success; 222 return; 223 } 224 225 StringRef Name = i->getRawName(); 226 227 // Below is the pattern that is used to figure out the archive format 228 // GNU archive format 229 // First member : / (may exist, if it exists, points to the symbol table ) 230 // Second member : // (may exist, if it exists, points to the string table) 231 // Note : The string table is used if the filename exceeds 15 characters 232 // BSD archive format 233 // First member : __.SYMDEF or "__.SYMDEF SORTED" (the symbol table) 234 // There is no string table, if the filename exceeds 15 characters or has a 235 // embedded space, the filename has #1/<size>, The size represents the size 236 // of the filename that needs to be read after the archive header 237 // COFF archive format 238 // First member : / 239 // Second member : / (provides a directory of symbols) 240 // Third member : // (may exist, if it exists, contains the string table) 241 // Note: Microsoft PE/COFF Spec 8.3 says that the third member is present 242 // even if the string table is empty. However, lib.exe does not in fact 243 // seem to create the third member if there's no member whose filename 244 // exceeds 15 characters. So the third member is optional. 245 246 if (Name == "__.SYMDEF") { 247 Format = K_BSD; 248 SymbolTable = i; 249 ++i; 250 FirstRegular = i; 251 ec = object_error::success; 252 return; 253 } 254 255 if (Name.startswith("#1/")) { 256 Format = K_BSD; 257 // We know this is BSD, so getName will work since there is no string table. 258 ec = i->getName(Name); 259 if (ec) 260 return; 261 if (Name == "__.SYMDEF SORTED") { 262 SymbolTable = i; 263 ++i; 264 } 265 FirstRegular = i; 266 return; 267 } 268 269 if (Name == "/") { 270 SymbolTable = i; 271 272 ++i; 273 if (i == e) { 274 ec = object_error::parse_failed; 275 return; 276 } 277 Name = i->getRawName(); 278 } 279 280 if (Name == "//") { 281 Format = K_GNU; 282 StringTable = i; 283 ++i; 284 FirstRegular = i; 285 ec = object_error::success; 286 return; 287 } 288 289 if (Name[0] != '/') { 290 Format = K_GNU; 291 FirstRegular = i; 292 ec = object_error::success; 293 return; 294 } 295 296 if (Name != "/") { 297 ec = object_error::parse_failed; 298 return; 299 } 300 301 Format = K_COFF; 302 SymbolTable = i; 303 304 ++i; 305 if (i == e) { 306 FirstRegular = i; 307 ec = object_error::success; 308 return; 309 } 310 311 Name = i->getRawName(); 312 313 if (Name == "//") { 314 StringTable = i; 315 ++i; 316 } 317 318 FirstRegular = i; 319 ec = object_error::success; 320} 321 322Archive::child_iterator Archive::child_begin(bool SkipInternal) const { 323 if (Data->getBufferSize() == 8) // empty archive. 324 return child_end(); 325 326 if (SkipInternal) 327 return FirstRegular; 328 329 const char *Loc = Data->getBufferStart() + strlen(Magic); 330 Child c(this, Loc); 331 return c; 332} 333 334Archive::child_iterator Archive::child_end() const { 335 return Child(this, nullptr); 336} 337 338error_code Archive::Symbol::getName(StringRef &Result) const { 339 Result = StringRef(Parent->SymbolTable->getBuffer().begin() + StringIndex); 340 return object_error::success; 341} 342 343error_code Archive::Symbol::getMember(child_iterator &Result) const { 344 const char *Buf = Parent->SymbolTable->getBuffer().begin(); 345 const char *Offsets = Buf + 4; 346 uint32_t Offset = 0; 347 if (Parent->kind() == K_GNU) { 348 Offset = *(reinterpret_cast<const support::ubig32_t*>(Offsets) 349 + SymbolIndex); 350 } else if (Parent->kind() == K_BSD) { 351 llvm_unreachable("BSD format is not supported"); 352 } else { 353 uint32_t MemberCount = *reinterpret_cast<const support::ulittle32_t*>(Buf); 354 355 // Skip offsets. 356 Buf += sizeof(support::ulittle32_t) 357 + (MemberCount * sizeof(support::ulittle32_t)); 358 359 uint32_t SymbolCount = *reinterpret_cast<const support::ulittle32_t*>(Buf); 360 361 if (SymbolIndex >= SymbolCount) 362 return object_error::parse_failed; 363 364 // Skip SymbolCount to get to the indices table. 365 const char *Indices = Buf + sizeof(support::ulittle32_t); 366 367 // Get the index of the offset in the file member offset table for this 368 // symbol. 369 uint16_t OffsetIndex = 370 *(reinterpret_cast<const support::ulittle16_t*>(Indices) 371 + SymbolIndex); 372 // Subtract 1 since OffsetIndex is 1 based. 373 --OffsetIndex; 374 375 if (OffsetIndex >= MemberCount) 376 return object_error::parse_failed; 377 378 Offset = *(reinterpret_cast<const support::ulittle32_t*>(Offsets) 379 + OffsetIndex); 380 } 381 382 const char *Loc = Parent->getData().begin() + Offset; 383 Result = Child(Parent, Loc); 384 385 return object_error::success; 386} 387 388Archive::Symbol Archive::Symbol::getNext() const { 389 Symbol t(*this); 390 // Go to one past next null. 391 t.StringIndex = 392 Parent->SymbolTable->getBuffer().find('\0', t.StringIndex) + 1; 393 ++t.SymbolIndex; 394 return t; 395} 396 397Archive::symbol_iterator Archive::symbol_begin() const { 398 if (!hasSymbolTable()) 399 return symbol_iterator(Symbol(this, 0, 0)); 400 401 const char *buf = SymbolTable->getBuffer().begin(); 402 if (kind() == K_GNU) { 403 uint32_t symbol_count = 0; 404 symbol_count = *reinterpret_cast<const support::ubig32_t*>(buf); 405 buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t))); 406 } else if (kind() == K_BSD) { 407 llvm_unreachable("BSD archive format is not supported"); 408 } else { 409 uint32_t member_count = 0; 410 uint32_t symbol_count = 0; 411 member_count = *reinterpret_cast<const support::ulittle32_t*>(buf); 412 buf += 4 + (member_count * 4); // Skip offsets. 413 symbol_count = *reinterpret_cast<const support::ulittle32_t*>(buf); 414 buf += 4 + (symbol_count * 2); // Skip indices. 415 } 416 uint32_t string_start_offset = buf - SymbolTable->getBuffer().begin(); 417 return symbol_iterator(Symbol(this, 0, string_start_offset)); 418} 419 420Archive::symbol_iterator Archive::symbol_end() const { 421 if (!hasSymbolTable()) 422 return symbol_iterator(Symbol(this, 0, 0)); 423 424 const char *buf = SymbolTable->getBuffer().begin(); 425 uint32_t symbol_count = 0; 426 if (kind() == K_GNU) { 427 symbol_count = *reinterpret_cast<const support::ubig32_t*>(buf); 428 } else if (kind() == K_BSD) { 429 llvm_unreachable("BSD archive format is not supported"); 430 } else { 431 uint32_t member_count = 0; 432 member_count = *reinterpret_cast<const support::ulittle32_t*>(buf); 433 buf += 4 + (member_count * 4); // Skip offsets. 434 symbol_count = *reinterpret_cast<const support::ulittle32_t*>(buf); 435 } 436 return symbol_iterator( 437 Symbol(this, symbol_count, 0)); 438} 439 440Archive::child_iterator Archive::findSym(StringRef name) const { 441 Archive::symbol_iterator bs = symbol_begin(); 442 Archive::symbol_iterator es = symbol_end(); 443 Archive::child_iterator result; 444 445 StringRef symname; 446 for (; bs != es; ++bs) { 447 if (bs->getName(symname)) 448 return child_end(); 449 if (symname == name) { 450 if (bs->getMember(result)) 451 return child_end(); 452 return result; 453 } 454 } 455 return child_end(); 456} 457 458bool Archive::hasSymbolTable() const { 459 return SymbolTable != child_end(); 460} 461