GNUArchiveReader.cpp revision 22add6ff3426df1a85089fe6a6e1597ee3b6f300
1//===- GNUArchiveReader.cpp -----------------------------------------------===// 2// 3// The MCLinker Project 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9#include <mcld/LD/GNUArchiveReader.h> 10 11#include <mcld/Module.h> 12#include <mcld/InputTree.h> 13#include <mcld/MC/Attribute.h> 14#include <mcld/MC/MCLDInput.h> 15#include <mcld/LD/ResolveInfo.h> 16#include <mcld/LD/ELFObjectReader.h> 17#include <mcld/Support/FileSystem.h> 18#include <mcld/Support/FileHandle.h> 19#include <mcld/Support/MemoryArea.h> 20#include <mcld/Support/MemoryRegion.h> 21#include <mcld/Support/MsgHandling.h> 22#include <mcld/Support/Path.h> 23#include <mcld/ADT/SizeTraits.h> 24 25#include <llvm/ADT/StringRef.h> 26#include <llvm/Support/Host.h> 27 28#include <cstring> 29#include <cstdlib> 30 31using namespace mcld; 32 33GNUArchiveReader::GNUArchiveReader(Module& pModule, 34 ELFObjectReader& pELFObjectReader) 35 : m_Module(pModule), 36 m_ELFObjectReader(pELFObjectReader) 37{ 38} 39 40GNUArchiveReader::~GNUArchiveReader() 41{ 42} 43 44/// isMyFormat 45bool GNUArchiveReader::isMyFormat(Input& pInput) const 46{ 47 assert(pInput.hasMemArea()); 48 MemoryRegion* region = pInput.memArea()->request(pInput.fileOffset(), 49 Archive::MAGIC_LEN); 50 const char* str = reinterpret_cast<const char*>(region->getBuffer()); 51 52 bool result = false; 53 assert(NULL != str); 54 if (isArchive(str) || isThinArchive(str)) 55 result = true; 56 57 pInput.memArea()->release(region); 58 return result; 59} 60 61/// isArchive 62bool GNUArchiveReader::isArchive(const char* pStr) const 63{ 64 return (0 == memcmp(pStr, Archive::MAGIC, Archive::MAGIC_LEN)); 65} 66 67/// isThinArchive 68bool GNUArchiveReader::isThinArchive(const char* pStr) const 69{ 70 return (0 == memcmp(pStr, Archive::THIN_MAGIC, Archive::MAGIC_LEN)); 71} 72 73/// isThinArchive 74bool GNUArchiveReader::isThinArchive(Input& pInput) const 75{ 76 assert(pInput.hasMemArea()); 77 MemoryRegion* region = pInput.memArea()->request(pInput.fileOffset(), 78 Archive::MAGIC_LEN); 79 const char* str = reinterpret_cast<const char*>(region->getBuffer()); 80 81 bool result = false; 82 assert(NULL != str); 83 if (isThinArchive(str)) 84 result = true; 85 86 pInput.memArea()->release(region); 87 return result; 88} 89 90bool GNUArchiveReader::readArchive(Archive& pArchive) 91{ 92 if (pArchive.getARFile().attribute()->isWholeArchive()) 93 return includeAllMembers(pArchive); 94 95 // if this is the first time read this archive, setup symtab and strtab 96 if (pArchive.getSymbolTable().empty()) { 97 // read the symtab of the archive 98 readSymbolTable(pArchive); 99 100 // read the strtab of the archive 101 readStringTable(pArchive); 102 103 // add root archive to ArchiveMemberMap 104 pArchive.addArchiveMember(pArchive.getARFile().name(), 105 pArchive.inputs().root(), 106 &InputTree::Downward); 107 } 108 109 // include the needed members in the archive and build up the input tree 110 bool willSymResolved; 111 do { 112 willSymResolved = false; 113 for (size_t idx = 0; idx < pArchive.numOfSymbols(); ++idx) { 114 // bypass if we already decided to include this symbol or not 115 if (Archive::Symbol::Unknown != pArchive.getSymbolStatus(idx)) 116 continue; 117 118 // bypass if another symbol with the same object file offset is included 119 if (pArchive.hasObjectMember(pArchive.getObjFileOffset(idx))) { 120 pArchive.setSymbolStatus(idx, Archive::Symbol::Include); 121 continue; 122 } 123 124 // check if we should include this defined symbol 125 Archive::Symbol::Status status = 126 shouldIncludeSymbol(pArchive.getSymbolName(idx)); 127 if (Archive::Symbol::Unknown != status) 128 pArchive.setSymbolStatus(idx, status); 129 130 if (Archive::Symbol::Include == status) { 131 // include the object member from the given offset 132 includeMember(pArchive, pArchive.getObjFileOffset(idx)); 133 willSymResolved = true; 134 } // end of if 135 } // end of for 136 } while (willSymResolved); 137 138 return true; 139} 140 141/// readMemberHeader - read the header of a member in a archive file and then 142/// return the corresponding archive member (it may be an input object or 143/// another archive) 144/// @param pArchiveRoot - the archive root that holds the strtab (extended 145/// name table) 146/// @param pArchiveFile - the archive that contains the needed object 147/// @param pFileOffset - file offset of the member header in the archive 148/// @param pNestedOffset - used when we find a nested archive 149/// @param pMemberSize - the file size of this member 150Input* GNUArchiveReader::readMemberHeader(Archive& pArchiveRoot, 151 Input& pArchiveFile, 152 uint32_t pFileOffset, 153 uint32_t& pNestedOffset, 154 size_t& pMemberSize) 155{ 156 assert(pArchiveFile.hasMemArea()); 157 158 MemoryRegion* header_region = 159 pArchiveFile.memArea()->request((pArchiveFile.fileOffset() + pFileOffset), 160 sizeof(Archive::MemberHeader)); 161 const Archive::MemberHeader* header = 162 reinterpret_cast<const Archive::MemberHeader*>(header_region->getBuffer()); 163 164 assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag))); 165 166 pMemberSize = atoi(header->size); 167 168 // parse the member name and nested offset if any 169 std::string member_name; 170 llvm::StringRef name_field(header->name, sizeof(header->name)); 171 if ('/' != header->name[0]) { 172 // this is an object file in an archive 173 size_t pos = name_field.find_first_of('/'); 174 member_name.assign(name_field.substr(0, pos).str()); 175 } 176 else { 177 // this is an object/archive file in a thin archive 178 size_t begin = 1; 179 size_t end = name_field.find_first_of(" :"); 180 uint32_t name_offset = 0; 181 // parse the name offset 182 name_field.substr(begin, end - begin).getAsInteger(10, name_offset); 183 184 if (':' == name_field[end]) { 185 // there is a nested offset 186 begin = end + 1; 187 end = name_field.find_first_of(' ', begin); 188 name_field.substr(begin, end - begin).getAsInteger(10, pNestedOffset); 189 } 190 191 // get the member name from the extended name table 192 assert(pArchiveRoot.hasStrTable()); 193 begin = name_offset; 194 end = pArchiveRoot.getStrTable().find_first_of('\n', begin); 195 member_name.assign(pArchiveRoot.getStrTable().substr(begin, end - begin -1)); 196 } 197 198 Input* member = NULL; 199 bool isThinAR = isThinArchive(pArchiveFile); 200 if (!isThinAR) { 201 // this is an object file in an archive 202 member = pArchiveRoot.getMemberFile(pArchiveFile, 203 isThinAR, 204 member_name, 205 pArchiveFile.path(), 206 (pFileOffset + 207 sizeof(Archive::MemberHeader))); 208 } 209 else { 210 // this is a member in a thin archive 211 // try to find if this is a archive already in the map first 212 Archive::ArchiveMember* ar_member = 213 pArchiveRoot.getArchiveMember(member_name); 214 if (NULL != ar_member) { 215 return ar_member->file; 216 } 217 218 // get nested file path, the nested file's member name is the relative 219 // path to the archive containing it. 220 sys::fs::Path input_path(pArchiveFile.path().parent_path()); 221 if (!input_path.empty()) 222 input_path.append(member_name); 223 else 224 input_path.assign(member_name); 225 226 member = pArchiveRoot.getMemberFile(pArchiveFile, 227 isThinAR, 228 member_name, 229 input_path); 230 } 231 232 pArchiveFile.memArea()->release(header_region); 233 return member; 234} 235 236/// readSymbolTable - read the archive symbol map (armap) 237bool GNUArchiveReader::readSymbolTable(Archive& pArchive) 238{ 239 assert(pArchive.getARFile().hasMemArea()); 240 241 MemoryRegion* header_region = 242 pArchive.getARFile().memArea()->request((pArchive.getARFile().fileOffset() + 243 Archive::MAGIC_LEN), 244 sizeof(Archive::MemberHeader)); 245 const Archive::MemberHeader* header = 246 reinterpret_cast<const Archive::MemberHeader*>(header_region->getBuffer()); 247 assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag))); 248 249 int symtab_size = atoi(header->size); 250 pArchive.setSymTabSize(symtab_size); 251 252 if (!pArchive.getARFile().attribute()->isWholeArchive()) { 253 MemoryRegion* symtab_region = 254 pArchive.getARFile().memArea()->request( 255 (pArchive.getARFile().fileOffset() + 256 Archive::MAGIC_LEN + 257 sizeof(Archive::MemberHeader)), 258 symtab_size); 259 const uint32_t* data = 260 reinterpret_cast<const uint32_t*>(symtab_region->getBuffer()); 261 262 // read the number of symbols 263 uint32_t number = 0; 264 if (llvm::sys::isLittleEndianHost()) 265 number = bswap32(*data); 266 else 267 number = *data; 268 269 // set up the pointers for file offset and name offset 270 ++data; 271 const char* name = reinterpret_cast<const char*>(data + number); 272 273 // add the archive symbols 274 for (uint32_t i = 0; i < number; ++i) { 275 if (llvm::sys::isLittleEndianHost()) 276 pArchive.addSymbol(name, bswap32(*data)); 277 else 278 pArchive.addSymbol(name, *data); 279 name += strlen(name) + 1; 280 ++data; 281 } 282 pArchive.getARFile().memArea()->release(symtab_region); 283 } 284 pArchive.getARFile().memArea()->release(header_region); 285 return true; 286} 287 288/// readStringTable - read the strtab for long file name of the archive 289bool GNUArchiveReader::readStringTable(Archive& pArchive) 290{ 291 size_t offset = Archive::MAGIC_LEN + 292 sizeof(Archive::MemberHeader) + 293 pArchive.getSymTabSize(); 294 295 if (0x0 != (offset & 1)) 296 ++offset; 297 298 assert(pArchive.getARFile().hasMemArea()); 299 300 MemoryRegion* header_region = 301 pArchive.getARFile().memArea()->request((pArchive.getARFile().fileOffset() + 302 offset), 303 sizeof(Archive::MemberHeader)); 304 const Archive::MemberHeader* header = 305 reinterpret_cast<const Archive::MemberHeader*>(header_region->getBuffer()); 306 307 assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag))); 308 309 if (0 == memcmp(header->name, Archive::STRTAB_NAME, sizeof(header->name))) { 310 // read the extended name table 311 int strtab_size = atoi(header->size); 312 MemoryRegion* strtab_region = 313 pArchive.getARFile().memArea()->request( 314 (pArchive.getARFile().fileOffset() + 315 offset + sizeof(Archive::MemberHeader)), 316 strtab_size); 317 const char* strtab = 318 reinterpret_cast<const char*>(strtab_region->getBuffer()); 319 pArchive.getStrTable().assign(strtab, strtab_size); 320 pArchive.getARFile().memArea()->release(strtab_region); 321 } 322 pArchive.getARFile().memArea()->release(header_region); 323 return true; 324} 325 326/// shouldIncludeStatus - given a sym name from armap and check if including 327/// the corresponding archive member, and then return the decision 328enum Archive::Symbol::Status 329GNUArchiveReader::shouldIncludeSymbol(const llvm::StringRef& pSymName) const 330{ 331 // TODO: handle symbol version issue and user defined symbols 332 const ResolveInfo* info = m_Module.getNamePool().findInfo(pSymName); 333 if (NULL != info) { 334 if (!info->isUndef()) 335 return Archive::Symbol::Exclude; 336 if (info->isWeak()) 337 return Archive::Symbol::Unknown; 338 return Archive::Symbol::Include; 339 } 340 return Archive::Symbol::Unknown; 341} 342 343/// includeMember - include the object member in the given file offset, and 344/// return the size of the object 345/// @param pArchiveRoot - the archive root 346/// @param pFileOffset - file offset of the member header in the archive 347size_t GNUArchiveReader::includeMember(Archive& pArchive, uint32_t pFileOffset) 348{ 349 Input* cur_archive = &(pArchive.getARFile()); 350 Input* member = NULL; 351 uint32_t file_offset = pFileOffset; 352 size_t size = 0; 353 do { 354 uint32_t nested_offset = 0; 355 // use the file offset in current archive to find out the member we 356 // want to include 357 member = readMemberHeader(pArchive, 358 *cur_archive, 359 file_offset, 360 nested_offset, 361 size); 362 assert(member != NULL); 363 // bypass if we get an archive that is already in the map 364 if (Input::Archive == member->type()) { 365 cur_archive = member; 366 file_offset = nested_offset; 367 continue; 368 } 369 370 // insert a node into the subtree of current archive. 371 Archive::ArchiveMember* parent = 372 pArchive.getArchiveMember(cur_archive->name()); 373 374 assert(NULL != parent); 375 pArchive.inputs().insert(parent->lastPos, *(parent->move), *member); 376 377 // move the iterator to new created node, and also adjust the 378 // direction to Afterward for next insertion in this subtree 379 parent->move->move(parent->lastPos); 380 parent->move = &InputTree::Afterward; 381 382 if (m_ELFObjectReader.isMyFormat(*member)) { 383 member->setType(Input::Object); 384 pArchive.addObjectMember(pFileOffset, parent->lastPos); 385 m_ELFObjectReader.readHeader(*member); 386 m_ELFObjectReader.readSections(*member); 387 m_ELFObjectReader.readSymbols(*member); 388 m_Module.getObjectList().push_back(member); 389 } 390 else if (isMyFormat(*member)) { 391 member->setType(Input::Archive); 392 // when adding a new archive node, set the iterator to archive 393 // itself, and set the direction to Downward 394 pArchive.addArchiveMember(member->name(), 395 parent->lastPos, 396 &InputTree::Downward); 397 cur_archive = member; 398 file_offset = nested_offset; 399 } 400 } while (Input::Object != member->type()); 401 return size; 402} 403 404/// includeAllMembers - include all object members. This is called if 405/// --whole-archive is the attribute for this archive file. 406bool GNUArchiveReader::includeAllMembers(Archive& pArchive) 407{ 408 // read the symtab of the archive 409 readSymbolTable(pArchive); 410 411 // read the strtab of the archive 412 readStringTable(pArchive); 413 414 // add root archive to ArchiveMemberMap 415 pArchive.addArchiveMember(pArchive.getARFile().name(), 416 pArchive.inputs().root(), 417 &InputTree::Downward); 418 419 bool isThinAR = isThinArchive(pArchive.getARFile()); 420 uint32_t begin_offset = pArchive.getARFile().fileOffset() + 421 Archive::MAGIC_LEN + 422 sizeof(Archive::MemberHeader) + 423 pArchive.getSymTabSize(); 424 if (pArchive.hasStrTable()) { 425 if (0x0 != (begin_offset & 1)) 426 ++begin_offset; 427 begin_offset += sizeof(Archive::MemberHeader) + 428 pArchive.getStrTable().size(); 429 } 430 uint32_t end_offset = pArchive.getARFile().memArea()->handler()->size(); 431 for (uint32_t offset = begin_offset; 432 offset < end_offset; 433 offset += sizeof(Archive::MemberHeader)) { 434 435 size_t size = includeMember(pArchive, offset); 436 437 if (!isThinAR) { 438 offset += size; 439 } 440 441 if (0x0 != (offset & 1)) 442 ++offset; 443 } 444 return true; 445} 446 447