1//===- GNUArchiveReader.cpp -----------------------------------------------===// 2// 3// The MCLinker Project 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9#include <mcld/MC/MCLDInfo.h> 10#include <mcld/MC/MCLDInput.h> 11#include <mcld/MC/InputTree.h> 12#include <mcld/LD/GNUArchiveReader.h> 13#include <mcld/LD/ResolveInfo.h> 14#include <mcld/LD/ELFObjectReader.h> 15#include <mcld/Support/FileSystem.h> 16#include <mcld/Support/FileHandle.h> 17#include <mcld/Support/MemoryArea.h> 18#include <mcld/Support/MemoryRegion.h> 19#include <mcld/Support/MemoryAreaFactory.h> 20#include <mcld/Support/MsgHandling.h> 21#include <mcld/Support/Path.h> 22#include <mcld/ADT/SizeTraits.h> 23 24#include <llvm/ADT/StringRef.h> 25#include <llvm/Support/Host.h> 26 27#include <cstring> 28#include <cstdlib> 29 30using namespace mcld; 31 32GNUArchiveReader::GNUArchiveReader(MCLDInfo& pLDInfo, 33 MemoryAreaFactory& pMemAreaFactory, 34 ELFObjectReader& pELFObjectReader) 35 : m_LDInfo(pLDInfo), 36 m_MemAreaFactory(pMemAreaFactory), 37 m_ELFObjectReader(pELFObjectReader) 38{ 39} 40 41GNUArchiveReader::~GNUArchiveReader() 42{ 43} 44 45/// isMyFormat 46bool GNUArchiveReader::isMyFormat(Input& pInput) const 47{ 48 assert(pInput.hasMemArea()); 49 MemoryRegion* region = pInput.memArea()->request(pInput.fileOffset(), 50 Archive::MAGIC_LEN); 51 const char* str = reinterpret_cast<const char*>(region->getBuffer()); 52 53 bool result = false; 54 assert(NULL != str); 55 if (isArchive(str) || isThinArchive(str)) 56 result = true; 57 58 pInput.memArea()->release(region); 59 return result; 60} 61 62/// isArchive 63bool GNUArchiveReader::isArchive(const char* pStr) const 64{ 65 return (0 == memcmp(pStr, Archive::MAGIC, Archive::MAGIC_LEN)); 66} 67 68/// isThinArchive 69bool GNUArchiveReader::isThinArchive(const char* pStr) const 70{ 71 return (0 == memcmp(pStr, Archive::THIN_MAGIC, Archive::MAGIC_LEN)); 72} 73 74/// isThinArchive 75bool GNUArchiveReader::isThinArchive(Input& pInput) const 76{ 77 assert(pInput.hasMemArea()); 78 MemoryRegion* region = pInput.memArea()->request(pInput.fileOffset(), 79 Archive::MAGIC_LEN); 80 const char* str = reinterpret_cast<const char*>(region->getBuffer()); 81 82 bool result = false; 83 assert(NULL != str); 84 if (isThinArchive(str)) 85 result = true; 86 87 pInput.memArea()->release(region); 88 return result; 89} 90 91bool GNUArchiveReader::readArchive(Archive& pArchive) 92{ 93 // read the symtab of the archive 94 readSymbolTable(pArchive); 95 96 // read the strtab of the archive 97 readStringTable(pArchive); 98 99 // add root archive to ArchiveMemberMap 100 pArchive.addArchiveMember(pArchive.getARFile().name(), 101 pArchive.inputs().root(), 102 &InputTree::Downward); 103 104 // include the needed members in the archive and build up the input tree 105 bool willSymResolved; 106 do { 107 willSymResolved = false; 108 for (size_t idx = 0; idx < pArchive.numOfSymbols(); ++idx) { 109 // bypass if we already decided to include this symbol or not 110 if (Archive::Symbol::Unknown != pArchive.getSymbolStatus(idx)) 111 continue; 112 113 // bypass if another symbol with the same object file offset is included 114 if (pArchive.hasObjectMember(pArchive.getObjFileOffset(idx))) { 115 pArchive.setSymbolStatus(idx, Archive::Symbol::Include); 116 continue; 117 } 118 119 // check if we should include this defined symbol 120 Archive::Symbol::Status status = 121 shouldIncludeSymbol(pArchive.getSymbolName(idx)); 122 if (Archive::Symbol::Unknown != status) 123 pArchive.setSymbolStatus(idx, status); 124 125 if (Archive::Symbol::Include == status) { 126 Input* cur_archive = &(pArchive.getARFile()); 127 Input* member = cur_archive; 128 uint32_t file_offset = pArchive.getObjFileOffset(idx); 129 while ((member != NULL) && (Input::Object != member->type())) { 130 uint32_t nested_offset = 0; 131 // use the file offset in current archive to find out the member we 132 // want to include 133 member = readMemberHeader(pArchive, 134 *cur_archive, 135 file_offset, 136 nested_offset); 137 assert(member != NULL); 138 // bypass if we get an archive that is already in the map 139 if (Input::Archive == member->type()) { 140 cur_archive = member; 141 file_offset = nested_offset; 142 continue; 143 } 144 145 // insert a node into the subtree of current archive. 146 Archive::ArchiveMember* parent = 147 pArchive.getArchiveMember(cur_archive->name()); 148 149 assert(NULL != parent); 150 pArchive.inputs().insert(parent->lastPos, *(parent->move), *member); 151 152 // move the iterator to new created node, and also adjust the 153 // direction to Afterward for next insertion in this subtree 154 parent->move->move(parent->lastPos); 155 parent->move = &InputTree::Afterward; 156 157 if (m_ELFObjectReader.isMyFormat(*member)) { 158 member->setType(Input::Object); 159 pArchive.addObjectMember(pArchive.getObjFileOffset(idx), 160 parent->lastPos); 161 m_ELFObjectReader.readObject(*member); 162 m_ELFObjectReader.readSections(*member); 163 m_ELFObjectReader.readSymbols(*member); 164 } 165 else if (isMyFormat(*member)) { 166 member->setType(Input::Archive); 167 // when adding a new archive node, set the iterator to archive 168 // itself, and set the direction to Downward 169 pArchive.addArchiveMember(member->name(), 170 parent->lastPos, 171 &InputTree::Downward); 172 cur_archive = member; 173 file_offset = nested_offset; 174 } 175 } // end of while 176 willSymResolved = true; 177 } // end of if 178 } // end of for 179 } while (willSymResolved); 180 181 return true; 182} 183 184/// readMemberHeader - read the header of a member in a archive file and then 185/// return the corresponding archive member (it may be an input object or 186/// another archive) 187/// @param pArchiveRoot - the archive root that holds the strtab (extended 188/// name table) 189/// @param pArchiveFile - the archive that contains the needed object 190/// @param pFileOffset - file offset of the member header in the archive 191/// @param pNestedOffset - used when we find a nested archive 192Input* GNUArchiveReader::readMemberHeader(Archive& pArchiveRoot, 193 Input& pArchiveFile, 194 uint32_t pFileOffset, 195 uint32_t& pNestedOffset) 196{ 197 assert(pArchiveFile.hasMemArea()); 198 199 MemoryRegion* header_region = 200 pArchiveFile.memArea()->request((pArchiveFile.fileOffset() + pFileOffset), 201 sizeof(Archive::MemberHeader)); 202 const Archive::MemberHeader* header = 203 reinterpret_cast<const Archive::MemberHeader*>(header_region->getBuffer()); 204 205 assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, 2)); 206 207 // int size = atoi(header->size); 208 209 // parse the member name and nested offset if any 210 std::string member_name; 211 llvm::StringRef name_field(header->name, 16); 212 if ('/' != header->name[0]) { 213 // this is an object file in an archive 214 size_t pos = name_field.find_first_of('/'); 215 member_name.assign(name_field.substr(0, pos).str()); 216 } 217 else { 218 // this is an object/archive file in a thin archive 219 size_t begin = 1; 220 size_t end = name_field.find_first_of(" :"); 221 uint32_t name_offset = 0; 222 // parse the name offset 223 name_field.substr(begin, end - begin).getAsInteger(10, name_offset); 224 225 if (':' == name_field[end]) { 226 // there is a nested offset 227 begin = end + 1; 228 end = name_field.find_first_of(' ', begin); 229 name_field.substr(begin, end - begin).getAsInteger(10, pNestedOffset); 230 } 231 232 // get the member name from the extended name table 233 begin = name_offset; 234 end = pArchiveRoot.getStrTable().find_first_of('\n', begin); 235 member_name.assign(pArchiveRoot.getStrTable().substr(begin, end - begin -1)); 236 } 237 238 Input* member = NULL; 239 if (!isThinArchive(pArchiveFile)) { 240 // this is an object file in an archive 241 member = 242 m_LDInfo.inputFactory().produce(member_name, 243 pArchiveFile.path(), 244 Input::Unknown, 245 (pFileOffset + 246 sizeof(Archive::MemberHeader))); 247 assert(member != NULL); 248 member->setMemArea(pArchiveFile.memArea()); 249 LDContext *input_context = m_LDInfo.contextFactory().produce(); 250 member->setContext(input_context); 251 } 252 else { 253 // this is a member in a thin archive 254 // try to find if this is a archive already in the map first 255 Archive::ArchiveMember* ar_member = 256 pArchiveRoot.getArchiveMember(member_name); 257 if (NULL != ar_member) { 258 return ar_member->file; 259 } 260 261 // get nested file path, the nested file's member name is the relative 262 // path to the archive containing it. 263 sys::fs::Path input_path(pArchiveFile.path().parent_path()); 264 if (!input_path.empty()) 265 input_path.append(member_name); 266 else 267 input_path.assign(member_name); 268 member = 269 m_LDInfo.inputFactory().produce(member_name, input_path, Input::Unknown); 270 271 assert(member != NULL); 272 MemoryArea* input_memory = 273 m_MemAreaFactory.produce(member->path(), FileHandle::ReadOnly); 274 if (input_memory->handler()->isGood()) { 275 member->setMemArea(input_memory); 276 } 277 else { 278 error(diag::err_cannot_open_input) << member->name() << member->path(); 279 return NULL; 280 } 281 LDContext *input_context = m_LDInfo.contextFactory().produce(input_path); 282 member->setContext(input_context); 283 } 284 285 pArchiveFile.memArea()->release(header_region); 286 return member; 287} 288 289/// readSymbolTable - read the archive symbol map (armap) 290bool GNUArchiveReader::readSymbolTable(Archive& pArchive) 291{ 292 assert(pArchive.getARFile().hasMemArea()); 293 294 MemoryRegion* header_region = 295 pArchive.getARFile().memArea()->request((pArchive.getARFile().fileOffset() + 296 Archive::MAGIC_LEN), 297 sizeof(Archive::MemberHeader)); 298 const Archive::MemberHeader* header = 299 reinterpret_cast<const Archive::MemberHeader*>(header_region->getBuffer()); 300 assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, 2)); 301 302 int symtab_size = atoi(header->size); 303 pArchive.setSymTabSize(symtab_size); 304 305 MemoryRegion* symtab_region = 306 pArchive.getARFile().memArea()->request((pArchive.getARFile().fileOffset() + 307 Archive::MAGIC_LEN + 308 sizeof(Archive::MemberHeader)), 309 symtab_size); 310 const uint32_t* data = 311 reinterpret_cast<const uint32_t*>(symtab_region->getBuffer()); 312 313 // read the number of symbols 314 uint32_t number = 0; 315 if (llvm::sys::isLittleEndianHost()) 316 number = bswap32(*data); 317 else 318 number = *data; 319 320 // set up the pointers for file offset and name offset 321 ++data; 322 const char* name = reinterpret_cast<const char*>(data + number); 323 324 // add the archive symbols 325 for (uint32_t i = 0; i < number; ++i) { 326 if (llvm::sys::isLittleEndianHost()) 327 pArchive.addSymbol(name, bswap32(*data)); 328 else 329 pArchive.addSymbol(name, *data); 330 name += strlen(name) + 1; 331 ++data; 332 } 333 334 pArchive.getARFile().memArea()->release(header_region); 335 pArchive.getARFile().memArea()->release(symtab_region); 336 return true; 337} 338 339/// readStringTable - read the strtab for long file name of the archive 340bool GNUArchiveReader::readStringTable(Archive& pArchive) 341{ 342 size_t offset = Archive::MAGIC_LEN + 343 sizeof(Archive::MemberHeader) + 344 pArchive.getSymTabSize(); 345 346 if (0x0 != (offset & 1)) 347 ++offset; 348 349 assert(pArchive.getARFile().hasMemArea()); 350 351 MemoryRegion* header_region = 352 pArchive.getARFile().memArea()->request((pArchive.getARFile().fileOffset() + 353 offset), 354 sizeof(Archive::MemberHeader)); 355 const Archive::MemberHeader* header = 356 reinterpret_cast<const Archive::MemberHeader*>(header_region->getBuffer()); 357 358 assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, 2)); 359 360 int strtab_size = atoi(header->size); 361 362 MemoryRegion* strtab_region = 363 pArchive.getARFile().memArea()->request((pArchive.getARFile().fileOffset() + 364 offset + 365 sizeof(Archive::MemberHeader)), 366 strtab_size); 367 const char* strtab = 368 reinterpret_cast<const char*>(strtab_region->getBuffer()); 369 370 pArchive.getStrTable().assign(strtab, strtab_size); 371 372 pArchive.getARFile().memArea()->release(header_region); 373 pArchive.getARFile().memArea()->release(strtab_region); 374 return true; 375} 376 377/// shouldIncludeStatus - given a sym name from armap and check if including 378/// the corresponding archive member, and then return the decision 379enum Archive::Symbol::Status 380GNUArchiveReader::shouldIncludeSymbol(const llvm::StringRef& pSymName) const 381{ 382 // TODO: handle symbol version issue and user defined symbols 383 ResolveInfo* info = m_LDInfo.getNamePool().findInfo(pSymName); 384 if (NULL != info) { 385 if (!info->isUndef()) 386 return Archive::Symbol::Exclude; 387 if (info->isWeak()) 388 return Archive::Symbol::Unknown; 389 return Archive::Symbol::Include; 390 } 391 return Archive::Symbol::Unknown; 392} 393 394