MachObjectWriter.cpp revision 56279f42b6d955be8a5f399f405ce0b1784e9502
//===- lib/MC/MachObjectWriter.cpp - Mach-O File Writer -------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "llvm/MC/MachObjectWriter.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCMachOSymbolFlags.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachO.h"
#include "llvm/Target/TargetAsmBackend.h"

// FIXME: Gross.
#include "../Target/X86/X86FixupKinds.h"

#include <vector>
using namespace llvm;

/// getFixupKindLog2Size - Map a fixup kind to the log2 value returned for it:
/// 0 for the 1-byte kinds, 1 for FK_Data_2, 2 for the 4-byte kinds and 3 for
/// FK_Data_8. Any other kind reaches llvm_unreachable.
static unsigned getFixupKindLog2Size(unsigned Kind) {
  switch (Kind) {
  default: llvm_unreachable("invalid fixup kind!");
  case X86::reloc_pcrel_1byte:
  case FK_Data_1: return 0;
  case FK_Data_2: return 1;
  case X86::reloc_pcrel_4byte:
  case X86::reloc_riprel_4byte:
  case X86::reloc_riprel_4byte_movq_load:
  case FK_Data_4: return 2;
  case FK_Data_8: return 3;
  }
}

/// isFixupKindPCRel - Return true for the X86 pcrel and riprel fixup kinds
/// listed below, and false for every other kind.
static bool isFixupKindPCRel(unsigned Kind) {
  switch (Kind) {
  default:
    return false;
  case X86::reloc_pcrel_1byte:
  case X86::reloc_pcrel_4byte:
  case X86::reloc_riprel_4byte:
  case X86::reloc_riprel_4byte_movq_load:
    return true;
  }
}

/// isFixupKindRIPRel - Return true only for the two riprel fixup kinds (a
/// subset of the kinds accepted by isFixupKindPCRel).
static bool isFixupKindRIPRel(unsigned Kind) {
  return Kind == X86::reloc_riprel_4byte ||
    Kind == X86::reloc_riprel_4byte_movq_load;
}

/// doesSymbolRequireExternRelocation - Return true if a relocation against
/// the symbol described by \arg SD must be an external relocation entry, i.e.
/// the symbol is undefined or is a weak definition.
static bool doesSymbolRequireExternRelocation(MCSymbolData *SD) {
  // Undefined symbols are always extern.
  if (SD->Symbol->isUndefined())
    return true;

  // References to weak definitions require external relocation entries; the
  // definition may not always be the one in the same object file.
  if (SD->getFlags() & SF_WeakDefinition)
    return true;

  // Otherwise, we can use an internal relocation.
  return false;
}

namespace {

class MachObjectWriterImpl {
  // See <mach-o/loader.h>.
  enum {
    Header_Magic32 = 0xFEEDFACE,
    Header_Magic64 = 0xFEEDFACF
  };

  // Sizes, in bytes, of the fixed-size structures written by this class. The
  // Write* methods assert that they emit exactly this many bytes.
  enum {
    Header32Size = 28,
    Header64Size = 32,
    SegmentLoadCommand32Size = 56,
    SegmentLoadCommand64Size = 72,
    Section32Size = 68,
    Section64Size = 80,
    SymtabLoadCommandSize = 24,
    DysymtabLoadCommandSize = 80,
    Nlist32Size = 12,
    Nlist64Size = 16,
    RelocationInfoSize = 8
  };

  enum HeaderFileType {
    HFT_Object = 0x1
  };

  enum HeaderFlags {
    HF_SubsectionsViaSymbols = 0x2000
  };

  enum LoadCommandType {
    LCT_Segment = 0x1,
    LCT_Symtab = 0x2,
    LCT_Dysymtab = 0xb,
    LCT_Segment64 = 0x19
  };

  // See <mach-o/nlist.h>.
  enum SymbolTypeType {
    STT_Undefined = 0x00,
    STT_Absolute = 0x02,
    STT_Section = 0x0e
  };

  enum SymbolTypeFlags {
    // If any of these bits are set, then the entry is a stab entry number (see
    // <mach-o/stab.h>). Otherwise the other masks apply.
    STF_StabsEntryMask = 0xe0,

    STF_TypeMask = 0x0e,
    STF_External = 0x01,
    STF_PrivateExtern = 0x10
  };

  /// IndirectSymbolFlags - Flags for encoding special values in the indirect
  /// symbol entry.
  enum IndirectSymbolFlags {
    ISF_Local = 0x80000000,
    ISF_Absolute = 0x40000000
  };

  /// RelocationFlags - Special flags for addresses.
enum RelocationFlags {
    // Set in the high bit of Word0 to mark a scattered relocation entry.
    RF_Scattered = 0x80000000
  };

  // Relocation types used by the 32-bit (non x86_64) relocation paths.
  enum RelocationInfoType {
    RIT_Vanilla = 0,
    RIT_Pair = 1,
    RIT_Difference = 2,
    RIT_PreboundLazyPointer = 3,
    RIT_LocalDifference = 4
  };

  /// X86_64 uses its own relocation types.
  enum RelocationInfoTypeX86_64 {
    RIT_X86_64_Unsigned = 0,
    RIT_X86_64_Signed = 1,
    RIT_X86_64_Branch = 2,
    RIT_X86_64_GOTLoad = 3,
    RIT_X86_64_GOT = 4,
    RIT_X86_64_Subtractor = 5,
    RIT_X86_64_Signed1 = 6,
    RIT_X86_64_Signed2 = 7,
    RIT_X86_64_Signed4 = 8
  };

  /// MachSymbolData - Helper struct for containing some precomputed information
  /// on symbols.
  struct MachSymbolData {
    // The symbol this entry describes.
    MCSymbolData *SymbolData;
    // Offset of the symbol's name in the string table.
    uint64_t StringIndex;
    // 1-based index of the symbol's section; 0 for undefined and absolute
    // symbols.
    uint8_t SectionIndex;

    // Support lexicographic sorting.
    bool operator<(const MachSymbolData &RHS) const {
      const std::string &Name = SymbolData->getSymbol().getName();
      return Name < RHS.SymbolData->getSymbol().getName();
    }
  };

  /// @name Relocation Data
  /// @{

  // A relocation entry, held as the two 32-bit words of its encoding.
  struct MachRelocationEntry {
    uint32_t Word0;
    uint32_t Word1;
  };

  // The relocation entries recorded so far, keyed by the section that contains
  // the fixup.
  llvm::DenseMap<const MCSectionData*,
                 std::vector<MachRelocationEntry> > Relocations;

  /// @}
  /// @name Symbol Table Data
  /// @{

  SmallString<256> StringTable;
  std::vector<MachSymbolData> LocalSymbolData;
  std::vector<MachSymbolData> ExternalSymbolData;
  std::vector<MachSymbolData> UndefinedSymbolData;

  /// @}

  // The object writer that all Write* calls are forwarded to.
  MachObjectWriter *Writer;

  // The writer's output stream; only used here to query the current position.
  raw_ostream &OS;

  // Selects between the 32-bit and 64-bit variants of each structure.
  unsigned Is64Bit : 1;

public:
  // Note that OS is initialized through the Writer member, so Writer must stay
  // declared before OS.
  MachObjectWriterImpl(MachObjectWriter *_Writer, bool _Is64Bit)
    : Writer(_Writer), OS(Writer->getStream()), Is64Bit(_Is64Bit) {
  }

  // Thin forwarders to the underlying object writer.
  void Write8(uint8_t Value) { Writer->Write8(Value); }
  void Write16(uint16_t Value) { Writer->Write16(Value); }
  void Write32(uint32_t Value) { Writer->Write32(Value); }
  void Write64(uint64_t Value) { Writer->Write64(Value); }
215 void WriteZeros(unsigned N) { Writer->WriteZeros(N); } 216 void WriteBytes(StringRef Str, unsigned ZeroFillSize = 0) { 217 Writer->WriteBytes(Str, ZeroFillSize); 218 } 219 220 void WriteHeader(unsigned NumLoadCommands, unsigned LoadCommandsSize, 221 bool SubsectionsViaSymbols) { 222 uint32_t Flags = 0; 223 224 if (SubsectionsViaSymbols) 225 Flags |= HF_SubsectionsViaSymbols; 226 227 // struct mach_header (28 bytes) or 228 // struct mach_header_64 (32 bytes) 229 230 uint64_t Start = OS.tell(); 231 (void) Start; 232 233 Write32(Is64Bit ? Header_Magic64 : Header_Magic32); 234 235 // FIXME: Support cputype. 236 Write32(Is64Bit ? MachO::CPUTypeX86_64 : MachO::CPUTypeI386); 237 // FIXME: Support cpusubtype. 238 Write32(MachO::CPUSubType_I386_ALL); 239 Write32(HFT_Object); 240 Write32(NumLoadCommands); // Object files have a single load command, the 241 // segment. 242 Write32(LoadCommandsSize); 243 Write32(Flags); 244 if (Is64Bit) 245 Write32(0); // reserved 246 247 assert(OS.tell() - Start == Is64Bit ? Header64Size : Header32Size); 248 } 249 250 /// WriteSegmentLoadCommand - Write a segment load command. 251 /// 252 /// \arg NumSections - The number of sections in this segment. 253 /// \arg SectionDataSize - The total size of the sections. 254 void WriteSegmentLoadCommand(unsigned NumSections, 255 uint64_t VMSize, 256 uint64_t SectionDataStartOffset, 257 uint64_t SectionDataSize) { 258 // struct segment_command (56 bytes) or 259 // struct segment_command_64 (72 bytes) 260 261 uint64_t Start = OS.tell(); 262 (void) Start; 263 264 unsigned SegmentLoadCommandSize = Is64Bit ? SegmentLoadCommand64Size : 265 SegmentLoadCommand32Size; 266 Write32(Is64Bit ? LCT_Segment64 : LCT_Segment); 267 Write32(SegmentLoadCommandSize + 268 NumSections * (Is64Bit ? 
Section64Size : Section32Size)); 269 270 WriteBytes("", 16); 271 if (Is64Bit) { 272 Write64(0); // vmaddr 273 Write64(VMSize); // vmsize 274 Write64(SectionDataStartOffset); // file offset 275 Write64(SectionDataSize); // file size 276 } else { 277 Write32(0); // vmaddr 278 Write32(VMSize); // vmsize 279 Write32(SectionDataStartOffset); // file offset 280 Write32(SectionDataSize); // file size 281 } 282 Write32(0x7); // maxprot 283 Write32(0x7); // initprot 284 Write32(NumSections); 285 Write32(0); // flags 286 287 assert(OS.tell() - Start == SegmentLoadCommandSize); 288 } 289 290 void WriteSection(const MCAssembler &Asm, const MCAsmLayout &Layout, 291 const MCSectionData &SD, uint64_t FileOffset, 292 uint64_t RelocationsStart, unsigned NumRelocations) { 293 uint64_t SectionSize = Layout.getSectionSize(&SD); 294 295 // The offset is unused for virtual sections. 296 if (Asm.getBackend().isVirtualSection(SD.getSection())) { 297 assert(Layout.getSectionFileSize(&SD) == 0 && "Invalid file size!"); 298 FileOffset = 0; 299 } 300 301 // struct section (68 bytes) or 302 // struct section_64 (80 bytes) 303 304 uint64_t Start = OS.tell(); 305 (void) Start; 306 307 const MCSectionMachO &Section = cast<MCSectionMachO>(SD.getSection()); 308 WriteBytes(Section.getSectionName(), 16); 309 WriteBytes(Section.getSegmentName(), 16); 310 if (Is64Bit) { 311 Write64(Layout.getSectionAddress(&SD)); // address 312 Write64(SectionSize); // size 313 } else { 314 Write32(Layout.getSectionAddress(&SD)); // address 315 Write32(SectionSize); // size 316 } 317 Write32(FileOffset); 318 319 unsigned Flags = Section.getTypeAndAttributes(); 320 if (SD.hasInstructions()) 321 Flags |= MCSectionMachO::S_ATTR_SOME_INSTRUCTIONS; 322 323 assert(isPowerOf2_32(SD.getAlignment()) && "Invalid alignment!"); 324 Write32(Log2_32(SD.getAlignment())); 325 Write32(NumRelocations ? 
RelocationsStart : 0); 326 Write32(NumRelocations); 327 Write32(Flags); 328 Write32(0); // reserved1 329 Write32(Section.getStubSize()); // reserved2 330 if (Is64Bit) 331 Write32(0); // reserved3 332 333 assert(OS.tell() - Start == Is64Bit ? Section64Size : Section32Size); 334 } 335 336 void WriteSymtabLoadCommand(uint32_t SymbolOffset, uint32_t NumSymbols, 337 uint32_t StringTableOffset, 338 uint32_t StringTableSize) { 339 // struct symtab_command (24 bytes) 340 341 uint64_t Start = OS.tell(); 342 (void) Start; 343 344 Write32(LCT_Symtab); 345 Write32(SymtabLoadCommandSize); 346 Write32(SymbolOffset); 347 Write32(NumSymbols); 348 Write32(StringTableOffset); 349 Write32(StringTableSize); 350 351 assert(OS.tell() - Start == SymtabLoadCommandSize); 352 } 353 354 void WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol, 355 uint32_t NumLocalSymbols, 356 uint32_t FirstExternalSymbol, 357 uint32_t NumExternalSymbols, 358 uint32_t FirstUndefinedSymbol, 359 uint32_t NumUndefinedSymbols, 360 uint32_t IndirectSymbolOffset, 361 uint32_t NumIndirectSymbols) { 362 // struct dysymtab_command (80 bytes) 363 364 uint64_t Start = OS.tell(); 365 (void) Start; 366 367 Write32(LCT_Dysymtab); 368 Write32(DysymtabLoadCommandSize); 369 Write32(FirstLocalSymbol); 370 Write32(NumLocalSymbols); 371 Write32(FirstExternalSymbol); 372 Write32(NumExternalSymbols); 373 Write32(FirstUndefinedSymbol); 374 Write32(NumUndefinedSymbols); 375 Write32(0); // tocoff 376 Write32(0); // ntoc 377 Write32(0); // modtaboff 378 Write32(0); // nmodtab 379 Write32(0); // extrefsymoff 380 Write32(0); // nextrefsyms 381 Write32(IndirectSymbolOffset); 382 Write32(NumIndirectSymbols); 383 Write32(0); // extreloff 384 Write32(0); // nextrel 385 Write32(0); // locreloff 386 Write32(0); // nlocrel 387 388 assert(OS.tell() - Start == DysymtabLoadCommandSize); 389 } 390 391 void WriteNlist(MachSymbolData &MSD, const MCAsmLayout &Layout) { 392 MCSymbolData &Data = *MSD.SymbolData; 393 const MCSymbol &Symbol = 
Data.getSymbol(); 394 uint8_t Type = 0; 395 uint16_t Flags = Data.getFlags(); 396 uint32_t Address = 0; 397 398 // Set the N_TYPE bits. See <mach-o/nlist.h>. 399 // 400 // FIXME: Are the prebound or indirect fields possible here? 401 if (Symbol.isUndefined()) 402 Type = STT_Undefined; 403 else if (Symbol.isAbsolute()) 404 Type = STT_Absolute; 405 else 406 Type = STT_Section; 407 408 // FIXME: Set STAB bits. 409 410 if (Data.isPrivateExtern()) 411 Type |= STF_PrivateExtern; 412 413 // Set external bit. 414 if (Data.isExternal() || Symbol.isUndefined()) 415 Type |= STF_External; 416 417 // Compute the symbol address. 418 if (Symbol.isDefined()) { 419 if (Symbol.isAbsolute()) { 420 Address = cast<MCConstantExpr>(Symbol.getVariableValue())->getValue(); 421 } else { 422 Address = Layout.getSymbolAddress(&Data); 423 } 424 } else if (Data.isCommon()) { 425 // Common symbols are encoded with the size in the address 426 // field, and their alignment in the flags. 427 Address = Data.getCommonSize(); 428 429 // Common alignment is packed into the 'desc' bits. 430 if (unsigned Align = Data.getCommonAlignment()) { 431 unsigned Log2Size = Log2_32(Align); 432 assert((1U << Log2Size) == Align && "Invalid 'common' alignment!"); 433 if (Log2Size > 15) 434 report_fatal_error("invalid 'common' alignment '" + 435 Twine(Align) + "'"); 436 // FIXME: Keep this mask with the SymbolFlags enumeration. 437 Flags = (Flags & 0xF0FF) | (Log2Size << 8); 438 } 439 } 440 441 // struct nlist (12 bytes) 442 443 Write32(MSD.StringIndex); 444 Write8(Type); 445 Write8(MSD.SectionIndex); 446 447 // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc' 448 // value. 449 Write16(Flags); 450 if (Is64Bit) 451 Write64(Address); 452 else 453 Write32(Address); 454 } 455 456 // FIXME: We really need to improve the relocation validation. 
// Basically, we want to implement a separate computation which evaluates the
  // relocation entry as the linker would, and verifies that the resultant
  // fixup value is exactly what the encoder wanted. This will catch several
  // classes of problems:
  //
  //  - Relocation entry bugs, the two algorithms are unlikely to have the same
  //    exact bug.
  //
  //  - Relaxation issues, where we forget to relax something.
  //
  //  - Input errors, where something cannot be correctly encoded. 'as' allows
  //    these through in many cases.

  /// RecordX86_64Relocation - Record the relocation entry for \arg Fixup when
  /// writing the 64-bit format, and set \arg FixedValue to the value computed
  /// here for the fixup. A difference of two symbols records two entries.
  ///
  /// Unsupported forms are rejected with report_fatal_error.
  void RecordX86_64Relocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
                              const MCFragment *Fragment,
                              const MCAsmFixup &Fixup, MCValue Target,
                              uint64_t &FixedValue) {
    unsigned IsPCRel = isFixupKindPCRel(Fixup.Kind);
    unsigned IsRIPRel = isFixupKindRIPRel(Fixup.Kind);
    unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind);

    // See <reloc.h>.
    uint32_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.Offset;
    uint32_t FixupAddress = Layout.getFragmentAddress(Fragment) + Fixup.Offset;
    int64_t Value = 0;
    unsigned Index = 0;
    unsigned IsExtern = 0;
    unsigned Type = 0;

    Value = Target.getConstant();

    if (IsPCRel) {
      // Compensate for the relocation offset, Darwin x86_64 relocations only
      // have the addend and appear to have attempted to define it to be the
      // actual expression addend without the PCrel bias. However, instructions
      // with data following the relocation are not accommodated for (see
      // comment below regarding SIGNED{1,2,4}), so it isn't exactly that
      // either.
      Value += 1LL << Log2Size;
    }

    if (Target.isAbsolute()) { // constant
      // SymbolNum of 0 indicates the absolute section.
      Type = RIT_X86_64_Unsigned;
      Index = 0;

      // FIXME: I believe this is broken, I don't think the linker can
      // understand it. I think it would require a local relocation, but I'm not
      // sure if that would work either. The official way to get an absolute
      // PCrel relocation is to use an absolute symbol (which we don't support
      // yet).
      if (IsPCRel) {
        IsExtern = 1;
        Type = RIT_X86_64_Branch;
      }
    } else if (Target.getSymB()) { // A - B + constant
      const MCSymbol *A = &Target.getSymA()->getSymbol();
      MCSymbolData &A_SD = Asm.getSymbolData(*A);
      const MCSymbolData *A_Base = Asm.getAtom(Layout, &A_SD);

      const MCSymbol *B = &Target.getSymB()->getSymbol();
      MCSymbolData &B_SD = Asm.getSymbolData(*B);
      const MCSymbolData *B_Base = Asm.getAtom(Layout, &B_SD);

      // Neither symbol can be modified.
      if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None ||
          Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None)
        report_fatal_error("unsupported relocation of modified symbol");

      // We don't support PCrel relocations of differences. Darwin 'as' doesn't
      // implement most of these correctly.
      if (IsPCRel)
        report_fatal_error("unsupported pc-relative relocation of difference");

      // We don't currently support any situation where one or both of the
      // symbols would require a local relocation. This is almost certainly
      // unused and may not be possible to encode correctly.
      if (!A_Base || !B_Base)
        report_fatal_error("unsupported local relocations in difference");

      // Darwin 'as' doesn't emit correct relocations for this (it ends up with
      // a single SIGNED relocation); reject it for now.
      if (A_Base == B_Base)
        report_fatal_error("unsupported relocation with identical base");

      // Fold the offset of each symbol from its base symbol into the value.
      Value += Layout.getSymbolAddress(&A_SD) - Layout.getSymbolAddress(A_Base);
      Value -= Layout.getSymbolAddress(&B_SD) - Layout.getSymbolAddress(B_Base);

      Index = A_Base->getIndex();
      IsExtern = 1;
      Type = RIT_X86_64_Unsigned;

      // Record the entry for A here; the entry for B is recorded by the common
      // code at the end of the function.
      MachRelocationEntry MRE;
      MRE.Word0 = FixupOffset;
      MRE.Word1 = ((Index     <<  0) |
                   (IsPCRel   << 24) |
                   (Log2Size  << 25) |
                   (IsExtern  << 27) |
                   (Type      << 28));
      Relocations[Fragment->getParent()].push_back(MRE);

      Index = B_Base->getIndex();
      IsExtern = 1;
      Type = RIT_X86_64_Subtractor;
    } else {
      const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
      MCSymbolData &SD = Asm.getSymbolData(*Symbol);
      const MCSymbolData *Base = Asm.getAtom(Layout, &SD);

      // Relocations inside debug sections always use local relocations when
      // possible. This seems to be done because the debugger doesn't fully
      // understand x86_64 relocation entries, and expects to find values that
      // have already been fixed up.
      if (Symbol->isInSection()) {
        const MCSectionMachO &Section = static_cast<const MCSectionMachO&>(
          Fragment->getParent()->getSection());
        if (Section.hasAttribute(MCSectionMachO::S_ATTR_DEBUG))
          Base = 0;
      }

      // x86_64 almost always uses external relocations, except when there is no
      // symbol to use as a base address (a local symbol with no preceding
      // non-local symbol).
      if (Base) {
        Index = Base->getIndex();
        IsExtern = 1;

        // Add the local offset, if needed.
        if (Base != &SD)
          Value += Layout.getSymbolAddress(&SD) - Layout.getSymbolAddress(Base);
      } else if (Symbol->isInSection()) {
        // The index is the section ordinal (1-based).
        Index = SD.getFragment()->getParent()->getOrdinal() + 1;
        IsExtern = 0;
        Value += Layout.getSymbolAddress(&SD);

        if (IsPCRel)
          Value -= FixupAddress + (1 << Log2Size);
      } else {
        report_fatal_error("unsupported relocation of undefined symbol '" +
                           Symbol->getName() + "'");
      }

      // Select the relocation type from the fixup kind and symbol modifier.
      MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind();
      if (IsPCRel) {
        if (IsRIPRel) {
          if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) {
            // x86_64 distinguishes movq foo@GOTPCREL so that the linker can
            // rewrite the movq to an leaq at link time if the symbol ends up in
            // the same linkage unit.
            if (unsigned(Fixup.Kind) == X86::reloc_riprel_4byte_movq_load)
              Type = RIT_X86_64_GOTLoad;
            else
              Type = RIT_X86_64_GOT;
          } else if (Modifier != MCSymbolRefExpr::VK_None) {
            report_fatal_error("unsupported symbol modifier in relocation");
          } else {
            Type = RIT_X86_64_Signed;

            // The Darwin x86_64 relocation format has a problem where it cannot
            // encode an address (L<foo> + <constant>) which is outside the atom
            // containing L<foo>. Generally, this shouldn't occur but it does
            // happen when we have a RIPrel instruction with data following the
            // relocation entry (e.g., movb $012, L0(%rip)). Even with the PCrel
            // adjustment Darwin x86_64 uses, the offset is still negative and
            // the linker has no way to recognize this.
            //
            // To work around this, Darwin uses several special relocation types
            // to indicate the offsets. However, the specification or
            // implementation of these seems to also be incomplete; they should
            // adjust the addend as well based on the actual encoded instruction
            // (the additional bias), but instead appear to just look at the
            // final offset.
            //
            // Any other offset keeps the plain SIGNED type chosen above.
            switch (-(Target.getConstant() + (1LL << Log2Size))) {
            case 1: Type = RIT_X86_64_Signed1; break;
            case 2: Type = RIT_X86_64_Signed2; break;
            case 4: Type = RIT_X86_64_Signed4; break;
            }
          }
        } else {
          if (Modifier != MCSymbolRefExpr::VK_None)
            report_fatal_error("unsupported symbol modifier in branch "
                              "relocation");

          Type = RIT_X86_64_Branch;
        }
      } else {
        if (Modifier == MCSymbolRefExpr::VK_GOT) {
          Type = RIT_X86_64_GOT;
        } else if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) {
          // GOTPCREL is allowed as a modifier on non-PCrel instructions, in
          // which case all we do is set the PCrel bit in the relocation entry;
          // this is used with exception handling, for example. The source is
          // required to include any necessary offset directly.
          Type = RIT_X86_64_GOT;
          IsPCRel = 1;
        } else if (Modifier != MCSymbolRefExpr::VK_None)
          report_fatal_error("unsupported symbol modifier in relocation");
        else
          Type = RIT_X86_64_Unsigned;
      }
    }

    // x86_64 always writes custom values into the fixups.
    FixedValue = Value;

    // struct relocation_info (8 bytes)
    //
    // Word1 packs the index from bit 0, the pcrel bit at bit 24, the log2 size
    // from bit 25, the extern bit at bit 27 and the type from bit 28.
    MachRelocationEntry MRE;
    MRE.Word0 = FixupOffset;
    MRE.Word1 = ((Index     <<  0) |
                 (IsPCRel   << 24) |
                 (Log2Size  << 25) |
                 (IsExtern  << 27) |
                 (Type      << 28));
    Relocations[Fragment->getParent()].push_back(MRE);
  }

  void RecordScatteredRelocation(const MCAssembler &Asm,
                                 const MCAsmLayout &Layout,
                                 const MCFragment *Fragment,
                                 const MCAsmFixup &Fixup, MCValue Target,
                                 uint64_t &FixedValue) {
    uint32_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.Offset;
    unsigned IsPCRel = isFixupKindPCRel(Fixup.Kind);
    unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind);
    unsigned Type = RIT_Vanilla;

    // See <reloc.h>.
const MCSymbol *A = &Target.getSymA()->getSymbol();
    MCSymbolData *A_SD = &Asm.getSymbolData(*A);

    // A symbol without a fragment has no address to put in the entry.
    if (!A_SD->getFragment())
      report_fatal_error("symbol '" + A->getName() +
                        "' can not be undefined in a subtraction expression");

    uint32_t Value = Layout.getSymbolAddress(A_SD);
    uint32_t Value2 = 0;

    if (const MCSymbolRefExpr *B = Target.getSymB()) {
      MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());

      if (!B_SD->getFragment())
        report_fatal_error("symbol '" + B->getSymbol().getName() +
                          "' can not be undefined in a subtraction expression");

      // Select the appropriate difference relocation type.
      //
      // Note that there is no longer any semantic difference between these two
      // relocation types from the linker's point of view, this is done solely
      // for pedantic compatibility with 'as'.
      Type = A_SD->isExternal() ? RIT_Difference : RIT_LocalDifference;
      Value2 = Layout.getSymbolAddress(B_SD);
    }

    // Relocations are written out in reverse order, so the PAIR comes first.
    if (Type == RIT_Difference || Type == RIT_LocalDifference) {
      // The PAIR entry carries the address of B and a zero offset.
      MachRelocationEntry MRE;
      MRE.Word0 = ((0         <<  0) |
                   (RIT_Pair  << 24) |
                   (Log2Size  << 28) |
                   (IsPCRel   << 30) |
                   RF_Scattered);
      MRE.Word1 = Value2;
      Relocations[Fragment->getParent()].push_back(MRE);
    }

    // Word0 packs the fixup offset from bit 0, the type from bit 24, the log2
    // size from bit 28, the pcrel bit at bit 30 and the scattered flag in the
    // high bit. Word1 holds the address of A.
    MachRelocationEntry MRE;
    MRE.Word0 = ((FixupOffset <<  0) |
                 (Type        << 24) |
                 (Log2Size    << 28) |
                 (IsPCRel     << 30) |
                 RF_Scattered);
    MRE.Word1 = Value;
    Relocations[Fragment->getParent()].push_back(MRE);
  }

  /// RecordRelocation - Record the relocation entry (or entries) needed for
  /// \arg Fixup. The 64-bit format is handled entirely by
  /// RecordX86_64Relocation; otherwise either a scattered or a plain
  /// relocation entry is recorded.
  void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
                        const MCFragment *Fragment, const MCAsmFixup &Fixup,
                        MCValue Target, uint64_t &FixedValue) {
    if (Is64Bit) {
      RecordX86_64Relocation(Asm, Layout, Fragment, Fixup, Target, FixedValue);
      return;
    }

    unsigned IsPCRel = isFixupKindPCRel(Fixup.Kind);
    unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind);

    // If this is a difference or a defined symbol plus an offset, then we need
    // a scattered relocation entry.
    // Differences always require scattered relocations.
    if (Target.getSymB())
        return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup,
                                         Target, FixedValue);

    // Get the symbol data, if any.
    MCSymbolData *SD = 0;
    if (Target.getSymA())
      SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());

    // If this is an internal relocation with an offset, it also needs a
    // scattered relocation entry.
    uint32_t Offset = Target.getConstant();
    if (IsPCRel)
      Offset += 1 << Log2Size;
    if (Offset && SD && !doesSymbolRequireExternRelocation(SD))
      return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup,
                                       Target, FixedValue);

    // See <reloc.h>.
765 uint32_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.Offset; 766 uint32_t Value = 0; 767 unsigned Index = 0; 768 unsigned IsExtern = 0; 769 unsigned Type = 0; 770 771 if (Target.isAbsolute()) { // constant 772 // SymbolNum of 0 indicates the absolute section. 773 // 774 // FIXME: Currently, these are never generated (see code below). I cannot 775 // find a case where they are actually emitted. 776 Type = RIT_Vanilla; 777 Value = 0; 778 } else { 779 // Check whether we need an external or internal relocation. 780 if (doesSymbolRequireExternRelocation(SD)) { 781 IsExtern = 1; 782 Index = SD->getIndex(); 783 // For external relocations, make sure to offset the fixup value to 784 // compensate for the addend of the symbol address, if it was 785 // undefined. This occurs with weak definitions, for example. 786 if (!SD->Symbol->isUndefined()) 787 FixedValue -= Layout.getSymbolAddress(SD); 788 Value = 0; 789 } else { 790 // The index is the section ordinal (1-based). 791 Index = SD->getFragment()->getParent()->getOrdinal() + 1; 792 Value = Layout.getSymbolAddress(SD); 793 } 794 795 Type = RIT_Vanilla; 796 } 797 798 // struct relocation_info (8 bytes) 799 MachRelocationEntry MRE; 800 MRE.Word0 = FixupOffset; 801 MRE.Word1 = ((Index << 0) | 802 (IsPCRel << 24) | 803 (Log2Size << 25) | 804 (IsExtern << 27) | 805 (Type << 28)); 806 Relocations[Fragment->getParent()].push_back(MRE); 807 } 808 809 void BindIndirectSymbols(MCAssembler &Asm) { 810 // This is the point where 'as' creates actual symbols for indirect symbols 811 // (in the following two passes). It would be easier for us to do this 812 // sooner when we see the attribute, but that makes getting the order in the 813 // symbol table much more complicated than it is worth. 814 // 815 // FIXME: Revisit this when the dust settles. 816 817 // Bind non lazy symbol pointers first. 
818 for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), 819 ie = Asm.indirect_symbol_end(); it != ie; ++it) { 820 const MCSectionMachO &Section = 821 cast<MCSectionMachO>(it->SectionData->getSection()); 822 823 if (Section.getType() != MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) 824 continue; 825 826 Asm.getOrCreateSymbolData(*it->Symbol); 827 } 828 829 // Then lazy symbol pointers and symbol stubs. 830 for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), 831 ie = Asm.indirect_symbol_end(); it != ie; ++it) { 832 const MCSectionMachO &Section = 833 cast<MCSectionMachO>(it->SectionData->getSection()); 834 835 if (Section.getType() != MCSectionMachO::S_LAZY_SYMBOL_POINTERS && 836 Section.getType() != MCSectionMachO::S_SYMBOL_STUBS) 837 continue; 838 839 // Set the symbol type to undefined lazy, but only on construction. 840 // 841 // FIXME: Do not hardcode. 842 bool Created; 843 MCSymbolData &Entry = Asm.getOrCreateSymbolData(*it->Symbol, &Created); 844 if (Created) 845 Entry.setFlags(Entry.getFlags() | 0x0001); 846 } 847 } 848 849 /// ComputeSymbolTable - Compute the symbol table data 850 /// 851 /// \param StringTable [out] - The string table data. 852 /// \param StringIndexMap [out] - Map from symbol names to offsets in the 853 /// string table. 854 void ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable, 855 std::vector<MachSymbolData> &LocalSymbolData, 856 std::vector<MachSymbolData> &ExternalSymbolData, 857 std::vector<MachSymbolData> &UndefinedSymbolData) { 858 // Build section lookup table. 859 DenseMap<const MCSection*, uint8_t> SectionIndexMap; 860 unsigned Index = 1; 861 for (MCAssembler::iterator it = Asm.begin(), 862 ie = Asm.end(); it != ie; ++it, ++Index) 863 SectionIndexMap[&it->getSection()] = Index; 864 assert(Index <= 256 && "Too many sections!"); 865 866 // Index 0 is always the empty string. 
StringMap<uint64_t> StringIndexMap;
    StringTable += '\x00';

    // Build the symbol arrays and the string table, but only for non-local
    // symbols.
    //
    // The particular order that we collect the symbols and create the string
    // table, then sort the symbols is chosen to match 'as'. Even though it
    // doesn't matter for correctness, this is important for letting us diff .o
    // files.
    for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
           ie = Asm.symbol_end(); it != ie; ++it) {
      const MCSymbol &Symbol = it->getSymbol();

      // Ignore non-linker visible symbols.
      if (!Asm.isSymbolLinkerVisible(it))
        continue;

      // Local symbols are handled by the second loop.
      if (!it->isExternal() && !Symbol.isUndefined())
        continue;

      // Add the name to the string table on first use. A zero entry means the
      // name has not been added yet, since offset 0 is the empty string.
      uint64_t &Entry = StringIndexMap[Symbol.getName()];
      if (!Entry) {
        Entry = StringTable.size();
        StringTable += Symbol.getName();
        StringTable += '\x00';
      }

      MachSymbolData MSD;
      MSD.SymbolData = it;
      MSD.StringIndex = Entry;

      if (Symbol.isUndefined()) {
        MSD.SectionIndex = 0;
        UndefinedSymbolData.push_back(MSD);
      } else if (Symbol.isAbsolute()) {
        MSD.SectionIndex = 0;
        ExternalSymbolData.push_back(MSD);
      } else {
        MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
        assert(MSD.SectionIndex && "Invalid section index!");
        ExternalSymbolData.push_back(MSD);
      }
    }

    // Now add the data for local symbols.
    for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
           ie = Asm.symbol_end(); it != ie; ++it) {
      const MCSymbol &Symbol = it->getSymbol();

      // Ignore non-linker visible symbols.
      if (!Asm.isSymbolLinkerVisible(it))
        continue;

      // External and undefined symbols were handled by the first loop.
      if (it->isExternal() || Symbol.isUndefined())
        continue;

      uint64_t &Entry = StringIndexMap[Symbol.getName()];
      if (!Entry) {
        Entry = StringTable.size();
        StringTable += Symbol.getName();
        StringTable += '\x00';
      }

      MachSymbolData MSD;
      MSD.SymbolData = it;
      MSD.StringIndex = Entry;

      if (Symbol.isAbsolute()) {
        MSD.SectionIndex = 0;
        LocalSymbolData.push_back(MSD);
      } else {
        MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
        assert(MSD.SectionIndex && "Invalid section index!");
        LocalSymbolData.push_back(MSD);
      }
    }

    // External and undefined symbols are required to be in lexicographic order.
    std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end());
    std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end());

    // Set the symbol indices.
    //
    // Indices are assigned consecutively: locals first, then externals, then
    // undefined symbols.
    Index = 0;
    for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
      LocalSymbolData[i].SymbolData->setIndex(Index++);
    for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
      ExternalSymbolData[i].SymbolData->setIndex(Index++);
    for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
      UndefinedSymbolData[i].SymbolData->setIndex(Index++);

    // The string table is padded to a multiple of 4.
    while (StringTable.size() % 4)
      StringTable += '\x00';
  }

  void ExecutePostLayoutBinding(MCAssembler &Asm) {
    // Create symbol data for any indirect symbols.
    BindIndirectSymbols(Asm);

    // Compute symbol table information and bind symbol indices.
968 ComputeSymbolTable(Asm, StringTable, LocalSymbolData, ExternalSymbolData, 969 UndefinedSymbolData); 970 } 971 972 void WriteObject(const MCAssembler &Asm, const MCAsmLayout &Layout) { 973 unsigned NumSections = Asm.size(); 974 975 // The section data starts after the header, the segment load command (and 976 // section headers) and the symbol table. 977 unsigned NumLoadCommands = 1; 978 uint64_t LoadCommandsSize = Is64Bit ? 979 SegmentLoadCommand64Size + NumSections * Section64Size : 980 SegmentLoadCommand32Size + NumSections * Section32Size; 981 982 // Add the symbol table load command sizes, if used. 983 unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() + 984 UndefinedSymbolData.size(); 985 if (NumSymbols) { 986 NumLoadCommands += 2; 987 LoadCommandsSize += SymtabLoadCommandSize + DysymtabLoadCommandSize; 988 } 989 990 // Compute the total size of the section data, as well as its file size and 991 // vm size. 992 uint64_t SectionDataStart = (Is64Bit ? Header64Size : Header32Size) 993 + LoadCommandsSize; 994 uint64_t SectionDataSize = 0; 995 uint64_t SectionDataFileSize = 0; 996 uint64_t VMSize = 0; 997 for (MCAssembler::const_iterator it = Asm.begin(), 998 ie = Asm.end(); it != ie; ++it) { 999 const MCSectionData &SD = *it; 1000 uint64_t Address = Layout.getSectionAddress(&SD); 1001 uint64_t Size = Layout.getSectionSize(&SD); 1002 uint64_t FileSize = Layout.getSectionFileSize(&SD); 1003 1004 VMSize = std::max(VMSize, Address + Size); 1005 1006 if (Asm.getBackend().isVirtualSection(SD.getSection())) 1007 continue; 1008 1009 SectionDataSize = std::max(SectionDataSize, Address + Size); 1010 SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize); 1011 } 1012 1013 // The section data is padded to 4 bytes. 1014 // 1015 // FIXME: Is this machine dependent? 
1016 unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4); 1017 SectionDataFileSize += SectionDataPadding; 1018 1019 // Write the prolog, starting with the header and load command... 1020 WriteHeader(NumLoadCommands, LoadCommandsSize, 1021 Asm.getSubsectionsViaSymbols()); 1022 WriteSegmentLoadCommand(NumSections, VMSize, 1023 SectionDataStart, SectionDataSize); 1024 1025 // ... and then the section headers. 1026 uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize; 1027 for (MCAssembler::const_iterator it = Asm.begin(), 1028 ie = Asm.end(); it != ie; ++it) { 1029 std::vector<MachRelocationEntry> &Relocs = Relocations[it]; 1030 unsigned NumRelocs = Relocs.size(); 1031 uint64_t SectionStart = SectionDataStart + Layout.getSectionAddress(it); 1032 WriteSection(Asm, Layout, *it, SectionStart, RelocTableEnd, NumRelocs); 1033 RelocTableEnd += NumRelocs * RelocationInfoSize; 1034 } 1035 1036 // Write the symbol table load command, if used. 1037 if (NumSymbols) { 1038 unsigned FirstLocalSymbol = 0; 1039 unsigned NumLocalSymbols = LocalSymbolData.size(); 1040 unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols; 1041 unsigned NumExternalSymbols = ExternalSymbolData.size(); 1042 unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols; 1043 unsigned NumUndefinedSymbols = UndefinedSymbolData.size(); 1044 unsigned NumIndirectSymbols = Asm.indirect_symbol_size(); 1045 unsigned NumSymTabSymbols = 1046 NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols; 1047 uint64_t IndirectSymbolSize = NumIndirectSymbols * 4; 1048 uint64_t IndirectSymbolOffset = 0; 1049 1050 // If used, the indirect symbols are written after the section data. 1051 if (NumIndirectSymbols) 1052 IndirectSymbolOffset = RelocTableEnd; 1053 1054 // The symbol table is written after the indirect symbol data. 1055 uint64_t SymbolTableOffset = RelocTableEnd + IndirectSymbolSize; 1056 1057 // The string table is written after symbol table. 
1058 uint64_t StringTableOffset = 1059 SymbolTableOffset + NumSymTabSymbols * (Is64Bit ? Nlist64Size : 1060 Nlist32Size); 1061 WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols, 1062 StringTableOffset, StringTable.size()); 1063 1064 WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols, 1065 FirstExternalSymbol, NumExternalSymbols, 1066 FirstUndefinedSymbol, NumUndefinedSymbols, 1067 IndirectSymbolOffset, NumIndirectSymbols); 1068 } 1069 1070 // Write the actual section data. 1071 for (MCAssembler::const_iterator it = Asm.begin(), 1072 ie = Asm.end(); it != ie; ++it) 1073 Asm.WriteSectionData(it, Layout, Writer); 1074 1075 // Write the extra padding. 1076 WriteZeros(SectionDataPadding); 1077 1078 // Write the relocation entries. 1079 for (MCAssembler::const_iterator it = Asm.begin(), 1080 ie = Asm.end(); it != ie; ++it) { 1081 // Write the section relocation entries, in reverse order to match 'as' 1082 // (approximately, the exact algorithm is more complicated than this). 1083 std::vector<MachRelocationEntry> &Relocs = Relocations[it]; 1084 for (unsigned i = 0, e = Relocs.size(); i != e; ++i) { 1085 Write32(Relocs[e - i - 1].Word0); 1086 Write32(Relocs[e - i - 1].Word1); 1087 } 1088 } 1089 1090 // Write the symbol table data, if used. 1091 if (NumSymbols) { 1092 // Write the indirect symbol entries. 1093 for (MCAssembler::const_indirect_symbol_iterator 1094 it = Asm.indirect_symbol_begin(), 1095 ie = Asm.indirect_symbol_end(); it != ie; ++it) { 1096 // Indirect symbols in the non lazy symbol pointer section have some 1097 // special handling. 1098 const MCSectionMachO &Section = 1099 static_cast<const MCSectionMachO&>(it->SectionData->getSection()); 1100 if (Section.getType() == MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) { 1101 // If this symbol is defined and internal, mark it as such. 
1102 if (it->Symbol->isDefined() && 1103 !Asm.getSymbolData(*it->Symbol).isExternal()) { 1104 uint32_t Flags = ISF_Local; 1105 if (it->Symbol->isAbsolute()) 1106 Flags |= ISF_Absolute; 1107 Write32(Flags); 1108 continue; 1109 } 1110 } 1111 1112 Write32(Asm.getSymbolData(*it->Symbol).getIndex()); 1113 } 1114 1115 // FIXME: Check that offsets match computed ones. 1116 1117 // Write the symbol table entries. 1118 for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) 1119 WriteNlist(LocalSymbolData[i], Layout); 1120 for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) 1121 WriteNlist(ExternalSymbolData[i], Layout); 1122 for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) 1123 WriteNlist(UndefinedSymbolData[i], Layout); 1124 1125 // Write the string table. 1126 OS << StringTable.str(); 1127 } 1128 } 1129}; 1130 1131} 1132 1133MachObjectWriter::MachObjectWriter(raw_ostream &OS, 1134 bool Is64Bit, 1135 bool IsLittleEndian) 1136 : MCObjectWriter(OS, IsLittleEndian) 1137{ 1138 Impl = new MachObjectWriterImpl(this, Is64Bit); 1139} 1140 1141MachObjectWriter::~MachObjectWriter() { 1142 delete (MachObjectWriterImpl*) Impl; 1143} 1144 1145void MachObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm) { 1146 ((MachObjectWriterImpl*) Impl)->ExecutePostLayoutBinding(Asm); 1147} 1148 1149void MachObjectWriter::RecordRelocation(const MCAssembler &Asm, 1150 const MCAsmLayout &Layout, 1151 const MCFragment *Fragment, 1152 const MCAsmFixup &Fixup, MCValue Target, 1153 uint64_t &FixedValue) { 1154 ((MachObjectWriterImpl*) Impl)->RecordRelocation(Asm, Layout, Fragment, Fixup, 1155 Target, FixedValue); 1156} 1157 1158void MachObjectWriter::WriteObject(const MCAssembler &Asm, 1159 const MCAsmLayout &Layout) { 1160 ((MachObjectWriterImpl*) Impl)->WriteObject(Asm, Layout); 1161} 1162