X86MachObjectWriter.cpp revision a87e40f16f1c3117412e01107807e490d6fb29bc
1//===-- X86MachObjectWriter.cpp - X86 Mach-O Writer -----------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "MCTargetDesc/X86FixupKinds.h"
11#include "MCTargetDesc/X86MCTargetDesc.h"
12#include "llvm/MC/MCAssembler.h"
13#include "llvm/MC/MCAsmLayout.h"
14#include "llvm/MC/MCMachObjectWriter.h"
15#include "llvm/MC/MCSectionMachO.h"
16#include "llvm/MC/MCValue.h"
17#include "llvm/ADT/Twine.h"
18#include "llvm/Support/ErrorHandling.h"
19#include "llvm/Object/MachOFormat.h"
20
21using namespace llvm;
22using namespace llvm::object;
23
24namespace {
25class X86MachObjectWriter : public MCMachObjectTargetWriter {
26  void RecordScatteredRelocation(MachObjectWriter *Writer,
27                                 const MCAssembler &Asm,
28                                 const MCAsmLayout &Layout,
29                                 const MCFragment *Fragment,
30                                 const MCFixup &Fixup,
31                                 MCValue Target,
32                                 unsigned Log2Size,
33                                 uint64_t &FixedValue);
34  void RecordTLVPRelocation(MachObjectWriter *Writer,
35                            const MCAssembler &Asm,
36                            const MCAsmLayout &Layout,
37                            const MCFragment *Fragment,
38                            const MCFixup &Fixup,
39                            MCValue Target,
40                            uint64_t &FixedValue);
41
42  void RecordX86Relocation(MachObjectWriter *Writer,
43                              const MCAssembler &Asm,
44                              const MCAsmLayout &Layout,
45                              const MCFragment *Fragment,
46                              const MCFixup &Fixup,
47                              MCValue Target,
48                              uint64_t &FixedValue);
49  void RecordX86_64Relocation(MachObjectWriter *Writer,
50                              const MCAssembler &Asm,
51                              const MCAsmLayout &Layout,
52                              const MCFragment *Fragment,
53                              const MCFixup &Fixup,
54                              MCValue Target,
55                              uint64_t &FixedValue);
56public:
57  X86MachObjectWriter(bool Is64Bit, uint32_t CPUType,
58                      uint32_t CPUSubtype)
59    : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
60                               /*UseAggressiveSymbolFolding=*/Is64Bit) {}
61
62  void RecordRelocation(MachObjectWriter *Writer,
63                        const MCAssembler &Asm, const MCAsmLayout &Layout,
64                        const MCFragment *Fragment, const MCFixup &Fixup,
65                        MCValue Target, uint64_t &FixedValue) {
66    if (Writer->is64Bit())
67      RecordX86_64Relocation(Writer, Asm, Layout, Fragment, Fixup, Target,
68                             FixedValue);
69    else
70      RecordX86Relocation(Writer, Asm, Layout, Fragment, Fixup, Target,
71                          FixedValue);
72  }
73};
74}
75
76static bool isFixupKindRIPRel(unsigned Kind) {
77  return Kind == X86::reloc_riprel_4byte ||
78    Kind == X86::reloc_riprel_4byte_movq_load;
79}
80
81static unsigned getFixupKindLog2Size(unsigned Kind) {
82  switch (Kind) {
83  default:
84    llvm_unreachable("invalid fixup kind!");
85  case FK_PCRel_1:
86  case FK_Data_1: return 0;
87  case FK_PCRel_2:
88  case FK_Data_2: return 1;
89  case FK_PCRel_4:
90    // FIXME: Remove these!!!
91  case X86::reloc_riprel_4byte:
92  case X86::reloc_riprel_4byte_movq_load:
93  case X86::reloc_signed_4byte:
94  case FK_Data_4: return 2;
95  case FK_Data_8: return 3;
96  }
97}
98
99void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
100                                                 const MCAssembler &Asm,
101                                                 const MCAsmLayout &Layout,
102                                                 const MCFragment *Fragment,
103                                                 const MCFixup &Fixup,
104                                                 MCValue Target,
105                                                 uint64_t &FixedValue) {
106  unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
107  unsigned IsRIPRel = isFixupKindRIPRel(Fixup.getKind());
108  unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
109
110  // See <reloc.h>.
111  uint32_t FixupOffset =
112    Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
113  uint32_t FixupAddress =
114    Writer->getFragmentAddress(Fragment, Layout) + Fixup.getOffset();
115  int64_t Value = 0;
116  unsigned Index = 0;
117  unsigned IsExtern = 0;
118  unsigned Type = 0;
119
120  Value = Target.getConstant();
121
122  if (IsPCRel) {
123    // Compensate for the relocation offset, Darwin x86_64 relocations only have
124    // the addend and appear to have attempted to define it to be the actual
125    // expression addend without the PCrel bias. However, instructions with data
126    // following the relocation are not accommodated for (see comment below
127    // regarding SIGNED{1,2,4}), so it isn't exactly that either.
128    Value += 1LL << Log2Size;
129  }
130
131  if (Target.isAbsolute()) { // constant
132    // SymbolNum of 0 indicates the absolute section.
133    Type = macho::RIT_X86_64_Unsigned;
134    Index = 0;
135
136    // FIXME: I believe this is broken, I don't think the linker can understand
137    // it. I think it would require a local relocation, but I'm not sure if that
138    // would work either. The official way to get an absolute PCrel relocation
139    // is to use an absolute symbol (which we don't support yet).
140    if (IsPCRel) {
141      IsExtern = 1;
142      Type = macho::RIT_X86_64_Branch;
143    }
144  } else if (Target.getSymB()) { // A - B + constant
145    const MCSymbol *A = &Target.getSymA()->getSymbol();
146    MCSymbolData &A_SD = Asm.getSymbolData(*A);
147    const MCSymbolData *A_Base = Asm.getAtom(&A_SD);
148
149    const MCSymbol *B = &Target.getSymB()->getSymbol();
150    MCSymbolData &B_SD = Asm.getSymbolData(*B);
151    const MCSymbolData *B_Base = Asm.getAtom(&B_SD);
152
153    // Neither symbol can be modified.
154    if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None ||
155        Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None)
156      report_fatal_error("unsupported relocation of modified symbol");
157
158    // We don't support PCrel relocations of differences. Darwin 'as' doesn't
159    // implement most of these correctly.
160    if (IsPCRel)
161      report_fatal_error("unsupported pc-relative relocation of difference");
162
163    // The support for the situation where one or both of the symbols would
164    // require a local relocation is handled just like if the symbols were
165    // external.  This is certainly used in the case of debug sections where the
166    // section has only temporary symbols and thus the symbols don't have base
167    // symbols.  This is encoded using the section ordinal and non-extern
168    // relocation entries.
169
170    // Darwin 'as' doesn't emit correct relocations for this (it ends up with a
171    // single SIGNED relocation); reject it for now.  Except the case where both
172    // symbols don't have a base, equal but both NULL.
173    if (A_Base == B_Base && A_Base)
174      report_fatal_error("unsupported relocation with identical base");
175
176    Value += Writer->getSymbolAddress(&A_SD, Layout) -
177      (A_Base == NULL ? 0 : Writer->getSymbolAddress(A_Base, Layout));
178    Value -= Writer->getSymbolAddress(&B_SD, Layout) -
179      (B_Base == NULL ? 0 : Writer->getSymbolAddress(B_Base, Layout));
180
181    if (A_Base) {
182      Index = A_Base->getIndex();
183      IsExtern = 1;
184    }
185    else {
186      Index = A_SD.getFragment()->getParent()->getOrdinal() + 1;
187      IsExtern = 0;
188    }
189    Type = macho::RIT_X86_64_Unsigned;
190
191    macho::RelocationEntry MRE;
192    MRE.Word0 = FixupOffset;
193    MRE.Word1 = ((Index     <<  0) |
194                 (IsPCRel   << 24) |
195                 (Log2Size  << 25) |
196                 (IsExtern  << 27) |
197                 (Type      << 28));
198    Writer->addRelocation(Fragment->getParent(), MRE);
199
200    if (B_Base) {
201      Index = B_Base->getIndex();
202      IsExtern = 1;
203    }
204    else {
205      Index = B_SD.getFragment()->getParent()->getOrdinal() + 1;
206      IsExtern = 0;
207    }
208    Type = macho::RIT_X86_64_Subtractor;
209  } else {
210    const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
211    MCSymbolData &SD = Asm.getSymbolData(*Symbol);
212    const MCSymbolData *Base = Asm.getAtom(&SD);
213
214    // Relocations inside debug sections always use local relocations when
215    // possible. This seems to be done because the debugger doesn't fully
216    // understand x86_64 relocation entries, and expects to find values that
217    // have already been fixed up.
218    if (Symbol->isInSection()) {
219      const MCSectionMachO &Section = static_cast<const MCSectionMachO&>(
220        Fragment->getParent()->getSection());
221      if (Section.hasAttribute(MCSectionMachO::S_ATTR_DEBUG))
222        Base = 0;
223    }
224
225    // x86_64 almost always uses external relocations, except when there is no
226    // symbol to use as a base address (a local symbol with no preceding
227    // non-local symbol).
228    if (Base) {
229      Index = Base->getIndex();
230      IsExtern = 1;
231
232      // Add the local offset, if needed.
233      if (Base != &SD)
234        Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base);
235    } else if (Symbol->isInSection() && !Symbol->isVariable()) {
236      // The index is the section ordinal (1-based).
237      Index = SD.getFragment()->getParent()->getOrdinal() + 1;
238      IsExtern = 0;
239      Value += Writer->getSymbolAddress(&SD, Layout);
240
241      if (IsPCRel)
242        Value -= FixupAddress + (1 << Log2Size);
243    } else if (Symbol->isVariable()) {
244      const MCExpr *Value = Symbol->getVariableValue();
245      int64_t Res;
246      bool isAbs = Value->EvaluateAsAbsolute(Res, Layout,
247                                             Writer->getSectionAddressMap());
248      if (isAbs) {
249        FixedValue = Res;
250        return;
251      } else {
252        report_fatal_error("unsupported relocation of variable '" +
253                           Symbol->getName() + "'");
254      }
255    } else {
256      report_fatal_error("unsupported relocation of undefined symbol '" +
257                         Symbol->getName() + "'");
258    }
259
260    MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind();
261    if (IsPCRel) {
262      if (IsRIPRel) {
263        if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) {
264          // x86_64 distinguishes movq foo@GOTPCREL so that the linker can
265          // rewrite the movq to an leaq at link time if the symbol ends up in
266          // the same linkage unit.
267          if (unsigned(Fixup.getKind()) == X86::reloc_riprel_4byte_movq_load)
268            Type = macho::RIT_X86_64_GOTLoad;
269          else
270            Type = macho::RIT_X86_64_GOT;
271        }  else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
272          Type = macho::RIT_X86_64_TLV;
273        }  else if (Modifier != MCSymbolRefExpr::VK_None) {
274          report_fatal_error("unsupported symbol modifier in relocation");
275        } else {
276          Type = macho::RIT_X86_64_Signed;
277
278          // The Darwin x86_64 relocation format has a problem where it cannot
279          // encode an address (L<foo> + <constant>) which is outside the atom
280          // containing L<foo>. Generally, this shouldn't occur but it does
281          // happen when we have a RIPrel instruction with data following the
282          // relocation entry (e.g., movb $012, L0(%rip)). Even with the PCrel
283          // adjustment Darwin x86_64 uses, the offset is still negative and the
284          // linker has no way to recognize this.
285          //
286          // To work around this, Darwin uses several special relocation types
287          // to indicate the offsets. However, the specification or
288          // implementation of these seems to also be incomplete; they should
289          // adjust the addend as well based on the actual encoded instruction
290          // (the additional bias), but instead appear to just look at the final
291          // offset.
292          switch (-(Target.getConstant() + (1LL << Log2Size))) {
293          case 1: Type = macho::RIT_X86_64_Signed1; break;
294          case 2: Type = macho::RIT_X86_64_Signed2; break;
295          case 4: Type = macho::RIT_X86_64_Signed4; break;
296          }
297        }
298      } else {
299        if (Modifier != MCSymbolRefExpr::VK_None)
300          report_fatal_error("unsupported symbol modifier in branch "
301                             "relocation");
302
303        Type = macho::RIT_X86_64_Branch;
304      }
305    } else {
306      if (Modifier == MCSymbolRefExpr::VK_GOT) {
307        Type = macho::RIT_X86_64_GOT;
308      } else if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) {
309        // GOTPCREL is allowed as a modifier on non-PCrel instructions, in which
310        // case all we do is set the PCrel bit in the relocation entry; this is
311        // used with exception handling, for example. The source is required to
312        // include any necessary offset directly.
313        Type = macho::RIT_X86_64_GOT;
314        IsPCRel = 1;
315      } else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
316        report_fatal_error("TLVP symbol modifier should have been rip-rel");
317      } else if (Modifier != MCSymbolRefExpr::VK_None)
318        report_fatal_error("unsupported symbol modifier in relocation");
319      else
320        Type = macho::RIT_X86_64_Unsigned;
321    }
322  }
323
324  // x86_64 always writes custom values into the fixups.
325  FixedValue = Value;
326
327  // struct relocation_info (8 bytes)
328  macho::RelocationEntry MRE;
329  MRE.Word0 = FixupOffset;
330  MRE.Word1 = ((Index     <<  0) |
331               (IsPCRel   << 24) |
332               (Log2Size  << 25) |
333               (IsExtern  << 27) |
334               (Type      << 28));
335  Writer->addRelocation(Fragment->getParent(), MRE);
336}
337
338void X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
339                                                    const MCAssembler &Asm,
340                                                    const MCAsmLayout &Layout,
341                                                    const MCFragment *Fragment,
342                                                    const MCFixup &Fixup,
343                                                    MCValue Target,
344                                                    unsigned Log2Size,
345                                                    uint64_t &FixedValue) {
346  uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
347  unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
348  unsigned Type = macho::RIT_Vanilla;
349
350  // See <reloc.h>.
351  const MCSymbol *A = &Target.getSymA()->getSymbol();
352  MCSymbolData *A_SD = &Asm.getSymbolData(*A);
353
354  if (!A_SD->getFragment())
355    report_fatal_error("symbol '" + A->getName() +
356                       "' can not be undefined in a subtraction expression");
357
358  uint32_t Value = Writer->getSymbolAddress(A_SD, Layout);
359  uint64_t SecAddr = Writer->getSectionAddress(A_SD->getFragment()->getParent());
360  FixedValue += SecAddr;
361  uint32_t Value2 = 0;
362
363  if (const MCSymbolRefExpr *B = Target.getSymB()) {
364    MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
365
366    if (!B_SD->getFragment())
367      report_fatal_error("symbol '" + B->getSymbol().getName() +
368                         "' can not be undefined in a subtraction expression");
369
370    // Select the appropriate difference relocation type.
371    //
372    // Note that there is no longer any semantic difference between these two
373    // relocation types from the linkers point of view, this is done solely for
374    // pedantic compatibility with 'as'.
375    Type = A_SD->isExternal() ? (unsigned)macho::RIT_Difference :
376      (unsigned)macho::RIT_Generic_LocalDifference;
377    Value2 = Writer->getSymbolAddress(B_SD, Layout);
378    FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
379  }
380
381  // Relocations are written out in reverse order, so the PAIR comes first.
382  if (Type == macho::RIT_Difference ||
383      Type == macho::RIT_Generic_LocalDifference) {
384    macho::RelocationEntry MRE;
385    MRE.Word0 = ((0         <<  0) |
386                 (macho::RIT_Pair  << 24) |
387                 (Log2Size  << 28) |
388                 (IsPCRel   << 30) |
389                 macho::RF_Scattered);
390    MRE.Word1 = Value2;
391    Writer->addRelocation(Fragment->getParent(), MRE);
392  }
393
394  macho::RelocationEntry MRE;
395  MRE.Word0 = ((FixupOffset <<  0) |
396               (Type        << 24) |
397               (Log2Size    << 28) |
398               (IsPCRel     << 30) |
399               macho::RF_Scattered);
400  MRE.Word1 = Value;
401  Writer->addRelocation(Fragment->getParent(), MRE);
402}
403
404void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer,
405                                               const MCAssembler &Asm,
406                                               const MCAsmLayout &Layout,
407                                               const MCFragment *Fragment,
408                                               const MCFixup &Fixup,
409                                               MCValue Target,
410                                               uint64_t &FixedValue) {
411  assert(Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP &&
412         !is64Bit() &&
413         "Should only be called with a 32-bit TLVP relocation!");
414
415  unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
416  uint32_t Value = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
417  unsigned IsPCRel = 0;
418
419  // Get the symbol data.
420  MCSymbolData *SD_A = &Asm.getSymbolData(Target.getSymA()->getSymbol());
421  unsigned Index = SD_A->getIndex();
422
423  // We're only going to have a second symbol in pic mode and it'll be a
424  // subtraction from the picbase. For 32-bit pic the addend is the difference
425  // between the picbase and the next address.  For 32-bit static the addend is
426  // zero.
427  if (Target.getSymB()) {
428    // If this is a subtraction then we're pcrel.
429    uint32_t FixupAddress =
430      Writer->getFragmentAddress(Fragment, Layout) + Fixup.getOffset();
431    MCSymbolData *SD_B = &Asm.getSymbolData(Target.getSymB()->getSymbol());
432    IsPCRel = 1;
433    FixedValue = (FixupAddress - Writer->getSymbolAddress(SD_B, Layout) +
434                  Target.getConstant());
435    FixedValue += 1ULL << Log2Size;
436  } else {
437    FixedValue = 0;
438  }
439
440  // struct relocation_info (8 bytes)
441  macho::RelocationEntry MRE;
442  MRE.Word0 = Value;
443  MRE.Word1 = ((Index                  <<  0) |
444               (IsPCRel                << 24) |
445               (Log2Size               << 25) |
446               (1                      << 27) | // Extern
447               (macho::RIT_Generic_TLV << 28)); // Type
448  Writer->addRelocation(Fragment->getParent(), MRE);
449}
450
451void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
452                                              const MCAssembler &Asm,
453                                              const MCAsmLayout &Layout,
454                                              const MCFragment *Fragment,
455                                              const MCFixup &Fixup,
456                                              MCValue Target,
457                                              uint64_t &FixedValue) {
458  unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
459  unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
460
461  // If this is a 32-bit TLVP reloc it's handled a bit differently.
462  if (Target.getSymA() &&
463      Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP) {
464    RecordTLVPRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
465                         FixedValue);
466    return;
467  }
468
469  // If this is a difference or a defined symbol plus an offset, then we need a
470  // scattered relocation entry. Differences always require scattered
471  // relocations.
472  if (Target.getSymB())
473    return RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
474                                     Target, Log2Size, FixedValue);
475
476  // Get the symbol data, if any.
477  MCSymbolData *SD = 0;
478  if (Target.getSymA())
479    SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
480
481  // If this is an internal relocation with an offset, it also needs a scattered
482  // relocation entry.
483  uint32_t Offset = Target.getConstant();
484  if (IsPCRel)
485    Offset += 1 << Log2Size;
486  if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD))
487    return RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
488                                     Target, Log2Size, FixedValue);
489
490  // See <reloc.h>.
491  uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
492  unsigned Index = 0;
493  unsigned IsExtern = 0;
494  unsigned Type = 0;
495
496  if (Target.isAbsolute()) { // constant
497    // SymbolNum of 0 indicates the absolute section.
498    //
499    // FIXME: Currently, these are never generated (see code below). I cannot
500    // find a case where they are actually emitted.
501    Type = macho::RIT_Vanilla;
502  } else {
503    // Resolve constant variables.
504    if (SD->getSymbol().isVariable()) {
505      int64_t Res;
506      if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute(
507            Res, Layout, Writer->getSectionAddressMap())) {
508        FixedValue = Res;
509        return;
510      }
511    }
512
513    // Check whether we need an external or internal relocation.
514    if (Writer->doesSymbolRequireExternRelocation(SD)) {
515      IsExtern = 1;
516      Index = SD->getIndex();
517      // For external relocations, make sure to offset the fixup value to
518      // compensate for the addend of the symbol address, if it was
519      // undefined. This occurs with weak definitions, for example.
520      if (!SD->Symbol->isUndefined())
521        FixedValue -= Layout.getSymbolOffset(SD);
522    } else {
523      // The index is the section ordinal (1-based).
524      const MCSectionData &SymSD = Asm.getSectionData(
525        SD->getSymbol().getSection());
526      Index = SymSD.getOrdinal() + 1;
527      FixedValue += Writer->getSectionAddress(&SymSD);
528    }
529    if (IsPCRel)
530      FixedValue -= Writer->getSectionAddress(Fragment->getParent());
531
532    Type = macho::RIT_Vanilla;
533  }
534
535  // struct relocation_info (8 bytes)
536  macho::RelocationEntry MRE;
537  MRE.Word0 = FixupOffset;
538  MRE.Word1 = ((Index     <<  0) |
539               (IsPCRel   << 24) |
540               (Log2Size  << 25) |
541               (IsExtern  << 27) |
542               (Type      << 28));
543  Writer->addRelocation(Fragment->getParent(), MRE);
544}
545
546MCObjectWriter *llvm::createX86MachObjectWriter(raw_ostream &OS,
547                                                bool Is64Bit,
548                                                uint32_t CPUType,
549                                                uint32_t CPUSubtype) {
550  return createMachObjectWriter(new X86MachObjectWriter(Is64Bit,
551                                                        CPUType,
552                                                        CPUSubtype),
553                                OS, /*IsLittleEndian=*/true);
554}
555