elf_builder.h revision b0a962c59699fb4d115fb159eeabbd2200c6f872
1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef ART_COMPILER_ELF_BUILDER_H_
18#define ART_COMPILER_ELF_BUILDER_H_
19
20#include <vector>
21
22#include "arch/instruction_set.h"
23#include "base/unix_file/fd_file.h"
24#include "buffered_output_stream.h"
25#include "elf_utils.h"
26#include "file_output_stream.h"
27
28namespace art {
29
30class CodeOutput {
31 public:
32  virtual bool Write(OutputStream* out) = 0;
33  virtual ~CodeOutput() {}
34};
35
36// Writes ELF file.
37// The main complication is that the sections often want to reference
38// each other.  We solve this by writing the ELF file in two stages:
39//  * Sections are asked about their size, and overall layout is calculated.
40//  * Sections do the actual writes which may use offsets of other sections.
41template <typename ElfTypes>
42class ElfBuilder FINAL {
43 public:
44  using Elf_Addr = typename ElfTypes::Addr;
45  using Elf_Off = typename ElfTypes::Off;
46  using Elf_Word = typename ElfTypes::Word;
47  using Elf_Sword = typename ElfTypes::Sword;
48  using Elf_Ehdr = typename ElfTypes::Ehdr;
49  using Elf_Shdr = typename ElfTypes::Shdr;
50  using Elf_Sym = typename ElfTypes::Sym;
51  using Elf_Phdr = typename ElfTypes::Phdr;
52  using Elf_Dyn = typename ElfTypes::Dyn;
53
54  // Base class of all sections.
55  class Section {
56   public:
57    Section(const std::string& name, Elf_Word type, Elf_Word flags,
58            const Section* link, Elf_Word info, Elf_Word align, Elf_Word entsize)
59        : header_(), section_index_(0), name_(name), link_(link) {
60      header_.sh_type = type;
61      header_.sh_flags = flags;
62      header_.sh_info = info;
63      header_.sh_addralign = align;
64      header_.sh_entsize = entsize;
65    }
66    virtual ~Section() {}
67
68    // Returns the size of the content of this section.  It is used to
69    // calculate file offsets of all sections before doing any writes.
70    virtual Elf_Word GetSize() const = 0;
71
72    // Write the content of this section to the given file.
73    // This must write exactly the number of bytes returned by GetSize().
74    // Offsets of all sections are known when this method is called.
75    virtual bool Write(File* elf_file) = 0;
76
77    Elf_Word GetLink() const {
78      return (link_ != nullptr) ? link_->GetSectionIndex() : 0;
79    }
80
81    const Elf_Shdr* GetHeader() const {
82      return &header_;
83    }
84
85    Elf_Shdr* GetHeader() {
86      return &header_;
87    }
88
89    Elf_Word GetSectionIndex() const {
90      DCHECK_NE(section_index_, 0u);
91      return section_index_;
92    }
93
94    void SetSectionIndex(Elf_Word section_index) {
95      section_index_ = section_index;
96    }
97
98    const std::string& GetName() const {
99      return name_;
100    }
101
102   private:
103    Elf_Shdr header_;
104    Elf_Word section_index_;
105    const std::string name_;
106    const Section* const link_;
107
108    DISALLOW_COPY_AND_ASSIGN(Section);
109  };
110
111  // Writer of .dynamic section.
112  class DynamicSection FINAL : public Section {
113   public:
114    void AddDynamicTag(Elf_Sword tag, Elf_Word value, const Section* section) {
115      DCHECK_NE(tag, static_cast<Elf_Sword>(DT_NULL));
116      dynamics_.push_back({tag, value, section});
117    }
118
119    DynamicSection(const std::string& name, Section* link)
120        : Section(name, SHT_DYNAMIC, SHF_ALLOC,
121                  link, 0, kPageSize, sizeof(Elf_Dyn)) {}
122
123    Elf_Word GetSize() const OVERRIDE {
124      return (dynamics_.size() + 1 /* DT_NULL */) * sizeof(Elf_Dyn);
125    }
126
127    bool Write(File* elf_file) OVERRIDE {
128      std::vector<Elf_Dyn> buffer;
129      buffer.reserve(dynamics_.size() + 1u);
130      for (const ElfDynamicState& it : dynamics_) {
131        if (it.section_ != nullptr) {
132          // We are adding an address relative to a section.
133          buffer.push_back(
134              {it.tag_, {it.value_ + it.section_->GetHeader()->sh_addr}});
135        } else {
136          buffer.push_back({it.tag_, {it.value_}});
137        }
138      }
139      buffer.push_back({DT_NULL, {0}});
140      return WriteArray(elf_file, buffer.data(), buffer.size());
141    }
142
143   private:
144    struct ElfDynamicState {
145      Elf_Sword tag_;
146      Elf_Word value_;
147      const Section* section_;
148    };
149    std::vector<ElfDynamicState> dynamics_;
150  };
151
152  // Section with content based on simple memory buffer.
153  // The buffer can be optionally patched before writing.
154  // The resulting address can be either absolute memory
155  // address or offset relative to the pointer location.
156  class RawSection FINAL : public Section {
157   public:
158    RawSection(const std::string& name, Elf_Word type, Elf_Word flags,
159               const Section* link, Elf_Word info, Elf_Word align, Elf_Word entsize,
160               const Section* patch_base = nullptr, bool patch_relative = false,
161               bool patch_64bit = (sizeof(Elf_Addr) == sizeof(Elf64_Addr)))
162        : Section(name, type, flags, link, info, align, entsize),
163          patched(false), patch_base_(patch_base),
164          patch_relative_(patch_relative), patch_64bit_(patch_64bit) {
165    }
166
167    Elf_Word GetSize() const OVERRIDE {
168      return buffer_.size();
169    }
170
171    bool Write(File* elf_file) OVERRIDE {
172      if (!patch_locations_.empty()) {
173        DCHECK(patch_base_ != nullptr);
174        DCHECK(!patched);  // Do not patch twice.
175        if (patch_relative_) {
176          if (patch_64bit_) {
177            Patch<true, uint64_t>();
178          } else {
179            Patch<true, uint32_t>();
180          }
181        } else {
182          if (patch_64bit_) {
183            Patch<false, uint64_t>();
184          } else {
185            Patch<false, uint32_t>();
186          }
187        }
188        patched = true;
189      }
190      return WriteArray(elf_file, buffer_.data(), buffer_.size());
191    }
192
193    bool IsEmpty() const {
194      return buffer_.size() == 0;
195    }
196
197    std::vector<uint8_t>* GetBuffer() {
198      return &buffer_;
199    }
200
201    void SetBuffer(const std::vector<uint8_t>& buffer) {
202      buffer_ = buffer;
203    }
204
205    std::vector<uintptr_t>* GetPatchLocations() {
206      return &patch_locations_;
207    }
208
209   private:
210    template <bool RelativeAddress = false, typename PatchedAddress = Elf_Addr>
211    void Patch() {
212      Elf_Addr base_addr = patch_base_->GetHeader()->sh_addr;
213      Elf_Addr addr = this->GetHeader()->sh_addr;
214      for (uintptr_t patch_location : patch_locations_) {
215        typedef __attribute__((__aligned__(1))) PatchedAddress UnalignedAddress;
216        auto* to_patch = reinterpret_cast<UnalignedAddress*>(buffer_.data() + patch_location);
217        *to_patch = (base_addr + *to_patch) - (RelativeAddress ? (addr + patch_location) : 0);
218      }
219    }
220
221    std::vector<uint8_t> buffer_;
222    std::vector<uintptr_t> patch_locations_;
223    bool patched;
224    const Section* patch_base_;
225    bool patch_relative_;
226    bool patch_64bit_;
227  };
228
229  // Writer of .rodata section or .text section.
230  // The write is done lazily using the provided CodeOutput.
231  class OatSection FINAL : public Section {
232   public:
233    OatSection(const std::string& name, Elf_Word type, Elf_Word flags,
234               const Section* link, Elf_Word info, Elf_Word align,
235               Elf_Word entsize, Elf_Word size, CodeOutput* code_output)
236        : Section(name, type, flags, link, info, align, entsize),
237          size_(size), code_output_(code_output) {
238    }
239
240    Elf_Word GetSize() const OVERRIDE {
241      return size_;
242    }
243
244    bool Write(File* elf_file) OVERRIDE {
245      // The BufferedOutputStream class contains the buffer as field,
246      // therefore it is too big to allocate on the stack.
247      std::unique_ptr<BufferedOutputStream> output_stream(
248          new BufferedOutputStream(new FileOutputStream(elf_file)));
249      return code_output_->Write(output_stream.get());
250    }
251
252   private:
253    Elf_Word size_;
254    CodeOutput* code_output_;
255  };
256
257  // Writer of .bss section.
258  class NoBitsSection FINAL : public Section {
259   public:
260    NoBitsSection(const std::string& name, Elf_Word size)
261        : Section(name, SHT_NOBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
262          size_(size) {
263    }
264
265    Elf_Word GetSize() const OVERRIDE {
266      return size_;
267    }
268
269    bool Write(File* elf_file ATTRIBUTE_UNUSED) OVERRIDE {
270      LOG(ERROR) << "This section should not be written to the ELF file";
271      return false;
272    }
273
274   private:
275    Elf_Word size_;
276  };
277
278  // Writer of .dynstr .strtab and .shstrtab sections.
279  class StrtabSection FINAL : public Section {
280   public:
281    StrtabSection(const std::string& name, Elf_Word flags)
282        : Section(name, SHT_STRTAB, flags, nullptr, 0, 1, 1) {
283      buffer_.reserve(4 * KB);
284      // The first entry of strtab must be empty string.
285      buffer_ += '\0';
286    }
287
288    Elf_Word AddName(const std::string& name) {
289      Elf_Word offset = buffer_.size();
290      buffer_ += name;
291      buffer_ += '\0';
292      return offset;
293    }
294
295    Elf_Word GetSize() const OVERRIDE {
296      return buffer_.size();
297    }
298
299    bool Write(File* elf_file) OVERRIDE {
300      return WriteArray(elf_file, buffer_.data(), buffer_.size());
301    }
302
303   private:
304    std::string buffer_;
305  };
306
307  class HashSection;
308
309  // Writer of .dynsym and .symtab sections.
310  class SymtabSection FINAL : public Section {
311   public:
312    // Add a symbol with given name to this symtab. The symbol refers to
313    // 'relative_addr' within the given section and has the given attributes.
314    void AddSymbol(const std::string& name, const Section* section,
315                   Elf_Addr addr, bool is_relative, Elf_Word size,
316                   uint8_t binding, uint8_t type, uint8_t other = 0) {
317      CHECK(section != nullptr);
318      Elf_Word name_idx = strtab_->AddName(name);
319      symbols_.push_back({ name, section, addr, size, is_relative,
320                           MakeStInfo(binding, type), other, name_idx });
321    }
322
323    SymtabSection(const std::string& name, Elf_Word type, Elf_Word flags,
324                  StrtabSection* strtab)
325        : Section(name, type, flags, strtab, 0, sizeof(Elf_Word), sizeof(Elf_Sym)),
326          strtab_(strtab) {
327    }
328
329    bool IsEmpty() const {
330      return symbols_.empty();
331    }
332
333    Elf_Word GetSize() const OVERRIDE {
334      return (1 /* NULL */ + symbols_.size()) * sizeof(Elf_Sym);
335    }
336
337    bool Write(File* elf_file) OVERRIDE {
338      std::vector<Elf_Sym> buffer;
339      buffer.reserve(1u + symbols_.size());
340      buffer.push_back(Elf_Sym());  // NULL.
341      for (const ElfSymbolState& it : symbols_) {
342        Elf_Sym sym = Elf_Sym();
343        sym.st_name = it.name_idx_;
344        if (it.is_relative_) {
345          sym.st_value = it.addr_ + it.section_->GetHeader()->sh_addr;
346        } else {
347          sym.st_value = it.addr_;
348        }
349        sym.st_size = it.size_;
350        sym.st_other = it.other_;
351        sym.st_shndx = it.section_->GetSectionIndex();
352        sym.st_info = it.info_;
353        buffer.push_back(sym);
354      }
355      return WriteArray(elf_file, buffer.data(), buffer.size());
356    }
357
358   private:
359    struct ElfSymbolState {
360      const std::string name_;
361      const Section* section_;
362      Elf_Addr addr_;
363      Elf_Word size_;
364      bool is_relative_;
365      uint8_t info_;
366      uint8_t other_;
367      Elf_Word name_idx_;  // index in the strtab.
368    };
369
370    static inline constexpr uint8_t MakeStInfo(uint8_t binding, uint8_t type) {
371      return ((binding) << 4) + ((type) & 0xf);
372    }
373
374    // The symbols in the same order they will be in the symbol table.
375    std::vector<ElfSymbolState> symbols_;
376    StrtabSection* strtab_;
377
378    friend class HashSection;
379  };
380
381  // TODO: Consider removing.
382  // We use it only for the dynsym section which has only 5 symbols.
383  // We do not use it for symtab, and we probably do not have to
384  // since we use those symbols only to print backtraces.
385  class HashSection FINAL : public Section {
386   public:
387    HashSection(const std::string& name, Elf_Word flags, SymtabSection* symtab)
388        : Section(name, SHT_HASH, flags, symtab,
389                  0, sizeof(Elf_Word), sizeof(Elf_Word)),
390          symtab_(symtab) {
391    }
392
393    Elf_Word GetSize() const OVERRIDE {
394      Elf_Word nbuckets = GetNumBuckets();
395      Elf_Word chain_size = symtab_->symbols_.size() + 1 /* NULL */;
396      return (2 /* header */ + nbuckets + chain_size) * sizeof(Elf_Word);
397    }
398
399    bool Write(File* const elf_file) OVERRIDE {
400      // Here is how The ELF hash table works.
401      // There are 3 arrays to worry about.
402      // * The symbol table where the symbol information is.
403      // * The bucket array which is an array of indexes into the symtab and chain.
404      // * The chain array which is also an array of indexes into the symtab and chain.
405      //
406      // Lets say the state is something like this.
407      // +--------+       +--------+      +-----------+
408      // | symtab |       | bucket |      |   chain   |
409      // |  null  |       | 1      |      | STN_UNDEF |
410      // | <sym1> |       | 4      |      | 2         |
411      // | <sym2> |       |        |      | 5         |
412      // | <sym3> |       |        |      | STN_UNDEF |
413      // | <sym4> |       |        |      | 3         |
414      // | <sym5> |       |        |      | STN_UNDEF |
415      // +--------+       +--------+      +-----------+
416      //
417      // The lookup process (in python psudocode) is
418      //
419      // def GetSym(name):
420      //     # NB STN_UNDEF == 0
421      //     indx = bucket[elfhash(name) % num_buckets]
422      //     while indx != STN_UNDEF:
423      //         if GetSymbolName(symtab[indx]) == name:
424      //             return symtab[indx]
425      //         indx = chain[indx]
426      //     return SYMBOL_NOT_FOUND
427      //
428      // Between bucket and chain arrays every symtab index must be present exactly
429      // once (except for STN_UNDEF, which must be present 1 + num_bucket times).
430      const auto& symbols = symtab_->symbols_;
431      // Select number of buckets.
432      // This is essentially arbitrary.
433      Elf_Word nbuckets = GetNumBuckets();
434      // 1 is for the implicit NULL symbol.
435      Elf_Word chain_size = (symbols.size() + 1);
436      std::vector<Elf_Word> hash;
437      hash.push_back(nbuckets);
438      hash.push_back(chain_size);
439      uint32_t bucket_offset = hash.size();
440      uint32_t chain_offset = bucket_offset + nbuckets;
441      hash.resize(hash.size() + nbuckets + chain_size, 0);
442
443      Elf_Word* buckets = hash.data() + bucket_offset;
444      Elf_Word* chain   = hash.data() + chain_offset;
445
446      // Set up the actual hash table.
447      for (Elf_Word i = 0; i < symbols.size(); i++) {
448        // Add 1 since we need to have the null symbol that is not in the symbols
449        // list.
450        Elf_Word index = i + 1;
451        Elf_Word hash_val = static_cast<Elf_Word>(elfhash(symbols[i].name_.c_str())) % nbuckets;
452        if (buckets[hash_val] == 0) {
453          buckets[hash_val] = index;
454        } else {
455          hash_val = buckets[hash_val];
456          CHECK_LT(hash_val, chain_size);
457          while (chain[hash_val] != 0) {
458            hash_val = chain[hash_val];
459            CHECK_LT(hash_val, chain_size);
460          }
461          chain[hash_val] = index;
462          // Check for loops. Works because if this is non-empty then there must be
463          // another cell which already contains the same symbol index as this one,
464          // which means some symbol has more then one name, which isn't allowed.
465          CHECK_EQ(chain[index], static_cast<Elf_Word>(0));
466        }
467      }
468      return WriteArray(elf_file, hash.data(), hash.size());
469    }
470
471   private:
472    Elf_Word GetNumBuckets() const {
473      const auto& symbols = symtab_->symbols_;
474      if (symbols.size() < 8) {
475        return 2;
476      } else if (symbols.size() < 32) {
477        return 4;
478      } else if (symbols.size() < 256) {
479        return 16;
480      } else {
481        // Have about 32 ids per bucket.
482        return RoundUp(symbols.size()/32, 2);
483      }
484    }
485
486    // from bionic
487    static inline unsigned elfhash(const char *_name) {
488      const unsigned char *name = (const unsigned char *) _name;
489      unsigned h = 0, g;
490
491      while (*name) {
492        h = (h << 4) + *name++;
493        g = h & 0xf0000000;
494        h ^= g;
495        h ^= g >> 24;
496      }
497      return h;
498    }
499
500    SymtabSection* symtab_;
501
502    DISALLOW_COPY_AND_ASSIGN(HashSection);
503  };
504
505  ElfBuilder(InstructionSet isa,
506             Elf_Word rodata_size, CodeOutput* rodata_writer,
507             Elf_Word text_size, CodeOutput* text_writer,
508             Elf_Word bss_size)
509    : isa_(isa),
510      dynstr_(".dynstr", SHF_ALLOC),
511      dynsym_(".dynsym", SHT_DYNSYM, SHF_ALLOC, &dynstr_),
512      hash_(".hash", SHF_ALLOC, &dynsym_),
513      rodata_(".rodata", SHT_PROGBITS, SHF_ALLOC,
514              nullptr, 0, kPageSize, 0, rodata_size, rodata_writer),
515      text_(".text", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR,
516            nullptr, 0, kPageSize, 0, text_size, text_writer),
517      bss_(".bss", bss_size),
518      dynamic_(".dynamic", &dynsym_),
519      strtab_(".strtab", 0),
520      symtab_(".symtab", SHT_SYMTAB, 0, &strtab_),
521      shstrtab_(".shstrtab", 0) {
522  }
523  ~ElfBuilder() {}
524
525  OatSection* GetText() { return &text_; }
526  SymtabSection* GetSymtab() { return &symtab_; }
527
528  bool Write(File* elf_file) {
529    // Since the .text section of an oat file contains relative references to .rodata
530    // and (optionally) .bss, we keep these 2 or 3 sections together. This creates
531    // a non-traditional layout where the .bss section is mapped independently of the
532    // .dynamic section and needs its own program header with LOAD RW.
533    //
534    // The basic layout of the elf file. Order may be different in final output.
535    // +-------------------------+
536    // | Elf_Ehdr                |
537    // +-------------------------+
538    // | Elf_Phdr PHDR           |
539    // | Elf_Phdr LOAD R         | .dynsym .dynstr .hash .rodata
540    // | Elf_Phdr LOAD R X       | .text
541    // | Elf_Phdr LOAD RW        | .bss (Optional)
542    // | Elf_Phdr LOAD RW        | .dynamic
543    // | Elf_Phdr DYNAMIC        | .dynamic
544    // | Elf_Phdr LOAD R         | .eh_frame .eh_frame_hdr
545    // | Elf_Phdr EH_FRAME R     | .eh_frame_hdr
546    // +-------------------------+
547    // | .dynsym                 |
548    // | Elf_Sym  STN_UNDEF      |
549    // | Elf_Sym  oatdata        |
550    // | Elf_Sym  oatexec        |
551    // | Elf_Sym  oatlastword    |
552    // | Elf_Sym  oatbss         | (Optional)
553    // | Elf_Sym  oatbsslastword | (Optional)
554    // +-------------------------+
555    // | .dynstr                 |
556    // | names for .dynsym       |
557    // +-------------------------+
558    // | .hash                   |
559    // | hashtable for dynsym    |
560    // +-------------------------+
561    // | .rodata                 |
562    // | oatdata..oatexec-4      |
563    // +-------------------------+
564    // | .text                   |
565    // | oatexec..oatlastword    |
566    // +-------------------------+
567    // | .dynamic                |
568    // | Elf_Dyn DT_HASH         |
569    // | Elf_Dyn DT_STRTAB       |
570    // | Elf_Dyn DT_SYMTAB       |
571    // | Elf_Dyn DT_SYMENT       |
572    // | Elf_Dyn DT_STRSZ        |
573    // | Elf_Dyn DT_SONAME       |
574    // | Elf_Dyn DT_NULL         |
575    // +-------------------------+  (Optional)
576    // | .symtab                 |  (Optional)
577    // | program symbols         |  (Optional)
578    // +-------------------------+  (Optional)
579    // | .strtab                 |  (Optional)
580    // | names for .symtab       |  (Optional)
581    // +-------------------------+  (Optional)
582    // | .eh_frame               |  (Optional)
583    // +-------------------------+  (Optional)
584    // | .eh_frame_hdr           |  (Optional)
585    // +-------------------------+  (Optional)
586    // | .debug_info             |  (Optional)
587    // +-------------------------+  (Optional)
588    // | .debug_abbrev           |  (Optional)
589    // +-------------------------+  (Optional)
590    // | .debug_str              |  (Optional)
591    // +-------------------------+  (Optional)
592    // | .debug_line             |  (Optional)
593    // +-------------------------+
594    // | .shstrtab               |
595    // | names of sections       |
596    // +-------------------------+
597    // | Elf_Shdr null           |
598    // | Elf_Shdr .dynsym        |
599    // | Elf_Shdr .dynstr        |
600    // | Elf_Shdr .hash          |
601    // | Elf_Shdr .rodata        |
602    // | Elf_Shdr .text          |
603    // | Elf_Shdr .bss           |  (Optional)
604    // | Elf_Shdr .dynamic       |
605    // | Elf_Shdr .symtab        |  (Optional)
606    // | Elf_Shdr .strtab        |  (Optional)
607    // | Elf_Shdr .eh_frame      |  (Optional)
608    // | Elf_Shdr .eh_frame_hdr  |  (Optional)
609    // | Elf_Shdr .debug_info    |  (Optional)
610    // | Elf_Shdr .debug_abbrev  |  (Optional)
611    // | Elf_Shdr .debug_str     |  (Optional)
612    // | Elf_Shdr .debug_line    |  (Optional)
613    // | Elf_Shdr .oat_patches   |  (Optional)
614    // | Elf_Shdr .shstrtab      |
615    // +-------------------------+
616    constexpr bool debug_logging_ = false;
617
618    // Create a list of all section which we want to write.
619    // This is the order in which they will be written.
620    std::vector<Section*> sections;
621    sections.push_back(&dynsym_);
622    sections.push_back(&dynstr_);
623    sections.push_back(&hash_);
624    sections.push_back(&rodata_);
625    sections.push_back(&text_);
626    if (bss_.GetSize() != 0u) {
627      sections.push_back(&bss_);
628    }
629    sections.push_back(&dynamic_);
630    if (!symtab_.IsEmpty()) {
631      sections.push_back(&symtab_);
632      sections.push_back(&strtab_);
633    }
634    for (Section* section : other_sections_) {
635      sections.push_back(section);
636    }
637    sections.push_back(&shstrtab_);
638    for (size_t i = 0; i < sections.size(); i++) {
639      // The first section index is 1.  Index 0 is reserved for NULL.
640      // Section index is used for relative symbols and for section links.
641      sections[i]->SetSectionIndex(i + 1);
642      // Add section name to .shstrtab.
643      Elf_Word name_offset = shstrtab_.AddName(sections[i]->GetName());
644      sections[i]->GetHeader()->sh_name = name_offset;
645    }
646
647    // The running program does not have access to section headers
648    // and the loader is not supposed to use them either.
649    // The dynamic sections therefore replicates some of the layout
650    // information like the address and size of .rodata and .text.
651    // It also contains other metadata like the SONAME.
652    // The .dynamic section is found using the PT_DYNAMIC program header.
653    BuildDynsymSection();
654    BuildDynamicSection(elf_file->GetPath());
655
656    // We do not know the number of headers until the final stages of write.
657    // It is easiest to just reserve a fixed amount of space for them.
658    constexpr size_t kMaxProgramHeaders = 8;
659    constexpr size_t kProgramHeadersOffset = sizeof(Elf_Ehdr);
660    constexpr size_t kProgramHeadersSize = sizeof(Elf_Phdr) * kMaxProgramHeaders;
661
662    // Layout of all sections - determine the final file offsets and addresses.
663    // This must be done after we have built all sections and know their size.
664    Elf_Off file_offset = kProgramHeadersOffset + kProgramHeadersSize;
665    Elf_Addr load_address = file_offset;
666    std::vector<Elf_Shdr> section_headers;
667    section_headers.reserve(1u + sections.size());
668    section_headers.push_back(Elf_Shdr());  // NULL at index 0.
669    for (auto* section : sections) {
670      Elf_Shdr* header = section->GetHeader();
671      Elf_Off alignment = header->sh_addralign > 0 ? header->sh_addralign : 1;
672      header->sh_size = section->GetSize();
673      header->sh_link = section->GetLink();
674      // Allocate memory for the section in the file.
675      if (header->sh_type != SHT_NOBITS) {
676        header->sh_offset = RoundUp(file_offset, alignment);
677        file_offset = header->sh_offset + header->sh_size;
678      }
679      // Allocate memory for the section during program execution.
680      if ((header->sh_flags & SHF_ALLOC) != 0) {
681        header->sh_addr = RoundUp(load_address, alignment);
682        load_address = header->sh_addr + header->sh_size;
683      }
684      if (debug_logging_) {
685        LOG(INFO) << "Section " << section->GetName() << ":" << std::hex
686                  << " offset=0x" << header->sh_offset
687                  << " addr=0x" << header->sh_addr
688                  << " size=0x" << header->sh_size;
689      }
690      // Collect section headers into continuous array for convenience.
691      section_headers.push_back(*header);
692    }
693    Elf_Off section_headers_offset = RoundUp(file_offset, sizeof(Elf_Word));
694
695    // Create program headers now that we know the layout of the whole file.
696    // Each segment contains one or more sections which are mapped together.
697    // Not all sections are mapped during the execution of the program.
698    // PT_LOAD does the mapping.  Other PT_* types allow the program to locate
699    // interesting parts of memory and their addresses overlap with PT_LOAD.
700    std::vector<Elf_Phdr> program_headers;
701    program_headers.push_back(MakeProgramHeader(PT_PHDR, PF_R,
702      kProgramHeadersOffset, kProgramHeadersSize, sizeof(Elf_Word)));
703    // Create the main LOAD R segment which spans all sections up to .rodata.
704    const Elf_Shdr* rodata = rodata_.GetHeader();
705    program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R,
706      0, rodata->sh_offset + rodata->sh_size, rodata->sh_addralign));
707    program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_X, text_));
708    if (bss_.GetHeader()->sh_size != 0u) {
709      program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_W, bss_));
710    }
711    program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_W, dynamic_));
712    program_headers.push_back(MakeProgramHeader(PT_DYNAMIC, PF_R | PF_W, dynamic_));
713    const Section* eh_frame = FindSection(".eh_frame");
714    if (eh_frame != nullptr) {
715      program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R, *eh_frame));
716      const Section* eh_frame_hdr = FindSection(".eh_frame_hdr");
717      if (eh_frame_hdr != nullptr) {
718        // Check layout: eh_frame is before eh_frame_hdr and there is no gap.
719        CHECK_LE(eh_frame->GetHeader()->sh_offset, eh_frame_hdr->GetHeader()->sh_offset);
720        CHECK_EQ(eh_frame->GetHeader()->sh_offset + eh_frame->GetHeader()->sh_size,
721                 eh_frame_hdr->GetHeader()->sh_offset);
722        // Extend the PT_LOAD of .eh_frame to include the .eh_frame_hdr as well.
723        program_headers.back().p_filesz += eh_frame_hdr->GetHeader()->sh_size;
724        program_headers.back().p_memsz  += eh_frame_hdr->GetHeader()->sh_size;
725        program_headers.push_back(MakeProgramHeader(PT_GNU_EH_FRAME, PF_R, *eh_frame_hdr));
726      }
727    }
728    CHECK_LE(program_headers.size(), kMaxProgramHeaders);
729
730    // Create the main ELF header.
731    Elf_Ehdr elf_header = MakeElfHeader(isa_);
732    elf_header.e_phoff = kProgramHeadersOffset;
733    elf_header.e_shoff = section_headers_offset;
734    elf_header.e_phnum = program_headers.size();
735    elf_header.e_shnum = section_headers.size();
736    elf_header.e_shstrndx = shstrtab_.GetSectionIndex();
737
738    // Write all headers and section content to the file.
739    // Depending on the implementations of Section::Write, this
740    // might be just memory copies or some more elaborate operations.
741    if (!WriteArray(elf_file, &elf_header, 1)) {
742      LOG(INFO) << "Failed to write the ELF header";
743      return false;
744    }
745    if (!WriteArray(elf_file, program_headers.data(), program_headers.size())) {
746      LOG(INFO) << "Failed to write the program headers";
747      return false;
748    }
749    for (Section* section : sections) {
750      const Elf_Shdr* header = section->GetHeader();
751      if (header->sh_type != SHT_NOBITS) {
752        if (!SeekTo(elf_file, header->sh_offset) || !section->Write(elf_file)) {
753          LOG(INFO) << "Failed to write section " << section->GetName();
754          return false;
755        }
756        Elf_Word current_offset = lseek(elf_file->Fd(), 0, SEEK_CUR);
757        CHECK_EQ(current_offset, header->sh_offset + header->sh_size)
758          << "The number of bytes written does not match GetSize()";
759      }
760    }
761    if (!SeekTo(elf_file, section_headers_offset) ||
762        !WriteArray(elf_file, section_headers.data(), section_headers.size())) {
763      LOG(INFO) << "Failed to write the section headers";
764      return false;
765    }
766    return true;
767  }
768
769  // Adds the given section to the builder.  It does not take ownership.
770  void RegisterSection(Section* section) {
771    other_sections_.push_back(section);
772  }
773
774  const Section* FindSection(const char* name) {
775    for (const auto* section : other_sections_) {
776      if (section->GetName() == name) {
777        return section;
778      }
779    }
780    return nullptr;
781  }
782
783 private:
784  static bool SeekTo(File* elf_file, Elf_Word offset) {
785    DCHECK_LE(lseek(elf_file->Fd(), 0, SEEK_CUR), static_cast<off_t>(offset))
786      << "Seeking backwards";
787    if (static_cast<off_t>(offset) != lseek(elf_file->Fd(), offset, SEEK_SET)) {
788      PLOG(ERROR) << "Failed to seek in file " << elf_file->GetPath();
789      return false;
790    }
791    return true;
792  }
793
794  template<typename T>
795  static bool WriteArray(File* elf_file, const T* data, size_t count) {
796    DCHECK(data != nullptr);
797    if (!elf_file->WriteFully(data, count * sizeof(T))) {
798      PLOG(ERROR) << "Failed to write to file " << elf_file->GetPath();
799      return false;
800    }
801    return true;
802  }
803
804  // Helper - create segment header based on memory range.
805  static Elf_Phdr MakeProgramHeader(Elf_Word type, Elf_Word flags,
806                                    Elf_Off offset, Elf_Word size, Elf_Word align) {
807    Elf_Phdr phdr = Elf_Phdr();
808    phdr.p_type    = type;
809    phdr.p_flags   = flags;
810    phdr.p_offset  = offset;
811    phdr.p_vaddr   = offset;
812    phdr.p_paddr   = offset;
813    phdr.p_filesz  = size;
814    phdr.p_memsz   = size;
815    phdr.p_align   = align;
816    return phdr;
817  }
818
819  // Helper - create segment header based on section header.
820  static Elf_Phdr MakeProgramHeader(Elf_Word type, Elf_Word flags,
821                                    const Section& section) {
822    const Elf_Shdr* shdr = section.GetHeader();
823    // Only run-time allocated sections should be in segment headers.
824    CHECK_NE(shdr->sh_flags & SHF_ALLOC, 0u);
825    Elf_Phdr phdr = Elf_Phdr();
826    phdr.p_type   = type;
827    phdr.p_flags  = flags;
828    phdr.p_offset = shdr->sh_offset;
829    phdr.p_vaddr  = shdr->sh_addr;
830    phdr.p_paddr  = shdr->sh_addr;
831    phdr.p_filesz = shdr->sh_type != SHT_NOBITS ? shdr->sh_size : 0u;
832    phdr.p_memsz  = shdr->sh_size;
833    phdr.p_align  = shdr->sh_addralign;
834    return phdr;
835  }
836
837  static Elf_Ehdr MakeElfHeader(InstructionSet isa) {
838    Elf_Ehdr elf_header = Elf_Ehdr();
839    switch (isa) {
840      case kArm:
841        // Fall through.
842      case kThumb2: {
843        elf_header.e_machine = EM_ARM;
844        elf_header.e_flags = EF_ARM_EABI_VER5;
845        break;
846      }
847      case kArm64: {
848        elf_header.e_machine = EM_AARCH64;
849        elf_header.e_flags = 0;
850        break;
851      }
852      case kX86: {
853        elf_header.e_machine = EM_386;
854        elf_header.e_flags = 0;
855        break;
856      }
857      case kX86_64: {
858        elf_header.e_machine = EM_X86_64;
859        elf_header.e_flags = 0;
860        break;
861      }
862      case kMips: {
863        elf_header.e_machine = EM_MIPS;
864        elf_header.e_flags = (EF_MIPS_NOREORDER |
865                               EF_MIPS_PIC       |
866                               EF_MIPS_CPIC      |
867                               EF_MIPS_ABI_O32   |
868                               EF_MIPS_ARCH_32R2);
869        break;
870      }
871      case kMips64: {
872        elf_header.e_machine = EM_MIPS;
873        elf_header.e_flags = (EF_MIPS_NOREORDER |
874                               EF_MIPS_PIC       |
875                               EF_MIPS_CPIC      |
876                               EF_MIPS_ARCH_64R6);
877        break;
878      }
879      case kNone: {
880        LOG(FATAL) << "No instruction set";
881      }
882    }
883
884    elf_header.e_ident[EI_MAG0]       = ELFMAG0;
885    elf_header.e_ident[EI_MAG1]       = ELFMAG1;
886    elf_header.e_ident[EI_MAG2]       = ELFMAG2;
887    elf_header.e_ident[EI_MAG3]       = ELFMAG3;
888    elf_header.e_ident[EI_CLASS]      = (sizeof(Elf_Addr) == sizeof(Elf32_Addr))
889                                         ? ELFCLASS32 : ELFCLASS64;;
890    elf_header.e_ident[EI_DATA]       = ELFDATA2LSB;
891    elf_header.e_ident[EI_VERSION]    = EV_CURRENT;
892    elf_header.e_ident[EI_OSABI]      = ELFOSABI_LINUX;
893    elf_header.e_ident[EI_ABIVERSION] = 0;
894    elf_header.e_type = ET_DYN;
895    elf_header.e_version = 1;
896    elf_header.e_entry = 0;
897    elf_header.e_ehsize = sizeof(Elf_Ehdr);
898    elf_header.e_phentsize = sizeof(Elf_Phdr);
899    elf_header.e_shentsize = sizeof(Elf_Shdr);
900    elf_header.e_phoff = sizeof(Elf_Ehdr);
901    return elf_header;
902  }
903
904  void BuildDynamicSection(const std::string& elf_file_path) {
905    std::string soname(elf_file_path);
906    size_t directory_separator_pos = soname.rfind('/');
907    if (directory_separator_pos != std::string::npos) {
908      soname = soname.substr(directory_separator_pos + 1);
909    }
910    // NB: We must add the name before adding DT_STRSZ.
911    Elf_Word soname_offset = dynstr_.AddName(soname);
912
913    dynamic_.AddDynamicTag(DT_HASH, 0, &hash_);
914    dynamic_.AddDynamicTag(DT_STRTAB, 0, &dynstr_);
915    dynamic_.AddDynamicTag(DT_SYMTAB, 0, &dynsym_);
916    dynamic_.AddDynamicTag(DT_SYMENT, sizeof(Elf_Sym), nullptr);
917    dynamic_.AddDynamicTag(DT_STRSZ, dynstr_.GetSize(), nullptr);
918    dynamic_.AddDynamicTag(DT_SONAME, soname_offset, nullptr);
919  }
920
921  void BuildDynsymSection() {
922    dynsym_.AddSymbol("oatdata", &rodata_, 0, true,
923                      rodata_.GetSize(), STB_GLOBAL, STT_OBJECT);
924    dynsym_.AddSymbol("oatexec", &text_, 0, true,
925                      text_.GetSize(), STB_GLOBAL, STT_OBJECT);
926    dynsym_.AddSymbol("oatlastword", &text_, text_.GetSize() - 4,
927                      true, 4, STB_GLOBAL, STT_OBJECT);
928    if (bss_.GetSize() != 0u) {
929      dynsym_.AddSymbol("oatbss", &bss_, 0, true,
930                        bss_.GetSize(), STB_GLOBAL, STT_OBJECT);
931      dynsym_.AddSymbol("oatbsslastword", &bss_, bss_.GetSize() - 4,
932                        true, 4, STB_GLOBAL, STT_OBJECT);
933    }
934  }
935
936  InstructionSet isa_;  // NOTE(review): presumably the ISA given to MakeElfHeader() - confirm.
937  StrtabSection dynstr_;  // String table tagged DT_STRTAB; BuildDynamicSection() adds the soname.
938  SymtabSection dynsym_;  // Symbol table tagged DT_SYMTAB; BuildDynsymSection() adds the oat* symbols.
939  HashSection hash_;  // Section tagged DT_HASH.
940  OatSection rodata_;  // Section spanned by the "oatdata" symbol.
941  OatSection text_;  // Section spanned by "oatexec"; also holds "oatlastword".
942  NoBitsSection bss_;  // Section behind "oatbss"/"oatbsslastword" when its size is non-zero.
943  DynamicSection dynamic_;  // Receives the DT_* tags added by BuildDynamicSection().
944  StrtabSection strtab_;  // NOTE(review): presumably the string table paired with symtab_ - confirm.
945  SymtabSection symtab_;  // NOTE(review): not used in the code visible here - confirm its role.
946  std::vector<Section*> other_sections_;  // Sections added via RegisterSection(); not owned.
947  StrtabSection shstrtab_;  // NOTE(review): presumably the section-name string table - confirm.
948
949  DISALLOW_COPY_AND_ASSIGN(ElfBuilder);
950};
951
952}  // namespace art
953
954#endif  // ART_COMPILER_ELF_BUILDER_H_
955