elf_builder.h revision f8980875ef8fb0ce86be4ed2c0af7070f5ae9cfd
1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef ART_COMPILER_ELF_BUILDER_H_
18#define ART_COMPILER_ELF_BUILDER_H_
19
20#include <vector>
21
22#include "arch/instruction_set.h"
23#include "base/bit_utils.h"
24#include "base/unix_file/fd_file.h"
25#include "buffered_output_stream.h"
26#include "elf_utils.h"
27#include "file_output_stream.h"
28
29namespace art {
30
31class CodeOutput {
32 public:
33  virtual bool Write(OutputStream* out) = 0;
34  virtual ~CodeOutput() {}
35};
36
37// Writes ELF file.
38// The main complication is that the sections often want to reference
39// each other.  We solve this by writing the ELF file in two stages:
40//  * Sections are asked about their size, and overall layout is calculated.
41//  * Sections do the actual writes which may use offsets of other sections.
42template <typename ElfTypes>
43class ElfBuilder FINAL {
44 public:
  // Shorthands for the types supplied by the ElfTypes template parameter.
  using Elf_Addr = typename ElfTypes::Addr;
  using Elf_Off = typename ElfTypes::Off;
  using Elf_Word = typename ElfTypes::Word;
  using Elf_Sword = typename ElfTypes::Sword;
  using Elf_Ehdr = typename ElfTypes::Ehdr;  // Main ELF header.
  using Elf_Shdr = typename ElfTypes::Shdr;  // Section header.
  using Elf_Sym = typename ElfTypes::Sym;    // Symbol table entry.
  using Elf_Phdr = typename ElfTypes::Phdr;  // Program (segment) header.
  using Elf_Dyn = typename ElfTypes::Dyn;    // Entry of the .dynamic section.
54
55  // Base class of all sections.
56  class Section {
57   public:
58    Section(const std::string& name, Elf_Word type, Elf_Word flags,
59            const Section* link, Elf_Word info, Elf_Word align, Elf_Word entsize)
60        : header_(new Elf_Shdr()), section_index_(0), name_(name), link_(link) {
61      header_->sh_type = type;
62      header_->sh_flags = flags;
63      header_->sh_info = info;
64      header_->sh_addralign = align;
65      header_->sh_entsize = entsize;
66    }
67    virtual ~Section() {}
68
69    // Returns the size of the content of this section.  It is used to
70    // calculate file offsets of all sections before doing any writes.
71    virtual Elf_Word GetSize() const = 0;
72
73    // Write the content of this section to the given file.
74    // This must write exactly the number of bytes returned by GetSize().
75    // Offsets of all sections are known when this method is called.
76    virtual bool Write(File* elf_file) = 0;
77
78    Elf_Word GetLink() const {
79      return (link_ != nullptr) ? link_->GetSectionIndex() : 0;
80    }
81
82    const Elf_Shdr* GetHeader() const {
83      return header_.get();
84    }
85
86    Elf_Shdr* GetHeader() {
87      return header_.get();
88    }
89
90    Elf_Word GetSectionIndex() const {
91      DCHECK_NE(section_index_, 0u);
92      return section_index_;
93    }
94
95    void SetSectionIndex(Elf_Word section_index) {
96      section_index_ = section_index;
97    }
98
99    const std::string& GetName() const {
100      return name_;
101    }
102
103   private:
104    // Elf_Shdr is somewhat large so allocate it on the heap.
105    // Otherwise we get in trouble with stack frame sizes.
106    std::unique_ptr<Elf_Shdr> header_;
107    Elf_Word section_index_;
108    const std::string name_;
109    const Section* const link_;
110
111    DISALLOW_COPY_AND_ASSIGN(Section);
112  };
113
114  // Writer of .dynamic section.
115  class DynamicSection FINAL : public Section {
116   public:
117    void AddDynamicTag(Elf_Sword tag, Elf_Word value, const Section* section) {
118      DCHECK_NE(tag, static_cast<Elf_Sword>(DT_NULL));
119      dynamics_.push_back({tag, value, section});
120    }
121
122    DynamicSection(const std::string& name, Section* link)
123        : Section(name, SHT_DYNAMIC, SHF_ALLOC,
124                  link, 0, kPageSize, sizeof(Elf_Dyn)) {}
125
126    Elf_Word GetSize() const OVERRIDE {
127      return (dynamics_.size() + 1 /* DT_NULL */) * sizeof(Elf_Dyn);
128    }
129
130    bool Write(File* elf_file) OVERRIDE {
131      std::vector<Elf_Dyn> buffer;
132      buffer.reserve(dynamics_.size() + 1u);
133      for (const ElfDynamicState& it : dynamics_) {
134        if (it.section_ != nullptr) {
135          // We are adding an address relative to a section.
136          buffer.push_back(
137              {it.tag_, {it.value_ + it.section_->GetHeader()->sh_addr}});
138        } else {
139          buffer.push_back({it.tag_, {it.value_}});
140        }
141      }
142      buffer.push_back({DT_NULL, {0}});
143      return WriteArray(elf_file, buffer.data(), buffer.size());
144    }
145
146   private:
147    struct ElfDynamicState {
148      Elf_Sword tag_;
149      Elf_Word value_;
150      const Section* section_;
151    };
152    std::vector<ElfDynamicState> dynamics_;
153  };
154
  // Signature of the user-provided patching callback used by RawSection.
  // RawSection::Write() invokes it with:
  //   patch_locations - the locations registered via GetPatchLocations().
  //   buffer_address  - sh_addr of the section being written.
  //   base_address    - sh_addr of the section that is patched against.
  //   buffer          - the section content, which the callback may modify.
  using PatchFn = void (*)(const std::vector<uintptr_t>& patch_locations,
                           Elf_Addr buffer_address,
                           Elf_Addr base_address,
                           std::vector<uint8_t>* buffer);
159
160  // Section with content based on simple memory buffer.
161  // The buffer can be optionally patched before writing.
162  class RawSection FINAL : public Section {
163   public:
164    RawSection(const std::string& name, Elf_Word type, Elf_Word flags,
165               const Section* link, Elf_Word info, Elf_Word align, Elf_Word entsize,
166               PatchFn patch = nullptr, const Section* patch_base_section = nullptr)
167        : Section(name, type, flags, link, info, align, entsize),
168          patched_(false), patch_(patch), patch_base_section_(patch_base_section) {
169    }
170
171    RawSection(const std::string& name, Elf_Word type)
172        : RawSection(name, type, 0, nullptr, 0, 1, 0, nullptr, nullptr) {
173    }
174
175    Elf_Word GetSize() const OVERRIDE {
176      return buffer_.size();
177    }
178
179    bool Write(File* elf_file) OVERRIDE {
180      if (!patch_locations_.empty()) {
181        DCHECK(!patched_);  // Do not patch twice.
182        DCHECK(patch_ != nullptr);
183        DCHECK(patch_base_section_ != nullptr);
184        patch_(patch_locations_,
185               this->GetHeader()->sh_addr,
186               patch_base_section_->GetHeader()->sh_addr,
187               &buffer_);
188        patched_ = true;
189      }
190      return WriteArray(elf_file, buffer_.data(), buffer_.size());
191    }
192
193    bool IsEmpty() const {
194      return buffer_.size() == 0;
195    }
196
197    std::vector<uint8_t>* GetBuffer() {
198      return &buffer_;
199    }
200
201    void SetBuffer(const std::vector<uint8_t>& buffer) {
202      buffer_ = buffer;
203    }
204
205    std::vector<uintptr_t>* GetPatchLocations() {
206      return &patch_locations_;
207    }
208
209   private:
210    std::vector<uint8_t> buffer_;
211    std::vector<uintptr_t> patch_locations_;
212    bool patched_;
213    // User-provided function to do the actual patching.
214    PatchFn patch_;
215    // The section that we patch against (usually .text).
216    const Section* patch_base_section_;
217  };
218
219  // Writer of .rodata section or .text section.
220  // The write is done lazily using the provided CodeOutput.
221  class OatSection FINAL : public Section {
222   public:
223    OatSection(const std::string& name, Elf_Word type, Elf_Word flags,
224               const Section* link, Elf_Word info, Elf_Word align,
225               Elf_Word entsize, Elf_Word size, CodeOutput* code_output)
226        : Section(name, type, flags, link, info, align, entsize),
227          size_(size), code_output_(code_output) {
228    }
229
230    Elf_Word GetSize() const OVERRIDE {
231      return size_;
232    }
233
234    bool Write(File* elf_file) OVERRIDE {
235      // The BufferedOutputStream class contains the buffer as field,
236      // therefore it is too big to allocate on the stack.
237      std::unique_ptr<BufferedOutputStream> output_stream(
238          new BufferedOutputStream(new FileOutputStream(elf_file)));
239      return code_output_->Write(output_stream.get());
240    }
241
242   private:
243    Elf_Word size_;
244    CodeOutput* code_output_;
245  };
246
247  // Writer of .bss section.
248  class NoBitsSection FINAL : public Section {
249   public:
250    NoBitsSection(const std::string& name, Elf_Word size)
251        : Section(name, SHT_NOBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
252          size_(size) {
253    }
254
255    Elf_Word GetSize() const OVERRIDE {
256      return size_;
257    }
258
259    bool Write(File* elf_file ATTRIBUTE_UNUSED) OVERRIDE {
260      LOG(ERROR) << "This section should not be written to the ELF file";
261      return false;
262    }
263
264   private:
265    Elf_Word size_;
266  };
267
268  // Writer of .dynstr .strtab and .shstrtab sections.
269  class StrtabSection FINAL : public Section {
270   public:
271    StrtabSection(const std::string& name, Elf_Word flags)
272        : Section(name, SHT_STRTAB, flags, nullptr, 0, 1, 1) {
273      buffer_.reserve(4 * KB);
274      // The first entry of strtab must be empty string.
275      buffer_ += '\0';
276    }
277
278    Elf_Word AddName(const std::string& name) {
279      Elf_Word offset = buffer_.size();
280      buffer_ += name;
281      buffer_ += '\0';
282      return offset;
283    }
284
285    Elf_Word GetSize() const OVERRIDE {
286      return buffer_.size();
287    }
288
289    bool Write(File* elf_file) OVERRIDE {
290      return WriteArray(elf_file, buffer_.data(), buffer_.size());
291    }
292
293   private:
294    std::string buffer_;
295  };
296
297  class HashSection;
298
299  // Writer of .dynsym and .symtab sections.
300  class SymtabSection FINAL : public Section {
301   public:
302    // Add a symbol with given name to this symtab. The symbol refers to
303    // 'relative_addr' within the given section and has the given attributes.
304    void AddSymbol(const std::string& name, const Section* section,
305                   Elf_Addr addr, bool is_relative, Elf_Word size,
306                   uint8_t binding, uint8_t type, uint8_t other = 0) {
307      CHECK(section != nullptr);
308      Elf_Word name_idx = strtab_->AddName(name);
309      symbols_.push_back({ name, section, addr, size, is_relative,
310                           MakeStInfo(binding, type), other, name_idx });
311    }
312
313    SymtabSection(const std::string& name, Elf_Word type, Elf_Word flags,
314                  StrtabSection* strtab)
315        : Section(name, type, flags, strtab, 0, sizeof(Elf_Word), sizeof(Elf_Sym)),
316          strtab_(strtab) {
317    }
318
319    bool IsEmpty() const {
320      return symbols_.empty();
321    }
322
323    Elf_Word GetSize() const OVERRIDE {
324      return (1 /* NULL */ + symbols_.size()) * sizeof(Elf_Sym);
325    }
326
327    bool Write(File* elf_file) OVERRIDE {
328      std::vector<Elf_Sym> buffer;
329      buffer.reserve(1u + symbols_.size());
330      buffer.push_back(Elf_Sym());  // NULL.
331      for (const ElfSymbolState& it : symbols_) {
332        Elf_Sym sym = Elf_Sym();
333        sym.st_name = it.name_idx_;
334        if (it.is_relative_) {
335          sym.st_value = it.addr_ + it.section_->GetHeader()->sh_addr;
336        } else {
337          sym.st_value = it.addr_;
338        }
339        sym.st_size = it.size_;
340        sym.st_other = it.other_;
341        sym.st_shndx = it.section_->GetSectionIndex();
342        sym.st_info = it.info_;
343        buffer.push_back(sym);
344      }
345      return WriteArray(elf_file, buffer.data(), buffer.size());
346    }
347
348   private:
349    struct ElfSymbolState {
350      const std::string name_;
351      const Section* section_;
352      Elf_Addr addr_;
353      Elf_Word size_;
354      bool is_relative_;
355      uint8_t info_;
356      uint8_t other_;
357      Elf_Word name_idx_;  // index in the strtab.
358    };
359
360    static inline constexpr uint8_t MakeStInfo(uint8_t binding, uint8_t type) {
361      return ((binding) << 4) + ((type) & 0xf);
362    }
363
364    // The symbols in the same order they will be in the symbol table.
365    std::vector<ElfSymbolState> symbols_;
366    StrtabSection* strtab_;
367
368    friend class HashSection;
369  };
370
371  // TODO: Consider removing.
372  // We use it only for the dynsym section which has only 5 symbols.
373  // We do not use it for symtab, and we probably do not have to
374  // since we use those symbols only to print backtraces.
375  class HashSection FINAL : public Section {
376   public:
377    HashSection(const std::string& name, Elf_Word flags, SymtabSection* symtab)
378        : Section(name, SHT_HASH, flags, symtab,
379                  0, sizeof(Elf_Word), sizeof(Elf_Word)),
380          symtab_(symtab) {
381    }
382
383    Elf_Word GetSize() const OVERRIDE {
384      Elf_Word nbuckets = GetNumBuckets();
385      Elf_Word chain_size = symtab_->symbols_.size() + 1 /* NULL */;
386      return (2 /* header */ + nbuckets + chain_size) * sizeof(Elf_Word);
387    }
388
389    bool Write(File* const elf_file) OVERRIDE {
390      // Here is how The ELF hash table works.
391      // There are 3 arrays to worry about.
392      // * The symbol table where the symbol information is.
393      // * The bucket array which is an array of indexes into the symtab and chain.
394      // * The chain array which is also an array of indexes into the symtab and chain.
395      //
396      // Lets say the state is something like this.
397      // +--------+       +--------+      +-----------+
398      // | symtab |       | bucket |      |   chain   |
399      // |  null  |       | 1      |      | STN_UNDEF |
400      // | <sym1> |       | 4      |      | 2         |
401      // | <sym2> |       |        |      | 5         |
402      // | <sym3> |       |        |      | STN_UNDEF |
403      // | <sym4> |       |        |      | 3         |
404      // | <sym5> |       |        |      | STN_UNDEF |
405      // +--------+       +--------+      +-----------+
406      //
407      // The lookup process (in python psudocode) is
408      //
409      // def GetSym(name):
410      //     # NB STN_UNDEF == 0
411      //     indx = bucket[elfhash(name) % num_buckets]
412      //     while indx != STN_UNDEF:
413      //         if GetSymbolName(symtab[indx]) == name:
414      //             return symtab[indx]
415      //         indx = chain[indx]
416      //     return SYMBOL_NOT_FOUND
417      //
418      // Between bucket and chain arrays every symtab index must be present exactly
419      // once (except for STN_UNDEF, which must be present 1 + num_bucket times).
420      const auto& symbols = symtab_->symbols_;
421      // Select number of buckets.
422      // This is essentially arbitrary.
423      Elf_Word nbuckets = GetNumBuckets();
424      // 1 is for the implicit NULL symbol.
425      Elf_Word chain_size = (symbols.size() + 1);
426      std::vector<Elf_Word> hash;
427      hash.push_back(nbuckets);
428      hash.push_back(chain_size);
429      uint32_t bucket_offset = hash.size();
430      uint32_t chain_offset = bucket_offset + nbuckets;
431      hash.resize(hash.size() + nbuckets + chain_size, 0);
432
433      Elf_Word* buckets = hash.data() + bucket_offset;
434      Elf_Word* chain   = hash.data() + chain_offset;
435
436      // Set up the actual hash table.
437      for (Elf_Word i = 0; i < symbols.size(); i++) {
438        // Add 1 since we need to have the null symbol that is not in the symbols
439        // list.
440        Elf_Word index = i + 1;
441        Elf_Word hash_val = static_cast<Elf_Word>(elfhash(symbols[i].name_.c_str())) % nbuckets;
442        if (buckets[hash_val] == 0) {
443          buckets[hash_val] = index;
444        } else {
445          hash_val = buckets[hash_val];
446          CHECK_LT(hash_val, chain_size);
447          while (chain[hash_val] != 0) {
448            hash_val = chain[hash_val];
449            CHECK_LT(hash_val, chain_size);
450          }
451          chain[hash_val] = index;
452          // Check for loops. Works because if this is non-empty then there must be
453          // another cell which already contains the same symbol index as this one,
454          // which means some symbol has more then one name, which isn't allowed.
455          CHECK_EQ(chain[index], static_cast<Elf_Word>(0));
456        }
457      }
458      return WriteArray(elf_file, hash.data(), hash.size());
459    }
460
461   private:
462    Elf_Word GetNumBuckets() const {
463      const auto& symbols = symtab_->symbols_;
464      if (symbols.size() < 8) {
465        return 2;
466      } else if (symbols.size() < 32) {
467        return 4;
468      } else if (symbols.size() < 256) {
469        return 16;
470      } else {
471        // Have about 32 ids per bucket.
472        return RoundUp(symbols.size()/32, 2);
473      }
474    }
475
476    // from bionic
477    static inline unsigned elfhash(const char *_name) {
478      const unsigned char *name = (const unsigned char *) _name;
479      unsigned h = 0, g;
480
481      while (*name) {
482        h = (h << 4) + *name++;
483        g = h & 0xf0000000;
484        h ^= g;
485        h ^= g >> 24;
486      }
487      return h;
488    }
489
490    SymtabSection* symtab_;
491
492    DISALLOW_COPY_AND_ASSIGN(HashSection);
493  };
494
  // Sets up the sections that the builder itself owns.
  //   isa           - instruction set, used when the ELF header is created.
  //   rodata_size   - size of .rodata.
  //   rodata_writer - CodeOutput that produces the content of .rodata.
  //   text_size     - size of .text.
  //   text_writer   - CodeOutput that produces the content of .text.
  //   bss_size      - size of .bss; Write() leaves the section out if it is 0.
  ElfBuilder(InstructionSet isa,
             Elf_Word rodata_size, CodeOutput* rodata_writer,
             Elf_Word text_size, CodeOutput* text_writer,
             Elf_Word bss_size)
    : isa_(isa),
      dynstr_(".dynstr", SHF_ALLOC),
      dynsym_(".dynsym", SHT_DYNSYM, SHF_ALLOC, &dynstr_),
      hash_(".hash", SHF_ALLOC, &dynsym_),
      rodata_(".rodata", SHT_PROGBITS, SHF_ALLOC,
              nullptr, 0, kPageSize, 0, rodata_size, rodata_writer),
      text_(".text", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR,
            nullptr, 0, kPageSize, 0, text_size, text_writer),
      bss_(".bss", bss_size),
      dynamic_(".dynamic", &dynsym_),
      strtab_(".strtab", 0),
      symtab_(".symtab", SHT_SYMTAB, 0, &strtab_),
      shstrtab_(".shstrtab", 0) {
  }
  ~ElfBuilder() {}
514
515  OatSection* GetText() { return &text_; }
516  SymtabSection* GetSymtab() { return &symtab_; }
517
518  bool Write(File* elf_file) {
519    // Since the .text section of an oat file contains relative references to .rodata
520    // and (optionally) .bss, we keep these 2 or 3 sections together. This creates
521    // a non-traditional layout where the .bss section is mapped independently of the
522    // .dynamic section and needs its own program header with LOAD RW.
523    //
524    // The basic layout of the elf file. Order may be different in final output.
525    // +-------------------------+
526    // | Elf_Ehdr                |
527    // +-------------------------+
528    // | Elf_Phdr PHDR           |
529    // | Elf_Phdr LOAD R         | .dynsym .dynstr .hash .rodata
530    // | Elf_Phdr LOAD R X       | .text
531    // | Elf_Phdr LOAD RW        | .bss (Optional)
532    // | Elf_Phdr LOAD RW        | .dynamic
533    // | Elf_Phdr DYNAMIC        | .dynamic
534    // | Elf_Phdr LOAD R         | .eh_frame .eh_frame_hdr
535    // | Elf_Phdr EH_FRAME R     | .eh_frame_hdr
536    // +-------------------------+
537    // | .dynsym                 |
538    // | Elf_Sym  STN_UNDEF      |
539    // | Elf_Sym  oatdata        |
540    // | Elf_Sym  oatexec        |
541    // | Elf_Sym  oatlastword    |
542    // | Elf_Sym  oatbss         | (Optional)
543    // | Elf_Sym  oatbsslastword | (Optional)
544    // +-------------------------+
545    // | .dynstr                 |
546    // | names for .dynsym       |
547    // +-------------------------+
548    // | .hash                   |
549    // | hashtable for dynsym    |
550    // +-------------------------+
551    // | .rodata                 |
552    // | oatdata..oatexec-4      |
553    // +-------------------------+
554    // | .text                   |
555    // | oatexec..oatlastword    |
556    // +-------------------------+
557    // | .dynamic                |
558    // | Elf_Dyn DT_HASH         |
559    // | Elf_Dyn DT_STRTAB       |
560    // | Elf_Dyn DT_SYMTAB       |
561    // | Elf_Dyn DT_SYMENT       |
562    // | Elf_Dyn DT_STRSZ        |
563    // | Elf_Dyn DT_SONAME       |
564    // | Elf_Dyn DT_NULL         |
565    // +-------------------------+  (Optional)
566    // | .symtab                 |  (Optional)
567    // | program symbols         |  (Optional)
568    // +-------------------------+  (Optional)
569    // | .strtab                 |  (Optional)
570    // | names for .symtab       |  (Optional)
571    // +-------------------------+  (Optional)
572    // | .eh_frame               |  (Optional)
573    // +-------------------------+  (Optional)
574    // | .eh_frame_hdr           |  (Optional)
575    // +-------------------------+  (Optional)
576    // | .debug_info             |  (Optional)
577    // +-------------------------+  (Optional)
578    // | .debug_abbrev           |  (Optional)
579    // +-------------------------+  (Optional)
580    // | .debug_str              |  (Optional)
581    // +-------------------------+  (Optional)
582    // | .debug_line             |  (Optional)
583    // +-------------------------+
584    // | .shstrtab               |
585    // | names of sections       |
586    // +-------------------------+
587    // | Elf_Shdr null           |
588    // | Elf_Shdr .dynsym        |
589    // | Elf_Shdr .dynstr        |
590    // | Elf_Shdr .hash          |
591    // | Elf_Shdr .rodata        |
592    // | Elf_Shdr .text          |
593    // | Elf_Shdr .bss           |  (Optional)
594    // | Elf_Shdr .dynamic       |
595    // | Elf_Shdr .symtab        |  (Optional)
596    // | Elf_Shdr .strtab        |  (Optional)
597    // | Elf_Shdr .eh_frame      |  (Optional)
598    // | Elf_Shdr .eh_frame_hdr  |  (Optional)
599    // | Elf_Shdr .debug_info    |  (Optional)
600    // | Elf_Shdr .debug_abbrev  |  (Optional)
601    // | Elf_Shdr .debug_str     |  (Optional)
602    // | Elf_Shdr .debug_line    |  (Optional)
603    // | Elf_Shdr .oat_patches   |  (Optional)
604    // | Elf_Shdr .shstrtab      |
605    // +-------------------------+
606    constexpr bool debug_logging_ = false;
607
608    // Create a list of all section which we want to write.
609    // This is the order in which they will be written.
610    std::vector<Section*> sections;
611    sections.push_back(&dynsym_);
612    sections.push_back(&dynstr_);
613    sections.push_back(&hash_);
614    sections.push_back(&rodata_);
615    sections.push_back(&text_);
616    if (bss_.GetSize() != 0u) {
617      sections.push_back(&bss_);
618    }
619    sections.push_back(&dynamic_);
620    if (!symtab_.IsEmpty()) {
621      sections.push_back(&symtab_);
622      sections.push_back(&strtab_);
623    }
624    for (Section* section : other_sections_) {
625      sections.push_back(section);
626    }
627    sections.push_back(&shstrtab_);
628    for (size_t i = 0; i < sections.size(); i++) {
629      // The first section index is 1.  Index 0 is reserved for NULL.
630      // Section index is used for relative symbols and for section links.
631      sections[i]->SetSectionIndex(i + 1);
632      // Add section name to .shstrtab.
633      Elf_Word name_offset = shstrtab_.AddName(sections[i]->GetName());
634      sections[i]->GetHeader()->sh_name = name_offset;
635    }
636
637    // The running program does not have access to section headers
638    // and the loader is not supposed to use them either.
639    // The dynamic sections therefore replicates some of the layout
640    // information like the address and size of .rodata and .text.
641    // It also contains other metadata like the SONAME.
642    // The .dynamic section is found using the PT_DYNAMIC program header.
643    BuildDynsymSection();
644    BuildDynamicSection(elf_file->GetPath());
645
646    // We do not know the number of headers until the final stages of write.
647    // It is easiest to just reserve a fixed amount of space for them.
648    constexpr size_t kMaxProgramHeaders = 8;
649    constexpr size_t kProgramHeadersOffset = sizeof(Elf_Ehdr);
650    constexpr size_t kProgramHeadersSize = sizeof(Elf_Phdr) * kMaxProgramHeaders;
651
652    // Layout of all sections - determine the final file offsets and addresses.
653    // This must be done after we have built all sections and know their size.
654    Elf_Off file_offset = kProgramHeadersOffset + kProgramHeadersSize;
655    Elf_Addr load_address = file_offset;
656    std::vector<Elf_Shdr> section_headers;
657    section_headers.reserve(1u + sections.size());
658    section_headers.push_back(Elf_Shdr());  // NULL at index 0.
659    for (auto* section : sections) {
660      Elf_Shdr* header = section->GetHeader();
661      Elf_Off alignment = header->sh_addralign > 0 ? header->sh_addralign : 1;
662      header->sh_size = section->GetSize();
663      header->sh_link = section->GetLink();
664      // Allocate memory for the section in the file.
665      if (header->sh_type != SHT_NOBITS) {
666        header->sh_offset = RoundUp(file_offset, alignment);
667        file_offset = header->sh_offset + header->sh_size;
668      }
669      // Allocate memory for the section during program execution.
670      if ((header->sh_flags & SHF_ALLOC) != 0) {
671        header->sh_addr = RoundUp(load_address, alignment);
672        load_address = header->sh_addr + header->sh_size;
673      }
674      if (debug_logging_) {
675        LOG(INFO) << "Section " << section->GetName() << ":" << std::hex
676                  << " offset=0x" << header->sh_offset
677                  << " addr=0x" << header->sh_addr
678                  << " size=0x" << header->sh_size;
679      }
680      // Collect section headers into continuous array for convenience.
681      section_headers.push_back(*header);
682    }
683    Elf_Off section_headers_offset = RoundUp(file_offset, sizeof(Elf_Word));
684
685    // Create program headers now that we know the layout of the whole file.
686    // Each segment contains one or more sections which are mapped together.
687    // Not all sections are mapped during the execution of the program.
688    // PT_LOAD does the mapping.  Other PT_* types allow the program to locate
689    // interesting parts of memory and their addresses overlap with PT_LOAD.
690    std::vector<Elf_Phdr> program_headers;
691    program_headers.push_back(MakeProgramHeader(PT_PHDR, PF_R,
692      kProgramHeadersOffset, kProgramHeadersSize, sizeof(Elf_Word)));
693    // Create the main LOAD R segment which spans all sections up to .rodata.
694    const Elf_Shdr* rodata = rodata_.GetHeader();
695    program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R,
696      0, rodata->sh_offset + rodata->sh_size, rodata->sh_addralign));
697    program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_X, text_));
698    if (bss_.GetHeader()->sh_size != 0u) {
699      program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_W, bss_));
700    }
701    program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_W, dynamic_));
702    program_headers.push_back(MakeProgramHeader(PT_DYNAMIC, PF_R | PF_W, dynamic_));
703    const Section* eh_frame = FindSection(".eh_frame");
704    if (eh_frame != nullptr) {
705      program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R, *eh_frame));
706      const Section* eh_frame_hdr = FindSection(".eh_frame_hdr");
707      if (eh_frame_hdr != nullptr) {
708        // Check layout: eh_frame is before eh_frame_hdr and there is no gap.
709        CHECK_LE(eh_frame->GetHeader()->sh_offset, eh_frame_hdr->GetHeader()->sh_offset);
710        CHECK_EQ(eh_frame->GetHeader()->sh_offset + eh_frame->GetHeader()->sh_size,
711                 eh_frame_hdr->GetHeader()->sh_offset);
712        // Extend the PT_LOAD of .eh_frame to include the .eh_frame_hdr as well.
713        program_headers.back().p_filesz += eh_frame_hdr->GetHeader()->sh_size;
714        program_headers.back().p_memsz  += eh_frame_hdr->GetHeader()->sh_size;
715        program_headers.push_back(MakeProgramHeader(PT_GNU_EH_FRAME, PF_R, *eh_frame_hdr));
716      }
717    }
718    CHECK_LE(program_headers.size(), kMaxProgramHeaders);
719
720    // Create the main ELF header.
721    Elf_Ehdr elf_header = MakeElfHeader(isa_);
722    elf_header.e_phoff = kProgramHeadersOffset;
723    elf_header.e_shoff = section_headers_offset;
724    elf_header.e_phnum = program_headers.size();
725    elf_header.e_shnum = section_headers.size();
726    elf_header.e_shstrndx = shstrtab_.GetSectionIndex();
727
728    // Write all headers and section content to the file.
729    // Depending on the implementations of Section::Write, this
730    // might be just memory copies or some more elaborate operations.
731    if (!WriteArray(elf_file, &elf_header, 1)) {
732      LOG(INFO) << "Failed to write the ELF header";
733      return false;
734    }
735    if (!WriteArray(elf_file, program_headers.data(), program_headers.size())) {
736      LOG(INFO) << "Failed to write the program headers";
737      return false;
738    }
739    for (Section* section : sections) {
740      const Elf_Shdr* header = section->GetHeader();
741      if (header->sh_type != SHT_NOBITS) {
742        if (!SeekTo(elf_file, header->sh_offset) || !section->Write(elf_file)) {
743          LOG(INFO) << "Failed to write section " << section->GetName();
744          return false;
745        }
746        Elf_Word current_offset = lseek(elf_file->Fd(), 0, SEEK_CUR);
747        CHECK_EQ(current_offset, header->sh_offset + header->sh_size)
748          << "The number of bytes written does not match GetSize()";
749      }
750    }
751    if (!SeekTo(elf_file, section_headers_offset) ||
752        !WriteArray(elf_file, section_headers.data(), section_headers.size())) {
753      LOG(INFO) << "Failed to write the section headers";
754      return false;
755    }
756    return true;
757  }
758
759  // Adds the given section to the builder.  It does not take ownership.
760  void RegisterSection(Section* section) {
761    other_sections_.push_back(section);
762  }
763
764  const Section* FindSection(const char* name) {
765    for (const auto* section : other_sections_) {
766      if (section->GetName() == name) {
767        return section;
768      }
769    }
770    return nullptr;
771  }
772
773 private:
774  static bool SeekTo(File* elf_file, Elf_Word offset) {
775    DCHECK_LE(lseek(elf_file->Fd(), 0, SEEK_CUR), static_cast<off_t>(offset))
776      << "Seeking backwards";
777    if (static_cast<off_t>(offset) != lseek(elf_file->Fd(), offset, SEEK_SET)) {
778      PLOG(ERROR) << "Failed to seek in file " << elf_file->GetPath();
779      return false;
780    }
781    return true;
782  }
783
784  template<typename T>
785  static bool WriteArray(File* elf_file, const T* data, size_t count) {
786    if (count != 0) {
787      DCHECK(data != nullptr);
788      if (!elf_file->WriteFully(data, count * sizeof(T))) {
789        PLOG(ERROR) << "Failed to write to file " << elf_file->GetPath();
790        return false;
791      }
792    }
793    return true;
794  }
795
796  // Helper - create segment header based on memory range.
797  static Elf_Phdr MakeProgramHeader(Elf_Word type, Elf_Word flags,
798                                    Elf_Off offset, Elf_Word size, Elf_Word align) {
799    Elf_Phdr phdr = Elf_Phdr();
800    phdr.p_type    = type;
801    phdr.p_flags   = flags;
802    phdr.p_offset  = offset;
803    phdr.p_vaddr   = offset;
804    phdr.p_paddr   = offset;
805    phdr.p_filesz  = size;
806    phdr.p_memsz   = size;
807    phdr.p_align   = align;
808    return phdr;
809  }
810
811  // Helper - create segment header based on section header.
812  static Elf_Phdr MakeProgramHeader(Elf_Word type, Elf_Word flags,
813                                    const Section& section) {
814    const Elf_Shdr* shdr = section.GetHeader();
815    // Only run-time allocated sections should be in segment headers.
816    CHECK_NE(shdr->sh_flags & SHF_ALLOC, 0u);
817    Elf_Phdr phdr = Elf_Phdr();
818    phdr.p_type   = type;
819    phdr.p_flags  = flags;
820    phdr.p_offset = shdr->sh_offset;
821    phdr.p_vaddr  = shdr->sh_addr;
822    phdr.p_paddr  = shdr->sh_addr;
823    phdr.p_filesz = shdr->sh_type != SHT_NOBITS ? shdr->sh_size : 0u;
824    phdr.p_memsz  = shdr->sh_size;
825    phdr.p_align  = shdr->sh_addralign;
826    return phdr;
827  }
828
829  static Elf_Ehdr MakeElfHeader(InstructionSet isa) {
830    Elf_Ehdr elf_header = Elf_Ehdr();
831    switch (isa) {
832      case kArm:
833        // Fall through.
834      case kThumb2: {
835        elf_header.e_machine = EM_ARM;
836        elf_header.e_flags = EF_ARM_EABI_VER5;
837        break;
838      }
839      case kArm64: {
840        elf_header.e_machine = EM_AARCH64;
841        elf_header.e_flags = 0;
842        break;
843      }
844      case kX86: {
845        elf_header.e_machine = EM_386;
846        elf_header.e_flags = 0;
847        break;
848      }
849      case kX86_64: {
850        elf_header.e_machine = EM_X86_64;
851        elf_header.e_flags = 0;
852        break;
853      }
854      case kMips: {
855        elf_header.e_machine = EM_MIPS;
856        elf_header.e_flags = (EF_MIPS_NOREORDER |
857                               EF_MIPS_PIC       |
858                               EF_MIPS_CPIC      |
859                               EF_MIPS_ABI_O32   |
860                               EF_MIPS_ARCH_32R2);
861        break;
862      }
863      case kMips64: {
864        elf_header.e_machine = EM_MIPS;
865        elf_header.e_flags = (EF_MIPS_NOREORDER |
866                               EF_MIPS_PIC       |
867                               EF_MIPS_CPIC      |
868                               EF_MIPS_ARCH_64R6);
869        break;
870      }
871      case kNone: {
872        LOG(FATAL) << "No instruction set";
873      }
874    }
875
876    elf_header.e_ident[EI_MAG0]       = ELFMAG0;
877    elf_header.e_ident[EI_MAG1]       = ELFMAG1;
878    elf_header.e_ident[EI_MAG2]       = ELFMAG2;
879    elf_header.e_ident[EI_MAG3]       = ELFMAG3;
880    elf_header.e_ident[EI_CLASS]      = (sizeof(Elf_Addr) == sizeof(Elf32_Addr))
881                                         ? ELFCLASS32 : ELFCLASS64;;
882    elf_header.e_ident[EI_DATA]       = ELFDATA2LSB;
883    elf_header.e_ident[EI_VERSION]    = EV_CURRENT;
884    elf_header.e_ident[EI_OSABI]      = ELFOSABI_LINUX;
885    elf_header.e_ident[EI_ABIVERSION] = 0;
886    elf_header.e_type = ET_DYN;
887    elf_header.e_version = 1;
888    elf_header.e_entry = 0;
889    elf_header.e_ehsize = sizeof(Elf_Ehdr);
890    elf_header.e_phentsize = sizeof(Elf_Phdr);
891    elf_header.e_shentsize = sizeof(Elf_Shdr);
892    elf_header.e_phoff = sizeof(Elf_Ehdr);
893    return elf_header;
894  }
895
896  void BuildDynamicSection(const std::string& elf_file_path) {
897    std::string soname(elf_file_path);
898    size_t directory_separator_pos = soname.rfind('/');
899    if (directory_separator_pos != std::string::npos) {
900      soname = soname.substr(directory_separator_pos + 1);
901    }
902    // NB: We must add the name before adding DT_STRSZ.
903    Elf_Word soname_offset = dynstr_.AddName(soname);
904
905    dynamic_.AddDynamicTag(DT_HASH, 0, &hash_);
906    dynamic_.AddDynamicTag(DT_STRTAB, 0, &dynstr_);
907    dynamic_.AddDynamicTag(DT_SYMTAB, 0, &dynsym_);
908    dynamic_.AddDynamicTag(DT_SYMENT, sizeof(Elf_Sym), nullptr);
909    dynamic_.AddDynamicTag(DT_STRSZ, dynstr_.GetSize(), nullptr);
910    dynamic_.AddDynamicTag(DT_SONAME, soname_offset, nullptr);
911  }
912
913  void BuildDynsymSection() {
914    dynsym_.AddSymbol("oatdata", &rodata_, 0, true,
915                      rodata_.GetSize(), STB_GLOBAL, STT_OBJECT);
916    dynsym_.AddSymbol("oatexec", &text_, 0, true,
917                      text_.GetSize(), STB_GLOBAL, STT_OBJECT);
918    dynsym_.AddSymbol("oatlastword", &text_, text_.GetSize() - 4,
919                      true, 4, STB_GLOBAL, STT_OBJECT);
920    if (bss_.GetSize() != 0u) {
921      dynsym_.AddSymbol("oatbss", &bss_, 0, true,
922                        bss_.GetSize(), STB_GLOBAL, STT_OBJECT);
923      dynsym_.AddSymbol("oatbsslastword", &bss_, bss_.GetSize() - 4,
924                        true, 4, STB_GLOBAL, STT_OBJECT);
925    }
926  }
927
928  InstructionSet isa_;
929  StrtabSection dynstr_;
930  SymtabSection dynsym_;
931  HashSection hash_;
932  OatSection rodata_;
933  OatSection text_;
934  NoBitsSection bss_;
935  DynamicSection dynamic_;
936  StrtabSection strtab_;
937  SymtabSection symtab_;
938  std::vector<Section*> other_sections_;
939  StrtabSection shstrtab_;
940
941  DISALLOW_COPY_AND_ASSIGN(ElfBuilder);
942};
943
944}  // namespace art
945
946#endif  // ART_COMPILER_ELF_BUILDER_H_
947