elf_builder.h revision 491a7fed21eb153965cee73ac77268ec340aaca2
1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef ART_COMPILER_ELF_BUILDER_H_
18#define ART_COMPILER_ELF_BUILDER_H_
19
20#include <vector>
21
22#include "arch/instruction_set.h"
23#include "base/bit_utils.h"
24#include "base/unix_file/fd_file.h"
25#include "buffered_output_stream.h"
26#include "elf_utils.h"
27#include "file_output_stream.h"
28
29namespace art {
30
31class CodeOutput {
32 public:
33  virtual bool Write(OutputStream* out) = 0;
34  virtual ~CodeOutput() {}
35};
36
37// Writes ELF file.
38// The main complication is that the sections often want to reference
39// each other.  We solve this by writing the ELF file in two stages:
40//  * Sections are asked about their size, and overall layout is calculated.
41//  * Sections do the actual writes which may use offsets of other sections.
42template <typename ElfTypes>
43class ElfBuilder FINAL {
44 public:
45  using Elf_Addr = typename ElfTypes::Addr;
46  using Elf_Off = typename ElfTypes::Off;
47  using Elf_Word = typename ElfTypes::Word;
48  using Elf_Sword = typename ElfTypes::Sword;
49  using Elf_Ehdr = typename ElfTypes::Ehdr;
50  using Elf_Shdr = typename ElfTypes::Shdr;
51  using Elf_Sym = typename ElfTypes::Sym;
52  using Elf_Phdr = typename ElfTypes::Phdr;
53  using Elf_Dyn = typename ElfTypes::Dyn;
54
55  // Base class of all sections.
56  class Section {
57   public:
58    Section(const std::string& name, Elf_Word type, Elf_Word flags,
59            const Section* link, Elf_Word info, Elf_Word align, Elf_Word entsize)
60        : header_(), section_index_(0), name_(name), link_(link) {
61      header_.sh_type = type;
62      header_.sh_flags = flags;
63      header_.sh_info = info;
64      header_.sh_addralign = align;
65      header_.sh_entsize = entsize;
66    }
67    virtual ~Section() {}
68
69    // Returns the size of the content of this section.  It is used to
70    // calculate file offsets of all sections before doing any writes.
71    virtual Elf_Word GetSize() const = 0;
72
73    // Write the content of this section to the given file.
74    // This must write exactly the number of bytes returned by GetSize().
75    // Offsets of all sections are known when this method is called.
76    virtual bool Write(File* elf_file) = 0;
77
78    Elf_Word GetLink() const {
79      return (link_ != nullptr) ? link_->GetSectionIndex() : 0;
80    }
81
82    const Elf_Shdr* GetHeader() const {
83      return &header_;
84    }
85
86    Elf_Shdr* GetHeader() {
87      return &header_;
88    }
89
90    Elf_Word GetSectionIndex() const {
91      DCHECK_NE(section_index_, 0u);
92      return section_index_;
93    }
94
95    void SetSectionIndex(Elf_Word section_index) {
96      section_index_ = section_index;
97    }
98
99    const std::string& GetName() const {
100      return name_;
101    }
102
103   private:
104    Elf_Shdr header_;
105    Elf_Word section_index_;
106    const std::string name_;
107    const Section* const link_;
108
109    DISALLOW_COPY_AND_ASSIGN(Section);
110  };
111
112  // Writer of .dynamic section.
113  class DynamicSection FINAL : public Section {
114   public:
115    void AddDynamicTag(Elf_Sword tag, Elf_Word value, const Section* section) {
116      DCHECK_NE(tag, static_cast<Elf_Sword>(DT_NULL));
117      dynamics_.push_back({tag, value, section});
118    }
119
120    DynamicSection(const std::string& name, Section* link)
121        : Section(name, SHT_DYNAMIC, SHF_ALLOC,
122                  link, 0, kPageSize, sizeof(Elf_Dyn)) {}
123
124    Elf_Word GetSize() const OVERRIDE {
125      return (dynamics_.size() + 1 /* DT_NULL */) * sizeof(Elf_Dyn);
126    }
127
128    bool Write(File* elf_file) OVERRIDE {
129      std::vector<Elf_Dyn> buffer;
130      buffer.reserve(dynamics_.size() + 1u);
131      for (const ElfDynamicState& it : dynamics_) {
132        if (it.section_ != nullptr) {
133          // We are adding an address relative to a section.
134          buffer.push_back(
135              {it.tag_, {it.value_ + it.section_->GetHeader()->sh_addr}});
136        } else {
137          buffer.push_back({it.tag_, {it.value_}});
138        }
139      }
140      buffer.push_back({DT_NULL, {0}});
141      return WriteArray(elf_file, buffer.data(), buffer.size());
142    }
143
144   private:
145    struct ElfDynamicState {
146      Elf_Sword tag_;
147      Elf_Word value_;
148      const Section* section_;
149    };
150    std::vector<ElfDynamicState> dynamics_;
151  };
152
153  using PatchFn = void (*)(const std::vector<uintptr_t>& patch_locations,
154                           Elf_Addr buffer_address,
155                           Elf_Addr base_address,
156                           std::vector<uint8_t>* buffer);
157
158  // Section with content based on simple memory buffer.
159  // The buffer can be optionally patched before writing.
160  class RawSection FINAL : public Section {
161   public:
162    RawSection(const std::string& name, Elf_Word type, Elf_Word flags,
163               const Section* link, Elf_Word info, Elf_Word align, Elf_Word entsize,
164               PatchFn patch = nullptr, const Section* patch_base_section = nullptr)
165        : Section(name, type, flags, link, info, align, entsize),
166          patched_(false), patch_(patch), patch_base_section_(patch_base_section) {
167    }
168
169    RawSection(const std::string& name, Elf_Word type)
170        : RawSection(name, type, 0, nullptr, 0, 1, 0, nullptr, nullptr) {
171    }
172
173    Elf_Word GetSize() const OVERRIDE {
174      return buffer_.size();
175    }
176
177    bool Write(File* elf_file) OVERRIDE {
178      if (!patch_locations_.empty()) {
179        DCHECK(!patched_);  // Do not patch twice.
180        DCHECK(patch_ != nullptr);
181        DCHECK(patch_base_section_ != nullptr);
182        patch_(patch_locations_,
183               this->GetHeader()->sh_addr,
184               patch_base_section_->GetHeader()->sh_addr,
185               &buffer_);
186        patched_ = true;
187      }
188      return WriteArray(elf_file, buffer_.data(), buffer_.size());
189    }
190
191    bool IsEmpty() const {
192      return buffer_.size() == 0;
193    }
194
195    std::vector<uint8_t>* GetBuffer() {
196      return &buffer_;
197    }
198
199    void SetBuffer(const std::vector<uint8_t>& buffer) {
200      buffer_ = buffer;
201    }
202
203    std::vector<uintptr_t>* GetPatchLocations() {
204      return &patch_locations_;
205    }
206
207   private:
208    std::vector<uint8_t> buffer_;
209    std::vector<uintptr_t> patch_locations_;
210    bool patched_;
211    // User-provided function to do the actual patching.
212    PatchFn patch_;
213    // The section that we patch against (usually .text).
214    const Section* patch_base_section_;
215  };
216
217  // Writer of .rodata section or .text section.
218  // The write is done lazily using the provided CodeOutput.
219  class OatSection FINAL : public Section {
220   public:
221    OatSection(const std::string& name, Elf_Word type, Elf_Word flags,
222               const Section* link, Elf_Word info, Elf_Word align,
223               Elf_Word entsize, Elf_Word size, CodeOutput* code_output)
224        : Section(name, type, flags, link, info, align, entsize),
225          size_(size), code_output_(code_output) {
226    }
227
228    Elf_Word GetSize() const OVERRIDE {
229      return size_;
230    }
231
232    bool Write(File* elf_file) OVERRIDE {
233      // The BufferedOutputStream class contains the buffer as field,
234      // therefore it is too big to allocate on the stack.
235      std::unique_ptr<BufferedOutputStream> output_stream(
236          new BufferedOutputStream(new FileOutputStream(elf_file)));
237      return code_output_->Write(output_stream.get());
238    }
239
240   private:
241    Elf_Word size_;
242    CodeOutput* code_output_;
243  };
244
245  // Writer of .bss section.
246  class NoBitsSection FINAL : public Section {
247   public:
248    NoBitsSection(const std::string& name, Elf_Word size)
249        : Section(name, SHT_NOBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
250          size_(size) {
251    }
252
253    Elf_Word GetSize() const OVERRIDE {
254      return size_;
255    }
256
257    bool Write(File* elf_file ATTRIBUTE_UNUSED) OVERRIDE {
258      LOG(ERROR) << "This section should not be written to the ELF file";
259      return false;
260    }
261
262   private:
263    Elf_Word size_;
264  };
265
266  // Writer of .dynstr .strtab and .shstrtab sections.
267  class StrtabSection FINAL : public Section {
268   public:
269    StrtabSection(const std::string& name, Elf_Word flags)
270        : Section(name, SHT_STRTAB, flags, nullptr, 0, 1, 1) {
271      buffer_.reserve(4 * KB);
272      // The first entry of strtab must be empty string.
273      buffer_ += '\0';
274    }
275
276    Elf_Word AddName(const std::string& name) {
277      Elf_Word offset = buffer_.size();
278      buffer_ += name;
279      buffer_ += '\0';
280      return offset;
281    }
282
283    Elf_Word GetSize() const OVERRIDE {
284      return buffer_.size();
285    }
286
287    bool Write(File* elf_file) OVERRIDE {
288      return WriteArray(elf_file, buffer_.data(), buffer_.size());
289    }
290
291   private:
292    std::string buffer_;
293  };
294
295  class HashSection;
296
297  // Writer of .dynsym and .symtab sections.
298  class SymtabSection FINAL : public Section {
299   public:
300    // Add a symbol with given name to this symtab. The symbol refers to
301    // 'relative_addr' within the given section and has the given attributes.
302    void AddSymbol(const std::string& name, const Section* section,
303                   Elf_Addr addr, bool is_relative, Elf_Word size,
304                   uint8_t binding, uint8_t type, uint8_t other = 0) {
305      CHECK(section != nullptr);
306      Elf_Word name_idx = strtab_->AddName(name);
307      symbols_.push_back({ name, section, addr, size, is_relative,
308                           MakeStInfo(binding, type), other, name_idx });
309    }
310
311    SymtabSection(const std::string& name, Elf_Word type, Elf_Word flags,
312                  StrtabSection* strtab)
313        : Section(name, type, flags, strtab, 0, sizeof(Elf_Word), sizeof(Elf_Sym)),
314          strtab_(strtab) {
315    }
316
317    bool IsEmpty() const {
318      return symbols_.empty();
319    }
320
321    Elf_Word GetSize() const OVERRIDE {
322      return (1 /* NULL */ + symbols_.size()) * sizeof(Elf_Sym);
323    }
324
325    bool Write(File* elf_file) OVERRIDE {
326      std::vector<Elf_Sym> buffer;
327      buffer.reserve(1u + symbols_.size());
328      buffer.push_back(Elf_Sym());  // NULL.
329      for (const ElfSymbolState& it : symbols_) {
330        Elf_Sym sym = Elf_Sym();
331        sym.st_name = it.name_idx_;
332        if (it.is_relative_) {
333          sym.st_value = it.addr_ + it.section_->GetHeader()->sh_addr;
334        } else {
335          sym.st_value = it.addr_;
336        }
337        sym.st_size = it.size_;
338        sym.st_other = it.other_;
339        sym.st_shndx = it.section_->GetSectionIndex();
340        sym.st_info = it.info_;
341        buffer.push_back(sym);
342      }
343      return WriteArray(elf_file, buffer.data(), buffer.size());
344    }
345
346   private:
347    struct ElfSymbolState {
348      const std::string name_;
349      const Section* section_;
350      Elf_Addr addr_;
351      Elf_Word size_;
352      bool is_relative_;
353      uint8_t info_;
354      uint8_t other_;
355      Elf_Word name_idx_;  // index in the strtab.
356    };
357
358    static inline constexpr uint8_t MakeStInfo(uint8_t binding, uint8_t type) {
359      return ((binding) << 4) + ((type) & 0xf);
360    }
361
362    // The symbols in the same order they will be in the symbol table.
363    std::vector<ElfSymbolState> symbols_;
364    StrtabSection* strtab_;
365
366    friend class HashSection;
367  };
368
369  // TODO: Consider removing.
370  // We use it only for the dynsym section which has only 5 symbols.
371  // We do not use it for symtab, and we probably do not have to
372  // since we use those symbols only to print backtraces.
373  class HashSection FINAL : public Section {
374   public:
375    HashSection(const std::string& name, Elf_Word flags, SymtabSection* symtab)
376        : Section(name, SHT_HASH, flags, symtab,
377                  0, sizeof(Elf_Word), sizeof(Elf_Word)),
378          symtab_(symtab) {
379    }
380
381    Elf_Word GetSize() const OVERRIDE {
382      Elf_Word nbuckets = GetNumBuckets();
383      Elf_Word chain_size = symtab_->symbols_.size() + 1 /* NULL */;
384      return (2 /* header */ + nbuckets + chain_size) * sizeof(Elf_Word);
385    }
386
387    bool Write(File* const elf_file) OVERRIDE {
388      // Here is how The ELF hash table works.
389      // There are 3 arrays to worry about.
390      // * The symbol table where the symbol information is.
391      // * The bucket array which is an array of indexes into the symtab and chain.
392      // * The chain array which is also an array of indexes into the symtab and chain.
393      //
394      // Lets say the state is something like this.
395      // +--------+       +--------+      +-----------+
396      // | symtab |       | bucket |      |   chain   |
397      // |  null  |       | 1      |      | STN_UNDEF |
398      // | <sym1> |       | 4      |      | 2         |
399      // | <sym2> |       |        |      | 5         |
400      // | <sym3> |       |        |      | STN_UNDEF |
401      // | <sym4> |       |        |      | 3         |
402      // | <sym5> |       |        |      | STN_UNDEF |
403      // +--------+       +--------+      +-----------+
404      //
405      // The lookup process (in python psudocode) is
406      //
407      // def GetSym(name):
408      //     # NB STN_UNDEF == 0
409      //     indx = bucket[elfhash(name) % num_buckets]
410      //     while indx != STN_UNDEF:
411      //         if GetSymbolName(symtab[indx]) == name:
412      //             return symtab[indx]
413      //         indx = chain[indx]
414      //     return SYMBOL_NOT_FOUND
415      //
416      // Between bucket and chain arrays every symtab index must be present exactly
417      // once (except for STN_UNDEF, which must be present 1 + num_bucket times).
418      const auto& symbols = symtab_->symbols_;
419      // Select number of buckets.
420      // This is essentially arbitrary.
421      Elf_Word nbuckets = GetNumBuckets();
422      // 1 is for the implicit NULL symbol.
423      Elf_Word chain_size = (symbols.size() + 1);
424      std::vector<Elf_Word> hash;
425      hash.push_back(nbuckets);
426      hash.push_back(chain_size);
427      uint32_t bucket_offset = hash.size();
428      uint32_t chain_offset = bucket_offset + nbuckets;
429      hash.resize(hash.size() + nbuckets + chain_size, 0);
430
431      Elf_Word* buckets = hash.data() + bucket_offset;
432      Elf_Word* chain   = hash.data() + chain_offset;
433
434      // Set up the actual hash table.
435      for (Elf_Word i = 0; i < symbols.size(); i++) {
436        // Add 1 since we need to have the null symbol that is not in the symbols
437        // list.
438        Elf_Word index = i + 1;
439        Elf_Word hash_val = static_cast<Elf_Word>(elfhash(symbols[i].name_.c_str())) % nbuckets;
440        if (buckets[hash_val] == 0) {
441          buckets[hash_val] = index;
442        } else {
443          hash_val = buckets[hash_val];
444          CHECK_LT(hash_val, chain_size);
445          while (chain[hash_val] != 0) {
446            hash_val = chain[hash_val];
447            CHECK_LT(hash_val, chain_size);
448          }
449          chain[hash_val] = index;
450          // Check for loops. Works because if this is non-empty then there must be
451          // another cell which already contains the same symbol index as this one,
452          // which means some symbol has more then one name, which isn't allowed.
453          CHECK_EQ(chain[index], static_cast<Elf_Word>(0));
454        }
455      }
456      return WriteArray(elf_file, hash.data(), hash.size());
457    }
458
459   private:
460    Elf_Word GetNumBuckets() const {
461      const auto& symbols = symtab_->symbols_;
462      if (symbols.size() < 8) {
463        return 2;
464      } else if (symbols.size() < 32) {
465        return 4;
466      } else if (symbols.size() < 256) {
467        return 16;
468      } else {
469        // Have about 32 ids per bucket.
470        return RoundUp(symbols.size()/32, 2);
471      }
472    }
473
474    // from bionic
475    static inline unsigned elfhash(const char *_name) {
476      const unsigned char *name = (const unsigned char *) _name;
477      unsigned h = 0, g;
478
479      while (*name) {
480        h = (h << 4) + *name++;
481        g = h & 0xf0000000;
482        h ^= g;
483        h ^= g >> 24;
484      }
485      return h;
486    }
487
488    SymtabSection* symtab_;
489
490    DISALLOW_COPY_AND_ASSIGN(HashSection);
491  };
492
493  ElfBuilder(InstructionSet isa,
494             Elf_Word rodata_size, CodeOutput* rodata_writer,
495             Elf_Word text_size, CodeOutput* text_writer,
496             Elf_Word bss_size)
497    : isa_(isa),
498      dynstr_(".dynstr", SHF_ALLOC),
499      dynsym_(".dynsym", SHT_DYNSYM, SHF_ALLOC, &dynstr_),
500      hash_(".hash", SHF_ALLOC, &dynsym_),
501      rodata_(".rodata", SHT_PROGBITS, SHF_ALLOC,
502              nullptr, 0, kPageSize, 0, rodata_size, rodata_writer),
503      text_(".text", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR,
504            nullptr, 0, kPageSize, 0, text_size, text_writer),
505      bss_(".bss", bss_size),
506      dynamic_(".dynamic", &dynsym_),
507      strtab_(".strtab", 0),
508      symtab_(".symtab", SHT_SYMTAB, 0, &strtab_),
509      shstrtab_(".shstrtab", 0) {
510  }
511  ~ElfBuilder() {}
512
513  OatSection* GetText() { return &text_; }
514  SymtabSection* GetSymtab() { return &symtab_; }
515
516  bool Write(File* elf_file) {
517    // Since the .text section of an oat file contains relative references to .rodata
518    // and (optionally) .bss, we keep these 2 or 3 sections together. This creates
519    // a non-traditional layout where the .bss section is mapped independently of the
520    // .dynamic section and needs its own program header with LOAD RW.
521    //
522    // The basic layout of the elf file. Order may be different in final output.
523    // +-------------------------+
524    // | Elf_Ehdr                |
525    // +-------------------------+
526    // | Elf_Phdr PHDR           |
527    // | Elf_Phdr LOAD R         | .dynsym .dynstr .hash .rodata
528    // | Elf_Phdr LOAD R X       | .text
529    // | Elf_Phdr LOAD RW        | .bss (Optional)
530    // | Elf_Phdr LOAD RW        | .dynamic
531    // | Elf_Phdr DYNAMIC        | .dynamic
532    // | Elf_Phdr LOAD R         | .eh_frame .eh_frame_hdr
533    // | Elf_Phdr EH_FRAME R     | .eh_frame_hdr
534    // +-------------------------+
535    // | .dynsym                 |
536    // | Elf_Sym  STN_UNDEF      |
537    // | Elf_Sym  oatdata        |
538    // | Elf_Sym  oatexec        |
539    // | Elf_Sym  oatlastword    |
540    // | Elf_Sym  oatbss         | (Optional)
541    // | Elf_Sym  oatbsslastword | (Optional)
542    // +-------------------------+
543    // | .dynstr                 |
544    // | names for .dynsym       |
545    // +-------------------------+
546    // | .hash                   |
547    // | hashtable for dynsym    |
548    // +-------------------------+
549    // | .rodata                 |
550    // | oatdata..oatexec-4      |
551    // +-------------------------+
552    // | .text                   |
553    // | oatexec..oatlastword    |
554    // +-------------------------+
555    // | .dynamic                |
556    // | Elf_Dyn DT_HASH         |
557    // | Elf_Dyn DT_STRTAB       |
558    // | Elf_Dyn DT_SYMTAB       |
559    // | Elf_Dyn DT_SYMENT       |
560    // | Elf_Dyn DT_STRSZ        |
561    // | Elf_Dyn DT_SONAME       |
562    // | Elf_Dyn DT_NULL         |
563    // +-------------------------+  (Optional)
564    // | .symtab                 |  (Optional)
565    // | program symbols         |  (Optional)
566    // +-------------------------+  (Optional)
567    // | .strtab                 |  (Optional)
568    // | names for .symtab       |  (Optional)
569    // +-------------------------+  (Optional)
570    // | .eh_frame               |  (Optional)
571    // +-------------------------+  (Optional)
572    // | .eh_frame_hdr           |  (Optional)
573    // +-------------------------+  (Optional)
574    // | .debug_info             |  (Optional)
575    // +-------------------------+  (Optional)
576    // | .debug_abbrev           |  (Optional)
577    // +-------------------------+  (Optional)
578    // | .debug_str              |  (Optional)
579    // +-------------------------+  (Optional)
580    // | .debug_line             |  (Optional)
581    // +-------------------------+
582    // | .shstrtab               |
583    // | names of sections       |
584    // +-------------------------+
585    // | Elf_Shdr null           |
586    // | Elf_Shdr .dynsym        |
587    // | Elf_Shdr .dynstr        |
588    // | Elf_Shdr .hash          |
589    // | Elf_Shdr .rodata        |
590    // | Elf_Shdr .text          |
591    // | Elf_Shdr .bss           |  (Optional)
592    // | Elf_Shdr .dynamic       |
593    // | Elf_Shdr .symtab        |  (Optional)
594    // | Elf_Shdr .strtab        |  (Optional)
595    // | Elf_Shdr .eh_frame      |  (Optional)
596    // | Elf_Shdr .eh_frame_hdr  |  (Optional)
597    // | Elf_Shdr .debug_info    |  (Optional)
598    // | Elf_Shdr .debug_abbrev  |  (Optional)
599    // | Elf_Shdr .debug_str     |  (Optional)
600    // | Elf_Shdr .debug_line    |  (Optional)
601    // | Elf_Shdr .oat_patches   |  (Optional)
602    // | Elf_Shdr .shstrtab      |
603    // +-------------------------+
604    constexpr bool debug_logging_ = false;
605
606    // Create a list of all section which we want to write.
607    // This is the order in which they will be written.
608    std::vector<Section*> sections;
609    sections.push_back(&dynsym_);
610    sections.push_back(&dynstr_);
611    sections.push_back(&hash_);
612    sections.push_back(&rodata_);
613    sections.push_back(&text_);
614    if (bss_.GetSize() != 0u) {
615      sections.push_back(&bss_);
616    }
617    sections.push_back(&dynamic_);
618    if (!symtab_.IsEmpty()) {
619      sections.push_back(&symtab_);
620      sections.push_back(&strtab_);
621    }
622    for (Section* section : other_sections_) {
623      sections.push_back(section);
624    }
625    sections.push_back(&shstrtab_);
626    for (size_t i = 0; i < sections.size(); i++) {
627      // The first section index is 1.  Index 0 is reserved for NULL.
628      // Section index is used for relative symbols and for section links.
629      sections[i]->SetSectionIndex(i + 1);
630      // Add section name to .shstrtab.
631      Elf_Word name_offset = shstrtab_.AddName(sections[i]->GetName());
632      sections[i]->GetHeader()->sh_name = name_offset;
633    }
634
635    // The running program does not have access to section headers
636    // and the loader is not supposed to use them either.
637    // The dynamic sections therefore replicates some of the layout
638    // information like the address and size of .rodata and .text.
639    // It also contains other metadata like the SONAME.
640    // The .dynamic section is found using the PT_DYNAMIC program header.
641    BuildDynsymSection();
642    BuildDynamicSection(elf_file->GetPath());
643
644    // We do not know the number of headers until the final stages of write.
645    // It is easiest to just reserve a fixed amount of space for them.
646    constexpr size_t kMaxProgramHeaders = 8;
647    constexpr size_t kProgramHeadersOffset = sizeof(Elf_Ehdr);
648    constexpr size_t kProgramHeadersSize = sizeof(Elf_Phdr) * kMaxProgramHeaders;
649
650    // Layout of all sections - determine the final file offsets and addresses.
651    // This must be done after we have built all sections and know their size.
652    Elf_Off file_offset = kProgramHeadersOffset + kProgramHeadersSize;
653    Elf_Addr load_address = file_offset;
654    std::vector<Elf_Shdr> section_headers;
655    section_headers.reserve(1u + sections.size());
656    section_headers.push_back(Elf_Shdr());  // NULL at index 0.
657    for (auto* section : sections) {
658      Elf_Shdr* header = section->GetHeader();
659      Elf_Off alignment = header->sh_addralign > 0 ? header->sh_addralign : 1;
660      header->sh_size = section->GetSize();
661      header->sh_link = section->GetLink();
662      // Allocate memory for the section in the file.
663      if (header->sh_type != SHT_NOBITS) {
664        header->sh_offset = RoundUp(file_offset, alignment);
665        file_offset = header->sh_offset + header->sh_size;
666      }
667      // Allocate memory for the section during program execution.
668      if ((header->sh_flags & SHF_ALLOC) != 0) {
669        header->sh_addr = RoundUp(load_address, alignment);
670        load_address = header->sh_addr + header->sh_size;
671      }
672      if (debug_logging_) {
673        LOG(INFO) << "Section " << section->GetName() << ":" << std::hex
674                  << " offset=0x" << header->sh_offset
675                  << " addr=0x" << header->sh_addr
676                  << " size=0x" << header->sh_size;
677      }
678      // Collect section headers into continuous array for convenience.
679      section_headers.push_back(*header);
680    }
681    Elf_Off section_headers_offset = RoundUp(file_offset, sizeof(Elf_Word));
682
683    // Create program headers now that we know the layout of the whole file.
684    // Each segment contains one or more sections which are mapped together.
685    // Not all sections are mapped during the execution of the program.
686    // PT_LOAD does the mapping.  Other PT_* types allow the program to locate
687    // interesting parts of memory and their addresses overlap with PT_LOAD.
688    std::vector<Elf_Phdr> program_headers;
689    program_headers.push_back(MakeProgramHeader(PT_PHDR, PF_R,
690      kProgramHeadersOffset, kProgramHeadersSize, sizeof(Elf_Word)));
691    // Create the main LOAD R segment which spans all sections up to .rodata.
692    const Elf_Shdr* rodata = rodata_.GetHeader();
693    program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R,
694      0, rodata->sh_offset + rodata->sh_size, rodata->sh_addralign));
695    program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_X, text_));
696    if (bss_.GetHeader()->sh_size != 0u) {
697      program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_W, bss_));
698    }
699    program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_W, dynamic_));
700    program_headers.push_back(MakeProgramHeader(PT_DYNAMIC, PF_R | PF_W, dynamic_));
701    const Section* eh_frame = FindSection(".eh_frame");
702    if (eh_frame != nullptr) {
703      program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R, *eh_frame));
704      const Section* eh_frame_hdr = FindSection(".eh_frame_hdr");
705      if (eh_frame_hdr != nullptr) {
706        // Check layout: eh_frame is before eh_frame_hdr and there is no gap.
707        CHECK_LE(eh_frame->GetHeader()->sh_offset, eh_frame_hdr->GetHeader()->sh_offset);
708        CHECK_EQ(eh_frame->GetHeader()->sh_offset + eh_frame->GetHeader()->sh_size,
709                 eh_frame_hdr->GetHeader()->sh_offset);
710        // Extend the PT_LOAD of .eh_frame to include the .eh_frame_hdr as well.
711        program_headers.back().p_filesz += eh_frame_hdr->GetHeader()->sh_size;
712        program_headers.back().p_memsz  += eh_frame_hdr->GetHeader()->sh_size;
713        program_headers.push_back(MakeProgramHeader(PT_GNU_EH_FRAME, PF_R, *eh_frame_hdr));
714      }
715    }
716    CHECK_LE(program_headers.size(), kMaxProgramHeaders);
717
718    // Create the main ELF header.
719    Elf_Ehdr elf_header = MakeElfHeader(isa_);
720    elf_header.e_phoff = kProgramHeadersOffset;
721    elf_header.e_shoff = section_headers_offset;
722    elf_header.e_phnum = program_headers.size();
723    elf_header.e_shnum = section_headers.size();
724    elf_header.e_shstrndx = shstrtab_.GetSectionIndex();
725
726    // Write all headers and section content to the file.
727    // Depending on the implementations of Section::Write, this
728    // might be just memory copies or some more elaborate operations.
729    if (!WriteArray(elf_file, &elf_header, 1)) {
730      LOG(INFO) << "Failed to write the ELF header";
731      return false;
732    }
733    if (!WriteArray(elf_file, program_headers.data(), program_headers.size())) {
734      LOG(INFO) << "Failed to write the program headers";
735      return false;
736    }
737    for (Section* section : sections) {
738      const Elf_Shdr* header = section->GetHeader();
739      if (header->sh_type != SHT_NOBITS) {
740        if (!SeekTo(elf_file, header->sh_offset) || !section->Write(elf_file)) {
741          LOG(INFO) << "Failed to write section " << section->GetName();
742          return false;
743        }
744        Elf_Word current_offset = lseek(elf_file->Fd(), 0, SEEK_CUR);
745        CHECK_EQ(current_offset, header->sh_offset + header->sh_size)
746          << "The number of bytes written does not match GetSize()";
747      }
748    }
749    if (!SeekTo(elf_file, section_headers_offset) ||
750        !WriteArray(elf_file, section_headers.data(), section_headers.size())) {
751      LOG(INFO) << "Failed to write the section headers";
752      return false;
753    }
754    return true;
755  }
756
757  // Adds the given section to the builder.  It does not take ownership.
758  void RegisterSection(Section* section) {
759    other_sections_.push_back(section);
760  }
761
762  const Section* FindSection(const char* name) {
763    for (const auto* section : other_sections_) {
764      if (section->GetName() == name) {
765        return section;
766      }
767    }
768    return nullptr;
769  }
770
771 private:
772  static bool SeekTo(File* elf_file, Elf_Word offset) {
773    DCHECK_LE(lseek(elf_file->Fd(), 0, SEEK_CUR), static_cast<off_t>(offset))
774      << "Seeking backwards";
775    if (static_cast<off_t>(offset) != lseek(elf_file->Fd(), offset, SEEK_SET)) {
776      PLOG(ERROR) << "Failed to seek in file " << elf_file->GetPath();
777      return false;
778    }
779    return true;
780  }
781
782  template<typename T>
783  static bool WriteArray(File* elf_file, const T* data, size_t count) {
784    if (count != 0) {
785      DCHECK(data != nullptr);
786      if (!elf_file->WriteFully(data, count * sizeof(T))) {
787        PLOG(ERROR) << "Failed to write to file " << elf_file->GetPath();
788        return false;
789      }
790    }
791    return true;
792  }
793
794  // Helper - create segment header based on memory range.
795  static Elf_Phdr MakeProgramHeader(Elf_Word type, Elf_Word flags,
796                                    Elf_Off offset, Elf_Word size, Elf_Word align) {
797    Elf_Phdr phdr = Elf_Phdr();
798    phdr.p_type    = type;
799    phdr.p_flags   = flags;
800    phdr.p_offset  = offset;
801    phdr.p_vaddr   = offset;
802    phdr.p_paddr   = offset;
803    phdr.p_filesz  = size;
804    phdr.p_memsz   = size;
805    phdr.p_align   = align;
806    return phdr;
807  }
808
809  // Helper - create segment header based on section header.
810  static Elf_Phdr MakeProgramHeader(Elf_Word type, Elf_Word flags,
811                                    const Section& section) {
812    const Elf_Shdr* shdr = section.GetHeader();
813    // Only run-time allocated sections should be in segment headers.
814    CHECK_NE(shdr->sh_flags & SHF_ALLOC, 0u);
815    Elf_Phdr phdr = Elf_Phdr();
816    phdr.p_type   = type;
817    phdr.p_flags  = flags;
818    phdr.p_offset = shdr->sh_offset;
819    phdr.p_vaddr  = shdr->sh_addr;
820    phdr.p_paddr  = shdr->sh_addr;
821    phdr.p_filesz = shdr->sh_type != SHT_NOBITS ? shdr->sh_size : 0u;
822    phdr.p_memsz  = shdr->sh_size;
823    phdr.p_align  = shdr->sh_addralign;
824    return phdr;
825  }
826
827  static Elf_Ehdr MakeElfHeader(InstructionSet isa) {
828    Elf_Ehdr elf_header = Elf_Ehdr();
829    switch (isa) {
830      case kArm:
831        // Fall through.
832      case kThumb2: {
833        elf_header.e_machine = EM_ARM;
834        elf_header.e_flags = EF_ARM_EABI_VER5;
835        break;
836      }
837      case kArm64: {
838        elf_header.e_machine = EM_AARCH64;
839        elf_header.e_flags = 0;
840        break;
841      }
842      case kX86: {
843        elf_header.e_machine = EM_386;
844        elf_header.e_flags = 0;
845        break;
846      }
847      case kX86_64: {
848        elf_header.e_machine = EM_X86_64;
849        elf_header.e_flags = 0;
850        break;
851      }
852      case kMips: {
853        elf_header.e_machine = EM_MIPS;
854        elf_header.e_flags = (EF_MIPS_NOREORDER |
855                               EF_MIPS_PIC       |
856                               EF_MIPS_CPIC      |
857                               EF_MIPS_ABI_O32   |
858                               EF_MIPS_ARCH_32R2);
859        break;
860      }
861      case kMips64: {
862        elf_header.e_machine = EM_MIPS;
863        elf_header.e_flags = (EF_MIPS_NOREORDER |
864                               EF_MIPS_PIC       |
865                               EF_MIPS_CPIC      |
866                               EF_MIPS_ARCH_64R6);
867        break;
868      }
869      case kNone: {
870        LOG(FATAL) << "No instruction set";
871      }
872    }
873
874    elf_header.e_ident[EI_MAG0]       = ELFMAG0;
875    elf_header.e_ident[EI_MAG1]       = ELFMAG1;
876    elf_header.e_ident[EI_MAG2]       = ELFMAG2;
877    elf_header.e_ident[EI_MAG3]       = ELFMAG3;
878    elf_header.e_ident[EI_CLASS]      = (sizeof(Elf_Addr) == sizeof(Elf32_Addr))
879                                         ? ELFCLASS32 : ELFCLASS64;;
880    elf_header.e_ident[EI_DATA]       = ELFDATA2LSB;
881    elf_header.e_ident[EI_VERSION]    = EV_CURRENT;
882    elf_header.e_ident[EI_OSABI]      = ELFOSABI_LINUX;
883    elf_header.e_ident[EI_ABIVERSION] = 0;
884    elf_header.e_type = ET_DYN;
885    elf_header.e_version = 1;
886    elf_header.e_entry = 0;
887    elf_header.e_ehsize = sizeof(Elf_Ehdr);
888    elf_header.e_phentsize = sizeof(Elf_Phdr);
889    elf_header.e_shentsize = sizeof(Elf_Shdr);
890    elf_header.e_phoff = sizeof(Elf_Ehdr);
891    return elf_header;
892  }
893
894  void BuildDynamicSection(const std::string& elf_file_path) {
895    std::string soname(elf_file_path);
896    size_t directory_separator_pos = soname.rfind('/');
897    if (directory_separator_pos != std::string::npos) {
898      soname = soname.substr(directory_separator_pos + 1);
899    }
900    // NB: We must add the name before adding DT_STRSZ.
901    Elf_Word soname_offset = dynstr_.AddName(soname);
902
903    dynamic_.AddDynamicTag(DT_HASH, 0, &hash_);
904    dynamic_.AddDynamicTag(DT_STRTAB, 0, &dynstr_);
905    dynamic_.AddDynamicTag(DT_SYMTAB, 0, &dynsym_);
906    dynamic_.AddDynamicTag(DT_SYMENT, sizeof(Elf_Sym), nullptr);
907    dynamic_.AddDynamicTag(DT_STRSZ, dynstr_.GetSize(), nullptr);
908    dynamic_.AddDynamicTag(DT_SONAME, soname_offset, nullptr);
909  }
910
911  void BuildDynsymSection() {
912    dynsym_.AddSymbol("oatdata", &rodata_, 0, true,
913                      rodata_.GetSize(), STB_GLOBAL, STT_OBJECT);
914    dynsym_.AddSymbol("oatexec", &text_, 0, true,
915                      text_.GetSize(), STB_GLOBAL, STT_OBJECT);
916    dynsym_.AddSymbol("oatlastword", &text_, text_.GetSize() - 4,
917                      true, 4, STB_GLOBAL, STT_OBJECT);
918    if (bss_.GetSize() != 0u) {
919      dynsym_.AddSymbol("oatbss", &bss_, 0, true,
920                        bss_.GetSize(), STB_GLOBAL, STT_OBJECT);
921      dynsym_.AddSymbol("oatbsslastword", &bss_, bss_.GetSize() - 4,
922                        true, 4, STB_GLOBAL, STT_OBJECT);
923    }
924  }
925
  InstructionSet isa_;
  // String table that receives the soname; referenced by the DT_STRTAB tag.
  StrtabSection dynstr_;
  // Symbol table that receives the oat* symbols; referenced by DT_SYMTAB.
  SymtabSection dynsym_;
  // Referenced by the DT_HASH tag.
  HashSection hash_;
  // Exported through the "oatdata" symbol.
  OatSection rodata_;
  // Exported through "oatexec"; its last 4 bytes are exported as "oatlastword".
  OatSection text_;
  // Exported through "oatbss"/"oatbsslastword" when its size is non-zero.
  NoBitsSection bss_;
  // Receives the DT_* tags added by BuildDynamicSection().
  DynamicSection dynamic_;
  StrtabSection strtab_;
  SymtabSection symtab_;
  // Sections added through RegisterSection(); not owned by the builder.
  std::vector<Section*> other_sections_;
  StrtabSection shstrtab_;
938
939  DISALLOW_COPY_AND_ASSIGN(ElfBuilder);
940};
941
942}  // namespace art
943
944#endif  // ART_COMPILER_ELF_BUILDER_H_
945