elf_builder.h revision 67a065368d66377781f9a937e2d9fdebeef3afef
1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef ART_COMPILER_ELF_BUILDER_H_
18#define ART_COMPILER_ELF_BUILDER_H_
19
20#include <vector>
21
22#include "arch/instruction_set.h"
23#include "base/bit_utils.h"
24#include "base/unix_file/fd_file.h"
25#include "buffered_output_stream.h"
26#include "elf_utils.h"
27#include "file_output_stream.h"
28
29namespace art {
30
31class CodeOutput {
32 public:
33  virtual bool Write(OutputStream* out) = 0;
34  virtual ~CodeOutput() {}
35};
36
37// Writes ELF file.
38// The main complication is that the sections often want to reference
39// each other.  We solve this by writing the ELF file in two stages:
40//  * Sections are asked about their size, and overall layout is calculated.
41//  * Sections do the actual writes which may use offsets of other sections.
42template <typename ElfTypes>
43class ElfBuilder FINAL {
44 public:
  // Short names for the ELF integer and structure types provided by the
  // ElfTypes template parameter.  All code below uses these aliases only.
  using Elf_Addr = typename ElfTypes::Addr;
  using Elf_Off = typename ElfTypes::Off;
  using Elf_Word = typename ElfTypes::Word;
  using Elf_Sword = typename ElfTypes::Sword;
  using Elf_Ehdr = typename ElfTypes::Ehdr;
  using Elf_Shdr = typename ElfTypes::Shdr;
  using Elf_Sym = typename ElfTypes::Sym;
  using Elf_Phdr = typename ElfTypes::Phdr;
  using Elf_Dyn = typename ElfTypes::Dyn;
54
55  // Base class of all sections.
56  class Section {
57   public:
58    Section(const std::string& name, Elf_Word type, Elf_Word flags,
59            const Section* link, Elf_Word info, Elf_Word align, Elf_Word entsize)
60        : header_(), section_index_(0), name_(name), link_(link) {
61      header_.sh_type = type;
62      header_.sh_flags = flags;
63      header_.sh_info = info;
64      header_.sh_addralign = align;
65      header_.sh_entsize = entsize;
66    }
67    virtual ~Section() {}
68
69    // Returns the size of the content of this section.  It is used to
70    // calculate file offsets of all sections before doing any writes.
71    virtual Elf_Word GetSize() const = 0;
72
73    // Write the content of this section to the given file.
74    // This must write exactly the number of bytes returned by GetSize().
75    // Offsets of all sections are known when this method is called.
76    virtual bool Write(File* elf_file) = 0;
77
78    Elf_Word GetLink() const {
79      return (link_ != nullptr) ? link_->GetSectionIndex() : 0;
80    }
81
82    const Elf_Shdr* GetHeader() const {
83      return &header_;
84    }
85
86    Elf_Shdr* GetHeader() {
87      return &header_;
88    }
89
90    Elf_Word GetSectionIndex() const {
91      DCHECK_NE(section_index_, 0u);
92      return section_index_;
93    }
94
95    void SetSectionIndex(Elf_Word section_index) {
96      section_index_ = section_index;
97    }
98
99    const std::string& GetName() const {
100      return name_;
101    }
102
103   private:
104    Elf_Shdr header_;
105    Elf_Word section_index_;
106    const std::string name_;
107    const Section* const link_;
108
109    DISALLOW_COPY_AND_ASSIGN(Section);
110  };
111
112  // Writer of .dynamic section.
113  class DynamicSection FINAL : public Section {
114   public:
115    void AddDynamicTag(Elf_Sword tag, Elf_Word value, const Section* section) {
116      DCHECK_NE(tag, static_cast<Elf_Sword>(DT_NULL));
117      dynamics_.push_back({tag, value, section});
118    }
119
120    DynamicSection(const std::string& name, Section* link)
121        : Section(name, SHT_DYNAMIC, SHF_ALLOC,
122                  link, 0, kPageSize, sizeof(Elf_Dyn)) {}
123
124    Elf_Word GetSize() const OVERRIDE {
125      return (dynamics_.size() + 1 /* DT_NULL */) * sizeof(Elf_Dyn);
126    }
127
128    bool Write(File* elf_file) OVERRIDE {
129      std::vector<Elf_Dyn> buffer;
130      buffer.reserve(dynamics_.size() + 1u);
131      for (const ElfDynamicState& it : dynamics_) {
132        if (it.section_ != nullptr) {
133          // We are adding an address relative to a section.
134          buffer.push_back(
135              {it.tag_, {it.value_ + it.section_->GetHeader()->sh_addr}});
136        } else {
137          buffer.push_back({it.tag_, {it.value_}});
138        }
139      }
140      buffer.push_back({DT_NULL, {0}});
141      return WriteArray(elf_file, buffer.data(), buffer.size());
142    }
143
144   private:
145    struct ElfDynamicState {
146      Elf_Sword tag_;
147      Elf_Word value_;
148      const Section* section_;
149    };
150    std::vector<ElfDynamicState> dynamics_;
151  };
152
  // Signature of the callback RawSection uses to patch its buffer right
  // before the buffer is written.  RawSection::Write() passes, in order: the
  // recorded patch locations, the sh_addr of the section being written, the
  // sh_addr of the patch base section, and a pointer to the section's buffer.
  using PatchFn = void (*)(const std::vector<uintptr_t>& patch_locations,
                           Elf_Addr buffer_address,
                           Elf_Addr base_address,
                           std::vector<uint8_t>* buffer);
157
158  // Section with content based on simple memory buffer.
159  // The buffer can be optionally patched before writing.
160  class RawSection FINAL : public Section {
161   public:
162    RawSection(const std::string& name, Elf_Word type, Elf_Word flags,
163               const Section* link, Elf_Word info, Elf_Word align, Elf_Word entsize,
164               PatchFn patch = nullptr, const Section* patch_base_section = nullptr)
165        : Section(name, type, flags, link, info, align, entsize),
166          patched_(false), patch_(patch), patch_base_section_(patch_base_section) {
167    }
168
169    RawSection(const std::string& name, Elf_Word type)
170        : RawSection(name, type, 0, nullptr, 0, 1, 0, nullptr, nullptr) {
171    }
172
173    Elf_Word GetSize() const OVERRIDE {
174      return buffer_.size();
175    }
176
177    bool Write(File* elf_file) OVERRIDE {
178      if (!patch_locations_.empty()) {
179        DCHECK(!patched_);  // Do not patch twice.
180        DCHECK(patch_ != nullptr);
181        DCHECK(patch_base_section_ != nullptr);
182        patch_(patch_locations_,
183               this->GetHeader()->sh_addr,
184               patch_base_section_->GetHeader()->sh_addr,
185               &buffer_);
186        patched_ = true;
187      }
188      return WriteArray(elf_file, buffer_.data(), buffer_.size());
189    }
190
191    bool IsEmpty() const {
192      return buffer_.size() == 0;
193    }
194
195    std::vector<uint8_t>* GetBuffer() {
196      return &buffer_;
197    }
198
199    void SetBuffer(const std::vector<uint8_t>& buffer) {
200      buffer_ = buffer;
201    }
202
203    std::vector<uintptr_t>* GetPatchLocations() {
204      return &patch_locations_;
205    }
206
207   private:
208    std::vector<uint8_t> buffer_;
209    std::vector<uintptr_t> patch_locations_;
210    bool patched_;
211    // User-provided function to do the actual patching.
212    PatchFn patch_;
213    // The section that we patch against (usually .text).
214    const Section* patch_base_section_;
215  };
216
217  // Writer of .rodata section or .text section.
218  // The write is done lazily using the provided CodeOutput.
219  class OatSection FINAL : public Section {
220   public:
221    OatSection(const std::string& name, Elf_Word type, Elf_Word flags,
222               const Section* link, Elf_Word info, Elf_Word align,
223               Elf_Word entsize, Elf_Word size, CodeOutput* code_output)
224        : Section(name, type, flags, link, info, align, entsize),
225          size_(size), code_output_(code_output) {
226    }
227
228    Elf_Word GetSize() const OVERRIDE {
229      return size_;
230    }
231
232    bool Write(File* elf_file) OVERRIDE {
233      // The BufferedOutputStream class contains the buffer as field,
234      // therefore it is too big to allocate on the stack.
235      std::unique_ptr<BufferedOutputStream> output_stream(
236          new BufferedOutputStream(new FileOutputStream(elf_file)));
237      return code_output_->Write(output_stream.get());
238    }
239
240   private:
241    Elf_Word size_;
242    CodeOutput* code_output_;
243  };
244
245  // Writer of .bss section.
246  class NoBitsSection FINAL : public Section {
247   public:
248    NoBitsSection(const std::string& name, Elf_Word size)
249        : Section(name, SHT_NOBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
250          size_(size) {
251    }
252
253    Elf_Word GetSize() const OVERRIDE {
254      return size_;
255    }
256
257    bool Write(File* elf_file ATTRIBUTE_UNUSED) OVERRIDE {
258      LOG(ERROR) << "This section should not be written to the ELF file";
259      return false;
260    }
261
262   private:
263    Elf_Word size_;
264  };
265
266  // Writer of .dynstr .strtab and .shstrtab sections.
267  class StrtabSection FINAL : public Section {
268   public:
269    StrtabSection(const std::string& name, Elf_Word flags)
270        : Section(name, SHT_STRTAB, flags, nullptr, 0, 1, 0) {
271      buffer_.reserve(4 * KB);
272      // The first entry of strtab must be empty string.
273      buffer_ += '\0';
274    }
275
276    Elf_Word AddName(const std::string& name) {
277      Elf_Word offset = buffer_.size();
278      buffer_ += name;
279      buffer_ += '\0';
280      return offset;
281    }
282
283    Elf_Word GetSize() const OVERRIDE {
284      return buffer_.size();
285    }
286
287    bool Write(File* elf_file) OVERRIDE {
288      return WriteArray(elf_file, buffer_.data(), buffer_.size());
289    }
290
291   private:
292    std::string buffer_;
293  };
294
295  class HashSection;
296
297  // Writer of .dynsym and .symtab sections.
298  class SymtabSection FINAL : public Section {
299   public:
300    // Add a symbol with given name to this symtab. The symbol refers to
301    // 'relative_addr' within the given section and has the given attributes.
302    void AddSymbol(const std::string& name, const Section* section,
303                   Elf_Addr addr, bool is_relative, Elf_Word size,
304                   uint8_t binding, uint8_t type, uint8_t other = 0) {
305      CHECK(section != nullptr);
306      Elf_Word name_idx = strtab_->AddName(name);
307      symbols_.push_back({ name, section, addr, size, is_relative,
308                           MakeStInfo(binding, type), other, name_idx });
309    }
310
311    SymtabSection(const std::string& name, Elf_Word type, Elf_Word flags,
312                  StrtabSection* strtab)
313        : Section(name, type, flags, strtab, 0, sizeof(Elf_Off), sizeof(Elf_Sym)),
314          strtab_(strtab) {
315    }
316
317    bool IsEmpty() const {
318      return symbols_.empty();
319    }
320
321    Elf_Word GetSize() const OVERRIDE {
322      return (1 /* NULL */ + symbols_.size()) * sizeof(Elf_Sym);
323    }
324
325    bool Write(File* elf_file) OVERRIDE {
326      std::vector<Elf_Sym> buffer;
327      buffer.reserve(1u + symbols_.size());
328      buffer.push_back(Elf_Sym());  // NULL.
329      for (const ElfSymbolState& it : symbols_) {
330        Elf_Sym sym = Elf_Sym();
331        sym.st_name = it.name_idx_;
332        if (it.is_relative_) {
333          sym.st_value = it.addr_ + it.section_->GetHeader()->sh_addr;
334        } else {
335          sym.st_value = it.addr_;
336        }
337        sym.st_size = it.size_;
338        sym.st_other = it.other_;
339        sym.st_shndx = it.section_->GetSectionIndex();
340        sym.st_info = it.info_;
341        buffer.push_back(sym);
342      }
343      return WriteArray(elf_file, buffer.data(), buffer.size());
344    }
345
346   private:
347    struct ElfSymbolState {
348      const std::string name_;
349      const Section* section_;
350      Elf_Addr addr_;
351      Elf_Word size_;
352      bool is_relative_;
353      uint8_t info_;
354      uint8_t other_;
355      Elf_Word name_idx_;  // index in the strtab.
356    };
357
358    static inline constexpr uint8_t MakeStInfo(uint8_t binding, uint8_t type) {
359      return ((binding) << 4) + ((type) & 0xf);
360    }
361
362    // The symbols in the same order they will be in the symbol table.
363    std::vector<ElfSymbolState> symbols_;
364    StrtabSection* strtab_;
365
366    friend class HashSection;
367  };
368
369  // TODO: Consider removing.
370  // We use it only for the dynsym section which has only 5 symbols.
371  // We do not use it for symtab, and we probably do not have to
372  // since we use those symbols only to print backtraces.
373  class HashSection FINAL : public Section {
374   public:
375    HashSection(const std::string& name, Elf_Word flags, SymtabSection* symtab)
376        : Section(name, SHT_HASH, flags, symtab,
377                  0, sizeof(Elf_Word), sizeof(Elf_Word)),
378          symtab_(symtab) {
379    }
380
381    Elf_Word GetSize() const OVERRIDE {
382      Elf_Word nbuckets = GetNumBuckets();
383      Elf_Word chain_size = symtab_->symbols_.size() + 1 /* NULL */;
384      return (2 /* header */ + nbuckets + chain_size) * sizeof(Elf_Word);
385    }
386
387    bool Write(File* const elf_file) OVERRIDE {
388      // Here is how The ELF hash table works.
389      // There are 3 arrays to worry about.
390      // * The symbol table where the symbol information is.
391      // * The bucket array which is an array of indexes into the symtab and chain.
392      // * The chain array which is also an array of indexes into the symtab and chain.
393      //
394      // Lets say the state is something like this.
395      // +--------+       +--------+      +-----------+
396      // | symtab |       | bucket |      |   chain   |
397      // |  null  |       | 1      |      | STN_UNDEF |
398      // | <sym1> |       | 4      |      | 2         |
399      // | <sym2> |       |        |      | 5         |
400      // | <sym3> |       |        |      | STN_UNDEF |
401      // | <sym4> |       |        |      | 3         |
402      // | <sym5> |       |        |      | STN_UNDEF |
403      // +--------+       +--------+      +-----------+
404      //
405      // The lookup process (in python psudocode) is
406      //
407      // def GetSym(name):
408      //     # NB STN_UNDEF == 0
409      //     indx = bucket[elfhash(name) % num_buckets]
410      //     while indx != STN_UNDEF:
411      //         if GetSymbolName(symtab[indx]) == name:
412      //             return symtab[indx]
413      //         indx = chain[indx]
414      //     return SYMBOL_NOT_FOUND
415      //
416      // Between bucket and chain arrays every symtab index must be present exactly
417      // once (except for STN_UNDEF, which must be present 1 + num_bucket times).
418      const auto& symbols = symtab_->symbols_;
419      // Select number of buckets.
420      // This is essentially arbitrary.
421      Elf_Word nbuckets = GetNumBuckets();
422      // 1 is for the implicit NULL symbol.
423      Elf_Word chain_size = (symbols.size() + 1);
424      std::vector<Elf_Word> hash;
425      hash.push_back(nbuckets);
426      hash.push_back(chain_size);
427      uint32_t bucket_offset = hash.size();
428      uint32_t chain_offset = bucket_offset + nbuckets;
429      hash.resize(hash.size() + nbuckets + chain_size, 0);
430
431      Elf_Word* buckets = hash.data() + bucket_offset;
432      Elf_Word* chain   = hash.data() + chain_offset;
433
434      // Set up the actual hash table.
435      for (Elf_Word i = 0; i < symbols.size(); i++) {
436        // Add 1 since we need to have the null symbol that is not in the symbols
437        // list.
438        Elf_Word index = i + 1;
439        Elf_Word hash_val = static_cast<Elf_Word>(elfhash(symbols[i].name_.c_str())) % nbuckets;
440        if (buckets[hash_val] == 0) {
441          buckets[hash_val] = index;
442        } else {
443          hash_val = buckets[hash_val];
444          CHECK_LT(hash_val, chain_size);
445          while (chain[hash_val] != 0) {
446            hash_val = chain[hash_val];
447            CHECK_LT(hash_val, chain_size);
448          }
449          chain[hash_val] = index;
450          // Check for loops. Works because if this is non-empty then there must be
451          // another cell which already contains the same symbol index as this one,
452          // which means some symbol has more then one name, which isn't allowed.
453          CHECK_EQ(chain[index], static_cast<Elf_Word>(0));
454        }
455      }
456      return WriteArray(elf_file, hash.data(), hash.size());
457    }
458
459   private:
460    Elf_Word GetNumBuckets() const {
461      const auto& symbols = symtab_->symbols_;
462      if (symbols.size() < 8) {
463        return 2;
464      } else if (symbols.size() < 32) {
465        return 4;
466      } else if (symbols.size() < 256) {
467        return 16;
468      } else {
469        // Have about 32 ids per bucket.
470        return RoundUp(symbols.size()/32, 2);
471      }
472    }
473
474    // from bionic
475    static inline unsigned elfhash(const char *_name) {
476      const unsigned char *name = (const unsigned char *) _name;
477      unsigned h = 0, g;
478
479      while (*name) {
480        h = (h << 4) + *name++;
481        g = h & 0xf0000000;
482        h ^= g;
483        h ^= g >> 24;
484      }
485      return h;
486    }
487
488    SymtabSection* symtab_;
489
490    DISALLOW_COPY_AND_ASSIGN(HashSection);
491  };
492
  // Creates the builder together with its built-in sections:
  // .dynstr/.dynsym/.hash/.dynamic for the loader, .rodata and .text of
  // |rodata_size| / |text_size| bytes whose content is produced later by
  // |rodata_writer| / |text_writer|, .bss of |bss_size| bytes, and the
  // .symtab/.strtab/.shstrtab tables.  |isa| selects the ELF header contents.
  // NOTE(review): the member declarations are outside this view.  The
  // initializers pass addresses of sibling members (e.g. &dynstr_), so check
  // the declaration order before reordering anything here.
  ElfBuilder(InstructionSet isa,
             Elf_Word rodata_size, CodeOutput* rodata_writer,
             Elf_Word text_size, CodeOutput* text_writer,
             Elf_Word bss_size)
    : isa_(isa),
      dynstr_(".dynstr", SHF_ALLOC),
      dynsym_(".dynsym", SHT_DYNSYM, SHF_ALLOC, &dynstr_),
      hash_(".hash", SHF_ALLOC, &dynsym_),
      rodata_(".rodata", SHT_PROGBITS, SHF_ALLOC,
              nullptr, 0, kPageSize, 0, rodata_size, rodata_writer),
      text_(".text", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR,
            nullptr, 0, kPageSize, 0, text_size, text_writer),
      bss_(".bss", bss_size),
      dynamic_(".dynamic", &dynstr_),
      strtab_(".strtab", 0),
      symtab_(".symtab", SHT_SYMTAB, 0, &strtab_),
      shstrtab_(".shstrtab", 0) {
  }
  ~ElfBuilder() {}
512
  // Returns the built-in .text section, which is a member of this builder.
  OatSection* GetText() { return &text_; }
  // Returns the built-in .symtab section.  Write() emits it (together with
  // .strtab) only if at least one symbol has been added to it.
  SymtabSection* GetSymtab() { return &symtab_; }
515
516  bool Write(File* elf_file) {
517    // Since the .text section of an oat file contains relative references to .rodata
518    // and (optionally) .bss, we keep these 2 or 3 sections together. This creates
519    // a non-traditional layout where the .bss section is mapped independently of the
520    // .dynamic section and needs its own program header with LOAD RW.
521    //
522    // The basic layout of the elf file. Order may be different in final output.
523    // +-------------------------+
524    // | Elf_Ehdr                |
525    // +-------------------------+
526    // | Elf_Phdr PHDR           |
527    // | Elf_Phdr LOAD R         | .dynsym .dynstr .hash .rodata
528    // | Elf_Phdr LOAD R X       | .text
529    // | Elf_Phdr LOAD RW        | .bss (Optional)
530    // | Elf_Phdr LOAD RW        | .dynamic
531    // | Elf_Phdr DYNAMIC        | .dynamic
532    // | Elf_Phdr LOAD R         | .eh_frame .eh_frame_hdr
533    // | Elf_Phdr EH_FRAME R     | .eh_frame_hdr
534    // +-------------------------+
535    // | .dynsym                 |
536    // | Elf_Sym  STN_UNDEF      |
537    // | Elf_Sym  oatdata        |
538    // | Elf_Sym  oatexec        |
539    // | Elf_Sym  oatlastword    |
540    // | Elf_Sym  oatbss         | (Optional)
541    // | Elf_Sym  oatbsslastword | (Optional)
542    // +-------------------------+
543    // | .dynstr                 |
544    // | names for .dynsym       |
545    // +-------------------------+
546    // | .hash                   |
547    // | hashtable for dynsym    |
548    // +-------------------------+
549    // | .rodata                 |
550    // | oatdata..oatexec-4      |
551    // +-------------------------+
552    // | .text                   |
553    // | oatexec..oatlastword    |
554    // +-------------------------+
555    // | .dynamic                |
556    // | Elf_Dyn DT_HASH         |
557    // | Elf_Dyn DT_STRTAB       |
558    // | Elf_Dyn DT_SYMTAB       |
559    // | Elf_Dyn DT_SYMENT       |
560    // | Elf_Dyn DT_STRSZ        |
561    // | Elf_Dyn DT_SONAME       |
562    // | Elf_Dyn DT_NULL         |
563    // +-------------------------+  (Optional)
564    // | .symtab                 |  (Optional)
565    // | program symbols         |  (Optional)
566    // +-------------------------+  (Optional)
567    // | .strtab                 |  (Optional)
568    // | names for .symtab       |  (Optional)
569    // +-------------------------+  (Optional)
570    // | .eh_frame               |  (Optional)
571    // +-------------------------+  (Optional)
572    // | .eh_frame_hdr           |  (Optional)
573    // +-------------------------+  (Optional)
574    // | .debug_info             |  (Optional)
575    // +-------------------------+  (Optional)
576    // | .debug_abbrev           |  (Optional)
577    // +-------------------------+  (Optional)
578    // | .debug_str              |  (Optional)
579    // +-------------------------+  (Optional)
580    // | .debug_line             |  (Optional)
581    // +-------------------------+
582    // | .shstrtab               |
583    // | names of sections       |
584    // +-------------------------+
585    // | Elf_Shdr null           |
586    // | Elf_Shdr .dynsym        |
587    // | Elf_Shdr .dynstr        |
588    // | Elf_Shdr .hash          |
589    // | Elf_Shdr .rodata        |
590    // | Elf_Shdr .text          |
591    // | Elf_Shdr .bss           |  (Optional)
592    // | Elf_Shdr .dynamic       |
593    // | Elf_Shdr .symtab        |  (Optional)
594    // | Elf_Shdr .strtab        |  (Optional)
595    // | Elf_Shdr .eh_frame      |  (Optional)
596    // | Elf_Shdr .eh_frame_hdr  |  (Optional)
597    // | Elf_Shdr .debug_info    |  (Optional)
598    // | Elf_Shdr .debug_abbrev  |  (Optional)
599    // | Elf_Shdr .debug_str     |  (Optional)
600    // | Elf_Shdr .debug_line    |  (Optional)
601    // | Elf_Shdr .oat_patches   |  (Optional)
602    // | Elf_Shdr .shstrtab      |
603    // +-------------------------+
604    constexpr bool debug_logging_ = false;
605
606    // Create a list of all section which we want to write.
607    // This is the order in which they will be written.
608    std::vector<Section*> sections;
609    sections.push_back(&dynsym_);
610    sections.push_back(&dynstr_);
611    sections.push_back(&hash_);
612    sections.push_back(&rodata_);
613    sections.push_back(&text_);
614    if (bss_.GetSize() != 0u) {
615      sections.push_back(&bss_);
616    }
617    sections.push_back(&dynamic_);
618    if (!symtab_.IsEmpty()) {
619      sections.push_back(&symtab_);
620      sections.push_back(&strtab_);
621    }
622    for (Section* section : other_sections_) {
623      sections.push_back(section);
624    }
625    sections.push_back(&shstrtab_);
626    for (size_t i = 0; i < sections.size(); i++) {
627      // The first section index is 1.  Index 0 is reserved for NULL.
628      // Section index is used for relative symbols and for section links.
629      sections[i]->SetSectionIndex(i + 1);
630      // Add section name to .shstrtab.
631      Elf_Word name_offset = shstrtab_.AddName(sections[i]->GetName());
632      sections[i]->GetHeader()->sh_name = name_offset;
633    }
634
635    // The running program does not have access to section headers
636    // and the loader is not supposed to use them either.
637    // The dynamic sections therefore replicates some of the layout
638    // information like the address and size of .rodata and .text.
639    // It also contains other metadata like the SONAME.
640    // The .dynamic section is found using the PT_DYNAMIC program header.
641    BuildDynsymSection();
642    BuildDynamicSection(elf_file->GetPath());
643
644    // We do not know the number of headers until the final stages of write.
645    // It is easiest to just reserve a fixed amount of space for them.
646    constexpr size_t kMaxProgramHeaders = 8;
647    constexpr size_t kProgramHeadersOffset = sizeof(Elf_Ehdr);
648
649    // Layout of all sections - determine the final file offsets and addresses.
650    // This must be done after we have built all sections and know their size.
651    Elf_Off file_offset = kProgramHeadersOffset + sizeof(Elf_Phdr) * kMaxProgramHeaders;
652    Elf_Addr load_address = file_offset;
653    std::vector<Elf_Shdr> section_headers;
654    section_headers.reserve(1u + sections.size());
655    section_headers.push_back(Elf_Shdr());  // NULL at index 0.
656    for (auto* section : sections) {
657      Elf_Shdr* header = section->GetHeader();
658      Elf_Off alignment = header->sh_addralign > 0 ? header->sh_addralign : 1;
659      header->sh_size = section->GetSize();
660      header->sh_link = section->GetLink();
661      // Allocate memory for the section in the file.
662      if (header->sh_type != SHT_NOBITS) {
663        header->sh_offset = RoundUp(file_offset, alignment);
664        file_offset = header->sh_offset + header->sh_size;
665      }
666      // Allocate memory for the section during program execution.
667      if ((header->sh_flags & SHF_ALLOC) != 0) {
668        header->sh_addr = RoundUp(load_address, alignment);
669        load_address = header->sh_addr + header->sh_size;
670      }
671      if (debug_logging_) {
672        LOG(INFO) << "Section " << section->GetName() << ":" << std::hex
673                  << " offset=0x" << header->sh_offset
674                  << " addr=0x" << header->sh_addr
675                  << " size=0x" << header->sh_size;
676      }
677      // Collect section headers into continuous array for convenience.
678      section_headers.push_back(*header);
679    }
680    Elf_Off section_headers_offset = RoundUp(file_offset, sizeof(Elf_Off));
681
682    // Create program headers now that we know the layout of the whole file.
683    // Each segment contains one or more sections which are mapped together.
684    // Not all sections are mapped during the execution of the program.
685    // PT_LOAD does the mapping.  Other PT_* types allow the program to locate
686    // interesting parts of memory and their addresses overlap with PT_LOAD.
687    std::vector<Elf_Phdr> program_headers;
688    program_headers.push_back(Elf_Phdr());  // Placeholder for PT_PHDR.
689    // Create the main LOAD R segment which spans all sections up to .rodata.
690    const Elf_Shdr* rodata = rodata_.GetHeader();
691    program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R,
692      0, rodata->sh_offset + rodata->sh_size, rodata->sh_addralign));
693    program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_X, text_));
694    if (bss_.GetHeader()->sh_size != 0u) {
695      program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_W, bss_));
696    }
697    program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_W, dynamic_));
698    program_headers.push_back(MakeProgramHeader(PT_DYNAMIC, PF_R | PF_W, dynamic_));
699    const Section* eh_frame = FindSection(".eh_frame");
700    if (eh_frame != nullptr) {
701      program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R, *eh_frame));
702      const Section* eh_frame_hdr = FindSection(".eh_frame_hdr");
703      if (eh_frame_hdr != nullptr) {
704        // Check layout: eh_frame is before eh_frame_hdr and there is no gap.
705        CHECK_LE(eh_frame->GetHeader()->sh_offset, eh_frame_hdr->GetHeader()->sh_offset);
706        CHECK_EQ(eh_frame->GetHeader()->sh_offset + eh_frame->GetHeader()->sh_size,
707                 eh_frame_hdr->GetHeader()->sh_offset);
708        // Extend the PT_LOAD of .eh_frame to include the .eh_frame_hdr as well.
709        program_headers.back().p_filesz += eh_frame_hdr->GetHeader()->sh_size;
710        program_headers.back().p_memsz  += eh_frame_hdr->GetHeader()->sh_size;
711        program_headers.push_back(MakeProgramHeader(PT_GNU_EH_FRAME, PF_R, *eh_frame_hdr));
712      }
713    }
714    DCHECK_EQ(program_headers[0].p_type, 0u);  // Check placeholder.
715    program_headers[0] = MakeProgramHeader(PT_PHDR, PF_R,
716      kProgramHeadersOffset, program_headers.size() * sizeof(Elf_Phdr), sizeof(Elf_Off));
717    CHECK_LE(program_headers.size(), kMaxProgramHeaders);
718
719    // Create the main ELF header.
720    Elf_Ehdr elf_header = MakeElfHeader(isa_);
721    elf_header.e_phoff = kProgramHeadersOffset;
722    elf_header.e_shoff = section_headers_offset;
723    elf_header.e_phnum = program_headers.size();
724    elf_header.e_shnum = section_headers.size();
725    elf_header.e_shstrndx = shstrtab_.GetSectionIndex();
726
727    // Write all headers and section content to the file.
728    // Depending on the implementations of Section::Write, this
729    // might be just memory copies or some more elaborate operations.
730    if (!WriteArray(elf_file, &elf_header, 1)) {
731      LOG(INFO) << "Failed to write the ELF header";
732      return false;
733    }
734    if (!WriteArray(elf_file, program_headers.data(), program_headers.size())) {
735      LOG(INFO) << "Failed to write the program headers";
736      return false;
737    }
738    for (Section* section : sections) {
739      const Elf_Shdr* header = section->GetHeader();
740      if (header->sh_type != SHT_NOBITS) {
741        if (!SeekTo(elf_file, header->sh_offset) || !section->Write(elf_file)) {
742          LOG(INFO) << "Failed to write section " << section->GetName();
743          return false;
744        }
745        Elf_Word current_offset = lseek(elf_file->Fd(), 0, SEEK_CUR);
746        CHECK_EQ(current_offset, header->sh_offset + header->sh_size)
747          << "The number of bytes written does not match GetSize()";
748      }
749    }
750    if (!SeekTo(elf_file, section_headers_offset) ||
751        !WriteArray(elf_file, section_headers.data(), section_headers.size())) {
752      LOG(INFO) << "Failed to write the section headers";
753      return false;
754    }
755    return true;
756  }
757
758  // Adds the given section to the builder.  It does not take ownership.
  // Registered sections are written in registration order, after the built-in
  // sections and before .shstrtab (see Write()).
  void RegisterSection(Section* section) {
    other_sections_.push_back(section);
  }
762
763  const Section* FindSection(const char* name) {
764    for (const auto* section : other_sections_) {
765      if (section->GetName() == name) {
766        return section;
767      }
768    }
769    return nullptr;
770  }
771
772 private:
773  static bool SeekTo(File* elf_file, Elf_Word offset) {
774    DCHECK_LE(lseek(elf_file->Fd(), 0, SEEK_CUR), static_cast<off_t>(offset))
775      << "Seeking backwards";
776    if (static_cast<off_t>(offset) != lseek(elf_file->Fd(), offset, SEEK_SET)) {
777      PLOG(ERROR) << "Failed to seek in file " << elf_file->GetPath();
778      return false;
779    }
780    return true;
781  }
782
783  template<typename T>
784  static bool WriteArray(File* elf_file, const T* data, size_t count) {
785    if (count != 0) {
786      DCHECK(data != nullptr);
787      if (!elf_file->WriteFully(data, count * sizeof(T))) {
788        PLOG(ERROR) << "Failed to write to file " << elf_file->GetPath();
789        return false;
790      }
791    }
792    return true;
793  }
794
795  // Helper - create segment header based on memory range.
796  static Elf_Phdr MakeProgramHeader(Elf_Word type, Elf_Word flags,
797                                    Elf_Off offset, Elf_Word size, Elf_Word align) {
798    Elf_Phdr phdr = Elf_Phdr();
799    phdr.p_type    = type;
800    phdr.p_flags   = flags;
801    phdr.p_offset  = offset;
802    phdr.p_vaddr   = offset;
803    phdr.p_paddr   = offset;
804    phdr.p_filesz  = size;
805    phdr.p_memsz   = size;
806    phdr.p_align   = align;
807    return phdr;
808  }
809
810  // Helper - create segment header based on section header.
811  static Elf_Phdr MakeProgramHeader(Elf_Word type, Elf_Word flags,
812                                    const Section& section) {
813    const Elf_Shdr* shdr = section.GetHeader();
814    // Only run-time allocated sections should be in segment headers.
815    CHECK_NE(shdr->sh_flags & SHF_ALLOC, 0u);
816    Elf_Phdr phdr = Elf_Phdr();
817    phdr.p_type   = type;
818    phdr.p_flags  = flags;
819    phdr.p_offset = shdr->sh_offset;
820    phdr.p_vaddr  = shdr->sh_addr;
821    phdr.p_paddr  = shdr->sh_addr;
822    phdr.p_filesz = shdr->sh_type != SHT_NOBITS ? shdr->sh_size : 0u;
823    phdr.p_memsz  = shdr->sh_size;
824    phdr.p_align  = shdr->sh_addralign;
825    return phdr;
826  }
827
828  static Elf_Ehdr MakeElfHeader(InstructionSet isa) {
829    Elf_Ehdr elf_header = Elf_Ehdr();
830    switch (isa) {
831      case kArm:
832        // Fall through.
833      case kThumb2: {
834        elf_header.e_machine = EM_ARM;
835        elf_header.e_flags = EF_ARM_EABI_VER5;
836        break;
837      }
838      case kArm64: {
839        elf_header.e_machine = EM_AARCH64;
840        elf_header.e_flags = 0;
841        break;
842      }
843      case kX86: {
844        elf_header.e_machine = EM_386;
845        elf_header.e_flags = 0;
846        break;
847      }
848      case kX86_64: {
849        elf_header.e_machine = EM_X86_64;
850        elf_header.e_flags = 0;
851        break;
852      }
853      case kMips: {
854        elf_header.e_machine = EM_MIPS;
855        elf_header.e_flags = (EF_MIPS_NOREORDER |
856                               EF_MIPS_PIC       |
857                               EF_MIPS_CPIC      |
858                               EF_MIPS_ABI_O32   |
859                               EF_MIPS_ARCH_32R2);
860        break;
861      }
862      case kMips64: {
863        elf_header.e_machine = EM_MIPS;
864        elf_header.e_flags = (EF_MIPS_NOREORDER |
865                               EF_MIPS_PIC       |
866                               EF_MIPS_CPIC      |
867                               EF_MIPS_ARCH_64R6);
868        break;
869      }
870      case kNone: {
871        LOG(FATAL) << "No instruction set";
872      }
873    }
874
875    elf_header.e_ident[EI_MAG0]       = ELFMAG0;
876    elf_header.e_ident[EI_MAG1]       = ELFMAG1;
877    elf_header.e_ident[EI_MAG2]       = ELFMAG2;
878    elf_header.e_ident[EI_MAG3]       = ELFMAG3;
879    elf_header.e_ident[EI_CLASS]      = (sizeof(Elf_Addr) == sizeof(Elf32_Addr))
880                                         ? ELFCLASS32 : ELFCLASS64;;
881    elf_header.e_ident[EI_DATA]       = ELFDATA2LSB;
882    elf_header.e_ident[EI_VERSION]    = EV_CURRENT;
883    elf_header.e_ident[EI_OSABI]      = ELFOSABI_LINUX;
884    elf_header.e_ident[EI_ABIVERSION] = 0;
885    elf_header.e_type = ET_DYN;
886    elf_header.e_version = 1;
887    elf_header.e_entry = 0;
888    elf_header.e_ehsize = sizeof(Elf_Ehdr);
889    elf_header.e_phentsize = sizeof(Elf_Phdr);
890    elf_header.e_shentsize = sizeof(Elf_Shdr);
891    elf_header.e_phoff = sizeof(Elf_Ehdr);
892    return elf_header;
893  }
894
895  void BuildDynamicSection(const std::string& elf_file_path) {
896    std::string soname(elf_file_path);
897    size_t directory_separator_pos = soname.rfind('/');
898    if (directory_separator_pos != std::string::npos) {
899      soname = soname.substr(directory_separator_pos + 1);
900    }
901    // NB: We must add the name before adding DT_STRSZ.
902    Elf_Word soname_offset = dynstr_.AddName(soname);
903
904    dynamic_.AddDynamicTag(DT_HASH, 0, &hash_);
905    dynamic_.AddDynamicTag(DT_STRTAB, 0, &dynstr_);
906    dynamic_.AddDynamicTag(DT_SYMTAB, 0, &dynsym_);
907    dynamic_.AddDynamicTag(DT_SYMENT, sizeof(Elf_Sym), nullptr);
908    dynamic_.AddDynamicTag(DT_STRSZ, dynstr_.GetSize(), nullptr);
909    dynamic_.AddDynamicTag(DT_SONAME, soname_offset, nullptr);
910  }
911
912  void BuildDynsymSection() {
913    dynsym_.AddSymbol("oatdata", &rodata_, 0, true,
914                      rodata_.GetSize(), STB_GLOBAL, STT_OBJECT);
915    dynsym_.AddSymbol("oatexec", &text_, 0, true,
916                      text_.GetSize(), STB_GLOBAL, STT_OBJECT);
917    dynsym_.AddSymbol("oatlastword", &text_, text_.GetSize() - 4,
918                      true, 4, STB_GLOBAL, STT_OBJECT);
919    if (bss_.GetSize() != 0u) {
920      dynsym_.AddSymbol("oatbss", &bss_, 0, true,
921                        bss_.GetSize(), STB_GLOBAL, STT_OBJECT);
922      dynsym_.AddSymbol("oatbsslastword", &bss_, bss_.GetSize() - 4,
923                        true, 4, STB_GLOBAL, STT_OBJECT);
924    }
925  }
926
927  InstructionSet isa_;  // NOTE(review): presumably the target instruction set - confirm.
928  StrtabSection dynstr_;  // Receives the soname; referenced by the DT_STRTAB tag.
929  SymtabSection dynsym_;  // Receives the oat* symbols; referenced by the DT_SYMTAB tag.
930  HashSection hash_;  // Referenced by the DT_HASH tag.
931  OatSection rodata_;  // Spanned by the "oatdata" symbol.
932  OatSection text_;  // Spanned by "oatexec"; its last 4 bytes are "oatlastword".
933  NoBitsSection bss_;  // Gets "oatbss"/"oatbsslastword" symbols when non-empty.
934  DynamicSection dynamic_;  // Collects the DT_* tags from BuildDynamicSection().
935  StrtabSection strtab_;  // NOTE(review): presumably the names for symtab_ - confirm.
936  SymtabSection symtab_;  // NOTE(review): presumably the non-dynamic symbols - confirm.
937  std::vector<Section*> other_sections_;  // Not owned; appended by RegisterSection().
938  StrtabSection shstrtab_;  // NOTE(review): presumably section header names - confirm.
939
940  DISALLOW_COPY_AND_ASSIGN(ElfBuilder);
941};
942
943}  // namespace art
944
945#endif  // ART_COMPILER_ELF_BUILDER_H_
946