elf_builder.h revision 90688ae398123b7a6c3752935fab0ebbb86d64cb
1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef ART_COMPILER_ELF_BUILDER_H_
18#define ART_COMPILER_ELF_BUILDER_H_
19
20#include <vector>
21
22#include "arch/instruction_set.h"
23#include "base/unix_file/fd_file.h"
24#include "buffered_output_stream.h"
25#include "elf_utils.h"
26#include "file_output_stream.h"
27
28namespace art {
29
30class CodeOutput {
31 public:
32  virtual bool Write(OutputStream* out) = 0;
33  virtual ~CodeOutput() {}
34};
35
36// Writes ELF file.
37// The main complication is that the sections often want to reference
38// each other.  We solve this by writing the ELF file in two stages:
39//  * Sections are asked about their size, and overall layout is calculated.
40//  * Sections do the actual writes which may use offsets of other sections.
41template <typename ElfTypes>
42class ElfBuilder FINAL {
43 public:
  // Shorthands for the ELF data types supplied by the ElfTypes parameter.
  // (Presumably ElfTypes selects between the 32-bit and 64-bit flavours of
  // the format - confirm against its definition in elf_utils.h.)
  using Elf_Addr = typename ElfTypes::Addr;
  using Elf_Off = typename ElfTypes::Off;
  using Elf_Word = typename ElfTypes::Word;
  using Elf_Sword = typename ElfTypes::Sword;
  using Elf_Ehdr = typename ElfTypes::Ehdr;    // ELF file header.
  using Elf_Shdr = typename ElfTypes::Shdr;    // Section header.
  using Elf_Sym = typename ElfTypes::Sym;      // Symbol table entry.
  using Elf_Phdr = typename ElfTypes::Phdr;    // Program (segment) header.
  using Elf_Dyn = typename ElfTypes::Dyn;      // Entry of the .dynamic section.
53
54  // Base class of all sections.
55  class Section {
56   public:
57    Section(const std::string& name, Elf_Word type, Elf_Word flags,
58            const Section* link, Elf_Word info, Elf_Word align, Elf_Word entsize)
59        : header_(new Elf_Shdr()), section_index_(0), name_(name), link_(link) {
60      header_->sh_type = type;
61      header_->sh_flags = flags;
62      header_->sh_info = info;
63      header_->sh_addralign = align;
64      header_->sh_entsize = entsize;
65    }
66    virtual ~Section() {}
67
68    // Returns the size of the content of this section.  It is used to
69    // calculate file offsets of all sections before doing any writes.
70    virtual Elf_Word GetSize() const = 0;
71
72    // Write the content of this section to the given file.
73    // This must write exactly the number of bytes returned by GetSize().
74    // Offsets of all sections are known when this method is called.
75    virtual bool Write(File* elf_file) = 0;
76
77    Elf_Word GetLink() const {
78      return (link_ != nullptr) ? link_->GetSectionIndex() : 0;
79    }
80
81    const Elf_Shdr* GetHeader() const {
82      return header_.get();
83    }
84
85    Elf_Shdr* GetHeader() {
86      return header_.get();
87    }
88
89    Elf_Word GetSectionIndex() const {
90      DCHECK_NE(section_index_, 0u);
91      return section_index_;
92    }
93
94    void SetSectionIndex(Elf_Word section_index) {
95      section_index_ = section_index;
96    }
97
98    const std::string& GetName() const {
99      return name_;
100    }
101
102   private:
103    // Elf_Shdr is somewhat large so allocate it on the heap.
104    // Otherwise we get in trouble with stack frame sizes.
105    std::unique_ptr<Elf_Shdr> header_;
106    Elf_Word section_index_;
107    const std::string name_;
108    const Section* const link_;
109
110    DISALLOW_COPY_AND_ASSIGN(Section);
111  };
112
113  // Writer of .dynamic section.
114  class DynamicSection FINAL : public Section {
115   public:
116    void AddDynamicTag(Elf_Sword tag, Elf_Word value, const Section* section) {
117      DCHECK_NE(tag, static_cast<Elf_Sword>(DT_NULL));
118      dynamics_.push_back({tag, value, section});
119    }
120
121    DynamicSection(const std::string& name, Section* link)
122        : Section(name, SHT_DYNAMIC, SHF_ALLOC,
123                  link, 0, kPageSize, sizeof(Elf_Dyn)) {}
124
125    Elf_Word GetSize() const OVERRIDE {
126      return (dynamics_.size() + 1 /* DT_NULL */) * sizeof(Elf_Dyn);
127    }
128
129    bool Write(File* elf_file) OVERRIDE {
130      std::vector<Elf_Dyn> buffer;
131      buffer.reserve(dynamics_.size() + 1u);
132      for (const ElfDynamicState& it : dynamics_) {
133        if (it.section_ != nullptr) {
134          // We are adding an address relative to a section.
135          buffer.push_back(
136              {it.tag_, {it.value_ + it.section_->GetHeader()->sh_addr}});
137        } else {
138          buffer.push_back({it.tag_, {it.value_}});
139        }
140      }
141      buffer.push_back({DT_NULL, {0}});
142      return WriteArray(elf_file, buffer.data(), buffer.size());
143    }
144
145   private:
146    struct ElfDynamicState {
147      Elf_Sword tag_;
148      Elf_Word value_;
149      const Section* section_;
150    };
151    std::vector<ElfDynamicState> dynamics_;
152  };
153
154  using PatchFn = void (*)(const std::vector<uintptr_t>& patch_locations,
155                           Elf_Addr buffer_address,
156                           Elf_Addr base_address,
157                           std::vector<uint8_t>* buffer);
158
159  // Section with content based on simple memory buffer.
160  // The buffer can be optionally patched before writing.
161  class RawSection FINAL : public Section {
162   public:
163    RawSection(const std::string& name, Elf_Word type, Elf_Word flags,
164               const Section* link, Elf_Word info, Elf_Word align, Elf_Word entsize,
165               PatchFn patch = nullptr, const Section* patch_base_section = nullptr)
166        : Section(name, type, flags, link, info, align, entsize),
167          patched_(false), patch_(patch), patch_base_section_(patch_base_section) {
168    }
169
170    Elf_Word GetSize() const OVERRIDE {
171      return buffer_.size();
172    }
173
174    bool Write(File* elf_file) OVERRIDE {
175      if (!patch_locations_.empty()) {
176        DCHECK(!patched_);  // Do not patch twice.
177        DCHECK(patch_ != nullptr);
178        DCHECK(patch_base_section_ != nullptr);
179        patch_(patch_locations_,
180               this->GetHeader()->sh_addr,
181               patch_base_section_->GetHeader()->sh_addr,
182               &buffer_);
183        patched_ = true;
184      }
185      return WriteArray(elf_file, buffer_.data(), buffer_.size());
186    }
187
188    bool IsEmpty() const {
189      return buffer_.size() == 0;
190    }
191
192    std::vector<uint8_t>* GetBuffer() {
193      return &buffer_;
194    }
195
196    void SetBuffer(const std::vector<uint8_t>& buffer) {
197      buffer_ = buffer;
198    }
199
200    std::vector<uintptr_t>* GetPatchLocations() {
201      return &patch_locations_;
202    }
203
204   private:
205    std::vector<uint8_t> buffer_;
206    std::vector<uintptr_t> patch_locations_;
207    bool patched_;
208    // User-provided function to do the actual patching.
209    PatchFn patch_;
210    // The section that we patch against (usually .text).
211    const Section* patch_base_section_;
212  };
213
214  // Writer of .rodata section or .text section.
215  // The write is done lazily using the provided CodeOutput.
216  class OatSection FINAL : public Section {
217   public:
218    OatSection(const std::string& name, Elf_Word type, Elf_Word flags,
219               const Section* link, Elf_Word info, Elf_Word align,
220               Elf_Word entsize, Elf_Word size, CodeOutput* code_output)
221        : Section(name, type, flags, link, info, align, entsize),
222          size_(size), code_output_(code_output) {
223    }
224
225    Elf_Word GetSize() const OVERRIDE {
226      return size_;
227    }
228
229    bool Write(File* elf_file) OVERRIDE {
230      // The BufferedOutputStream class contains the buffer as field,
231      // therefore it is too big to allocate on the stack.
232      std::unique_ptr<BufferedOutputStream> output_stream(
233          new BufferedOutputStream(new FileOutputStream(elf_file)));
234      return code_output_->Write(output_stream.get());
235    }
236
237   private:
238    Elf_Word size_;
239    CodeOutput* code_output_;
240  };
241
242  // Writer of .bss section.
243  class NoBitsSection FINAL : public Section {
244   public:
245    NoBitsSection(const std::string& name, Elf_Word size)
246        : Section(name, SHT_NOBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
247          size_(size) {
248    }
249
250    Elf_Word GetSize() const OVERRIDE {
251      return size_;
252    }
253
254    bool Write(File* elf_file ATTRIBUTE_UNUSED) OVERRIDE {
255      LOG(ERROR) << "This section should not be written to the ELF file";
256      return false;
257    }
258
259   private:
260    Elf_Word size_;
261  };
262
263  // Writer of .dynstr .strtab and .shstrtab sections.
264  class StrtabSection FINAL : public Section {
265   public:
266    StrtabSection(const std::string& name, Elf_Word flags)
267        : Section(name, SHT_STRTAB, flags, nullptr, 0, 1, 1) {
268      buffer_.reserve(4 * KB);
269      // The first entry of strtab must be empty string.
270      buffer_ += '\0';
271    }
272
273    Elf_Word AddName(const std::string& name) {
274      Elf_Word offset = buffer_.size();
275      buffer_ += name;
276      buffer_ += '\0';
277      return offset;
278    }
279
280    Elf_Word GetSize() const OVERRIDE {
281      return buffer_.size();
282    }
283
284    bool Write(File* elf_file) OVERRIDE {
285      return WriteArray(elf_file, buffer_.data(), buffer_.size());
286    }
287
288   private:
289    std::string buffer_;
290  };
291
292  class HashSection;
293
294  // Writer of .dynsym and .symtab sections.
295  class SymtabSection FINAL : public Section {
296   public:
297    // Add a symbol with given name to this symtab. The symbol refers to
298    // 'relative_addr' within the given section and has the given attributes.
299    void AddSymbol(const std::string& name, const Section* section,
300                   Elf_Addr addr, bool is_relative, Elf_Word size,
301                   uint8_t binding, uint8_t type, uint8_t other = 0) {
302      CHECK(section != nullptr);
303      Elf_Word name_idx = strtab_->AddName(name);
304      symbols_.push_back({ name, section, addr, size, is_relative,
305                           MakeStInfo(binding, type), other, name_idx });
306    }
307
308    SymtabSection(const std::string& name, Elf_Word type, Elf_Word flags,
309                  StrtabSection* strtab)
310        : Section(name, type, flags, strtab, 0, sizeof(Elf_Word), sizeof(Elf_Sym)),
311          strtab_(strtab) {
312    }
313
314    bool IsEmpty() const {
315      return symbols_.empty();
316    }
317
318    Elf_Word GetSize() const OVERRIDE {
319      return (1 /* NULL */ + symbols_.size()) * sizeof(Elf_Sym);
320    }
321
322    bool Write(File* elf_file) OVERRIDE {
323      std::vector<Elf_Sym> buffer;
324      buffer.reserve(1u + symbols_.size());
325      buffer.push_back(Elf_Sym());  // NULL.
326      for (const ElfSymbolState& it : symbols_) {
327        Elf_Sym sym = Elf_Sym();
328        sym.st_name = it.name_idx_;
329        if (it.is_relative_) {
330          sym.st_value = it.addr_ + it.section_->GetHeader()->sh_addr;
331        } else {
332          sym.st_value = it.addr_;
333        }
334        sym.st_size = it.size_;
335        sym.st_other = it.other_;
336        sym.st_shndx = it.section_->GetSectionIndex();
337        sym.st_info = it.info_;
338        buffer.push_back(sym);
339      }
340      return WriteArray(elf_file, buffer.data(), buffer.size());
341    }
342
343   private:
344    struct ElfSymbolState {
345      const std::string name_;
346      const Section* section_;
347      Elf_Addr addr_;
348      Elf_Word size_;
349      bool is_relative_;
350      uint8_t info_;
351      uint8_t other_;
352      Elf_Word name_idx_;  // index in the strtab.
353    };
354
355    static inline constexpr uint8_t MakeStInfo(uint8_t binding, uint8_t type) {
356      return ((binding) << 4) + ((type) & 0xf);
357    }
358
359    // The symbols in the same order they will be in the symbol table.
360    std::vector<ElfSymbolState> symbols_;
361    StrtabSection* strtab_;
362
363    friend class HashSection;
364  };
365
366  // TODO: Consider removing.
367  // We use it only for the dynsym section which has only 5 symbols.
368  // We do not use it for symtab, and we probably do not have to
369  // since we use those symbols only to print backtraces.
370  class HashSection FINAL : public Section {
371   public:
372    HashSection(const std::string& name, Elf_Word flags, SymtabSection* symtab)
373        : Section(name, SHT_HASH, flags, symtab,
374                  0, sizeof(Elf_Word), sizeof(Elf_Word)),
375          symtab_(symtab) {
376    }
377
378    Elf_Word GetSize() const OVERRIDE {
379      Elf_Word nbuckets = GetNumBuckets();
380      Elf_Word chain_size = symtab_->symbols_.size() + 1 /* NULL */;
381      return (2 /* header */ + nbuckets + chain_size) * sizeof(Elf_Word);
382    }
383
384    bool Write(File* const elf_file) OVERRIDE {
385      // Here is how The ELF hash table works.
386      // There are 3 arrays to worry about.
387      // * The symbol table where the symbol information is.
388      // * The bucket array which is an array of indexes into the symtab and chain.
389      // * The chain array which is also an array of indexes into the symtab and chain.
390      //
391      // Lets say the state is something like this.
392      // +--------+       +--------+      +-----------+
393      // | symtab |       | bucket |      |   chain   |
394      // |  null  |       | 1      |      | STN_UNDEF |
395      // | <sym1> |       | 4      |      | 2         |
396      // | <sym2> |       |        |      | 5         |
397      // | <sym3> |       |        |      | STN_UNDEF |
398      // | <sym4> |       |        |      | 3         |
399      // | <sym5> |       |        |      | STN_UNDEF |
400      // +--------+       +--------+      +-----------+
401      //
402      // The lookup process (in python psudocode) is
403      //
404      // def GetSym(name):
405      //     # NB STN_UNDEF == 0
406      //     indx = bucket[elfhash(name) % num_buckets]
407      //     while indx != STN_UNDEF:
408      //         if GetSymbolName(symtab[indx]) == name:
409      //             return symtab[indx]
410      //         indx = chain[indx]
411      //     return SYMBOL_NOT_FOUND
412      //
413      // Between bucket and chain arrays every symtab index must be present exactly
414      // once (except for STN_UNDEF, which must be present 1 + num_bucket times).
415      const auto& symbols = symtab_->symbols_;
416      // Select number of buckets.
417      // This is essentially arbitrary.
418      Elf_Word nbuckets = GetNumBuckets();
419      // 1 is for the implicit NULL symbol.
420      Elf_Word chain_size = (symbols.size() + 1);
421      std::vector<Elf_Word> hash;
422      hash.push_back(nbuckets);
423      hash.push_back(chain_size);
424      uint32_t bucket_offset = hash.size();
425      uint32_t chain_offset = bucket_offset + nbuckets;
426      hash.resize(hash.size() + nbuckets + chain_size, 0);
427
428      Elf_Word* buckets = hash.data() + bucket_offset;
429      Elf_Word* chain   = hash.data() + chain_offset;
430
431      // Set up the actual hash table.
432      for (Elf_Word i = 0; i < symbols.size(); i++) {
433        // Add 1 since we need to have the null symbol that is not in the symbols
434        // list.
435        Elf_Word index = i + 1;
436        Elf_Word hash_val = static_cast<Elf_Word>(elfhash(symbols[i].name_.c_str())) % nbuckets;
437        if (buckets[hash_val] == 0) {
438          buckets[hash_val] = index;
439        } else {
440          hash_val = buckets[hash_val];
441          CHECK_LT(hash_val, chain_size);
442          while (chain[hash_val] != 0) {
443            hash_val = chain[hash_val];
444            CHECK_LT(hash_val, chain_size);
445          }
446          chain[hash_val] = index;
447          // Check for loops. Works because if this is non-empty then there must be
448          // another cell which already contains the same symbol index as this one,
449          // which means some symbol has more then one name, which isn't allowed.
450          CHECK_EQ(chain[index], static_cast<Elf_Word>(0));
451        }
452      }
453      return WriteArray(elf_file, hash.data(), hash.size());
454    }
455
456   private:
457    Elf_Word GetNumBuckets() const {
458      const auto& symbols = symtab_->symbols_;
459      if (symbols.size() < 8) {
460        return 2;
461      } else if (symbols.size() < 32) {
462        return 4;
463      } else if (symbols.size() < 256) {
464        return 16;
465      } else {
466        // Have about 32 ids per bucket.
467        return RoundUp(symbols.size()/32, 2);
468      }
469    }
470
471    // from bionic
472    static inline unsigned elfhash(const char *_name) {
473      const unsigned char *name = (const unsigned char *) _name;
474      unsigned h = 0, g;
475
476      while (*name) {
477        h = (h << 4) + *name++;
478        g = h & 0xf0000000;
479        h ^= g;
480        h ^= g >> 24;
481      }
482      return h;
483    }
484
485    SymtabSection* symtab_;
486
487    DISALLOW_COPY_AND_ASSIGN(HashSection);
488  };
489
490  ElfBuilder(InstructionSet isa,
491             Elf_Word rodata_size, CodeOutput* rodata_writer,
492             Elf_Word text_size, CodeOutput* text_writer,
493             Elf_Word bss_size)
494    : isa_(isa),
495      dynstr_(".dynstr", SHF_ALLOC),
496      dynsym_(".dynsym", SHT_DYNSYM, SHF_ALLOC, &dynstr_),
497      hash_(".hash", SHF_ALLOC, &dynsym_),
498      rodata_(".rodata", SHT_PROGBITS, SHF_ALLOC,
499              nullptr, 0, kPageSize, 0, rodata_size, rodata_writer),
500      text_(".text", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR,
501            nullptr, 0, kPageSize, 0, text_size, text_writer),
502      bss_(".bss", bss_size),
503      dynamic_(".dynamic", &dynsym_),
504      strtab_(".strtab", 0),
505      symtab_(".symtab", SHT_SYMTAB, 0, &strtab_),
506      shstrtab_(".shstrtab", 0) {
507  }
508  ~ElfBuilder() {}
509
510  OatSection* GetText() { return &text_; }
511  SymtabSection* GetSymtab() { return &symtab_; }
512
513  bool Write(File* elf_file) {
514    // Since the .text section of an oat file contains relative references to .rodata
515    // and (optionally) .bss, we keep these 2 or 3 sections together. This creates
516    // a non-traditional layout where the .bss section is mapped independently of the
517    // .dynamic section and needs its own program header with LOAD RW.
518    //
519    // The basic layout of the elf file. Order may be different in final output.
520    // +-------------------------+
521    // | Elf_Ehdr                |
522    // +-------------------------+
523    // | Elf_Phdr PHDR           |
524    // | Elf_Phdr LOAD R         | .dynsym .dynstr .hash .rodata
525    // | Elf_Phdr LOAD R X       | .text
526    // | Elf_Phdr LOAD RW        | .bss (Optional)
527    // | Elf_Phdr LOAD RW        | .dynamic
528    // | Elf_Phdr DYNAMIC        | .dynamic
529    // | Elf_Phdr LOAD R         | .eh_frame .eh_frame_hdr
530    // | Elf_Phdr EH_FRAME R     | .eh_frame_hdr
531    // +-------------------------+
532    // | .dynsym                 |
533    // | Elf_Sym  STN_UNDEF      |
534    // | Elf_Sym  oatdata        |
535    // | Elf_Sym  oatexec        |
536    // | Elf_Sym  oatlastword    |
537    // | Elf_Sym  oatbss         | (Optional)
538    // | Elf_Sym  oatbsslastword | (Optional)
539    // +-------------------------+
540    // | .dynstr                 |
541    // | names for .dynsym       |
542    // +-------------------------+
543    // | .hash                   |
544    // | hashtable for dynsym    |
545    // +-------------------------+
546    // | .rodata                 |
547    // | oatdata..oatexec-4      |
548    // +-------------------------+
549    // | .text                   |
550    // | oatexec..oatlastword    |
551    // +-------------------------+
552    // | .dynamic                |
553    // | Elf_Dyn DT_HASH         |
554    // | Elf_Dyn DT_STRTAB       |
555    // | Elf_Dyn DT_SYMTAB       |
556    // | Elf_Dyn DT_SYMENT       |
557    // | Elf_Dyn DT_STRSZ        |
558    // | Elf_Dyn DT_SONAME       |
559    // | Elf_Dyn DT_NULL         |
560    // +-------------------------+  (Optional)
561    // | .symtab                 |  (Optional)
562    // | program symbols         |  (Optional)
563    // +-------------------------+  (Optional)
564    // | .strtab                 |  (Optional)
565    // | names for .symtab       |  (Optional)
566    // +-------------------------+  (Optional)
567    // | .eh_frame               |  (Optional)
568    // +-------------------------+  (Optional)
569    // | .eh_frame_hdr           |  (Optional)
570    // +-------------------------+  (Optional)
571    // | .debug_info             |  (Optional)
572    // +-------------------------+  (Optional)
573    // | .debug_abbrev           |  (Optional)
574    // +-------------------------+  (Optional)
575    // | .debug_str              |  (Optional)
576    // +-------------------------+  (Optional)
577    // | .debug_line             |  (Optional)
578    // +-------------------------+
579    // | .shstrtab               |
580    // | names of sections       |
581    // +-------------------------+
582    // | Elf_Shdr null           |
583    // | Elf_Shdr .dynsym        |
584    // | Elf_Shdr .dynstr        |
585    // | Elf_Shdr .hash          |
586    // | Elf_Shdr .rodata        |
587    // | Elf_Shdr .text          |
588    // | Elf_Shdr .bss           |  (Optional)
589    // | Elf_Shdr .dynamic       |
590    // | Elf_Shdr .symtab        |  (Optional)
591    // | Elf_Shdr .strtab        |  (Optional)
592    // | Elf_Shdr .eh_frame      |  (Optional)
593    // | Elf_Shdr .eh_frame_hdr  |  (Optional)
594    // | Elf_Shdr .debug_info    |  (Optional)
595    // | Elf_Shdr .debug_abbrev  |  (Optional)
596    // | Elf_Shdr .debug_str     |  (Optional)
597    // | Elf_Shdr .debug_line    |  (Optional)
598    // | Elf_Shdr .oat_patches   |  (Optional)
599    // | Elf_Shdr .shstrtab      |
600    // +-------------------------+
601    constexpr bool debug_logging_ = false;
602
603    // Create a list of all section which we want to write.
604    // This is the order in which they will be written.
605    std::vector<Section*> sections;
606    sections.push_back(&dynsym_);
607    sections.push_back(&dynstr_);
608    sections.push_back(&hash_);
609    sections.push_back(&rodata_);
610    sections.push_back(&text_);
611    if (bss_.GetSize() != 0u) {
612      sections.push_back(&bss_);
613    }
614    sections.push_back(&dynamic_);
615    if (!symtab_.IsEmpty()) {
616      sections.push_back(&symtab_);
617      sections.push_back(&strtab_);
618    }
619    for (Section* section : other_sections_) {
620      sections.push_back(section);
621    }
622    sections.push_back(&shstrtab_);
623    for (size_t i = 0; i < sections.size(); i++) {
624      // The first section index is 1.  Index 0 is reserved for NULL.
625      // Section index is used for relative symbols and for section links.
626      sections[i]->SetSectionIndex(i + 1);
627      // Add section name to .shstrtab.
628      Elf_Word name_offset = shstrtab_.AddName(sections[i]->GetName());
629      sections[i]->GetHeader()->sh_name = name_offset;
630    }
631
632    // The running program does not have access to section headers
633    // and the loader is not supposed to use them either.
634    // The dynamic sections therefore replicates some of the layout
635    // information like the address and size of .rodata and .text.
636    // It also contains other metadata like the SONAME.
637    // The .dynamic section is found using the PT_DYNAMIC program header.
638    BuildDynsymSection();
639    BuildDynamicSection(elf_file->GetPath());
640
641    // We do not know the number of headers until the final stages of write.
642    // It is easiest to just reserve a fixed amount of space for them.
643    constexpr size_t kMaxProgramHeaders = 8;
644    constexpr size_t kProgramHeadersOffset = sizeof(Elf_Ehdr);
645    constexpr size_t kProgramHeadersSize = sizeof(Elf_Phdr) * kMaxProgramHeaders;
646
647    // Layout of all sections - determine the final file offsets and addresses.
648    // This must be done after we have built all sections and know their size.
649    Elf_Off file_offset = kProgramHeadersOffset + kProgramHeadersSize;
650    Elf_Addr load_address = file_offset;
651    std::vector<Elf_Shdr> section_headers;
652    section_headers.reserve(1u + sections.size());
653    section_headers.push_back(Elf_Shdr());  // NULL at index 0.
654    for (auto* section : sections) {
655      Elf_Shdr* header = section->GetHeader();
656      Elf_Off alignment = header->sh_addralign > 0 ? header->sh_addralign : 1;
657      header->sh_size = section->GetSize();
658      header->sh_link = section->GetLink();
659      // Allocate memory for the section in the file.
660      if (header->sh_type != SHT_NOBITS) {
661        header->sh_offset = RoundUp(file_offset, alignment);
662        file_offset = header->sh_offset + header->sh_size;
663      }
664      // Allocate memory for the section during program execution.
665      if ((header->sh_flags & SHF_ALLOC) != 0) {
666        header->sh_addr = RoundUp(load_address, alignment);
667        load_address = header->sh_addr + header->sh_size;
668      }
669      if (debug_logging_) {
670        LOG(INFO) << "Section " << section->GetName() << ":" << std::hex
671                  << " offset=0x" << header->sh_offset
672                  << " addr=0x" << header->sh_addr
673                  << " size=0x" << header->sh_size;
674      }
675      // Collect section headers into continuous array for convenience.
676      section_headers.push_back(*header);
677    }
678    Elf_Off section_headers_offset = RoundUp(file_offset, sizeof(Elf_Word));
679
680    // Create program headers now that we know the layout of the whole file.
681    // Each segment contains one or more sections which are mapped together.
682    // Not all sections are mapped during the execution of the program.
683    // PT_LOAD does the mapping.  Other PT_* types allow the program to locate
684    // interesting parts of memory and their addresses overlap with PT_LOAD.
685    std::vector<Elf_Phdr> program_headers;
686    program_headers.push_back(MakeProgramHeader(PT_PHDR, PF_R,
687      kProgramHeadersOffset, kProgramHeadersSize, sizeof(Elf_Word)));
688    // Create the main LOAD R segment which spans all sections up to .rodata.
689    const Elf_Shdr* rodata = rodata_.GetHeader();
690    program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R,
691      0, rodata->sh_offset + rodata->sh_size, rodata->sh_addralign));
692    program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_X, text_));
693    if (bss_.GetHeader()->sh_size != 0u) {
694      program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_W, bss_));
695    }
696    program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_W, dynamic_));
697    program_headers.push_back(MakeProgramHeader(PT_DYNAMIC, PF_R | PF_W, dynamic_));
698    const Section* eh_frame = FindSection(".eh_frame");
699    if (eh_frame != nullptr) {
700      program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R, *eh_frame));
701      const Section* eh_frame_hdr = FindSection(".eh_frame_hdr");
702      if (eh_frame_hdr != nullptr) {
703        // Check layout: eh_frame is before eh_frame_hdr and there is no gap.
704        CHECK_LE(eh_frame->GetHeader()->sh_offset, eh_frame_hdr->GetHeader()->sh_offset);
705        CHECK_EQ(eh_frame->GetHeader()->sh_offset + eh_frame->GetHeader()->sh_size,
706                 eh_frame_hdr->GetHeader()->sh_offset);
707        // Extend the PT_LOAD of .eh_frame to include the .eh_frame_hdr as well.
708        program_headers.back().p_filesz += eh_frame_hdr->GetHeader()->sh_size;
709        program_headers.back().p_memsz  += eh_frame_hdr->GetHeader()->sh_size;
710        program_headers.push_back(MakeProgramHeader(PT_GNU_EH_FRAME, PF_R, *eh_frame_hdr));
711      }
712    }
713    CHECK_LE(program_headers.size(), kMaxProgramHeaders);
714
715    // Create the main ELF header.
716    Elf_Ehdr elf_header = MakeElfHeader(isa_);
717    elf_header.e_phoff = kProgramHeadersOffset;
718    elf_header.e_shoff = section_headers_offset;
719    elf_header.e_phnum = program_headers.size();
720    elf_header.e_shnum = section_headers.size();
721    elf_header.e_shstrndx = shstrtab_.GetSectionIndex();
722
723    // Write all headers and section content to the file.
724    // Depending on the implementations of Section::Write, this
725    // might be just memory copies or some more elaborate operations.
726    if (!WriteArray(elf_file, &elf_header, 1)) {
727      LOG(INFO) << "Failed to write the ELF header";
728      return false;
729    }
730    if (!WriteArray(elf_file, program_headers.data(), program_headers.size())) {
731      LOG(INFO) << "Failed to write the program headers";
732      return false;
733    }
734    for (Section* section : sections) {
735      const Elf_Shdr* header = section->GetHeader();
736      if (header->sh_type != SHT_NOBITS) {
737        if (!SeekTo(elf_file, header->sh_offset) || !section->Write(elf_file)) {
738          LOG(INFO) << "Failed to write section " << section->GetName();
739          return false;
740        }
741        Elf_Word current_offset = lseek(elf_file->Fd(), 0, SEEK_CUR);
742        CHECK_EQ(current_offset, header->sh_offset + header->sh_size)
743          << "The number of bytes written does not match GetSize()";
744      }
745    }
746    if (!SeekTo(elf_file, section_headers_offset) ||
747        !WriteArray(elf_file, section_headers.data(), section_headers.size())) {
748      LOG(INFO) << "Failed to write the section headers";
749      return false;
750    }
751    return true;
752  }
753
754  // Adds the given section to the builder.  It does not take ownership.
755  void RegisterSection(Section* section) {
756    other_sections_.push_back(section);
757  }
758
759  const Section* FindSection(const char* name) {
760    for (const auto* section : other_sections_) {
761      if (section->GetName() == name) {
762        return section;
763      }
764    }
765    return nullptr;
766  }
767
768 private:
769  static bool SeekTo(File* elf_file, Elf_Word offset) {
770    DCHECK_LE(lseek(elf_file->Fd(), 0, SEEK_CUR), static_cast<off_t>(offset))
771      << "Seeking backwards";
772    if (static_cast<off_t>(offset) != lseek(elf_file->Fd(), offset, SEEK_SET)) {
773      PLOG(ERROR) << "Failed to seek in file " << elf_file->GetPath();
774      return false;
775    }
776    return true;
777  }
778
779  template<typename T>
780  static bool WriteArray(File* elf_file, const T* data, size_t count) {
781    DCHECK(data != nullptr);
782    if (!elf_file->WriteFully(data, count * sizeof(T))) {
783      PLOG(ERROR) << "Failed to write to file " << elf_file->GetPath();
784      return false;
785    }
786    return true;
787  }
788
789  // Helper - create segment header based on memory range.
790  static Elf_Phdr MakeProgramHeader(Elf_Word type, Elf_Word flags,
791                                    Elf_Off offset, Elf_Word size, Elf_Word align) {
792    Elf_Phdr phdr = Elf_Phdr();
793    phdr.p_type    = type;
794    phdr.p_flags   = flags;
795    phdr.p_offset  = offset;
796    phdr.p_vaddr   = offset;
797    phdr.p_paddr   = offset;
798    phdr.p_filesz  = size;
799    phdr.p_memsz   = size;
800    phdr.p_align   = align;
801    return phdr;
802  }
803
804  // Helper - create segment header based on section header.
805  static Elf_Phdr MakeProgramHeader(Elf_Word type, Elf_Word flags,
806                                    const Section& section) {
807    const Elf_Shdr* shdr = section.GetHeader();
808    // Only run-time allocated sections should be in segment headers.
809    CHECK_NE(shdr->sh_flags & SHF_ALLOC, 0u);
810    Elf_Phdr phdr = Elf_Phdr();
811    phdr.p_type   = type;
812    phdr.p_flags  = flags;
813    phdr.p_offset = shdr->sh_offset;
814    phdr.p_vaddr  = shdr->sh_addr;
815    phdr.p_paddr  = shdr->sh_addr;
816    phdr.p_filesz = shdr->sh_type != SHT_NOBITS ? shdr->sh_size : 0u;
817    phdr.p_memsz  = shdr->sh_size;
818    phdr.p_align  = shdr->sh_addralign;
819    return phdr;
820  }
821
822  static Elf_Ehdr MakeElfHeader(InstructionSet isa) {
823    Elf_Ehdr elf_header = Elf_Ehdr();
824    switch (isa) {
825      case kArm:
826        // Fall through.
827      case kThumb2: {
828        elf_header.e_machine = EM_ARM;
829        elf_header.e_flags = EF_ARM_EABI_VER5;
830        break;
831      }
832      case kArm64: {
833        elf_header.e_machine = EM_AARCH64;
834        elf_header.e_flags = 0;
835        break;
836      }
837      case kX86: {
838        elf_header.e_machine = EM_386;
839        elf_header.e_flags = 0;
840        break;
841      }
842      case kX86_64: {
843        elf_header.e_machine = EM_X86_64;
844        elf_header.e_flags = 0;
845        break;
846      }
847      case kMips: {
848        elf_header.e_machine = EM_MIPS;
849        elf_header.e_flags = (EF_MIPS_NOREORDER |
850                               EF_MIPS_PIC       |
851                               EF_MIPS_CPIC      |
852                               EF_MIPS_ABI_O32   |
853                               EF_MIPS_ARCH_32R2);
854        break;
855      }
856      case kMips64: {
857        elf_header.e_machine = EM_MIPS;
858        elf_header.e_flags = (EF_MIPS_NOREORDER |
859                               EF_MIPS_PIC       |
860                               EF_MIPS_CPIC      |
861                               EF_MIPS_ARCH_64R6);
862        break;
863      }
864      case kNone: {
865        LOG(FATAL) << "No instruction set";
866      }
867    }
868
869    elf_header.e_ident[EI_MAG0]       = ELFMAG0;
870    elf_header.e_ident[EI_MAG1]       = ELFMAG1;
871    elf_header.e_ident[EI_MAG2]       = ELFMAG2;
872    elf_header.e_ident[EI_MAG3]       = ELFMAG3;
873    elf_header.e_ident[EI_CLASS]      = (sizeof(Elf_Addr) == sizeof(Elf32_Addr))
874                                         ? ELFCLASS32 : ELFCLASS64;;
875    elf_header.e_ident[EI_DATA]       = ELFDATA2LSB;
876    elf_header.e_ident[EI_VERSION]    = EV_CURRENT;
877    elf_header.e_ident[EI_OSABI]      = ELFOSABI_LINUX;
878    elf_header.e_ident[EI_ABIVERSION] = 0;
879    elf_header.e_type = ET_DYN;
880    elf_header.e_version = 1;
881    elf_header.e_entry = 0;
882    elf_header.e_ehsize = sizeof(Elf_Ehdr);
883    elf_header.e_phentsize = sizeof(Elf_Phdr);
884    elf_header.e_shentsize = sizeof(Elf_Shdr);
885    elf_header.e_phoff = sizeof(Elf_Ehdr);
886    return elf_header;
887  }
888
889  void BuildDynamicSection(const std::string& elf_file_path) {
890    std::string soname(elf_file_path);
891    size_t directory_separator_pos = soname.rfind('/');
892    if (directory_separator_pos != std::string::npos) {
893      soname = soname.substr(directory_separator_pos + 1);
894    }
895    // NB: We must add the name before adding DT_STRSZ.
896    Elf_Word soname_offset = dynstr_.AddName(soname);
897
898    dynamic_.AddDynamicTag(DT_HASH, 0, &hash_);
899    dynamic_.AddDynamicTag(DT_STRTAB, 0, &dynstr_);
900    dynamic_.AddDynamicTag(DT_SYMTAB, 0, &dynsym_);
901    dynamic_.AddDynamicTag(DT_SYMENT, sizeof(Elf_Sym), nullptr);
902    dynamic_.AddDynamicTag(DT_STRSZ, dynstr_.GetSize(), nullptr);
903    dynamic_.AddDynamicTag(DT_SONAME, soname_offset, nullptr);
904  }
905
906  void BuildDynsymSection() {
907    dynsym_.AddSymbol("oatdata", &rodata_, 0, true,
908                      rodata_.GetSize(), STB_GLOBAL, STT_OBJECT);
909    dynsym_.AddSymbol("oatexec", &text_, 0, true,
910                      text_.GetSize(), STB_GLOBAL, STT_OBJECT);
911    dynsym_.AddSymbol("oatlastword", &text_, text_.GetSize() - 4,
912                      true, 4, STB_GLOBAL, STT_OBJECT);
913    if (bss_.GetSize() != 0u) {
914      dynsym_.AddSymbol("oatbss", &bss_, 0, true,
915                        bss_.GetSize(), STB_GLOBAL, STT_OBJECT);
916      dynsym_.AddSymbol("oatbsslastword", &bss_, bss_.GetSize() - 4,
917                        true, 4, STB_GLOBAL, STT_OBJECT);
918    }
919  }
920
921  InstructionSet isa_;
922  StrtabSection dynstr_;
923  SymtabSection dynsym_;
924  HashSection hash_;
925  OatSection rodata_;
926  OatSection text_;
927  NoBitsSection bss_;
928  DynamicSection dynamic_;
929  StrtabSection strtab_;
930  SymtabSection symtab_;
931  std::vector<Section*> other_sections_;
932  StrtabSection shstrtab_;
933
934  DISALLOW_COPY_AND_ASSIGN(ElfBuilder);
935};
936
937}  // namespace art
938
939#endif  // ART_COMPILER_ELF_BUILDER_H_
940