1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef COURGETTE_ASSEMBLY_PROGRAM_H_
6#define COURGETTE_ASSEMBLY_PROGRAM_H_
7
8#include <map>
9#include <set>
10#include <vector>
11
12#include "base/basictypes.h"
13#include "base/memory/scoped_ptr.h"
14
15#include "courgette/disassembler.h"
16#include "courgette/memory_allocator.h"
17
18namespace courgette {
19
20class EncodedProgram;
21class Instruction;
22
23typedef NoThrowBuffer<Instruction*> InstructionVector;
24
25// A Label is a symbolic reference to an address.  Unlike a conventional
26// assembly language, we always know the address.  The address will later be
27// stored in a table and the Label will be replaced with the index into the
28// table.
29//
30// TODO(sra): Make fields private and add setters and getters.
31class Label {
32 public:
33  static const int kNoIndex = -1;
34  Label() : rva_(0), index_(kNoIndex), count_(0) {}
35  explicit Label(RVA rva) : rva_(rva), index_(kNoIndex), count_(0) {}
36
37  RVA rva_;    // Address referred to by the label.
38  int index_;  // Index of address in address table, kNoIndex until assigned.
39  int count_;
40};
41
42typedef std::map<RVA, Label*> RVAToLabel;
43
44// An AssemblyProgram is the result of disassembling an executable file.
45//
46// * The disassembler creates labels in the AssemblyProgram and emits
47//   'Instructions'.
48// * The disassembler then calls DefaultAssignIndexes to assign
49//   addresses to positions in the address tables.
50// * [Optional step]
51// * At this point the AssemblyProgram can be converted into an
52//   EncodedProgram and serialized to an output stream.
53// * Later, the EncodedProgram can be deserialized and assembled into
54//   the original file.
55//
56// The optional step is to modify the AssemblyProgram.  One form of modification
57// is to assign indexes in such a way as to make the EncodedProgram for this
58// AssemblyProgram look more like the EncodedProgram for some other
59// AssemblyProgram.  The modification process should call UnassignIndexes, do
60// its own assignment, and then call AssignRemainingIndexes to ensure all
61// indexes are assigned.
62//
63class AssemblyProgram {
64 public:
65  explicit AssemblyProgram(ExecutableType kind);
66  ~AssemblyProgram();
67
68  ExecutableType kind() const { return kind_; }
69
70  void set_image_base(uint64 image_base) { image_base_ = image_base; }
71
72  // Instructions will be assembled in the order they are emitted.
73
74  // Generates an entire base relocation table.
75  CheckBool EmitPeRelocsInstruction() WARN_UNUSED_RESULT;
76
77  // Generates an ELF style relocation table for X86.
78  CheckBool EmitElfRelocationInstruction() WARN_UNUSED_RESULT;
79
80  // Generates an ELF style relocation table for ARM.
81  CheckBool EmitElfARMRelocationInstruction() WARN_UNUSED_RESULT;
82
83  // Following instruction will be assembled at address 'rva'.
84  CheckBool EmitOriginInstruction(RVA rva) WARN_UNUSED_RESULT;
85
86  // Generates a single byte of data or machine instruction.
87  CheckBool EmitByteInstruction(uint8 byte) WARN_UNUSED_RESULT;
88
89  // Generates multiple bytes of data or machine instructions.
90  CheckBool EmitBytesInstruction(const uint8* value, uint32 len)
91      WARN_UNUSED_RESULT;
92
93  // Generates 4-byte relative reference to address of 'label'.
94  CheckBool EmitRel32(Label* label) WARN_UNUSED_RESULT;
95
96  // Generates 4-byte relative reference to address of 'label' for
97  // ARM.
98  CheckBool EmitRel32ARM(uint16 op, Label* label, const uint8* arm_op,
99                         uint16 op_size) WARN_UNUSED_RESULT;
100
101  // Generates 4-byte absolute reference to address of 'label'.
102  CheckBool EmitAbs32(Label* label) WARN_UNUSED_RESULT;
103
104  // Looks up a label or creates a new one.  Might return NULL.
105  Label* FindOrMakeAbs32Label(RVA rva);
106
107  // Looks up a label or creates a new one.  Might return NULL.
108  Label* FindOrMakeRel32Label(RVA rva);
109
110  void DefaultAssignIndexes();
111  void UnassignIndexes();
112  void AssignRemainingIndexes();
113
114  EncodedProgram* Encode() const;
115
116  // Accessor for instruction list.
117  const InstructionVector& instructions() const {
118    return instructions_;
119  }
120
121  // Returns the label if the instruction contains and absolute address,
122  // otherwise returns NULL.
123  Label* InstructionAbs32Label(const Instruction* instruction) const;
124
125  // Returns the label if the instruction contains and rel32 offset,
126  // otherwise returns NULL.
127  Label* InstructionRel32Label(const Instruction* instruction) const;
128
129  // Trim underused labels
130  CheckBool TrimLabels();
131
132  void PrintLabelCounts(RVAToLabel* labels);
133  void CountRel32ARM();
134
135 private:
136  ExecutableType kind_;
137
138  CheckBool Emit(Instruction* instruction) WARN_UNUSED_RESULT;
139
140  static const int kLabelLowerLimit;
141
142  // Looks up a label or creates a new one.  Might return NULL.
143  Label* FindLabel(RVA rva, RVAToLabel* labels);
144
145  // Helper methods for the public versions.
146  static void UnassignIndexes(RVAToLabel* labels);
147  static void DefaultAssignIndexes(RVAToLabel* labels);
148  static void AssignRemainingIndexes(RVAToLabel* labels);
149
150  // Sharing instructions that emit a single byte saves a lot of space.
151  Instruction* GetByteInstruction(uint8 byte);
152  scoped_ptr<Instruction*[]> byte_instruction_cache_;
153
154  uint64 image_base_;  // Desired or mandated base address of image.
155
156  InstructionVector instructions_;  // All the instructions in program.
157
158  // These are lookup maps to find the label associated with a given address.
159  // We have separate label spaces for addresses referenced by rel32 labels and
160  // abs32 labels.  This is somewhat arbitrary.
161  RVAToLabel rel32_labels_;
162  RVAToLabel abs32_labels_;
163
164  DISALLOW_COPY_AND_ASSIGN(AssemblyProgram);
165};
166
167}  // namespace courgette
168#endif  // COURGETTE_ASSEMBLY_PROGRAM_H_
169