// assembly_program.h revision 5821806d5e7f356e8fa4b058a389a808ea183019
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#ifndef COURGETTE_ASSEMBLY_PROGRAM_H_
6#define COURGETTE_ASSEMBLY_PROGRAM_H_
7
8#include <map>
9#include <set>
10#include <vector>
11
12#include "base/basictypes.h"
13#include "base/memory/scoped_ptr.h"
14
15#include "courgette/disassembler.h"
16#include "courgette/memory_allocator.h"
17
18namespace courgette {
19
20class EncodedProgram;
21class Instruction;
22
23typedef NoThrowBuffer<Instruction*> InstructionVector;
24
25// A Label is a symbolic reference to an address.  Unlike a conventional
26// assembly language, we always know the address.  The address will later be
27// stored in a table and the Label will be replaced with the index into the
28// table.
29//
30// TODO(sra): Make fields private and add setters and getters.
31class Label {
32 public:
33  static const int kNoIndex = -1;
34  Label() : rva_(0), index_(kNoIndex) {}
35  explicit Label(RVA rva) : rva_(rva), index_(kNoIndex) {}
36
37  RVA rva_;    // Address referred to by the label.
38  int index_;  // Index of address in address table, kNoIndex until assigned.
39};
40
41typedef std::map<RVA, Label*> RVAToLabel;
42
43// An AssemblyProgram is the result of disassembling an executable file.
44//
45// * The disassembler creates labels in the AssemblyProgram and emits
46//   'Instructions'.
47// * The disassembler then calls DefaultAssignIndexes to assign
48//   addresses to positions in the address tables.
49// * [Optional step]
50// * At this point the AssemblyProgram can be converted into an
51//   EncodedProgram and serialized to an output stream.
52// * Later, the EncodedProgram can be deserialized and assembled into
53//   the original file.
54//
55// The optional step is to modify the AssemblyProgram.  One form of modification
56// is to assign indexes in such a way as to make the EncodedProgram for this
57// AssemblyProgram look more like the EncodedProgram for some other
58// AssemblyProgram.  The modification process should call UnassignIndexes, do
59// its own assignment, and then call AssignRemainingIndexes to ensure all
60// indexes are assigned.
61//
62class AssemblyProgram {
63 public:
64  AssemblyProgram();
65  ~AssemblyProgram();
66
67  void set_image_base(uint64 image_base) { image_base_ = image_base; }
68
69  // Instructions will be assembled in the order they are emitted.
70
71  // Generates an entire base relocation table.
72  CheckBool EmitPeRelocsInstruction() WARN_UNUSED_RESULT;
73
74  // Generates an ELF style relocation table.
75  CheckBool EmitElfRelocationInstruction() WARN_UNUSED_RESULT;
76
77  // Following instruction will be assembled at address 'rva'.
78  CheckBool EmitOriginInstruction(RVA rva) WARN_UNUSED_RESULT;
79
80  // Generates a single byte of data or machine instruction.
81  CheckBool EmitByteInstruction(uint8 byte) WARN_UNUSED_RESULT;
82
83  // Generates 4-byte relative reference to address of 'label'.
84  CheckBool EmitRel32(Label* label) WARN_UNUSED_RESULT;
85
86  // Generates 4-byte absolute reference to address of 'label'.
87  CheckBool EmitAbs32(Label* label) WARN_UNUSED_RESULT;
88
89  // Looks up a label or creates a new one.  Might return NULL.
90  Label* FindOrMakeAbs32Label(RVA rva);
91
92  // Looks up a label or creates a new one.  Might return NULL.
93  Label* FindOrMakeRel32Label(RVA rva);
94
95  void DefaultAssignIndexes();
96  void UnassignIndexes();
97  void AssignRemainingIndexes();
98
99  EncodedProgram* Encode() const;
100
101  // Accessor for instruction list.
102  const InstructionVector& instructions() const {
103    return instructions_;
104  }
105
106  // Returns the label if the instruction contains and absolute address,
107  // otherwise returns NULL.
108  Label* InstructionAbs32Label(const Instruction* instruction) const;
109
110  // Returns the label if the instruction contains and rel32 offset,
111  // otherwise returns NULL.
112  Label* InstructionRel32Label(const Instruction* instruction) const;
113
114 private:
115  CheckBool Emit(Instruction* instruction) WARN_UNUSED_RESULT;
116
117  // Looks up a label or creates a new one.  Might return NULL.
118  Label* FindLabel(RVA rva, RVAToLabel* labels);
119
120  // Helper methods for the public versions.
121  static void UnassignIndexes(RVAToLabel* labels);
122  static void DefaultAssignIndexes(RVAToLabel* labels);
123  static void AssignRemainingIndexes(RVAToLabel* labels);
124
125  // Sharing instructions that emit a single byte saves a lot of space.
126  Instruction* GetByteInstruction(uint8 byte);
127  scoped_array<Instruction*> byte_instruction_cache_;
128
129  uint64 image_base_;  // Desired or mandated base address of image.
130
131  InstructionVector instructions_;  // All the instructions in program.
132
133  // These are lookup maps to find the label associated with a given address.
134  // We have separate label spaces for addresses referenced by rel32 labels and
135  // abs32 labels.  This is somewhat arbitrary.
136  RVAToLabel rel32_labels_;
137  RVAToLabel abs32_labels_;
138
139  DISALLOW_COPY_AND_ASSIGN(AssemblyProgram);
140};
141
142}  // namespace courgette
143#endif  // COURGETTE_ASSEMBLY_PROGRAM_H_
144