// assembly_program.h revision 5821806d5e7f356e8fa4b058a389a808ea183019
1// Copyright (c) 2011 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#ifndef COURGETTE_ASSEMBLY_PROGRAM_H_ 6#define COURGETTE_ASSEMBLY_PROGRAM_H_ 7 8#include <map> 9#include <set> 10#include <vector> 11 12#include "base/basictypes.h" 13#include "base/memory/scoped_ptr.h" 14 15#include "courgette/disassembler.h" 16#include "courgette/memory_allocator.h" 17 18namespace courgette { 19 20class EncodedProgram; 21class Instruction; 22 23typedef NoThrowBuffer<Instruction*> InstructionVector; 24 25// A Label is a symbolic reference to an address. Unlike a conventional 26// assembly language, we always know the address. The address will later be 27// stored in a table and the Label will be replaced with the index into the 28// table. 29// 30// TODO(sra): Make fields private and add setters and getters. 31class Label { 32 public: 33 static const int kNoIndex = -1; 34 Label() : rva_(0), index_(kNoIndex) {} 35 explicit Label(RVA rva) : rva_(rva), index_(kNoIndex) {} 36 37 RVA rva_; // Address referred to by the label. 38 int index_; // Index of address in address table, kNoIndex until assigned. 39}; 40 41typedef std::map<RVA, Label*> RVAToLabel; 42 43// An AssemblyProgram is the result of disassembling an executable file. 44// 45// * The disassembler creates labels in the AssemblyProgram and emits 46// 'Instructions'. 47// * The disassembler then calls DefaultAssignIndexes to assign 48// addresses to positions in the address tables. 49// * [Optional step] 50// * At this point the AssemblyProgram can be converted into an 51// EncodedProgram and serialized to an output stream. 52// * Later, the EncodedProgram can be deserialized and assembled into 53// the original file. 54// 55// The optional step is to modify the AssemblyProgram. 
One form of modification 56// is to assign indexes in such a way as to make the EncodedProgram for this 57// AssemblyProgram look more like the EncodedProgram for some other 58// AssemblyProgram. The modification process should call UnassignIndexes, do 59// its own assignment, and then call AssignRemainingIndexes to ensure all 60// indexes are assigned. 61// 62class AssemblyProgram { 63 public: 64 AssemblyProgram(); 65 ~AssemblyProgram(); 66 67 void set_image_base(uint64 image_base) { image_base_ = image_base; } 68 69 // Instructions will be assembled in the order they are emitted. 70 71 // Generates an entire base relocation table. 72 CheckBool EmitPeRelocsInstruction() WARN_UNUSED_RESULT; 73 74 // Generates an ELF style relocation table. 75 CheckBool EmitElfRelocationInstruction() WARN_UNUSED_RESULT; 76 77 // Following instruction will be assembled at address 'rva'. 78 CheckBool EmitOriginInstruction(RVA rva) WARN_UNUSED_RESULT; 79 80 // Generates a single byte of data or machine instruction. 81 CheckBool EmitByteInstruction(uint8 byte) WARN_UNUSED_RESULT; 82 83 // Generates 4-byte relative reference to address of 'label'. 84 CheckBool EmitRel32(Label* label) WARN_UNUSED_RESULT; 85 86 // Generates 4-byte absolute reference to address of 'label'. 87 CheckBool EmitAbs32(Label* label) WARN_UNUSED_RESULT; 88 89 // Looks up a label or creates a new one. Might return NULL. 90 Label* FindOrMakeAbs32Label(RVA rva); 91 92 // Looks up a label or creates a new one. Might return NULL. 93 Label* FindOrMakeRel32Label(RVA rva); 94 95 void DefaultAssignIndexes(); 96 void UnassignIndexes(); 97 void AssignRemainingIndexes(); 98 99 EncodedProgram* Encode() const; 100 101 // Accessor for instruction list. 102 const InstructionVector& instructions() const { 103 return instructions_; 104 } 105 106 // Returns the label if the instruction contains and absolute address, 107 // otherwise returns NULL. 
108 Label* InstructionAbs32Label(const Instruction* instruction) const; 109 110 // Returns the label if the instruction contains and rel32 offset, 111 // otherwise returns NULL. 112 Label* InstructionRel32Label(const Instruction* instruction) const; 113 114 private: 115 CheckBool Emit(Instruction* instruction) WARN_UNUSED_RESULT; 116 117 // Looks up a label or creates a new one. Might return NULL. 118 Label* FindLabel(RVA rva, RVAToLabel* labels); 119 120 // Helper methods for the public versions. 121 static void UnassignIndexes(RVAToLabel* labels); 122 static void DefaultAssignIndexes(RVAToLabel* labels); 123 static void AssignRemainingIndexes(RVAToLabel* labels); 124 125 // Sharing instructions that emit a single byte saves a lot of space. 126 Instruction* GetByteInstruction(uint8 byte); 127 scoped_array<Instruction*> byte_instruction_cache_; 128 129 uint64 image_base_; // Desired or mandated base address of image. 130 131 InstructionVector instructions_; // All the instructions in program. 132 133 // These are lookup maps to find the label associated with a given address. 134 // We have separate label spaces for addresses referenced by rel32 labels and 135 // abs32 labels. This is somewhat arbitrary. 136 RVAToLabel rel32_labels_; 137 RVAToLabel abs32_labels_; 138 139 DISALLOW_COPY_AND_ASSIGN(AssemblyProgram); 140}; 141 142} // namespace courgette 143#endif // COURGETTE_ASSEMBLY_PROGRAM_H_ 144