// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifndef COURGETTE_ASSEMBLY_PROGRAM_H_
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define COURGETTE_ASSEMBLY_PROGRAM_H_
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <map>
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <set>
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <vector>
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/basictypes.h"
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/memory/scoped_ptr.h"
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "courgette/disassembler.h"
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "courgette/memory_allocator.h"
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace courgette {
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class EncodedProgram;
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class Instruction;
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef NoThrowBuffer<Instruction*> InstructionVector;
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// A Label is a symbolic reference to an address.  Unlike a conventional
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// assembly language, we always know the address.  The address will later be
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// stored in a table and the Label will be replaced with the index into the
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// table.
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// TODO(sra): Make fields private and add setters and getters.
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class Label {
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static const int kNoIndex = -1;
34a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  Label() : rva_(0), index_(kNoIndex), count_(0) {}
35a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  explicit Label(RVA rva) : rva_(rva), index_(kNoIndex), count_(0) {}
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  RVA rva_;    // Address referred to by the label.
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int index_;  // Index of address in address table, kNoIndex until assigned.
39a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  int count_;
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef std::map<RVA, Label*> RVAToLabel;
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// An AssemblyProgram is the result of disassembling an executable file.
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// * The disassembler creates labels in the AssemblyProgram and emits
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   'Instructions'.
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// * The disassembler then calls DefaultAssignIndexes to assign
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   addresses to positions in the address tables.
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// * [Optional step]
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// * At this point the AssemblyProgram can be converted into an
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   EncodedProgram and serialized to an output stream.
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// * Later, the EncodedProgram can be deserialized and assembled into
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   the original file.
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The optional step is to modify the AssemblyProgram.  One form of modification
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// is to assign indexes in such a way as to make the EncodedProgram for this
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// AssemblyProgram look more like the EncodedProgram for some other
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// AssemblyProgram.  The modification process should call UnassignIndexes, do
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// its own assignment, and then call AssignRemainingIndexes to ensure all
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// indexes are assigned.
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class AssemblyProgram {
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
65a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  explicit AssemblyProgram(ExecutableType kind);
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ~AssemblyProgram();
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
68a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  ExecutableType kind() const { return kind_; }
69a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void set_image_base(uint64 image_base) { image_base_ = image_base; }
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Instructions will be assembled in the order they are emitted.
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Generates an entire base relocation table.
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  CheckBool EmitPeRelocsInstruction() WARN_UNUSED_RESULT;
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
77ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch  // Generates an ELF style relocation table for X86.
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  CheckBool EmitElfRelocationInstruction() WARN_UNUSED_RESULT;
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
80ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch  // Generates an ELF style relocation table for ARM.
81ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch  CheckBool EmitElfARMRelocationInstruction() WARN_UNUSED_RESULT;
82ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Following instruction will be assembled at address 'rva'.
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  CheckBool EmitOriginInstruction(RVA rva) WARN_UNUSED_RESULT;
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Generates a single byte of data or machine instruction.
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  CheckBool EmitByteInstruction(uint8 byte) WARN_UNUSED_RESULT;
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
892385ea399aae016c0806a4f9ef3c9cfe3d2a39dfBen Murdoch  // Generates multiple bytes of data or machine instructions.
902385ea399aae016c0806a4f9ef3c9cfe3d2a39dfBen Murdoch  CheckBool EmitBytesInstruction(const uint8* value, uint32 len)
912385ea399aae016c0806a4f9ef3c9cfe3d2a39dfBen Murdoch      WARN_UNUSED_RESULT;
922385ea399aae016c0806a4f9ef3c9cfe3d2a39dfBen Murdoch
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Generates 4-byte relative reference to address of 'label'.
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  CheckBool EmitRel32(Label* label) WARN_UNUSED_RESULT;
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
96a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  // Generates 4-byte relative reference to address of 'label' for
97a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  // ARM.
98a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  CheckBool EmitRel32ARM(uint16 op, Label* label, const uint8* arm_op,
99a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)                         uint16 op_size) WARN_UNUSED_RESULT;
100a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Generates 4-byte absolute reference to address of 'label'.
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  CheckBool EmitAbs32(Label* label) WARN_UNUSED_RESULT;
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Looks up a label or creates a new one.  Might return NULL.
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Label* FindOrMakeAbs32Label(RVA rva);
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Looks up a label or creates a new one.  Might return NULL.
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Label* FindOrMakeRel32Label(RVA rva);
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void DefaultAssignIndexes();
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void UnassignIndexes();
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void AssignRemainingIndexes();
1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EncodedProgram* Encode() const;
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Accessor for instruction list.
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const InstructionVector& instructions() const {
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return instructions_;
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns the label if the instruction contains and absolute address,
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // otherwise returns NULL.
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Label* InstructionAbs32Label(const Instruction* instruction) const;
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns the label if the instruction contains and rel32 offset,
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // otherwise returns NULL.
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Label* InstructionRel32Label(const Instruction* instruction) const;
1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1292385ea399aae016c0806a4f9ef3c9cfe3d2a39dfBen Murdoch  // Trim underused labels
1302385ea399aae016c0806a4f9ef3c9cfe3d2a39dfBen Murdoch  CheckBool TrimLabels();
1312385ea399aae016c0806a4f9ef3c9cfe3d2a39dfBen Murdoch
132a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  void PrintLabelCounts(RVAToLabel* labels);
133a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  void CountRel32ARM();
134a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
136a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  ExecutableType kind_;
137a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)
1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  CheckBool Emit(Instruction* instruction) WARN_UNUSED_RESULT;
1392385ea399aae016c0806a4f9ef3c9cfe3d2a39dfBen Murdoch
1402385ea399aae016c0806a4f9ef3c9cfe3d2a39dfBen Murdoch  static const int kLabelLowerLimit;
1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Looks up a label or creates a new one.  Might return NULL.
1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Label* FindLabel(RVA rva, RVAToLabel* labels);
1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Helper methods for the public versions.
1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static void UnassignIndexes(RVAToLabel* labels);
1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static void DefaultAssignIndexes(RVAToLabel* labels);
1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static void AssignRemainingIndexes(RVAToLabel* labels);
1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Sharing instructions that emit a single byte saves a lot of space.
1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Instruction* GetByteInstruction(uint8 byte);
152c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  scoped_ptr<Instruction*[]> byte_instruction_cache_;
1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint64 image_base_;  // Desired or mandated base address of image.
1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  InstructionVector instructions_;  // All the instructions in program.
1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // These are lookup maps to find the label associated with a given address.
1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // We have separate label spaces for addresses referenced by rel32 labels and
1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // abs32 labels.  This is somewhat arbitrary.
1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  RVAToLabel rel32_labels_;
1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  RVAToLabel abs32_labels_;
1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DISALLOW_COPY_AND_ASSIGN(AssemblyProgram);
1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace courgette
1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif  // COURGETTE_ASSEMBLY_PROGRAM_H_
169