15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2011 The Chromium Authors. All rights reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifndef COURGETTE_DISASSEMBLER_WIN32_X86_H_
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define COURGETTE_DISASSEMBLER_WIN32_X86_H_
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/basictypes.h"
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "courgette/disassembler.h"
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "courgette/memory_allocator.h"
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "courgette/types_win_pe.h"
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1368043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles)#ifdef COURGETTE_HISTOGRAM_TARGETS
1468043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles)#include <map>
1568043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles)#endif
1668043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles)
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace courgette {
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class AssemblyProgram;
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class DisassemblerWin32X86 : public Disassembler {
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  explicit DisassemblerWin32X86(const void* start, size_t length);
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  virtual ExecutableType kind() { return EXE_WIN_32_X86; }
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns 'true' if the buffer appears to point to a Windows 32 bit
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // executable, 'false' otherwise.  If ParseHeader() succeeds, other member
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // functions may be called.
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  virtual bool ParseHeader();
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  virtual bool Disassemble(AssemblyProgram* target);
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Exposed for test purposes
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool has_text_section() const { return has_text_section_; }
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint32 size_of_code() const { return size_of_code_; }
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool is_32bit() const { return !is_PE32_plus_; }
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns 'true' if the base relocation table can be parsed.
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Output is a vector of the RVAs corresponding to locations within executable
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // that are listed in the base relocation table.
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool ParseRelocs(std::vector<RVA> *addresses);
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns Section containing the relative virtual address, or NULL if none.
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const Section* RVAToSection(RVA rva) const;
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static const int kNoOffset = -1;
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns kNoOffset if there is no file offset corresponding to 'rva'.
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int RVAToFileOffset(RVA rva) const;
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns same as FileOffsetToPointer(RVAToFileOffset(rva)) except that NULL
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // is returned if there is no file offset corresponding to 'rva'.
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const uint8* RVAToPointer(RVA rva) const;
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static std::string SectionName(const Section* section);
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) protected:
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  CheckBool ParseFile(AssemblyProgram* target) WARN_UNUSED_RESULT;
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool ParseAbs32Relocs();
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void ParseRel32RelocsFromSections();
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void ParseRel32RelocsFromSection(const Section* section);
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  CheckBool ParseNonSectionFileRegion(uint32 start_file_offset,
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      uint32 end_file_offset, AssemblyProgram* program) WARN_UNUSED_RESULT;
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  CheckBool ParseFileRegion(const Section* section,
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      uint32 start_file_offset, uint32 end_file_offset,
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      AssemblyProgram* program) WARN_UNUSED_RESULT;
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#if COURGETTE_HISTOGRAM_TARGETS
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void HistogramTargets(const char* kind, const std::map<RVA, int>& map);
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Most addresses are represented as 32-bit RVAs.  The one address we can't
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // do this with is the image base address.  'image_base' is valid only for
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // 32-bit executables. 'image_base_64' is valid for 32- and 64-bit executable.
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint32 image_base() const { return static_cast<uint32>(image_base_); }
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const ImageDataDirectory& base_relocation_table() const {
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return base_relocation_table_;
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool IsValidRVA(RVA rva) const { return rva < size_of_image_; }
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns description of the RVA, e.g. ".text+0x1243".  For debugging only.
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::string DescribeRVA(RVA rva) const;
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Finds the first section at file_offset or above.  Does not return sections
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // that have no raw bytes in the file.
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const Section* FindNextSection(uint32 file_offset) const;
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // There are 2 'coordinate systems' for reasoning about executables.
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //   FileOffset - the the offset within a single .EXE or .DLL *file*.
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //   RVA - relative virtual address (offset within *loaded image*)
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // FileOffsetToRVA and RVAToFileOffset convert between these representations.
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  RVA FileOffsetToRVA(uint32 offset) const;
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool ReadDataDirectory(int index, ImageDataDirectory* dir);
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool incomplete_disassembly_;  // 'true' if can leave out 'uninteresting' bits
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::vector<RVA> abs32_locations_;
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::vector<RVA> rel32_locations_;
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Fields that are always valid.
1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //
1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Information that is valid after successful ParseHeader.
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool is_PE32_plus_;   // PE32_plus is for 64 bit executables.
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Location and size of IMAGE_OPTIONAL_HEADER in the buffer.
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const uint8 *optional_header_;
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint16 size_of_optional_header_;
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint16 offset_of_data_directories_;
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint16 machine_type_;
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint16 number_of_sections_;
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const Section *sections_;
1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool has_text_section_;
1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint32 size_of_code_;
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint32 size_of_initialized_data_;
1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint32 size_of_uninitialized_data_;
1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  RVA base_of_code_;
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  RVA base_of_data_;
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint64 image_base_;  // range limited to 32 bits for 32 bit executable
1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint32 size_of_image_;
1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int number_of_data_directories_;
1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ImageDataDirectory export_table_;
1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ImageDataDirectory import_table_;
1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ImageDataDirectory resource_table_;
1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ImageDataDirectory exception_table_;
1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ImageDataDirectory base_relocation_table_;
1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ImageDataDirectory bound_import_table_;
1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ImageDataDirectory import_address_table_;
1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ImageDataDirectory delay_import_descriptor_;
1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ImageDataDirectory clr_runtime_header_;
1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#if COURGETTE_HISTOGRAM_TARGETS
1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::map<RVA, int> abs32_target_rvas_;
1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::map<RVA, int> rel32_target_rvas_;
1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DISALLOW_COPY_AND_ASSIGN(DisassemblerWin32X86);
1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace courgette
1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif  // COURGETTE_DISASSEMBLER_WIN32_X86_H_
161