1// All rights reserved.
2//
3// Redistribution and use in source and binary forms, with or without
4// modification, are permitted provided that the following conditions are
5// met:
6//
7//     * Redistributions of source code must retain the above copyright
8// notice, this list of conditions and the following disclaimer.
9//     * Redistributions in binary form must reproduce the above
10// copyright notice, this list of conditions and the following disclaimer
11// in the documentation and/or other materials provided with the
12// distribution.
13//     * Neither the name of Google Inc. nor the names of its
14// contributors may be used to endorse or promote products derived from
15// this software without specific prior written permission.
16//
17// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29// disassembler_x86.h: Basic x86 bytecode disassembler
30//
31// Provides a simple disassembler which wraps libdisasm. This allows simple
32// tests to be run against bytecode to test for various properties.
33//
34// Author: Cris Neckar
35
36#ifndef GOOGLE_BREAKPAD_PROCESSOR_DISASSEMBLER_X86_H_
37#define GOOGLE_BREAKPAD_PROCESSOR_DISASSEMBLER_X86_H_
38
39#include <stddef.h>
40#include <sys/types.h>
41
42#include "google_breakpad/common/breakpad_types.h"
43
44namespace libdis {
45#include "third_party/libdisasm/libdis.h"
46}
47
48namespace google_breakpad {
49
// Bit flags describing conditions noted during disassembly. Every non-zero
// enumerator occupies its own bit, so several can be OR-ed into one value
// (presumably the value reported by DisassemblerX86::flags() -- confirm
// against the implementation).
enum {
  DISX86_NONE                = 0,
  DISX86_BAD_BRANCH_TARGET   = 1 << 0,
  DISX86_BAD_ARGUMENT_PASSED = 1 << 1,
  DISX86_BAD_WRITE           = 1 << 2,
  DISX86_BAD_BLOCK_WRITE     = 1 << 3,
  DISX86_BAD_READ            = 1 << 4,
  DISX86_BAD_BLOCK_READ      = 1 << 5,
  DISX86_BAD_COMPARISON      = 1 << 6
};
60
61class DisassemblerX86 {
62  public:
63    // TODO(cdn): Modify this class to take a MemoryRegion instead of just
64    // a raw buffer. This will make it easier to use this on arbitrary
65    // minidumps without first copying out the code segment.
66    DisassemblerX86(const uint8_t *bytecode, uint32_t, uint32_t);
67    ~DisassemblerX86();
68
69    // This walks to the next instruction in the memory region and
70    // sets flags based on the type of instruction and previous state
71    // including any registers marked as bad through setBadRead()
72    // or setBadWrite(). This method can be called in a loop to
73    // disassemble until the end of a region.
74    uint32_t NextInstruction();
75
76    // Indicates whether the current disassembled instruction was valid.
77    bool currentInstructionValid() { return instr_valid_; }
78
79    // Returns the current instruction as defined in libdis.h,
80    // or NULL if the current instruction is not valid.
81    const libdis::x86_insn_t* currentInstruction() {
82      return instr_valid_ ? &current_instr_ : NULL;
83    }
84
85    // Returns the type of the current instruction as defined in libdis.h.
86    libdis::x86_insn_group currentInstructionGroup() {
87      return current_instr_.group;
88    }
89
90    // Indicates whether a return instruction has been encountered.
91    bool endOfBlock() { return end_of_block_; }
92
93    // The flags set so far for the disassembly.
94    uint16_t flags() { return flags_; }
95
96    // This sets an indicator that the register used to determine
97    // src or dest for the current instruction is tainted. These can
98    // be used after examining the current instruction to indicate,
99    // for example that a bad read or write occurred and the pointer
100    // stored in the register is currently invalid.
101    bool setBadRead();
102    bool setBadWrite();
103
104  protected:
105    const uint8_t *bytecode_;
106    uint32_t size_;
107    uint32_t virtual_address_;
108    uint32_t current_byte_offset_;
109    uint32_t current_inst_offset_;
110
111    bool instr_valid_;
112    libdis::x86_insn_t current_instr_;
113
114    // TODO(cdn): Maybe also track an expression's index register.
115    // ex: mov eax, [ebx + ecx]; ebx is base, ecx is index.
116    bool register_valid_;
117    libdis::x86_reg_t bad_register_;
118
119    bool pushed_bad_value_;
120    bool end_of_block_;
121
122    uint16_t flags_;
123};
124
125}  // namespace google_breakpad
126
127#endif  // GOOGLE_BREAKPAD_PROCESSOR_DISASSEMBLER_X86_H_
128