1// Copyright 2014 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/safe_browsing/incident_reporting/module_integrity_verifier_win.h"
6
7#include "base/containers/hash_tables.h"
8#include "base/files/file_path.h"
9#include "base/files/memory_mapped_file.h"
10#include "base/metrics/sparse_histogram.h"
11#include "base/scoped_native_library.h"
12#include "base/win/pe_image.h"
13#include "build/build_config.h"
14
15namespace safe_browsing {
16
17struct ModuleVerificationState {
18  explicit ModuleVerificationState(HMODULE hModule);
19  ~ModuleVerificationState();
20
21  base::win::PEImageAsData disk_peimage;
22
23  // The module's preferred base address minus the base address it actually
24  // loaded at.
25  intptr_t image_base_delta;
26
27  // The location of the disk_peimage module's code section minus that of the
28  // mem_peimage module's code section.
29  intptr_t code_section_delta;
30
31  // The bytes corrected by relocs.
32  base::hash_set<uintptr_t> reloc_addr;
33
34  // Set true if the relocation table contains a reloc of type that we don't
35  // currently handle.
36  bool unknown_reloc_type;
37
38 private:
39  DISALLOW_COPY_AND_ASSIGN(ModuleVerificationState);
40};
41
42ModuleVerificationState::ModuleVerificationState(HMODULE hModule)
43    : disk_peimage(hModule),
44      image_base_delta(0),
45      code_section_delta(0),
46      reloc_addr(),
47      unknown_reloc_type(false) {
48}
49
50ModuleVerificationState::~ModuleVerificationState() {
51}
52
53namespace {
54
// An address paired with the name it is exported under.  Instances order by
// address so that a sorted list of them can be binary-searched.
struct Export {
  Export(void* addr, const std::string& name);
  ~Export();

  // Orders by ascending |addr|; |name| takes no part in the comparison.
  bool operator<(const Export& other) const;

  void* addr;
  std::string name;
};

Export::Export(void* addr, const std::string& name)
    : addr(addr),
      name(name) {}

Export::~Export() {}

bool Export::operator<(const Export& other) const {
  return other.addr > addr;
}
74
75bool ByteAccountedForByReloc(uint8_t* byte_addr,
76                             const ModuleVerificationState& state) {
77  return ((state.reloc_addr.count(reinterpret_cast<uintptr_t>(byte_addr))) > 0);
78}
79
80// Checks each byte in the module's code section again the corresponding byte on
81// disk, returning the number of bytes differing between the two.  Also adds the
82// names of any modfied functions exported by name to |modified_exports|.
83// |exports| must be sorted.
84int ExamineBytesDiffInMemory(uint8_t* disk_code_start,
85                             uint8_t* mem_code_start,
86                             uint32_t code_size,
87                             const std::vector<Export>& exports,
88                             const ModuleVerificationState& state,
89                             std::set<std::string>* modified_exports) {
90  int bytes_different = 0;
91  std::vector<Export>::const_iterator export_it = exports.begin();
92
93  for (uint8_t* end = mem_code_start + code_size; mem_code_start != end;
94       ++mem_code_start) {
95    if ((*disk_code_start++ != *mem_code_start) &&
96        !ByteAccountedForByReloc(mem_code_start, state)) {
97      // We get the largest export address still smaller than |addr|.  It is
98      // possible that |addr| belongs to some nonexported function located
99      // between this export and the following one.
100      Export addr(reinterpret_cast<void*>(mem_code_start), std::string());
101      std::vector<Export>::const_iterator modified_export_it =
102          std::upper_bound(export_it, exports.end(), addr);
103
104      if (modified_export_it != exports.begin())
105        modified_exports->insert((modified_export_it - 1)->name);
106      ++bytes_different;
107
108      // No later byte can belong to an earlier export.
109      export_it = modified_export_it;
110    }
111  }
112  return bytes_different;
113}
114
115// Adds to |state->reloc_addr| the bytes of the pointer at |address| that are
116// corrected by adding |image_base_delta|.
117void AddBytesCorrectedByReloc(uintptr_t address,
118                              ModuleVerificationState* state) {
119#if defined(ARCH_CPU_LITTLE_ENDIAN)
120#  define OFFSET(i) i
121#else
122#  define OFFSET(i) (sizeof(uintptr_t) - i)
123#endif
124
125  uintptr_t orig_mem_value = *reinterpret_cast<uintptr_t*>(address);
126  uintptr_t fixed_mem_value = orig_mem_value + state->image_base_delta;
127  uintptr_t disk_value =
128      *reinterpret_cast<uintptr_t*>(address + state->code_section_delta);
129
130  uintptr_t diff_before = orig_mem_value ^ disk_value;
131  uintptr_t shared_after = ~(fixed_mem_value ^ disk_value);
132  int i = 0;
133  for (uintptr_t fixed = diff_before & shared_after; fixed; fixed >>= 8, ++i) {
134    if (fixed & 0xFF)
135      state->reloc_addr.insert(address + OFFSET(i));
136  }
137#undef OFFSET
138}
139
// Returns true if |address| falls within the half-open range
// [|code_addr|, |code_addr| + |code_size|).
bool AddrIsInCodeSection(void* address,
                         uint8_t* code_addr,
                         uint32_t code_size) {
  if (address < code_addr)
    return false;
  uint8_t* code_end = code_addr + code_size;
  return address < code_end;
}
145
146bool EnumRelocsCallback(const base::win::PEImage& mem_peimage,
147                        WORD type,
148                        void* address,
149                        void* cookie) {
150  ModuleVerificationState* state =
151      reinterpret_cast<ModuleVerificationState*>(cookie);
152
153  uint8_t* mem_code_addr = NULL;
154  uint8_t* disk_code_addr = NULL;
155  uint32_t code_size = 0;
156  if (!GetCodeAddrsAndSize(mem_peimage,
157                           state->disk_peimage,
158                           &mem_code_addr,
159                           &disk_code_addr,
160                           &code_size))
161    return false;
162
163  // If not in the code section return true to continue to the next reloc.
164  if (!AddrIsInCodeSection(address, mem_code_addr, code_size))
165    return true;
166
167  switch (type) {
168    case IMAGE_REL_BASED_ABSOLUTE:  // 0
169      // Absolute type relocations are a noop, sometimes used to pad a section
170      // of relocations.
171      break;
172    case IMAGE_REL_BASED_HIGHLOW:  // 3
173      // The base relocation applies all 32 bits of the difference to the 32-bit
174      // field at offset.
175      AddBytesCorrectedByReloc(reinterpret_cast<uintptr_t>(address), state);
176      break;
177    case IMAGE_REL_BASED_DIR64:  // 10
178      // The base relocation applies the difference to the 64-bit field at
179      // offset.
180      // TODO(robertshield): Handle this type of reloc.
181      break;
182    default:
183      // TODO(robertshield): Find a reliable description of the behaviour of the
184      // remaining types of relocation and handle them.
185      UMA_HISTOGRAM_SPARSE_SLOWLY("SafeBrowsing.ModuleBaseRelocation", type);
186      state->unknown_reloc_type = true;
187      break;
188  }
189  return true;
190}
191
192bool EnumExportsCallback(const base::win::PEImage& mem_peimage,
193                         DWORD ordinal,
194                         DWORD hint,
195                         LPCSTR name,
196                         PVOID function_addr,
197                         LPCSTR forward,
198                         PVOID cookie) {
199  std::vector<Export>* exports = reinterpret_cast<std::vector<Export>*>(cookie);
200  if (name)
201    exports->push_back(Export(function_addr, std::string(name)));
202  return true;
203}
204
205}  // namespace
206
207bool GetCodeAddrsAndSize(const base::win::PEImage& mem_peimage,
208                         const base::win::PEImageAsData& disk_peimage,
209                         uint8_t** mem_code_addr,
210                         uint8_t** disk_code_addr,
211                         uint32_t* code_size) {
212  DWORD base_of_code = mem_peimage.GetNTHeaders()->OptionalHeader.BaseOfCode;
213
214  // Get the address and size of the code section in the loaded module image.
215  PIMAGE_SECTION_HEADER mem_code_header =
216      mem_peimage.GetImageSectionFromAddr(mem_peimage.RVAToAddr(base_of_code));
217  if (mem_code_header == NULL)
218    return false;
219  *mem_code_addr = reinterpret_cast<uint8_t*>(
220      mem_peimage.RVAToAddr(mem_code_header->VirtualAddress));
221  // If the section is padded with zeros when mapped then |VirtualSize| can be
222  // larger.  Alternatively, |SizeOfRawData| can be rounded up to align
223  // according to OptionalHeader.FileAlignment.
224  *code_size = std::min(mem_code_header->Misc.VirtualSize,
225                        mem_code_header->SizeOfRawData);
226
227  // Get the address of the code section in the module mapped as data from disk.
228  DWORD disk_code_offset = 0;
229  if (!mem_peimage.ImageAddrToOnDiskOffset(
230          reinterpret_cast<void*>(*mem_code_addr), &disk_code_offset))
231    return false;
232  *disk_code_addr =
233      reinterpret_cast<uint8_t*>(disk_peimage.module()) + disk_code_offset;
234  return true;
235}
236
237ModuleState VerifyModule(const wchar_t* module_name,
238                         std::set<std::string>* modified_exports) {
239  // Get module handle, load a copy from disk as data and create PEImages.
240  HMODULE module_handle = NULL;
241  if (!GetModuleHandleEx(0, module_name, &module_handle))
242    return MODULE_STATE_UNKNOWN;
243  base::ScopedNativeLibrary native_library(module_handle);
244
245  WCHAR module_path[MAX_PATH] = {};
246  DWORD length =
247      GetModuleFileName(module_handle, module_path, arraysize(module_path));
248  if (!length || length == arraysize(module_path))
249    return MODULE_STATE_UNKNOWN;
250
251  base::MemoryMappedFile mapped_module;
252  if (!mapped_module.Initialize(base::FilePath(module_path)))
253    return MODULE_STATE_UNKNOWN;
254  ModuleVerificationState state(
255      reinterpret_cast<HMODULE>(const_cast<uint8*>(mapped_module.data())));
256
257  base::win::PEImage mem_peimage(module_handle);
258  if (!mem_peimage.VerifyMagic() || !state.disk_peimage.VerifyMagic())
259    return MODULE_STATE_UNKNOWN;
260
261  // Get the list of exports.
262  std::vector<Export> exports;
263  mem_peimage.EnumExports(EnumExportsCallback, &exports);
264  std::sort(exports.begin(), exports.end());
265
266  // Get the addresses of the code sections then calculate |code_section_delta|
267  // and |image_base_delta|.
268  uint8_t* mem_code_addr = NULL;
269  uint8_t* disk_code_addr = NULL;
270  uint32_t code_size = 0;
271  if (!GetCodeAddrsAndSize(mem_peimage,
272                           state.disk_peimage,
273                           &mem_code_addr,
274                           &disk_code_addr,
275                           &code_size))
276    return MODULE_STATE_UNKNOWN;
277
278  state.code_section_delta = disk_code_addr - mem_code_addr;
279
280  uint8_t* preferred_image_base = reinterpret_cast<uint8_t*>(
281      state.disk_peimage.GetNTHeaders()->OptionalHeader.ImageBase);
282  state.image_base_delta =
283      preferred_image_base - reinterpret_cast<uint8_t*>(mem_peimage.module());
284
285  // Get the relocations.
286  mem_peimage.EnumRelocs(EnumRelocsCallback, &state);
287  if (state.unknown_reloc_type)
288    return MODULE_STATE_UNKNOWN;
289
290  // Count the modified bytes (after accounting for relocs) and get the set of
291  // modified functions.
292  int num_bytes_different = ExamineBytesDiffInMemory(disk_code_addr,
293                                                     mem_code_addr,
294                                                     code_size,
295                                                     exports,
296                                                     state,
297                                                     modified_exports);
298
299  return num_bytes_different ? MODULE_STATE_MODIFIED : MODULE_STATE_UNMODIFIED;
300}
301
302}  // namespace safe_browsing
303