disassembler_win32_x86.cc revision 5821806d5e7f356e8fa4b058a389a808ea183019
1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "courgette/disassembler_win32_x86.h"
6
7#include <algorithm>
8#include <string>
9#include <vector>
10
11#include "base/basictypes.h"
12#include "base/logging.h"
13
14#include "courgette/assembly_program.h"
15#include "courgette/courgette.h"
16#include "courgette/encoded_program.h"
17
18// COURGETTE_HISTOGRAM_TARGETS prints out a histogram of how frequently
19// different target addresses are referenced.  Purely for debugging.
20#define COURGETTE_HISTOGRAM_TARGETS 0
21
22namespace courgette {
23
24DisassemblerWin32X86::DisassemblerWin32X86(const void* start, size_t length)
25  : Disassembler(start, length),
26    incomplete_disassembly_(false),
27    is_PE32_plus_(false),
28    optional_header_(NULL),
29    size_of_optional_header_(0),
30    offset_of_data_directories_(0),
31    machine_type_(0),
32    number_of_sections_(0),
33    sections_(NULL),
34    has_text_section_(false),
35    size_of_code_(0),
36    size_of_initialized_data_(0),
37    size_of_uninitialized_data_(0),
38    base_of_code_(0),
39    base_of_data_(0),
40    image_base_(0),
41    size_of_image_(0),
42    number_of_data_directories_(0) {
43}
44
45// ParseHeader attempts to match up the buffer with the Windows data
46// structures that exist within a Windows 'Portable Executable' format file.
47// Returns 'true' if the buffer matches, and 'false' if the data looks
48// suspicious.  Rather than try to 'map' the buffer to the numerous windows
49// structures, we extract the information we need into the courgette::PEInfo
50// structure.
51//
52bool DisassemblerWin32X86::ParseHeader() {
53  if (length() < kOffsetOfFileAddressOfNewExeHeader + 4 /*size*/)
54    return Bad("Too small");
55
56  // Have 'MZ' magic for a DOS header?
57  if (start()[0] != 'M' || start()[1] != 'Z')
58    return Bad("Not MZ");
59
60  // offset from DOS header to PE header is stored in DOS header.
61  uint32 offset = ReadU32(start(),
62                          kOffsetOfFileAddressOfNewExeHeader);
63
64  if (offset >= length())
65    return Bad("Bad offset to PE header");
66
67  const uint8* const pe_header = OffsetToPointer(offset);
68  const size_t kMinPEHeaderSize = 4 /*signature*/ + kSizeOfCoffHeader;
69  if (pe_header <= start() ||
70      pe_header >= end() - kMinPEHeaderSize)
71    return Bad("Bad offset to PE header");
72
73  if (offset % 8 != 0)
74    return Bad("Misaligned PE header");
75
76  // The 'PE' header is an IMAGE_NT_HEADERS structure as defined in WINNT.H.
77  // See http://msdn.microsoft.com/en-us/library/ms680336(VS.85).aspx
78  //
79  // The first field of the IMAGE_NT_HEADERS is the signature.
80  if (!(pe_header[0] == 'P' &&
81        pe_header[1] == 'E' &&
82        pe_header[2] == 0 &&
83        pe_header[3] == 0))
84    return Bad("no PE signature");
85
86  // The second field of the IMAGE_NT_HEADERS is the COFF header.
87  // The COFF header is also called an IMAGE_FILE_HEADER
88  //   http://msdn.microsoft.com/en-us/library/ms680313(VS.85).aspx
89  const uint8* const coff_header = pe_header + 4;
90  machine_type_       = ReadU16(coff_header, 0);
91  number_of_sections_ = ReadU16(coff_header, 2);
92  size_of_optional_header_ = ReadU16(coff_header, 16);
93
94  // The rest of the IMAGE_NT_HEADERS is the IMAGE_OPTIONAL_HEADER(32|64)
95  const uint8* const optional_header = coff_header + kSizeOfCoffHeader;
96  optional_header_ = optional_header;
97
98  if (optional_header + size_of_optional_header_ >= end())
99    return Bad("optional header past end of file");
100
101  // Check we can read the magic.
102  if (size_of_optional_header_ < 2)
103    return Bad("optional header no magic");
104
105  uint16 magic = ReadU16(optional_header, 0);
106
107  if (magic == kImageNtOptionalHdr32Magic) {
108    is_PE32_plus_ = false;
109    offset_of_data_directories_ =
110      kOffsetOfDataDirectoryFromImageOptionalHeader32;
111  } else if (magic == kImageNtOptionalHdr64Magic) {
112    is_PE32_plus_ = true;
113    offset_of_data_directories_ =
114      kOffsetOfDataDirectoryFromImageOptionalHeader64;
115  } else {
116    return Bad("unrecognized magic");
117  }
118
119  // Check that we can read the rest of the the fixed fields.  Data directories
120  // directly follow the fixed fields of the IMAGE_OPTIONAL_HEADER.
121  if (size_of_optional_header_ < offset_of_data_directories_)
122    return Bad("optional header too short");
123
124  // The optional header is either an IMAGE_OPTIONAL_HEADER32 or
125  // IMAGE_OPTIONAL_HEADER64
126  // http://msdn.microsoft.com/en-us/library/ms680339(VS.85).aspx
127  //
128  // Copy the fields we care about.
129  size_of_code_               = ReadU32(optional_header, 4);
130  size_of_initialized_data_   = ReadU32(optional_header, 8);
131  size_of_uninitialized_data_ = ReadU32(optional_header, 12);
132  base_of_code_               = ReadU32(optional_header, 20);
133  if (is_PE32_plus_) {
134    base_of_data_ = 0;
135    image_base_  = ReadU64(optional_header, 24);
136  } else {
137    base_of_data_ = ReadU32(optional_header, 24);
138    image_base_   = ReadU32(optional_header, 28);
139  }
140  size_of_image_ = ReadU32(optional_header, 56);
141  number_of_data_directories_ =
142    ReadU32(optional_header, (is_PE32_plus_ ? 108 : 92));
143
144  if (size_of_code_ >= length() ||
145      size_of_initialized_data_ >= length() ||
146      size_of_code_ + size_of_initialized_data_ >= length()) {
147    // This validation fires on some perfectly fine executables.
148    //  return Bad("code or initialized data too big");
149  }
150
151  // TODO(sra): we can probably get rid of most of the data directories.
152  bool b = true;
153  // 'b &= ...' could be short circuit 'b = b && ...' but it is not necessary
154  // for correctness and it compiles smaller this way.
155  b &= ReadDataDirectory(0, &export_table_);
156  b &= ReadDataDirectory(1, &import_table_);
157  b &= ReadDataDirectory(2, &resource_table_);
158  b &= ReadDataDirectory(3, &exception_table_);
159  b &= ReadDataDirectory(5, &base_relocation_table_);
160  b &= ReadDataDirectory(11, &bound_import_table_);
161  b &= ReadDataDirectory(12, &import_address_table_);
162  b &= ReadDataDirectory(13, &delay_import_descriptor_);
163  b &= ReadDataDirectory(14, &clr_runtime_header_);
164  if (!b) {
165    return Bad("malformed data directory");
166  }
167
168  // Sections follow the optional header.
169  sections_ =
170      reinterpret_cast<const Section*>(optional_header +
171                                       size_of_optional_header_);
172  size_t detected_length = 0;
173
174  for (int i = 0;  i < number_of_sections_;  ++i) {
175    const Section* section = &sections_[i];
176
177    // TODO(sra): consider using the 'characteristics' field of the section
178    // header to see if the section contains instructions.
179    if (memcmp(section->name, ".text", 6) == 0)
180      has_text_section_ = true;
181
182    uint32 section_end =
183        section->file_offset_of_raw_data + section->size_of_raw_data;
184    if (section_end > detected_length)
185      detected_length = section_end;
186  }
187
188  // Pretend our in-memory copy is only as long as our detected length.
189  ReduceLength(detected_length);
190
191  if (!is_32bit()) {
192    return Bad("64 bit executables are not yet supported");
193  }
194
195  if (!has_text_section()) {
196    return Bad("Resource-only executables are not yet supported");
197  }
198
199  return Good();
200}
201
202bool DisassemblerWin32X86::Disassemble(AssemblyProgram* target) {
203  if (!ok())
204    return false;
205
206  target->set_image_base(image_base());
207
208  if (!ParseAbs32Relocs())
209    return false;
210
211  ParseRel32RelocsFromSections();
212
213  if (!ParseFile(target))
214    return false;
215
216  target->DefaultAssignIndexes();
217
218  return true;
219}
220
221////////////////////////////////////////////////////////////////////////////////
222
223bool DisassemblerWin32X86::ParseRelocs(std::vector<RVA> *relocs) {
224  relocs->clear();
225
226  size_t relocs_size = base_relocation_table_.size_;
227  if (relocs_size == 0)
228    return true;
229
230  // The format of the base relocation table is a sequence of variable sized
231  // IMAGE_BASE_RELOCATION blocks.  Search for
232  //   "The format of the base relocation data is somewhat quirky"
233  // at http://msdn.microsoft.com/en-us/library/ms809762.aspx
234
235  const uint8* relocs_start = RVAToPointer(base_relocation_table_.address_);
236  const uint8* relocs_end = relocs_start + relocs_size;
237
238  // Make sure entire base relocation table is within the buffer.
239  if (relocs_start < start() ||
240      relocs_start >= end() ||
241      relocs_end <= start() ||
242      relocs_end > end()) {
243    return Bad(".relocs outside image");
244  }
245
246  const uint8* block = relocs_start;
247
248  // Walk the variable sized blocks.
249  while (block + 8 < relocs_end) {
250    RVA page_rva = ReadU32(block, 0);
251    uint32 size = ReadU32(block, 4);
252    if (size < 8 ||        // Size includes header ...
253        size % 4  !=  0)   // ... and is word aligned.
254      return Bad("unreasonable relocs block");
255
256    const uint8* end_entries = block + size;
257
258    if (end_entries <= block ||
259        end_entries <= start() ||
260        end_entries > end())
261      return Bad(".relocs block outside image");
262
263    // Walk through the two-byte entries.
264    for (const uint8* p = block + 8;  p < end_entries;  p += 2) {
265      uint16 entry = ReadU16(p, 0);
266      int type = entry >> 12;
267      int offset = entry & 0xFFF;
268
269      RVA rva = page_rva + offset;
270      if (type == 3) {         // IMAGE_REL_BASED_HIGHLOW
271        relocs->push_back(rva);
272      } else if (type == 0) {  // IMAGE_REL_BASED_ABSOLUTE
273        // Ignore, used as padding.
274      } else {
275        // Does not occur in Windows x86 executables.
276        return Bad("unknown type of reloc");
277      }
278    }
279
280    block += size;
281  }
282
283  std::sort(relocs->begin(), relocs->end());
284
285  return true;
286}
287
288const Section* DisassemblerWin32X86::RVAToSection(RVA rva) const {
289  for (int i = 0; i < number_of_sections_; i++) {
290    const Section* section = &sections_[i];
291    uint32 offset = rva - section->virtual_address;
292    if (offset < section->virtual_size) {
293      return section;
294    }
295  }
296  return NULL;
297}
298
299int DisassemblerWin32X86::RVAToFileOffset(RVA rva) const {
300  const Section* section = RVAToSection(rva);
301  if (section) {
302    uint32 offset = rva - section->virtual_address;
303    if (offset < section->size_of_raw_data) {
304      return section->file_offset_of_raw_data + offset;
305    } else {
306      return kNoOffset;  // In section but not in file (e.g. uninit data).
307    }
308  }
309
310  // Small RVA values point into the file header in the loaded image.
311  // RVA 0 is the module load address which Windows uses as the module handle.
312  // RVA 2 sometimes occurs, I'm not sure what it is, but it would map into the
313  // DOS header.
314  if (rva == 0 || rva == 2)
315    return rva;
316
317  NOTREACHED();
318  return kNoOffset;
319}
320
321const uint8* DisassemblerWin32X86::RVAToPointer(RVA rva) const {
322  int file_offset = RVAToFileOffset(rva);
323  if (file_offset == kNoOffset)
324    return NULL;
325  else
326    return OffsetToPointer(file_offset);
327}
328
329std::string DisassemblerWin32X86::SectionName(const Section* section) {
330  if (section == NULL)
331    return "<none>";
332  char name[9];
333  memcpy(name, section->name, 8);
334  name[8] = '\0';  // Ensure termination.
335  return name;
336}
337
338CheckBool DisassemblerWin32X86::ParseFile(AssemblyProgram* program) {
339  // Walk all the bytes in the file, whether or not in a section.
340  uint32 file_offset = 0;
341  while (file_offset < length()) {
342    const Section* section = FindNextSection(file_offset);
343    if (section == NULL) {
344      // No more sections.  There should not be extra stuff following last
345      // section.
346      //   ParseNonSectionFileRegion(file_offset, pe_info().length(), program);
347      break;
348    }
349    if (file_offset < section->file_offset_of_raw_data) {
350      uint32 section_start_offset = section->file_offset_of_raw_data;
351      if(!ParseNonSectionFileRegion(file_offset, section_start_offset,
352                                    program))
353        return false;
354
355      file_offset = section_start_offset;
356    }
357    uint32 end = file_offset + section->size_of_raw_data;
358    if (!ParseFileRegion(section, file_offset, end, program))
359      return false;
360    file_offset = end;
361  }
362
363#if COURGETTE_HISTOGRAM_TARGETS
364  HistogramTargets("abs32 relocs", abs32_target_rvas_);
365  HistogramTargets("rel32 relocs", rel32_target_rvas_);
366#endif
367
368  return true;
369}
370
371bool DisassemblerWin32X86::ParseAbs32Relocs() {
372  abs32_locations_.clear();
373  if (!ParseRelocs(&abs32_locations_))
374    return false;
375
376  std::sort(abs32_locations_.begin(), abs32_locations_.end());
377
378#if COURGETTE_HISTOGRAM_TARGETS
379  for (size_t i = 0;  i < abs32_locations_.size(); ++i) {
380    RVA rva = abs32_locations_[i];
381    // The 4 bytes at the relocation are a reference to some address.
382    uint32 target_address = Read32LittleEndian(RVAToPointer(rva));
383    ++abs32_target_rvas_[target_address - image_base()];
384  }
385#endif
386  return true;
387}
388
389void DisassemblerWin32X86::ParseRel32RelocsFromSections() {
390  uint32 file_offset = 0;
391  while (file_offset < length()) {
392    const Section* section = FindNextSection(file_offset);
393    if (section == NULL)
394      break;
395    if (file_offset < section->file_offset_of_raw_data)
396      file_offset = section->file_offset_of_raw_data;
397    ParseRel32RelocsFromSection(section);
398    file_offset += section->size_of_raw_data;
399  }
400  std::sort(rel32_locations_.begin(), rel32_locations_.end());
401
402#if COURGETTE_HISTOGRAM_TARGETS
403  VLOG(1) << "abs32_locations_ " << abs32_locations_.size()
404          << "\nrel32_locations_ " << rel32_locations_.size()
405          << "\nabs32_target_rvas_ " << abs32_target_rvas_.size()
406          << "\nrel32_target_rvas_ " << rel32_target_rvas_.size();
407
408  int common = 0;
409  std::map<RVA, int>::iterator abs32_iter = abs32_target_rvas_.begin();
410  std::map<RVA, int>::iterator rel32_iter = rel32_target_rvas_.begin();
411  while (abs32_iter != abs32_target_rvas_.end() &&
412         rel32_iter != rel32_target_rvas_.end()) {
413    if (abs32_iter->first < rel32_iter->first)
414      ++abs32_iter;
415    else if (rel32_iter->first < abs32_iter->first)
416      ++rel32_iter;
417    else {
418      ++common;
419      ++abs32_iter;
420      ++rel32_iter;
421    }
422  }
423  VLOG(1) << "common " << common;
424#endif
425}
426
427void DisassemblerWin32X86::ParseRel32RelocsFromSection(const Section* section) {
428  // TODO(sra): use characteristic.
429  bool isCode = strcmp(section->name, ".text") == 0;
430  if (!isCode)
431    return;
432
433  uint32 start_file_offset = section->file_offset_of_raw_data;
434  uint32 end_file_offset = start_file_offset + section->size_of_raw_data;
435  RVA relocs_start_rva = base_relocation_table().address_;
436
437  const uint8* start_pointer = OffsetToPointer(start_file_offset);
438  const uint8* end_pointer = OffsetToPointer(end_file_offset);
439
440  RVA start_rva = FileOffsetToRVA(start_file_offset);
441  RVA end_rva = start_rva + section->virtual_size;
442
443  // Quick way to convert from Pointer to RVA within a single Section is to
444  // subtract 'pointer_to_rva'.
445  const uint8* const adjust_pointer_to_rva = start_pointer - start_rva;
446
447  std::vector<RVA>::iterator abs32_pos = abs32_locations_.begin();
448
449  // Find the rel32 relocations.
450  const uint8* p = start_pointer;
451  while (p < end_pointer) {
452    RVA current_rva = static_cast<RVA>(p - adjust_pointer_to_rva);
453    if (current_rva == relocs_start_rva) {
454      uint32 relocs_size = base_relocation_table().size_;
455      if (relocs_size) {
456        p += relocs_size;
457        continue;
458      }
459    }
460
461    //while (abs32_pos != abs32_locations_.end() && *abs32_pos < current_rva)
462    //  ++abs32_pos;
463
464    // Heuristic discovery of rel32 locations in instruction stream: are the
465    // next few bytes the start of an instruction containing a rel32
466    // addressing mode?
467    const uint8* rel32 = NULL;
468
469    if (p + 5 <= end_pointer) {
470      if (*p == 0xE8 || *p == 0xE9) {  // jmp rel32 and call rel32
471        rel32 = p + 1;
472      }
473    }
474    if (p + 6 <= end_pointer) {
475      if (*p == 0x0F  &&  (*(p+1) & 0xF0) == 0x80) {  // Jcc long form
476        if (p[1] != 0x8A && p[1] != 0x8B)  // JPE/JPO unlikely
477          rel32 = p + 2;
478      }
479    }
480    if (rel32) {
481      RVA rel32_rva = static_cast<RVA>(rel32 - adjust_pointer_to_rva);
482
483      // Is there an abs32 reloc overlapping the candidate?
484      while (abs32_pos != abs32_locations_.end() && *abs32_pos < rel32_rva - 3)
485        ++abs32_pos;
486      // Now: (*abs32_pos > rel32_rva - 4) i.e. the lowest addressed 4-byte
487      // region that could overlap rel32_rva.
488      if (abs32_pos != abs32_locations_.end()) {
489        if (*abs32_pos < rel32_rva + 4) {
490          // Beginning of abs32 reloc is before end of rel32 reloc so they
491          // overlap.  Skip four bytes past the abs32 reloc.
492          p += (*abs32_pos + 4) - current_rva;
493          continue;
494        }
495      }
496
497      RVA target_rva = rel32_rva + 4 + Read32LittleEndian(rel32);
498      // To be valid, rel32 target must be within image, and within this
499      // section.
500      if (IsValidRVA(target_rva) &&
501          start_rva <= target_rva && target_rva < end_rva) {
502        rel32_locations_.push_back(rel32_rva);
503#if COURGETTE_HISTOGRAM_TARGETS
504        ++rel32_target_rvas_[target_rva];
505#endif
506        p = rel32 + 4;
507        continue;
508      }
509    }
510    p += 1;
511  }
512}
513
514CheckBool DisassemblerWin32X86::ParseNonSectionFileRegion(
515    uint32 start_file_offset,
516    uint32 end_file_offset,
517    AssemblyProgram* program) {
518  if (incomplete_disassembly_)
519    return true;
520
521  const uint8* start = OffsetToPointer(start_file_offset);
522  const uint8* end = OffsetToPointer(end_file_offset);
523
524  const uint8* p = start;
525
526  while (p < end) {
527    if (!program->EmitByteInstruction(*p))
528      return false;
529    ++p;
530  }
531
532  return true;
533}
534
535CheckBool DisassemblerWin32X86::ParseFileRegion(
536    const Section* section,
537    uint32 start_file_offset, uint32 end_file_offset,
538    AssemblyProgram* program) {
539  RVA relocs_start_rva = base_relocation_table().address_;
540
541  const uint8* start_pointer = OffsetToPointer(start_file_offset);
542  const uint8* end_pointer = OffsetToPointer(end_file_offset);
543
544  RVA start_rva = FileOffsetToRVA(start_file_offset);
545  RVA end_rva = start_rva + section->virtual_size;
546
547  // Quick way to convert from Pointer to RVA within a single Section is to
548  // subtract 'pointer_to_rva'.
549  const uint8* const adjust_pointer_to_rva = start_pointer - start_rva;
550
551  std::vector<RVA>::iterator rel32_pos = rel32_locations_.begin();
552  std::vector<RVA>::iterator abs32_pos = abs32_locations_.begin();
553
554  if (!program->EmitOriginInstruction(start_rva))
555    return false;
556
557  const uint8* p = start_pointer;
558
559  while (p < end_pointer) {
560    RVA current_rva = static_cast<RVA>(p - adjust_pointer_to_rva);
561
562    // The base relocation table is usually in the .relocs section, but it could
563    // actually be anywhere.  Make sure we skip it because we will regenerate it
564    // during assembly.
565    if (current_rva == relocs_start_rva) {
566      if (!program->EmitPeRelocsInstruction())
567        return false;
568      uint32 relocs_size = base_relocation_table().size_;
569      if (relocs_size) {
570        p += relocs_size;
571        continue;
572      }
573    }
574
575    while (abs32_pos != abs32_locations_.end() && *abs32_pos < current_rva)
576      ++abs32_pos;
577
578    if (abs32_pos != abs32_locations_.end() && *abs32_pos == current_rva) {
579      uint32 target_address = Read32LittleEndian(p);
580      RVA target_rva = target_address - image_base();
581      // TODO(sra): target could be Label+offset.  It is not clear how to guess
582      // which it might be.  We assume offset==0.
583      if (!program->EmitAbs32(program->FindOrMakeAbs32Label(target_rva)))
584        return false;
585      p += 4;
586      continue;
587    }
588
589    while (rel32_pos != rel32_locations_.end() && *rel32_pos < current_rva)
590      ++rel32_pos;
591
592    if (rel32_pos != rel32_locations_.end() && *rel32_pos == current_rva) {
593      RVA target_rva = current_rva + 4 + Read32LittleEndian(p);
594      if (!program->EmitRel32(program->FindOrMakeRel32Label(target_rva)))
595        return false;
596      p += 4;
597      continue;
598    }
599
600    if (incomplete_disassembly_) {
601      if ((abs32_pos == abs32_locations_.end() || end_rva <= *abs32_pos) &&
602          (rel32_pos == rel32_locations_.end() || end_rva <= *rel32_pos) &&
603          (end_rva <= relocs_start_rva || current_rva >= relocs_start_rva)) {
604        // No more relocs in this section, don't bother encoding bytes.
605        break;
606      }
607    }
608
609    if (!program->EmitByteInstruction(*p))
610      return false;
611    p += 1;
612  }
613
614  return true;
615}
616
#if COURGETTE_HISTOGRAM_TARGETS
// Prints to std::cout a histogram of how often each target in |map| is
// referenced; |kind| labels the output.  This exists purely for debugging the
// algorithm and is only enabled manually in 'exploration' builds.  There is
// deliberately no command-line configuration for it because this code has to
// be small, which means compiled-out.
void DisassemblerWin32X86::HistogramTargets(const char* kind,
                                            const std::map<RVA, int>& map) {
  typedef std::map<int, std::vector<RVA> > TargetsByIndegree;

  // Group the targets by the number of references to them.
  int total = 0;
  TargetsByIndegree by_indegree;
  for (std::map<RVA, int>::const_iterator entry = map.begin();
       entry != map.end();
       ++entry) {
    by_indegree[entry->second].push_back(entry->first);
    total += entry->second;
  }

  std::cout << total << " " << kind << " to "
            << map.size() << " unique targets" << std::endl;

  std::cout << "indegree: #targets-with-indegree (example)" << std::endl;

  // Show the kFirstN highest indegrees and every indegree of 3 or less; runs
  // of omitted rows in between are replaced by a single "..." line.
  const int kFirstN = 15;
  bool skipped_some = false;
  int rank = 0;
  for (TargetsByIndegree::reverse_iterator bucket = by_indegree.rbegin();
       bucket != by_indegree.rend();
       ++bucket) {
    ++rank;
    const bool shown = rank <= kFirstN || bucket->first <= 3;
    if (!shown) {
      skipped_some = true;
      continue;
    }

    if (skipped_some) {
      std::cout << "..." << std::endl;
    }
    const std::vector<RVA>& targets = bucket->second;
    const size_t count = targets.size();
    std::cout << std::dec << bucket->first << ": " << count;
    if (count <= 2) {
      for (size_t i = 0;  i < count;  ++i)
        std::cout << "  " << DescribeRVA(targets[i]);
    }
    std::cout << std::endl;
    skipped_some = false;
  }
}
#endif  // COURGETTE_HISTOGRAM_TARGETS
662
663
664// DescribeRVA is for debugging only.  I would put it under #ifdef DEBUG except
665// that during development I'm finding I need to call it when compiled in
666// Release mode.  Hence:
667// TODO(sra): make this compile only for debug mode.
668std::string DisassemblerWin32X86::DescribeRVA(RVA rva) const {
669  const Section* section = RVAToSection(rva);
670  std::ostringstream s;
671  s << std::hex << rva;
672  if (section) {
673    s << " (";
674    s << SectionName(section) << "+"
675      << std::hex << (rva - section->virtual_address)
676      << ")";
677  }
678  return s.str();
679}
680
681const Section* DisassemblerWin32X86::FindNextSection(uint32 fileOffset) const {
682  const Section* best = 0;
683  for (int i = 0; i < number_of_sections_; i++) {
684    const Section* section = &sections_[i];
685    if (section->size_of_raw_data > 0) {  // i.e. has data in file.
686      if (fileOffset <= section->file_offset_of_raw_data) {
687        if (best == 0 ||
688            section->file_offset_of_raw_data < best->file_offset_of_raw_data) {
689          best = section;
690        }
691      }
692    }
693  }
694  return best;
695}
696
697RVA DisassemblerWin32X86::FileOffsetToRVA(uint32 file_offset) const {
698  for (int i = 0; i < number_of_sections_; i++) {
699    const Section* section = &sections_[i];
700    uint32 offset = file_offset - section->file_offset_of_raw_data;
701    if (offset < section->size_of_raw_data) {
702      return section->virtual_address + offset;
703    }
704  }
705  return 0;
706}
707
708bool DisassemblerWin32X86::ReadDataDirectory(
709    int index,
710    ImageDataDirectory* directory) {
711
712  if (index < number_of_data_directories_) {
713    size_t offset = index * 8 + offset_of_data_directories_;
714    if (offset >= size_of_optional_header_)
715      return Bad("number of data directories inconsistent");
716    const uint8* data_directory = optional_header_ + offset;
717    if (data_directory < start() ||
718        data_directory + 8 >= end())
719      return Bad("data directory outside image");
720    RVA rva = ReadU32(data_directory, 0);
721    size_t size  = ReadU32(data_directory, 4);
722    if (size > size_of_image_)
723      return Bad("data directory size too big");
724
725    // TODO(sra): validate RVA.
726    directory->address_ = rva;
727    directory->size_ = static_cast<uint32>(size);
728    return true;
729  } else {
730    directory->address_ = 0;
731    directory->size_ = 0;
732    return true;
733  }
734}
735
736}  // namespace courgette
737