minidump.cc revision 0e94332f7c615d2b734e840bef233f3ee1188801
1// Copyright (c) 2006, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8//     * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10//     * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14//     * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30// minidump.cc: A minidump reader.
31//
32// See minidump.h for documentation.
33//
34// Author: Mark Mentovai
35
36
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
41#ifdef _WIN32
42#include <io.h>
43typedef SSIZE_T ssize_t;
44#define open _open
45#define read _read
46#define lseek _lseek
47#else  // _WIN32
48#define O_BINARY 0
49#endif  // _WIN32
50
51#include <cassert>
52#include <limits>
53#include <map>
54#include <vector>
55
56#include "processor/range_map-inl.h"
57
58#include "google_breakpad/processor/minidump.h"
59#include "processor/basic_code_module.h"
60#include "processor/basic_code_modules.h"
61#include "processor/logging.h"
62#include "processor/scoped_ptr.h"
63
64
65namespace google_breakpad {
66
67
68using std::numeric_limits;
69using std::vector;
70
71
72//
73// Swapping routines
74//
75// Inlining these doesn't increase code size significantly, and it saves
76// a whole lot of unnecessary jumping back and forth.
77//
78
79
// Byte-swapping a single byte leaves it unchanged, so this overload does
// nothing.  It exists only so that templatized code which swaps wider types
// (MinidumpMemoryRegion::GetMemoryAtAddressInternal) also compiles when
// instantiated for u_int8_t.
static inline void Swap(u_int8_t* /* value */) {
  // Intentionally empty.
}
86
87
// Optimization: don't need to AND the furthest right shift, because we're
// shifting an unsigned quantity.  The standard requires zero-filling in this
// case.  If the quantities were signed, a bitmask would be needed for this
// right shift to avoid an arithmetic shift (which retains the sign bit).
// The furthest left shift never needs to be ANDed with a bitmask.
93
94
// Reverses the byte order of the 16-bit quantity that |value| points to.
static inline void Swap(u_int16_t* value) {
  const u_int16_t original = *value;
  // The operands are promoted to int, so narrow the result explicitly.
  *value = static_cast<u_int16_t>((original << 8) | (original >> 8));
}
99
100
// Reverses the byte order of the 32-bit quantity that |value| points to.
static inline void Swap(u_int32_t* value) {
  const u_int32_t original = *value;
  // Isolate each byte first, then move it to its mirrored position.
  *value = ((original & 0x000000ffU) << 24) |
           ((original & 0x0000ff00U) << 8)  |
           ((original & 0x00ff0000U) >> 8)  |
           ((original & 0xff000000U) >> 24);
}
107
108
// Reverses the byte order of the 64-bit quantity that |value| points to.
static inline void Swap(u_int64_t* value) {
  const u_int64_t original = *value;
  // Isolate each byte first, then move it to its mirrored position.
  *value = ((original & 0x00000000000000ffULL) << 56) |
           ((original & 0x000000000000ff00ULL) << 40) |
           ((original & 0x0000000000ff0000ULL) << 24) |
           ((original & 0x00000000ff000000ULL) << 8)  |
           ((original & 0x000000ff00000000ULL) >> 8)  |
           ((original & 0x0000ff0000000000ULL) >> 24) |
           ((original & 0x00ff000000000000ULL) >> 40) |
           ((original & 0xff00000000000000ULL) >> 56);
}
119
120
121// Given a pointer to a 128-bit int in the minidump data, set the "low"
122// and "high" fields appropriately.
123static void Normalize128(u_int128_t* value, bool is_big_endian) {
124  // The struct format is [high, low], so if the format is big-endian,
125  // the most significant bytes will already be in the high field.
126  if (!is_big_endian) {
127    u_int64_t temp = value->low;
128    value->low = value->high;
129    value->high = temp;
130  }
131}
132
133// This just swaps each int64 half of the 128-bit value.
134// The value should also be normalized by calling Normalize128().
135static void Swap(u_int128_t* value) {
136  Swap(&value->low);
137  Swap(&value->high);
138}
139
140
141static inline void Swap(MDLocationDescriptor* location_descriptor) {
142  Swap(&location_descriptor->data_size);
143  Swap(&location_descriptor->rva);
144}
145
146
147static inline void Swap(MDMemoryDescriptor* memory_descriptor) {
148  Swap(&memory_descriptor->start_of_memory_range);
149  Swap(&memory_descriptor->memory);
150}
151
152
153static inline void Swap(MDGUID* guid) {
154  Swap(&guid->data1);
155  Swap(&guid->data2);
156  Swap(&guid->data3);
157  // Don't swap guid->data4[] because it contains 8-bit quantities.
158}
159
160
161//
162// Character conversion routines
163//
164
165
166// Standard wide-character conversion routines depend on the system's own
167// idea of what width a wide character should be: some use 16 bits, and
168// some use 32 bits.  For the purposes of a minidump, wide strings are
169// always represented with 16-bit UTF-16 chracters.  iconv isn't available
170// everywhere, and its interface varies where it is available.  iconv also
171// deals purely with char* pointers, so in addition to considering the swap
172// parameter, a converter that uses iconv would also need to take the host
173// CPU's endianness into consideration.  It doesn't seems worth the trouble
174// of making it a dependency when we don't care about anything but UTF-16.
175static string* UTF16ToUTF8(const vector<u_int16_t>& in,
176                           bool                     swap) {
177  scoped_ptr<string> out(new string());
178
179  // Set the string's initial capacity to the number of UTF-16 characters,
180  // because the UTF-8 representation will always be at least this long.
181  // If the UTF-8 representation is longer, the string will grow dynamically.
182  out->reserve(in.size());
183
184  for (vector<u_int16_t>::const_iterator iterator = in.begin();
185       iterator != in.end();
186       ++iterator) {
187    // Get a 16-bit value from the input
188    u_int16_t in_word = *iterator;
189    if (swap)
190      Swap(&in_word);
191
192    // Convert the input value (in_word) into a Unicode code point (unichar).
193    u_int32_t unichar;
194    if (in_word >= 0xdc00 && in_word <= 0xdcff) {
195      BPLOG(ERROR) << "UTF16ToUTF8 found low surrogate " <<
196                      HexString(in_word) << " without high";
197      return NULL;
198    } else if (in_word >= 0xd800 && in_word <= 0xdbff) {
199      // High surrogate.
200      unichar = (in_word - 0xd7c0) << 10;
201      if (++iterator == in.end()) {
202        BPLOG(ERROR) << "UTF16ToUTF8 found high surrogate " <<
203                        HexString(in_word) << " at end of string";
204        return NULL;
205      }
206      u_int32_t high_word = in_word;
207      in_word = *iterator;
208      if (in_word < 0xdc00 || in_word > 0xdcff) {
209        BPLOG(ERROR) << "UTF16ToUTF8 found high surrogate " <<
210                        HexString(high_word) << " without low " <<
211                        HexString(in_word);
212        return NULL;
213      }
214      unichar |= in_word & 0x03ff;
215    } else {
216      // The ordinary case, a single non-surrogate Unicode character encoded
217      // as a single 16-bit value.
218      unichar = in_word;
219    }
220
221    // Convert the Unicode code point (unichar) into its UTF-8 representation,
222    // appending it to the out string.
223    if (unichar < 0x80) {
224      (*out) += unichar;
225    } else if (unichar < 0x800) {
226      (*out) += 0xc0 | (unichar >> 6);
227      (*out) += 0x80 | (unichar & 0x3f);
228    } else if (unichar < 0x10000) {
229      (*out) += 0xe0 | (unichar >> 12);
230      (*out) += 0x80 | ((unichar >> 6) & 0x3f);
231      (*out) += 0x80 | (unichar & 0x3f);
232    } else if (unichar < 0x200000) {
233      (*out) += 0xf0 | (unichar >> 18);
234      (*out) += 0x80 | ((unichar >> 12) & 0x3f);
235      (*out) += 0x80 | ((unichar >> 6) & 0x3f);
236      (*out) += 0x80 | (unichar & 0x3f);
237    } else {
238      BPLOG(ERROR) << "UTF16ToUTF8 cannot represent high value " <<
239                      HexString(unichar) << " in UTF-8";
240      return NULL;
241    }
242  }
243
244  return out.release();
245}
246
247
248//
249// MinidumpObject
250//
251
252
253MinidumpObject::MinidumpObject(Minidump* minidump)
254    : minidump_(minidump),
255      valid_(false) {
256}
257
258
259//
260// MinidumpStream
261//
262
263
264MinidumpStream::MinidumpStream(Minidump* minidump)
265    : MinidumpObject(minidump) {
266}
267
268
269//
270// MinidumpContext
271//
272
273
274MinidumpContext::MinidumpContext(Minidump* minidump)
275    : MinidumpStream(minidump),
276      context_() {
277}
278
279
280MinidumpContext::~MinidumpContext() {
281  FreeContext();
282}
283
284
285bool MinidumpContext::Read(u_int32_t expected_size) {
286  valid_ = false;
287
288  FreeContext();
289
290  // First, figure out what type of CPU this context structure is for.
291  u_int32_t context_flags;
292  if (!minidump_->ReadBytes(&context_flags, sizeof(context_flags))) {
293    BPLOG(ERROR) << "MinidumpContext could not read context flags";
294    return false;
295  }
296  if (minidump_->swap())
297    Swap(&context_flags);
298
299  u_int32_t cpu_type = context_flags & MD_CONTEXT_CPU_MASK;
300
301  // Allocate the context structure for the correct CPU and fill it.  The
302  // casts are slightly unorthodox, but it seems better to do that than to
303  // maintain a separate pointer for each type of CPU context structure
304  // when only one of them will be used.
305  switch (cpu_type) {
306    case MD_CONTEXT_X86: {
307      if (expected_size != sizeof(MDRawContextX86)) {
308        BPLOG(ERROR) << "MinidumpContext x86 size mismatch, " <<
309                        expected_size << " != " << sizeof(MDRawContextX86);
310        return false;
311      }
312
313      scoped_ptr<MDRawContextX86> context_x86(new MDRawContextX86());
314
315      // Set the context_flags member, which has already been read, and
316      // read the rest of the structure beginning with the first member
317      // after context_flags.
318      context_x86->context_flags = context_flags;
319
320      size_t flags_size = sizeof(context_x86->context_flags);
321      u_int8_t* context_after_flags =
322          reinterpret_cast<u_int8_t*>(context_x86.get()) + flags_size;
323      if (!minidump_->ReadBytes(context_after_flags,
324                                sizeof(MDRawContextX86) - flags_size)) {
325        BPLOG(ERROR) << "MinidumpContext could not read x86 context";
326        return false;
327      }
328
329      // Do this after reading the entire MDRawContext structure because
330      // GetSystemInfo may seek minidump to a new position.
331      if (!CheckAgainstSystemInfo(cpu_type)) {
332        BPLOG(ERROR) << "MinidumpContext x86 does not match system info";
333        return false;
334      }
335
336      if (minidump_->swap()) {
337        // context_x86->context_flags was already swapped.
338        Swap(&context_x86->dr0);
339        Swap(&context_x86->dr1);
340        Swap(&context_x86->dr2);
341        Swap(&context_x86->dr3);
342        Swap(&context_x86->dr6);
343        Swap(&context_x86->dr7);
344        Swap(&context_x86->float_save.control_word);
345        Swap(&context_x86->float_save.status_word);
346        Swap(&context_x86->float_save.tag_word);
347        Swap(&context_x86->float_save.error_offset);
348        Swap(&context_x86->float_save.error_selector);
349        Swap(&context_x86->float_save.data_offset);
350        Swap(&context_x86->float_save.data_selector);
351        // context_x86->float_save.register_area[] contains 8-bit quantities
352        // and does not need to be swapped.
353        Swap(&context_x86->float_save.cr0_npx_state);
354        Swap(&context_x86->gs);
355        Swap(&context_x86->fs);
356        Swap(&context_x86->es);
357        Swap(&context_x86->ds);
358        Swap(&context_x86->edi);
359        Swap(&context_x86->esi);
360        Swap(&context_x86->ebx);
361        Swap(&context_x86->edx);
362        Swap(&context_x86->ecx);
363        Swap(&context_x86->eax);
364        Swap(&context_x86->ebp);
365        Swap(&context_x86->eip);
366        Swap(&context_x86->cs);
367        Swap(&context_x86->eflags);
368        Swap(&context_x86->esp);
369        Swap(&context_x86->ss);
370        // context_x86->extended_registers[] contains 8-bit quantities and
371        // does not need to be swapped.
372      }
373
374      context_.x86 = context_x86.release();
375
376      break;
377    }
378
379    case MD_CONTEXT_PPC: {
380      if (expected_size != sizeof(MDRawContextPPC)) {
381        BPLOG(ERROR) << "MinidumpContext ppc size mismatch, " <<
382                        expected_size << " != " << sizeof(MDRawContextPPC);
383        return false;
384      }
385
386      scoped_ptr<MDRawContextPPC> context_ppc(new MDRawContextPPC());
387
388      // Set the context_flags member, which has already been read, and
389      // read the rest of the structure beginning with the first member
390      // after context_flags.
391      context_ppc->context_flags = context_flags;
392
393      size_t flags_size = sizeof(context_ppc->context_flags);
394      u_int8_t* context_after_flags =
395          reinterpret_cast<u_int8_t*>(context_ppc.get()) + flags_size;
396      if (!minidump_->ReadBytes(context_after_flags,
397                                sizeof(MDRawContextPPC) - flags_size)) {
398        BPLOG(ERROR) << "MinidumpContext could not read ppc context";
399        return false;
400      }
401
402      // Do this after reading the entire MDRawContext structure because
403      // GetSystemInfo may seek minidump to a new position.
404      if (!CheckAgainstSystemInfo(cpu_type)) {
405        BPLOG(ERROR) << "MinidumpContext ppc does not match system info";
406        return false;
407      }
408
409      // Normalize the 128-bit types in the dump.
410      // Since this is PowerPC, by definition, the values are big-endian.
411      for (unsigned int vr_index = 0;
412           vr_index < MD_VECTORSAVEAREA_PPC_VR_COUNT;
413           ++vr_index) {
414        Normalize128(&context_ppc->vector_save.save_vr[vr_index], true);
415      }
416
417      if (minidump_->swap()) {
418        // context_ppc->context_flags was already swapped.
419        Swap(&context_ppc->srr0);
420        Swap(&context_ppc->srr1);
421        for (unsigned int gpr_index = 0;
422             gpr_index < MD_CONTEXT_PPC_GPR_COUNT;
423             ++gpr_index) {
424          Swap(&context_ppc->gpr[gpr_index]);
425        }
426        Swap(&context_ppc->cr);
427        Swap(&context_ppc->xer);
428        Swap(&context_ppc->lr);
429        Swap(&context_ppc->ctr);
430        Swap(&context_ppc->mq);
431        Swap(&context_ppc->vrsave);
432        for (unsigned int fpr_index = 0;
433             fpr_index < MD_FLOATINGSAVEAREA_PPC_FPR_COUNT;
434             ++fpr_index) {
435          Swap(&context_ppc->float_save.fpregs[fpr_index]);
436        }
437        // Don't swap context_ppc->float_save.fpscr_pad because it is only
438        // used for padding.
439        Swap(&context_ppc->float_save.fpscr);
440        for (unsigned int vr_index = 0;
441             vr_index < MD_VECTORSAVEAREA_PPC_VR_COUNT;
442             ++vr_index) {
443          Swap(&context_ppc->vector_save.save_vr[vr_index]);
444        }
445        Swap(&context_ppc->vector_save.save_vscr);
446        // Don't swap the padding fields in vector_save.
447        Swap(&context_ppc->vector_save.save_vrvalid);
448      }
449
450      context_.ppc = context_ppc.release();
451
452      break;
453    }
454
455    default: {
456      // Unknown context type
457      BPLOG(ERROR) << "MinidumpContext unknown context type " <<
458                      HexString(cpu_type);
459      return false;
460      break;
461    }
462  }
463
464  valid_ = true;
465  return true;
466}
467
468
469u_int32_t MinidumpContext::GetContextCPU() const {
470  if (!valid_) {
471    // Don't log a message, GetContextCPU can be legitimately called with
472    // valid_ false by FreeContext, which is called by Read.
473    return 0;
474  }
475
476  return context_.base->context_flags & MD_CONTEXT_CPU_MASK;
477}
478
479
480const MDRawContextX86* MinidumpContext::GetContextX86() const {
481  if (GetContextCPU() != MD_CONTEXT_X86) {
482    BPLOG(ERROR) << "MinidumpContext cannot get x86 context";
483    return NULL;
484  }
485
486  return context_.x86;
487}
488
489
490const MDRawContextPPC* MinidumpContext::GetContextPPC() const {
491  if (GetContextCPU() != MD_CONTEXT_PPC) {
492    BPLOG(ERROR) << "MinidumpContext cannot get ppc context";
493    return NULL;
494  }
495
496  return context_.ppc;
497}
498
499
500void MinidumpContext::FreeContext() {
501  switch (GetContextCPU()) {
502    case MD_CONTEXT_X86:
503      delete context_.x86;
504      break;
505
506    case MD_CONTEXT_PPC:
507      delete context_.ppc;
508      break;
509
510    default:
511      // There is no context record (valid_ is false) or there's a
512      // context record for an unknown CPU (shouldn't happen, only known
513      // records are stored by Read).
514      break;
515  }
516
517  context_.base = NULL;
518}
519
520
521bool MinidumpContext::CheckAgainstSystemInfo(u_int32_t context_cpu_type) {
522  // It's OK if the minidump doesn't contain an MD_SYSTEM_INFO_STREAM,
523  // as this function just implements a sanity check.
524  MinidumpSystemInfo* system_info = minidump_->GetSystemInfo();
525  if (!system_info) {
526    BPLOG(INFO) << "MinidumpContext could not be compared against "
527                   "MinidumpSystemInfo";
528    return true;
529  }
530
531  // If there is an MD_SYSTEM_INFO_STREAM, it should contain valid system info.
532  const MDRawSystemInfo* raw_system_info = system_info->system_info();
533  if (!raw_system_info) {
534    BPLOG(INFO) << "MinidumpContext could not be compared against "
535                   "MDRawSystemInfo";
536    return false;
537  }
538
539  MDCPUArchitecture system_info_cpu_type = static_cast<MDCPUArchitecture>(
540      raw_system_info->processor_architecture);
541
542  // Compare the CPU type of the context record to the CPU type in the
543  // minidump's system info stream.
544  bool return_value = false;
545  switch (context_cpu_type) {
546    case MD_CONTEXT_X86:
547      if (system_info_cpu_type == MD_CPU_ARCHITECTURE_X86 ||
548          system_info_cpu_type == MD_CPU_ARCHITECTURE_X86_WIN64) {
549        return_value = true;
550      }
551      break;
552
553    case MD_CONTEXT_PPC:
554      if (system_info_cpu_type == MD_CPU_ARCHITECTURE_PPC)
555        return_value = true;
556      break;
557  }
558
559  BPLOG_IF(ERROR, !return_value) << "MinidumpContext CPU " <<
560                                    HexString(context_cpu_type) <<
561                                    " wrong for MinidumpSysmtemInfo CPU " <<
562                                    HexString(system_info_cpu_type);
563
564  return return_value;
565}
566
567
568void MinidumpContext::Print() {
569  if (!valid_) {
570    BPLOG(ERROR) << "MinidumpContext cannot print invalid data";
571    return;
572  }
573
574  switch (GetContextCPU()) {
575    case MD_CONTEXT_X86: {
576      const MDRawContextX86* context_x86 = GetContextX86();
577      printf("MDRawContextX86\n");
578      printf("  context_flags                = 0x%x\n",
579             context_x86->context_flags);
580      printf("  dr0                          = 0x%x\n", context_x86->dr0);
581      printf("  dr1                          = 0x%x\n", context_x86->dr1);
582      printf("  dr2                          = 0x%x\n", context_x86->dr2);
583      printf("  dr3                          = 0x%x\n", context_x86->dr3);
584      printf("  dr6                          = 0x%x\n", context_x86->dr6);
585      printf("  dr7                          = 0x%x\n", context_x86->dr7);
586      printf("  float_save.control_word      = 0x%x\n",
587             context_x86->float_save.control_word);
588      printf("  float_save.status_word       = 0x%x\n",
589             context_x86->float_save.status_word);
590      printf("  float_save.tag_word          = 0x%x\n",
591             context_x86->float_save.tag_word);
592      printf("  float_save.error_offset      = 0x%x\n",
593             context_x86->float_save.error_offset);
594      printf("  float_save.error_selector    = 0x%x\n",
595             context_x86->float_save.error_selector);
596      printf("  float_save.data_offset       = 0x%x\n",
597             context_x86->float_save.data_offset);
598      printf("  float_save.data_selector     = 0x%x\n",
599             context_x86->float_save.data_selector);
600      printf("  float_save.register_area[%2d] = 0x",
601             MD_FLOATINGSAVEAREA_X86_REGISTERAREA_SIZE);
602      for (unsigned int register_index = 0;
603           register_index < MD_FLOATINGSAVEAREA_X86_REGISTERAREA_SIZE;
604           ++register_index) {
605        printf("%02x", context_x86->float_save.register_area[register_index]);
606      }
607      printf("\n");
608      printf("  float_save.cr0_npx_state     = 0x%x\n",
609             context_x86->float_save.cr0_npx_state);
610      printf("  gs                           = 0x%x\n", context_x86->gs);
611      printf("  fs                           = 0x%x\n", context_x86->fs);
612      printf("  es                           = 0x%x\n", context_x86->es);
613      printf("  ds                           = 0x%x\n", context_x86->ds);
614      printf("  edi                          = 0x%x\n", context_x86->edi);
615      printf("  esi                          = 0x%x\n", context_x86->esi);
616      printf("  ebx                          = 0x%x\n", context_x86->ebx);
617      printf("  edx                          = 0x%x\n", context_x86->edx);
618      printf("  ecx                          = 0x%x\n", context_x86->ecx);
619      printf("  eax                          = 0x%x\n", context_x86->eax);
620      printf("  ebp                          = 0x%x\n", context_x86->ebp);
621      printf("  eip                          = 0x%x\n", context_x86->eip);
622      printf("  cs                           = 0x%x\n", context_x86->cs);
623      printf("  eflags                       = 0x%x\n", context_x86->eflags);
624      printf("  esp                          = 0x%x\n", context_x86->esp);
625      printf("  ss                           = 0x%x\n", context_x86->ss);
626      printf("  extended_registers[%3d]      = 0x",
627             MD_CONTEXT_X86_EXTENDED_REGISTERS_SIZE);
628      for (unsigned int register_index = 0;
629           register_index < MD_CONTEXT_X86_EXTENDED_REGISTERS_SIZE;
630           ++register_index) {
631        printf("%02x", context_x86->extended_registers[register_index]);
632      }
633      printf("\n\n");
634
635      break;
636    }
637
638    case MD_CONTEXT_PPC: {
639      const MDRawContextPPC* context_ppc = GetContextPPC();
640      printf("MDRawContextPPC\n");
641      printf("  context_flags            = 0x%x\n",
642             context_ppc->context_flags);
643      printf("  srr0                     = 0x%x\n", context_ppc->srr0);
644      printf("  srr1                     = 0x%x\n", context_ppc->srr1);
645      for (unsigned int gpr_index = 0;
646           gpr_index < MD_CONTEXT_PPC_GPR_COUNT;
647           ++gpr_index) {
648        printf("  gpr[%2d]                  = 0x%x\n",
649               gpr_index, context_ppc->gpr[gpr_index]);
650      }
651      printf("  cr                       = 0x%x\n", context_ppc->cr);
652      printf("  xer                      = 0x%x\n", context_ppc->xer);
653      printf("  lr                       = 0x%x\n", context_ppc->lr);
654      printf("  ctr                      = 0x%x\n", context_ppc->ctr);
655      printf("  mq                       = 0x%x\n", context_ppc->mq);
656      printf("  vrsave                   = 0x%x\n", context_ppc->vrsave);
657      for (unsigned int fpr_index = 0;
658           fpr_index < MD_FLOATINGSAVEAREA_PPC_FPR_COUNT;
659           ++fpr_index) {
660        printf("  float_save.fpregs[%2d]    = 0x%llx\n",
661               fpr_index, context_ppc->float_save.fpregs[fpr_index]);
662      }
663      printf("  float_save.fpscr         = 0x%x\n",
664             context_ppc->float_save.fpscr);
665      // TODO(mmentovai): print the 128-bit quantities in
666      // context_ppc->vector_save.  This isn't done yet because printf
667      // doesn't support 128-bit quantities, and printing them using
668      // %llx as two 64-bit quantities requires knowledge of the CPU's
669      // byte ordering.
670      printf("  vector_save.save_vrvalid = 0x%x\n",
671             context_ppc->vector_save.save_vrvalid);
672      printf("\n");
673
674      break;
675    }
676
677    default: {
678      break;
679    }
680  }
681}
682
683
684//
685// MinidumpMemoryRegion
686//
687
688
689MinidumpMemoryRegion::MinidumpMemoryRegion(Minidump* minidump)
690    : MinidumpObject(minidump),
691      descriptor_(NULL),
692      memory_(NULL) {
693}
694
695
696MinidumpMemoryRegion::~MinidumpMemoryRegion() {
697  delete memory_;
698}
699
700
701void MinidumpMemoryRegion::SetDescriptor(MDMemoryDescriptor* descriptor) {
702  descriptor_ = descriptor;
703  valid_ = descriptor &&
704           descriptor_->memory.data_size <=
705               numeric_limits<uint64_t>::max() -
706               descriptor_->start_of_memory_range;
707}
708
709
710const u_int8_t* MinidumpMemoryRegion::GetMemory() {
711  if (!valid_) {
712    BPLOG(ERROR) << "Invalid MinidumpMemoryRegion for GetMemory";
713    return NULL;
714  }
715
716  if (!memory_) {
717    if (descriptor_->memory.data_size == 0) {
718      BPLOG(ERROR) << "MinidumpMemoryRegion is empty";
719      return NULL;
720    }
721
722    if (!minidump_->SeekSet(descriptor_->memory.rva)) {
723      BPLOG(ERROR) << "MinidumpMemoryRegion could not seek to memory region";
724      return NULL;
725    }
726
727    // TODO(mmentovai): verify rational size!
728    scoped_ptr< vector<u_int8_t> > memory(
729        new vector<u_int8_t>(descriptor_->memory.data_size));
730
731    if (!minidump_->ReadBytes(&(*memory)[0], descriptor_->memory.data_size)) {
732      BPLOG(ERROR) << "MinidumpMemoryRegion could not read memory region";
733      return NULL;
734    }
735
736    memory_ = memory.release();
737  }
738
739  return &(*memory_)[0];
740}
741
742
743u_int64_t MinidumpMemoryRegion::GetBase() {
744  if (!valid_) {
745    BPLOG(ERROR) << "Invalid MinidumpMemoryRegion for GetBase";
746    return static_cast<u_int64_t>(-1);
747  }
748
749  return descriptor_->start_of_memory_range;
750}
751
752
753u_int32_t MinidumpMemoryRegion::GetSize() {
754  if (!valid_) {
755    BPLOG(ERROR) << "Invalid MinidumpMemoryRegion for GetSize";
756    return 0;
757  }
758
759  return descriptor_->memory.data_size;
760}
761
762
763void MinidumpMemoryRegion::FreeMemory() {
764  delete memory_;
765  memory_ = NULL;
766}
767
768
769template<typename T>
770bool MinidumpMemoryRegion::GetMemoryAtAddressInternal(u_int64_t address,
771                                                      T*        value) {
772  BPLOG_IF(ERROR, !value) << "MinidumpMemoryRegion::GetMemoryAtAddressInternal "
773                             "requires |value|";
774  assert(value);
775  *value = 0;
776
777  if (!valid_) {
778    BPLOG(ERROR) << "Invalid MinidumpMemoryRegion for "
779                    "GetMemoryAtAddressInternal";
780    return false;
781  }
782
783  if (address < descriptor_->start_of_memory_range ||
784      sizeof(T) > numeric_limits<u_int64_t>::max() - address ||
785      address + sizeof(T) > descriptor_->start_of_memory_range +
786                            descriptor_->memory.data_size) {
787    BPLOG(ERROR) << "MinidumpMemoryRegion request out of range: " <<
788                    HexString(address) << "+" << sizeof(T) << "/" <<
789                    HexString(descriptor_->start_of_memory_range) << "+" <<
790                    HexString(descriptor_->memory.data_size);
791    return false;
792  }
793
794  const u_int8_t* memory = GetMemory();
795  if (!memory) {
796    // GetMemory already logged a perfectly good message.
797    return false;
798  }
799
800  // If the CPU requires memory accesses to be aligned, this can crash.
801  // x86 and ppc are able to cope, though.
802  *value = *reinterpret_cast<const T*>(
803      &memory[address - descriptor_->start_of_memory_range]);
804
805  if (minidump_->swap())
806    Swap(value);
807
808  return true;
809}
810
811
812bool MinidumpMemoryRegion::GetMemoryAtAddress(u_int64_t  address,
813                                              u_int8_t*  value) {
814  return GetMemoryAtAddressInternal(address, value);
815}
816
817
818bool MinidumpMemoryRegion::GetMemoryAtAddress(u_int64_t  address,
819                                              u_int16_t* value) {
820  return GetMemoryAtAddressInternal(address, value);
821}
822
823
824bool MinidumpMemoryRegion::GetMemoryAtAddress(u_int64_t  address,
825                                              u_int32_t* value) {
826  return GetMemoryAtAddressInternal(address, value);
827}
828
829
830bool MinidumpMemoryRegion::GetMemoryAtAddress(u_int64_t  address,
831                                              u_int64_t* value) {
832  return GetMemoryAtAddressInternal(address, value);
833}
834
835
836void MinidumpMemoryRegion::Print() {
837  if (!valid_) {
838    BPLOG(ERROR) << "MinidumpMemoryRegion cannot print invalid data";
839    return;
840  }
841
842  const u_int8_t* memory = GetMemory();
843  if (memory) {
844    printf("0x");
845    for (unsigned int byte_index = 0;
846         byte_index < descriptor_->memory.data_size;
847         byte_index++) {
848      printf("%02x", memory[byte_index]);
849    }
850    printf("\n");
851  } else {
852    printf("No memory\n");
853  }
854}
855
856
857//
858// MinidumpThread
859//
860
861
862MinidumpThread::MinidumpThread(Minidump* minidump)
863    : MinidumpObject(minidump),
864      thread_(),
865      memory_(NULL),
866      context_(NULL) {
867}
868
869
870MinidumpThread::~MinidumpThread() {
871  delete memory_;
872  delete context_;
873}
874
875
876bool MinidumpThread::Read() {
877  // Invalidate cached data.
878  delete memory_;
879  memory_ = NULL;
880  delete context_;
881  context_ = NULL;
882
883  valid_ = false;
884
885  if (!minidump_->ReadBytes(&thread_, sizeof(thread_))) {
886    BPLOG(ERROR) << "MinidumpThread cannot read thread";
887    return false;
888  }
889
890  if (minidump_->swap()) {
891    Swap(&thread_.thread_id);
892    Swap(&thread_.suspend_count);
893    Swap(&thread_.priority_class);
894    Swap(&thread_.priority);
895    Swap(&thread_.teb);
896    Swap(&thread_.stack);
897    Swap(&thread_.thread_context);
898  }
899
900  // Check for base + size overflow or undersize.
901  if (thread_.stack.memory.data_size == 0 ||
902      thread_.stack.memory.data_size > numeric_limits<u_int64_t>::max() -
903                                       thread_.stack.start_of_memory_range) {
904    BPLOG(ERROR) << "MinidumpThread has a memory region problem, " <<
905                    HexString(thread_.stack.start_of_memory_range) << "+" <<
906                    HexString(thread_.stack.memory.data_size);
907    return false;
908  }
909
910  memory_ = new MinidumpMemoryRegion(minidump_);
911  memory_->SetDescriptor(&thread_.stack);
912
913  valid_ = true;
914  return true;
915}
916
917
918MinidumpMemoryRegion* MinidumpThread::GetMemory() {
919  if (!valid_) {
920    BPLOG(ERROR) << "Invalid MinidumpThread for GetMemory";
921    return NULL;
922  }
923
924  return memory_;
925}
926
927
928MinidumpContext* MinidumpThread::GetContext() {
929  if (!valid_) {
930    BPLOG(ERROR) << "Invalid MinidumpThread for GetContext";
931    return NULL;
932  }
933
934  if (!context_) {
935    if (!minidump_->SeekSet(thread_.thread_context.rva)) {
936      BPLOG(ERROR) << "MinidumpThread cannot seek to context";
937      return NULL;
938    }
939
940    scoped_ptr<MinidumpContext> context(new MinidumpContext(minidump_));
941
942    if (!context->Read(thread_.thread_context.data_size)) {
943      BPLOG(ERROR) << "MinidumpThread cannot read context";
944      return NULL;
945    }
946
947    context_ = context.release();
948  }
949
950  return context_;
951}
952
953
954bool MinidumpThread::GetThreadID(u_int32_t *thread_id) const {
955  BPLOG_IF(ERROR, !thread_id) << "MinidumpThread::GetThreadID requires "
956                                 "|thread_id|";
957  assert(thread_id);
958  *thread_id = 0;
959
960  if (!valid_) {
961    BPLOG(ERROR) << "Invalid MinidumpThread for GetThreadID";
962    return false;
963  }
964
965  *thread_id = thread_.thread_id;
966  return true;
967}
968
969
970void MinidumpThread::Print() {
971  if (!valid_) {
972    BPLOG(ERROR) << "MinidumpThread cannot print invalid data";
973    return;
974  }
975
976  printf("MDRawThread\n");
977  printf("  thread_id                   = 0x%x\n",   thread_.thread_id);
978  printf("  suspend_count               = %d\n",     thread_.suspend_count);
979  printf("  priority_class              = 0x%x\n",   thread_.priority_class);
980  printf("  priority                    = 0x%x\n",   thread_.priority);
981  printf("  teb                         = 0x%llx\n", thread_.teb);
982  printf("  stack.start_of_memory_range = 0x%llx\n",
983         thread_.stack.start_of_memory_range);
984  printf("  stack.memory.data_size      = 0x%x\n",
985         thread_.stack.memory.data_size);
986  printf("  stack.memory.rva            = 0x%x\n",   thread_.stack.memory.rva);
987  printf("  thread_context.data_size    = 0x%x\n",
988         thread_.thread_context.data_size);
989  printf("  thread_context.rva          = 0x%x\n",
990         thread_.thread_context.rva);
991
992  MinidumpContext* context = GetContext();
993  if (context) {
994    printf("\n");
995    context->Print();
996  } else {
997    printf("  (no context)\n");
998    printf("\n");
999  }
1000
1001  MinidumpMemoryRegion* memory = GetMemory();
1002  if (memory) {
1003    printf("Stack\n");
1004    memory->Print();
1005  } else {
1006    printf("No stack\n");
1007  }
1008  printf("\n");
1009}
1010
1011
1012//
1013// MinidumpThreadList
1014//
1015
1016
1017MinidumpThreadList::MinidumpThreadList(Minidump* minidump)
1018    : MinidumpStream(minidump),
1019      id_to_thread_map_(),
1020      threads_(NULL),
1021      thread_count_(0) {
1022}
1023
1024
1025MinidumpThreadList::~MinidumpThreadList() {
1026  delete threads_;
1027}
1028
1029
1030bool MinidumpThreadList::Read(u_int32_t expected_size) {
1031  // Invalidate cached data.
1032  id_to_thread_map_.clear();
1033  delete threads_;
1034  threads_ = NULL;
1035  thread_count_ = 0;
1036
1037  valid_ = false;
1038
1039  u_int32_t thread_count;
1040  if (expected_size < sizeof(thread_count)) {
1041    BPLOG(ERROR) << "MinidumpThreadList count size mismatch, " <<
1042                    expected_size << " < " << sizeof(thread_count);
1043    return false;
1044  }
1045  if (!minidump_->ReadBytes(&thread_count, sizeof(thread_count))) {
1046    BPLOG(ERROR) << "MinidumpThreadList cannot read thread count";
1047    return false;
1048  }
1049
1050  if (minidump_->swap())
1051    Swap(&thread_count);
1052
1053  if (thread_count > numeric_limits<u_int32_t>::max() / sizeof(MDRawThread)) {
1054    BPLOG(ERROR) << "MinidumpThreadList thread count " << thread_count <<
1055                    " would cause multiplication overflow";
1056    return false;
1057  }
1058
1059  if (expected_size != sizeof(thread_count) +
1060                       thread_count * sizeof(MDRawThread)) {
1061    BPLOG(ERROR) << "MinidumpThreadList size mismatch, " << expected_size <<
1062                    " != " <<
1063                    sizeof(thread_count) + thread_count * sizeof(MDRawThread);
1064    return false;
1065  }
1066
1067  if (thread_count) {
1068    // TODO(mmentovai): verify rational size!
1069    scoped_ptr<MinidumpThreads> threads(
1070        new MinidumpThreads(thread_count, MinidumpThread(minidump_)));
1071
1072    for (unsigned int thread_index = 0;
1073         thread_index < thread_count;
1074         ++thread_index) {
1075      MinidumpThread* thread = &(*threads)[thread_index];
1076
1077      // Assume that the file offset is correct after the last read.
1078      if (!thread->Read()) {
1079        BPLOG(ERROR) << "MinidumpThreadList cannot read thread " <<
1080                        thread_index << "/" << thread_count;
1081        return false;
1082      }
1083
1084      u_int32_t thread_id;
1085      if (!thread->GetThreadID(&thread_id)) {
1086        BPLOG(ERROR) << "MinidumpThreadList cannot get thread ID for thread " <<
1087                        thread_index << "/" << thread_count;
1088        return false;
1089      }
1090
1091      if (GetThreadByID(thread_id)) {
1092        // Another thread with this ID is already in the list.  Data error.
1093        BPLOG(ERROR) << "MinidumpThreadList found multiple threads with ID " <<
1094                        HexString(thread_id) << " at thread " <<
1095                        thread_index << "/" << thread_count;
1096        return false;
1097      }
1098      id_to_thread_map_[thread_id] = thread;
1099    }
1100
1101    threads_ = threads.release();
1102  }
1103
1104  thread_count_ = thread_count;
1105
1106  valid_ = true;
1107  return true;
1108}
1109
1110
1111MinidumpThread* MinidumpThreadList::GetThreadAtIndex(unsigned int index)
1112    const {
1113  if (!valid_) {
1114    BPLOG(ERROR) << "Invalid MinidumpThreadList for GetThreadAtIndex";
1115    return NULL;
1116  }
1117
1118  if (index >= thread_count_) {
1119    BPLOG(ERROR) << "MinidumpThreadList index out of range: " <<
1120                    index << "/" << thread_count_;
1121    return NULL;
1122  }
1123
1124  return &(*threads_)[index];
1125}
1126
1127
1128MinidumpThread* MinidumpThreadList::GetThreadByID(u_int32_t thread_id) {
1129  // Don't check valid_.  Read calls this method before everything is
1130  // validated.  It is safe to not check valid_ here.
1131  return id_to_thread_map_[thread_id];
1132}
1133
1134
1135void MinidumpThreadList::Print() {
1136  if (!valid_) {
1137    BPLOG(ERROR) << "MinidumpThreadList cannot print invalid data";
1138    return;
1139  }
1140
1141  printf("MinidumpThreadList\n");
1142  printf("  thread_count = %d\n", thread_count_);
1143  printf("\n");
1144
1145  for (unsigned int thread_index = 0;
1146       thread_index < thread_count_;
1147       ++thread_index) {
1148    printf("thread[%d]\n", thread_index);
1149
1150    (*threads_)[thread_index].Print();
1151  }
1152}
1153
1154
1155//
1156// MinidumpModule
1157//
1158
1159
1160MinidumpModule::MinidumpModule(Minidump* minidump)
1161    : MinidumpObject(minidump),
1162      module_valid_(false),
1163      module_(),
1164      name_(NULL),
1165      cv_record_(NULL),
1166      cv_record_signature_(MD_CVINFOUNKNOWN_SIGNATURE),
1167      misc_record_(NULL) {
1168}
1169
1170
1171MinidumpModule::~MinidumpModule() {
1172  delete name_;
1173  delete cv_record_;
1174  delete misc_record_;
1175}
1176
1177
1178bool MinidumpModule::Read() {
1179  // Invalidate cached data.
1180  delete name_;
1181  name_ = NULL;
1182  delete cv_record_;
1183  cv_record_ = NULL;
1184  cv_record_signature_ = MD_CVINFOUNKNOWN_SIGNATURE;
1185  delete misc_record_;
1186  misc_record_ = NULL;
1187
1188  module_valid_ = false;
1189  valid_ = false;
1190
1191  if (!minidump_->ReadBytes(&module_, MD_MODULE_SIZE)) {
1192    BPLOG(ERROR) << "MinidumpModule cannot read module";
1193    return false;
1194  }
1195
1196  if (minidump_->swap()) {
1197    Swap(&module_.base_of_image);
1198    Swap(&module_.size_of_image);
1199    Swap(&module_.checksum);
1200    Swap(&module_.time_date_stamp);
1201    Swap(&module_.module_name_rva);
1202    Swap(&module_.version_info.signature);
1203    Swap(&module_.version_info.struct_version);
1204    Swap(&module_.version_info.file_version_hi);
1205    Swap(&module_.version_info.file_version_lo);
1206    Swap(&module_.version_info.product_version_hi);
1207    Swap(&module_.version_info.product_version_lo);
1208    Swap(&module_.version_info.file_flags_mask);
1209    Swap(&module_.version_info.file_flags);
1210    Swap(&module_.version_info.file_os);
1211    Swap(&module_.version_info.file_type);
1212    Swap(&module_.version_info.file_subtype);
1213    Swap(&module_.version_info.file_date_hi);
1214    Swap(&module_.version_info.file_date_lo);
1215    Swap(&module_.cv_record);
1216    Swap(&module_.misc_record);
1217    // Don't swap reserved fields because their contents are unknown (as
1218    // are their proper widths).
1219  }
1220
1221  // Check for base + size overflow or undersize.
1222  if (module_.size_of_image == 0 ||
1223      module_.size_of_image >
1224          numeric_limits<u_int64_t>::max() - module_.base_of_image) {
1225    BPLOG(ERROR) << "MinidumpModule has a module problem, " <<
1226                    HexString(module_.base_of_image) << "+" <<
1227                    HexString(module_.size_of_image);
1228    return false;
1229  }
1230
1231  module_valid_ = true;
1232  return true;
1233}
1234
1235
1236bool MinidumpModule::ReadAuxiliaryData() {
1237  if (!module_valid_) {
1238    BPLOG(ERROR) << "Invalid MinidumpModule for ReadAuxiliaryData";
1239    return false;
1240  }
1241
1242  // Each module must have a name.
1243  name_ = minidump_->ReadString(module_.module_name_rva);
1244  if (!name_) {
1245    BPLOG(ERROR) << "MinidumpModule could not read name";
1246    return false;
1247  }
1248
1249  // CodeView and miscellaneous debug records are only required if the
1250  // module indicates that they exist.
1251  if (module_.cv_record.data_size && !GetCVRecord(NULL)) {
1252    BPLOG(ERROR) << "MinidumpModule has no CodeView record, "
1253                    "but one was expected";
1254    return false;
1255  }
1256
1257  if (module_.misc_record.data_size && !GetMiscRecord(NULL)) {
1258    BPLOG(ERROR) << "MinidumpModule has no miscellaneous debug record, "
1259                    "but one was expected";
1260    return false;
1261  }
1262
1263  valid_ = true;
1264  return true;
1265}
1266
1267
1268string MinidumpModule::code_file() const {
1269  if (!valid_) {
1270    BPLOG(ERROR) << "Invalid MinidumpModule for code_file";
1271    return "";
1272  }
1273
1274  return *name_;
1275}
1276
1277
1278string MinidumpModule::code_identifier() const {
1279  if (!valid_) {
1280    BPLOG(ERROR) << "Invalid MinidumpModule for code_identifier";
1281    return "";
1282  }
1283
1284  MinidumpSystemInfo *minidump_system_info = minidump_->GetSystemInfo();
1285  if (!minidump_system_info) {
1286    BPLOG(ERROR) << "MinidumpModule code_identifier requires "
1287                    "MinidumpSystemInfo";
1288    return "";
1289  }
1290
1291  const MDRawSystemInfo *raw_system_info = minidump_system_info->system_info();
1292  if (!raw_system_info) {
1293    BPLOG(ERROR) << "MinidumpModule code_identifier requires MDRawSystemInfo";
1294    return "";
1295  }
1296
1297  string identifier;
1298
1299  switch (raw_system_info->platform_id) {
1300    case MD_OS_WIN32_NT:
1301    case MD_OS_WIN32_WINDOWS: {
1302      // Use the same format that the MS symbol server uses in filesystem
1303      // hierarchies.
1304      char identifier_string[17];
1305      snprintf(identifier_string, sizeof(identifier_string), "%08X%x",
1306               module_.time_date_stamp, module_.size_of_image);
1307      identifier = identifier_string;
1308      break;
1309    }
1310
1311    case MD_OS_MAC_OS_X:
1312    case MD_OS_LINUX: {
1313      // TODO(mmentovai): support uuid extension if present, otherwise fall
1314      // back to version (from LC_ID_DYLIB?), otherwise fall back to something
1315      // else.
1316      identifier = "id";
1317      break;
1318    }
1319
1320    default: {
1321      // Without knowing what OS generated the dump, we can't generate a good
1322      // identifier.  Return an empty string, signalling failure.
1323      BPLOG(ERROR) << "MinidumpModule code_identifier requires known platform, "
1324                      "found " << HexString(raw_system_info->platform_id);
1325      break;
1326    }
1327  }
1328
1329  return identifier;
1330}
1331
1332
1333string MinidumpModule::debug_file() const {
1334  if (!valid_) {
1335    BPLOG(ERROR) << "Invalid MinidumpModule for debug_file";
1336    return "";
1337  }
1338
1339  string file;
1340  // Prefer the CodeView record if present.
1341  if (cv_record_) {
1342    if (cv_record_signature_ == MD_CVINFOPDB70_SIGNATURE) {
1343      // It's actually an MDCVInfoPDB70 structure.
1344      const MDCVInfoPDB70* cv_record_70 =
1345          reinterpret_cast<const MDCVInfoPDB70*>(&(*cv_record_)[0]);
1346      assert(cv_record_70->cv_signature == MD_CVINFOPDB70_SIGNATURE);
1347
1348      // GetCVRecord guarantees pdb_file_name is null-terminated.
1349      file = reinterpret_cast<const char*>(cv_record_70->pdb_file_name);
1350    } else if (cv_record_signature_ == MD_CVINFOPDB20_SIGNATURE) {
1351      // It's actually an MDCVInfoPDB20 structure.
1352      const MDCVInfoPDB20* cv_record_20 =
1353          reinterpret_cast<const MDCVInfoPDB20*>(&(*cv_record_)[0]);
1354      assert(cv_record_20->cv_header.signature == MD_CVINFOPDB20_SIGNATURE);
1355
1356      // GetCVRecord guarantees pdb_file_name is null-terminated.
1357      file = reinterpret_cast<const char*>(cv_record_20->pdb_file_name);
1358    }
1359
1360    // If there's a CodeView record but it doesn't match a known signature,
1361    // try the miscellaneous record.
1362  }
1363
1364  if (file.empty()) {
1365    // No usable CodeView record.  Try the miscellaneous debug record.
1366    if (misc_record_) {
1367      const MDImageDebugMisc* misc_record =
1368          reinterpret_cast<const MDImageDebugMisc *>(&(*misc_record_)[0]);
1369      if (!misc_record->unicode) {
1370        // If it's not Unicode, just stuff it into the string.  It's unclear
1371        // if misc_record->data is 0-terminated, so use an explicit size.
1372        file = string(
1373            reinterpret_cast<const char*>(misc_record->data),
1374            module_.misc_record.data_size - sizeof(MDImageDebugMisc));
1375      } else {
1376        // There's a misc_record but it encodes the debug filename in UTF-16.
1377        // (Actually, because miscellaneous records are so old, it's probably
1378        // UCS-2.)  Convert it to UTF-8 for congruity with the other strings
1379        // that this method (and all other methods in the Minidump family)
1380        // return.
1381
1382        unsigned int bytes =
1383            module_.misc_record.data_size - sizeof(MDImageDebugMisc);
1384        if (bytes % 2 == 0) {
1385          unsigned int utf16_words = bytes / 2;
1386
1387          // UTF16ToUTF8 expects a vector<u_int16_t>, so create a temporary one
1388          // and copy the UTF-16 data into it.
1389          vector<u_int16_t> string_utf16(utf16_words);
1390          if (utf16_words)
1391            memcpy(&string_utf16[0], &misc_record->data, bytes);
1392
1393          // GetMiscRecord already byte-swapped the data[] field if it contains
1394          // UTF-16, so pass false as the swap argument.
1395          scoped_ptr<string> new_file(UTF16ToUTF8(string_utf16, false));
1396          file = *new_file;
1397        }
1398      }
1399    }
1400  }
1401
1402  BPLOG_IF(ERROR, file.empty()) << "MinidumpModule could not determine "
1403                                   "debug_file for " << *name_;
1404
1405  return file;
1406}
1407
1408
1409string MinidumpModule::debug_identifier() const {
1410  if (!valid_) {
1411    BPLOG(ERROR) << "Invalid MinidumpModule for debug_identifier";
1412    return "";
1413  }
1414
1415  string identifier;
1416
1417  // Use the CodeView record if present.
1418  if (cv_record_) {
1419    if (cv_record_signature_ == MD_CVINFOPDB70_SIGNATURE) {
1420      // It's actually an MDCVInfoPDB70 structure.
1421      const MDCVInfoPDB70* cv_record_70 =
1422          reinterpret_cast<const MDCVInfoPDB70*>(&(*cv_record_)[0]);
1423      assert(cv_record_70->cv_signature == MD_CVINFOPDB70_SIGNATURE);
1424
1425      // Use the same format that the MS symbol server uses in filesystem
1426      // hierarchies.
1427      char identifier_string[41];
1428      snprintf(identifier_string, sizeof(identifier_string),
1429               "%08X%04X%04X%02X%02X%02X%02X%02X%02X%02X%02X%x",
1430               cv_record_70->signature.data1,
1431               cv_record_70->signature.data2,
1432               cv_record_70->signature.data3,
1433               cv_record_70->signature.data4[0],
1434               cv_record_70->signature.data4[1],
1435               cv_record_70->signature.data4[2],
1436               cv_record_70->signature.data4[3],
1437               cv_record_70->signature.data4[4],
1438               cv_record_70->signature.data4[5],
1439               cv_record_70->signature.data4[6],
1440               cv_record_70->signature.data4[7],
1441               cv_record_70->age);
1442      identifier = identifier_string;
1443    } else if (cv_record_signature_ == MD_CVINFOPDB20_SIGNATURE) {
1444      // It's actually an MDCVInfoPDB20 structure.
1445      const MDCVInfoPDB20* cv_record_20 =
1446          reinterpret_cast<const MDCVInfoPDB20*>(&(*cv_record_)[0]);
1447      assert(cv_record_20->cv_header.signature == MD_CVINFOPDB20_SIGNATURE);
1448
1449      // Use the same format that the MS symbol server uses in filesystem
1450      // hierarchies.
1451      char identifier_string[17];
1452      snprintf(identifier_string, sizeof(identifier_string),
1453               "%08X%x", cv_record_20->signature, cv_record_20->age);
1454      identifier = identifier_string;
1455    }
1456  }
1457
1458  // TODO(mmentovai): if there's no usable CodeView record, there might be a
1459  // miscellaneous debug record.  It only carries a filename, though, and no
1460  // identifier.  I'm not sure what the right thing to do for the identifier
1461  // is in that case, but I don't expect to find many modules without a
1462  // CodeView record (or some other Breakpad extension structure in place of
1463  // a CodeView record).  Treat it as an error (empty identifier) for now.
1464
1465  // TODO(mmentovai): on the Mac, provide fallbacks as in code_identifier().
1466
1467  BPLOG_IF(ERROR, identifier.empty()) << "MinidumpModule could not determine "
1468                                         "debug_identifier for " << *name_;
1469
1470  return identifier;
1471}
1472
1473
1474string MinidumpModule::version() const {
1475  if (!valid_) {
1476    BPLOG(ERROR) << "Invalid MinidumpModule for version";
1477    return "";
1478  }
1479
1480  string version;
1481
1482  if (module_.version_info.signature == MD_VSFIXEDFILEINFO_SIGNATURE &&
1483      module_.version_info.struct_version & MD_VSFIXEDFILEINFO_VERSION) {
1484    char version_string[24];
1485    snprintf(version_string, sizeof(version_string), "%u.%u.%u.%u",
1486             module_.version_info.file_version_hi >> 16,
1487             module_.version_info.file_version_hi & 0xffff,
1488             module_.version_info.file_version_lo >> 16,
1489             module_.version_info.file_version_lo & 0xffff);
1490    version = version_string;
1491  }
1492
1493  // TODO(mmentovai): possibly support other struct types in place of
1494  // the one used with MD_VSFIXEDFILEINFO_SIGNATURE.  We can possibly use
1495  // a different structure that better represents versioning facilities on
1496  // Mac OS X and Linux, instead of forcing them to adhere to the dotted
1497  // quad of 16-bit ints that Windows uses.
1498
1499  BPLOG_IF(INFO, version.empty()) << "MinidumpModule could not determine "
1500                                     "version for " << *name_;
1501
1502  return version;
1503}
1504
1505
1506const CodeModule* MinidumpModule::Copy() const {
1507  return new BasicCodeModule(this);
1508}
1509
1510
1511const u_int8_t* MinidumpModule::GetCVRecord(u_int32_t* size) {
1512  if (!module_valid_) {
1513    BPLOG(ERROR) << "Invalid MinidumpModule for GetCVRecord";
1514    return NULL;
1515  }
1516
1517  if (!cv_record_) {
1518    // This just guards against 0-sized CodeView records; more specific checks
1519    // are used when the signature is checked against various structure types.
1520    if (module_.cv_record.data_size == 0) {
1521      return NULL;
1522    }
1523
1524    if (!minidump_->SeekSet(module_.cv_record.rva)) {
1525      BPLOG(ERROR) << "MinidumpModule could not seek to CodeView record";
1526      return NULL;
1527    }
1528
1529    // TODO(mmentovai): verify rational size!
1530
1531    // Allocating something that will be accessed as MDCVInfoPDB70 or
1532    // MDCVInfoPDB20 but is allocated as u_int8_t[] can cause alignment
1533    // problems.  x86 and ppc are able to cope, though.  This allocation
1534    // style is needed because the MDCVInfoPDB70 or MDCVInfoPDB20 are
1535    // variable-sized due to their pdb_file_name fields; these structures
1536    // are not sizeof(MDCVInfoPDB70) or sizeof(MDCVInfoPDB20) and treating
1537    // them as such would result in incomplete structures or overruns.
1538    scoped_ptr< vector<u_int8_t> > cv_record(
1539        new vector<u_int8_t>(module_.cv_record.data_size));
1540
1541    if (!minidump_->ReadBytes(&(*cv_record)[0], module_.cv_record.data_size)) {
1542      BPLOG(ERROR) << "MinidumpModule could not read CodeView record";
1543      return NULL;
1544    }
1545
1546    u_int32_t signature = MD_CVINFOUNKNOWN_SIGNATURE;
1547    if (module_.cv_record.data_size > sizeof(signature)) {
1548      MDCVInfoPDB70* cv_record_signature =
1549          reinterpret_cast<MDCVInfoPDB70*>(&(*cv_record)[0]);
1550      signature = cv_record_signature->cv_signature;
1551      if (minidump_->swap())
1552        Swap(&signature);
1553    }
1554
1555    if (signature == MD_CVINFOPDB70_SIGNATURE) {
1556      // Now that the structure type is known, recheck the size.
1557      if (sizeof(MDCVInfoPDB70) > module_.cv_record.data_size) {
1558        BPLOG(ERROR) << "MinidumpModule CodeView7 record size mismatch, " <<
1559                        sizeof(MDCVInfoPDB70) << " > " <<
1560                        module_.cv_record.data_size;
1561        return NULL;
1562      }
1563
1564      if (minidump_->swap()) {
1565        MDCVInfoPDB70* cv_record_70 =
1566            reinterpret_cast<MDCVInfoPDB70*>(&(*cv_record)[0]);
1567        Swap(&cv_record_70->cv_signature);
1568        Swap(&cv_record_70->signature);
1569        Swap(&cv_record_70->age);
1570        // Don't swap cv_record_70.pdb_file_name because it's an array of 8-bit
1571        // quantities.  (It's a path, is it UTF-8?)
1572      }
1573
1574      // The last field of either structure is null-terminated 8-bit character
1575      // data.  Ensure that it's null-terminated.
1576      if ((*cv_record)[module_.cv_record.data_size - 1] != '\0') {
1577        BPLOG(ERROR) << "MinidumpModule CodeView7 record string is not "
1578                        "0-terminated";
1579        return NULL;
1580      }
1581    } else if (signature == MD_CVINFOPDB20_SIGNATURE) {
1582      // Now that the structure type is known, recheck the size.
1583      if (sizeof(MDCVInfoPDB20) > module_.cv_record.data_size) {
1584        BPLOG(ERROR) << "MinidumpModule CodeView2 record size mismatch, " <<
1585                        sizeof(MDCVInfoPDB20) << " > " <<
1586                        module_.cv_record.data_size;
1587        return NULL;
1588      }
1589      if (minidump_->swap()) {
1590        MDCVInfoPDB20* cv_record_20 =
1591            reinterpret_cast<MDCVInfoPDB20*>(&(*cv_record)[0]);
1592        Swap(&cv_record_20->cv_header.signature);
1593        Swap(&cv_record_20->cv_header.offset);
1594        Swap(&cv_record_20->signature);
1595        Swap(&cv_record_20->age);
1596        // Don't swap cv_record_20.pdb_file_name because it's an array of 8-bit
1597        // quantities.  (It's a path, is it UTF-8?)
1598      }
1599
1600      // The last field of either structure is null-terminated 8-bit character
1601      // data.  Ensure that it's null-terminated.
1602      if ((*cv_record)[module_.cv_record.data_size - 1] != '\0') {
1603        BPLOG(ERROR) << "MindumpModule CodeView2 record string is not "
1604                        "0-terminated";
1605        return NULL;
1606      }
1607    }
1608
1609    // If the signature doesn't match something above, it's not something
1610    // that Breakpad can presently handle directly.  Because some modules in
1611    // the wild contain such CodeView records as MD_CVINFOCV50_SIGNATURE,
1612    // don't bail out here - allow the data to be returned to the user,
1613    // although byte-swapping can't be done.
1614
1615    // Store the vector type because that's how storage was allocated, but
1616    // return it casted to u_int8_t*.
1617    cv_record_ = cv_record.release();
1618    cv_record_signature_ = signature;
1619  }
1620
1621  if (size)
1622    *size = module_.cv_record.data_size;
1623
1624  return &(*cv_record_)[0];
1625}
1626
1627
1628const MDImageDebugMisc* MinidumpModule::GetMiscRecord(u_int32_t* size) {
1629  if (!module_valid_) {
1630    BPLOG(ERROR) << "Invalid MinidumpModule for GetMiscRecord";
1631    return NULL;
1632  }
1633
1634  if (!misc_record_) {
1635    if (module_.misc_record.data_size == 0) {
1636      return NULL;
1637    }
1638
1639    if (sizeof(MDImageDebugMisc) > module_.misc_record.data_size) {
1640      BPLOG(ERROR) << "MinidumpModule miscellaneous debugging record "
1641                      "size mismatch, " << sizeof(MDImageDebugMisc) << " > " <<
1642                      module_.misc_record.data_size;
1643      return NULL;
1644    }
1645
1646    if (!minidump_->SeekSet(module_.misc_record.rva)) {
1647      BPLOG(ERROR) << "MinidumpModule could not seek to miscellaneous "
1648                      "debugging record";
1649      return NULL;
1650    }
1651
1652    // TODO(mmentovai): verify rational size!
1653
1654    // Allocating something that will be accessed as MDImageDebugMisc but
1655    // is allocated as u_int8_t[] can cause alignment problems.  x86 and
1656    // ppc are able to cope, though.  This allocation style is needed
1657    // because the MDImageDebugMisc is variable-sized due to its data field;
1658    // this structure is not sizeof(MDImageDebugMisc) and treating it as such
1659    // would result in an incomplete structure or an overrun.
1660    scoped_ptr< vector<u_int8_t> > misc_record_mem(
1661        new vector<u_int8_t>(module_.misc_record.data_size));
1662    MDImageDebugMisc* misc_record =
1663        reinterpret_cast<MDImageDebugMisc*>(&(*misc_record_mem)[0]);
1664
1665    if (!minidump_->ReadBytes(misc_record, module_.misc_record.data_size)) {
1666      BPLOG(ERROR) << "MinidumpModule could not read miscellaneous debugging "
1667                      "record";
1668      return NULL;
1669    }
1670
1671    if (minidump_->swap()) {
1672      Swap(&misc_record->data_type);
1673      Swap(&misc_record->length);
1674      // Don't swap misc_record.unicode because it's an 8-bit quantity.
1675      // Don't swap the reserved fields for the same reason, and because
1676      // they don't contain any valid data.
1677      if (misc_record->unicode) {
1678        // There is a potential alignment problem, but shouldn't be a problem
1679        // in practice due to the layout of MDImageDebugMisc.
1680        u_int16_t* data16 = reinterpret_cast<u_int16_t*>(&(misc_record->data));
1681        unsigned int dataBytes = module_.misc_record.data_size -
1682                                 sizeof(MDImageDebugMisc);
1683        unsigned int dataLength = dataBytes / 2;
1684        for (unsigned int characterIndex = 0;
1685             characterIndex < dataLength;
1686             ++characterIndex) {
1687          Swap(&data16[characterIndex]);
1688        }
1689      }
1690    }
1691
1692    if (module_.misc_record.data_size != misc_record->length) {
1693      BPLOG(ERROR) << "MinidumpModule miscellaneous debugging record data "
1694                      "size mismatch, " << module_.misc_record.data_size <<
1695                      " != " << misc_record->length;
1696      return NULL;
1697    }
1698
1699    // Store the vector type because that's how storage was allocated, but
1700    // return it casted to MDImageDebugMisc*.
1701    misc_record_ = misc_record_mem.release();
1702  }
1703
1704  if (size)
1705    *size = module_.misc_record.data_size;
1706
1707  return reinterpret_cast<MDImageDebugMisc*>(&(*misc_record_)[0]);
1708}
1709
1710
1711void MinidumpModule::Print() {
1712  if (!valid_) {
1713    BPLOG(ERROR) << "MinidumpModule cannot print invalid data";
1714    return;
1715  }
1716
1717  printf("MDRawModule\n");
1718  printf("  base_of_image                   = 0x%llx\n",
1719         module_.base_of_image);
1720  printf("  size_of_image                   = 0x%x\n",
1721         module_.size_of_image);
1722  printf("  checksum                        = 0x%x\n",
1723         module_.checksum);
1724  printf("  time_date_stamp                 = 0x%x\n",
1725         module_.time_date_stamp);
1726  printf("  module_name_rva                 = 0x%x\n",
1727         module_.module_name_rva);
1728  printf("  version_info.signature          = 0x%x\n",
1729         module_.version_info.signature);
1730  printf("  version_info.struct_version     = 0x%x\n",
1731         module_.version_info.struct_version);
1732  printf("  version_info.file_version       = 0x%x:0x%x\n",
1733         module_.version_info.file_version_hi,
1734         module_.version_info.file_version_lo);
1735  printf("  version_info.product_version    = 0x%x:0x%x\n",
1736         module_.version_info.product_version_hi,
1737         module_.version_info.product_version_lo);
1738  printf("  version_info.file_flags_mask    = 0x%x\n",
1739         module_.version_info.file_flags_mask);
1740  printf("  version_info.file_flags         = 0x%x\n",
1741         module_.version_info.file_flags);
1742  printf("  version_info.file_os            = 0x%x\n",
1743         module_.version_info.file_os);
1744  printf("  version_info.file_type          = 0x%x\n",
1745         module_.version_info.file_type);
1746  printf("  version_info.file_subtype       = 0x%x\n",
1747         module_.version_info.file_subtype);
1748  printf("  version_info.file_date          = 0x%x:0x%x\n",
1749         module_.version_info.file_date_hi,
1750         module_.version_info.file_date_lo);
1751  printf("  cv_record.data_size             = %d\n",
1752         module_.cv_record.data_size);
1753  printf("  cv_record.rva                   = 0x%x\n",
1754         module_.cv_record.rva);
1755  printf("  misc_record.data_size           = %d\n",
1756         module_.misc_record.data_size);
1757  printf("  misc_record.rva                 = 0x%x\n",
1758         module_.misc_record.rva);
1759
1760  printf("  (code_file)                     = \"%s\"\n", code_file().c_str());
1761  printf("  (code_identifier)               = \"%s\"\n",
1762         code_identifier().c_str());
1763
1764  u_int32_t cv_record_size;
1765  const u_int8_t *cv_record = GetCVRecord(&cv_record_size);
1766  if (cv_record) {
1767    if (cv_record_signature_ == MD_CVINFOPDB70_SIGNATURE) {
1768      const MDCVInfoPDB70* cv_record_70 =
1769          reinterpret_cast<const MDCVInfoPDB70*>(cv_record);
1770      assert(cv_record_70->cv_signature == MD_CVINFOPDB70_SIGNATURE);
1771
1772      printf("  (cv_record).cv_signature        = 0x%x\n",
1773             cv_record_70->cv_signature);
1774      printf("  (cv_record).signature           = %08x-%04x-%04x-%02x%02x-",
1775             cv_record_70->signature.data1,
1776             cv_record_70->signature.data2,
1777             cv_record_70->signature.data3,
1778             cv_record_70->signature.data4[0],
1779             cv_record_70->signature.data4[1]);
1780      for (unsigned int guidIndex = 2;
1781           guidIndex < 8;
1782           ++guidIndex) {
1783        printf("%02x", cv_record_70->signature.data4[guidIndex]);
1784      }
1785      printf("\n");
1786      printf("  (cv_record).age                 = %d\n",
1787             cv_record_70->age);
1788      printf("  (cv_record).pdb_file_name       = \"%s\"\n",
1789             cv_record_70->pdb_file_name);
1790    } else if (cv_record_signature_ == MD_CVINFOPDB20_SIGNATURE) {
1791      const MDCVInfoPDB20* cv_record_20 =
1792          reinterpret_cast<const MDCVInfoPDB20*>(cv_record);
1793      assert(cv_record_20->cv_header.signature == MD_CVINFOPDB20_SIGNATURE);
1794
1795      printf("  (cv_record).cv_header.signature = 0x%x\n",
1796             cv_record_20->cv_header.signature);
1797      printf("  (cv_record).cv_header.offset    = 0x%x\n",
1798             cv_record_20->cv_header.offset);
1799      printf("  (cv_record).signature           = 0x%x\n",
1800             cv_record_20->signature);
1801      printf("  (cv_record).age                 = %d\n",
1802             cv_record_20->age);
1803      printf("  (cv_record).pdb_file_name       = \"%s\"\n",
1804             cv_record_20->pdb_file_name);
1805    } else {
1806      printf("  (cv_record)                     = ");
1807      for (unsigned int cv_byte_index = 0;
1808           cv_byte_index < cv_record_size;
1809           ++cv_byte_index) {
1810        printf("%02x", cv_record[cv_byte_index]);
1811      }
1812      printf("\n");
1813    }
1814  } else {
1815    printf("  (cv_record)                     = (null)\n");
1816  }
1817
1818  const MDImageDebugMisc* misc_record = GetMiscRecord(NULL);
1819  if (misc_record) {
1820    printf("  (misc_record).data_type         = 0x%x\n",
1821           misc_record->data_type);
1822    printf("  (misc_record).length            = 0x%x\n",
1823           misc_record->length);
1824    printf("  (misc_record).unicode           = %d\n",
1825           misc_record->unicode);
1826    // Don't bother printing the UTF-16, we don't really even expect to ever
1827    // see this misc_record anyway.
1828    if (misc_record->unicode)
1829      printf("  (misc_record).data              = \"%s\"\n",
1830             misc_record->data);
1831    else
1832      printf("  (misc_record).data              = (UTF-16)\n");
1833  } else {
1834    printf("  (misc_record)                   = (null)\n");
1835  }
1836
1837  printf("  (debug_file)                    = \"%s\"\n", debug_file().c_str());
1838  printf("  (debug_identifier)              = \"%s\"\n",
1839         debug_identifier().c_str());
1840  printf("  (version)                       = \"%s\"\n", version().c_str());
1841  printf("\n");
1842}
1843
1844
1845//
1846// MinidumpModuleList
1847//
1848
1849
1850MinidumpModuleList::MinidumpModuleList(Minidump* minidump)
1851    : MinidumpStream(minidump),
1852      range_map_(new RangeMap<u_int64_t, unsigned int>()),
1853      modules_(NULL),
1854      module_count_(0) {
1855}
1856
1857
1858MinidumpModuleList::~MinidumpModuleList() {
1859  delete range_map_;
1860  delete modules_;
1861}
1862
1863
1864bool MinidumpModuleList::Read(u_int32_t expected_size) {
1865  // Invalidate cached data.
1866  range_map_->Clear();
1867  delete modules_;
1868  modules_ = NULL;
1869  module_count_ = 0;
1870
1871  valid_ = false;
1872
1873  u_int32_t module_count;
1874  if (expected_size < sizeof(module_count)) {
1875    BPLOG(ERROR) << "MinidumpModuleList count size mismatch, " <<
1876                    expected_size << " < " << sizeof(module_count);
1877    return false;
1878  }
1879  if (!minidump_->ReadBytes(&module_count, sizeof(module_count))) {
1880    BPLOG(ERROR) << "MinidumpModuleList could not read module count";
1881    return false;
1882  }
1883
1884  if (minidump_->swap())
1885    Swap(&module_count);
1886
1887  if (module_count > numeric_limits<u_int32_t>::max() / MD_MODULE_SIZE) {
1888    BPLOG(ERROR) << "MinidumpModuleList module count " << module_count <<
1889                    " would cause multiplication overflow";
1890    return false;
1891  }
1892
1893  if (expected_size != sizeof(module_count) +
1894                       module_count * MD_MODULE_SIZE) {
1895    BPLOG(ERROR) << "MinidumpModuleList size mismatch, " << expected_size <<
1896                    " != " <<
1897                    sizeof(module_count) + module_count * MD_MODULE_SIZE;
1898    return false;
1899  }
1900
1901  if (module_count) {
1902    // TODO(mmentovai): verify rational size!
1903    scoped_ptr<MinidumpModules> modules(
1904        new MinidumpModules(module_count, MinidumpModule(minidump_)));
1905
1906    for (unsigned int module_index = 0;
1907         module_index < module_count;
1908         ++module_index) {
1909      MinidumpModule* module = &(*modules)[module_index];
1910
1911      // Assume that the file offset is correct after the last read.
1912      if (!module->Read()) {
1913        BPLOG(ERROR) << "MinidumpModuleList could not read module " <<
1914                        module_index << "/" << module_count;
1915        return false;
1916      }
1917    }
1918
1919    // Loop through the module list once more to read additional data and
1920    // build the range map.  This is done in a second pass because
1921    // MinidumpModule::ReadAuxiliaryData seeks around, and if it were
1922    // included in the loop above, additional seeks would be needed where
1923    // none are now to read contiguous data.
1924    for (unsigned int module_index = 0;
1925         module_index < module_count;
1926         ++module_index) {
1927      MinidumpModule* module = &(*modules)[module_index];
1928
1929      if (!module->ReadAuxiliaryData()) {
1930        BPLOG(ERROR) << "MinidumpModuleList could not read module auxiliary "
1931                        "data for module " <<
1932                        module_index << "/" << module_count;
1933        return false;
1934      }
1935
1936      // It is safe to use module->code_file() after successfully calling
1937      // module->ReadAuxiliaryData.
1938
1939      u_int64_t base_address = module->base_address();
1940      u_int64_t module_size = module->size();
1941      if (base_address == static_cast<u_int64_t>(-1)) {
1942        BPLOG(ERROR) << "MinidumpModuleList found bad base address "
1943                        "for module " << module_index << "/" << module_count <<
1944                        ", " << module->code_file();
1945        return false;
1946      }
1947
1948      if (!range_map_->StoreRange(base_address, module_size, module_index)) {
1949        BPLOG(ERROR) << "MinidumpModuleList could not store module " <<
1950                        module_index << "/" << module_count << ", " <<
1951                        module->code_file() << ", " <<
1952                        HexString(base_address) << "+" <<
1953                        HexString(module_size);
1954        return false;
1955      }
1956    }
1957
1958    modules_ = modules.release();
1959  }
1960
1961  module_count_ = module_count;
1962
1963  valid_ = true;
1964  return true;
1965}
1966
1967
1968const MinidumpModule* MinidumpModuleList::GetModuleForAddress(
1969    u_int64_t address) const {
1970  if (!valid_) {
1971    BPLOG(ERROR) << "Invalid MinidumpModuleList for GetModuleForAddress";
1972    return NULL;
1973  }
1974
1975  unsigned int module_index;
1976  if (!range_map_->RetrieveRange(address, &module_index, NULL, NULL)) {
1977    BPLOG(INFO) << "MinidumpModuleList has no module at " <<
1978                   HexString(address);
1979    return NULL;
1980  }
1981
1982  return GetModuleAtIndex(module_index);
1983}
1984
1985
1986const MinidumpModule* MinidumpModuleList::GetMainModule() const {
1987  if (!valid_) {
1988    BPLOG(ERROR) << "Invalid MinidumpModuleList for GetMainModule";
1989    return NULL;
1990  }
1991
1992  // The main code module is the first one present in a minidump file's
1993  // MDRawModuleList.
1994  return GetModuleAtSequence(0);
1995}
1996
1997
1998const MinidumpModule* MinidumpModuleList::GetModuleAtSequence(
1999    unsigned int sequence) const {
2000  if (!valid_) {
2001    BPLOG(ERROR) << "Invalid MinidumpModuleList for GetModuleAtSequence";
2002    return NULL;
2003  }
2004
2005  if (sequence >= module_count_) {
2006    BPLOG(ERROR) << "MinidumpModuleList sequence out of range: " <<
2007                    sequence << "/" << module_count_;
2008    return NULL;
2009  }
2010
2011  unsigned int module_index;
2012  if (!range_map_->RetrieveRangeAtIndex(sequence, &module_index, NULL, NULL)) {
2013    BPLOG(ERROR) << "MinidumpModuleList has no module at sequence " << sequence;
2014    return NULL;
2015  }
2016
2017  return GetModuleAtIndex(module_index);
2018}
2019
2020
2021const MinidumpModule* MinidumpModuleList::GetModuleAtIndex(
2022    unsigned int index) const {
2023  if (!valid_) {
2024    BPLOG(ERROR) << "Invalid MinidumpModuleList for GetModuleAtIndex";
2025    return NULL;
2026  }
2027
2028  if (index >= module_count_) {
2029    BPLOG(ERROR) << "MinidumpModuleList index out of range: " <<
2030                    index << "/" << module_count_;
2031    return NULL;
2032  }
2033
2034  return &(*modules_)[index];
2035}
2036
2037
2038const CodeModules* MinidumpModuleList::Copy() const {
2039  return new BasicCodeModules(this);
2040}
2041
2042
2043void MinidumpModuleList::Print() {
2044  if (!valid_) {
2045    BPLOG(ERROR) << "MinidumpModuleList cannot print invalid data";
2046    return;
2047  }
2048
2049  printf("MinidumpModuleList\n");
2050  printf("  module_count = %d\n", module_count_);
2051  printf("\n");
2052
2053  for (unsigned int module_index = 0;
2054       module_index < module_count_;
2055       ++module_index) {
2056    printf("module[%d]\n", module_index);
2057
2058    (*modules_)[module_index].Print();
2059  }
2060}
2061
2062
2063//
2064// MinidumpMemoryList
2065//
2066
2067
2068MinidumpMemoryList::MinidumpMemoryList(Minidump* minidump)
2069    : MinidumpStream(minidump),
2070      range_map_(new RangeMap<u_int64_t, unsigned int>()),
2071      descriptors_(NULL),
2072      regions_(NULL),
2073      region_count_(0) {
2074}
2075
2076
2077MinidumpMemoryList::~MinidumpMemoryList() {
2078  delete range_map_;
2079  delete descriptors_;
2080  delete regions_;
2081}
2082
2083
2084bool MinidumpMemoryList::Read(u_int32_t expected_size) {
2085  // Invalidate cached data.
2086  delete descriptors_;
2087  descriptors_ = NULL;
2088  delete regions_;
2089  regions_ = NULL;
2090  range_map_->Clear();
2091  region_count_ = 0;
2092
2093  valid_ = false;
2094
2095  u_int32_t region_count;
2096  if (expected_size < sizeof(region_count)) {
2097    BPLOG(ERROR) << "MinidumpMemoryList count size mismatch, " <<
2098                    expected_size << " < " << sizeof(region_count);
2099    return false;
2100  }
2101  if (!minidump_->ReadBytes(&region_count, sizeof(region_count))) {
2102    BPLOG(ERROR) << "MinidumpMemoryList could not read memory region count";
2103    return false;
2104  }
2105
2106  if (minidump_->swap())
2107    Swap(&region_count);
2108
2109  if (region_count >
2110          numeric_limits<u_int32_t>::max() / sizeof(MDMemoryDescriptor)) {
2111    BPLOG(ERROR) << "MinidumpMemoryList region count " << region_count <<
2112                    " would cause multiplication overflow";
2113    return false;
2114  }
2115
2116  if (expected_size != sizeof(region_count) +
2117                       region_count * sizeof(MDMemoryDescriptor)) {
2118    BPLOG(ERROR) << "MinidumpMemoryList size mismatch, " << expected_size <<
2119                    " != " << region_count * sizeof(MDMemoryDescriptor);
2120    return false;
2121  }
2122
2123  if (region_count) {
2124    // TODO(mmentovai): verify rational size!
2125    scoped_ptr<MemoryDescriptors> descriptors(
2126        new MemoryDescriptors(region_count));
2127
2128    // Read the entire array in one fell swoop, instead of reading one entry
2129    // at a time in the loop.
2130    if (!minidump_->ReadBytes(&(*descriptors)[0],
2131                              sizeof(MDMemoryDescriptor) * region_count)) {
2132      BPLOG(ERROR) << "MinidumpMemoryList could not read memory region list";
2133      return false;
2134    }
2135
2136    scoped_ptr<MemoryRegions> regions(
2137        new MemoryRegions(region_count, MinidumpMemoryRegion(minidump_)));
2138
2139    for (unsigned int region_index = 0;
2140         region_index < region_count;
2141         ++region_index) {
2142      MDMemoryDescriptor* descriptor = &(*descriptors)[region_index];
2143
2144      if (minidump_->swap())
2145        Swap(descriptor);
2146
2147      u_int64_t base_address = descriptor->start_of_memory_range;
2148      u_int32_t region_size = descriptor->memory.data_size;
2149
2150      // Check for base + size overflow or undersize.
2151      if (region_size == 0 ||
2152          region_size > numeric_limits<u_int64_t>::max() - base_address) {
2153        BPLOG(ERROR) << "MinidumpMemoryList has a memory region problem, " <<
2154                        " region " << region_index << "/" << region_count <<
2155                        ", " << HexString(base_address) << "+" <<
2156                        HexString(region_size);
2157        return false;
2158      }
2159
2160      if (!range_map_->StoreRange(base_address, region_size, region_index)) {
2161        BPLOG(ERROR) << "MinidumpMemoryList could not store memory region " <<
2162                        region_index << "/" << region_count << ", " <<
2163                        HexString(base_address) << "+" <<
2164                        HexString(region_size);
2165        return false;
2166      }
2167
2168      (*regions)[region_index].SetDescriptor(descriptor);
2169    }
2170
2171    descriptors_ = descriptors.release();
2172    regions_ = regions.release();
2173  }
2174
2175  region_count_ = region_count;
2176
2177  valid_ = true;
2178  return true;
2179}
2180
2181
2182MinidumpMemoryRegion* MinidumpMemoryList::GetMemoryRegionAtIndex(
2183      unsigned int index) {
2184  if (!valid_) {
2185    BPLOG(ERROR) << "Invalid MinidumpMemoryList for GetMemoryRegionAtIndex";
2186    return NULL;
2187  }
2188
2189  if (index >= region_count_) {
2190    BPLOG(ERROR) << "MinidumpMemoryList index out of range: " <<
2191                    index << "/" << region_count_;
2192    return NULL;
2193  }
2194
2195  return &(*regions_)[index];
2196}
2197
2198
2199MinidumpMemoryRegion* MinidumpMemoryList::GetMemoryRegionForAddress(
2200    u_int64_t address) {
2201  if (!valid_) {
2202    BPLOG(ERROR) << "Invalid MinidumpMemoryList for GetMemoryRegionForAddress";
2203    return NULL;
2204  }
2205
2206  unsigned int region_index;
2207  if (!range_map_->RetrieveRange(address, &region_index, NULL, NULL)) {
2208    BPLOG(INFO) << "MinidumpMemoryList has no memory region at " <<
2209                   HexString(address);
2210    return NULL;
2211  }
2212
2213  return GetMemoryRegionAtIndex(region_index);
2214}
2215
2216
2217void MinidumpMemoryList::Print() {
2218  if (!valid_) {
2219    BPLOG(ERROR) << "MinidumpMemoryList cannot print invalid data";
2220    return;
2221  }
2222
2223  printf("MinidumpMemoryList\n");
2224  printf("  region_count = %d\n", region_count_);
2225  printf("\n");
2226
2227  for (unsigned int region_index = 0;
2228       region_index < region_count_;
2229       ++region_index) {
2230    MDMemoryDescriptor* descriptor = &(*descriptors_)[region_index];
2231    printf("region[%d]\n", region_index);
2232    printf("MDMemoryDescriptor\n");
2233    printf("  start_of_memory_range = 0x%llx\n",
2234           descriptor->start_of_memory_range);
2235    printf("  memory.data_size      = 0x%x\n", descriptor->memory.data_size);
2236    printf("  memory.rva            = 0x%x\n", descriptor->memory.rva);
2237    MinidumpMemoryRegion* region = GetMemoryRegionAtIndex(region_index);
2238    if (region) {
2239      printf("Memory\n");
2240      region->Print();
2241    } else {
2242      printf("No memory\n");
2243    }
2244    printf("\n");
2245  }
2246}
2247
2248
2249//
2250// MinidumpException
2251//
2252
2253
2254MinidumpException::MinidumpException(Minidump* minidump)
2255    : MinidumpStream(minidump),
2256      exception_(),
2257      context_(NULL) {
2258}
2259
2260
2261MinidumpException::~MinidumpException() {
2262  delete context_;
2263}
2264
2265
2266bool MinidumpException::Read(u_int32_t expected_size) {
2267  // Invalidate cached data.
2268  delete context_;
2269  context_ = NULL;
2270
2271  valid_ = false;
2272
2273  if (expected_size != sizeof(exception_)) {
2274    BPLOG(ERROR) << "MinidumpException size mismatch, " << expected_size <<
2275                    " != " << sizeof(exception_);
2276    return false;
2277  }
2278
2279  if (!minidump_->ReadBytes(&exception_, sizeof(exception_))) {
2280    BPLOG(ERROR) << "MinidumpException cannot read exception";
2281    return false;
2282  }
2283
2284  if (minidump_->swap()) {
2285    Swap(&exception_.thread_id);
2286    // exception_.__align is for alignment only and does not need to be
2287    // swapped.
2288    Swap(&exception_.exception_record.exception_code);
2289    Swap(&exception_.exception_record.exception_flags);
2290    Swap(&exception_.exception_record.exception_record);
2291    Swap(&exception_.exception_record.exception_address);
2292    Swap(&exception_.exception_record.number_parameters);
2293    // exception_.exception_record.__align is for alignment only and does not
2294    // need to be swapped.
2295    for (unsigned int parameter_index = 0;
2296         parameter_index < MD_EXCEPTION_MAXIMUM_PARAMETERS;
2297         ++parameter_index) {
2298      Swap(&exception_.exception_record.exception_information[parameter_index]);
2299    }
2300    Swap(&exception_.thread_context);
2301  }
2302
2303  valid_ = true;
2304  return true;
2305}
2306
2307
2308bool MinidumpException::GetThreadID(u_int32_t *thread_id) const {
2309  BPLOG_IF(ERROR, !thread_id) << "MinidumpException::GetThreadID requires "
2310                                 "|thread_id|";
2311  assert(thread_id);
2312  *thread_id = 0;
2313
2314  if (!valid_) {
2315    BPLOG(ERROR) << "Invalid MinidumpException for GetThreadID";
2316    return false;
2317  }
2318
2319  *thread_id = exception_.thread_id;
2320  return true;
2321}
2322
2323
2324MinidumpContext* MinidumpException::GetContext() {
2325  if (!valid_) {
2326    BPLOG(ERROR) << "Invalid MinidumpException for GetContext";
2327    return NULL;
2328  }
2329
2330  if (!context_) {
2331    if (!minidump_->SeekSet(exception_.thread_context.rva)) {
2332      BPLOG(ERROR) << "MinidumpException cannot seek to context";
2333      return NULL;
2334    }
2335
2336    scoped_ptr<MinidumpContext> context(new MinidumpContext(minidump_));
2337
2338    if (!context->Read(exception_.thread_context.data_size)) {
2339      BPLOG(ERROR) << "MinidumpException cannot read context";
2340      return NULL;
2341    }
2342
2343    context_ = context.release();
2344  }
2345
2346  return context_;
2347}
2348
2349
2350void MinidumpException::Print() {
2351  if (!valid_) {
2352    BPLOG(ERROR) << "MinidumpException cannot print invalid data";
2353    return;
2354  }
2355
2356  printf("MDException\n");
2357  printf("  thread_id                                  = 0x%x\n",
2358         exception_.thread_id);
2359  printf("  exception_record.exception_code            = 0x%x\n",
2360         exception_.exception_record.exception_code);
2361  printf("  exception_record.exception_flags           = 0x%x\n",
2362         exception_.exception_record.exception_flags);
2363  printf("  exception_record.exception_record          = 0x%llx\n",
2364         exception_.exception_record.exception_record);
2365  printf("  exception_record.exception_address         = 0x%llx\n",
2366         exception_.exception_record.exception_address);
2367  printf("  exception_record.number_parameters         = %d\n",
2368         exception_.exception_record.number_parameters);
2369  for (unsigned int parameterIndex = 0;
2370       parameterIndex < exception_.exception_record.number_parameters;
2371       ++parameterIndex) {
2372    printf("  exception_record.exception_information[%2d] = 0x%llx\n",
2373           parameterIndex,
2374           exception_.exception_record.exception_information[parameterIndex]);
2375  }
2376  printf("  thread_context.data_size                   = %d\n",
2377         exception_.thread_context.data_size);
2378  printf("  thread_context.rva                         = 0x%x\n",
2379         exception_.thread_context.rva);
2380  MinidumpContext* context = GetContext();
2381  if (context) {
2382    printf("\n");
2383    context->Print();
2384  } else {
2385    printf("  (no context)\n");
2386    printf("\n");
2387  }
2388}
2389
2390
2391//
2392// MinidumpSystemInfo
2393//
2394
2395
2396MinidumpSystemInfo::MinidumpSystemInfo(Minidump* minidump)
2397    : MinidumpStream(minidump),
2398      system_info_(),
2399      csd_version_(NULL),
2400      cpu_vendor_(NULL) {
2401}
2402
2403
2404MinidumpSystemInfo::~MinidumpSystemInfo() {
2405  delete csd_version_;
2406  delete cpu_vendor_;
2407}
2408
2409
2410bool MinidumpSystemInfo::Read(u_int32_t expected_size) {
2411  // Invalidate cached data.
2412  delete csd_version_;
2413  csd_version_ = NULL;
2414  delete cpu_vendor_;
2415  cpu_vendor_ = NULL;
2416
2417  valid_ = false;
2418
2419  if (expected_size != sizeof(system_info_)) {
2420    BPLOG(ERROR) << "MinidumpSystemInfo size mismatch, " << expected_size <<
2421                    " != " << sizeof(system_info_);
2422    return false;
2423  }
2424
2425  if (!minidump_->ReadBytes(&system_info_, sizeof(system_info_))) {
2426    BPLOG(ERROR) << "MinidumpSystemInfo cannot read system info";
2427    return false;
2428  }
2429
2430  if (minidump_->swap()) {
2431    Swap(&system_info_.processor_architecture);
2432    Swap(&system_info_.processor_level);
2433    Swap(&system_info_.processor_revision);
2434    // number_of_processors and product_type are 8-bit quantities and need no
2435    // swapping.
2436    Swap(&system_info_.major_version);
2437    Swap(&system_info_.minor_version);
2438    Swap(&system_info_.build_number);
2439    Swap(&system_info_.platform_id);
2440    Swap(&system_info_.csd_version_rva);
2441    Swap(&system_info_.suite_mask);
2442    // Don't swap the reserved2 field because its contents are unknown.
2443
2444    if (system_info_.processor_architecture == MD_CPU_ARCHITECTURE_X86 ||
2445        system_info_.processor_architecture == MD_CPU_ARCHITECTURE_X86_WIN64) {
2446      for (unsigned int i = 0; i < 3; ++i)
2447        Swap(&system_info_.cpu.x86_cpu_info.vendor_id[i]);
2448      Swap(&system_info_.cpu.x86_cpu_info.version_information);
2449      Swap(&system_info_.cpu.x86_cpu_info.feature_information);
2450      Swap(&system_info_.cpu.x86_cpu_info.amd_extended_cpu_features);
2451    } else {
2452      for (unsigned int i = 0; i < 2; ++i)
2453        Swap(&system_info_.cpu.other_cpu_info.processor_features[i]);
2454    }
2455  }
2456
2457  valid_ = true;
2458  return true;
2459}
2460
2461
2462string MinidumpSystemInfo::GetOS() {
2463  if (!valid_) {
2464    BPLOG(ERROR) << "Invalid MinidumpSystemInfo for GetOS";
2465    return NULL;
2466  }
2467
2468  string os;
2469
2470  switch (system_info_.platform_id) {
2471    case MD_OS_WIN32_NT:
2472    case MD_OS_WIN32_WINDOWS:
2473      os = "windows";
2474      break;
2475
2476    case MD_OS_MAC_OS_X:
2477      os = "mac";
2478      break;
2479
2480    case MD_OS_LINUX:
2481      os = "linux";
2482      break;
2483
2484    default:
2485      BPLOG(ERROR) << "MinidumpSystemInfo unknown OS for platform " <<
2486                      HexString(system_info_.platform_id);
2487      break;
2488  }
2489
2490  return os;
2491}
2492
2493
2494string MinidumpSystemInfo::GetCPU() {
2495  if (!valid_) {
2496    BPLOG(ERROR) << "Invalid MinidumpSystemInfo for GetCPU";
2497    return "";
2498  }
2499
2500  string cpu;
2501
2502  switch (system_info_.processor_architecture) {
2503    case MD_CPU_ARCHITECTURE_X86:
2504    case MD_CPU_ARCHITECTURE_X86_WIN64:
2505      cpu = "x86";
2506      break;
2507
2508    case MD_CPU_ARCHITECTURE_PPC:
2509      cpu = "ppc";
2510      break;
2511
2512    default:
2513      BPLOG(ERROR) << "MinidumpSystemInfo unknown CPU for architecture " <<
2514                      HexString(system_info_.processor_architecture);
2515      break;
2516  }
2517
2518  return cpu;
2519}
2520
2521
2522const string* MinidumpSystemInfo::GetCSDVersion() {
2523  if (!valid_) {
2524    BPLOG(ERROR) << "Invalid MinidumpSystemInfo for GetCSDVersion";
2525    return NULL;
2526  }
2527
2528  if (!csd_version_)
2529    csd_version_ = minidump_->ReadString(system_info_.csd_version_rva);
2530
2531  BPLOG_IF(ERROR, !csd_version_) << "MinidumpSystemInfo could not read "
2532                                    "CSD version";
2533
2534  return csd_version_;
2535}
2536
2537
2538const string* MinidumpSystemInfo::GetCPUVendor() {
2539  if (!valid_) {
2540    BPLOG(ERROR) << "Invalid MinidumpSystemInfo for GetCPUVendor";
2541    return NULL;
2542  }
2543
2544  // CPU vendor information can only be determined from x86 minidumps.
2545  if (!cpu_vendor_ &&
2546      (system_info_.processor_architecture == MD_CPU_ARCHITECTURE_X86 ||
2547       system_info_.processor_architecture == MD_CPU_ARCHITECTURE_X86_WIN64)) {
2548    char cpu_vendor_string[13];
2549    snprintf(cpu_vendor_string, sizeof(cpu_vendor_string),
2550             "%c%c%c%c%c%c%c%c%c%c%c%c",
2551              system_info_.cpu.x86_cpu_info.vendor_id[0] & 0xff,
2552             (system_info_.cpu.x86_cpu_info.vendor_id[0] >> 8) & 0xff,
2553             (system_info_.cpu.x86_cpu_info.vendor_id[0] >> 16) & 0xff,
2554             (system_info_.cpu.x86_cpu_info.vendor_id[0] >> 24) & 0xff,
2555              system_info_.cpu.x86_cpu_info.vendor_id[1] & 0xff,
2556             (system_info_.cpu.x86_cpu_info.vendor_id[1] >> 8) & 0xff,
2557             (system_info_.cpu.x86_cpu_info.vendor_id[1] >> 16) & 0xff,
2558             (system_info_.cpu.x86_cpu_info.vendor_id[1] >> 24) & 0xff,
2559              system_info_.cpu.x86_cpu_info.vendor_id[2] & 0xff,
2560             (system_info_.cpu.x86_cpu_info.vendor_id[2] >> 8) & 0xff,
2561             (system_info_.cpu.x86_cpu_info.vendor_id[2] >> 16) & 0xff,
2562             (system_info_.cpu.x86_cpu_info.vendor_id[2] >> 24) & 0xff);
2563    cpu_vendor_ = new string(cpu_vendor_string);
2564  }
2565
2566  return cpu_vendor_;
2567}
2568
2569
2570void MinidumpSystemInfo::Print() {
2571  if (!valid_) {
2572    BPLOG(ERROR) << "MinidumpSystemInfo cannot print invalid data";
2573    return;
2574  }
2575
2576  printf("MDRawSystemInfo\n");
2577  printf("  processor_architecture                     = %d\n",
2578         system_info_.processor_architecture);
2579  printf("  processor_level                            = %d\n",
2580         system_info_.processor_level);
2581  printf("  processor_revision                         = 0x%x\n",
2582         system_info_.processor_revision);
2583  printf("  number_of_processors                       = %d\n",
2584         system_info_.number_of_processors);
2585  printf("  product_type                               = %d\n",
2586         system_info_.product_type);
2587  printf("  major_version                              = %d\n",
2588         system_info_.major_version);
2589  printf("  minor_version                              = %d\n",
2590         system_info_.minor_version);
2591  printf("  build_number                               = %d\n",
2592         system_info_.build_number);
2593  printf("  platform_id                                = %d\n",
2594         system_info_.platform_id);
2595  printf("  csd_version_rva                            = 0x%x\n",
2596         system_info_.csd_version_rva);
2597  printf("  suite_mask                                 = 0x%x\n",
2598         system_info_.suite_mask);
2599  for (unsigned int i = 0; i < 3; ++i) {
2600    printf("  cpu.x86_cpu_info.vendor_id[%d]              = 0x%x\n",
2601           i, system_info_.cpu.x86_cpu_info.vendor_id[i]);
2602  }
2603  printf("  cpu.x86_cpu_info.version_information       = 0x%x\n",
2604         system_info_.cpu.x86_cpu_info.version_information);
2605  printf("  cpu.x86_cpu_info.feature_information       = 0x%x\n",
2606         system_info_.cpu.x86_cpu_info.feature_information);
2607  printf("  cpu.x86_cpu_info.amd_extended_cpu_features = 0x%x\n",
2608         system_info_.cpu.x86_cpu_info.amd_extended_cpu_features);
2609  const string* csd_version = GetCSDVersion();
2610  if (csd_version) {
2611    printf("  (csd_version)                              = \"%s\"\n",
2612           csd_version->c_str());
2613  } else {
2614    printf("  (csd_version)                              = (null)\n");
2615  }
2616  const string* cpu_vendor = GetCPUVendor();
2617  if (cpu_vendor) {
2618    printf("  (cpu_vendor)                               = \"%s\"\n",
2619           cpu_vendor->c_str());
2620  } else {
2621    printf("  (cpu_vendor)                               = (null)\n");
2622  }
2623  printf("\n");
2624}
2625
2626
2627//
2628// MinidumpMiscInfo
2629//
2630
2631
2632MinidumpMiscInfo::MinidumpMiscInfo(Minidump* minidump)
2633    : MinidumpStream(minidump),
2634      misc_info_() {
2635}
2636
2637
2638bool MinidumpMiscInfo::Read(u_int32_t expected_size) {
2639  valid_ = false;
2640
2641  if (expected_size != MD_MISCINFO_SIZE &&
2642      expected_size != MD_MISCINFO2_SIZE) {
2643    BPLOG(ERROR) << "MinidumpMiscInfo size mismatch, " << expected_size <<
2644                    " != " << MD_MISCINFO_SIZE << ", " << MD_MISCINFO2_SIZE <<
2645                    ")";
2646    return false;
2647  }
2648
2649  if (!minidump_->ReadBytes(&misc_info_, expected_size)) {
2650    BPLOG(ERROR) << "MinidumpMiscInfo cannot read miscellaneous info";
2651    return false;
2652  }
2653
2654  if (minidump_->swap()) {
2655    Swap(&misc_info_.size_of_info);
2656    Swap(&misc_info_.flags1);
2657    Swap(&misc_info_.process_id);
2658    Swap(&misc_info_.process_create_time);
2659    Swap(&misc_info_.process_user_time);
2660    Swap(&misc_info_.process_kernel_time);
2661    if (misc_info_.size_of_info > MD_MISCINFO_SIZE) {
2662      Swap(&misc_info_.processor_max_mhz);
2663      Swap(&misc_info_.processor_current_mhz);
2664      Swap(&misc_info_.processor_mhz_limit);
2665      Swap(&misc_info_.processor_max_idle_state);
2666      Swap(&misc_info_.processor_current_idle_state);
2667    }
2668  }
2669
2670  if (expected_size != misc_info_.size_of_info) {
2671    BPLOG(ERROR) << "MinidumpMiscInfo size mismatch, " <<
2672                    expected_size << " != " << misc_info_.size_of_info;
2673    return false;
2674  }
2675
2676  valid_ = true;
2677  return true;
2678}
2679
2680
2681void MinidumpMiscInfo::Print() {
2682  if (!valid_) {
2683    BPLOG(ERROR) << "MinidumpMiscInfo cannot print invalid data";
2684    return;
2685  }
2686
2687  printf("MDRawMiscInfo\n");
2688  printf("  size_of_info                 = %d\n",   misc_info_.size_of_info);
2689  printf("  flags1                       = 0x%x\n", misc_info_.flags1);
2690  printf("  process_id                   = 0x%x\n", misc_info_.process_id);
2691  printf("  process_create_time          = 0x%x\n",
2692         misc_info_.process_create_time);
2693  printf("  process_user_time            = 0x%x\n",
2694         misc_info_.process_user_time);
2695  printf("  process_kernel_time          = 0x%x\n",
2696         misc_info_.process_kernel_time);
2697  if (misc_info_.size_of_info > MD_MISCINFO_SIZE) {
2698    printf("  processor_max_mhz            = %d\n",
2699           misc_info_.processor_max_mhz);
2700    printf("  processor_current_mhz        = %d\n",
2701           misc_info_.processor_current_mhz);
2702    printf("  processor_mhz_limit          = %d\n",
2703           misc_info_.processor_mhz_limit);
2704    printf("  processor_max_idle_state     = 0x%x\n",
2705           misc_info_.processor_max_idle_state);
2706    printf("  processor_current_idle_state = 0x%x\n",
2707           misc_info_.processor_current_idle_state);
2708  }
2709  printf("\n");
2710}
2711
2712
2713//
2714// MinidumpBreakpadInfo
2715//
2716
2717
2718MinidumpBreakpadInfo::MinidumpBreakpadInfo(Minidump* minidump)
2719    : MinidumpStream(minidump),
2720      breakpad_info_() {
2721}
2722
2723
2724bool MinidumpBreakpadInfo::Read(u_int32_t expected_size) {
2725  valid_ = false;
2726
2727  if (expected_size != sizeof(breakpad_info_)) {
2728    BPLOG(ERROR) << "MinidumpBreakpadInfo size mismatch, " << expected_size <<
2729                    " != " << sizeof(breakpad_info_);
2730    return false;
2731  }
2732
2733  if (!minidump_->ReadBytes(&breakpad_info_, sizeof(breakpad_info_))) {
2734    BPLOG(ERROR) << "MinidumpBreakpadInfo cannot read Breakpad info";
2735    return false;
2736  }
2737
2738  if (minidump_->swap()) {
2739    Swap(&breakpad_info_.validity);
2740    Swap(&breakpad_info_.dump_thread_id);
2741    Swap(&breakpad_info_.requesting_thread_id);
2742  }
2743
2744  valid_ = true;
2745  return true;
2746}
2747
2748
2749bool MinidumpBreakpadInfo::GetDumpThreadID(u_int32_t *thread_id) const {
2750  BPLOG_IF(ERROR, !thread_id) << "MinidumpBreakpadInfo::GetDumpThreadID "
2751                                 "requires |thread_id|";
2752  assert(thread_id);
2753  *thread_id = 0;
2754
2755  if (!valid_) {
2756    BPLOG(ERROR) << "Invalid MinidumpBreakpadInfo for GetDumpThreadID";
2757    return false;
2758  }
2759
2760  if (!(breakpad_info_.validity & MD_BREAKPAD_INFO_VALID_DUMP_THREAD_ID)) {
2761    BPLOG(INFO) << "MinidumpBreakpadInfo has no dump thread";
2762    return false;
2763  }
2764
2765  *thread_id = breakpad_info_.dump_thread_id;
2766  return true;
2767}
2768
2769
2770bool MinidumpBreakpadInfo::GetRequestingThreadID(u_int32_t *thread_id)
2771    const {
2772  BPLOG_IF(ERROR, !thread_id) << "MinidumpBreakpadInfo::GetRequestingThreadID "
2773                                 "requires |thread_id|";
2774  assert(thread_id);
2775  *thread_id = 0;
2776
2777  if (!thread_id || !valid_) {
2778    BPLOG(ERROR) << "Invalid MinidumpBreakpadInfo for GetRequestingThreadID";
2779    return false;
2780  }
2781
2782  if (!(breakpad_info_.validity &
2783            MD_BREAKPAD_INFO_VALID_REQUESTING_THREAD_ID)) {
2784    BPLOG(INFO) << "MinidumpBreakpadInfo has no requesting thread";
2785    return false;
2786  }
2787
2788  *thread_id = breakpad_info_.requesting_thread_id;
2789  return true;
2790}
2791
2792
2793void MinidumpBreakpadInfo::Print() {
2794  if (!valid_) {
2795    BPLOG(ERROR) << "MinidumpBreakpadInfo cannot print invalid data";
2796    return;
2797  }
2798
2799  printf("MDRawBreakpadInfo\n");
2800  printf("  validity             = 0x%x\n", breakpad_info_.validity);
2801
2802  if (breakpad_info_.validity & MD_BREAKPAD_INFO_VALID_DUMP_THREAD_ID) {
2803    printf("  dump_thread_id       = 0x%x\n", breakpad_info_.dump_thread_id);
2804  } else {
2805    printf("  dump_thread_id       = (invalid)\n");
2806  }
2807
2808  if (breakpad_info_.validity & MD_BREAKPAD_INFO_VALID_DUMP_THREAD_ID) {
2809    printf("  requesting_thread_id = 0x%x\n",
2810           breakpad_info_.requesting_thread_id);
2811  } else {
2812    printf("  requesting_thread_id = (invalid)\n");
2813  }
2814
2815  printf("\n");
2816}
2817
2818
2819//
2820// Minidump
2821//
2822
2823
2824Minidump::Minidump(const string& path)
2825    : header_(),
2826      directory_(NULL),
2827      stream_map_(new MinidumpStreamMap()),
2828      path_(path),
2829      fd_(-1),
2830      swap_(false),
2831      valid_(false) {
2832}
2833
2834
2835Minidump::~Minidump() {
2836  delete directory_;
2837  delete stream_map_;
2838  if (fd_ != -1) {
2839    BPLOG(INFO) << "Minidump closing minidump on fd " << fd_;
2840    close(fd_);
2841  }
2842}
2843
2844
2845bool Minidump::Open() {
2846  if (fd_ != -1) {
2847    BPLOG(INFO) << "Minidump reopening minidump " << path_ << " on fd " << fd_;
2848
2849    // The file is already open.  Seek to the beginning, which is the position
2850    // the file would be at if it were opened anew.
2851    return SeekSet(0);
2852  }
2853
2854  // O_BINARY is useful (and defined) on Windows.  On other platforms, it's
2855  // useless, and because it's defined as 0 above, harmless.
2856  fd_ = open(path_.c_str(), O_RDONLY | O_BINARY);
2857  if (fd_ == -1) {
2858    string error_string;
2859    int error_code = ErrnoString(&error_string);
2860    BPLOG(ERROR) << "Minidump could not open minidump " << path_ <<
2861                    ", error " << error_code << ": " << error_string;
2862    return false;
2863  }
2864
2865  BPLOG(INFO) << "Minidump opened minidump " << path_ << " on fd " << fd_;
2866  return true;
2867}
2868
2869
2870bool Minidump::Read() {
2871  // Invalidate cached data.
2872  delete directory_;
2873  directory_ = NULL;
2874  stream_map_->clear();
2875
2876  valid_ = false;
2877
2878  if (!Open()) {
2879    BPLOG(ERROR) << "Minidump cannot open minidump";
2880    return false;
2881  }
2882
2883  if (!ReadBytes(&header_, sizeof(MDRawHeader))) {
2884    BPLOG(ERROR) << "Minidump cannot read header";
2885    return false;
2886  }
2887
2888  if (header_.signature != MD_HEADER_SIGNATURE) {
2889    // The file may be byte-swapped.  Under the present architecture, these
2890    // classes don't know or need to know what CPU (or endianness) the
2891    // minidump was produced on in order to parse it.  Use the signature as
2892    // a byte order marker.
2893    u_int32_t signature_swapped = header_.signature;
2894    Swap(&signature_swapped);
2895    if (signature_swapped != MD_HEADER_SIGNATURE) {
2896      // This isn't a minidump or a byte-swapped minidump.
2897      BPLOG(ERROR) << "Minidump header signature mismatch: (" <<
2898                      HexString(header_.signature) << ", " <<
2899                      HexString(signature_swapped) << ") != " <<
2900                      HexString(MD_HEADER_SIGNATURE);
2901      return false;
2902    }
2903    swap_ = true;
2904  } else {
2905    // The file is not byte-swapped.  Set swap_ false (it may have been true
2906    // if the object is being reused?)
2907    swap_ = false;
2908  }
2909
2910  BPLOG(INFO) << "Minidump " << (swap_ ? "" : "not ") <<
2911                 "byte-swapping minidump";
2912
2913  if (swap_) {
2914    Swap(&header_.signature);
2915    Swap(&header_.version);
2916    Swap(&header_.stream_count);
2917    Swap(&header_.stream_directory_rva);
2918    Swap(&header_.checksum);
2919    Swap(&header_.time_date_stamp);
2920    Swap(&header_.flags);
2921  }
2922
2923  // Version check.  The high 16 bits of header_.version contain something
2924  // else "implementation specific."
2925  if ((header_.version & 0x0000ffff) != MD_HEADER_VERSION) {
2926    BPLOG(ERROR) << "Minidump version mismatch: " <<
2927                    HexString(header_.version & 0x0000ffff) << " != " <<
2928                    HexString(MD_HEADER_VERSION);
2929    return false;
2930  }
2931
2932  if (!SeekSet(header_.stream_directory_rva)) {
2933    BPLOG(ERROR) << "Minidump cannot seek to stream directory";
2934    return false;
2935  }
2936
2937  if (header_.stream_count) {
2938    // TODO(mmentovai): verify rational size!
2939    scoped_ptr<MinidumpDirectoryEntries> directory(
2940        new MinidumpDirectoryEntries(header_.stream_count));
2941
2942    // Read the entire array in one fell swoop, instead of reading one entry
2943    // at a time in the loop.
2944    if (!ReadBytes(&(*directory)[0],
2945                   sizeof(MDRawDirectory) * header_.stream_count)) {
2946      BPLOG(ERROR) << "Minidump cannot read stream directory";
2947      return false;
2948    }
2949
2950    for (unsigned int stream_index = 0;
2951         stream_index < header_.stream_count;
2952         ++stream_index) {
2953      MDRawDirectory* directory_entry = &(*directory)[stream_index];
2954
2955      if (swap_) {
2956        Swap(&directory_entry->stream_type);
2957        Swap(&directory_entry->location);
2958      }
2959
2960      // Initialize the stream_map_ map, which speeds locating a stream by
2961      // type.
2962      unsigned int stream_type = directory_entry->stream_type;
2963      switch (stream_type) {
2964        case MD_THREAD_LIST_STREAM:
2965        case MD_MODULE_LIST_STREAM:
2966        case MD_MEMORY_LIST_STREAM:
2967        case MD_EXCEPTION_STREAM:
2968        case MD_SYSTEM_INFO_STREAM:
2969        case MD_MISC_INFO_STREAM:
2970        case MD_BREAKPAD_INFO_STREAM: {
2971          if (stream_map_->find(stream_type) != stream_map_->end()) {
2972            // Another stream with this type was already found.  A minidump
2973            // file should contain at most one of each of these stream types.
2974            BPLOG(ERROR) << "Minidump found multiple streams of type " <<
2975                            stream_type << ", but can only deal with one";
2976            return false;
2977          }
2978          // Fall through to default
2979        }
2980
2981        default: {
2982          // Overwrites for stream types other than those above, but it's
2983          // expected to be the user's burden in that case.
2984          (*stream_map_)[stream_type].stream_index = stream_index;
2985        }
2986      }
2987    }
2988
2989    directory_ = directory.release();
2990  }
2991
2992  valid_ = true;
2993  return true;
2994}
2995
2996
2997MinidumpThreadList* Minidump::GetThreadList() {
2998  MinidumpThreadList* thread_list;
2999  return GetStream(&thread_list);
3000}
3001
3002
3003MinidumpModuleList* Minidump::GetModuleList() {
3004  MinidumpModuleList* module_list;
3005  return GetStream(&module_list);
3006}
3007
3008
3009MinidumpMemoryList* Minidump::GetMemoryList() {
3010  MinidumpMemoryList* memory_list;
3011  return GetStream(&memory_list);
3012}
3013
3014
3015MinidumpException* Minidump::GetException() {
3016  MinidumpException* exception;
3017  return GetStream(&exception);
3018}
3019
3020
3021MinidumpSystemInfo* Minidump::GetSystemInfo() {
3022  MinidumpSystemInfo* system_info;
3023  return GetStream(&system_info);
3024}
3025
3026
3027MinidumpMiscInfo* Minidump::GetMiscInfo() {
3028  MinidumpMiscInfo* misc_info;
3029  return GetStream(&misc_info);
3030}
3031
3032
3033MinidumpBreakpadInfo* Minidump::GetBreakpadInfo() {
3034  MinidumpBreakpadInfo* breakpad_info;
3035  return GetStream(&breakpad_info);
3036}
3037
3038
3039void Minidump::Print() {
3040  if (!valid_) {
3041    BPLOG(ERROR) << "Minidump cannot print invalid data";
3042    return;
3043  }
3044
3045  printf("MDRawHeader\n");
3046  printf("  signature            = 0x%x\n",    header_.signature);
3047  printf("  version              = 0x%x\n",    header_.version);
3048  printf("  stream_count         = %d\n",      header_.stream_count);
3049  printf("  stream_directory_rva = 0x%x\n",    header_.stream_directory_rva);
3050  printf("  checksum             = 0x%x\n",    header_.checksum);
3051  struct tm timestruct;
3052  gmtime_r(reinterpret_cast<time_t*>(&header_.time_date_stamp), &timestruct);
3053  char timestr[20];
3054  strftime(timestr, 20, "%Y-%m-%d %H:%M:%S", &timestruct);
3055  printf("  time_date_stamp      = 0x%x %s\n", header_.time_date_stamp,
3056                                               timestr);
3057  printf("  flags                = 0x%llx\n",  header_.flags);
3058  printf("\n");
3059
3060  for (unsigned int stream_index = 0;
3061       stream_index < header_.stream_count;
3062       ++stream_index) {
3063    MDRawDirectory* directory_entry = &(*directory_)[stream_index];
3064
3065    printf("mDirectory[%d]\n", stream_index);
3066    printf("MDRawDirectory\n");
3067    printf("  stream_type        = %d\n",   directory_entry->stream_type);
3068    printf("  location.data_size = %d\n",
3069           directory_entry->location.data_size);
3070    printf("  location.rva       = 0x%x\n", directory_entry->location.rva);
3071    printf("\n");
3072  }
3073
3074  printf("Streams:\n");
3075  for (MinidumpStreamMap::const_iterator iterator = stream_map_->begin();
3076       iterator != stream_map_->end();
3077       ++iterator) {
3078    u_int32_t stream_type = iterator->first;
3079    MinidumpStreamInfo info = iterator->second;
3080    printf("  stream type 0x%x at index %d\n", stream_type, info.stream_index);
3081  }
3082  printf("\n");
3083}
3084
3085
3086const MDRawDirectory* Minidump::GetDirectoryEntryAtIndex(unsigned int index)
3087      const {
3088  if (!valid_) {
3089    BPLOG(ERROR) << "Invalid Minidump for GetDirectoryEntryAtIndex";
3090    return NULL;
3091  }
3092
3093  if (index >= header_.stream_count) {
3094    BPLOG(ERROR) << "Minidump stream directory index out of range: " <<
3095                    index << "/" << header_.stream_count;
3096    return NULL;
3097  }
3098
3099  return &(*directory_)[index];
3100}
3101
3102
3103bool Minidump::ReadBytes(void* bytes, size_t count) {
3104  // Can't check valid_ because Read needs to call this method before
3105  // validity can be determined.  The only member that this method
3106  // depends on is mFD, and an unset or invalid fd may generate an
3107  // error but should not cause a crash.
3108  ssize_t bytes_read = read(fd_, bytes, count);
3109  if (static_cast<size_t>(bytes_read) != count) {
3110    if (bytes_read == -1) {
3111      string error_string;
3112      int error_code = ErrnoString(&error_string);
3113      BPLOG(ERROR) << "ReadBytes: error " << error_code << ": " << error_string;
3114    } else {
3115      BPLOG(ERROR) << "ReadBytes: read " << bytes_read << "/" << count;
3116    }
3117    return false;
3118  }
3119  return true;
3120}
3121
3122
3123bool Minidump::SeekSet(off_t offset) {
3124  // Can't check valid_ because Read needs to call this method before
3125  // validity can be determined.  The only member that this method
3126  // depends on is mFD, and an unset or invalid fd may generate an
3127  // error but should not cause a crash.
3128  off_t sought = lseek(fd_, offset, SEEK_SET);
3129  if (sought != offset) {
3130    if (sought == -1) {
3131      string error_string;
3132      int error_code = ErrnoString(&error_string);
3133      BPLOG(ERROR) << "SeekSet: error " << error_code << ": " << error_string;
3134    } else {
3135      BPLOG(ERROR) << "SeekSet: sought " << sought << "/" << offset;
3136    }
3137    return false;
3138  }
3139  return true;
3140}
3141
3142
3143string* Minidump::ReadString(off_t offset) {
3144  if (!valid_) {
3145    BPLOG(ERROR) << "Invalid Minidump for ReadString";
3146    return NULL;
3147  }
3148  if (!SeekSet(offset)) {
3149    BPLOG(ERROR) << "ReadString could not seek to string";
3150    return NULL;
3151  }
3152
3153  u_int32_t bytes;
3154  if (!ReadBytes(&bytes, sizeof(bytes))) {
3155    BPLOG(ERROR) << "ReadString could not read string size";
3156    return NULL;
3157  }
3158  if (swap_)
3159    Swap(&bytes);
3160
3161  if (bytes % 2 != 0) {
3162    BPLOG(ERROR) << "ReadString found odd-sized string of " << bytes <<
3163                    " bytes at offset " << offset;
3164    return NULL;
3165  }
3166  unsigned int utf16_words = bytes / 2;
3167
3168  // TODO(mmentovai): verify rational size!
3169  vector<u_int16_t> string_utf16(utf16_words);
3170
3171  if (utf16_words) {
3172    if (!ReadBytes(&string_utf16[0], bytes)) {
3173      BPLOG(ERROR) << "ReadString could not read string";
3174      return NULL;
3175    }
3176  }
3177
3178  return UTF16ToUTF8(string_utf16, swap_);
3179}
3180
3181
3182bool Minidump::SeekToStreamType(u_int32_t  stream_type,
3183                                u_int32_t* stream_length) {
3184  BPLOG_IF(ERROR, !stream_length) << "Minidump::SeekToStreamType requires "
3185                                     "|stream_length|";
3186  assert(stream_length);
3187  *stream_length = 0;
3188
3189  if (!valid_) {
3190    BPLOG(ERROR) << "Invalid Mindump for SeekToStreamType";
3191    return false;
3192  }
3193
3194  MinidumpStreamMap::const_iterator iterator = stream_map_->find(stream_type);
3195  if (iterator == stream_map_->end()) {
3196    // This stream type didn't exist in the directory.
3197    BPLOG(INFO) << "SeekToStreamType: type " << stream_type << " not present";
3198    return false;
3199  }
3200
3201  MinidumpStreamInfo info = iterator->second;
3202  if (info.stream_index >= header_.stream_count) {
3203    BPLOG(ERROR) << "SeekToStreamType: type " << stream_type <<
3204                    " out of range: " <<
3205                    info.stream_index << "/" << header_.stream_count;
3206    return false;
3207  }
3208
3209  MDRawDirectory* directory_entry = &(*directory_)[info.stream_index];
3210  if (!SeekSet(directory_entry->location.rva)) {
3211    BPLOG(ERROR) << "SeekToStreamType could not seek to stream type " <<
3212                    stream_type;
3213    return false;
3214  }
3215
3216  *stream_length = directory_entry->location.data_size;
3217
3218  return true;
3219}
3220
3221
3222template<typename T>
3223T* Minidump::GetStream(T** stream) {
3224  // stream is a garbage parameter that's present only to account for C++'s
3225  // inability to overload a method based solely on its return type.
3226
3227  const u_int32_t stream_type = T::kStreamType;
3228
3229  BPLOG_IF(ERROR, !stream) << "Minidump::GetStream type " << stream_type <<
3230                              " requires |stream|";
3231  assert(stream);
3232  *stream = NULL;
3233
3234  if (!valid_) {
3235    BPLOG(ERROR) << "Invalid Minidump for GetStream type " << stream_type;
3236    return NULL;
3237  }
3238
3239  MinidumpStreamMap::iterator iterator = stream_map_->find(stream_type);
3240  if (iterator == stream_map_->end()) {
3241    // This stream type didn't exist in the directory.
3242    BPLOG(INFO) << "GetStream: type " << stream_type << " not present";
3243    return NULL;
3244  }
3245
3246  // Get a pointer so that the stored stream field can be altered.
3247  MinidumpStreamInfo* info = &iterator->second;
3248
3249  if (info->stream) {
3250    // This cast is safe because info.stream is only populated by this
3251    // method, and there is a direct correlation between T and stream_type.
3252    *stream = static_cast<T*>(info->stream);
3253    return *stream;
3254  }
3255
3256  u_int32_t stream_length;
3257  if (!SeekToStreamType(stream_type, &stream_length)) {
3258    BPLOG(ERROR) << "GetStream could not seek to stream type " << stream_type;
3259    return NULL;
3260  }
3261
3262  scoped_ptr<T> new_stream(new T(this));
3263
3264  if (!new_stream->Read(stream_length)) {
3265    BPLOG(ERROR) << "GetStream could not read stream type " << stream_type;
3266    return NULL;
3267  }
3268
3269  *stream = new_stream.release();
3270  info->stream = *stream;
3271  return *stream;
3272}
3273
3274
3275}  // namespace google_breakpad
3276