minidump.h revision f7838a8665eb5e46f2eb136679b27707d6adb523
1// Copyright (c) 2010 Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8//     * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10//     * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14//     * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30// minidump.h: A minidump reader.
31//
32// The basic structure of this module tracks the structure of the minidump
33// file itself.  At the top level, a minidump file is represented by a
34// Minidump object.  Like most other classes in this module, Minidump
35// provides a Read method that initializes the object with information from
36// the file.  Most of the classes in this file are wrappers around the
37// "raw" structures found in the minidump file itself, and defined in
38// minidump_format.h.  For example, each thread is represented by a
39// MinidumpThread object, whose parameters are specified in an MDRawThread
40// structure.  A properly byte-swapped MDRawThread can be obtained from a
41// MinidumpThread easily by calling its thread() method.
42//
43// Most of the module lazily reads only the portion of the minidump file
44// necessary to fulfill the user's request.  Calling Minidump::Read
45// only reads the minidump's directory.  The thread list is not read until
46// it is needed, and even once it's read, the memory regions for each
47// thread's stack aren't read until they're needed.  This strategy avoids
48// unnecessary file input, and allocating memory for data in which the user
49// has no interest.  Note that although memory allocations for a typical
50// minidump file are not particularly large, it is possible for legitimate
51// minidumps to be sizable.  A full-memory minidump, for example, contains
52// a snapshot of the entire mapped memory space.  Even a normal minidump,
53// with stack memory only, can be large if, for example, the dump was
54// generated in response to a crash that occurred due to an infinite-
55// recursion bug that caused the stack's limits to be exceeded.  Finally,
56// some users of this library will unfortunately find themselves in the
57// position of having to process potentially-hostile minidumps that might
58// attempt to cause problems by forcing the minidump processor to over-
59// allocate memory.
60//
61// Memory management in this module is based on a strict
62// you-don't-own-anything policy.  The only object owned by the user is
63// the top-level Minidump object, the creation and destruction of which
64// must be the user's own responsibility.  All other objects obtained
65// through interaction with this module are ultimately owned by the
66// Minidump object, and will be freed upon the Minidump object's destruction.
67// Because memory regions can potentially involve large allocations, a
68// FreeMemory method is provided by MinidumpMemoryRegion, allowing the user
69// to release data when it is no longer needed.  Use of this method is
70// optional but recommended.  If freed data is later required, it will
71// be read back in from the minidump file again.
72//
73// There is one exception to this memory management policy:
74// Minidump::ReadString will return a string object to the user, and the user
75// is responsible for its deletion.
76//
77// Author: Mark Mentovai
78
79#ifndef GOOGLE_BREAKPAD_PROCESSOR_MINIDUMP_H__
80#define GOOGLE_BREAKPAD_PROCESSOR_MINIDUMP_H__
81
82#ifndef _WIN32
83#include <unistd.h>
84#endif
85
86#include <iostream>
87#include <map>
88#include <string>
89#include <vector>
90
91#include "common/using_std_string.h"
92#include "google_breakpad/common/minidump_format.h"
93#include "google_breakpad/processor/code_module.h"
94#include "google_breakpad/processor/code_modules.h"
95#include "google_breakpad/processor/memory_region.h"
96
97
98namespace google_breakpad {
99
100
101using std::map;
102using std::vector;
103
104
105class Minidump;
106template<typename AddressType, typename EntryType> class RangeMap;
107
108
109// MinidumpObject is the base of all Minidump* objects except for Minidump
110// itself.
111class MinidumpObject {
112 public:
113  virtual ~MinidumpObject() {}
114
115  bool valid() const { return valid_; }
116
117 protected:
118  explicit MinidumpObject(Minidump* minidump);
119
120  // Refers to the Minidump object that is the ultimate parent of this
121  // Some MinidumpObjects are owned by other MinidumpObjects, but at the
122  // root of the ownership tree is always a Minidump.  The Minidump object
123  // is kept here for access to its seeking and reading facilities, and
124  // for access to data about the minidump file itself, such as whether
125  // it should be byte-swapped.
126  Minidump* minidump_;
127
128  // MinidumpObjects are not valid when created.  When a subclass populates
129  // its own fields, it can set valid_ to true.  Accessors and mutators may
130  // wish to consider or alter the valid_ state as they interact with
131  // objects.
132  bool      valid_;
133};
134
135
136// This class exists primarily to provide a virtual destructor in a base
137// class common to all objects that might be stored in
138// Minidump::mStreamObjects.  Some object types (MinidumpContext) will
139// never be stored in Minidump::mStreamObjects, but are represented as
140// streams and adhere to the same interface, and may be derived from
141// this class.
142class MinidumpStream : public MinidumpObject {
143 public:
144  virtual ~MinidumpStream() {}
145
146 protected:
147  explicit MinidumpStream(Minidump* minidump);
148
149 private:
150  // Populate (and validate) the MinidumpStream.  minidump_ is expected
151  // to be positioned at the beginning of the stream, so that the next
152  // read from the minidump will be at the beginning of the stream.
153  // expected_size should be set to the stream's length as contained in
154  // the MDRawDirectory record or other identifying record.  A class
155  // that implements MinidumpStream can compare expected_size to a
156  // known size as an integrity check.
157  virtual bool Read(uint32_t expected_size) = 0;
158};
159
160
161// MinidumpContext carries a CPU-specific MDRawContext structure, which
162// contains CPU context such as register states.  Each thread has its
163// own context, and the exception record, if present, also has its own
164// context.  Note that if the exception record is present, the context it
165// refers to is probably what the user wants to use for the exception
166// thread, instead of that thread's own context.  The exception thread's
167// context (as opposed to the exception record's context) will contain
168// context for the exception handler (which performs minidump generation),
169// and not the context that caused the exception (which is probably what the
170// user wants).
171class MinidumpContext : public MinidumpStream {
172 public:
173  virtual ~MinidumpContext();
174
175  // Returns an MD_CONTEXT_* value such as MD_CONTEXT_X86 or MD_CONTEXT_PPC
176  // identifying the CPU type that the context was collected from.  The
177  // returned value will identify the CPU only, and will have any other
178  // MD_CONTEXT_* bits masked out.  Returns 0 on failure.
179  uint32_t GetContextCPU() const;
180
181  // A convenience method to get the instruction pointer out of the
182  // MDRawContext, since it varies per-CPU architecture.
183  bool GetInstructionPointer(uint64_t* ip) const;
184
185  // Returns raw CPU-specific context data for the named CPU type.  If the
186  // context data does not match the CPU type or does not exist, returns
187  // NULL.
188  const MDRawContextAMD64* GetContextAMD64() const;
189  const MDRawContextARM*   GetContextARM() const;
190  const MDRawContextPPC*   GetContextPPC() const;
191  const MDRawContextPPC64* GetContextPPC64() const;
192  const MDRawContextSPARC* GetContextSPARC() const;
193  const MDRawContextX86*   GetContextX86() const;
194
195  // Print a human-readable representation of the object to stdout.
196  void Print();
197
198 protected:
199  explicit MinidumpContext(Minidump* minidump);
200
201  // The CPU-specific context structure.
202  union {
203    MDRawContextBase*  base;
204    MDRawContextX86*   x86;
205    MDRawContextPPC*   ppc;
206    MDRawContextPPC64* ppc64;
207    MDRawContextAMD64* amd64;
208    // on Solaris SPARC, sparc is defined as a numeric constant,
209    // so variables can NOT be named as sparc
210    MDRawContextSPARC* ctx_sparc;
211    MDRawContextARM*   arm;
212  } context_;
213
214  // Store this separately because of the weirdo AMD64 context
215  uint32_t context_flags_;
216
217 private:
218  friend class MinidumpThread;
219  friend class MinidumpException;
220
221  bool Read(uint32_t expected_size);
222
223  // Free the CPU-specific context structure.
224  void FreeContext();
225
226  // If the minidump contains a SYSTEM_INFO_STREAM, makes sure that the
227  // system info stream gives an appropriate CPU type matching the context
228  // CPU type in context_cpu_type.  Returns false if the CPU type does not
229  // match.  Returns true if the CPU type matches or if the minidump does
230  // not contain a system info stream.
231  bool CheckAgainstSystemInfo(uint32_t context_cpu_type);
232};
233
234
235// MinidumpMemoryRegion does not wrap any MDRaw structure, and only contains
236// a reference to an MDMemoryDescriptor.  This object is intended to wrap
237// portions of a minidump file that contain memory dumps.  In normal
238// minidumps, each MinidumpThread owns a MinidumpMemoryRegion corresponding
239// to the thread's stack memory.  MinidumpMemoryList also gives access to
240// memory regions in its list as MinidumpMemoryRegions.  This class
241// adheres to MemoryRegion so that it may be used as a data provider to
242// the Stackwalker family of classes.
243class MinidumpMemoryRegion : public MinidumpObject,
244                             public MemoryRegion {
245 public:
246  virtual ~MinidumpMemoryRegion();
247
248  static void set_max_bytes(uint32_t max_bytes) { max_bytes_ = max_bytes; }
249  static uint32_t max_bytes() { return max_bytes_; }
250
251  // Returns a pointer to the base of the memory region.  Returns the
252  // cached value if available, otherwise, reads the minidump file and
253  // caches the memory region.
254  const uint8_t* GetMemory() const;
255
256  // The address of the base of the memory region.
257  uint64_t GetBase() const;
258
259  // The size, in bytes, of the memory region.
260  uint32_t GetSize() const;
261
262  // Frees the cached memory region, if cached.
263  void FreeMemory();
264
265  // Obtains the value of memory at the pointer specified by address.
266  bool GetMemoryAtAddress(uint64_t address, uint8_t*  value) const;
267  bool GetMemoryAtAddress(uint64_t address, uint16_t* value) const;
268  bool GetMemoryAtAddress(uint64_t address, uint32_t* value) const;
269  bool GetMemoryAtAddress(uint64_t address, uint64_t* value) const;
270
271  // Print a human-readable representation of the object to stdout.
272  void Print();
273
274 protected:
275  explicit MinidumpMemoryRegion(Minidump* minidump);
276
277 private:
278  friend class MinidumpThread;
279  friend class MinidumpMemoryList;
280
281  // Identify the base address and size of the memory region, and the
282  // location it may be found in the minidump file.
283  void SetDescriptor(MDMemoryDescriptor* descriptor);
284
285  // Implementation for GetMemoryAtAddress
286  template<typename T> bool GetMemoryAtAddressInternal(uint64_t address,
287                                                       T*        value) const;
288
289  // The largest memory region that will be read from a minidump.  The
290  // default is 1MB.
291  static uint32_t max_bytes_;
292
293  // Base address and size of the memory region, and its position in the
294  // minidump file.
295  MDMemoryDescriptor* descriptor_;
296
297  // Cached memory.
298  mutable vector<uint8_t>* memory_;
299};
300
301
302// MinidumpThread contains information about a thread of execution,
303// including a snapshot of the thread's stack and CPU context.  For
304// the thread that caused an exception, the context carried by
305// MinidumpException is probably desired instead of the CPU context
306// provided here.
307// Note that a MinidumpThread may be valid() even if it does not
308// contain a memory region or context.
309class MinidumpThread : public MinidumpObject {
310 public:
311  virtual ~MinidumpThread();
312
313  const MDRawThread* thread() const { return valid_ ? &thread_ : NULL; }
314  // GetMemory may return NULL even if the MinidumpThread is valid,
315  // if the thread memory cannot be read.
316  virtual MinidumpMemoryRegion* GetMemory();
317  // GetContext may return NULL even if the MinidumpThread is valid.
318  virtual MinidumpContext* GetContext();
319
320  // The thread ID is used to determine if a thread is the exception thread,
321  // so a special getter is provided to retrieve this data from the
322  // MDRawThread structure.  Returns false if the thread ID cannot be
323  // determined.
324  virtual bool GetThreadID(uint32_t *thread_id) const;
325
326  // Print a human-readable representation of the object to stdout.
327  void Print();
328
329 protected:
330  explicit MinidumpThread(Minidump* minidump);
331
332 private:
333  // These objects are managed by MinidumpThreadList.
334  friend class MinidumpThreadList;
335
336  // This works like MinidumpStream::Read, but is driven by
337  // MinidumpThreadList.  No size checking is done, because
338  // MinidumpThreadList handles that directly.
339  bool Read();
340
341  MDRawThread           thread_;
342  MinidumpMemoryRegion* memory_;
343  MinidumpContext*      context_;
344};
345
346
347// MinidumpThreadList contains all of the threads (as MinidumpThreads) in
348// a process.
349class MinidumpThreadList : public MinidumpStream {
350 public:
351  virtual ~MinidumpThreadList();
352
353  static void set_max_threads(uint32_t max_threads) {
354    max_threads_ = max_threads;
355  }
356  static uint32_t max_threads() { return max_threads_; }
357
358  virtual unsigned int thread_count() const {
359    return valid_ ? thread_count_ : 0;
360  }
361
362  // Sequential access to threads.
363  virtual MinidumpThread* GetThreadAtIndex(unsigned int index) const;
364
365  // Random access to threads.
366  MinidumpThread* GetThreadByID(uint32_t thread_id);
367
368  // Print a human-readable representation of the object to stdout.
369  void Print();
370
371 protected:
372  explicit MinidumpThreadList(Minidump* aMinidump);
373
374 private:
375  friend class Minidump;
376
377  typedef map<uint32_t, MinidumpThread*> IDToThreadMap;
378  typedef vector<MinidumpThread> MinidumpThreads;
379
380  static const uint32_t kStreamType = MD_THREAD_LIST_STREAM;
381
382  bool Read(uint32_t aExpectedSize);
383
384  // The largest number of threads that will be read from a minidump.  The
385  // default is 256.
386  static uint32_t max_threads_;
387
388  // Access to threads using the thread ID as the key.
389  IDToThreadMap    id_to_thread_map_;
390
391  // The list of threads.
392  MinidumpThreads* threads_;
393  uint32_t        thread_count_;
394};
395
396
397// MinidumpModule wraps MDRawModule, which contains information about loaded
398// code modules.  Access is provided to various data referenced indirectly
399// by MDRawModule, such as the module's name and a specification for where
400// to locate debugging information for the module.
401class MinidumpModule : public MinidumpObject,
402                       public CodeModule {
403 public:
404  virtual ~MinidumpModule();
405
406  static void set_max_cv_bytes(uint32_t max_cv_bytes) {
407    max_cv_bytes_ = max_cv_bytes;
408  }
409  static uint32_t max_cv_bytes() { return max_cv_bytes_; }
410
411  static void set_max_misc_bytes(uint32_t max_misc_bytes) {
412    max_misc_bytes_ = max_misc_bytes;
413  }
414  static uint32_t max_misc_bytes() { return max_misc_bytes_; }
415
416  const MDRawModule* module() const { return valid_ ? &module_ : NULL; }
417
418  // CodeModule implementation
419  virtual uint64_t base_address() const {
420    return valid_ ? module_.base_of_image : static_cast<uint64_t>(-1);
421  }
422  virtual uint64_t size() const { return valid_ ? module_.size_of_image : 0; }
423  virtual string code_file() const;
424  virtual string code_identifier() const;
425  virtual string debug_file() const;
426  virtual string debug_identifier() const;
427  virtual string version() const;
428  virtual const CodeModule* Copy() const;
429
430  // The CodeView record, which contains information to locate the module's
431  // debugging information (pdb).  This is returned as uint8_t* because
432  // the data can be of types MDCVInfoPDB20* or MDCVInfoPDB70*, or it may be
433  // of a type unknown to Breakpad, in which case the raw data will still be
434  // returned but no byte-swapping will have been performed.  Check the
435  // record's signature in the first four bytes to differentiate between
436  // the various types.  Current toolchains generate modules which carry
437  // MDCVInfoPDB70 by default.  Returns a pointer to the CodeView record on
438  // success, and NULL on failure.  On success, the optional |size| argument
439  // is set to the size of the CodeView record.
440  const uint8_t* GetCVRecord(uint32_t* size);
441
442  // The miscellaneous debug record, which is obsolete.  Current toolchains
443  // do not generate this type of debugging information (dbg), and this
444  // field is not expected to be present.  Returns a pointer to the debugging
445  // record on success, and NULL on failure.  On success, the optional |size|
446  // argument is set to the size of the debugging record.
447  const MDImageDebugMisc* GetMiscRecord(uint32_t* size);
448
449  // Print a human-readable representation of the object to stdout.
450  void Print();
451
452 private:
453  // These objects are managed by MinidumpModuleList.
454  friend class MinidumpModuleList;
455
456  explicit MinidumpModule(Minidump* minidump);
457
458  // This works like MinidumpStream::Read, but is driven by
459  // MinidumpModuleList.  No size checking is done, because
460  // MinidumpModuleList handles that directly.
461  bool Read();
462
463  // Reads indirectly-referenced data, including the module name, CodeView
464  // record, and miscellaneous debugging record.  This is necessary to allow
465  // MinidumpModuleList to fully construct MinidumpModule objects without
466  // requiring seeks to read a contiguous set of MinidumpModule objects.
467  // All auxiliary data should be available when Read is called, in order to
468  // allow the CodeModule getters to be const methods.
469  bool ReadAuxiliaryData();
470
471  // The largest number of bytes that will be read from a minidump for a
472  // CodeView record or miscellaneous debugging record, respectively.  The
473  // default for each is 1024.
474  static uint32_t max_cv_bytes_;
475  static uint32_t max_misc_bytes_;
476
477  // True after a successful Read.  This is different from valid_, which is
478  // not set true until ReadAuxiliaryData also completes successfully.
479  // module_valid_ is only used by ReadAuxiliaryData and the functions it
480  // calls to determine whether the object is ready for auxiliary data to
481  // be read.
482  bool              module_valid_;
483
484  // True if debug info was read from the module.  Certain modules
485  // may contain debug records in formats we don't support,
486  // so we can just set this to false to ignore them.
487  bool              has_debug_info_;
488
489  MDRawModule       module_;
490
491  // Cached module name.
492  const string*     name_;
493
494  // Cached CodeView record - this is MDCVInfoPDB20 or (likely)
495  // MDCVInfoPDB70, or possibly something else entirely.  Stored as a uint8_t
496  // because the structure contains a variable-sized string and its exact
497  // size cannot be known until it is processed.
498  vector<uint8_t>* cv_record_;
499
500  // If cv_record_ is present, cv_record_signature_ contains a copy of the
501  // CodeView record's first four bytes, for ease of determinining the
502  // type of structure that cv_record_ contains.
503  uint32_t cv_record_signature_;
504
505  // Cached MDImageDebugMisc (usually not present), stored as uint8_t
506  // because the structure contains a variable-sized string and its exact
507  // size cannot be known until it is processed.
508  vector<uint8_t>* misc_record_;
509};
510
511
512// MinidumpModuleList contains all of the loaded code modules for a process
513// in the form of MinidumpModules.  It maintains a map of these modules
514// so that it may easily provide a code module corresponding to a specific
515// address.
516class MinidumpModuleList : public MinidumpStream,
517                           public CodeModules {
518 public:
519  virtual ~MinidumpModuleList();
520
521  static void set_max_modules(uint32_t max_modules) {
522    max_modules_ = max_modules;
523  }
524  static uint32_t max_modules() { return max_modules_; }
525
526  // CodeModules implementation.
527  virtual unsigned int module_count() const {
528    return valid_ ? module_count_ : 0;
529  }
530  virtual const MinidumpModule* GetModuleForAddress(uint64_t address) const;
531  virtual const MinidumpModule* GetMainModule() const;
532  virtual const MinidumpModule* GetModuleAtSequence(
533      unsigned int sequence) const;
534  virtual const MinidumpModule* GetModuleAtIndex(unsigned int index) const;
535  virtual const CodeModules* Copy() const;
536
537  // Print a human-readable representation of the object to stdout.
538  void Print();
539
540 protected:
541  explicit MinidumpModuleList(Minidump* minidump);
542
543 private:
544  friend class Minidump;
545
546  typedef vector<MinidumpModule> MinidumpModules;
547
548  static const uint32_t kStreamType = MD_MODULE_LIST_STREAM;
549
550  bool Read(uint32_t expected_size);
551
552  // The largest number of modules that will be read from a minidump.  The
553  // default is 1024.
554  static uint32_t max_modules_;
555
556  // Access to modules using addresses as the key.
557  RangeMap<uint64_t, unsigned int> *range_map_;
558
559  MinidumpModules *modules_;
560  uint32_t module_count_;
561};
562
563
// MinidumpMemoryList corresponds to a minidump's MEMORY_LIST_STREAM stream,
// which references the snapshots of all of the memory regions contained
// within the minidump.  For a normal minidump, this includes stack memory
// (also referenced by each MinidumpThread; in fact, the MDMemoryDescriptors
// here and in MDRawThread both point to exactly the same data in a
// minidump file, conserving space), as well as a 256-byte snapshot of memory
// surrounding the instruction pointer in the case of an exception.  Other
// types of minidumps may contain significantly more memory regions.  Full-
// memory minidumps contain all of a process' mapped memory.
573class MinidumpMemoryList : public MinidumpStream {
574 public:
575  virtual ~MinidumpMemoryList();
576
577  static void set_max_regions(uint32_t max_regions) {
578    max_regions_ = max_regions;
579  }
580  static uint32_t max_regions() { return max_regions_; }
581
582  unsigned int region_count() const { return valid_ ? region_count_ : 0; }
583
584  // Sequential access to memory regions.
585  MinidumpMemoryRegion* GetMemoryRegionAtIndex(unsigned int index);
586
587  // Random access to memory regions.  Returns the region encompassing
588  // the address identified by address.
589  MinidumpMemoryRegion* GetMemoryRegionForAddress(uint64_t address);
590
591  // Print a human-readable representation of the object to stdout.
592  void Print();
593
594 private:
595  friend class Minidump;
596
597  typedef vector<MDMemoryDescriptor>   MemoryDescriptors;
598  typedef vector<MinidumpMemoryRegion> MemoryRegions;
599
600  static const uint32_t kStreamType = MD_MEMORY_LIST_STREAM;
601
602  explicit MinidumpMemoryList(Minidump* minidump);
603
604  bool Read(uint32_t expected_size);
605
606  // The largest number of memory regions that will be read from a minidump.
607  // The default is 256.
608  static uint32_t max_regions_;
609
610  // Access to memory regions using addresses as the key.
611  RangeMap<uint64_t, unsigned int> *range_map_;
612
613  // The list of descriptors.  This is maintained separately from the list
614  // of regions, because MemoryRegion doesn't own its MemoryDescriptor, it
615  // maintains a pointer to it.  descriptors_ provides the storage for this
616  // purpose.
617  MemoryDescriptors *descriptors_;
618
619  // The list of regions.
620  MemoryRegions *regions_;
621  uint32_t region_count_;
622};
623
624
625// MinidumpException wraps MDRawExceptionStream, which contains information
626// about the exception that caused the minidump to be generated, if the
627// minidump was generated in an exception handler called as a result of
628// an exception.  It also provides access to a MinidumpContext object,
629// which contains the CPU context for the exception thread at the time
630// the exception occurred.
631class MinidumpException : public MinidumpStream {
632 public:
633  virtual ~MinidumpException();
634
635  const MDRawExceptionStream* exception() const {
636    return valid_ ? &exception_ : NULL;
637  }
638
639  // The thread ID is used to determine if a thread is the exception thread,
640  // so a special getter is provided to retrieve this data from the
641  // MDRawExceptionStream structure.  Returns false if the thread ID cannot
642  // be determined.
643  bool GetThreadID(uint32_t *thread_id) const;
644
645  MinidumpContext* GetContext();
646
647  // Print a human-readable representation of the object to stdout.
648  void Print();
649
650 private:
651  friend class Minidump;
652
653  static const uint32_t kStreamType = MD_EXCEPTION_STREAM;
654
655  explicit MinidumpException(Minidump* minidump);
656
657  bool Read(uint32_t expected_size);
658
659  MDRawExceptionStream exception_;
660  MinidumpContext*     context_;
661};
662
663// MinidumpAssertion wraps MDRawAssertionInfo, which contains information
664// about an assertion that caused the minidump to be generated.
665class MinidumpAssertion : public MinidumpStream {
666 public:
667  virtual ~MinidumpAssertion();
668
669  const MDRawAssertionInfo* assertion() const {
670    return valid_ ? &assertion_ : NULL;
671  }
672
673  string expression() const {
674    return valid_ ? expression_ : "";
675  }
676
677  string function() const {
678    return valid_ ? function_ : "";
679  }
680
681  string file() const {
682    return valid_ ? file_ : "";
683  }
684
685  // Print a human-readable representation of the object to stdout.
686  void Print();
687
688 private:
689  friend class Minidump;
690
691  static const uint32_t kStreamType = MD_ASSERTION_INFO_STREAM;
692
693  explicit MinidumpAssertion(Minidump* minidump);
694
695  bool Read(uint32_t expected_size);
696
697  MDRawAssertionInfo assertion_;
698  string expression_;
699  string function_;
700  string file_;
701};
702
703
704// MinidumpSystemInfo wraps MDRawSystemInfo and provides information about
705// the system on which the minidump was generated.  See also MinidumpMiscInfo.
706class MinidumpSystemInfo : public MinidumpStream {
707 public:
708  virtual ~MinidumpSystemInfo();
709
710  const MDRawSystemInfo* system_info() const {
711    return valid_ ? &system_info_ : NULL;
712  }
713
714  // GetOS and GetCPU return textual representations of the operating system
715  // and CPU that produced the minidump.  Unlike most other Minidump* methods,
716  // they return string objects, not weak pointers.  Defined values for
717  // GetOS() are "mac", "windows", and "linux".  Defined values for GetCPU
718  // are "x86" and "ppc".  These methods return an empty string when their
719  // values are unknown.
720  string GetOS();
721  string GetCPU();
722
723  // I don't know what CSD stands for, but this field is documented as
724  // returning a textual representation of the OS service pack.  On other
725  // platforms, this provides additional information about an OS version
726  // level beyond major.minor.micro.  Returns NULL if unknown.
727  const string* GetCSDVersion();
728
729  // If a CPU vendor string can be determined, returns a pointer to it,
730  // otherwise, returns NULL.  CPU vendor strings can be determined from
731  // x86 CPUs with CPUID 0.
732  const string* GetCPUVendor();
733
734  // Print a human-readable representation of the object to stdout.
735  void Print();
736
737 protected:
738  explicit MinidumpSystemInfo(Minidump* minidump);
739  MDRawSystemInfo system_info_;
740
741  // Textual representation of the OS service pack, for minidumps produced
742  // by MiniDumpWriteDump on Windows.
743  const string* csd_version_;
744
745 private:
746  friend class Minidump;
747
748  static const uint32_t kStreamType = MD_SYSTEM_INFO_STREAM;
749
750  bool Read(uint32_t expected_size);
751
752  // A string identifying the CPU vendor, if known.
753  const string* cpu_vendor_;
754};
755
756
757// MinidumpMiscInfo wraps MDRawMiscInfo and provides information about
758// the process that generated the minidump, and optionally additional system
759// information.  See also MinidumpSystemInfo.
760class MinidumpMiscInfo : public MinidumpStream {
761 public:
762  const MDRawMiscInfo* misc_info() const {
763    return valid_ ? &misc_info_ : NULL;
764  }
765
766  // Print a human-readable representation of the object to stdout.
767  void Print();
768
769 private:
770  friend class Minidump;
771
772  static const uint32_t kStreamType = MD_MISC_INFO_STREAM;
773
774  explicit MinidumpMiscInfo(Minidump* minidump_);
775
776  bool Read(uint32_t expected_size_);
777
778  MDRawMiscInfo misc_info_;
779};
780
781
782// MinidumpBreakpadInfo wraps MDRawBreakpadInfo, which is an optional stream in
783// a minidump that provides additional information about the process state
784// at the time the minidump was generated.
785class MinidumpBreakpadInfo : public MinidumpStream {
786 public:
787  const MDRawBreakpadInfo* breakpad_info() const {
788    return valid_ ? &breakpad_info_ : NULL;
789  }
790
791  // These thread IDs are used to determine if threads deserve special
792  // treatment, so special getters are provided to retrieve this data from
793  // the MDRawBreakpadInfo structure.  The getters return false if the thread
794  // IDs cannot be determined.
795  bool GetDumpThreadID(uint32_t *thread_id) const;
796  bool GetRequestingThreadID(uint32_t *thread_id) const;
797
798  // Print a human-readable representation of the object to stdout.
799  void Print();
800
801 private:
802  friend class Minidump;
803
804  static const uint32_t kStreamType = MD_BREAKPAD_INFO_STREAM;
805
806  explicit MinidumpBreakpadInfo(Minidump* minidump_);
807
808  bool Read(uint32_t expected_size_);
809
810  MDRawBreakpadInfo breakpad_info_;
811};
812
813// MinidumpMemoryInfo wraps MDRawMemoryInfo, which provides information
814// about mapped memory regions in a process, including their ranges
815// and protection.
816class MinidumpMemoryInfo : public MinidumpObject {
817 public:
818  const MDRawMemoryInfo* info() const { return valid_ ? &memory_info_ : NULL; }
819
820  // The address of the base of the memory region.
821  uint64_t GetBase() const { return valid_ ? memory_info_.base_address : 0; }
822
823  // The size, in bytes, of the memory region.
824  uint64_t GetSize() const { return valid_ ? memory_info_.region_size : 0; }
825
826  // Return true if the memory protection allows execution.
827  bool IsExecutable() const;
828
829  // Return true if the memory protection allows writing.
830  bool IsWritable() const;
831
832  // Print a human-readable representation of the object to stdout.
833  void Print();
834
835 private:
836  // These objects are managed by MinidumpMemoryInfoList.
837  friend class MinidumpMemoryInfoList;
838
839  explicit MinidumpMemoryInfo(Minidump* minidump);
840
841  // This works like MinidumpStream::Read, but is driven by
842  // MinidumpMemoryInfoList.  No size checking is done, because
843  // MinidumpMemoryInfoList handles that directly.
844  bool Read();
845
846  MDRawMemoryInfo memory_info_;
847};
848
849// MinidumpMemoryInfoList contains a list of information about
850// mapped memory regions for a process in the form of MDRawMemoryInfo.
851// It maintains a map of these structures so that it may easily provide
852// info corresponding to a specific address.
853class MinidumpMemoryInfoList : public MinidumpStream {
854 public:
855  virtual ~MinidumpMemoryInfoList();
856
857  unsigned int info_count() const { return valid_ ? info_count_ : 0; }
858
859  const MinidumpMemoryInfo* GetMemoryInfoForAddress(uint64_t address) const;
860  const MinidumpMemoryInfo* GetMemoryInfoAtIndex(unsigned int index) const;
861
862  // Print a human-readable representation of the object to stdout.
863  void Print();
864
865 private:
866  friend class Minidump;
867
868  typedef vector<MinidumpMemoryInfo> MinidumpMemoryInfos;
869
870  static const uint32_t kStreamType = MD_MEMORY_INFO_LIST_STREAM;
871
872  explicit MinidumpMemoryInfoList(Minidump* minidump);
873
874  bool Read(uint32_t expected_size);
875
876  // Access to memory info using addresses as the key.
877  RangeMap<uint64_t, unsigned int> *range_map_;
878
879  MinidumpMemoryInfos* infos_;
880  uint32_t info_count_;
881};
882
883
884// Minidump is the user's interface to a minidump file.  It wraps MDRawHeader
885// and provides access to the minidump's top-level stream directory.
886class Minidump {
887 public:
888  // path is the pathname of a file containing the minidump.
889  explicit Minidump(const string& path);
890  // input is an istream wrapping minidump data. Minidump holds a
891  // weak pointer to input, and the caller must ensure that the stream
892  // is valid as long as the Minidump object is.
893  explicit Minidump(std::istream& input);
894
895  virtual ~Minidump();
896
897  // path may be empty if the minidump was not opened from a file
898  virtual string path() const {
899    return path_;
900  }
901  static void set_max_streams(uint32_t max_streams) {
902    max_streams_ = max_streams;
903  }
904  static uint32_t max_streams() { return max_streams_; }
905
906  static void set_max_string_length(uint32_t max_string_length) {
907    max_string_length_ = max_string_length;
908  }
909  static uint32_t max_string_length() { return max_string_length_; }
910
911  virtual const MDRawHeader* header() const { return valid_ ? &header_ : NULL; }
912
913  // Reads the CPU information from the system info stream and generates the
914  // appropriate CPU flags.  The returned context_cpu_flags are the same as
915  // if the CPU type bits were set in the context_flags of a context record.
916  // On success, context_cpu_flags will have the flags that identify the CPU.
917  // If a system info stream is missing, context_cpu_flags will be 0.
918  // Returns true if the current position in the stream was not changed.
919  // Returns false when the current location in the stream was changed and the
920  // attempt to restore the original position failed.
921  bool GetContextCPUFlagsFromSystemInfo(uint32_t* context_cpu_flags);
922
923  // Reads the minidump file's header and top-level stream directory.
924  // The minidump is expected to be positioned at the beginning of the
925  // header.  Read() sets up the stream list and map, and validates the
926  // Minidump object.
927  virtual bool Read();
928
929  // The next set of methods are stubs that call GetStream.  They exist to
930  // force code generation of the templatized API within the module, and
931  // to avoid exposing an ugly API (GetStream needs to accept a garbage
932  // parameter).
933  virtual MinidumpThreadList* GetThreadList();
934  MinidumpModuleList* GetModuleList();
935  MinidumpMemoryList* GetMemoryList();
936  MinidumpException* GetException();
937  MinidumpAssertion* GetAssertion();
938  virtual MinidumpSystemInfo* GetSystemInfo();
939  MinidumpMiscInfo* GetMiscInfo();
940  MinidumpBreakpadInfo* GetBreakpadInfo();
941  MinidumpMemoryInfoList* GetMemoryInfoList();
942
943  // The next set of methods are provided for users who wish to access
944  // data in minidump files directly, while leveraging the rest of
945  // this class and related classes to handle the basic minidump
946  // structure and known stream types.
947
948  unsigned int GetDirectoryEntryCount() const {
949    return valid_ ? header_.stream_count : 0;
950  }
951  const MDRawDirectory* GetDirectoryEntryAtIndex(unsigned int index) const;
952
953  // The next 2 methods are lower-level I/O routines.  They use fd_.
954
955  // Reads count bytes from the minidump at the current position into
956  // the storage area pointed to by bytes.  bytes must be of sufficient
957  // size.  After the read, the file position is advanced by count.
958  bool ReadBytes(void* bytes, size_t count);
959
960  // Sets the position of the minidump file to offset.
961  bool SeekSet(off_t offset);
962
963  // Returns the current position of the minidump file.
964  off_t Tell();
965
966  // The next 2 methods are medium-level I/O routines.
967
968  // ReadString returns a string which is owned by the caller!  offset
969  // specifies the offset that a length-encoded string is stored at in the
970  // minidump file.
971  string* ReadString(off_t offset);
972
973  // SeekToStreamType positions the file at the beginning of a stream
974  // identified by stream_type, and informs the caller of the stream's
975  // length by setting *stream_length.  Because stream_map maps each stream
976  // type to only one stream in the file, this might mislead the user into
977  // thinking that the stream that this seeks to is the only stream with
978  // type stream_type.  That can't happen for streams that these classes
979  // deal with directly, because they're only supposed to be present in the
980  // file singly, and that's verified when stream_map_ is built.  Users who
981  // are looking for other stream types should be aware of this
982  // possibility, and consider using GetDirectoryEntryAtIndex (possibly
983  // with GetDirectoryEntryCount) if expecting multiple streams of the same
984  // type in a single minidump file.
985  bool SeekToStreamType(uint32_t stream_type, uint32_t* stream_length);
986
987  bool swap() const { return valid_ ? swap_ : false; }
988
989  // Print a human-readable representation of the object to stdout.
990  void Print();
991
992 private:
993  // MinidumpStreamInfo is used in the MinidumpStreamMap.  It lets
994  // the Minidump object locate interesting streams quickly, and
995  // provides a convenient place to stash MinidumpStream objects.
996  struct MinidumpStreamInfo {
997    MinidumpStreamInfo() : stream_index(0), stream(NULL) {}
998    ~MinidumpStreamInfo() { delete stream; }
999
1000    // Index into the MinidumpDirectoryEntries vector
1001    unsigned int    stream_index;
1002
1003    // Pointer to the stream if cached, or NULL if not yet populated
1004    MinidumpStream* stream;
1005  };
1006
1007  typedef vector<MDRawDirectory> MinidumpDirectoryEntries;
1008  typedef map<uint32_t, MinidumpStreamInfo> MinidumpStreamMap;
1009
1010  template<typename T> T* GetStream(T** stream);
1011
1012  // Opens the minidump file, or if already open, seeks to the beginning.
1013  bool Open();
1014
1015  // The largest number of top-level streams that will be read from a minidump.
1016  // Note that streams are only read (and only consume memory) as needed,
1017  // when directed by the caller.  The default is 128.
1018  static uint32_t max_streams_;
1019
1020  // The maximum length of a UTF-16 string that will be read from a minidump
1021  // in 16-bit words.  The default is 1024.  UTF-16 strings are converted
1022  // to UTF-8 when stored in memory, and each UTF-16 word will be represented
1023  // by as many as 3 bytes in UTF-8.
1024  static unsigned int max_string_length_;
1025
1026  MDRawHeader               header_;
1027
1028  // The list of streams.
1029  MinidumpDirectoryEntries* directory_;
1030
1031  // Access to streams using the stream type as the key.
1032  MinidumpStreamMap*        stream_map_;
1033
1034  // The pathname of the minidump file to process, set in the constructor.
1035  // This may be empty if the minidump was opened directly from a stream.
1036  const string              path_;
1037
1038  // The stream for all file I/O.  Used by ReadBytes and SeekSet.
1039  // Set based on the path in Open, or directly in the constructor.
1040  std::istream*             stream_;
1041
1042  // swap_ is true if the minidump file should be byte-swapped.  If the
1043  // minidump was produced by a CPU that is other-endian than the CPU
1044  // processing the minidump, this will be true.  If the two CPUs are
1045  // same-endian, this will be false.
1046  bool                      swap_;
1047
1048  // Validity of the Minidump structure, false immediately after
1049  // construction or after a failed Read(); true following a successful
1050  // Read().
1051  bool                      valid_;
1052};
1053
1054
1055}  // namespace google_breakpad
1056
1057
1058#endif  // GOOGLE_BREAKPAD_PROCESSOR_MINIDUMP_H__
1059