minidump.h revision 61ea8bf0d5c2cf652e8d75605f770d0f9733acfe
1// Copyright (c) 2006, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8//     * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10//     * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14//     * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30// minidump.h: A minidump reader.
31//
32// The basic structure of this module tracks the structure of the minidump
33// file itself.  At the top level, a minidump file is represented by a
34// Minidump object.  Like most other classes in this module, Minidump
35// provides a Read method that initializes the object with information from
36// the file.  Most of the classes in this file are wrappers around the
37// "raw" structures found in the minidump file itself, and defined in
38// minidump_format.h.  For example, each thread is represented by a
39// MinidumpThread object, whose parameters are specified in an MDRawThread
40// structure.  A properly byte-swapped MDRawThread can be obtained from a
41// MinidumpThread easily by calling its thread() method.
42//
43// Most of the module lazily reads only the portion of the minidump file
44// necessary to fulfill the user's request.  Calling Minidump::Read
45// only reads the minidump's directory.  The thread list is not read until
46// it is needed, and even once it's read, the memory regions for each
47// thread's stack aren't read until they're needed.  This strategy avoids
48// unnecessary file input, and allocating memory for data in which the user
49// has no interest.  Note that although memory allocations for a typical
50// minidump file are not particularly large, it is possible for legitimate
51// minidumps to be sizable.  A full-memory minidump, for example, contains
52// a snapshot of the entire mapped memory space.  Even a normal minidump,
53// with stack memory only, can be large if, for example, the dump was
54// generated in response to a crash that occurred due to an infinite-
55// recursion bug that caused the stack's limits to be exceeded.  Finally,
56// some users of this library will unfortunately find themselves in the
57// position of having to process potentially-hostile minidumps that might
58// attempt to cause problems by forcing the minidump processor to over-
59// allocate memory.
60//
61// Memory management in this module is based on a strict
62// you-don't-own-anything policy.  The only object owned by the user is
63// the top-level Minidump object, the creation and destruction of which
64// must be the user's own responsibility.  All other objects obtained
65// through interaction with this module are ultimately owned by the
66// Minidump object, and will be freed upon the Minidump object's destruction.
67// Because memory regions can potentially involve large allocations, a
68// FreeMemory method is provided by MinidumpMemoryRegion, allowing the user
69// to release data when it is no longer needed.  Use of this method is
70// optional but recommended.  If freed data is later required, it will
71// be read back in from the minidump file again.
72//
73// There is one exception to this memory management policy:
74// Minidump::ReadString will return a string object to the user, and the user
75// is responsible for its deletion.
76//
77// Author: Mark Mentovai
78
79#ifndef GOOGLE_BREAKPAD_PROCESSOR_MINIDUMP_H__
80#define GOOGLE_BREAKPAD_PROCESSOR_MINIDUMP_H__
81
82#include <unistd.h>
83
84#include <map>
85#include <string>
86#include <vector>
87
88#include "google_breakpad/common/minidump_format.h"
89#include "google_breakpad/processor/code_module.h"
90#include "google_breakpad/processor/code_modules.h"
91#include "google_breakpad/processor/memory_region.h"
92
93
94namespace google_breakpad {
95
96
// Types declared elsewhere that this header only needs by name.  Minidump
// is defined later in this file; RangeMap is used here only through
// pointers.
class Minidump;
template<typename AddressType, typename EntryType> class RangeMap;


// Standard library names used unqualified throughout this header.
using std::map;
using std::string;
using std::vector;
104
105
106// MinidumpObject is the base of all Minidump* objects except for Minidump
107// itself.
108class MinidumpObject {
109 public:
110  virtual ~MinidumpObject() {}
111
112  bool valid() const { return valid_; }
113
114 protected:
115  explicit MinidumpObject(Minidump* minidump);
116
117  // Refers to the Minidump object that is the ultimate parent of this
118  // Some MinidumpObjects are owned by other MinidumpObjects, but at the
119  // root of the ownership tree is always a Minidump.  The Minidump object
120  // is kept here for access to its seeking and reading facilities, and
121  // for access to data about the minidump file itself, such as whether
122  // it should be byte-swapped.
123  Minidump* minidump_;
124
125  // MinidumpObjects are not valid when created.  When a subclass populates
126  // its own fields, it can set valid_ to true.  Accessors and mutators may
127  // wish to consider or alter the valid_ state as they interact with
128  // objects.
129  bool      valid_;
130};
131
132
133// This class exists primarily to provide a virtual destructor in a base
134// class common to all objects that might be stored in
135// Minidump::mStreamObjects.  Some object types (MinidumpContext) will
136// never be stored in Minidump::mStreamObjects, but are represented as
137// streams and adhere to the same interface, and may be derived from
138// this class.
139class MinidumpStream : public MinidumpObject {
140 public:
141  virtual ~MinidumpStream() {}
142
143 protected:
144  explicit MinidumpStream(Minidump* minidump);
145
146 private:
147  // Populate (and validate) the MinidumpStream.  minidump_ is expected
148  // to be positioned at the beginning of the stream, so that the next
149  // read from the minidump will be at the beginning of the stream.
150  // expected_size should be set to the stream's length as contained in
151  // the MDRawDirectory record or other identifying record.  A class
152  // that implements MinidumpStream can compare expected_size to a
153  // known size as an integrity check.
154  virtual bool Read(u_int32_t expected_size) = 0;
155};
156
157
158// MinidumpContext carries a CPU-specific MDRawContext structure, which
159// contains CPU context such as register states.  Each thread has its
160// own context, and the exception record, if present, also has its own
161// context.  Note that if the exception record is present, the context it
162// refers to is probably what the user wants to use for the exception
163// thread, instead of that thread's own context.  The exception thread's
164// context (as opposed to the exception record's context) will contain
165// context for the exception handler (which performs minidump generation),
166// and not the context that caused the exception (which is probably what the
167// user wants).
168class MinidumpContext : public MinidumpStream {
169 public:
170  virtual ~MinidumpContext();
171
172  // Returns an MD_CONTEXT_* value such as MD_CONTEXT_X86 or MD_CONTEXT_PPC
173  // identifying the CPU type that the context was collected from.  The
174  // returned value will identify the CPU only, and will have any other
175  // MD_CONTEXT_* bits masked out.  Returns 0 on failure.
176  u_int32_t GetContextCPU() const;
177
178  // Returns raw CPU-specific context data for the named CPU type.  If the
179  // context data does not match the CPU type or does not exist, returns
180  // NULL.
181  const MDRawContextX86*   GetContextX86() const;
182  const MDRawContextPPC*   GetContextPPC() const;
183  const MDRawContextAMD64* GetContextAMD64() const;
184  const MDRawContextSPARC* GetContextSPARC() const;
185
186  // Print a human-readable representation of the object to stdout.
187  void Print();
188
189 private:
190  friend class MinidumpThread;
191  friend class MinidumpException;
192
193  explicit MinidumpContext(Minidump* minidump);
194
195  bool Read(u_int32_t expected_size);
196
197  // Free the CPU-specific context structure.
198  void FreeContext();
199
200  // If the minidump contains a SYSTEM_INFO_STREAM, makes sure that the
201  // system info stream gives an appropriate CPU type matching the context
202  // CPU type in context_cpu_type.  Returns false if the CPU type does not
203  // match.  Returns true if the CPU type matches or if the minidump does
204  // not contain a system info stream.
205  bool CheckAgainstSystemInfo(u_int32_t context_cpu_type);
206
207  // Store this separately because of the weirdo AMD64 context
208  u_int32_t context_flags_;
209
210  // The CPU-specific context structure.
211  union {
212    MDRawContextBase*  base;
213    MDRawContextX86*   x86;
214    MDRawContextPPC*   ppc;
215    MDRawContextAMD64* amd64;
216    // on Solaris SPARC, sparc is defined as a numeric constant,
217    // so variables can NOT be named as sparc
218    MDRawContextSPARC*  ctx_sparc;
219  } context_;
220};
221
222
223// MinidumpMemoryRegion does not wrap any MDRaw structure, and only contains
224// a reference to an MDMemoryDescriptor.  This object is intended to wrap
225// portions of a minidump file that contain memory dumps.  In normal
226// minidumps, each MinidumpThread owns a MinidumpMemoryRegion corresponding
227// to the thread's stack memory.  MinidumpMemoryList also gives access to
228// memory regions in its list as MinidumpMemoryRegions.  This class
229// adheres to MemoryRegion so that it may be used as a data provider to
230// the Stackwalker family of classes.
231class MinidumpMemoryRegion : public MinidumpObject,
232                             public MemoryRegion {
233 public:
234  virtual ~MinidumpMemoryRegion();
235
236  static void set_max_bytes(u_int32_t max_bytes) { max_bytes_ = max_bytes; }
237  static u_int32_t max_bytes() { return max_bytes_; }
238
239  // Returns a pointer to the base of the memory region.  Returns the
240  // cached value if available, otherwise, reads the minidump file and
241  // caches the memory region.
242  const u_int8_t* GetMemory();
243
244  // The address of the base of the memory region.
245  u_int64_t GetBase();
246
247  // The size, in bytes, of the memory region.
248  u_int32_t GetSize();
249
250  // Frees the cached memory region, if cached.
251  void FreeMemory();
252
253  // Obtains the value of memory at the pointer specified by address.
254  bool GetMemoryAtAddress(u_int64_t address, u_int8_t*  value);
255  bool GetMemoryAtAddress(u_int64_t address, u_int16_t* value);
256  bool GetMemoryAtAddress(u_int64_t address, u_int32_t* value);
257  bool GetMemoryAtAddress(u_int64_t address, u_int64_t* value);
258
259  // Print a human-readable representation of the object to stdout.
260  void Print();
261
262 private:
263  friend class MinidumpThread;
264  friend class MinidumpMemoryList;
265
266  explicit MinidumpMemoryRegion(Minidump* minidump);
267
268  // Identify the base address and size of the memory region, and the
269  // location it may be found in the minidump file.
270  void SetDescriptor(MDMemoryDescriptor* descriptor);
271
272  // Implementation for GetMemoryAtAddress
273  template<typename T> bool GetMemoryAtAddressInternal(u_int64_t address,
274                                                       T*        value);
275
276  // The largest memory region that will be read from a minidump.  The
277  // default is 1MB.
278  static u_int32_t max_bytes_;
279
280  // Base address and size of the memory region, and its position in the
281  // minidump file.
282  MDMemoryDescriptor* descriptor_;
283
284  // Cached memory.
285  vector<u_int8_t>*   memory_;
286};
287
288
289// MinidumpThread contains information about a thread of execution,
290// including a snapshot of the thread's stack and CPU context.  For
291// the thread that caused an exception, the context carried by
292// MinidumpException is probably desired instead of the CPU context
293// provided here.
294class MinidumpThread : public MinidumpObject {
295 public:
296  virtual ~MinidumpThread();
297
298  const MDRawThread* thread() const { return valid_ ? &thread_ : NULL; }
299  MinidumpMemoryRegion* GetMemory();
300  MinidumpContext* GetContext();
301
302  // The thread ID is used to determine if a thread is the exception thread,
303  // so a special getter is provided to retrieve this data from the
304  // MDRawThread structure.  Returns false if the thread ID cannot be
305  // determined.
306  bool GetThreadID(u_int32_t *thread_id) const;
307
308  // Print a human-readable representation of the object to stdout.
309  void Print();
310
311 private:
312  // These objects are managed by MinidumpThreadList.
313  friend class MinidumpThreadList;
314
315  explicit MinidumpThread(Minidump* minidump);
316
317  // This works like MinidumpStream::Read, but is driven by
318  // MinidumpThreadList.  No size checking is done, because
319  // MinidumpThreadList handles that directly.
320  bool Read();
321
322  MDRawThread           thread_;
323  MinidumpMemoryRegion* memory_;
324  MinidumpContext*      context_;
325};
326
327
328// MinidumpThreadList contains all of the threads (as MinidumpThreads) in
329// a process.
330class MinidumpThreadList : public MinidumpStream {
331 public:
332  virtual ~MinidumpThreadList();
333
334  static void set_max_threads(u_int32_t max_threads) {
335    max_threads_ = max_threads;
336  }
337  static u_int32_t max_threads() { return max_threads_; }
338
339  unsigned int thread_count() const { return valid_ ? thread_count_ : 0; }
340
341  // Sequential access to threads.
342  MinidumpThread* GetThreadAtIndex(unsigned int index) const;
343
344  // Random access to threads.
345  MinidumpThread* GetThreadByID(u_int32_t thread_id);
346
347  // Print a human-readable representation of the object to stdout.
348  void Print();
349
350 private:
351  friend class Minidump;
352
353  typedef map<u_int32_t, MinidumpThread*> IDToThreadMap;
354  typedef vector<MinidumpThread> MinidumpThreads;
355
356  static const u_int32_t kStreamType = MD_THREAD_LIST_STREAM;
357
358  explicit MinidumpThreadList(Minidump* aMinidump);
359
360  bool Read(u_int32_t aExpectedSize);
361
362  // The largest number of threads that will be read from a minidump.  The
363  // default is 256.
364  static u_int32_t max_threads_;
365
366  // Access to threads using the thread ID as the key.
367  IDToThreadMap    id_to_thread_map_;
368
369  // The list of threads.
370  MinidumpThreads* threads_;
371  u_int32_t        thread_count_;
372};
373
374
375// MinidumpModule wraps MDRawModule, which contains information about loaded
376// code modules.  Access is provided to various data referenced indirectly
377// by MDRawModule, such as the module's name and a specification for where
378// to locate debugging information for the module.
379class MinidumpModule : public MinidumpObject,
380                       public CodeModule {
381 public:
382  virtual ~MinidumpModule();
383
384  static void set_max_cv_bytes(u_int32_t max_cv_bytes) {
385    max_cv_bytes_ = max_cv_bytes;
386  }
387  static u_int32_t max_cv_bytes() { return max_cv_bytes_; }
388
389  static void set_max_misc_bytes(u_int32_t max_misc_bytes) {
390    max_misc_bytes_ = max_misc_bytes;
391  }
392  static u_int32_t max_misc_bytes() { return max_misc_bytes_; }
393
394  const MDRawModule* module() const { return valid_ ? &module_ : NULL; }
395
396  // CodeModule implementation
397  virtual u_int64_t base_address() const {
398    return valid_ ? module_.base_of_image : static_cast<u_int64_t>(-1);
399  }
400  virtual u_int64_t size() const { return valid_ ? module_.size_of_image : 0; }
401  virtual string code_file() const;
402  virtual string code_identifier() const;
403  virtual string debug_file() const;
404  virtual string debug_identifier() const;
405  virtual string version() const;
406  virtual const CodeModule* Copy() const;
407
408  // The CodeView record, which contains information to locate the module's
409  // debugging information (pdb).  This is returned as u_int8_t* because
410  // the data can be of types MDCVInfoPDB20* or MDCVInfoPDB70*, or it may be
411  // of a type unknown to Breakpad, in which case the raw data will still be
412  // returned but no byte-swapping will have been performed.  Check the
413  // record's signature in the first four bytes to differentiate between
414  // the various types.  Current toolchains generate modules which carry
415  // MDCVInfoPDB70 by default.  Returns a pointer to the CodeView record on
416  // success, and NULL on failure.  On success, the optional |size| argument
417  // is set to the size of the CodeView record.
418  const u_int8_t* GetCVRecord(u_int32_t* size);
419
420  // The miscellaneous debug record, which is obsolete.  Current toolchains
421  // do not generate this type of debugging information (dbg), and this
422  // field is not expected to be present.  Returns a pointer to the debugging
423  // record on success, and NULL on failure.  On success, the optional |size|
424  // argument is set to the size of the debugging record.
425  const MDImageDebugMisc* GetMiscRecord(u_int32_t* size);
426
427  // Print a human-readable representation of the object to stdout.
428  void Print();
429
430 private:
431  // These objects are managed by MinidumpModuleList.
432  friend class MinidumpModuleList;
433
434  explicit MinidumpModule(Minidump* minidump);
435
436  // This works like MinidumpStream::Read, but is driven by
437  // MinidumpModuleList.  No size checking is done, because
438  // MinidumpModuleList handles that directly.
439  bool Read();
440
441  // Reads indirectly-referenced data, including the module name, CodeView
442  // record, and miscellaneous debugging record.  This is necessary to allow
443  // MinidumpModuleList to fully construct MinidumpModule objects without
444  // requiring seeks to read a contiguous set of MinidumpModule objects.
445  // All auxiliary data should be available when Read is called, in order to
446  // allow the CodeModule getters to be const methods.
447  bool ReadAuxiliaryData();
448
449  // The largest number of bytes that will be read from a minidump for a
450  // CodeView record or miscellaneous debugging record, respectively.  The
451  // default for each is 1024.
452  static u_int32_t max_cv_bytes_;
453  static u_int32_t max_misc_bytes_;
454
455  // True after a successful Read.  This is different from valid_, which is
456  // not set true until ReadAuxiliaryData also completes successfully.
457  // module_valid_ is only used by ReadAuxiliaryData and the functions it
458  // calls to determine whether the object is ready for auxiliary data to
459  // be read.
460  bool              module_valid_;
461
462  // True if debug info was read from the module.  Certain modules
463  // may contain debug records in formats we don't support,
464  // so we can just set this to false to ignore them.
465  bool              has_debug_info_;
466
467  MDRawModule       module_;
468
469  // Cached module name.
470  const string*     name_;
471
472  // Cached CodeView record - this is MDCVInfoPDB20 or (likely)
473  // MDCVInfoPDB70, or possibly something else entirely.  Stored as a u_int8_t
474  // because the structure contains a variable-sized string and its exact
475  // size cannot be known until it is processed.
476  vector<u_int8_t>* cv_record_;
477
478  // If cv_record_ is present, cv_record_signature_ contains a copy of the
479  // CodeView record's first four bytes, for ease of determinining the
480  // type of structure that cv_record_ contains.
481  u_int32_t cv_record_signature_;
482
483  // Cached MDImageDebugMisc (usually not present), stored as u_int8_t
484  // because the structure contains a variable-sized string and its exact
485  // size cannot be known until it is processed.
486  vector<u_int8_t>* misc_record_;
487};
488
489
490// MinidumpModuleList contains all of the loaded code modules for a process
491// in the form of MinidumpModules.  It maintains a map of these modules
492// so that it may easily provide a code module corresponding to a specific
493// address.
494class MinidumpModuleList : public MinidumpStream,
495                           public CodeModules {
496 public:
497  virtual ~MinidumpModuleList();
498
499  static void set_max_modules(u_int32_t max_modules) {
500    max_modules_ = max_modules;
501  }
502  static u_int32_t max_modules() { return max_modules_; }
503
504  // CodeModules implementation.
505  virtual unsigned int module_count() const {
506    return valid_ ? module_count_ : 0;
507  }
508  virtual const MinidumpModule* GetModuleForAddress(u_int64_t address) const;
509  virtual const MinidumpModule* GetMainModule() const;
510  virtual const MinidumpModule* GetModuleAtSequence(
511      unsigned int sequence) const;
512  virtual const MinidumpModule* GetModuleAtIndex(unsigned int index) const;
513  virtual const CodeModules* Copy() const;
514
515  // Print a human-readable representation of the object to stdout.
516  void Print();
517
518 private:
519  friend class Minidump;
520
521  typedef vector<MinidumpModule> MinidumpModules;
522
523  static const u_int32_t kStreamType = MD_MODULE_LIST_STREAM;
524
525  explicit MinidumpModuleList(Minidump* minidump);
526
527  bool Read(u_int32_t expected_size);
528
529  // The largest number of modules that will be read from a minidump.  The
530  // default is 1024.
531  static u_int32_t max_modules_;
532
533  // Access to modules using addresses as the key.
534  RangeMap<u_int64_t, unsigned int> *range_map_;
535
536  MinidumpModules *modules_;
537  u_int32_t module_count_;
538};
539
540
541// MinidumpMemoryList corresponds to a minidump's MEMORY_LIST_STREAM stream,
542// which references the snapshots of all of the memory regions contained
543// within the minidump.  For a normal minidump, this includes stack memory
544// (also referenced by each MinidumpThread, in fact, the MDMemoryDescriptors
545// here and in MDRawThread both point to exactly the same data in a
546// minidump file, conserving space), as well as a 256-byte snapshot of memory
547// surrounding the instruction pointer in the case of an exception.  Other
548// types of minidumps may contain significantly more memory regions.  Full-
549// memory minidumps contain all of a process' mapped memory.
550class MinidumpMemoryList : public MinidumpStream {
551 public:
552  virtual ~MinidumpMemoryList();
553
554  static void set_max_regions(u_int32_t max_regions) {
555    max_regions_ = max_regions;
556  }
557  static u_int32_t max_regions() { return max_regions_; }
558
559  unsigned int region_count() const { return valid_ ? region_count_ : 0; }
560
561  // Sequential access to memory regions.
562  MinidumpMemoryRegion* GetMemoryRegionAtIndex(unsigned int index);
563
564  // Random access to memory regions.  Returns the region encompassing
565  // the address identified by address.
566  MinidumpMemoryRegion* GetMemoryRegionForAddress(u_int64_t address);
567
568  // Print a human-readable representation of the object to stdout.
569  void Print();
570
571 private:
572  friend class Minidump;
573
574  typedef vector<MDMemoryDescriptor>   MemoryDescriptors;
575  typedef vector<MinidumpMemoryRegion> MemoryRegions;
576
577  static const u_int32_t kStreamType = MD_MEMORY_LIST_STREAM;
578
579  explicit MinidumpMemoryList(Minidump* minidump);
580
581  bool Read(u_int32_t expected_size);
582
583  // The largest number of memory regions that will be read from a minidump.
584  // The default is 256.
585  static u_int32_t max_regions_;
586
587  // Access to memory regions using addresses as the key.
588  RangeMap<u_int64_t, unsigned int> *range_map_;
589
590  // The list of descriptors.  This is maintained separately from the list
591  // of regions, because MemoryRegion doesn't own its MemoryDescriptor, it
592  // maintains a pointer to it.  descriptors_ provides the storage for this
593  // purpose.
594  MemoryDescriptors *descriptors_;
595
596  // The list of regions.
597  MemoryRegions *regions_;
598  u_int32_t region_count_;
599};
600
601
602// MinidumpException wraps MDRawExceptionStream, which contains information
603// about the exception that caused the minidump to be generated, if the
604// minidump was generated in an exception handler called as a result of
605// an exception.  It also provides access to a MinidumpContext object,
606// which contains the CPU context for the exception thread at the time
607// the exception occurred.
608class MinidumpException : public MinidumpStream {
609 public:
610  virtual ~MinidumpException();
611
612  const MDRawExceptionStream* exception() const {
613    return valid_ ? &exception_ : NULL;
614  }
615
616  // The thread ID is used to determine if a thread is the exception thread,
617  // so a special getter is provided to retrieve this data from the
618  // MDRawExceptionStream structure.  Returns false if the thread ID cannot
619  // be determined.
620  bool GetThreadID(u_int32_t *thread_id) const;
621
622  MinidumpContext* GetContext();
623
624  // Print a human-readable representation of the object to stdout.
625  void Print();
626
627 private:
628  friend class Minidump;
629
630  static const u_int32_t kStreamType = MD_EXCEPTION_STREAM;
631
632  explicit MinidumpException(Minidump* minidump);
633
634  bool Read(u_int32_t expected_size);
635
636  MDRawExceptionStream exception_;
637  MinidumpContext*     context_;
638};
639
640
641// MinidumpSystemInfo wraps MDRawSystemInfo and provides information about
642// the system on which the minidump was generated.  See also MinidumpMiscInfo.
643class MinidumpSystemInfo : public MinidumpStream {
644 public:
645  virtual ~MinidumpSystemInfo();
646
647  const MDRawSystemInfo* system_info() const {
648    return valid_ ? &system_info_ : NULL;
649  }
650
651  // GetOS and GetCPU return textual representations of the operating system
652  // and CPU that produced the minidump.  Unlike most other Minidump* methods,
653  // they return string objects, not weak pointers.  Defined values for
654  // GetOS() are "mac", "windows", and "linux".  Defined values for GetCPU
655  // are "x86" and "ppc".  These methods return an empty string when their
656  // values are unknown.
657  string GetOS();
658  string GetCPU();
659
660  // I don't know what CSD stands for, but this field is documented as
661  // returning a textual representation of the OS service pack.  On other
662  // platforms, this provides additional information about an OS version
663  // level beyond major.minor.micro.  Returns NULL if unknown.
664  const string* GetCSDVersion();
665
666  // If a CPU vendor string can be determined, returns a pointer to it,
667  // otherwise, returns NULL.  CPU vendor strings can be determined from
668  // x86 CPUs with CPUID 0.
669  const string* GetCPUVendor();
670
671  // Print a human-readable representation of the object to stdout.
672  void Print();
673
674 private:
675  friend class Minidump;
676
677  static const u_int32_t kStreamType = MD_SYSTEM_INFO_STREAM;
678
679  explicit MinidumpSystemInfo(Minidump* minidump);
680
681  bool Read(u_int32_t expected_size);
682
683  MDRawSystemInfo system_info_;
684
685  // Textual representation of the OS service pack, for minidumps produced
686  // by MiniDumpWriteDump on Windows.
687  const string* csd_version_;
688
689  // A string identifying the CPU vendor, if known.
690  const string* cpu_vendor_;
691};
692
693
694// MinidumpMiscInfo wraps MDRawMiscInfo and provides information about
695// the process that generated the minidump, and optionally additional system
696// information.  See also MinidumpSystemInfo.
697class MinidumpMiscInfo : public MinidumpStream {
698 public:
699  const MDRawMiscInfo* misc_info() const {
700    return valid_ ? &misc_info_ : NULL;
701  }
702
703  // Print a human-readable representation of the object to stdout.
704  void Print();
705
706 private:
707  friend class Minidump;
708
709  static const u_int32_t kStreamType = MD_MISC_INFO_STREAM;
710
711  explicit MinidumpMiscInfo(Minidump* minidump_);
712
713  bool Read(u_int32_t expected_size_);
714
715  MDRawMiscInfo misc_info_;
716};
717
718
719// MinidumpBreakpadInfo wraps MDRawBreakpadInfo, which is an optional stream in
720// a minidump that provides additional information about the process state
721// at the time the minidump was generated.
722class MinidumpBreakpadInfo : public MinidumpStream {
723 public:
724  const MDRawBreakpadInfo* breakpad_info() const {
725    return valid_ ? &breakpad_info_ : NULL;
726  }
727
728  // These thread IDs are used to determine if threads deserve special
729  // treatment, so special getters are provided to retrieve this data from
730  // the MDRawBreakpadInfo structure.  The getters return false if the thread
731  // IDs cannot be determined.
732  bool GetDumpThreadID(u_int32_t *thread_id) const;
733  bool GetRequestingThreadID(u_int32_t *thread_id) const;
734
735  // Print a human-readable representation of the object to stdout.
736  void Print();
737
738 private:
739  friend class Minidump;
740
741  static const u_int32_t kStreamType = MD_BREAKPAD_INFO_STREAM;
742
743  explicit MinidumpBreakpadInfo(Minidump* minidump_);
744
745  bool Read(u_int32_t expected_size_);
746
747  MDRawBreakpadInfo breakpad_info_;
748};
749
750
751// Minidump is the user's interface to a minidump file.  It wraps MDRawHeader
752// and provides access to the minidump's top-level stream directory.
753class Minidump {
754 public:
755  // path is the pathname of a file containing the minidump.
756  explicit Minidump(const string& path);
757
758  ~Minidump();
759
760  static void set_max_streams(u_int32_t max_streams) {
761    max_streams_ = max_streams;
762  }
763  static u_int32_t max_streams() { return max_streams_; }
764
765  static void set_max_string_length(u_int32_t max_string_length) {
766    max_string_length_ = max_string_length;
767  }
768  static u_int32_t max_string_length() { return max_string_length_; }
769
770  const MDRawHeader* header() const { return valid_ ? &header_ : NULL; }
771
772  // Reads the minidump file's header and top-level stream directory.
773  // The minidump is expected to be positioned at the beginning of the
774  // header.  Read() sets up the stream list and map, and validates the
775  // Minidump object.
776  bool Read();
777
778  // The next set of methods are stubs that call GetStream.  They exist to
779  // force code generation of the templatized API within the module, and
780  // to avoid exposing an ugly API (GetStream needs to accept a garbage
781  // parameter).
782  MinidumpThreadList* GetThreadList();
783  MinidumpModuleList* GetModuleList();
784  MinidumpMemoryList* GetMemoryList();
785  MinidumpException* GetException();
786  MinidumpSystemInfo* GetSystemInfo();
787  MinidumpMiscInfo* GetMiscInfo();
788  MinidumpBreakpadInfo* GetBreakpadInfo();
789
790  // The next set of methods are provided for users who wish to access
791  // data in minidump files directly, while leveraging the rest of
792  // this class and related classes to handle the basic minidump
793  // structure and known stream types.
794
795  unsigned int GetDirectoryEntryCount() const {
796    return valid_ ? header_.stream_count : 0;
797  }
798  const MDRawDirectory* GetDirectoryEntryAtIndex(unsigned int index) const;
799
800  // The next 2 methods are lower-level I/O routines.  They use fd_.
801
802  // Reads count bytes from the minidump at the current position into
803  // the storage area pointed to by bytes.  bytes must be of sufficient
804  // size.  After the read, the file position is advanced by count.
805  bool ReadBytes(void* bytes, size_t count);
806
807  // Sets the position of the minidump file to offset.
808  bool SeekSet(off_t offset);
809
810  // Returns the current position of the minidump file.
811  off_t Tell() { return valid_ ? lseek(fd_, 0, SEEK_CUR) : (off_t)-1; }
812
813  // The next 2 methods are medium-level I/O routines.
814
815  // ReadString returns a string which is owned by the caller!  offset
816  // specifies the offset that a length-encoded string is stored at in the
817  // minidump file.
818  string* ReadString(off_t offset);
819
820  // SeekToStreamType positions the file at the beginning of a stream
821  // identified by stream_type, and informs the caller of the stream's
822  // length by setting *stream_length.  Because stream_map maps each stream
823  // type to only one stream in the file, this might mislead the user into
824  // thinking that the stream that this seeks to is the only stream with
825  // type stream_type.  That can't happen for streams that these classes
826  // deal with directly, because they're only supposed to be present in the
827  // file singly, and that's verified when stream_map_ is built.  Users who
828  // are looking for other stream types should be aware of this
829  // possibility, and consider using GetDirectoryEntryAtIndex (possibly
830  // with GetDirectoryEntryCount) if expecting multiple streams of the same
831  // type in a single minidump file.
832  bool SeekToStreamType(u_int32_t stream_type, u_int32_t* stream_length);
833
834  bool swap() const { return valid_ ? swap_ : false; }
835
836  // Print a human-readable representation of the object to stdout.
837  void Print();
838
839 private:
840  // MinidumpStreamInfo is used in the MinidumpStreamMap.  It lets
841  // the Minidump object locate interesting streams quickly, and
842  // provides a convenient place to stash MinidumpStream objects.
843  struct MinidumpStreamInfo {
844    MinidumpStreamInfo() : stream_index(0), stream(NULL) {}
845    ~MinidumpStreamInfo() { delete stream; }
846
847    // Index into the MinidumpDirectoryEntries vector
848    unsigned int    stream_index;
849
850    // Pointer to the stream if cached, or NULL if not yet populated
851    MinidumpStream* stream;
852  };
853
854  typedef vector<MDRawDirectory> MinidumpDirectoryEntries;
855  typedef map<u_int32_t, MinidumpStreamInfo> MinidumpStreamMap;
856
857  template<typename T> T* GetStream(T** stream);
858
859  // Opens the minidump file, or if already open, seeks to the beginning.
860  bool Open();
861
862  // The largest number of top-level streams that will be read from a minidump.
863  // Note that streams are only read (and only consume memory) as needed,
864  // when directed by the caller.  The default is 128.
865  static u_int32_t max_streams_;
866
867  // The maximum length of a UTF-16 string that will be read from a minidump
868  // in 16-bit words.  The default is 1024.  UTF-16 strings are converted
869  // to UTF-8 when stored in memory, and each UTF-16 word will be represented
870  // by as many as 3 bytes in UTF-8.
871  static unsigned int max_string_length_;
872
873  MDRawHeader               header_;
874
875  // The list of streams.
876  MinidumpDirectoryEntries* directory_;
877
878  // Access to streams using the stream type as the key.
879  MinidumpStreamMap*        stream_map_;
880
881  // The pathname of the minidump file to process, set in the constructor.
882  const string              path_;
883
884  // The file descriptor for all file I/O.  Used by ReadBytes and SeekSet.
885  // Set based on the |path_| member by Open, which is called by Read.
886  int                       fd_;
887
888  // swap_ is true if the minidump file should be byte-swapped.  If the
889  // minidump was produced by a CPU that is other-endian than the CPU
890  // processing the minidump, this will be true.  If the two CPUs are
891  // same-endian, this will be false.
892  bool                      swap_;
893
894  // Validity of the Minidump structure, false immediately after
895  // construction or after a failed Read(); true following a successful
896  // Read().
897  bool                      valid_;
898};
899
900
901}  // namespace google_breakpad
902
903
904#endif  // GOOGLE_BREAKPAD_PROCESSOR_MINIDUMP_H__
905