1// Copyright (c) 2010 Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8//     * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10//     * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14//     * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29//
30// basic_source_line_resolver.cc: BasicSourceLineResolver implementation.
31//
32// See basic_source_line_resolver.h and basic_source_line_resolver_types.h
33// for documentation.
34
35#include <assert.h>
36#include <stdio.h>
37#include <string.h>
38#include <sys/types.h>
39#include <sys/stat.h>
40
41#include <limits>
42#include <map>
43#include <utility>
44#include <vector>
45
46#include "google_breakpad/processor/basic_source_line_resolver.h"
47#include "processor/basic_source_line_resolver_types.h"
48#include "processor/module_factory.h"
49
50#include "processor/tokenize.h"
51
52using std::map;
53using std::vector;
54using std::make_pair;
55
56namespace google_breakpad {
57
// The Windows C runtime spells these two functions differently; map the names
// used throughout this file onto the MSVC equivalents.
#ifdef _WIN32
#define strtok_r strtok_s
#define strtoull _strtoui64
#endif

// Characters treated as whitespace when splitting a record into fields and
// when checking the character that follows a parsed number.
static const char *kWhitespace = " \r\n";
// LogParseError() stops logging (but keeps counting) after this many errors.
static const int kMaxErrorsPrinted = 5;
// LoadMapFromMemory() abandons parsing once more than this many errors have
// been counted.
static const int kMaxErrorsBeforeBailing = 100;
66
67BasicSourceLineResolver::BasicSourceLineResolver() :
68    SourceLineResolverBase(new BasicModuleFactory) { }
69
70// static
71void BasicSourceLineResolver::Module::LogParseError(
72   const string &message,
73   int line_number,
74   int *num_errors) {
75  if (++(*num_errors) <= kMaxErrorsPrinted) {
76    if (line_number > 0) {
77      BPLOG(ERROR) << "Line " << line_number << ": " << message;
78    } else {
79      BPLOG(ERROR) << message;
80    }
81  }
82}
83
84bool BasicSourceLineResolver::Module::LoadMapFromMemory(
85    char *memory_buffer,
86    size_t memory_buffer_size) {
87  linked_ptr<Function> cur_func;
88  int line_number = 0;
89  int num_errors = 0;
90  char *save_ptr;
91
92  // If the length is 0, we can still pretend we have a symbol file. This is
93  // for scenarios that want to test symbol lookup, but don't necessarily care
94  // if certain modules do not have any information, like system libraries.
95  if (memory_buffer_size == 0) {
96    return true;
97  }
98
99  // Make sure the last character is null terminator.
100  size_t last_null_terminator = memory_buffer_size - 1;
101  if (memory_buffer[last_null_terminator] != '\0') {
102    memory_buffer[last_null_terminator] = '\0';
103  }
104
105  // Skip any null terminators at the end of the memory buffer, and make sure
106  // there are no other null terminators in the middle of the memory buffer.
107  bool has_null_terminator_in_the_middle = false;
108  while (last_null_terminator > 0 &&
109         memory_buffer[last_null_terminator - 1] == '\0') {
110    last_null_terminator--;
111  }
112  for (size_t i = 0; i < last_null_terminator; i++) {
113    if (memory_buffer[i] == '\0') {
114      memory_buffer[i] = '_';
115      has_null_terminator_in_the_middle = true;
116    }
117  }
118  if (has_null_terminator_in_the_middle) {
119    LogParseError(
120       "Null terminator is not expected in the middle of the symbol data",
121       line_number,
122       &num_errors);
123  }
124
125  char *buffer;
126  buffer = strtok_r(memory_buffer, "\r\n", &save_ptr);
127
128  while (buffer != NULL) {
129    ++line_number;
130
131    if (strncmp(buffer, "FILE ", 5) == 0) {
132      if (!ParseFile(buffer)) {
133        LogParseError("ParseFile on buffer failed", line_number, &num_errors);
134      }
135    } else if (strncmp(buffer, "STACK ", 6) == 0) {
136      if (!ParseStackInfo(buffer)) {
137        LogParseError("ParseStackInfo failed", line_number, &num_errors);
138      }
139    } else if (strncmp(buffer, "FUNC ", 5) == 0) {
140      cur_func.reset(ParseFunction(buffer));
141      if (!cur_func.get()) {
142        LogParseError("ParseFunction failed", line_number, &num_errors);
143      } else {
144        // StoreRange will fail if the function has an invalid address or size.
145        // We'll silently ignore this, the function and any corresponding lines
146        // will be destroyed when cur_func is released.
147        functions_.StoreRange(cur_func->address, cur_func->size, cur_func);
148      }
149    } else if (strncmp(buffer, "PUBLIC ", 7) == 0) {
150      // Clear cur_func: public symbols don't contain line number information.
151      cur_func.reset();
152
153      if (!ParsePublicSymbol(buffer)) {
154        LogParseError("ParsePublicSymbol failed", line_number, &num_errors);
155      }
156    } else if (strncmp(buffer, "MODULE ", 7) == 0) {
157      // Ignore these.  They're not of any use to BasicSourceLineResolver,
158      // which is fed modules by a SymbolSupplier.  These lines are present to
159      // aid other tools in properly placing symbol files so that they can
160      // be accessed by a SymbolSupplier.
161      //
162      // MODULE <guid> <age> <filename>
163    } else if (strncmp(buffer, "INFO ", 5) == 0) {
164      // Ignore these as well, they're similarly just for housekeeping.
165      //
166      // INFO CODE_ID <code id> <filename>
167    } else {
168      if (!cur_func.get()) {
169        LogParseError("Found source line data without a function",
170                       line_number, &num_errors);
171      } else {
172        Line *line = ParseLine(buffer);
173        if (!line) {
174          LogParseError("ParseLine failed", line_number, &num_errors);
175        } else {
176          cur_func->lines.StoreRange(line->address, line->size,
177                                     linked_ptr<Line>(line));
178        }
179      }
180    }
181    if (num_errors > kMaxErrorsBeforeBailing) {
182      break;
183    }
184    buffer = strtok_r(NULL, "\r\n", &save_ptr);
185  }
186  is_corrupt_ = num_errors > 0;
187  return true;
188}
189
190void BasicSourceLineResolver::Module::LookupAddress(StackFrame *frame) const {
191  MemAddr address = frame->instruction - frame->module->base_address();
192
193  // First, look for a FUNC record that covers address. Use
194  // RetrieveNearestRange instead of RetrieveRange so that, if there
195  // is no such function, we can use the next function to bound the
196  // extent of the PUBLIC symbol we find, below. This does mean we
197  // need to check that address indeed falls within the function we
198  // find; do the range comparison in an overflow-friendly way.
199  linked_ptr<Function> func;
200  linked_ptr<PublicSymbol> public_symbol;
201  MemAddr function_base;
202  MemAddr function_size;
203  MemAddr public_address;
204  if (functions_.RetrieveNearestRange(address, &func,
205                                      &function_base, &function_size) &&
206      address >= function_base && address - function_base < function_size) {
207    frame->function_name = func->name;
208    frame->function_base = frame->module->base_address() + function_base;
209
210    linked_ptr<Line> line;
211    MemAddr line_base;
212    if (func->lines.RetrieveRange(address, &line, &line_base, NULL)) {
213      FileMap::const_iterator it = files_.find(line->source_file_id);
214      if (it != files_.end()) {
215        frame->source_file_name = files_.find(line->source_file_id)->second;
216      }
217      frame->source_line = line->line;
218      frame->source_line_base = frame->module->base_address() + line_base;
219    }
220  } else if (public_symbols_.Retrieve(address,
221                                      &public_symbol, &public_address) &&
222             (!func.get() || public_address > function_base)) {
223    frame->function_name = public_symbol->name;
224    frame->function_base = frame->module->base_address() + public_address;
225  }
226}
227
228WindowsFrameInfo *BasicSourceLineResolver::Module::FindWindowsFrameInfo(
229    const StackFrame *frame) const {
230  MemAddr address = frame->instruction - frame->module->base_address();
231  scoped_ptr<WindowsFrameInfo> result(new WindowsFrameInfo());
232
233  // We only know about WindowsFrameInfo::STACK_INFO_FRAME_DATA and
234  // WindowsFrameInfo::STACK_INFO_FPO. Prefer them in this order.
235  // WindowsFrameInfo::STACK_INFO_FRAME_DATA is the newer type that
236  // includes its own program string.
237  // WindowsFrameInfo::STACK_INFO_FPO is the older type
238  // corresponding to the FPO_DATA struct. See stackwalker_x86.cc.
239  linked_ptr<WindowsFrameInfo> frame_info;
240  if ((windows_frame_info_[WindowsFrameInfo::STACK_INFO_FRAME_DATA]
241       .RetrieveRange(address, &frame_info))
242      || (windows_frame_info_[WindowsFrameInfo::STACK_INFO_FPO]
243          .RetrieveRange(address, &frame_info))) {
244    result->CopyFrom(*frame_info.get());
245    return result.release();
246  }
247
248  // Even without a relevant STACK line, many functions contain
249  // information about how much space their parameters consume on the
250  // stack. Use RetrieveNearestRange instead of RetrieveRange, so that
251  // we can use the function to bound the extent of the PUBLIC symbol,
252  // below. However, this does mean we need to check that ADDRESS
253  // falls within the retrieved function's range; do the range
254  // comparison in an overflow-friendly way.
255  linked_ptr<Function> function;
256  MemAddr function_base, function_size;
257  if (functions_.RetrieveNearestRange(address, &function,
258                                      &function_base, &function_size) &&
259      address >= function_base && address - function_base < function_size) {
260    result->parameter_size = function->parameter_size;
261    result->valid |= WindowsFrameInfo::VALID_PARAMETER_SIZE;
262    return result.release();
263  }
264
265  // PUBLIC symbols might have a parameter size. Use the function we
266  // found above to limit the range the public symbol covers.
267  linked_ptr<PublicSymbol> public_symbol;
268  MemAddr public_address;
269  if (public_symbols_.Retrieve(address, &public_symbol, &public_address) &&
270      (!function.get() || public_address > function_base)) {
271    result->parameter_size = public_symbol->parameter_size;
272  }
273
274  return NULL;
275}
276
277CFIFrameInfo *BasicSourceLineResolver::Module::FindCFIFrameInfo(
278    const StackFrame *frame) const {
279  MemAddr address = frame->instruction - frame->module->base_address();
280  MemAddr initial_base, initial_size;
281  string initial_rules;
282
283  // Find the initial rule whose range covers this address. That
284  // provides an initial set of register recovery rules. Then, walk
285  // forward from the initial rule's starting address to frame's
286  // instruction address, applying delta rules.
287  if (!cfi_initial_rules_.RetrieveRange(address, &initial_rules,
288                                        &initial_base, &initial_size)) {
289    return NULL;
290  }
291
292  // Create a frame info structure, and populate it with the rules from
293  // the STACK CFI INIT record.
294  scoped_ptr<CFIFrameInfo> rules(new CFIFrameInfo());
295  if (!ParseCFIRuleSet(initial_rules, rules.get()))
296    return NULL;
297
298  // Find the first delta rule that falls within the initial rule's range.
299  map<MemAddr, string>::const_iterator delta =
300    cfi_delta_rules_.lower_bound(initial_base);
301
302  // Apply delta rules up to and including the frame's address.
303  while (delta != cfi_delta_rules_.end() && delta->first <= address) {
304    ParseCFIRuleSet(delta->second, rules.get());
305    delta++;
306  }
307
308  return rules.release();
309}
310
311bool BasicSourceLineResolver::Module::ParseFile(char *file_line) {
312  long index;
313  char *filename;
314  if (SymbolParseHelper::ParseFile(file_line, &index, &filename)) {
315    files_.insert(make_pair(index, string(filename)));
316    return true;
317  }
318  return false;
319}
320
321BasicSourceLineResolver::Function*
322BasicSourceLineResolver::Module::ParseFunction(char *function_line) {
323  uint64_t address;
324  uint64_t size;
325  long stack_param_size;
326  char *name;
327  if (SymbolParseHelper::ParseFunction(function_line, &address, &size,
328                                       &stack_param_size, &name)) {
329    return new Function(name, address, size, stack_param_size);
330  }
331  return NULL;
332}
333
334BasicSourceLineResolver::Line* BasicSourceLineResolver::Module::ParseLine(
335    char *line_line) {
336  uint64_t address;
337  uint64_t size;
338  long line_number;
339  long source_file;
340
341  if (SymbolParseHelper::ParseLine(line_line, &address, &size, &line_number,
342                                   &source_file)) {
343    return new Line(address, size, source_file, line_number);
344  }
345  return NULL;
346}
347
348bool BasicSourceLineResolver::Module::ParsePublicSymbol(char *public_line) {
349  uint64_t address;
350  long stack_param_size;
351  char *name;
352
353  if (SymbolParseHelper::ParsePublicSymbol(public_line, &address,
354                                           &stack_param_size, &name)) {
355    // A few public symbols show up with an address of 0.  This has been seen
356    // in the dumped output of ntdll.pdb for symbols such as _CIlog, _CIpow,
357    // RtlDescribeChunkLZNT1, and RtlReserveChunkLZNT1.  They would conflict
358    // with one another if they were allowed into the public_symbols_ map,
359    // but since the address is obviously invalid, gracefully accept them
360    // as input without putting them into the map.
361    if (address == 0) {
362      return true;
363    }
364
365    linked_ptr<PublicSymbol> symbol(new PublicSymbol(name, address,
366                                                     stack_param_size));
367    return public_symbols_.Store(address, symbol);
368  }
369  return false;
370}
371
372bool BasicSourceLineResolver::Module::ParseStackInfo(char *stack_info_line) {
373  // Skip "STACK " prefix.
374  stack_info_line += 6;
375
376  // Find the token indicating what sort of stack frame walking
377  // information this is.
378  while (*stack_info_line == ' ')
379    stack_info_line++;
380  const char *platform = stack_info_line;
381  while (!strchr(kWhitespace, *stack_info_line))
382    stack_info_line++;
383  *stack_info_line++ = '\0';
384
385  // MSVC stack frame info.
386  if (strcmp(platform, "WIN") == 0) {
387    int type = 0;
388    uint64_t rva, code_size;
389    linked_ptr<WindowsFrameInfo>
390      stack_frame_info(WindowsFrameInfo::ParseFromString(stack_info_line,
391                                                         type,
392                                                         rva,
393                                                         code_size));
394    if (stack_frame_info == NULL)
395      return false;
396
397    // TODO(mmentovai): I wanted to use StoreRange's return value as this
398    // method's return value, but MSVC infrequently outputs stack info that
399    // violates the containment rules.  This happens with a section of code
400    // in strncpy_s in test_app.cc (testdata/minidump2).  There, problem looks
401    // like this:
402    //   STACK WIN 4 4242 1a a 0 ...  (STACK WIN 4 base size prolog 0 ...)
403    //   STACK WIN 4 4243 2e 9 0 ...
404    // ContainedRangeMap treats these two blocks as conflicting.  In reality,
405    // when the prolog lengths are taken into account, the actual code of
406    // these blocks doesn't conflict.  However, we can't take the prolog lengths
407    // into account directly here because we'd wind up with a different set
408    // of range conflicts when MSVC outputs stack info like this:
409    //   STACK WIN 4 1040 73 33 0 ...
410    //   STACK WIN 4 105a 59 19 0 ...
411    // because in both of these entries, the beginning of the code after the
412    // prolog is at 0x1073, and the last byte of contained code is at 0x10b2.
413    // Perhaps we could get away with storing ranges by rva + prolog_size
414    // if ContainedRangeMap were modified to allow replacement of
415    // already-stored values.
416
417    windows_frame_info_[type].StoreRange(rva, code_size, stack_frame_info);
418    return true;
419  } else if (strcmp(platform, "CFI") == 0) {
420    // DWARF CFI stack frame info
421    return ParseCFIFrameInfo(stack_info_line);
422  } else {
423    // Something unrecognized.
424    return false;
425  }
426}
427
428bool BasicSourceLineResolver::Module::ParseCFIFrameInfo(
429    char *stack_info_line) {
430  char *cursor;
431
432  // Is this an INIT record or a delta record?
433  char *init_or_address = strtok_r(stack_info_line, " \r\n", &cursor);
434  if (!init_or_address)
435    return false;
436
437  if (strcmp(init_or_address, "INIT") == 0) {
438    // This record has the form "STACK INIT <address> <size> <rules...>".
439    char *address_field = strtok_r(NULL, " \r\n", &cursor);
440    if (!address_field) return false;
441
442    char *size_field = strtok_r(NULL, " \r\n", &cursor);
443    if (!size_field) return false;
444
445    char *initial_rules = strtok_r(NULL, "\r\n", &cursor);
446    if (!initial_rules) return false;
447
448    MemAddr address = strtoul(address_field, NULL, 16);
449    MemAddr size    = strtoul(size_field,    NULL, 16);
450    cfi_initial_rules_.StoreRange(address, size, initial_rules);
451    return true;
452  }
453
454  // This record has the form "STACK <address> <rules...>".
455  char *address_field = init_or_address;
456  char *delta_rules = strtok_r(NULL, "\r\n", &cursor);
457  if (!delta_rules) return false;
458  MemAddr address = strtoul(address_field, NULL, 16);
459  cfi_delta_rules_[address] = delta_rules;
460  return true;
461}
462
463// static
464bool SymbolParseHelper::ParseFile(char *file_line, long *index,
465                                  char **filename) {
466  // FILE <id> <filename>
467  assert(strncmp(file_line, "FILE ", 5) == 0);
468  file_line += 5;  // skip prefix
469
470  vector<char*> tokens;
471  if (!Tokenize(file_line, kWhitespace, 2, &tokens)) {
472    return false;
473  }
474
475  char *after_number;
476  *index = strtol(tokens[0], &after_number, 10);
477  if (!IsValidAfterNumber(after_number) || *index < 0 ||
478      *index == std::numeric_limits<long>::max()) {
479    return false;
480  }
481
482  *filename = tokens[1];
483  if (!filename) {
484    return false;
485  }
486
487  return true;
488}
489
490// static
491bool SymbolParseHelper::ParseFunction(char *function_line, uint64_t *address,
492                                      uint64_t *size, long *stack_param_size,
493                                      char **name) {
494  // FUNC <address> <size> <stack_param_size> <name>
495  assert(strncmp(function_line, "FUNC ", 5) == 0);
496  function_line += 5;  // skip prefix
497
498  vector<char*> tokens;
499  if (!Tokenize(function_line, kWhitespace, 4, &tokens)) {
500    return false;
501  }
502
503  char *after_number;
504  *address = strtoull(tokens[0], &after_number, 16);
505  if (!IsValidAfterNumber(after_number) ||
506      *address == std::numeric_limits<unsigned long long>::max()) {
507    return false;
508  }
509  *size = strtoull(tokens[1], &after_number, 16);
510  if (!IsValidAfterNumber(after_number) ||
511      *size == std::numeric_limits<unsigned long long>::max()) {
512    return false;
513  }
514  *stack_param_size = strtol(tokens[2], &after_number, 16);
515  if (!IsValidAfterNumber(after_number) ||
516      *stack_param_size == std::numeric_limits<long>::max() ||
517      *stack_param_size < 0) {
518    return false;
519  }
520  *name = tokens[3];
521
522  return true;
523}
524
525// static
526bool SymbolParseHelper::ParseLine(char *line_line, uint64_t *address,
527                                  uint64_t *size, long *line_number,
528                                  long *source_file) {
529  // <address> <size> <line number> <source file id>
530  vector<char*> tokens;
531  if (!Tokenize(line_line, kWhitespace, 4, &tokens)) {
532    return false;
533  }
534
535  char *after_number;
536  *address  = strtoull(tokens[0], &after_number, 16);
537  if (!IsValidAfterNumber(after_number) ||
538      *address == std::numeric_limits<unsigned long long>::max()) {
539    return false;
540  }
541  *size = strtoull(tokens[1], &after_number, 16);
542  if (!IsValidAfterNumber(after_number) ||
543      *size == std::numeric_limits<unsigned long long>::max()) {
544    return false;
545  }
546  *line_number = strtol(tokens[2], &after_number, 10);
547  if (!IsValidAfterNumber(after_number) ||
548      *line_number == std::numeric_limits<long>::max()) {
549    return false;
550  }
551  *source_file = strtol(tokens[3], &after_number, 10);
552  if (!IsValidAfterNumber(after_number) || *source_file < 0 ||
553      *source_file == std::numeric_limits<long>::max()) {
554    return false;
555  }
556
557  // Valid line numbers normally start from 1, however there are functions that
558  // are associated with a source file but not associated with any line number
559  // (block helper function) and for such functions the symbol file contains 0
560  // for the line numbers.  Hence, 0 should be treated as a valid line number.
561  // For more information on block helper functions, please, take a look at:
562  // http://clang.llvm.org/docs/Block-ABI-Apple.html
563  if (*line_number < 0) {
564    return false;
565  }
566
567  return true;
568}
569
570// static
571bool SymbolParseHelper::ParsePublicSymbol(char *public_line,
572                                          uint64_t *address,
573                                          long *stack_param_size,
574                                          char **name) {
575  // PUBLIC <address> <stack_param_size> <name>
576  assert(strncmp(public_line, "PUBLIC ", 7) == 0);
577  public_line += 7;  // skip prefix
578
579  vector<char*> tokens;
580  if (!Tokenize(public_line, kWhitespace, 3, &tokens)) {
581    return false;
582  }
583
584  char *after_number;
585  *address = strtoull(tokens[0], &after_number, 16);
586  if (!IsValidAfterNumber(after_number) ||
587      *address == std::numeric_limits<unsigned long long>::max()) {
588    return false;
589  }
590  *stack_param_size = strtol(tokens[1], &after_number, 16);
591  if (!IsValidAfterNumber(after_number) ||
592      *stack_param_size == std::numeric_limits<long>::max() ||
593      *stack_param_size < 0) {
594    return false;
595  }
596  *name = tokens[2];
597
598  return true;
599}
600
601// static
602bool SymbolParseHelper::IsValidAfterNumber(char *after_number) {
603  if (after_number != NULL && strchr(kWhitespace, *after_number) != NULL) {
604    return true;
605  }
606  return false;
607}
608
609}  // namespace google_breakpad
610