1// Copyright (c) 2010 Google Inc. All Rights Reserved.
2//
3// Redistribution and use in source and binary forms, with or without
4// modification, are permitted provided that the following conditions are
5// met:
6//
7//     * Redistributions of source code must retain the above copyright
8// notice, this list of conditions and the following disclaimer.
9//     * Redistributions in binary form must reproduce the above
10// copyright notice, this list of conditions and the following disclaimer
11// in the documentation and/or other materials provided with the
12// distribution.
13//     * Neither the name of Google Inc. nor the names of its
14// contributors may be used to endorse or promote products derived from
15// this software without specific prior written permission.
16//
17// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29// CFI reader author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
30
31// Implementation of dwarf2reader::LineInfo, dwarf2reader::CompilationUnit,
32// and dwarf2reader::CallFrameInfo. See dwarf2reader.h for details.
33
34#include "common/dwarf/dwarf2reader.h"
35
36#include <assert.h>
37#include <stdint.h>
38#include <stdio.h>
39#include <string.h>
40
41#include <map>
42#include <memory>
43#include <stack>
44#include <string>
45#include <utility>
46
47#include "common/dwarf/bytereader-inl.h"
48#include "common/dwarf/bytereader.h"
49#include "common/dwarf/line_state_machine.h"
50#include "common/using_std_string.h"
51
52namespace dwarf2reader {
53
54CompilationUnit::CompilationUnit(const SectionMap& sections, uint64 offset,
55                                 ByteReader* reader, Dwarf2Handler* handler)
56    : offset_from_section_start_(offset), reader_(reader),
57      sections_(sections), handler_(handler), abbrevs_(NULL),
58      string_buffer_(NULL), string_buffer_length_(0) {}
59
60// Read a DWARF2/3 abbreviation section.
61// Each abbrev consists of a abbreviation number, a tag, a byte
62// specifying whether the tag has children, and a list of
63// attribute/form pairs.
64// The list of forms is terminated by a 0 for the attribute, and a
65// zero for the form.  The entire abbreviation section is terminated
66// by a zero for the code.
67
68void CompilationUnit::ReadAbbrevs() {
69  if (abbrevs_)
70    return;
71
72  // First get the debug_abbrev section.  ".debug_abbrev" is the name
73  // recommended in the DWARF spec, and used on Linux;
74  // "__debug_abbrev" is the name used in Mac OS X Mach-O files.
75  SectionMap::const_iterator iter = sections_.find(".debug_abbrev");
76  if (iter == sections_.end())
77    iter = sections_.find("__debug_abbrev");
78  assert(iter != sections_.end());
79
80  abbrevs_ = new std::vector<Abbrev>;
81  abbrevs_->resize(1);
82
83  // The only way to check whether we are reading over the end of the
84  // buffer would be to first compute the size of the leb128 data by
85  // reading it, then go back and read it again.
86  const char* abbrev_start = iter->second.first +
87                                      header_.abbrev_offset;
88  const char* abbrevptr = abbrev_start;
89#ifndef NDEBUG
90  const uint64 abbrev_length = iter->second.second - header_.abbrev_offset;
91#endif
92
93  while (1) {
94    CompilationUnit::Abbrev abbrev;
95    size_t len;
96    const uint64 number = reader_->ReadUnsignedLEB128(abbrevptr, &len);
97
98    if (number == 0)
99      break;
100    abbrev.number = number;
101    abbrevptr += len;
102
103    assert(abbrevptr < abbrev_start + abbrev_length);
104    const uint64 tag = reader_->ReadUnsignedLEB128(abbrevptr, &len);
105    abbrevptr += len;
106    abbrev.tag = static_cast<enum DwarfTag>(tag);
107
108    assert(abbrevptr < abbrev_start + abbrev_length);
109    abbrev.has_children = reader_->ReadOneByte(abbrevptr);
110    abbrevptr += 1;
111
112    assert(abbrevptr < abbrev_start + abbrev_length);
113
114    while (1) {
115      const uint64 nametemp = reader_->ReadUnsignedLEB128(abbrevptr, &len);
116      abbrevptr += len;
117
118      assert(abbrevptr < abbrev_start + abbrev_length);
119      const uint64 formtemp = reader_->ReadUnsignedLEB128(abbrevptr, &len);
120      abbrevptr += len;
121      if (nametemp == 0 && formtemp == 0)
122        break;
123
124      const enum DwarfAttribute name =
125        static_cast<enum DwarfAttribute>(nametemp);
126      const enum DwarfForm form = static_cast<enum DwarfForm>(formtemp);
127      abbrev.attributes.push_back(std::make_pair(name, form));
128    }
129    assert(abbrev.number == abbrevs_->size());
130    abbrevs_->push_back(abbrev);
131  }
132}
133
134// Skips a single DIE's attributes.
135const char* CompilationUnit::SkipDIE(const char* start,
136                                              const Abbrev& abbrev) {
137  for (AttributeList::const_iterator i = abbrev.attributes.begin();
138       i != abbrev.attributes.end();
139       i++)  {
140    start = SkipAttribute(start, i->second);
141  }
142  return start;
143}
144
145// Skips a single attribute form's data.
146const char* CompilationUnit::SkipAttribute(const char* start,
147                                                    enum DwarfForm form) {
148  size_t len;
149
150  switch (form) {
151    case DW_FORM_indirect:
152      form = static_cast<enum DwarfForm>(reader_->ReadUnsignedLEB128(start,
153                                                                     &len));
154      start += len;
155      return SkipAttribute(start, form);
156
157    case DW_FORM_flag_present:
158      return start;
159    case DW_FORM_data1:
160    case DW_FORM_flag:
161    case DW_FORM_ref1:
162      return start + 1;
163    case DW_FORM_ref2:
164    case DW_FORM_data2:
165      return start + 2;
166    case DW_FORM_ref4:
167    case DW_FORM_data4:
168      return start + 4;
169    case DW_FORM_ref8:
170    case DW_FORM_data8:
171    case DW_FORM_ref_sig8:
172      return start + 8;
173    case DW_FORM_string:
174      return start + strlen(start) + 1;
175    case DW_FORM_udata:
176    case DW_FORM_ref_udata:
177      reader_->ReadUnsignedLEB128(start, &len);
178      return start + len;
179
180    case DW_FORM_sdata:
181      reader_->ReadSignedLEB128(start, &len);
182      return start + len;
183    case DW_FORM_addr:
184      return start + reader_->AddressSize();
185    case DW_FORM_ref_addr:
186      // DWARF2 and 3/4 differ on whether ref_addr is address size or
187      // offset size.
188      assert(header_.version >= 2);
189      if (header_.version == 2) {
190        return start + reader_->AddressSize();
191      } else if (header_.version >= 3) {
192        return start + reader_->OffsetSize();
193      }
194      break;
195
196    case DW_FORM_block1:
197      return start + 1 + reader_->ReadOneByte(start);
198    case DW_FORM_block2:
199      return start + 2 + reader_->ReadTwoBytes(start);
200    case DW_FORM_block4:
201      return start + 4 + reader_->ReadFourBytes(start);
202    case DW_FORM_block:
203    case DW_FORM_exprloc: {
204      uint64 size = reader_->ReadUnsignedLEB128(start, &len);
205      return start + size + len;
206    }
207    case DW_FORM_strp:
208    case DW_FORM_sec_offset:
209      return start + reader_->OffsetSize();
210  }
211  fprintf(stderr,"Unhandled form type");
212  return NULL;
213}
214
215// Read a DWARF2/3 header.
216// The header is variable length in DWARF3 (and DWARF2 as extended by
217// most compilers), and consists of an length field, a version number,
218// the offset in the .debug_abbrev section for our abbrevs, and an
219// address size.
220void CompilationUnit::ReadHeader() {
221  const char* headerptr = buffer_;
222  size_t initial_length_size;
223
224  assert(headerptr + 4 < buffer_ + buffer_length_);
225  const uint64 initial_length
226    = reader_->ReadInitialLength(headerptr, &initial_length_size);
227  headerptr += initial_length_size;
228  header_.length = initial_length;
229
230  assert(headerptr + 2 < buffer_ + buffer_length_);
231  header_.version = reader_->ReadTwoBytes(headerptr);
232  headerptr += 2;
233
234  assert(headerptr + reader_->OffsetSize() < buffer_ + buffer_length_);
235  header_.abbrev_offset = reader_->ReadOffset(headerptr);
236  headerptr += reader_->OffsetSize();
237
238  assert(headerptr + 1 < buffer_ + buffer_length_);
239  header_.address_size = reader_->ReadOneByte(headerptr);
240  reader_->SetAddressSize(header_.address_size);
241  headerptr += 1;
242
243  after_header_ = headerptr;
244
245  // This check ensures that we don't have to do checking during the
246  // reading of DIEs. header_.length does not include the size of the
247  // initial length.
248  assert(buffer_ + initial_length_size + header_.length <=
249        buffer_ + buffer_length_);
250}
251
252uint64 CompilationUnit::Start() {
253  // First get the debug_info section.  ".debug_info" is the name
254  // recommended in the DWARF spec, and used on Linux; "__debug_info"
255  // is the name used in Mac OS X Mach-O files.
256  SectionMap::const_iterator iter = sections_.find(".debug_info");
257  if (iter == sections_.end())
258    iter = sections_.find("__debug_info");
259  assert(iter != sections_.end());
260
261  // Set up our buffer
262  buffer_ = iter->second.first + offset_from_section_start_;
263  buffer_length_ = iter->second.second - offset_from_section_start_;
264
265  // Read the header
266  ReadHeader();
267
268  // Figure out the real length from the end of the initial length to
269  // the end of the compilation unit, since that is the value we
270  // return.
271  uint64 ourlength = header_.length;
272  if (reader_->OffsetSize() == 8)
273    ourlength += 12;
274  else
275    ourlength += 4;
276
277  // See if the user wants this compilation unit, and if not, just return.
278  if (!handler_->StartCompilationUnit(offset_from_section_start_,
279                                      reader_->AddressSize(),
280                                      reader_->OffsetSize(),
281                                      header_.length,
282                                      header_.version))
283    return ourlength;
284
285  // Otherwise, continue by reading our abbreviation entries.
286  ReadAbbrevs();
287
288  // Set the string section if we have one.  ".debug_str" is the name
289  // recommended in the DWARF spec, and used on Linux; "__debug_str"
290  // is the name used in Mac OS X Mach-O files.
291  iter = sections_.find(".debug_str");
292  if (iter == sections_.end())
293    iter = sections_.find("__debug_str");
294  if (iter != sections_.end()) {
295    string_buffer_ = iter->second.first;
296    string_buffer_length_ = iter->second.second;
297  }
298
299  // Now that we have our abbreviations, start processing DIE's.
300  ProcessDIEs();
301
302  return ourlength;
303}
304
305// If one really wanted, you could merge SkipAttribute and
306// ProcessAttribute
307// This is all boring data manipulation and calling of the handler.
308const char* CompilationUnit::ProcessAttribute(
309    uint64 dieoffset, const char* start, enum DwarfAttribute attr,
310    enum DwarfForm form) {
311  size_t len;
312
313  switch (form) {
314    // DW_FORM_indirect is never used because it is such a space
315    // waster.
316    case DW_FORM_indirect:
317      form = static_cast<enum DwarfForm>(reader_->ReadUnsignedLEB128(start,
318                                                                     &len));
319      start += len;
320      return ProcessAttribute(dieoffset, start, attr, form);
321
322    case DW_FORM_flag_present:
323      handler_->ProcessAttributeUnsigned(dieoffset, attr, form, 1);
324      return start;
325    case DW_FORM_data1:
326    case DW_FORM_flag:
327      handler_->ProcessAttributeUnsigned(dieoffset, attr, form,
328                                         reader_->ReadOneByte(start));
329      return start + 1;
330    case DW_FORM_data2:
331      handler_->ProcessAttributeUnsigned(dieoffset, attr, form,
332                                         reader_->ReadTwoBytes(start));
333      return start + 2;
334    case DW_FORM_data4:
335      handler_->ProcessAttributeUnsigned(dieoffset, attr, form,
336                                         reader_->ReadFourBytes(start));
337      return start + 4;
338    case DW_FORM_data8:
339      handler_->ProcessAttributeUnsigned(dieoffset, attr, form,
340                                         reader_->ReadEightBytes(start));
341      return start + 8;
342    case DW_FORM_string: {
343      const char* str = start;
344      handler_->ProcessAttributeString(dieoffset, attr, form,
345                                       str);
346      return start + strlen(str) + 1;
347    }
348    case DW_FORM_udata:
349      handler_->ProcessAttributeUnsigned(dieoffset, attr, form,
350                                         reader_->ReadUnsignedLEB128(start,
351                                                                     &len));
352      return start + len;
353
354    case DW_FORM_sdata:
355      handler_->ProcessAttributeSigned(dieoffset, attr, form,
356                                      reader_->ReadSignedLEB128(start, &len));
357      return start + len;
358    case DW_FORM_addr:
359      handler_->ProcessAttributeUnsigned(dieoffset, attr, form,
360                                         reader_->ReadAddress(start));
361      return start + reader_->AddressSize();
362    case DW_FORM_sec_offset:
363      handler_->ProcessAttributeUnsigned(dieoffset, attr, form,
364                                         reader_->ReadOffset(start));
365      return start + reader_->OffsetSize();
366
367    case DW_FORM_ref1:
368      handler_->ProcessAttributeReference(dieoffset, attr, form,
369                                          reader_->ReadOneByte(start)
370                                          + offset_from_section_start_);
371      return start + 1;
372    case DW_FORM_ref2:
373      handler_->ProcessAttributeReference(dieoffset, attr, form,
374                                          reader_->ReadTwoBytes(start)
375                                          + offset_from_section_start_);
376      return start + 2;
377    case DW_FORM_ref4:
378      handler_->ProcessAttributeReference(dieoffset, attr, form,
379                                          reader_->ReadFourBytes(start)
380                                          + offset_from_section_start_);
381      return start + 4;
382    case DW_FORM_ref8:
383      handler_->ProcessAttributeReference(dieoffset, attr, form,
384                                          reader_->ReadEightBytes(start)
385                                          + offset_from_section_start_);
386      return start + 8;
387    case DW_FORM_ref_udata:
388      handler_->ProcessAttributeReference(dieoffset, attr, form,
389                                          reader_->ReadUnsignedLEB128(start,
390                                                                      &len)
391                                          + offset_from_section_start_);
392      return start + len;
393    case DW_FORM_ref_addr:
394      // DWARF2 and 3/4 differ on whether ref_addr is address size or
395      // offset size.
396      assert(header_.version >= 2);
397      if (header_.version == 2) {
398        handler_->ProcessAttributeReference(dieoffset, attr, form,
399                                            reader_->ReadAddress(start));
400        return start + reader_->AddressSize();
401      } else if (header_.version >= 3) {
402        handler_->ProcessAttributeReference(dieoffset, attr, form,
403                                            reader_->ReadOffset(start));
404        return start + reader_->OffsetSize();
405      }
406      break;
407    case DW_FORM_ref_sig8:
408      handler_->ProcessAttributeSignature(dieoffset, attr, form,
409                                          reader_->ReadEightBytes(start));
410      return start + 8;
411
412    case DW_FORM_block1: {
413      uint64 datalen = reader_->ReadOneByte(start);
414      handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 1,
415                                       datalen);
416      return start + 1 + datalen;
417    }
418    case DW_FORM_block2: {
419      uint64 datalen = reader_->ReadTwoBytes(start);
420      handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 2,
421                                       datalen);
422      return start + 2 + datalen;
423    }
424    case DW_FORM_block4: {
425      uint64 datalen = reader_->ReadFourBytes(start);
426      handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 4,
427                                       datalen);
428      return start + 4 + datalen;
429    }
430    case DW_FORM_block:
431    case DW_FORM_exprloc: {
432      uint64 datalen = reader_->ReadUnsignedLEB128(start, &len);
433      handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + len,
434                                       datalen);
435      return start + datalen + len;
436    }
437    case DW_FORM_strp: {
438      assert(string_buffer_ != NULL);
439
440      const uint64 offset = reader_->ReadOffset(start);
441      assert(string_buffer_ + offset < string_buffer_ + string_buffer_length_);
442
443      const char* str = string_buffer_ + offset;
444      handler_->ProcessAttributeString(dieoffset, attr, form,
445                                       str);
446      return start + reader_->OffsetSize();
447    }
448  }
449  fprintf(stderr, "Unhandled form type\n");
450  return NULL;
451}
452
453const char* CompilationUnit::ProcessDIE(uint64 dieoffset,
454                                                 const char* start,
455                                                 const Abbrev& abbrev) {
456  for (AttributeList::const_iterator i = abbrev.attributes.begin();
457       i != abbrev.attributes.end();
458       i++)  {
459    start = ProcessAttribute(dieoffset, start, i->first, i->second);
460  }
461  return start;
462}
463
464void CompilationUnit::ProcessDIEs() {
465  const char* dieptr = after_header_;
466  size_t len;
467
468  // lengthstart is the place the length field is based on.
469  // It is the point in the header after the initial length field
470  const char* lengthstart = buffer_;
471
472  // In 64 bit dwarf, the initial length is 12 bytes, because of the
473  // 0xffffffff at the start.
474  if (reader_->OffsetSize() == 8)
475    lengthstart += 12;
476  else
477    lengthstart += 4;
478
479  std::stack<uint64> die_stack;
480
481  while (dieptr < (lengthstart + header_.length)) {
482    // We give the user the absolute offset from the beginning of
483    // debug_info, since they need it to deal with ref_addr forms.
484    uint64 absolute_offset = (dieptr - buffer_) + offset_from_section_start_;
485
486    uint64 abbrev_num = reader_->ReadUnsignedLEB128(dieptr, &len);
487
488    dieptr += len;
489
490    // Abbrev == 0 represents the end of a list of children, or padding
491    // at the end of the compilation unit.
492    if (abbrev_num == 0) {
493      if (die_stack.size() == 0)
494        // If it is padding, then we are done with the compilation unit's DIEs.
495        return;
496      const uint64 offset = die_stack.top();
497      die_stack.pop();
498      handler_->EndDIE(offset);
499      continue;
500    }
501
502    const Abbrev& abbrev = abbrevs_->at(static_cast<size_t>(abbrev_num));
503    const enum DwarfTag tag = abbrev.tag;
504    if (!handler_->StartDIE(absolute_offset, tag)) {
505      dieptr = SkipDIE(dieptr, abbrev);
506    } else {
507      dieptr = ProcessDIE(absolute_offset, dieptr, abbrev);
508    }
509
510    if (abbrev.has_children) {
511      die_stack.push(absolute_offset);
512    } else {
513      handler_->EndDIE(absolute_offset);
514    }
515  }
516}
517
518LineInfo::LineInfo(const char* buffer, uint64 buffer_length,
519                   ByteReader* reader, LineInfoHandler* handler):
520    handler_(handler), reader_(reader), buffer_(buffer),
521    buffer_length_(buffer_length) {
522  header_.std_opcode_lengths = NULL;
523}
524
525uint64 LineInfo::Start() {
526  ReadHeader();
527  ReadLines();
528  return after_header_ - buffer_;
529}
530
531// The header for a debug_line section is mildly complicated, because
532// the line info is very tightly encoded.
533void LineInfo::ReadHeader() {
534  const char* lineptr = buffer_;
535  size_t initial_length_size;
536
537  const uint64 initial_length
538    = reader_->ReadInitialLength(lineptr, &initial_length_size);
539
540  lineptr += initial_length_size;
541  header_.total_length = initial_length;
542  assert(buffer_ + initial_length_size + header_.total_length <=
543        buffer_ + buffer_length_);
544
545  // Address size *must* be set by CU ahead of time.
546  assert(reader_->AddressSize() != 0);
547
548  header_.version = reader_->ReadTwoBytes(lineptr);
549  lineptr += 2;
550
551  header_.prologue_length = reader_->ReadOffset(lineptr);
552  lineptr += reader_->OffsetSize();
553
554  header_.min_insn_length = reader_->ReadOneByte(lineptr);
555  lineptr += 1;
556
557  header_.default_is_stmt = reader_->ReadOneByte(lineptr);
558  lineptr += 1;
559
560  header_.line_base = *reinterpret_cast<const int8*>(lineptr);
561  lineptr += 1;
562
563  header_.line_range = reader_->ReadOneByte(lineptr);
564  lineptr += 1;
565
566  header_.opcode_base = reader_->ReadOneByte(lineptr);
567  lineptr += 1;
568
569  header_.std_opcode_lengths = new std::vector<unsigned char>;
570  header_.std_opcode_lengths->resize(header_.opcode_base + 1);
571  (*header_.std_opcode_lengths)[0] = 0;
572  for (int i = 1; i < header_.opcode_base; i++) {
573    (*header_.std_opcode_lengths)[i] = reader_->ReadOneByte(lineptr);
574    lineptr += 1;
575  }
576
577  // It is legal for the directory entry table to be empty.
578  if (*lineptr) {
579    uint32 dirindex = 1;
580    while (*lineptr) {
581      const char* dirname = lineptr;
582      handler_->DefineDir(dirname, dirindex);
583      lineptr += strlen(dirname) + 1;
584      dirindex++;
585    }
586  }
587  lineptr++;
588
589  // It is also legal for the file entry table to be empty.
590  if (*lineptr) {
591    uint32 fileindex = 1;
592    size_t len;
593    while (*lineptr) {
594      const char* filename = lineptr;
595      lineptr += strlen(filename) + 1;
596
597      uint64 dirindex = reader_->ReadUnsignedLEB128(lineptr, &len);
598      lineptr += len;
599
600      uint64 mod_time = reader_->ReadUnsignedLEB128(lineptr, &len);
601      lineptr += len;
602
603      uint64 filelength = reader_->ReadUnsignedLEB128(lineptr, &len);
604      lineptr += len;
605      handler_->DefineFile(filename, fileindex, static_cast<uint32>(dirindex),
606                           mod_time, filelength);
607      fileindex++;
608    }
609  }
610  lineptr++;
611
612  after_header_ = lineptr;
613}
614
615/* static */
616bool LineInfo::ProcessOneOpcode(ByteReader* reader,
617                                LineInfoHandler* handler,
618                                const struct LineInfoHeader &header,
619                                const char* start,
620                                struct LineStateMachine* lsm,
621                                size_t* len,
622                                uintptr pc,
623                                bool *lsm_passes_pc) {
624  size_t oplen = 0;
625  size_t templen;
626  uint8 opcode = reader->ReadOneByte(start);
627  oplen++;
628  start++;
629
630  // If the opcode is great than the opcode_base, it is a special
631  // opcode. Most line programs consist mainly of special opcodes.
632  if (opcode >= header.opcode_base) {
633    opcode -= header.opcode_base;
634    const int64 advance_address = (opcode / header.line_range)
635                                  * header.min_insn_length;
636    const int32 advance_line = (opcode % header.line_range)
637                               + header.line_base;
638
639    // Check if the lsm passes "pc". If so, mark it as passed.
640    if (lsm_passes_pc &&
641        lsm->address <= pc && pc < lsm->address + advance_address) {
642      *lsm_passes_pc = true;
643    }
644
645    lsm->address += advance_address;
646    lsm->line_num += advance_line;
647    lsm->basic_block = true;
648    *len = oplen;
649    return true;
650  }
651
652  // Otherwise, we have the regular opcodes
653  switch (opcode) {
654    case DW_LNS_copy: {
655      lsm->basic_block = false;
656      *len = oplen;
657      return true;
658    }
659
660    case DW_LNS_advance_pc: {
661      uint64 advance_address = reader->ReadUnsignedLEB128(start, &templen);
662      oplen += templen;
663
664      // Check if the lsm passes "pc". If so, mark it as passed.
665      if (lsm_passes_pc && lsm->address <= pc &&
666          pc < lsm->address + header.min_insn_length * advance_address) {
667        *lsm_passes_pc = true;
668      }
669
670      lsm->address += header.min_insn_length * advance_address;
671    }
672      break;
673    case DW_LNS_advance_line: {
674      const int64 advance_line = reader->ReadSignedLEB128(start, &templen);
675      oplen += templen;
676      lsm->line_num += static_cast<int32>(advance_line);
677
678      // With gcc 4.2.1, we can get the line_no here for the first time
679      // since DW_LNS_advance_line is called after DW_LNE_set_address is
680      // called. So we check if the lsm passes "pc" here, not in
681      // DW_LNE_set_address.
682      if (lsm_passes_pc && lsm->address == pc) {
683        *lsm_passes_pc = true;
684      }
685    }
686      break;
687    case DW_LNS_set_file: {
688      const uint64 fileno = reader->ReadUnsignedLEB128(start, &templen);
689      oplen += templen;
690      lsm->file_num = static_cast<uint32>(fileno);
691    }
692      break;
693    case DW_LNS_set_column: {
694      const uint64 colno = reader->ReadUnsignedLEB128(start, &templen);
695      oplen += templen;
696      lsm->column_num = static_cast<uint32>(colno);
697    }
698      break;
699    case DW_LNS_negate_stmt: {
700      lsm->is_stmt = !lsm->is_stmt;
701    }
702      break;
703    case DW_LNS_set_basic_block: {
704      lsm->basic_block = true;
705    }
706      break;
707    case DW_LNS_fixed_advance_pc: {
708      const uint16 advance_address = reader->ReadTwoBytes(start);
709      oplen += 2;
710
711      // Check if the lsm passes "pc". If so, mark it as passed.
712      if (lsm_passes_pc &&
713          lsm->address <= pc && pc < lsm->address + advance_address) {
714        *lsm_passes_pc = true;
715      }
716
717      lsm->address += advance_address;
718    }
719      break;
720    case DW_LNS_const_add_pc: {
721      const int64 advance_address = header.min_insn_length
722                                    * ((255 - header.opcode_base)
723                                       / header.line_range);
724
725      // Check if the lsm passes "pc". If so, mark it as passed.
726      if (lsm_passes_pc &&
727          lsm->address <= pc && pc < lsm->address + advance_address) {
728        *lsm_passes_pc = true;
729      }
730
731      lsm->address += advance_address;
732    }
733      break;
734    case DW_LNS_extended_op: {
735      const uint64 extended_op_len = reader->ReadUnsignedLEB128(start,
736                                                                &templen);
737      start += templen;
738      oplen += templen + extended_op_len;
739
740      const uint64 extended_op = reader->ReadOneByte(start);
741      start++;
742
743      switch (extended_op) {
744        case DW_LNE_end_sequence: {
745          lsm->end_sequence = true;
746          *len = oplen;
747          return true;
748        }
749          break;
750        case DW_LNE_set_address: {
751          // With gcc 4.2.1, we cannot tell the line_no here since
752          // DW_LNE_set_address is called before DW_LNS_advance_line is
753          // called.  So we do not check if the lsm passes "pc" here.  See
754          // also the comment in DW_LNS_advance_line.
755          uint64 address = reader->ReadAddress(start);
756          lsm->address = address;
757        }
758          break;
759        case DW_LNE_define_file: {
760          const char* filename  = start;
761
762          templen = strlen(filename) + 1;
763          start += templen;
764
765          uint64 dirindex = reader->ReadUnsignedLEB128(start, &templen);
766          oplen += templen;
767
768          const uint64 mod_time = reader->ReadUnsignedLEB128(start,
769                                                             &templen);
770          oplen += templen;
771
772          const uint64 filelength = reader->ReadUnsignedLEB128(start,
773                                                               &templen);
774          oplen += templen;
775
776          if (handler) {
777            handler->DefineFile(filename, -1, static_cast<uint32>(dirindex),
778                                mod_time, filelength);
779          }
780        }
781          break;
782      }
783    }
784      break;
785
786    default: {
787      // Ignore unknown opcode  silently
788      if (header.std_opcode_lengths) {
789        for (int i = 0; i < (*header.std_opcode_lengths)[opcode]; i++) {
790          reader->ReadUnsignedLEB128(start, &templen);
791          start += templen;
792          oplen += templen;
793        }
794      }
795    }
796      break;
797  }
798  *len = oplen;
799  return false;
800}
801
802void LineInfo::ReadLines() {
803  struct LineStateMachine lsm;
804
805  // lengthstart is the place the length field is based on.
806  // It is the point in the header after the initial length field
807  const char* lengthstart = buffer_;
808
809  // In 64 bit dwarf, the initial length is 12 bytes, because of the
810  // 0xffffffff at the start.
811  if (reader_->OffsetSize() == 8)
812    lengthstart += 12;
813  else
814    lengthstart += 4;
815
816  const char* lineptr = after_header_;
817  lsm.Reset(header_.default_is_stmt);
818
819  // The LineInfoHandler interface expects each line's length along
820  // with its address, but DWARF only provides addresses (sans
821  // length), and an end-of-sequence address; one infers the length
822  // from the next address. So we report a line only when we get the
823  // next line's address, or the end-of-sequence address.
824  bool have_pending_line = false;
825  uint64 pending_address = 0;
826  uint32 pending_file_num = 0, pending_line_num = 0, pending_column_num = 0;
827
828  while (lineptr < lengthstart + header_.total_length) {
829    size_t oplength;
830    bool add_row = ProcessOneOpcode(reader_, handler_, header_,
831                                    lineptr, &lsm, &oplength, (uintptr)-1,
832                                    NULL);
833    if (add_row) {
834      if (have_pending_line)
835        handler_->AddLine(pending_address, lsm.address - pending_address,
836                          pending_file_num, pending_line_num,
837                          pending_column_num);
838      if (lsm.end_sequence) {
839        lsm.Reset(header_.default_is_stmt);
840        have_pending_line = false;
841      } else {
842        pending_address = lsm.address;
843        pending_file_num = lsm.file_num;
844        pending_line_num = lsm.line_num;
845        pending_column_num = lsm.column_num;
846        have_pending_line = true;
847      }
848    }
849    lineptr += oplength;
850  }
851
852  after_header_ = lengthstart + header_.total_length;
853}
854
855// A DWARF rule for recovering the address or value of a register, or
856// computing the canonical frame address. There is one subclass of this for
857// each '*Rule' member function in CallFrameInfo::Handler.
858//
859// It's annoying that we have to handle Rules using pointers (because
860// the concrete instances can have an arbitrary size). They're small,
861// so it would be much nicer if we could just handle them by value
862// instead of fretting about ownership and destruction.
863//
864// It seems like all these could simply be instances of std::tr1::bind,
865// except that we need instances to be EqualityComparable, too.
866//
867// This could logically be nested within State, but then the qualified names
868// get horrendous.
869class CallFrameInfo::Rule {
870 public:
871  virtual ~Rule() { }
872
873  // Tell HANDLER that, at ADDRESS in the program, REGISTER can be
874  // recovered using this rule. If REGISTER is kCFARegister, then this rule
875  // describes how to compute the canonical frame address. Return what the
876  // HANDLER member function returned.
877  virtual bool Handle(Handler *handler,
878                      uint64 address, int register) const = 0;
879
880  // Equality on rules. We use these to decide which rules we need
881  // to report after a DW_CFA_restore_state instruction.
882  virtual bool operator==(const Rule &rhs) const = 0;
883
884  bool operator!=(const Rule &rhs) const { return ! (*this == rhs); }
885
886  // Return a pointer to a copy of this rule.
887  virtual Rule *Copy() const = 0;
888
889  // If this is a base+offset rule, change its base register to REG.
890  // Otherwise, do nothing. (Ugly, but required for DW_CFA_def_cfa_register.)
891  virtual void SetBaseRegister(unsigned reg) { }
892
893  // If this is a base+offset rule, change its offset to OFFSET. Otherwise,
894  // do nothing. (Ugly, but required for DW_CFA_def_cfa_offset.)
895  virtual void SetOffset(long long offset) { }
896};
897
898// Rule: the value the register had in the caller cannot be recovered.
899class CallFrameInfo::UndefinedRule: public CallFrameInfo::Rule {
900 public:
901  UndefinedRule() { }
902  ~UndefinedRule() { }
903  bool Handle(Handler *handler, uint64 address, int reg) const {
904    return handler->UndefinedRule(address, reg);
905  }
906  bool operator==(const Rule &rhs) const {
907    // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
908    // been carefully considered; cheap RTTI-like workarounds are forbidden.
909    const UndefinedRule *our_rhs = dynamic_cast<const UndefinedRule *>(&rhs);
910    return (our_rhs != NULL);
911  }
912  Rule *Copy() const { return new UndefinedRule(*this); }
913};
914
915// Rule: the register's value is the same as that it had in the caller.
916class CallFrameInfo::SameValueRule: public CallFrameInfo::Rule {
917 public:
918  SameValueRule() { }
919  ~SameValueRule() { }
920  bool Handle(Handler *handler, uint64 address, int reg) const {
921    return handler->SameValueRule(address, reg);
922  }
923  bool operator==(const Rule &rhs) const {
924    // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
925    // been carefully considered; cheap RTTI-like workarounds are forbidden.
926    const SameValueRule *our_rhs = dynamic_cast<const SameValueRule *>(&rhs);
927    return (our_rhs != NULL);
928  }
929  Rule *Copy() const { return new SameValueRule(*this); }
930};
931
932// Rule: the register is saved at OFFSET from BASE_REGISTER.  BASE_REGISTER
933// may be CallFrameInfo::Handler::kCFARegister.
934class CallFrameInfo::OffsetRule: public CallFrameInfo::Rule {
935 public:
936  OffsetRule(int base_register, long offset)
937      : base_register_(base_register), offset_(offset) { }
938  ~OffsetRule() { }
939  bool Handle(Handler *handler, uint64 address, int reg) const {
940    return handler->OffsetRule(address, reg, base_register_, offset_);
941  }
942  bool operator==(const Rule &rhs) const {
943    // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
944    // been carefully considered; cheap RTTI-like workarounds are forbidden.
945    const OffsetRule *our_rhs = dynamic_cast<const OffsetRule *>(&rhs);
946    return (our_rhs &&
947            base_register_ == our_rhs->base_register_ &&
948            offset_ == our_rhs->offset_);
949  }
950  Rule *Copy() const { return new OffsetRule(*this); }
951  // We don't actually need SetBaseRegister or SetOffset here, since they
952  // are only ever applied to CFA rules, for DW_CFA_def_cfa_offset, and it
953  // doesn't make sense to use OffsetRule for computing the CFA: it
954  // computes the address at which a register is saved, not a value.
955 private:
956  int base_register_;
957  long offset_;
958};
959
960// Rule: the value the register had in the caller is the value of
961// BASE_REGISTER plus offset. BASE_REGISTER may be
962// CallFrameInfo::Handler::kCFARegister.
963class CallFrameInfo::ValOffsetRule: public CallFrameInfo::Rule {
964 public:
965  ValOffsetRule(int base_register, long offset)
966      : base_register_(base_register), offset_(offset) { }
967  ~ValOffsetRule() { }
968  bool Handle(Handler *handler, uint64 address, int reg) const {
969    return handler->ValOffsetRule(address, reg, base_register_, offset_);
970  }
971  bool operator==(const Rule &rhs) const {
972    // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
973    // been carefully considered; cheap RTTI-like workarounds are forbidden.
974    const ValOffsetRule *our_rhs = dynamic_cast<const ValOffsetRule *>(&rhs);
975    return (our_rhs &&
976            base_register_ == our_rhs->base_register_ &&
977            offset_ == our_rhs->offset_);
978  }
979  Rule *Copy() const { return new ValOffsetRule(*this); }
980  void SetBaseRegister(unsigned reg) { base_register_ = reg; }
981  void SetOffset(long long offset) { offset_ = offset; }
982 private:
983  int base_register_;
984  long offset_;
985};
986
987// Rule: the register has been saved in another register REGISTER_NUMBER_.
988class CallFrameInfo::RegisterRule: public CallFrameInfo::Rule {
989 public:
990  explicit RegisterRule(int register_number)
991      : register_number_(register_number) { }
992  ~RegisterRule() { }
993  bool Handle(Handler *handler, uint64 address, int reg) const {
994    return handler->RegisterRule(address, reg, register_number_);
995  }
996  bool operator==(const Rule &rhs) const {
997    // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
998    // been carefully considered; cheap RTTI-like workarounds are forbidden.
999    const RegisterRule *our_rhs = dynamic_cast<const RegisterRule *>(&rhs);
1000    return (our_rhs && register_number_ == our_rhs->register_number_);
1001  }
1002  Rule *Copy() const { return new RegisterRule(*this); }
1003 private:
1004  int register_number_;
1005};
1006
1007// Rule: EXPRESSION evaluates to the address at which the register is saved.
1008class CallFrameInfo::ExpressionRule: public CallFrameInfo::Rule {
1009 public:
1010  explicit ExpressionRule(const string &expression)
1011      : expression_(expression) { }
1012  ~ExpressionRule() { }
1013  bool Handle(Handler *handler, uint64 address, int reg) const {
1014    return handler->ExpressionRule(address, reg, expression_);
1015  }
1016  bool operator==(const Rule &rhs) const {
1017    // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
1018    // been carefully considered; cheap RTTI-like workarounds are forbidden.
1019    const ExpressionRule *our_rhs = dynamic_cast<const ExpressionRule *>(&rhs);
1020    return (our_rhs && expression_ == our_rhs->expression_);
1021  }
1022  Rule *Copy() const { return new ExpressionRule(*this); }
1023 private:
1024  string expression_;
1025};
1026
1027// Rule: EXPRESSION evaluates to the address at which the register is saved.
1028class CallFrameInfo::ValExpressionRule: public CallFrameInfo::Rule {
1029 public:
1030  explicit ValExpressionRule(const string &expression)
1031      : expression_(expression) { }
1032  ~ValExpressionRule() { }
1033  bool Handle(Handler *handler, uint64 address, int reg) const {
1034    return handler->ValExpressionRule(address, reg, expression_);
1035  }
1036  bool operator==(const Rule &rhs) const {
1037    // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
1038    // been carefully considered; cheap RTTI-like workarounds are forbidden.
1039    const ValExpressionRule *our_rhs =
1040        dynamic_cast<const ValExpressionRule *>(&rhs);
1041    return (our_rhs && expression_ == our_rhs->expression_);
1042  }
1043  Rule *Copy() const { return new ValExpressionRule(*this); }
1044 private:
1045  string expression_;
1046};
1047
// A map from register numbers to rules, plus a separate rule for
// computing the CFA. A RuleMap owns every Rule it refers to, so copying
// a RuleMap copies its rules, and destroying it deletes them.
class CallFrameInfo::RuleMap {
 public:
  // Create an empty map: no CFA rule and no register rules.
  RuleMap() : cfa_rule_(NULL) { }
  // Create a map holding copies of RHS's rules. (cfa_rule_ must be
  // initialized first, because the assignment operator begins by
  // deleting it.)
  RuleMap(const RuleMap &rhs) : cfa_rule_(NULL) { *this = rhs; }
  ~RuleMap() { Clear(); }

  // Replace this map's contents with copies of RHS's rules.
  RuleMap &operator=(const RuleMap &rhs);

  // Set the rule for computing the CFA to RULE. Take ownership of RULE.
  // The previous CFA rule, if any, is deleted.
  void SetCFARule(Rule *rule) { delete cfa_rule_; cfa_rule_ = rule; }

  // Return the current CFA rule, or NULL if none has been set. Unlike
  // RegisterRule, this RuleMap retains ownership of the rule. We use this
  // for DW_CFA_def_cfa_offset and DW_CFA_def_cfa_register, and for
  // detecting references to the CFA before a rule for it has been
  // established.
  Rule *CFARule() const { return cfa_rule_; }

  // Return the rule for REG, or NULL if there is none. The caller takes
  // ownership of the result.
  Rule *RegisterRule(int reg) const;

  // Set the rule for computing REG to RULE. Take ownership of RULE.
  void SetRegisterRule(int reg, Rule *rule);

  // Make all the appropriate calls to HANDLER as if we were changing from
  // this RuleMap to NEW_RULES at ADDRESS. We use this to implement
  // DW_CFA_restore_state, where lots of rules can change simultaneously.
  // Return true if all handlers returned true; otherwise, return false.
  bool HandleTransitionTo(Handler *handler, uint64 address,
                          const RuleMap &new_rules) const;

 private:
  // A map from register numbers to Rules.
  typedef std::map<int, Rule *> RuleByNumber;

  // Remove all register rules and clear cfa_rule_.
  void Clear();

  // The rule for computing the canonical frame address, or NULL if none
  // has been set. This RuleMap owns this rule.
  Rule *cfa_rule_;

  // A map from register numbers to the rules for recovering their
  // values. This RuleMap owns the Rules the map refers to.
  RuleByNumber registers_;
};
1095
1096CallFrameInfo::RuleMap &CallFrameInfo::RuleMap::operator=(const RuleMap &rhs) {
1097  Clear();
1098  // Since each map owns the rules it refers to, assignment must copy them.
1099  if (rhs.cfa_rule_) cfa_rule_ = rhs.cfa_rule_->Copy();
1100  for (RuleByNumber::const_iterator it = rhs.registers_.begin();
1101       it != rhs.registers_.end(); it++)
1102    registers_[it->first] = it->second->Copy();
1103  return *this;
1104}
1105
1106CallFrameInfo::Rule *CallFrameInfo::RuleMap::RegisterRule(int reg) const {
1107  assert(reg != Handler::kCFARegister);
1108  RuleByNumber::const_iterator it = registers_.find(reg);
1109  if (it != registers_.end())
1110    return it->second->Copy();
1111  else
1112    return NULL;
1113}
1114
1115void CallFrameInfo::RuleMap::SetRegisterRule(int reg, Rule *rule) {
1116  assert(reg != Handler::kCFARegister);
1117  assert(rule);
1118  Rule **slot = &registers_[reg];
1119  delete *slot;
1120  *slot = rule;
1121}
1122
1123bool CallFrameInfo::RuleMap::HandleTransitionTo(
1124    Handler *handler,
1125    uint64 address,
1126    const RuleMap &new_rules) const {
1127  // Transition from cfa_rule_ to new_rules.cfa_rule_.
1128  if (cfa_rule_ && new_rules.cfa_rule_) {
1129    if (*cfa_rule_ != *new_rules.cfa_rule_ &&
1130        !new_rules.cfa_rule_->Handle(handler, address,
1131                                     Handler::kCFARegister))
1132      return false;
1133  } else if (cfa_rule_) {
1134    // this RuleMap has a CFA rule but new_rules doesn't.
1135    // CallFrameInfo::Handler has no way to handle this --- and shouldn't;
1136    // it's garbage input. The instruction interpreter should have
1137    // detected this and warned, so take no action here.
1138  } else if (new_rules.cfa_rule_) {
1139    // This shouldn't be possible: NEW_RULES is some prior state, and
1140    // there's no way to remove entries.
1141    assert(0);
1142  } else {
1143    // Both CFA rules are empty.  No action needed.
1144  }
1145
1146  // Traverse the two maps in order by register number, and report
1147  // whatever differences we find.
1148  RuleByNumber::const_iterator old_it = registers_.begin();
1149  RuleByNumber::const_iterator new_it = new_rules.registers_.begin();
1150  while (old_it != registers_.end() && new_it != new_rules.registers_.end()) {
1151    if (old_it->first < new_it->first) {
1152      // This RuleMap has an entry for old_it->first, but NEW_RULES
1153      // doesn't.
1154      //
1155      // This isn't really the right thing to do, but since CFI generally
1156      // only mentions callee-saves registers, and GCC's convention for
1157      // callee-saves registers is that they are unchanged, it's a good
1158      // approximation.
1159      if (!handler->SameValueRule(address, old_it->first))
1160        return false;
1161      old_it++;
1162    } else if (old_it->first > new_it->first) {
1163      // NEW_RULES has entry for new_it->first, but this RuleMap
1164      // doesn't. This shouldn't be possible: NEW_RULES is some prior
1165      // state, and there's no way to remove entries.
1166      assert(0);
1167    } else {
1168      // Both maps have an entry for this register. Report the new
1169      // rule if it is different.
1170      if (*old_it->second != *new_it->second &&
1171          !new_it->second->Handle(handler, address, new_it->first))
1172        return false;
1173      new_it++, old_it++;
1174    }
1175  }
1176  // Finish off entries from this RuleMap with no counterparts in new_rules.
1177  while (old_it != registers_.end()) {
1178    if (!handler->SameValueRule(address, old_it->first))
1179      return false;
1180    old_it++;
1181  }
1182  // Since we only make transitions from a rule set to some previously
1183  // saved rule set, and we can only add rules to the map, NEW_RULES
1184  // must have fewer rules than *this.
1185  assert(new_it == new_rules.registers_.end());
1186
1187  return true;
1188}
1189
1190// Remove all register rules and clear cfa_rule_.
1191void CallFrameInfo::RuleMap::Clear() {
1192  delete cfa_rule_;
1193  cfa_rule_ = NULL;
1194  for (RuleByNumber::iterator it = registers_.begin();
1195       it != registers_.end(); it++)
1196    delete it->second;
1197  registers_.clear();
1198}
1199
// The state of the call frame information interpreter as it processes
// instructions from a CIE and FDE.
class CallFrameInfo::State {
 public:
  // Create a call frame information interpreter state with the given
  // reporter, reader, handler, and initial call frame info address.
  // No entry is selected yet: entry_ and cursor_ start out NULL, and are
  // set by InterpretCIE and InterpretFDE.
  State(ByteReader *reader, Handler *handler, Reporter *reporter,
        uint64 address)
      : reader_(reader), handler_(handler), reporter_(reporter),
        address_(address), entry_(NULL), cursor_(NULL) { }

  // Interpret instructions from CIE, save the resulting rule set for
  // DW_CFA_restore instructions, and return true. On error, report
  // the problem to reporter_ and return false.
  bool InterpretCIE(const CIE &cie);

  // Interpret instructions from FDE, and return true. On error,
  // report the problem to reporter_ and return false.
  bool InterpretFDE(const FDE &fde);

 private:
  // The operands of a CFI instruction, for ParseOperands.
  //
  // This struct has no constructor, so the numeric members hold
  // indeterminate values until something assigns them. ParseOperands
  // writes only the members named by its FORMAT argument.
  struct Operands {
    unsigned register_number;  // A register number.
    uint64 offset;             // An offset or address.
    long signed_offset;        // A signed offset.
    string expression;         // A DWARF expression.
  };

  // Parse CFI instruction operands from STATE's instruction stream as
  // described by FORMAT. On success, populate OPERANDS with the
  // results, and return true. On failure, report the problem and
  // return false.
  //
  // Each character of FORMAT should be one of the following:
  //
  //   'r'  unsigned LEB128 register number (OPERANDS->register_number)
  //   'o'  unsigned LEB128 offset          (OPERANDS->offset)
  //   's'  signed LEB128 offset            (OPERANDS->signed_offset)
  //   'a'  machine-size address            (OPERANDS->offset)
  //        (If the CIE has a 'z' augmentation string, 'a' uses the
  //        encoding specified by the 'R' argument.)
  //   '1'  a one-byte offset               (OPERANDS->offset)
  //   '2'  a two-byte offset               (OPERANDS->offset)
  //   '4'  a four-byte offset              (OPERANDS->offset)
  //   '8'  an eight-byte offset            (OPERANDS->offset)
  //   'e'  a DW_FORM_block holding a       (OPERANDS->expression)
  //        DWARF expression
  bool ParseOperands(const char *format, Operands *operands);

  // Interpret one CFI instruction from STATE's instruction stream, update
  // STATE, report any rule changes to handler_, and return true. On
  // failure, report the problem and return false.
  bool DoInstruction();

  // The following Do* member functions are subroutines of DoInstruction,
  // factoring out the actual work of operations that have several
  // different encodings.

  // Set the CFA rule to be the value of BASE_REGISTER plus OFFSET, and
  // return true. On failure, report and return false. (Used for
  // DW_CFA_def_cfa and DW_CFA_def_cfa_sf.)
  bool DoDefCFA(unsigned base_register, long offset);

  // Change the offset of the CFA rule to OFFSET, and return true. On
  // failure, report and return false. (Subroutine for
  // DW_CFA_def_cfa_offset and DW_CFA_def_cfa_offset_sf.)
  bool DoDefCFAOffset(long offset);

  // Specify that REG can be recovered using RULE, and return true. On
  // failure, report and return false.
  bool DoRule(unsigned reg, Rule *rule);

  // Specify that REG can be found at OFFSET from the CFA, and return true.
  // On failure, report and return false. (Subroutine for DW_CFA_offset,
  // DW_CFA_offset_extended, and DW_CFA_offset_extended_sf.)
  bool DoOffset(unsigned reg, long offset);

  // Specify that the caller's value for REG is the CFA plus OFFSET,
  // and return true. On failure, report and return false. (Subroutine
  // for DW_CFA_val_offset and DW_CFA_val_offset_sf.)
  bool DoValOffset(unsigned reg, long offset);

  // Restore REG to the rule established in the CIE, and return true. On
  // failure, report and return false. (Subroutine for DW_CFA_restore and
  // DW_CFA_restore_extended.)
  bool DoRestore(unsigned reg);

  // Return the section offset of the instruction at cursor. For use
  // in error messages. Computed as the entry's own offset plus the
  // cursor's distance from the start of the entry.
  uint64 CursorOffset() { return entry_->offset + (cursor_ - entry_->start); }

  // Report that entry_ is incomplete, and return false. For brevity:
  // callers can write 'return ReportIncomplete();'.
  bool ReportIncomplete() {
    reporter_->Incomplete(entry_->offset, entry_->kind);
    return false;
  }

  // For reading multi-byte values with the appropriate endianness.
  ByteReader *reader_;

  // The handler to which we should report the data we find.
  Handler *handler_;

  // For reporting problems in the info we're parsing.
  Reporter *reporter_;

  // The code address to which the next instruction in the stream applies.
  uint64 address_;

  // The entry whose instructions we are currently processing. This is
  // first a CIE, and then an FDE.
  const Entry *entry_;

  // The next instruction to process.
  const char *cursor_;

  // The current set of rules.
  RuleMap rules_;

  // The set of rules established by the CIE, used by DW_CFA_restore
  // and DW_CFA_restore_extended. We set this after interpreting the
  // CIE's instructions.
  RuleMap cie_rules_;

  // A stack of saved states, for DW_CFA_remember_state and
  // DW_CFA_restore_state.
  std::stack<RuleMap> saved_rules_;
};
1329
1330bool CallFrameInfo::State::InterpretCIE(const CIE &cie) {
1331  entry_ = &cie;
1332  cursor_ = entry_->instructions;
1333  while (cursor_ < entry_->end)
1334    if (!DoInstruction())
1335      return false;
1336  // Note the rules established by the CIE, for use by DW_CFA_restore
1337  // and DW_CFA_restore_extended.
1338  cie_rules_ = rules_;
1339  return true;
1340}
1341
1342bool CallFrameInfo::State::InterpretFDE(const FDE &fde) {
1343  entry_ = &fde;
1344  cursor_ = entry_->instructions;
1345  while (cursor_ < entry_->end)
1346    if (!DoInstruction())
1347      return false;
1348  return true;
1349}
1350
1351bool CallFrameInfo::State::ParseOperands(const char *format,
1352                                         Operands *operands) {
1353  size_t len;
1354  const char *operand;
1355
1356  for (operand = format; *operand; operand++) {
1357    size_t bytes_left = entry_->end - cursor_;
1358    switch (*operand) {
1359      case 'r':
1360        operands->register_number = reader_->ReadUnsignedLEB128(cursor_, &len);
1361        if (len > bytes_left) return ReportIncomplete();
1362        cursor_ += len;
1363        break;
1364
1365      case 'o':
1366        operands->offset = reader_->ReadUnsignedLEB128(cursor_, &len);
1367        if (len > bytes_left) return ReportIncomplete();
1368        cursor_ += len;
1369        break;
1370
1371      case 's':
1372        operands->signed_offset = reader_->ReadSignedLEB128(cursor_, &len);
1373        if (len > bytes_left) return ReportIncomplete();
1374        cursor_ += len;
1375        break;
1376
1377      case 'a':
1378        operands->offset =
1379          reader_->ReadEncodedPointer(cursor_, entry_->cie->pointer_encoding,
1380                                      &len);
1381        if (len > bytes_left) return ReportIncomplete();
1382        cursor_ += len;
1383        break;
1384
1385      case '1':
1386        if (1 > bytes_left) return ReportIncomplete();
1387        operands->offset = static_cast<unsigned char>(*cursor_++);
1388        break;
1389
1390      case '2':
1391        if (2 > bytes_left) return ReportIncomplete();
1392        operands->offset = reader_->ReadTwoBytes(cursor_);
1393        cursor_ += 2;
1394        break;
1395
1396      case '4':
1397        if (4 > bytes_left) return ReportIncomplete();
1398        operands->offset = reader_->ReadFourBytes(cursor_);
1399        cursor_ += 4;
1400        break;
1401
1402      case '8':
1403        if (8 > bytes_left) return ReportIncomplete();
1404        operands->offset = reader_->ReadEightBytes(cursor_);
1405        cursor_ += 8;
1406        break;
1407
1408      case 'e': {
1409        size_t expression_length = reader_->ReadUnsignedLEB128(cursor_, &len);
1410        if (len > bytes_left || expression_length > bytes_left - len)
1411          return ReportIncomplete();
1412        cursor_ += len;
1413        operands->expression = string(cursor_, expression_length);
1414        cursor_ += expression_length;
1415        break;
1416      }
1417
1418      default:
1419          assert(0);
1420    }
1421  }
1422
1423  return true;
1424}
1425
1426bool CallFrameInfo::State::DoInstruction() {
1427  CIE *cie = entry_->cie;
1428  Operands ops;
1429
1430  // Our entry's kind should have been set by now.
1431  assert(entry_->kind != kUnknown);
1432
1433  // We shouldn't have been invoked unless there were more
1434  // instructions to parse.
1435  assert(cursor_ < entry_->end);
1436
1437  unsigned opcode = *cursor_++;
1438  if ((opcode & 0xc0) != 0) {
1439    switch (opcode & 0xc0) {
1440      // Advance the address.
1441      case DW_CFA_advance_loc: {
1442        size_t code_offset = opcode & 0x3f;
1443        address_ += code_offset * cie->code_alignment_factor;
1444        break;
1445      }
1446
1447      // Find a register at an offset from the CFA.
1448      case DW_CFA_offset:
1449        if (!ParseOperands("o", &ops) ||
1450            !DoOffset(opcode & 0x3f, ops.offset * cie->data_alignment_factor))
1451          return false;
1452        break;
1453
1454      // Restore the rule established for a register by the CIE.
1455      case DW_CFA_restore:
1456        if (!DoRestore(opcode & 0x3f)) return false;
1457        break;
1458
1459      // The 'if' above should have excluded this possibility.
1460      default:
1461        assert(0);
1462    }
1463
1464    // Return here, so the big switch below won't be indented.
1465    return true;
1466  }
1467
1468  switch (opcode) {
1469    // Set the address.
1470    case DW_CFA_set_loc:
1471      if (!ParseOperands("a", &ops)) return false;
1472      address_ = ops.offset;
1473      break;
1474
1475    // Advance the address.
1476    case DW_CFA_advance_loc1:
1477      if (!ParseOperands("1", &ops)) return false;
1478      address_ += ops.offset * cie->code_alignment_factor;
1479      break;
1480
1481    // Advance the address.
1482    case DW_CFA_advance_loc2:
1483      if (!ParseOperands("2", &ops)) return false;
1484      address_ += ops.offset * cie->code_alignment_factor;
1485      break;
1486
1487    // Advance the address.
1488    case DW_CFA_advance_loc4:
1489      if (!ParseOperands("4", &ops)) return false;
1490      address_ += ops.offset * cie->code_alignment_factor;
1491      break;
1492
1493    // Advance the address.
1494    case DW_CFA_MIPS_advance_loc8:
1495      if (!ParseOperands("8", &ops)) return false;
1496      address_ += ops.offset * cie->code_alignment_factor;
1497      break;
1498
1499    // Compute the CFA by adding an offset to a register.
1500    case DW_CFA_def_cfa:
1501      if (!ParseOperands("ro", &ops) ||
1502          !DoDefCFA(ops.register_number, ops.offset))
1503        return false;
1504      break;
1505
1506    // Compute the CFA by adding an offset to a register.
1507    case DW_CFA_def_cfa_sf:
1508      if (!ParseOperands("rs", &ops) ||
1509          !DoDefCFA(ops.register_number,
1510                    ops.signed_offset * cie->data_alignment_factor))
1511        return false;
1512      break;
1513
1514    // Change the base register used to compute the CFA.
1515    case DW_CFA_def_cfa_register: {
1516      if (!ParseOperands("r", &ops)) return false;
1517      Rule *cfa_rule = rules_.CFARule();
1518      if (!cfa_rule) {
1519        if (!DoDefCFA(ops.register_number, ops.offset)) {
1520          reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
1521          return false;
1522        }
1523      } else {
1524        cfa_rule->SetBaseRegister(ops.register_number);
1525        if (!cfa_rule->Handle(handler_, address_,
1526                              Handler::kCFARegister))
1527        return false;
1528      }
1529      break;
1530    }
1531
1532    // Change the offset used to compute the CFA.
1533    case DW_CFA_def_cfa_offset:
1534      if (!ParseOperands("o", &ops) ||
1535          !DoDefCFAOffset(ops.offset))
1536        return false;
1537      break;
1538
1539    // Change the offset used to compute the CFA.
1540    case DW_CFA_def_cfa_offset_sf:
1541      if (!ParseOperands("s", &ops) ||
1542          !DoDefCFAOffset(ops.signed_offset * cie->data_alignment_factor))
1543        return false;
1544      break;
1545
1546    // Specify an expression whose value is the CFA.
1547    case DW_CFA_def_cfa_expression: {
1548      if (!ParseOperands("e", &ops))
1549        return false;
1550      Rule *rule = new ValExpressionRule(ops.expression);
1551      rules_.SetCFARule(rule);
1552      if (!rule->Handle(handler_, address_,
1553                        Handler::kCFARegister))
1554        return false;
1555      break;
1556    }
1557
1558    // The register's value cannot be recovered.
1559    case DW_CFA_undefined: {
1560      if (!ParseOperands("r", &ops) ||
1561          !DoRule(ops.register_number, new UndefinedRule()))
1562        return false;
1563      break;
1564    }
1565
1566    // The register's value is unchanged from its value in the caller.
1567    case DW_CFA_same_value: {
1568      if (!ParseOperands("r", &ops) ||
1569          !DoRule(ops.register_number, new SameValueRule()))
1570        return false;
1571      break;
1572    }
1573
1574    // Find a register at an offset from the CFA.
1575    case DW_CFA_offset_extended:
1576      if (!ParseOperands("ro", &ops) ||
1577          !DoOffset(ops.register_number,
1578                    ops.offset * cie->data_alignment_factor))
1579        return false;
1580      break;
1581
1582    // The register is saved at an offset from the CFA.
1583    case DW_CFA_offset_extended_sf:
1584      if (!ParseOperands("rs", &ops) ||
1585          !DoOffset(ops.register_number,
1586                    ops.signed_offset * cie->data_alignment_factor))
1587        return false;
1588      break;
1589
1590    // The register is saved at an offset from the CFA.
1591    case DW_CFA_GNU_negative_offset_extended:
1592      if (!ParseOperands("ro", &ops) ||
1593          !DoOffset(ops.register_number,
1594                    -ops.offset * cie->data_alignment_factor))
1595        return false;
1596      break;
1597
1598    // The register's value is the sum of the CFA plus an offset.
1599    case DW_CFA_val_offset:
1600      if (!ParseOperands("ro", &ops) ||
1601          !DoValOffset(ops.register_number,
1602                       ops.offset * cie->data_alignment_factor))
1603        return false;
1604      break;
1605
1606    // The register's value is the sum of the CFA plus an offset.
1607    case DW_CFA_val_offset_sf:
1608      if (!ParseOperands("rs", &ops) ||
1609          !DoValOffset(ops.register_number,
1610                       ops.signed_offset * cie->data_alignment_factor))
1611        return false;
1612      break;
1613
1614    // The register has been saved in another register.
1615    case DW_CFA_register: {
1616      if (!ParseOperands("ro", &ops) ||
1617          !DoRule(ops.register_number, new RegisterRule(ops.offset)))
1618        return false;
1619      break;
1620    }
1621
1622    // An expression yields the address at which the register is saved.
1623    case DW_CFA_expression: {
1624      if (!ParseOperands("re", &ops) ||
1625          !DoRule(ops.register_number, new ExpressionRule(ops.expression)))
1626        return false;
1627      break;
1628    }
1629
1630    // An expression yields the caller's value for the register.
1631    case DW_CFA_val_expression: {
1632      if (!ParseOperands("re", &ops) ||
1633          !DoRule(ops.register_number, new ValExpressionRule(ops.expression)))
1634        return false;
1635      break;
1636    }
1637
1638    // Restore the rule established for a register by the CIE.
1639    case DW_CFA_restore_extended:
1640      if (!ParseOperands("r", &ops) ||
1641          !DoRestore( ops.register_number))
1642        return false;
1643      break;
1644
1645    // Save the current set of rules on a stack.
1646    case DW_CFA_remember_state:
1647      saved_rules_.push(rules_);
1648      break;
1649
1650    // Pop the current set of rules off the stack.
1651    case DW_CFA_restore_state: {
1652      if (saved_rules_.empty()) {
1653        reporter_->EmptyStateStack(entry_->offset, entry_->kind,
1654                                   CursorOffset());
1655        return false;
1656      }
1657      const RuleMap &new_rules = saved_rules_.top();
1658      if (rules_.CFARule() && !new_rules.CFARule()) {
1659        reporter_->ClearingCFARule(entry_->offset, entry_->kind,
1660                                   CursorOffset());
1661        return false;
1662      }
1663      rules_.HandleTransitionTo(handler_, address_, new_rules);
1664      rules_ = new_rules;
1665      saved_rules_.pop();
1666      break;
1667    }
1668
1669    // No operation.  (Padding instruction.)
1670    case DW_CFA_nop:
1671      break;
1672
1673    // A SPARC register window save: Registers 8 through 15 (%o0-%o7)
1674    // are saved in registers 24 through 31 (%i0-%i7), and registers
1675    // 16 through 31 (%l0-%l7 and %i0-%i7) are saved at CFA offsets
1676    // (0-15 * the register size). The register numbers must be
1677    // hard-coded. A GNU extension, and not a pretty one.
1678    case DW_CFA_GNU_window_save: {
1679      // Save %o0-%o7 in %i0-%i7.
1680      for (int i = 8; i < 16; i++)
1681        if (!DoRule(i, new RegisterRule(i + 16)))
1682          return false;
1683      // Save %l0-%l7 and %i0-%i7 at the CFA.
1684      for (int i = 16; i < 32; i++)
1685        // Assume that the byte reader's address size is the same as
1686        // the architecture's register size. !@#%*^ hilarious.
1687        if (!DoRule(i, new OffsetRule(Handler::kCFARegister,
1688                                      (i - 16) * reader_->AddressSize())))
1689          return false;
1690      break;
1691    }
1692
1693    // I'm not sure what this is. GDB doesn't use it for unwinding.
1694    case DW_CFA_GNU_args_size:
1695      if (!ParseOperands("o", &ops)) return false;
1696      break;
1697
1698    // An opcode we don't recognize.
1699    default: {
1700      reporter_->BadInstruction(entry_->offset, entry_->kind, CursorOffset());
1701      return false;
1702    }
1703  }
1704
1705  return true;
1706}
1707
1708bool CallFrameInfo::State::DoDefCFA(unsigned base_register, long offset) {
1709  Rule *rule = new ValOffsetRule(base_register, offset);
1710  rules_.SetCFARule(rule);
1711  return rule->Handle(handler_, address_,
1712                      Handler::kCFARegister);
1713}
1714
1715bool CallFrameInfo::State::DoDefCFAOffset(long offset) {
1716  Rule *cfa_rule = rules_.CFARule();
1717  if (!cfa_rule) {
1718    reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
1719    return false;
1720  }
1721  cfa_rule->SetOffset(offset);
1722  return cfa_rule->Handle(handler_, address_,
1723                          Handler::kCFARegister);
1724}
1725
1726bool CallFrameInfo::State::DoRule(unsigned reg, Rule *rule) {
1727  rules_.SetRegisterRule(reg, rule);
1728  return rule->Handle(handler_, address_, reg);
1729}
1730
1731bool CallFrameInfo::State::DoOffset(unsigned reg, long offset) {
1732  if (!rules_.CFARule()) {
1733    reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
1734    return false;
1735  }
1736  return DoRule(reg,
1737                new OffsetRule(Handler::kCFARegister, offset));
1738}
1739
1740bool CallFrameInfo::State::DoValOffset(unsigned reg, long offset) {
1741  if (!rules_.CFARule()) {
1742    reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
1743    return false;
1744  }
1745  return DoRule(reg,
1746                new ValOffsetRule(Handler::kCFARegister, offset));
1747}
1748
1749bool CallFrameInfo::State::DoRestore(unsigned reg) {
1750  // DW_CFA_restore and DW_CFA_restore_extended don't make sense in a CIE.
1751  if (entry_->kind == kCIE) {
1752    reporter_->RestoreInCIE(entry_->offset, CursorOffset());
1753    return false;
1754  }
1755  Rule *rule = cie_rules_.RegisterRule(reg);
1756  if (!rule) {
1757    // This isn't really the right thing to do, but since CFI generally
1758    // only mentions callee-saves registers, and GCC's convention for
1759    // callee-saves registers is that they are unchanged, it's a good
1760    // approximation.
1761    rule = new SameValueRule();
1762  }
1763  return DoRule(reg, rule);
1764}
1765
1766bool CallFrameInfo::ReadEntryPrologue(const char *cursor, Entry *entry) {
1767  const char *buffer_end = buffer_ + buffer_length_;
1768
1769  // Initialize enough of ENTRY for use in error reporting.
1770  entry->offset = cursor - buffer_;
1771  entry->start = cursor;
1772  entry->kind = kUnknown;
1773  entry->end = NULL;
1774
1775  // Read the initial length. This sets reader_'s offset size.
1776  size_t length_size;
1777  uint64 length = reader_->ReadInitialLength(cursor, &length_size);
1778  if (length_size > size_t(buffer_end - cursor))
1779    return ReportIncomplete(entry);
1780  cursor += length_size;
1781
1782  // In a .eh_frame section, a length of zero marks the end of the series
1783  // of entries.
1784  if (length == 0 && eh_frame_) {
1785    entry->kind = kTerminator;
1786    entry->end = cursor;
1787    return true;
1788  }
1789
1790  // Validate the length.
1791  if (length > size_t(buffer_end - cursor))
1792    return ReportIncomplete(entry);
1793
1794  // The length is the number of bytes after the initial length field;
1795  // we have that position handy at this point, so compute the end
1796  // now. (If we're parsing 64-bit-offset DWARF on a 32-bit machine,
1797  // and the length didn't fit in a size_t, we would have rejected it
1798  // above.)
1799  entry->end = cursor + length;
1800
1801  // Parse the next field: either the offset of a CIE or a CIE id.
1802  size_t offset_size = reader_->OffsetSize();
1803  if (offset_size > size_t(entry->end - cursor)) return ReportIncomplete(entry);
1804  entry->id = reader_->ReadOffset(cursor);
1805
1806  // Don't advance cursor past id field yet; in .eh_frame data we need
1807  // the id's position to compute the section offset of an FDE's CIE.
1808
1809  // Now we can decide what kind of entry this is.
1810  if (eh_frame_) {
1811    // In .eh_frame data, an ID of zero marks the entry as a CIE, and
1812    // anything else is an offset from the id field of the FDE to the start
1813    // of the CIE.
1814    if (entry->id == 0) {
1815      entry->kind = kCIE;
1816    } else {
1817      entry->kind = kFDE;
1818      // Turn the offset from the id into an offset from the buffer's start.
1819      entry->id = (cursor - buffer_) - entry->id;
1820    }
1821  } else {
1822    // In DWARF CFI data, an ID of ~0 (of the appropriate width, given the
1823    // offset size for the entry) marks the entry as a CIE, and anything
1824    // else is the offset of the CIE from the beginning of the section.
1825    if (offset_size == 4)
1826      entry->kind = (entry->id == 0xffffffff) ? kCIE : kFDE;
1827    else {
1828      assert(offset_size == 8);
1829      entry->kind = (entry->id == 0xffffffffffffffffULL) ? kCIE : kFDE;
1830    }
1831  }
1832
1833  // Now advance cursor past the id.
1834   cursor += offset_size;
1835
1836  // The fields specific to this kind of entry start here.
1837  entry->fields = cursor;
1838
1839  entry->cie = NULL;
1840
1841  return true;
1842}
1843
1844bool CallFrameInfo::ReadCIEFields(CIE *cie) {
1845  const char *cursor = cie->fields;
1846  size_t len;
1847
1848  assert(cie->kind == kCIE);
1849
1850  // Prepare for early exit.
1851  cie->version = 0;
1852  cie->augmentation.clear();
1853  cie->code_alignment_factor = 0;
1854  cie->data_alignment_factor = 0;
1855  cie->return_address_register = 0;
1856  cie->has_z_augmentation = false;
1857  cie->pointer_encoding = DW_EH_PE_absptr;
1858  cie->instructions = 0;
1859
1860  // Parse the version number.
1861  if (cie->end - cursor < 1)
1862    return ReportIncomplete(cie);
1863  cie->version = reader_->ReadOneByte(cursor);
1864  cursor++;
1865
1866  // If we don't recognize the version, we can't parse any more fields of the
1867  // CIE. For DWARF CFI, we handle versions 1 through 3 (there was never a
1868  // version 2 of CFI data). For .eh_frame, we handle versions 1 and 3 as well;
1869  // the difference between those versions seems to be the same as for
1870  // .debug_frame.
1871  if (cie->version < 1 || cie->version > 3) {
1872    reporter_->UnrecognizedVersion(cie->offset, cie->version);
1873    return false;
1874  }
1875
1876  const char *augmentation_start = cursor;
1877  const void *augmentation_end =
1878      memchr(augmentation_start, '\0', cie->end - augmentation_start);
1879  if (! augmentation_end) return ReportIncomplete(cie);
1880  cursor = static_cast<const char *>(augmentation_end);
1881  cie->augmentation = string(augmentation_start,
1882                                  cursor - augmentation_start);
1883  // Skip the terminating '\0'.
1884  cursor++;
1885
1886  // Is this CFI augmented?
1887  if (!cie->augmentation.empty()) {
1888    // Is it an augmentation we recognize?
1889    if (cie->augmentation[0] == DW_Z_augmentation_start) {
1890      // Linux C++ ABI 'z' augmentation, used for exception handling data.
1891      cie->has_z_augmentation = true;
1892    } else {
1893      // Not an augmentation we recognize. Augmentations can have arbitrary
1894      // effects on the form of rest of the content, so we have to give up.
1895      reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation);
1896      return false;
1897    }
1898  }
1899
1900  // Parse the code alignment factor.
1901  cie->code_alignment_factor = reader_->ReadUnsignedLEB128(cursor, &len);
1902  if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
1903  cursor += len;
1904
1905  // Parse the data alignment factor.
1906  cie->data_alignment_factor = reader_->ReadSignedLEB128(cursor, &len);
1907  if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
1908  cursor += len;
1909
1910  // Parse the return address register. This is a ubyte in version 1, and
1911  // a ULEB128 in version 3.
1912  if (cie->version == 1) {
1913    if (cursor >= cie->end) return ReportIncomplete(cie);
1914    cie->return_address_register = uint8(*cursor++);
1915  } else {
1916    cie->return_address_register = reader_->ReadUnsignedLEB128(cursor, &len);
1917    if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
1918    cursor += len;
1919  }
1920
1921  // If we have a 'z' augmentation string, find the augmentation data and
1922  // use the augmentation string to parse it.
1923  if (cie->has_z_augmentation) {
1924    uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &len);
1925    if (size_t(cie->end - cursor) < len + data_size)
1926      return ReportIncomplete(cie);
1927    cursor += len;
1928    const char *data = cursor;
1929    cursor += data_size;
1930    const char *data_end = cursor;
1931
1932    cie->has_z_lsda = false;
1933    cie->has_z_personality = false;
1934    cie->has_z_signal_frame = false;
1935
1936    // Walk the augmentation string, and extract values from the
1937    // augmentation data as the string directs.
1938    for (size_t i = 1; i < cie->augmentation.size(); i++) {
1939      switch (cie->augmentation[i]) {
1940        case DW_Z_has_LSDA:
1941          // The CIE's augmentation data holds the language-specific data
1942          // area pointer's encoding, and the FDE's augmentation data holds
1943          // the pointer itself.
1944          cie->has_z_lsda = true;
1945          // Fetch the LSDA encoding from the augmentation data.
1946          if (data >= data_end) return ReportIncomplete(cie);
1947          cie->lsda_encoding = DwarfPointerEncoding(*data++);
1948          if (!reader_->ValidEncoding(cie->lsda_encoding)) {
1949            reporter_->InvalidPointerEncoding(cie->offset, cie->lsda_encoding);
1950            return false;
1951          }
1952          // Don't check if the encoding is usable here --- we haven't
1953          // read the FDE's fields yet, so we're not prepared for
1954          // DW_EH_PE_funcrel, although that's a fine encoding for the
1955          // LSDA to use, since it appears in the FDE.
1956          break;
1957
1958        case DW_Z_has_personality_routine:
1959          // The CIE's augmentation data holds the personality routine
1960          // pointer's encoding, followed by the pointer itself.
1961          cie->has_z_personality = true;
1962          // Fetch the personality routine pointer's encoding from the
1963          // augmentation data.
1964          if (data >= data_end) return ReportIncomplete(cie);
1965          cie->personality_encoding = DwarfPointerEncoding(*data++);
1966          if (!reader_->ValidEncoding(cie->personality_encoding)) {
1967            reporter_->InvalidPointerEncoding(cie->offset,
1968                                              cie->personality_encoding);
1969            return false;
1970          }
1971          if (!reader_->UsableEncoding(cie->personality_encoding)) {
1972            reporter_->UnusablePointerEncoding(cie->offset,
1973                                               cie->personality_encoding);
1974            return false;
1975          }
1976          // Fetch the personality routine's pointer itself from the data.
1977          cie->personality_address =
1978            reader_->ReadEncodedPointer(data, cie->personality_encoding,
1979                                        &len);
1980          if (len > size_t(data_end - data))
1981            return ReportIncomplete(cie);
1982          data += len;
1983          break;
1984
1985        case DW_Z_has_FDE_address_encoding:
1986          // The CIE's augmentation data holds the pointer encoding to use
1987          // for addresses in the FDE.
1988          if (data >= data_end) return ReportIncomplete(cie);
1989          cie->pointer_encoding = DwarfPointerEncoding(*data++);
1990          if (!reader_->ValidEncoding(cie->pointer_encoding)) {
1991            reporter_->InvalidPointerEncoding(cie->offset,
1992                                              cie->pointer_encoding);
1993            return false;
1994          }
1995          if (!reader_->UsableEncoding(cie->pointer_encoding)) {
1996            reporter_->UnusablePointerEncoding(cie->offset,
1997                                               cie->pointer_encoding);
1998            return false;
1999          }
2000          break;
2001
2002        case DW_Z_is_signal_trampoline:
2003          // Frames using this CIE are signal delivery frames.
2004          cie->has_z_signal_frame = true;
2005          break;
2006
2007        default:
2008          // An augmentation we don't recognize.
2009          reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation);
2010          return false;
2011      }
2012    }
2013  }
2014
2015  // The CIE's instructions start here.
2016  cie->instructions = cursor;
2017
2018  return true;
2019}
2020
2021bool CallFrameInfo::ReadFDEFields(FDE *fde) {
2022  const char *cursor = fde->fields;
2023  size_t size;
2024
2025  fde->address = reader_->ReadEncodedPointer(cursor, fde->cie->pointer_encoding,
2026                                             &size);
2027  if (size > size_t(fde->end - cursor))
2028    return ReportIncomplete(fde);
2029  cursor += size;
2030  reader_->SetFunctionBase(fde->address);
2031
2032  // For the length, we strip off the upper nybble of the encoding used for
2033  // the starting address.
2034  DwarfPointerEncoding length_encoding =
2035    DwarfPointerEncoding(fde->cie->pointer_encoding & 0x0f);
2036  fde->size = reader_->ReadEncodedPointer(cursor, length_encoding, &size);
2037  if (size > size_t(fde->end - cursor))
2038    return ReportIncomplete(fde);
2039  cursor += size;
2040
2041  // If the CIE has a 'z' augmentation string, then augmentation data
2042  // appears here.
2043  if (fde->cie->has_z_augmentation) {
2044    uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &size);
2045    if (size_t(fde->end - cursor) < size + data_size)
2046      return ReportIncomplete(fde);
2047    cursor += size;
2048
2049    // In the abstract, we should walk the augmentation string, and extract
2050    // items from the FDE's augmentation data as we encounter augmentation
2051    // string characters that specify their presence: the ordering of items
2052    // in the augmentation string determines the arrangement of values in
2053    // the augmentation data.
2054    //
2055    // In practice, there's only ever one value in FDE augmentation data
2056    // that we support --- the LSDA pointer --- and we have to bail if we
2057    // see any unrecognized augmentation string characters. So if there is
2058    // anything here at all, we know what it is, and where it starts.
2059    if (fde->cie->has_z_lsda) {
2060      // Check whether the LSDA's pointer encoding is usable now: only once
2061      // we've parsed the FDE's starting address do we call reader_->
2062      // SetFunctionBase, so that the DW_EH_PE_funcrel encoding becomes
2063      // usable.
2064      if (!reader_->UsableEncoding(fde->cie->lsda_encoding)) {
2065        reporter_->UnusablePointerEncoding(fde->cie->offset,
2066                                           fde->cie->lsda_encoding);
2067        return false;
2068      }
2069
2070      fde->lsda_address =
2071        reader_->ReadEncodedPointer(cursor, fde->cie->lsda_encoding, &size);
2072      if (size > data_size)
2073        return ReportIncomplete(fde);
2074      // Ideally, we would also complain here if there were unconsumed
2075      // augmentation data.
2076    }
2077
2078    cursor += data_size;
2079  }
2080
2081  // The FDE's instructions start after those.
2082  fde->instructions = cursor;
2083
2084  return true;
2085}
2086
2087bool CallFrameInfo::Start() {
2088  const char *buffer_end = buffer_ + buffer_length_;
2089  const char *cursor;
2090  bool all_ok = true;
2091  const char *entry_end;
2092  bool ok;
2093
2094  // Traverse all the entries in buffer_, skipping CIEs and offering
2095  // FDEs to the handler.
2096  for (cursor = buffer_; cursor < buffer_end;
2097       cursor = entry_end, all_ok = all_ok && ok) {
2098    FDE fde;
2099
2100    // Make it easy to skip this entry with 'continue': assume that
2101    // things are not okay until we've checked all the data, and
2102    // prepare the address of the next entry.
2103    ok = false;
2104
2105    // Read the entry's prologue.
2106    if (!ReadEntryPrologue(cursor, &fde)) {
2107      if (!fde.end) {
2108        // If we couldn't even figure out this entry's extent, then we
2109        // must stop processing entries altogether.
2110        all_ok = false;
2111        break;
2112      }
2113      entry_end = fde.end;
2114      continue;
2115    }
2116
2117    // The next iteration picks up after this entry.
2118    entry_end = fde.end;
2119
2120    // Did we see an .eh_frame terminating mark?
2121    if (fde.kind == kTerminator) {
2122      // If there appears to be more data left in the section after the
2123      // terminating mark, warn the user. But this is just a warning;
2124      // we leave all_ok true.
2125      if (fde.end < buffer_end) reporter_->EarlyEHTerminator(fde.offset);
2126      break;
2127    }
2128
2129    // In this loop, we skip CIEs. We only parse them fully when we
2130    // parse an FDE that refers to them. This limits our memory
2131    // consumption (beyond the buffer itself) to that needed to
2132    // process the largest single entry.
2133    if (fde.kind != kFDE) {
2134      ok = true;
2135      continue;
2136    }
2137
2138    // Validate the CIE pointer.
2139    if (fde.id > buffer_length_) {
2140      reporter_->CIEPointerOutOfRange(fde.offset, fde.id);
2141      continue;
2142    }
2143
2144    CIE cie;
2145
2146    // Parse this FDE's CIE header.
2147    if (!ReadEntryPrologue(buffer_ + fde.id, &cie))
2148      continue;
2149    // This had better be an actual CIE.
2150    if (cie.kind != kCIE) {
2151      reporter_->BadCIEId(fde.offset, fde.id);
2152      continue;
2153    }
2154    if (!ReadCIEFields(&cie))
2155      continue;
2156
2157    // We now have the values that govern both the CIE and the FDE.
2158    cie.cie = &cie;
2159    fde.cie = &cie;
2160
2161    // Parse the FDE's header.
2162    if (!ReadFDEFields(&fde))
2163      continue;
2164
2165    // Call Entry to ask the consumer if they're interested.
2166    if (!handler_->Entry(fde.offset, fde.address, fde.size,
2167                         cie.version, cie.augmentation,
2168                         cie.return_address_register)) {
2169      // The handler isn't interested in this entry. That's not an error.
2170      ok = true;
2171      continue;
2172    }
2173
2174    if (cie.has_z_augmentation) {
2175      // Report the personality routine address, if we have one.
2176      if (cie.has_z_personality) {
2177        if (!handler_
2178            ->PersonalityRoutine(cie.personality_address,
2179                                 IsIndirectEncoding(cie.personality_encoding)))
2180          continue;
2181      }
2182
2183      // Report the language-specific data area address, if we have one.
2184      if (cie.has_z_lsda) {
2185        if (!handler_
2186            ->LanguageSpecificDataArea(fde.lsda_address,
2187                                       IsIndirectEncoding(cie.lsda_encoding)))
2188          continue;
2189      }
2190
2191      // If this is a signal-handling frame, report that.
2192      if (cie.has_z_signal_frame) {
2193        if (!handler_->SignalHandler())
2194          continue;
2195      }
2196    }
2197
2198    // Interpret the CIE's instructions, and then the FDE's instructions.
2199    State state(reader_, handler_, reporter_, fde.address);
2200    ok = state.InterpretCIE(cie) && state.InterpretFDE(fde);
2201
2202    // Tell the ByteReader that the function start address from the
2203    // FDE header is no longer valid.
2204    reader_->ClearFunctionBase();
2205
2206    // Report the end of the entry.
2207    handler_->End();
2208  }
2209
2210  return all_ok;
2211}
2212
2213const char *CallFrameInfo::KindName(EntryKind kind) {
2214  if (kind == CallFrameInfo::kUnknown)
2215    return "entry";
2216  else if (kind == CallFrameInfo::kCIE)
2217    return "common information entry";
2218  else if (kind == CallFrameInfo::kFDE)
2219    return "frame description entry";
2220  else {
2221    assert (kind == CallFrameInfo::kTerminator);
2222    return ".eh_frame sequence terminator";
2223  }
2224}
2225
2226bool CallFrameInfo::ReportIncomplete(Entry *entry) {
2227  reporter_->Incomplete(entry->offset, entry->kind);
2228  return false;
2229}
2230
2231void CallFrameInfo::Reporter::Incomplete(uint64 offset,
2232                                         CallFrameInfo::EntryKind kind) {
2233  fprintf(stderr,
2234          "%s: CFI %s at offset 0x%llx in '%s': entry ends early\n",
2235          filename_.c_str(), CallFrameInfo::KindName(kind), offset,
2236          section_.c_str());
2237}
2238
2239void CallFrameInfo::Reporter::EarlyEHTerminator(uint64 offset) {
2240  fprintf(stderr,
2241          "%s: CFI at offset 0x%llx in '%s': saw end-of-data marker"
2242          " before end of section contents\n",
2243          filename_.c_str(), offset, section_.c_str());
2244}
2245
2246void CallFrameInfo::Reporter::CIEPointerOutOfRange(uint64 offset,
2247                                                   uint64 cie_offset) {
2248  fprintf(stderr,
2249          "%s: CFI frame description entry at offset 0x%llx in '%s':"
2250          " CIE pointer is out of range: 0x%llx\n",
2251          filename_.c_str(), offset, section_.c_str(), cie_offset);
2252}
2253
2254void CallFrameInfo::Reporter::BadCIEId(uint64 offset, uint64 cie_offset) {
2255  fprintf(stderr,
2256          "%s: CFI frame description entry at offset 0x%llx in '%s':"
2257          " CIE pointer does not point to a CIE: 0x%llx\n",
2258          filename_.c_str(), offset, section_.c_str(), cie_offset);
2259}
2260
2261void CallFrameInfo::Reporter::UnrecognizedVersion(uint64 offset, int version) {
2262  fprintf(stderr,
2263          "%s: CFI frame description entry at offset 0x%llx in '%s':"
2264          " CIE specifies unrecognized version: %d\n",
2265          filename_.c_str(), offset, section_.c_str(), version);
2266}
2267
2268void CallFrameInfo::Reporter::UnrecognizedAugmentation(uint64 offset,
2269                                                       const string &aug) {
2270  fprintf(stderr,
2271          "%s: CFI frame description entry at offset 0x%llx in '%s':"
2272          " CIE specifies unrecognized augmentation: '%s'\n",
2273          filename_.c_str(), offset, section_.c_str(), aug.c_str());
2274}
2275
2276void CallFrameInfo::Reporter::InvalidPointerEncoding(uint64 offset,
2277                                                     uint8 encoding) {
2278  fprintf(stderr,
2279          "%s: CFI common information entry at offset 0x%llx in '%s':"
2280          " 'z' augmentation specifies invalid pointer encoding: 0x%02x\n",
2281          filename_.c_str(), offset, section_.c_str(), encoding);
2282}
2283
2284void CallFrameInfo::Reporter::UnusablePointerEncoding(uint64 offset,
2285                                                      uint8 encoding) {
2286  fprintf(stderr,
2287          "%s: CFI common information entry at offset 0x%llx in '%s':"
2288          " 'z' augmentation specifies a pointer encoding for which"
2289          " we have no base address: 0x%02x\n",
2290          filename_.c_str(), offset, section_.c_str(), encoding);
2291}
2292
2293void CallFrameInfo::Reporter::RestoreInCIE(uint64 offset, uint64 insn_offset) {
2294  fprintf(stderr,
2295          "%s: CFI common information entry at offset 0x%llx in '%s':"
2296          " the DW_CFA_restore instruction at offset 0x%llx"
2297          " cannot be used in a common information entry\n",
2298          filename_.c_str(), offset, section_.c_str(), insn_offset);
2299}
2300
2301void CallFrameInfo::Reporter::BadInstruction(uint64 offset,
2302                                             CallFrameInfo::EntryKind kind,
2303                                             uint64 insn_offset) {
2304  fprintf(stderr,
2305          "%s: CFI %s at offset 0x%llx in section '%s':"
2306          " the instruction at offset 0x%llx is unrecognized\n",
2307          filename_.c_str(), CallFrameInfo::KindName(kind),
2308          offset, section_.c_str(), insn_offset);
2309}
2310
2311void CallFrameInfo::Reporter::NoCFARule(uint64 offset,
2312                                        CallFrameInfo::EntryKind kind,
2313                                        uint64 insn_offset) {
2314  fprintf(stderr,
2315          "%s: CFI %s at offset 0x%llx in section '%s':"
2316          " the instruction at offset 0x%llx assumes that a CFA rule has"
2317          " been set, but none has been set\n",
2318          filename_.c_str(), CallFrameInfo::KindName(kind), offset,
2319          section_.c_str(), insn_offset);
2320}
2321
2322void CallFrameInfo::Reporter::EmptyStateStack(uint64 offset,
2323                                              CallFrameInfo::EntryKind kind,
2324                                              uint64 insn_offset) {
2325  fprintf(stderr,
2326          "%s: CFI %s at offset 0x%llx in section '%s':"
2327          " the DW_CFA_restore_state instruction at offset 0x%llx"
2328          " should pop a saved state from the stack, but the stack is empty\n",
2329          filename_.c_str(), CallFrameInfo::KindName(kind), offset,
2330          section_.c_str(), insn_offset);
2331}
2332
2333void CallFrameInfo::Reporter::ClearingCFARule(uint64 offset,
2334                                              CallFrameInfo::EntryKind kind,
2335                                              uint64 insn_offset) {
2336  fprintf(stderr,
2337          "%s: CFI %s at offset 0x%llx in section '%s':"
2338          " the DW_CFA_restore_state instruction at offset 0x%llx"
2339          " would clear the CFA rule in effect\n",
2340          filename_.c_str(), CallFrameInfo::KindName(kind), offset,
2341          section_.c_str(), insn_offset);
2342}
2343
2344}  // namespace dwarf2reader
2345