1/* Copyright (C) 2007-2010 The Android Open Source Project
2**
3** This software is licensed under the terms of the GNU General Public
4** License version 2, as published by the Free Software Foundation, and
5** may be copied, distributed, and modified under those terms.
6**
7** This program is distributed in the hope that it will be useful,
8** but WITHOUT ANY WARRANTY; without even the implied warranty of
9** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10** GNU General Public License for more details.
11*/
12
13/*
14 * Contains declarations of types, constants and structures
15 * describing DWARF format.
16 */
17
18#ifndef ELFF_DWARF_DEFS_H_
19#define ELFF_DWARF_DEFS_H_
20
21#include "dwarf.h"
22#include "elf_defs.h"
23
/* DWARF structures are packed to 1 byte, i.e. declared with no
 * compiler-inserted padding between or after their fields. */
#define ELFF_PACKED __attribute__ ((packed))

/*
 * Helper types for misc. DWARF variables.
 */

/* Type for DWARF abbreviation number (1-based; zero marks a separator). */
typedef uint32_t  Dwarf_AbbrNum;

/* Type for DWARF tag ID (DW_TAG_Xxx). */
typedef uint16_t  Dwarf_Tag;

/* Type for DWARF attribute ID (DW_AT_Xxx). */
typedef uint16_t  Dwarf_At;

/* Type for DWARF form ID (DW_FORM_Xxx). */
typedef uint16_t  Dwarf_Form;

/* Type for offset in 32-bit DWARF. */
typedef uint32_t  Dwarf32_Off;

/* Type for offset in 64-bit DWARF. */
typedef uint64_t  Dwarf64_Off;
48
49/* Enumerates types of values, obtained during DWARF attribute decoding. */
typedef enum DwarfValueType {
  /* Undefined. NOTE: numbering of this enum starts at 1, not at 0. */
  DWARF_VALUE_UNKNOWN = 1,

  /* uint8_t */
  DWARF_VALUE_U8,

  /* int8_t */
  DWARF_VALUE_S8,

  /* uint16_t */
  DWARF_VALUE_U16,

  /* int16_t */
  DWARF_VALUE_S16,

  /* uint32_t */
  DWARF_VALUE_U32,

  /* int32_t */
  DWARF_VALUE_S32,

  /* uint64_t */
  DWARF_VALUE_U64,

  /* int64_t */
  DWARF_VALUE_S64,

  /* const char* */
  DWARF_VALUE_STR,

  /* 32-bit address */
  DWARF_VALUE_PTR32,

  /* 64-bit address */
  DWARF_VALUE_PTR64,

  /* Dwarf_Block */
  DWARF_VALUE_BLOCK,
} DwarfValueType;
90
91/* Describes block of data, stored directly in the mapped .debug_info
92 * section. This type is used to represent an attribute encoded with
93 * DW_FORM_block# form.
94 */
typedef struct Dwarf_Block {
  /* Pointer to the block data inside mapped .debug_info section.
   * The pointer is const: the block is only referenced here, not copied. */
  const void*   block_ptr;

  /* Byte size of the block data addressed by block_ptr. */
  Elf_Word      block_size;
} Dwarf_Block;
102
103/* Describes a value, obtained from the mapped .debug_info section
104 * during DWARF attribute decoding.
105 */
typedef struct Dwarf_Value {
  /* Unites all possible data types for the value.
   * See DwarfValueType for the list of types. Only the member selected by
   * the 'type' field below is meaningful.
   */
  union {
    Elf_Byte      u8;     /* DWARF_VALUE_U8 */
    Elf_Sbyte     s8;     /* DWARF_VALUE_S8 */
    Elf_Half      u16;    /* DWARF_VALUE_U16 */
    Elf_Shalf     s16;    /* DWARF_VALUE_S16 */
    Elf_Word      u32;    /* DWARF_VALUE_U32 */
    Elf_Sword     s32;    /* DWARF_VALUE_S32 */
    Elf_Xword     u64;    /* DWARF_VALUE_U64 */
    Elf_Sxword    s64;    /* DWARF_VALUE_S64 */
    Elf_Word      ptr32;  /* DWARF_VALUE_PTR32 */
    Elf_Xword     ptr64;  /* DWARF_VALUE_PTR64 */
    const char*   str;    /* DWARF_VALUE_STR */
    Dwarf_Block   block;  /* DWARF_VALUE_BLOCK */
  };

  /* Value type (defines which variable in the union above
   * contains the value).
   */
  DwarfValueType  type;

  /* Number of bytes that encode this value in .debug_info section
   * of ELF file.
   */
  Elf_Word        encoded_size;
} Dwarf_Value;
135
136/* DWARF's LEB128 data type. LEB128 is defined as:
137 * Variable Length Data. "Little Endian Base 128" (LEB128) numbers. LEB128 is
138 * a scheme for encoding integers densely that exploits the assumption that
139 * most integers are small in magnitude. (This encoding is equally suitable
140 * whether the target machine architecture represents data in big-endian or
 * little-endian order. It is "little endian" only in the sense that it avoids
142 * using space to represent the "big" end of an unsigned integer, when the big
143 * end is all zeroes or sign extension bits).
144 *
145 * Unsigned LEB128 numbers are encoded as follows: start at the low order end
146 * of an unsigned integer and chop it into 7-bit chunks. Place each chunk into
147 * the low order 7 bits of a byte. Typically, several of the high order bytes
148 * will be zero; discard them. Emit the remaining bytes in a stream, starting
149 * with the low order byte; set the high order bit on each byte except the last
150 * emitted byte. The high bit of zero on the last byte indicates to the decoder
151 * that it has encountered the last byte. The integer zero is a special case,
152 * consisting of a single zero byte.
153 *
154 * The encoding for signed LEB128 numbers is similar, except that the criterion
155 * for discarding high order bytes is not whether they are zero, but whether
156 * they consist entirely of sign extension bits. Consider the 32-bit integer
157 * -2. The three high level bytes of the number are sign extension, thus LEB128
158 * would represent it as a single byte containing the low order 7 bits, with
159 * the high order bit cleared to indicate the end of the byte stream. Note that
160 * there is nothing within the LEB128 representation that indicates whether an
161 * encoded number is signed or unsigned. The decoder must know what type of
162 * number to expect.
163 *
164 * NOTE: It's assumed that LEB128 will not contain encodings for integers,
165 * larger than 64 bit.
166*/
167typedef struct ELFF_PACKED Dwarf_Leb128 {
168  /* Beginning of the LEB128 block. */
169  Elf_Byte  val;
170
171  /* Pulls actual value, encoded with this LEB128 block.
172   * Param:
173   *  value - Upon return will contain value, encoded with this LEB128 block.
174   *  sign - If true, the caller expects the LEB128 to contain a signed
175   *    integer, otherwise, caller expects an unsigned integer value to be
176   *    encoded with this LEB128 block.
177   */
178  void get_common(Dwarf_Value* value, bool sign) const {
179    value->u64 = 0;
180    /* Integer zero is a special case. */
181    if (val == 0) {
182      value->type = sign ? DWARF_VALUE_S32 : DWARF_VALUE_U32;
183      value->encoded_size = 1;
184      return;
185    }
186
187    /* We've got to reconstruct the integer. */
188    value->type = DWARF_VALUE_UNKNOWN;
189    value->encoded_size = 0;
190
191    /* Byte by byte loop though the LEB128, reconstructing the integer from
192     * 7-bits chunks. Byte with 8-th bit set to zero indicates the end
193     * of the LEB128 block. For signed integers, 7-th bit of the last LEB128
194     * byte controls the sign. If 7-th bit of the last LEB128 byte is set,
195     * the integer is negative. If 7-th bit of the last LEB128 byte is not
196     * set, the integer is positive.
197     */
198    const Elf_Byte* cur = &val;
199    Elf_Word shift = 0;
200    while ((*cur & 0x80) != 0) {
201      value->u64 |= (static_cast<Elf_Xword>(*cur) & 0x7F) << shift;
202      shift += 7;
203      value->encoded_size++;
204      cur++;
205    }
206    value->u64 |= (static_cast<Elf_Xword>(*cur) & 0x7F) << shift;
207    value->encoded_size++;
208
209    /* LEB128 format doesn't carry any info of the sizeof of the integer it
210     * represents. We well guess it, judging by the highest bit set in the
211     * reconstucted integer.
212     */
213    if ((value->u64 & 0xFFFFFFFF00000000LL) == 0) {
214      /* 32-bit integer. */
215      if (sign) {
216        value->type = DWARF_VALUE_S32;
217        if (((*cur) & 0x40) != 0) {
218          // Value is negative.
219          value->u64 |= - (1 << (shift + 7));
220        } else if ((value->u32 & 0x80000000) != 0) {
221          // Make sure we don't report negative value in this case.
222          value->type = DWARF_VALUE_S64;
223        }
224      } else {
225        value->type = DWARF_VALUE_U32;
226      }
227    } else {
228      /* 64-bit integer. */
229      if (sign) {
230        value->type = DWARF_VALUE_S64;
231        if (((*cur) & 0x40) != 0) {
232          // Value is negative.
233          value->u64 |= - (1 << (shift + 7));
234        }
235      } else {
236        value->type = DWARF_VALUE_U64;
237      }
238    }
239  }
240
241  /* Pulls actual unsigned value, encoded with this LEB128 block.
242   * See get_common() for more info.
243   * Param:
244   *  value - Upon return will contain unsigned value, encoded with
245   *  this LEB128 block.
246   */
247  void get_unsigned(Dwarf_Value* value) const {
248    get_common(value, false);
249  }
250
251  /* Pulls actual signed value, encoded with this LEB128 block.
252   * See get_common() for more info.
253   * Param:
254   *  value - Upon return will contain signed value, encoded with
255   *  this LEB128 block.
256   */
257  void get_signed(Dwarf_Value* value) const {
258    get_common(value, true);
259  }
260
261  /* Pulls LEB128 value, advancing past this LEB128 block.
262   * See get_common() for more info.
263   * Return:
264   *  Pointer to the byte past this LEB128 block.
265   */
266  const void* process(Dwarf_Value* value, bool sign) const {
267    get_common(value, sign);
268    return INC_CPTR(&val, value->encoded_size);
269  }
270
271  /* Pulls LEB128 unsigned value, advancing past this LEB128 block.
272   * See process() for more info.
273   */
274  const void* process_unsigned(Dwarf_Value* value) const {
275    return process(value, false);
276  }
277
278  /* Pulls LEB128 signed value, advancing past this LEB128 block.
279   * See process() for more info.
280   */
281  const void* process_signed(Dwarf_Value* value) const {
282    return process(value, true);
283  }
284} Dwarf_Leb128;
285
/* DIE attribute descriptor in the .debug_abbrev section.
 * Attribute descriptor contains two LEB128 values. The first one provides
 * attribute ID (one of DW_AT_XXX values), and the second one provides
 * form (one of DW_FORM_XXX values), in which attribute value is
 * encoded in the .debug_info section of the ELF file.
 */
292typedef struct ELFF_PACKED Dwarf_Abbr_AT {
293  /* Attribute ID (DW_AT_XXX).
294   * Attribute format (DW_FORMAT_XXX) follows immediately.
295   */
296  Dwarf_Leb128  at;
297
298  /* Checks if this is a separator descriptor.
299   * Zero is an invalid attribute ID, indicating the end of attribute
300   * list for the current DIE.
301   */
302  bool is_separator() const {
303    return at.val == 0;
304  }
305
306  /* Pulls attribute data, advancing past this descriptor.
307   * Param:
308   *  at_value - Upon return contains attribute value of this descriptor.
309   *  form - Upon return contains form value of this descriptor.
310   * Return:
311   *  Pointer to the byte past this descriptor block (usually, next
312   *  attribute decriptor).
313   */
314  const Dwarf_Abbr_AT* process(Dwarf_At* at_value, Dwarf_Form* form) const {
315    if (is_separator()) {
316      /* Size of separator descriptor is always 2 bytes. */
317      *at_value = 0;
318      *form = 0;
319      return INC_CPTR_T(Dwarf_Abbr_AT, &at.val, 2);
320    }
321
322    Dwarf_Value val;
323
324    /* Process attribute ID. */
325    const Dwarf_Leb128* next =
326        reinterpret_cast<const Dwarf_Leb128*>(at.process_unsigned(&val));
327    *at_value = val.u16;
328
329    /* Follow with processing the form. */
330    next = reinterpret_cast<const Dwarf_Leb128*>(next->process_unsigned(&val));
331    *form = val.u16;
332    return reinterpret_cast<const Dwarf_Abbr_AT*>(next);
333  }
334} Dwarf_Abbr_AT;
335
336/* DIE abbreviation descriptor in the .debug_abbrev section.
337 * DIE abbreviation descriptor contains three parameters. The first one is a
338 * LEB128 value, that encodes 1 - based abbreviation descriptor number.
339 * Abbreviation descriptor numbers seems to be always in sequential order, and
340 * are counted on per-compilation unit basis. I.e. abbreviation number for the
341 * first DIE abbreviation descriptor of each compilation unit is always 1.
342 *
343 * Besides abbreviation number, DIE abbreviation descriptor contains two more
344 * values. The first one (after abbr_num) is a LEB128 value containing DIE's
345 * tag value, and the second one is one byte flag specifying whether or not
 * the DIE contains any children.
347 *
348 * This descriptor is immediately followed by a list of attribute descriptors
349 * (see Dwarf_Abbr_AT) for the DIE represented by this abbreviation descriptor.
350 */
351typedef struct ELFF_PACKED Dwarf_Abbr_DIE {
352  /* 1 - based abbreviation number for the DIE. */
353  Dwarf_Leb128  abbr_num;
354
355  /* Gets abbreviation number for this descriptor. */
356  Dwarf_AbbrNum get_abbr_num() const {
357    Dwarf_Value val;
358    abbr_num.get_unsigned(&val);
359    return val.u16;
360  }
361
362  /* Gets DIE tag for this descriptor. */
363  Dwarf_Tag get_tag() const {
364    Dwarf_Tag tag;
365    process(NULL, &tag);
366    return tag;
367  }
368
369  /* Pulls DIE abbreviation descriptor data, advancing past this descriptor.
370   * Param:
371   *  abbr_index - Upon return contains abbreviation number for this
372   *    descriptor. This parameter can be NULL, if the caller is not interested
373   *    in this value.
374   *  tag - Upon return contains tag of the DIE for this descriptor. This
375   *    parameter can be NULL, if the caller is not interested in this value.
376   *  form - Upon return contains form of the DIE for this descriptor.
377   * Return:
378   *  Pointer to the list of attribute descriptors for the DIE.
379   */
380  const Dwarf_Abbr_AT* process(Dwarf_AbbrNum* abbr_index,
381                               Dwarf_Tag* tag) const {
382    Dwarf_Value val;
383    const Dwarf_Leb128* next =
384        reinterpret_cast<const Dwarf_Leb128*>(abbr_num.process_unsigned(&val));
385    if (abbr_index != NULL) {
386      *abbr_index = val.u32;
387    }
388
389    /* Next one is a "tag". */
390    next = reinterpret_cast<const Dwarf_Leb128*>(next->process_unsigned(&val));
391    if (tag != NULL) {
392      *tag = val.u16;
393    }
394
395    /* Next one is a "has children" one byte flag. We're not interested in it,
396     * so jump to the list of attribute descriptors that immediately follows
397     * this DIE descriptor. */
398    return INC_CPTR_T(Dwarf_Abbr_AT, next, 1);
399  }
400} Dwarf_Abbr_DIE;
401
402/* DIE descriptor in the .debug_info section.
403 * DIE descriptor contains one LEB128-encoded value, containing DIE's
404 * abbreviation descriptor number in the .debug_abbrev section.
405 *
406 * DIE descriptor is immediately followed by the list of DIE attribute values,
 * format of which is defined by the list of attribute descriptors in the
408 * .debug_abbrev section, that immediately follow the DIE attribute descriptor,
409 * addressed by this descriptor's abbr_num LEB128.
410 */
411typedef struct ELFF_PACKED Dwarf_DIE {
412  /* 1 - based index of DIE abbreviation descriptor (Dwarf_Abbr_DIE) for this
413   * DIE in the .debug_abbrev section.
414   *
415   * NOTE: DIE abbreviation descriptor indexes are tied to the compilation
416   * unit. In other words, each compilation unit restarts counting DIE
417   * abbreviation descriptors from 1.
418   *
419   * NOTE: Zero is invalid value for this field, indicating that this DIE is a
420   * separator (usually it ends a list of "child" DIEs)
421   */
422  Dwarf_Leb128  abbr_num;
423
424  /* Checks if this is a separator DIE. */
425  bool is_separator() const {
426    return abbr_num.val == 0;
427  }
428
429  /* Gets (1 - based) abbreviation number for this DIE. */
430  Dwarf_AbbrNum get_abbr_num() const {
431    Dwarf_Value val;
432    abbr_num.get_unsigned(&val);
433    return val.u16;
434  }
435
436  /* Pulls DIE information, advancing past this descriptor to DIE attributes.
437   * Param:
438   *  abbr_num - Upon return contains abbreviation number for this DIE. This
439   *    parameter can be NULL, if the caller is not interested in this value.
440   * Return:
441   *  Pointer to the byte past this descriptor (the list of DIE attributes).
442   */
443  const Elf_Byte* process(Dwarf_AbbrNum* abbr_number) const {
444    if (is_separator()) {
445      if (abbr_number != NULL) {
446        *abbr_number = 0;
447      }
448      // Size of a separator DIE is 1 byte.
449      return INC_CPTR_T(Elf_Byte, &abbr_num.val, 1);
450    }
451    Dwarf_Value val;
452    const void* ret = abbr_num.process_unsigned(&val);
453    if (abbr_number != NULL) {
454      *abbr_number = val.u32;
455    }
456    return reinterpret_cast<const Elf_Byte*>(ret);
457  }
458} Dwarf_DIE;
459
460/*
461 * Variable size headers.
462 * When encoding size value in DWARF, the first 32 bits of a "size" header
463 * define header type. If first 32 bits of the header contain 0xFFFFFFFF
464 * value, this is 64-bit size header with the following 64 bits encoding
465 * the size. Otherwise, if first 32 bits are not 0xFFFFFFFF, they contain
466 * 32-bit size value.
467 */
468
469/* Size header for 32-bit DWARF. */
typedef struct ELFF_PACKED Dwarf32_SizeHdr {
  /* Size value. For 32-bit DWARF the first 32 bits of the header hold the
   * size itself (i.e. they are not 0xFFFFFFFF). */
  Elf_Word  size;
} Dwarf32_SizeHdr;
474
475/* Size header for 64-bit DWARF. */
typedef struct ELFF_PACKED Dwarf64_SizeHdr {
  /* Size selector. For 64-bit DWARF this field is set to 0xFFFFFFFF */
  Elf_Word  size_selector;

  /* Actual size value (the 64 bits that follow the selector). */
  Elf_Xword   size;
} Dwarf64_SizeHdr;
483
484/* Compilation unit header in the .debug_info section.
485 * Template param:
486 *  Dwarf_SizeHdr - Type for the header's size field. Must be Dwarf32_SizeHdr
487 *    for 32-bit DWARF, or Dwarf64_SizeHdr for 64-bit DWARF.
 *  Elf_Off - Type for abbrev_offset field. Must be Elf_Word for 32-bit
489 *    DWARF, or Elf_Xword for 64-bit DWARF.
490 */
template <typename Dwarf_SizeHdr, typename Elf_Off>
struct ELFF_PACKED Dwarf_CUHdr {
  /* Size of the compilation unit data in .debug_info section. */
  Dwarf_SizeHdr   size_hdr;

  /* Compilation unit's DWARF version stamp. */
  Elf_Half        version;

  /* Relative (to the beginning of .debug_abbrev section data) offset of the
   * beginning of abbreviation sequence for this compilation unit.
   */
  Elf_Off         abbrev_offset;

  /* Pointer size for this compilation unit (should be 4, or 8). */
  Elf_Byte        address_size;
};
/* Compilation unit header in the .debug_info section for 32-bit DWARF:
 * 32-bit size header and 32-bit abbreviation offset. */
typedef Dwarf_CUHdr<Dwarf32_SizeHdr, Elf_Word> Dwarf32_CUHdr;
/* Compilation unit header in the .debug_info section for 64-bit DWARF:
 * 64-bit size header (with selector) and 64-bit abbreviation offset. */
typedef Dwarf_CUHdr<Dwarf64_SizeHdr, Elf_Xword> Dwarf64_CUHdr;
511
512/* CU STMTL header in the .debug_line section.
513 * Template param:
514 *  Dwarf_SizeHdr - Type for the header's size field. Must be Dwarf32_SizeHdr
515 *    for 32-bit DWARF, or Dwarf64_SizeHdr for 64-bit DWARF.
 *  Elf_Size - Type for header_length field. Must be Elf_Word for 32-bit
517 *    DWARF, or Elf_Xword for 64-bit DWARF.
518 */
template <typename Dwarf_SizeHdr, typename Elf_Size>
struct ELFF_PACKED Dwarf_STMTLHdr {
  /* The size in bytes of the line number information for this compilation
   * unit, not including the unit_length field itself. */
  Dwarf_SizeHdr unit_length;

  /* A version number. This number is specific to the line number information
   * and is independent of the DWARF version number. */
  Elf_Half      version;

  /* The number of bytes following the header_length field to the beginning of
   * the first byte of the line number program itself. In the 32-bit DWARF
   * format, this is a 4-byte unsigned length; in the 64-bit DWARF format,
   * this field is an 8-byte unsigned length. */
  Elf_Size      header_length;

  /* The size in bytes of the smallest target machine instruction. Line number
   * program opcodes that alter the address register first multiply their
   * operands by this value. */
  Elf_Byte      min_instruction_len;

  /* The initial value of the is_stmt register. */
  Elf_Byte      default_is_stmt;

  /* This parameter affects the meaning of the special opcodes. */
  Elf_Sbyte     line_base;

  /* This parameter affects the meaning of the special opcodes. */
  Elf_Byte      line_range;

  /* The number assigned to the first special opcode. */
  Elf_Byte      opcode_base;

  /* This is the first element of an array specifying the number of LEB128
   * operands for each of the standard opcodes. The first element of the array
   * corresponds to the opcode whose value is 1, and the last element
   * corresponds to the opcode whose value is opcode_base - 1. By increasing
   * opcode_base, and adding elements to this array, new standard opcodes can
   * be added, while allowing consumers who do not know about these new opcodes
   * to be able to skip them. NOTE: only the first element is declared here;
   * the array itself lives in the mapped .debug_line section. */
  Elf_Byte      standard_opcode_lengths;
};
/* CU STMTL header in the .debug_line section for 32-bit DWARF. */
typedef Dwarf_STMTLHdr<Dwarf32_SizeHdr, Elf_Word> Dwarf32_STMTLHdr;
/* CU STMTL header in the .debug_line section for 64-bit DWARF. */
typedef Dwarf_STMTLHdr<Dwarf64_SizeHdr, Elf_Xword> Dwarf64_STMTLHdr;
566
567/* Source file descriptor in the .debug_line section.
568 * Descriptor begins with zero-terminated file name, followed by an ULEB128,
569 * encoding directory index in the list of included directories, followed by
 * an ULEB128, encoding file modification time, followed by an ULEB128, encoding
571 * file size.
572 */
573typedef struct ELFF_PACKED Dwarf_STMTL_FileDesc {
574  /* Zero-terminated file name. */
575  char  file_name[1];
576
577  /* Checks of this descriptor ends the list. */
578  bool is_last_entry() const {
579    return file_name[0] == '\0';
580  }
581
582  /* Gets file name. */
583  const char* get_file_name() const {
584    return file_name;
585  }
586
587  /* Processes this descriptor, advancing to the next one.
588   * Param:
589   *  dir_index - Upon return contains index of the parent directory in the
590   *    list of included directories. Can be NULL if caller is not interested
591   *    in this value.
592   * Return:
593   *  Pointer to the next source file descriptor in the list.
594   */
595  const Dwarf_STMTL_FileDesc* process(Elf_Word* dir_index) const {
596    if (is_last_entry()) {
597      return this;
598    }
599
600    /* First parameter: include directory index. */
601    Dwarf_Value tmp;
602    const Dwarf_Leb128* leb =
603        INC_CPTR_T(Dwarf_Leb128, file_name, strlen(file_name) + 1);
604    leb = reinterpret_cast<const Dwarf_Leb128*>(leb->process_unsigned(&tmp));
605    if (dir_index != NULL) {
606      *dir_index = tmp.u32;
607    }
608    /* Process file time. */
609    leb = reinterpret_cast<const Dwarf_Leb128*>(leb->process_unsigned(&tmp));
610    /* Process file size. */
611    return reinterpret_cast<const Dwarf_STMTL_FileDesc*>(leb->process_unsigned(&tmp));
612  }
613
614  /* Gets directory index for this descriptor. */
615  Elf_Word get_dir_index() const {
616    assert(!is_last_entry());
617    if (is_last_entry()) {
618      return 0;
619    }
620    /* Get directory index. */
621    Dwarf_Value ret;
622    const Dwarf_Leb128* leb =
623      INC_CPTR_T(Dwarf_Leb128, file_name, strlen(file_name) + 1);
624    leb->process_unsigned(&ret);
625    return ret.u32;
626  }
627} Dwarf_STMTL_FileDesc;
628
629/* Encapsulates a DIE attribute, collected during ELF file parsing.
630 */
631class DIEAttrib {
632 public:
633  /* Constructs DIEAttrib intance. */
634  DIEAttrib()
635      : at_(0),
636        form_(0) {
637    value_.type = DWARF_VALUE_UNKNOWN;
638  }
639
640  /* Destructs DIEAttrib intance. */
641  ~DIEAttrib() {
642  }
643
644  /* Gets DWARF attribute ID (DW_AT_Xxx) for this property. */
645  Dwarf_At at() const {
646    return at_;
647  }
648
649  /* Gets DWARF form ID (DW_FORM_Xxx) for this property. */
650  Dwarf_Form form() const {
651    return form_;
652  }
653
654  /* Gets value of this property. */
655  const Dwarf_Value* value() const {
656    return &value_;
657  }
658
659  /* Value of this property. */
660  Dwarf_Value   value_;
661
662  /* DWARF attribute ID (DW_AT_Xxx) for this property. */
663  Dwarf_At      at_;
664
665  /* DWARF form ID (DW_FORM_Xxx) for this property. */
666  Dwarf_Form    form_;
667};
668
669/* Parse tag context.
670 * This structure is used as an ELF file parsing parameter, limiting collected
671 * DIEs by the list of tags.
672 */
typedef struct DwarfParseContext {
  /* Zero-terminated list of tags to collect DIEs for. If this field is NULL,
   * DIEs for all tags will be collected during the parsing.
   * See collect_die() for how the list is consulted. */
  const Dwarf_Tag*  tags;
} DwarfParseContext;
678
679/* Checks if a DIE with the given tag should be collected during the parsing.
680 * Param:
681 *  parse_context - Parse context to check the tag against. This parameter can
682 *  be NULL, indicating that all tags should be collected.
683 *  tag - Tag to check.
684 * Return:
685 *  true if a DIE with the given tag should be collected during the parsing,
686 *  or false, if the DIE should not be collected.
687 */
688static inline bool
689collect_die(const DwarfParseContext* parse_context, Dwarf_Tag tag) {
690  if (parse_context == NULL || parse_context->tags == NULL) {
691    return true;
692  }
693  for (const Dwarf_Tag* tags = parse_context->tags; *tags != 0; tags++) {
694    if (*tags == tag) {
695      return true;
696    }
697  }
698  return false;
699}
700
701/* Encapsulates an array of Dwarf_Abbr_DIE pointers, cached for a compilation
702 * unit. Although Dwarf_Abbr_DIE descriptors in the .debug_abbrev section of
703 * the ELF file seems to be always in sequential order, DIE descriptors may
704 * reference them randomly. So, to provide better performance, we will cache
705 * all Dwarf_Abbr_DIE pointers, that were found for each DIE. Since all of the
706 * Dwarf_Abbr_DIE are sequential, an array is the best way to cache them.
707 *
708 * NOTE: Objects of this class are instantiated one per each CU, as all DIE
 * abbreviation numbering is restarted from 1 for each new CU.
710 */
711class DwarfAbbrDieArray {
712 public:
713  /* Constructs DwarfAbbrDieArray instance.
714   * Most of the CUs don't have too many unique Dwarf_Abbr_DIEs, so, in order
715   * to decrease the amount of memory allocation calls, we will preallocate
716   * a relatively small array for them along with the instance of this class,
717   * hopping, that all Dwarf_Abbr_DIEs for the CU will fit into it.
718   */
719  DwarfAbbrDieArray()
720      : array_(&small_array_[0]),
721        array_size_(ELFF_ARRAY_SIZE(small_array_)),
722        count_(0) {
723  }
724
725  /* Destructs DwarfAbbrDieArray instance. */
726  ~DwarfAbbrDieArray() {
727    if (array_ != &small_array_[0]) {
728      delete[] array_;
729    }
730  }
731
732  /* Adds new entry to the array
733   * Param:
734   *  abbr - New entry to add.
735   *  num - Abbreviation number for the adding entry.
736   *    NOTE: before adding, this method will verify that descriptor for the
737   *    given abbreviation number has not been cached yet.
738   *    NOTE: due to the nature of this array, entries MUST be added strictly
739   *    in sequential order.
740   * Return:
741   *  true on success, false on failure.
742   */
743  bool add(const Dwarf_Abbr_DIE* abbr, Dwarf_AbbrNum num) {
744    assert(num != 0);
745    if (num == 0) {
746      // Zero is illegal DIE abbreviation number.
747      _set_errno(EINVAL);
748      return false;
749    }
750
751    if (num <= count_) {
752      // Already cached.
753      return true;
754    }
755
756    // Enforce strict sequential order.
757    assert(num == (count_ + 1));
758    if (num != (count_ + 1)) {
759      _set_errno(EINVAL);
760      return false;
761    }
762
763    if (num >= array_size_) {
764      /* Expand the array. Make it 64 entries bigger than adding entry number.
765       * NOTE: that we don't check for an overflow here, since we secured
766       * ourselves from that by enforcing strict sequential order. So, an
767       * overflow may happen iff number of entries cached in this array is
768       * close to 4G, which is a) totally unreasonable, and b) we would die
769       * long before this amount of entries is cached.
770       */
771      Dwarf_AbbrNum new_size = num + 64;
772
773      // Reallocate.
774      const Dwarf_Abbr_DIE** new_array = new const Dwarf_Abbr_DIE*[new_size];
775      assert(new_array != NULL);
776      if (new_array == NULL) {
777        _set_errno(ENOMEM);
778        return false;
779      }
780      memcpy(new_array, array_, count_ * sizeof(const Dwarf_Abbr_DIE*));
781      if (array_ != &small_array_[0]) {
782        delete[] array_;
783      }
784      array_ = new_array;
785      array_size_ = new_size;
786    }
787
788    // Abbreviation numbers are 1-based.
789    array_[num - 1] = abbr;
790    count_++;
791    return true;
792  }
793
794  /* Adds new entry to the array
795   * Param:
796   *  abbr - New entry to add.
797   * Return:
798   *  true on success, false on failure.
799   */
800  bool add(const Dwarf_Abbr_DIE* abbr) {
801    return add(abbr, abbr->get_abbr_num());
802  }
803
804  /* Gets an entry from the array
805   * Param:
806   *  num - 1-based index of an entry to get.
807   * Return:
808   *  Entry on success, or NULL if num exceeds the number of entries
809   *  contained in the array.
810   */
811  const Dwarf_Abbr_DIE* get(Dwarf_AbbrNum num) const {
812    assert(num != 0 && num <= count_);
813    if (num != 0 && num <= count_) {
814      return array_[num - 1];
815    } else {
816      _set_errno(EINVAL);
817      return NULL;
818    }
819  }
820
821  /* Caches Dwarf_Abbr_DIEs into this array up to the requested number.
822   * NOTE: This method cannot be called on an empty array. Usually, first
823   * entry is inserted into this array when CU object is initialized.
824   * Param:
825   *  num - Entry number to cache entries up to.
826   * Return:
827   *  Last cached entry (actually, an entry for the 'num' index).
828   */
829  const Dwarf_Abbr_DIE* cache_to(Dwarf_AbbrNum num) {
830    /* Last cached DIE abbreviation. We always should have cached at least one
831     * abbreviation for the CU DIE itself, added via "add" method when CU
832     * object was initialized. */
833    const Dwarf_Abbr_DIE* cur_abbr = get(count_);
834    assert(cur_abbr != NULL);
835    if (cur_abbr == NULL) {
836      return NULL;
837    }
838
839    /* Starting with the last cached DIE abbreviation, loop through the
840     * remaining DIE abbreviations in the .debug_abbrev section of the
841     * mapped ELF file, caching them until we reach the requested
842     * abbreviation descriptor number. Normally, the very next DIE
843     * abbreviation will stop the loop. */
844    while (num > count_) {
845      Dwarf_AbbrNum abbr_num;
846      Dwarf_Tag tmp2;
847      Dwarf_Form tmp3;
848      Dwarf_At tmp4;
849
850      /* Process all AT abbreviations for the current DIE entry, reaching next
851       * DIE abbreviation. */
852      const Dwarf_Abbr_AT* abbr_at = cur_abbr->process(&abbr_num, &tmp2);
853      while (!abbr_at->is_separator()) {
854        abbr_at = abbr_at->process(&tmp4, &tmp3);
855      }
856
857      // Next DIE abbreviation is right after the separator AT abbreviation.
858      cur_abbr = reinterpret_cast<const Dwarf_Abbr_DIE*>
859                                              (abbr_at->process(&tmp4, &tmp3));
860      if (!add(cur_abbr)) {
861        return NULL;
862      }
863    }
864
865    return array_[num - 1];
866  }
867
868  /* Empties array and frees allocations. */
869  void empty() {
870    if (array_ != &small_array_[0]) {
871      delete[] array_;
872      array_ = &small_array_[0];
873      array_size_ = sizeof(small_array_) / sizeof(small_array_[0]);
874    }
875    count_ = 0;
876  }
877
878 protected:
879  /* Array, preallocated in anticipation of relatively small number of
880   * DIE abbreviations in compilation unit. */
881  const Dwarf_Abbr_DIE*   small_array_[64];
882
883  /* Array of Dwarf_Abbr_DIE pointers, cached for a compilation unit. */
884  const Dwarf_Abbr_DIE**  array_;
885
886  /* Current size of the array. */
887  Dwarf_AbbrNum           array_size_;
888
889  /* Number of entries, cached in the array. */
890  Dwarf_AbbrNum           count_;
891};
892
893/* Encapsulates a state machine for the "Line Number Program", that is run
 * on data contained in the mapped .debug_line section.
895 */
896class DwarfStateMachine {
897 public:
898  /* Constructs DwarfStateMachine instance.
899   * Param:
900   *  set_is_stmt - Matches value of default_is_stmt field in the STMTL header.
901   *    see Dwarf_STMTL_HdrXX.
902   */
903  explicit DwarfStateMachine(bool set_is_stmt)
904    : address_(0),
905      file_(1),
906      line_(1),
907      column_(0),
908      discriminator_(0),
909      is_stmt_(set_is_stmt),
910      basic_block_(false),
911      end_sequence_(false),
912      prologue_end_(false),
913      epilogue_begin_(false),
914      isa_(0),
915      set_file_info_(NULL) {
916  }
917
918  /* Destructs DwarfStateMachine instance. */
919  ~DwarfStateMachine() {
920  }
921
922  /* Resets the state to default.
923   * Param:
924   *  set_is_stmt - Matches value of default_is_stmt field in the STMTL header.
925   *    see Dwarf_STMTL_HdrXX.
926  */
927  void reset(bool set_is_stmt) {
928    address_ = 0;
929    file_ = 1;
930    line_ = 1;
931    column_ = 0;
932    discriminator_ = 0;
933    is_stmt_ = set_is_stmt;
934    basic_block_ = false;
935    end_sequence_ = false;
936    prologue_end_ = false;
937    epilogue_begin_ = false;
938    isa_ = 0;
939    set_file_info_ = NULL;
940  }
941
942  /*
943   * Machine state.
944   */
945
946  /* Current address (current PC value). */
947  Elf_Xword                   address_;
948
949  /* Current index of source file descriptor. */
950  Elf_Word                    file_;
951
952  /* Current line in the current source file. */
953  Elf_Word                    line_;
954
955  /* Current column. */
956  Elf_Word                    column_;
957
958  /* Current discriminator value. */
959  Elf_Word                    discriminator_;
960
961  /* Current STMT flag. */
962  bool                        is_stmt_;
963
964  /* Current basic block flag. */
965  bool                        basic_block_;
966
967  /* Current end of sequence flag. */
968  bool                        end_sequence_;
969
970  /* Current end of prologue flag. */
971  bool                        prologue_end_;
972
973  /* Current epilogue begin flag. */
974  bool                        epilogue_begin_;
975
976  /* Current ISA value. */
977  Elf_Word                    isa_;
978
979  /* Current value for explicitly set current source file descriptor.
980   * If not NULL, this descriptor has priority over the descriptor, addressed
981   * by the file_ member of this class. */
982  const Dwarf_STMTL_FileDesc* set_file_info_;
983};
984
985/* Checks if given tag belongs to a routine. */
986static inline bool
987dwarf_tag_is_routine(Dwarf_Tag tag) {
988  return tag == DW_TAG_inlined_subroutine ||
989         tag == DW_TAG_subprogram ||
990         tag == DW_AT_main_subprogram;
991}
992
993/* Checks if given tag belongs to a compilation unit. */
994static inline bool
995dwarf_tag_is_cu(Dwarf_Tag tag) {
996  return tag == DW_TAG_compile_unit ||
997         tag == DW_TAG_partial_unit;
998}
999
1000#endif  // ELFF_DWARF_DEFS_H_
1001