1
2/*--------------------------------------------------------------------*/
3/*--- Format-neutral storage of and querying of info acquired from ---*/
4/*--- ELF/XCOFF stabs/dwarf1/dwarf2 debug info.                    ---*/
5/*---                                               priv_storage.h ---*/
6/*--------------------------------------------------------------------*/
7
8/*
9   This file is part of Valgrind, a dynamic binary instrumentation
10   framework.
11
12   Copyright (C) 2000-2013 Julian Seward
13      jseward@acm.org
14
15   This program is free software; you can redistribute it and/or
16   modify it under the terms of the GNU General Public License as
17   published by the Free Software Foundation; either version 2 of the
18   License, or (at your option) any later version.
19
20   This program is distributed in the hope that it will be useful, but
21   WITHOUT ANY WARRANTY; without even the implied warranty of
22   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23   General Public License for more details.
24
25   You should have received a copy of the GNU General Public License
26   along with this program; if not, write to the Free Software
27   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28   02111-1307, USA.
29
30   The GNU General Public License is contained in the file COPYING.
31*/
32/*
33   Stabs reader greatly improved by Nick Nethercote, Apr 02.
34   This module was also extensively hacked on by Jeremy Fitzhardinge
35   and Tom Hughes.
36*/
37/* See comment at top of debuginfo.c for explanation of
38   the _svma / _avma / _image / _bias naming scheme.
39*/
40/* Note this is not freestanding; needs pub_core_xarray.h and
41   priv_tytypes.h to be included before it. */
42
43#ifndef __PRIV_STORAGE_H
44#define __PRIV_STORAGE_H
45
46#include "pub_core_basics.h"   // Addr
47#include "pub_core_xarray.h"   // XArray
48#include "pub_core_deduppoolalloc.h" // DedupPoolAlloc
49#include "priv_d3basics.h"     // GExpr et al.
50#include "priv_image.h"        // DiCursor
51
52/* --------------------- SYMBOLS --------------------- */
53
54/* A structure to hold an ELF/MachO symbol (very crudely).  Usually
55   the symbol only has one name, which is stored in ::pri_name, and
56   ::sec_names is NULL.  If there are other names, these are stored in
57   ::sec_names, which is a NULL terminated vector holding the names.
58   The vector is allocated in VG_AR_DINFO, the names themselves live
59   in DebugInfo::strpool.
60
61   From the point of view of ELF, the primary vs secondary distinction
62   is artificial: they are all just names associated with the address,
63   none of which has higher precedence than any other.  However, from
64   the point of view of mapping an address to a name to display to the
65   user, we need to choose one "preferred" name, and so that might as
66   well be installed as the pri_name, whilst all others can live in
67   sec_names[].  This has the convenient side effect that, in the
68   common case where there is only one name for the address,
69   sec_names[] does not need to be allocated.
70*/
71typedef
72   struct {
73      SymAVMAs avmas;    /* Symbol Actual VMAs: lowest address of entity,
74                            + platform specific fields, to access with
75                            the macros defined in pub_core_debuginfo.h */
76      const HChar*  pri_name;  /* primary name, never NULL */
77      const HChar** sec_names; /* NULL, or a NULL term'd array of other names */
78      // XXX: this could be shrunk (on 32-bit platforms) by using 30
79      // bits for the size and 1 bit each for isText and isIFunc.  If you
80      // do this, make sure that all assignments to the latter two use
81      // 0 or 1 (or True or False), and that a positive number larger
82      // than 1 is never used to represent True.
83      UInt    size;    /* size in bytes */
84      Bool    isText;
85      Bool    isIFunc; /* symbol is an indirect function? */
86   }
87   DiSym;
88
89/* --------------------- SRCLOCS --------------------- */
90
/* Line count at which overflow happens, due to line numbers being
   stored as shorts in `struct nlist' in a.out.h. */
#define LINENO_OVERFLOW (1 << (sizeof(short) * 8))

/* A 32-bit word is split into LINENO_BITS bits of line number and
   LOC_SIZE_BITS bits of size (see the bit-fields in DiLoc). */
#define LINENO_BITS     20
#define LOC_SIZE_BITS  (32 - LINENO_BITS)
/* Largest line number that fits in LINENO_BITS bits. */
#define MAX_LINENO     ((1 << LINENO_BITS) - 1)

/* Largest size that fits in LOC_SIZE_BITS bits.  Unlikely to have
   any lines with instruction ranges > 4096 bytes */
#define MAX_LOC_SIZE   ((1 << LOC_SIZE_BITS) - 1)

/* Number used to detect line number overflows; if one line is
   60000-odd smaller than the previous, it was probably an overflow.
 */
#define OVERFLOW_DIFFERENCE     (LINENO_OVERFLOW - 5000)
106
107/* Filename and Dirname pair. FnDn are stored in di->fndnpool
108   and are allocated using VG_(allocFixedEltDedupPA).
109   The filename/dirname strings are themselves stored in di->strpool. */
110typedef
111   struct {
112      const HChar* filename;     /* source filename */
113      const HChar* dirname;      /* source directory name */
114   } FnDn;
115
116/* A structure to hold addr-to-source info for a single line.  There
117  can be a lot of these, hence the dense packing. */
118typedef
119   struct {
120      /* Word 1 */
121      Addr   addr;               /* lowest address for this line */
122      /* Word 2 */
123      UShort size:LOC_SIZE_BITS; /* # bytes; we catch overflows of this */
124      UInt   lineno:LINENO_BITS; /* source line number, or zero */
125   }
126   DiLoc;
127
/* Width of the DiInlLoc 'level' bit-field (the bits of a 32-bit word
   not taken by the line number), and the largest level it can hold. */
#define LEVEL_BITS  (32 - LINENO_BITS)
#define MAX_LEVEL     ((1 << LEVEL_BITS) - 1)
130
131/* A structure to hold addr-to-inlined fn info.  There
132   can be a lot of these, hence the dense packing.
133   Only caller source filename and lineno are stored.
134   Handling dirname should be done using fndn_ix technique
135   similar to  ML_(addLineInfo). */
136typedef
137   struct {
138      /* Word 1 */
139      Addr   addr_lo;            /* lowest address for inlined fn */
140      /* Word 2 */
141      Addr   addr_hi;            /* highest address following the inlined fn */
142      /* Word 3 */
143      const HChar* inlinedfn;    /* inlined function name */
144      /* Word 4 and 5 */
145      UInt   fndn_ix;            /* index in di->fndnpool of caller source
146                                    dirname/filename */
147      UInt   lineno:LINENO_BITS; /* caller line number */
148      UShort level:LEVEL_BITS;   /* level of inlining */
149   }
150   DiInlLoc;
151
152/* --------------------- CF INFO --------------------- */
153
/* DiCfSI: a structure to summarise DWARF2/3 CFA info for the code
   address range [base .. base+len-1].

   On x86 and amd64 ("IA"), if you know ({e,r}sp, {e,r}bp, {e,r}ip) at
   some point and {e,r}ip is in the range [base .. base+len-1], it
   tells you how to calculate ({e,r}sp, {e,r}bp) for the caller of the
   current frame and also ra, the return address of the current frame.

   First off, calculate CFA, the Canonical Frame Address, thusly:

     cfa = case cfa_how of
              CFIC_IA_SPREL -> {e,r}sp + cfa_off
              CFIC_IA_BPREL -> {e,r}bp + cfa_off
              CFIC_EXPR     -> expr whose index is in cfa_off

   Once that is done, the previous frame's {e,r}sp/{e,r}bp values and
   this frame's {e,r}ra value can be calculated like this:

     old_{e,r}sp/{e,r}bp/ra
         = case {e,r}sp/{e,r}bp/ra_how of
              CFIR_UNKNOWN   -> we don't know, sorry
              CFIR_SAME      -> same as it was before (sp/fp only)
              CFIR_CFAREL    -> cfa + sp/bp/ra_off
              CFIR_MEMCFAREL -> *( cfa + sp/bp/ra_off )
              CFIR_EXPR      -> expr whose index is in sp/bp/ra_off

   On ARM it's pretty much the same, except we have more registers to
   keep track of:

     cfa = case cfa_how of
              CFIC_ARM_R13REL -> r13 + cfa_off
              CFIC_ARM_R12REL -> r12 + cfa_off
              CFIC_ARM_R11REL -> r11 + cfa_off
              CFIC_ARM_R7REL  -> r7  + cfa_off
              CFIC_EXPR       -> expr whose index is in cfa_off

     old_r14/r13/r12/r11/r7/ra
         = case r14/r13/r12/r11/r7/ra_how of
              CFIR_UNKNOWN   -> we don't know, sorry
              CFIR_SAME      -> same as it was before (r14/r13/r12/r11/r7 only)
              CFIR_CFAREL    -> cfa + r14/r13/r12/r11/r7/ra_off
              CFIR_MEMCFAREL -> *( cfa + r14/r13/r12/r11/r7/ra_off )
              CFIR_EXPR      -> expr whose index is in r14/r13/r12/r11/r7/ra_off

   On ARM64:

     cfa = case cfa_how of
              CFIC_ARM64_SPREL  -> sp + cfa_off
              CFIC_ARM64_X29REL -> x29 + cfa_off
              CFIC_EXPR         -> expr whose index is in cfa_off

     old_sp/x30/x29/ra
         = case sp/x30/x29/ra_how of
              CFIR_UNKNOWN   -> we don't know, sorry
              CFIR_SAME      -> same as it was before
              CFIR_CFAREL    -> cfa + sp/x30/x29/ra_off
              CFIR_MEMCFAREL -> *( cfa + sp/x30/x29/ra_off )
              CFIR_EXPR      -> expr whose index is in sp/x30/x29/ra_off

   On s390x we have a similar logic as x86 or amd64. We need the stack pointer
   (r15), the frame pointer r11 (like BP) and together with the instruction
   address in the PSW we can calculate the previous values:
     cfa = case cfa_how of
              CFIC_IA_SPREL -> r15 + cfa_off
              CFIC_IA_BPREL -> r11 + cfa_off
              CFIC_EXPR     -> expr whose index is in cfa_off

     old_sp/fp/ra
         = case sp/fp/ra_how of
              CFIR_UNKNOWN   -> we don't know, sorry
              CFIR_SAME      -> same as it was before (sp/fp only)
              CFIR_CFAREL    -> cfa + sp/fp/ra_off
              CFIR_MEMCFAREL -> *( cfa + sp/fp/ra_off )
              CFIR_EXPR      -> expr whose index is in sp/fp/ra_off
*/

/* CFIC_ values: how to compute the CFA (stored in a cfa_how field). */
#define CFIC_IA_SPREL     ((UChar)1)
#define CFIC_IA_BPREL     ((UChar)2)
#define CFIC_ARM_R13REL   ((UChar)3)
#define CFIC_ARM_R12REL   ((UChar)4)
#define CFIC_ARM_R11REL   ((UChar)5)
#define CFIC_ARM_R7REL    ((UChar)6)
#define CFIC_ARM64_SPREL  ((UChar)7)
#define CFIC_ARM64_X29REL ((UChar)8)
#define CFIC_EXPR         ((UChar)9)  /* all targets */

/* CFIR_ values: how to recover a register of the previous frame
   (stored in the per-register _how fields).  The numeric range is
   disjoint from that of the CFIC_ values. */
#define CFIR_UNKNOWN      ((UChar)64)
#define CFIR_SAME         ((UChar)65)
#define CFIR_CFAREL       ((UChar)66)
#define CFIR_MEMCFAREL    ((UChar)67)
#define CFIR_EXPR         ((UChar)68)
245
246/* Definition of the DiCfSI_m DiCfSI machine dependent part.
247   These are highly duplicated, and are stored in a pool. */
248#if defined(VGA_x86) || defined(VGA_amd64)
249typedef
250   struct {
251      UChar cfa_how; /* a CFIC_IA value */
252      UChar ra_how;  /* a CFIR_ value */
253      UChar sp_how;  /* a CFIR_ value */
254      UChar bp_how;  /* a CFIR_ value */
255      Int   cfa_off;
256      Int   ra_off;
257      Int   sp_off;
258      Int   bp_off;
259   }
260   DiCfSI_m;
261#elif defined(VGA_arm)
262typedef
263   struct {
264      UChar cfa_how; /* a CFIC_ value */
265      UChar ra_how;  /* a CFIR_ value */
266      UChar r14_how; /* a CFIR_ value */
267      UChar r13_how; /* a CFIR_ value */
268      UChar r12_how; /* a CFIR_ value */
269      UChar r11_how; /* a CFIR_ value */
270      UChar r7_how;  /* a CFIR_ value */
271      Int   cfa_off;
272      Int   ra_off;
273      Int   r14_off;
274      Int   r13_off;
275      Int   r12_off;
276      Int   r11_off;
277      Int   r7_off;
278      // If you add additional fields, don't forget to update the
279      // initialisation of this in readexidx.c accordingly.
280   }
281   DiCfSI_m;
282#elif defined(VGA_arm64)
283typedef
284   struct {
285      UChar cfa_how; /* a CFIC_ value */
286      UChar ra_how;  /* a CFIR_ value */
287      UChar sp_how;  /* a CFIR_ value */ /*dw31=SP*/
288      UChar x30_how; /* a CFIR_ value */ /*dw30=LR*/
289      UChar x29_how; /* a CFIR_ value */ /*dw29=FP*/
290      Int   cfa_off;
291      Int   ra_off;
292      Int   sp_off;
293      Int   x30_off;
294      Int   x29_off;
295   }
296   DiCfSI_m;
297#elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
298/* Just have a struct with the common fields in, so that code that
299   processes the common fields doesn't have to be ifdef'd against
300   VGP_/VGA_ symbols.  These are not used in any way on ppc32/64-linux
301   at the moment. */
302typedef
303   struct {
304      UChar cfa_how; /* a CFIC_ value */
305      UChar ra_how;  /* a CFIR_ value */
306      Int   cfa_off;
307      Int   ra_off;
308   }
309   DiCfSI_m;
310#elif defined(VGA_s390x)
311typedef
312   struct {
313      UChar cfa_how; /* a CFIC_ value */
314      UChar sp_how;  /* a CFIR_ value */
315      UChar ra_how;  /* a CFIR_ value */
316      UChar fp_how;  /* a CFIR_ value */
317      Int   cfa_off;
318      Int   sp_off;
319      Int   ra_off;
320      Int   fp_off;
321   }
322   DiCfSI_m;
323#elif defined(VGA_mips32) || defined(VGA_mips64)
324typedef
325   struct {
326      UChar cfa_how; /* a CFIC_ value */
327      UChar ra_how;  /* a CFIR_ value */
328      UChar sp_how;  /* a CFIR_ value */
329      UChar fp_how;  /* a CFIR_ value */
330      Int   cfa_off;
331      Int   ra_off;
332      Int   sp_off;
333      Int   fp_off;
334   }
335   DiCfSI_m;
336#elif defined(VGA_tilegx)
337typedef
338   struct {
339      UChar cfa_how; /* a CFIC_IA value */
340      UChar ra_how;  /* a CFIR_ value */
341      UChar sp_how;  /* a CFIR_ value */
342      UChar fp_how;  /* a CFIR_ value */
343      Int   cfa_off;
344      Int   ra_off;
345      Int   sp_off;
346      Int   fp_off;
347   }
348   DiCfSI_m;
349#else
350#  error "Unknown arch"
351#endif
352
353typedef
354   struct {
355      Addr  base;
356      UInt  len;
357      UInt  cfsi_m_ix;
358   }
359   DiCfSI;
360
/* Unary operators usable in a CfiExpr (see CfiExpr.Cex.Unop).  The
   numbering starts at a distinctive non-zero value. */
typedef
   enum {
      Cunop_Abs=0x231,
      Cunop_Neg,
      Cunop_Not
   }
   CfiUnop;
368
/* Binary operators usable in a CfiExpr (see CfiExpr.Cex.Binop).  The
   numbering starts at a distinctive non-zero value. */
typedef
   enum {
      Cbinop_Add=0x321,
      Cbinop_Sub,
      Cbinop_And,
      Cbinop_Mul,
      Cbinop_Shl,
      Cbinop_Shr,
      Cbinop_Eq,
      Cbinop_Ge,
      Cbinop_Gt,
      Cbinop_Le,
      Cbinop_Lt,
      Cbinop_Ne
   }
   CfiBinop;
385
/* Architecture-specific registers that a CfiExpr can refer to (see
   CfiExpr.Cex.CfiReg).  The name prefix gives the architecture family
   (IA = x86/amd64, ARM, ARM64, S390, MIPS, TILEGX). */
typedef
   enum {
      Creg_INVALID=0x213,
      Creg_IA_SP,
      Creg_IA_BP,
      Creg_IA_IP,
      Creg_ARM_R13,
      Creg_ARM_R12,
      Creg_ARM_R15,
      Creg_ARM_R14,
      Creg_ARM_R7,
      Creg_ARM64_X30,
      Creg_S390_IA,
      Creg_S390_SP,
      Creg_S390_FP,
      Creg_S390_LR,
      Creg_MIPS_RA,
      Creg_TILEGX_IP,
      Creg_TILEGX_SP,
      Creg_TILEGX_BP,
      Creg_TILEGX_LR
   }
   CfiReg;
409
/* Discriminator for CfiExpr: says which member of the CfiExpr.Cex
   union is in use.  The numbering starts at a distinctive non-zero
   value. */
typedef
   enum {
      Cex_Undef=0x123,
      Cex_Deref,
      Cex_Const,
      Cex_Unop,
      Cex_Binop,
      Cex_CfiReg,
      Cex_DwReg
   }
   CfiExprTag;
421
422typedef
423   struct {
424      CfiExprTag tag;
425      union {
426         struct {
427         } Undef;
428         struct {
429            Int ixAddr;
430         } Deref;
431         struct {
432            UWord con;
433         } Const;
434         struct {
435            CfiUnop op;
436            Int ix;
437         } Unop;
438         struct {
439            CfiBinop op;
440            Int ixL;
441            Int ixR;
442         } Binop;
443         struct {
444            CfiReg reg;
445         } CfiReg;
446         struct {
447            Int reg;
448         } DwReg;
449      }
450      Cex;
451   }
452   CfiExpr;
453
454extern Int ML_(CfiExpr_Undef) ( XArray* dst );
455extern Int ML_(CfiExpr_Deref) ( XArray* dst, Int ixAddr );
456extern Int ML_(CfiExpr_Const) ( XArray* dst, UWord con );
457extern Int ML_(CfiExpr_Unop)  ( XArray* dst, CfiUnop op, Int ix );
458extern Int ML_(CfiExpr_Binop) ( XArray* dst, CfiBinop op, Int ixL, Int ixR );
459extern Int ML_(CfiExpr_CfiReg)( XArray* dst, CfiReg reg );
460extern Int ML_(CfiExpr_DwReg) ( XArray* dst, Int reg );
461
462extern void ML_(ppCfiExpr)( const XArray* src, Int ix );
463
464/* ---------------- FPO INFO (Windows PE) -------------- */
465
466/* for apps using Wine: MSVC++ PDB FramePointerOmitted: somewhat like
467   a primitive CFI */
468typedef
469   struct _FPO_DATA {  /* 16 bytes */
470      UInt   ulOffStart; /* offset of 1st byte of function code */
471      UInt   cbProcSize; /* # bytes in function */
472      UInt   cdwLocals;  /* # bytes/4 in locals */
473      UShort cdwParams;  /* # bytes/4 in params */
474      UChar  cbProlog;   /* # bytes in prolog */
475      UChar  cbRegs :3;  /* # regs saved */
476      UChar  fHasSEH:1;  /* Structured Exception Handling */
477      UChar  fUseBP :1;  /* EBP has been used */
478      UChar  reserved:1;
479      UChar  cbFrame:2;  /* frame type */
480   }
481   FPO_DATA;
482
/* Frame type codes (presumably the values of FPO_DATA.cbFrame --
   confirm against the PDB reader). */
#define PDB_FRAME_FPO  0
#define PDB_FRAME_TRAP 1
#define PDB_FRAME_TSS  2
486
487/* --------------------- VARIABLES --------------------- */
488
489typedef
490   struct {
491      Addr    aMin;
492      Addr    aMax;
493      XArray* /* of DiVariable */ vars;
494   }
495   DiAddrRange;
496
497typedef
498   struct {
499      const  HChar* name;  /* in DebugInfo.strpool */
500      UWord  typeR; /* a cuOff */
501      const GExpr* gexpr; /* on DebugInfo.gexprs list */
502      const GExpr* fbGX;  /* SHARED. */
503      UInt   fndn_ix; /* where declared; may be zero. index
504                         in DebugInfo.fndnpool */
505      Int    lineNo;   /* where declared; may be zero. */
506   }
507   DiVariable;
508
509Word
510ML_(cmp_for_DiAddrRange_range) ( const void* keyV, const void* elemV );
511
512/* --------------------- DEBUGINFO --------------------- */
513
514/* This is the top-level data type.  It's a structure which contains
515   information pertaining to one mapped ELF object.  This type is
516   exported only abstractly - in pub_tool_debuginfo.h. */
517
/* First though, here's an auxiliary data structure.  It is only ever
   used as part of a struct _DebugInfo.  We use it to record
   observations about mappings and permission changes to the
   associated file, so as to decide when to read debug info.  It's
   essentially an ultra-trivial finite state machine which, when it
   reaches an accept state, signals that we should now read debug info
   from the object into the associated struct _DebugInfo.  The accept
   state is arrived at when have_rx_map and have_rw_map both become
   true.  The initial state is one in which we have no observations,
   so have_rx_map and have_rw_map are both false.

   This all started as a rather ad-hoc solution, but was further
   expanded to handle weird object layouts, e.g. more than one rw
   or rx mapping for one binary.

   The normal sequence of events is one of

   start  -->  r-x mapping  -->  rw- mapping  -->  accept
   start  -->  rw- mapping  -->  r-x mapping  -->  accept

   that is, take the first r-x and rw- mapping we see, and we're done.

   On MacOSX >= 10.7, 32-bit, there appears to be a new variant:

   start  -->  r-- mapping  -->  rw- mapping
          -->  upgrade r-- mapping to r-x mapping  -->  accept

   where the upgrade is done by a call to mach_vm_protect (OSX 10.7)
   or kernelrpc_mach_vm_protect_trap (OSX 10.9 and possibly 10.8).
   Hence we need to also track this possibility.

   From perusal of dyld sources, it appears that this scheme could
   also be used for 64 bit libraries, although that doesn't seem to
   happen in practice.  dyld uses this scheme when the text section
   requires relocation, which only appears to be the case for 32 bit
   objects.
*/
554
555typedef struct
556{
557   Addr  avma; /* these fields record the file offset, length */
558   SizeT size; /* and map address of each mapping             */
559   OffT  foff;
560   Bool  rx, rw, ro;  /* memory access flags for this mapping */
561} DebugInfoMapping;
562
563struct _DebugInfoFSM
564{
565   HChar*  filename;  /* in mallocville (VG_AR_DINFO)               */
566   HChar*  dbgname;   /* in mallocville (VG_AR_DINFO)               */
567   XArray* maps;      /* XArray of DebugInfoMapping structs         */
568   Bool  have_rx_map; /* did we see a r?x mapping yet for the file? */
569   Bool  have_rw_map; /* did we see a rw? mapping yet for the file? */
570   Bool  have_ro_map; /* did we see a r-- mapping yet for the file? */
571};
572
573
/* To do with the string table in struct _DebugInfo (::strpool):
   64KB. */
#define SEGINFO_STRPOOLSIZE (64*1024)
576
577
/* We may encounter more than one .eh_frame section in an object --
   unusual but apparently allowed by ELF.  See
   http://sourceware.org/bugzilla/show_bug.cgi?id=12675
   This is the maximum number of such sections we keep track of.
*/
#define N_EHFRAME_SECTS 2
583
584
585/* So, the main structure for holding debug info for one object. */
586
587struct _DebugInfo {
588
589   /* Admin stuff */
590
591   struct _DebugInfo* next;   /* list of DebugInfos */
592   Bool               mark;   /* marked for deletion? */
593
594   /* An abstract handle, which can be used by entities outside of
595      m_debuginfo to (in an abstract datatype sense) refer to this
596      struct _DebugInfo.  A .handle of zero is invalid; valid handles
597      are 1 and above.  The same handle is never issued twice (in any
598      given run of Valgrind), so a handle becomes invalid when the
599      associated struct _DebugInfo is discarded, and remains invalid
600      forever thereafter.  The .handle field is set as soon as this
601      structure is allocated. */
602   ULong handle;
603
604   /* Used for debugging only - indicate what stuff to dump whilst
605      reading stuff into the seginfo.  Are computed as early in the
606      lifetime of the DebugInfo as possible -- at the point when it is
607      created.  Use these when deciding what to spew out; do not use
608      the global VG_(clo_blah) flags. */
609
610   Bool trace_symtab; /* symbols, our style */
611   Bool trace_cfi;    /* dwarf frame unwind, our style */
612   Bool ddump_syms;   /* mimic /usr/bin/readelf --syms */
613   Bool ddump_line;   /* mimic /usr/bin/readelf --debug-dump=line */
614   Bool ddump_frames; /* mimic /usr/bin/readelf --debug-dump=frames */
615
616   /* The "decide when it is time to read debuginfo" state machine.
617      This structure must get filled in before we can start reading
618      anything from the ELF/MachO file.  This structure is filled in
619      by VG_(di_notify_mmap) and its immediate helpers. */
620   struct _DebugInfoFSM fsm;
621
622   /* Once the ::fsm has reached an accept state -- typically, when
623      both a rw? and r?x mapping for .filename have been observed --
624      we can go on to read the symbol tables and debug info.
625      .have_dinfo changes from False to True when the debug info has
626      been completely read in and postprocessed (canonicalised) and is
627      now suitable for querying. */
628   /* If have_dinfo is False, then all fields below this point are
629      invalid and should not be consulted. */
630   Bool  have_dinfo; /* initially False */
631
632   /* All the rest of the fields in this structure are filled in once
633      we have committed to reading the symbols and debug info (that
634      is, at the point where .have_dinfo is set to True). */
635
636   /* The file's soname. */
637   HChar* soname;
638
639   /* Description of some important mapped segments.  The presence or
640      absence of the mapping is denoted by the _present field, since
641      in some obscure circumstances (to do with data/sdata/bss) it is
642      possible for the mapping to be present but have zero size.
643      Certainly text_ is mandatory on all platforms; not sure about
644      the rest though.
645
646      --------------------------------------------------------
647
648      Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS: we require that
649
650      either (size of all rx maps == 0 && cfsi == NULL) (the degenerate case)
651
652      or the normal case, which is the AND of the following:
653      (0) size of at least one rx mapping > 0
654      (1) no two DebugInfos with some rx mapping of size > 0
655          have overlapping rx mappings
656      (2) [cfsi_minavma,cfsi_maxavma] does not extend beyond
657          [avma,+size) of one rx mapping; that is, the former
658          is a subrange or equal to the latter.
659      (3) all DiCfSI in the cfsi array all have ranges that fall within
660          [avma,+size) of that rx mapping.
661      (4) all DiCfSI in the cfsi array are non-overlapping
662
663      The cumulative effect of these restrictions is to ensure that
664      all the DiCfSI records in the entire system are non overlapping.
665      Hence any address falls into either exactly one DiCfSI record,
666      or none.  Hence it is safe to cache the results of searches for
667      DiCfSI records.  This is the whole point of these restrictions.
668      The caching of DiCfSI searches is done in VG_(use_CF_info).  The
669      cache is flushed after any change to debugInfo_list.  DiCfSI
670      searches are cached because they are central to stack unwinding
671      on amd64-linux.
672
673      Where are these invariants imposed and checked?
674
675      They are checked after a successful read of debuginfo into
676      a DebugInfo*, in check_CFSI_related_invariants.
677
678      (1) is not really imposed anywhere.  We simply assume that the
679      kernel will not map the text segments from two different objects
680      into the same space.  Sounds reasonable.
681
682      (2) follows from (4) and (3).  It is ensured by canonicaliseCFI.
683      (3) is ensured by ML_(addDiCfSI).
684      (4) is ensured by canonicaliseCFI.
685
686      --------------------------------------------------------
687
688      Comment_on_DEBUG_SVMA_and_DEBUG_BIAS_fields:
689
690      The _debug_{svma,bias} fields were added as part of a fix to
691      #185816.  The problem encompassed in that bug report was that it
692      wasn't correct to use apply the bias values deduced for a
693      primary object to its associated debuginfo object, because the
694      debuginfo object (or the primary) could have been prelinked to a
695      different SVMA.  Hence debuginfo and primary objects need to
696      have their own biases.
697
698      ------ JRS: (referring to r9329): ------
699      Let me see if I understand the workings correctly.  Initially
700      the _debug_ values are set to the same values as the "normal"
701      ones, as there's a bunch of bits of code like this (in
702      readelf.c)
703
704         di->text_svma = svma;
705         ...
706         di->text_bias = rx_bias;
707         di->text_debug_svma = svma;
708         di->text_debug_bias = rx_bias;
709
710      If a debuginfo object subsequently shows up then the
711      _debug_svma/bias are set for the debuginfo object.  Result is
712      that if there's no debuginfo object then the values are the same
713      as the primary-object values, and if there is a debuginfo object
714      then they will (or at least may) be different.
715
716      Then when we need to actually bias something, we'll have to
717      decide whether to use the primary bias or the debuginfo bias.
718      And the strategy is to use the primary bias for ELF symbols but
719      the debuginfo bias for anything pulled out of Dwarf.
720
721      ------ THH: ------
722      Correct - the debug_svma and bias values apply to any address
723      read from the debug data regardless of where that debug data is
724      stored and the other values are used for addresses from other
725      places (primarily the symbol table).
726
727      ------ JRS: ------
728      Ok; so this was my only area of concern.  Are there any
729      corner-case scenarios where this wouldn't be right?  It sounds
730      like we're assuming the ELF symbols come from the primary object
731      and, if there is a debug object, then all the Dwarf comes from
732      there.  But what if (eg) both symbols and Dwarf come from the
733      debug object?  Is that even possible or allowable?
734
735      ------ THH: ------
736      You may have a point...
737
738      The current logic is to try and take any one set of data from
739      either the base object or the debug object. There are four sets
740      of data we consider:
741
742         - Symbol Table
743         - Stabs
744         - DWARF1
745         - DWARF2
746
747      If we see the primary section for a given set in the base object
748      then we ignore all sections relating to that set in the debug
749      object.
750
751      Now in principle if we saw a secondary section (like debug_line
752      say) in the base object, but not the main section (debug_info in
753      this case) then we would take debug_info from the debug object
754      but would use the debug_line from the base object unless we saw
755      a replacement copy in the debug object. That's probably unlikely
756      however.
757
758      A bigger issue might be, as you say, the symbol table as we will
759      pick that up from the debug object if it isn't in the base. The
760      dynamic symbol table will always have to be in the base object
761      though so we will have to be careful when processing symbols to
762      know which table we are reading in that case.
763
764      What we probably need to do is tell read_elf_symtab which object
765      the symbols it is being asked to read came from.
766
767      (A followup patch to deal with this was committed in r9469).
768   */
769   /* .text */
770   Bool     text_present;
771   Addr     text_avma;
772   Addr     text_svma;
773   SizeT    text_size;
774   PtrdiffT text_bias;
775   Addr     text_debug_svma;
776   PtrdiffT text_debug_bias;
777   /* .data */
778   Bool     data_present;
779   Addr     data_svma;
780   Addr     data_avma;
781   SizeT    data_size;
782   PtrdiffT data_bias;
783   Addr     data_debug_svma;
784   PtrdiffT data_debug_bias;
785   /* .sdata */
786   Bool     sdata_present;
787   Addr     sdata_svma;
788   Addr     sdata_avma;
789   SizeT    sdata_size;
790   PtrdiffT sdata_bias;
791   Addr     sdata_debug_svma;
792   PtrdiffT sdata_debug_bias;
793   /* .rodata */
794   Bool     rodata_present;
795   Addr     rodata_svma;
796   Addr     rodata_avma;
797   SizeT    rodata_size;
798   PtrdiffT rodata_bias;
799   Addr     rodata_debug_svma;
800   PtrdiffT rodata_debug_bias;
801   /* .bss */
802   Bool     bss_present;
803   Addr     bss_svma;
804   Addr     bss_avma;
805   SizeT    bss_size;
806   PtrdiffT bss_bias;
807   Addr     bss_debug_svma;
808   PtrdiffT bss_debug_bias;
809   /* .sbss */
810   Bool     sbss_present;
811   Addr     sbss_svma;
812   Addr     sbss_avma;
813   SizeT    sbss_size;
814   PtrdiffT sbss_bias;
815   Addr     sbss_debug_svma;
816   PtrdiffT sbss_debug_bias;
817   /* .ARM.exidx -- sometimes present on arm32, containing unwind info. */
818   Bool     exidx_present;
819   Addr     exidx_avma;
820   Addr     exidx_svma;
821   SizeT    exidx_size;
822   PtrdiffT exidx_bias;
823   /* .ARM.extab -- sometimes present on arm32, containing unwind info. */
824   Bool     extab_present;
825   Addr     extab_avma;
826   Addr     extab_svma;
827   SizeT    extab_size;
828   PtrdiffT extab_bias;
829   /* .plt */
830   Bool   plt_present;
831   Addr	  plt_avma;
832   SizeT  plt_size;
833   /* .got */
834   Bool   got_present;
835   Addr   got_avma;
836   SizeT  got_size;
837   /* .got.plt */
838   Bool   gotplt_present;
839   Addr   gotplt_avma;
840   SizeT  gotplt_size;
841   /* .opd -- needed on ppc64be-linux for finding symbols */
842   Bool   opd_present;
843   Addr   opd_avma;
844   SizeT  opd_size;
845   /* .ehframe -- needed on amd64-linux for stack unwinding.  We might
846      see more than one, hence the arrays. */
847   UInt   n_ehframe;  /* 0 .. N_EHFRAME_SECTS */
848   Addr   ehframe_avma[N_EHFRAME_SECTS];
849   SizeT  ehframe_size[N_EHFRAME_SECTS];
850
851   /* Sorted tables of stuff we snarfed from the file.  This is the
852      eventual product of reading the debug info.  All this stuff
853      lives in VG_AR_DINFO. */
854
855   /* An expandable array of symbols. */
856   DiSym*  symtab;
857   UWord   symtab_used;
858   UWord   symtab_size;
859   /* Two expandable arrays, storing locations and their filename/dirname. */
860   DiLoc*  loctab;
861   UInt    sizeof_fndn_ix;  /* Similar use as sizeof_cfsi_m_ix below. */
862   void*   loctab_fndn_ix;  /* loctab[i] filename/dirname is identified by
                               loctab_fndn_ix[i] (an index in di->fndnpool);
864                               0 means filename/dirname unknown.
865                               The void* is an UChar* or UShort* or UInt*
866                               depending on sizeof_fndn_ix. */
867   UWord   loctab_used;
868   UWord   loctab_size;
869   /* An expandable array of inlined fn info.
870      maxinl_codesz is the biggest inlined piece of code
      in inltab (i.e. the max of 'addr_hi - addr_lo'). */
872   DiInlLoc* inltab;
873   UWord   inltab_used;
874   UWord   inltab_size;
875   SizeT   maxinl_codesz;
876
877   /* A set of expandable arrays to store CFI summary info records.
878      The machine specific information (i.e. the DiCfSI_m struct)
879      are stored in cfsi_m_pool, as these are highly duplicated.
880      The DiCfSI_m are allocated in cfsi_m_pool and identified using
881      a (we hope) small integer : often one byte is enough, sometimes
882      2 bytes are needed.
883
884      cfsi_base contains the bases of the code address ranges.
885      cfsi_size is the size of the cfsi_base array.
886      The elements cfsi_base[0] till cfsi_base[cfsi_used-1] are used.
887      Following elements are not used (yet).
888
889      For each base in cfsi_base, an index into cfsi_m_pool is stored
890      in cfsi_m_ix array. The size of cfsi_m_ix is equal to
891      cfsi_size*sizeof_cfsi_m_ix. The used portion of cfsi_m_ix is
892      cfsi_m_ix[0] till cfsi_m_ix[(cfsi_used-1)*sizeof_cfsi_m_ix].
893
894      cfsi_base[i] gives the base address of a code range covered by
895      some CF Info. The corresponding CF Info is identified by an index
896      in cfsi_m_pool. The DiCfSI_m index in cfsi_m_pool corresponding to
897      cfsi_base[i] is given
898        by ((UChar*) cfsi_m_ix)[i] if sizeof_cfsi_m_ix == 1
899        by ((UShort*)cfsi_m_ix)[i] if sizeof_cfsi_m_ix == 2
900        by ((UInt*)  cfsi_m_ix)[i] if sizeof_cfsi_m_ix == 4.
901
902      The end of the code range starting at cfsi_base[i] is given by
903      cfsi_base[i+1]-1 (or cfsi_maxavma for  cfsi_base[cfsi_used-1]).
904      Some code ranges between cfsi_minavma and cfsi_maxavma might not
905      be covered by cfi information. Such not covered ranges are stored by
906      a base in cfsi_base and a corresponding 0 index in cfsi_m_ix.
907
      A variable size representation has been chosen for the elements of
      cfsi_m_ix as in many cases, one byte is good enough. For big
      objects, 2 bytes are needed. No object has yet been found where
      4 bytes are needed (but the code is ready to handle this case).
      Not covered ranges ('cfi holes') are stored explicitly in
      cfsi_base/cfsi_m_ix as this is more memory efficient than storing
      a length for each covered range : on x86 or amd64, we typically have
      a hole every 8 covered ranges. On arm64, we have very few holes
      (1 every 50 or 100 ranges).
917
918      The cfsi information is read and prepared in the cfsi_rd array.
919      Once all the information has been read, the cfsi_base and cfsi_m_ix
920      arrays will be filled in from cfsi_rd. cfsi_rd will then be freed.
921      This is all done by ML_(finish_CFSI_arrays).
922
923      Also includes summary address bounds, showing the min and max address
924      covered by any of the records, as an aid to fast searching.  And, if the
925      records require any expression nodes, they are stored in
926      cfsi_exprs. */
927   Addr* cfsi_base;
928   UInt  sizeof_cfsi_m_ix; /* size in byte of indexes stored in cfsi_m_ix. */
929   void* cfsi_m_ix; /* Each index occupies sizeof_cfsi_m_ix bytes.
930                       The void* is an UChar* or UShort* or UInt*
931                       depending on sizeof_cfsi_m_ix.  */
932
933   DiCfSI* cfsi_rd; /* Only used during reading, NULL once info is read. */
934
935   UWord   cfsi_used;
936   UWord   cfsi_size;
937
938   DedupPoolAlloc *cfsi_m_pool;
939   Addr    cfsi_minavma;
940   Addr    cfsi_maxavma;
941   XArray* cfsi_exprs; /* XArray of CfiExpr */
942
943   /* Optimized code under Wine x86: MSVC++ PDB FramePointerOmitted
944      data.  Non-expandable array, hence .size == .used. */
945   FPO_DATA* fpo;
946   UWord     fpo_size;
947   Addr      fpo_minavma;
948   Addr      fpo_maxavma;
949   Addr      fpo_base_avma;
950
951   /* Pool of strings -- the string table.  Pointers
952      into this are stable (the memory is not reallocated). */
953   DedupPoolAlloc *strpool;
954
955   /* Pool of FnDn -- filename and dirname.
956      Elements in the pool are allocated using VG_(allocFixedEltDedupPA). */
957   DedupPoolAlloc *fndnpool;
958
959   /* Variable scope information, as harvested from Dwarf3 files.
960
961      In short it's an
962
963         array of (array of PC address ranges and variables)
964
965      The outer array indexes over scopes, with Entry 0 containing
966      information on variables which exist for any value of the program
967      counter (PC) -- that is, the outermost scope.  Entries 1, 2, 3,
      etc contain information on increasingly deeply nested variables.
969
970      Each inner array is an array of (an address range, and a set
971      of variables that are in scope over that address range).
972
973      The address ranges may not overlap.
974
975      Since Entry 0 in the outer array holds information on variables
976      that exist for any value of the PC (that is, global vars), it
977      follows that Entry 0's inner array can only have one address
978      range pair, one that covers the entire address space.
979   */
980   XArray* /* of OSet of DiAddrRange */varinfo;
981
982   /* These are arrays of the relevant typed objects, held here
983      partially for the purposes of visiting each object exactly once
984      when we need to delete them. */
985
986   /* An array of TyEnts.  These are needed to make sense of any types
987      in the .varinfo.  Also, when deleting this DebugInfo, we must
988      first traverse this array and throw away malloc'd stuff hanging
989      off it -- by calling ML_(TyEnt__make_EMPTY) on each entry. */
990   XArray* /* of TyEnt */ admin_tyents;
991
992   /* An array of guarded DWARF3 expressions. */
993   XArray* admin_gexprs;
994
995   /* Cached last rx mapping matched and returned by ML_(find_rx_mapping).
996      This helps performance a lot during ML_(addLineInfo) etc., which can
997      easily be invoked hundreds of thousands of times. */
998   DebugInfoMapping* last_rx_map;
999};
1000
1001/* --------------------- functions --------------------- */
1002
1003/* ------ Adding ------ */
1004
1005/* Add a symbol to si's symbol table.  The contents of 'sym' are
1006   copied.  It is assumed (and checked) that 'sym' only contains one
1007   name, so there is no auxiliary ::sec_names vector to duplicate.
1008   IOW, the copy is a shallow copy, and there are assertions in place
1009   to ensure that's OK. */
1010extern void ML_(addSym) ( struct _DebugInfo* di, DiSym* sym );
1011
1012/* Add a filename/dirname pair to a DebugInfo and returns the index
1013   in the fndnpool fixed pool. */
1014extern UInt ML_(addFnDn) (struct _DebugInfo* di,
1015                          const HChar* filename,
1016                          const HChar* dirname);  /* NULL is allowable */
1017
1018/* Returns the filename of the fndn pair identified by fndn_ix.
1019   Returns "???" if fndn_ix is 0. */
1020extern const HChar* ML_(fndn_ix2filename) (const DebugInfo* di,
1021                                           UInt fndn_ix);
1022
1023/* Returns the dirname of the fndn pair identified by fndn_ix.
1024   Returns "" if fndn_ix is 0 or fndn->dirname is NULL. */
1025extern const HChar* ML_(fndn_ix2dirname) (const DebugInfo* di,
1026                                          UInt fndn_ix);
1027
1028/* Returns the fndn_ix for the LineInfo locno in di->loctab.
1029   0 if filename/dirname are unknown. */
1030extern UInt ML_(fndn_ix) (const DebugInfo* di, Word locno);
1031
1032/* Add a line-number record to a DebugInfo.
1033   fndn_ix is an index in di->fndnpool, allocated using  ML_(addFnDn).
1034   Give a 0 index for a unknown filename/dirname pair. */
1035extern
1036void ML_(addLineInfo) ( struct _DebugInfo* di,
1037                        UInt fndn_ix,
1038                        Addr this, Addr next, Int lineno, Int entry);
1039
1040/* Add a call inlined record to a DebugInfo.
1041   A call to the below means that inlinedfn code has been
1042   inlined, resulting in code from [addr_lo, addr_hi[.
1043   Note that addr_hi is excluded, i.e. is not part of the inlined code.
1044   fndn_ix and lineno identifies the location of the call that caused
1045   this inlining.
1046   fndn_ix is an index in di->fndnpool, allocated using  ML_(addFnDn).
1047   Give a 0 index for an unknown filename/dirname pair.
1048   In case of nested inlining, a small level indicates the call
1049   is closer to main that a call with a higher level. */
1050extern
1051void ML_(addInlInfo) ( struct _DebugInfo* di,
1052                       Addr addr_lo, Addr addr_hi,
1053                       const HChar* inlinedfn,
1054                       UInt fndn_ix,
1055                       Int lineno, UShort level);
1056
1057/* Add a CFI summary record.  The supplied DiCfSI_m is copied. */
1058extern void ML_(addDiCfSI) ( struct _DebugInfo* di,
1059                             Addr base, UInt len, DiCfSI_m* cfsi_m );
1060
1061/* Given a position in the di->cfsi_base/cfsi_m_ix arrays, return
1062   the corresponding cfsi_m*. Return NULL if the position corresponds
1063   to a cfsi hole. */
1064DiCfSI_m* ML_(get_cfsi_m) (const DebugInfo* di, UInt pos);
1065
1066/* Add a string to the string table of a DebugInfo.  If len==-1,
1067   ML_(addStr) will itself measure the length of the string. */
1068extern const HChar* ML_(addStr) ( DebugInfo* di, const HChar* str, Int len );
1069
1070/* Add a string to the string table of a DebugInfo, by copying the
1071   string from the given DiCursor.  Measures the length of the string
1072   itself. */
1073extern const HChar* ML_(addStrFromCursor)( DebugInfo* di, DiCursor c );
1074
1075extern void ML_(addVar)( struct _DebugInfo* di,
1076                         Int    level,
1077                         Addr   aMin,
1078                         Addr   aMax,
1079                         const  HChar* name,
1080                         UWord  typeR, /* a cuOff */
1081                         const GExpr* gexpr,
1082                         const GExpr* fbGX, /* SHARED. */
1083                         UInt   fndn_ix, /* where decl'd - may be zero */
1084                         Int    lineNo, /* where decl'd - may be zero */
1085                         Bool   show );
1086/* Note: fndn_ix identifies a filename/dirname pair similarly to
1087   ML_(addInlInfo) and ML_(addLineInfo). */
1088
1089/* Canonicalise the tables held by 'di', in preparation for use.  Call
1090   this after finishing adding entries to these tables. */
1091extern void ML_(canonicaliseTables) ( struct _DebugInfo* di );
1092
1093/* Canonicalise the call-frame-info table held by 'di', in preparation
1094   for use. This is called by ML_(canonicaliseTables) but can also be
1095   called on it's own to sort just this table. */
1096extern void ML_(canonicaliseCFI) ( struct _DebugInfo* di );
1097
1098/* ML_(finish_CFSI_arrays) fills in the cfsi_base and cfsi_m_ix arrays
1099   from cfsi_rd array. cfsi_rd is then freed. */
1100extern void ML_(finish_CFSI_arrays) ( struct _DebugInfo* di );
1101
1102/* ------ Searching ------ */
1103
1104/* Find a symbol-table index containing the specified pointer, or -1
1105   if not found.  Binary search.  */
1106extern Word ML_(search_one_symtab) ( const DebugInfo* di, Addr ptr,
1107                                     Bool match_anywhere_in_sym,
1108                                     Bool findText );
1109
1110/* Find a location-table index containing the specified pointer, or -1
1111   if not found.  Binary search.  */
1112extern Word ML_(search_one_loctab) ( const DebugInfo* di, Addr ptr );
1113
1114/* Find a CFI-table index containing the specified pointer, or -1 if
1115   not found.  Binary search.  */
1116extern Word ML_(search_one_cfitab) ( const DebugInfo* di, Addr ptr );
1117
1118/* Find a FPO-table index containing the specified pointer, or -1
1119   if not found.  Binary search.  */
1120extern Word ML_(search_one_fpotab) ( const DebugInfo* di, Addr ptr );
1121
1122/* Helper function for the most often needed searching for an rx
1123   mapping containing the specified address range.  The range must
1124   fall entirely within the mapping to be considered to be within it.
1125   Asserts if lo > hi; caller must ensure this doesn't happen. */
1126extern DebugInfoMapping* ML_(find_rx_mapping) ( DebugInfo* di,
1127                                                Addr lo, Addr hi );
1128
1129/* ------ Misc ------ */
1130
1131/* Show a non-fatal debug info reading error.  Use VG_(core_panic) for
1132   fatal errors.  'serious' errors are always shown, not 'serious' ones
1133   are shown only at verbosity level 2 and above. */
1134extern
1135void ML_(symerr) ( const DebugInfo* di, Bool serious, const HChar* msg );
1136
1137/* Print a symbol. */
1138extern void ML_(ppSym) ( Int idx, const DiSym* sym );
1139
1140/* Print a call-frame-info summary. */
1141extern void ML_(ppDiCfSI) ( const XArray* /* of CfiExpr */ exprs,
1142                            Addr base, UInt len,
1143                            const DiCfSI_m* si_m );
1144
1145
/* Symbol-table tracing helpers.  Both macros expand to an expression
   that reads 'di->trace_symtab', so a variable named 'di' must be in
   scope wherever they are used.

   TRACE_SYMTAB(format, args...) calls VG_(printf) with the given
   arguments when tracing is enabled and does nothing otherwise.  The
   expansion is wrapped in do { } while (0) so that it behaves as a
   single statement: it must be followed by a semicolon and can be
   used safely as the unbraced body of an if/else. */
#define TRACE_SYMTAB_ENABLED (di->trace_symtab)
#define TRACE_SYMTAB(format, args...) \
   do { \
      if (TRACE_SYMTAB_ENABLED) { VG_(printf)(format, ## args); } \
   } while (0)
1149
1150
1151#endif /* ndef __PRIV_STORAGE_H */
1152
1153/*--------------------------------------------------------------------*/
1154/*--- end                                                          ---*/
1155/*--------------------------------------------------------------------*/
1156