1
2/*--------------------------------------------------------------------*/
3/*--- Format-neutral storage of and querying of info acquired from ---*/
4/*--- ELF/XCOFF stabs/dwarf1/dwarf2 debug info.                    ---*/
5/*---                                               priv_storage.h ---*/
6/*--------------------------------------------------------------------*/
7
8/*
9   This file is part of Valgrind, a dynamic binary instrumentation
10   framework.
11
12   Copyright (C) 2000-2013 Julian Seward
13      jseward@acm.org
14
15   This program is free software; you can redistribute it and/or
16   modify it under the terms of the GNU General Public License as
17   published by the Free Software Foundation; either version 2 of the
18   License, or (at your option) any later version.
19
20   This program is distributed in the hope that it will be useful, but
21   WITHOUT ANY WARRANTY; without even the implied warranty of
22   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23   General Public License for more details.
24
25   You should have received a copy of the GNU General Public License
26   along with this program; if not, write to the Free Software
27   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28   02111-1307, USA.
29
30   The GNU General Public License is contained in the file COPYING.
31*/
32/*
33   Stabs reader greatly improved by Nick Nethercote, Apr 02.
34   This module was also extensively hacked on by Jeremy Fitzhardinge
35   and Tom Hughes.
36*/
37/* See comment at top of debuginfo.c for explanation of
38   the _svma / _avma / _image / _bias naming scheme.
39*/
40/* Note this is not freestanding; needs pub_core_xarray.h and
41   priv_tytypes.h to be included before it. */
42
43#ifndef __PRIV_STORAGE_H
44#define __PRIV_STORAGE_H
45
46#include "pub_core_basics.h"   // Addr
47#include "pub_core_xarray.h"   // XArray
48#include "priv_d3basics.h"     // GExpr et al.
49#include "priv_image.h"        // DiCursor
50
51/* --------------------- SYMBOLS --------------------- */
52
53/* A structure to hold an ELF/MachO symbol (very crudely).  Usually
54   the symbol only has one name, which is stored in ::pri_name, and
55   ::sec_names is NULL.  If there are other names, these are stored in
56   ::sec_names, which is a NULL terminated vector holding the names.
57   The vector is allocated in VG_AR_DINFO, the names themselves live
58   in DebugInfo::strchunks.
59
60   From the point of view of ELF, the primary vs secondary distinction
61   is artificial: they are all just names associated with the address,
62   none of which has higher precedence than any other.  However, from
63   the point of view of mapping an address to a name to display to the
64   user, we need to choose one "preferred" name, and so that might as
65   well be installed as the pri_name, whilst all others can live in
66   sec_names[].  This has the convenient side effect that, in the
67   common case where there is only one name for the address,
68   sec_names[] does not need to be allocated.
69*/
70typedef
71   struct {
72      Addr    addr;    /* lowest address of entity */
73      Addr    tocptr;  /* ppc64-linux only: value that R2 should have */
74      HChar*  pri_name;  /* primary name, never NULL */
75      HChar** sec_names; /* NULL, or a NULL term'd array of other names */
76      // XXX: this could be shrunk (on 32-bit platforms) by using 30
77      // bits for the size and 1 bit each for isText and isIFunc.  If you
78      // do this, make sure that all assignments to the latter two use
79      // 0 or 1 (or True or False), and that a positive number larger
80      // than 1 is never used to represent True.
81      UInt    size;    /* size in bytes */
82      Bool    isText;
83      Bool    isIFunc; /* symbol is an indirect function? */
84   }
85   DiSym;
86
87/* --------------------- SRCLOCS --------------------- */
88
89/* Line count at which overflow happens, due to line numbers being
90   stored as shorts in `struct nlist' in a.out.h. */
#define LINENO_OVERFLOW (1 << (8 * sizeof(short)))

/* A DiLoc keeps a line number and a byte count in bit-fields of
   LINENO_BITS and LOC_SIZE_BITS bits respectively; the two widths add
   up to 32. */
#define LINENO_BITS     20
#define LOC_SIZE_BITS  (32 - LINENO_BITS)

/* Largest line number that fits in LINENO_BITS bits. */
#define MAX_LINENO     ((1 << LINENO_BITS) - 1)

/* Largest byte count that fits in LOC_SIZE_BITS bits.  Unlikely to
   have any lines with instruction ranges > 4096 bytes. */
#define MAX_LOC_SIZE   ((1 << LOC_SIZE_BITS) - 1)

/* Number used to detect line number overflows; if one line is
   60000-odd smaller than the previous, it was probably an overflow.
 */
#define OVERFLOW_DIFFERENCE     (LINENO_OVERFLOW - 5000)
104
105/* A structure to hold addr-to-source info for a single line.  There
106  can be a lot of these, hence the dense packing. */
107typedef
108   struct {
109      /* Word 1 */
110      Addr   addr;               /* lowest address for this line */
111      /* Word 2 */
112      UShort size:LOC_SIZE_BITS; /* # bytes; we catch overflows of this */
113      UInt   lineno:LINENO_BITS; /* source line number, or zero */
114      /* Word 3 */
115      const HChar* filename;     /* source filename */
116      /* Word 4 */
117      const HChar* dirname;      /* source directory name */
118   }
119   DiLoc;
120
121/* --------------------- CF INFO --------------------- */
122
123/* DiCfSI: a structure to summarise DWARF2/3 CFA info for the code
124   address range [base .. base+len-1].
125
126   On x86 and amd64 ("IA"), if you know ({e,r}sp, {e,r}bp, {e,r}ip) at
127   some point and {e,r}ip is in the range [base .. base+len-1], it
128   tells you how to calculate ({e,r}sp, {e,r}bp) for the caller of the
129   current frame and also ra, the return address of the current frame.
130
131   First off, calculate CFA, the Canonical Frame Address, thusly:
132
133     cfa = case cfa_how of
134              CFIC_IA_SPREL -> {e,r}sp + cfa_off
135              CFIC_IA_BPREL -> {e,r}bp + cfa_off
136              CFIC_EXPR     -> expr whose index is in cfa_off
137
138   Once that is done, the previous frame's {e,r}sp/{e,r}bp values and
139   this frame's {e,r}ra value can be calculated like this:
140
141     old_{e,r}sp/{e,r}bp/ra
142         = case {e,r}sp/{e,r}bp/ra_how of
143              CFIR_UNKNOWN   -> we don't know, sorry
              CFIR_SAME      -> same as it was before (sp/bp only)
145              CFIR_CFAREL    -> cfa + sp/bp/ra_off
146              CFIR_MEMCFAREL -> *( cfa + sp/bp/ra_off )
147              CFIR_EXPR      -> expr whose index is in sp/bp/ra_off
148
149   On ARM it's pretty much the same, except we have more registers to
150   keep track of:
151
152     cfa = case cfa_how of
153              CFIC_ARM_R13REL -> r13 + cfa_off
154              CFIC_ARM_R12REL -> r12 + cfa_off
155              CFIC_ARM_R11REL -> r11 + cfa_off
156              CFIC_ARM_R7REL  -> r7  + cfa_off
              CFIC_EXPR       -> expr whose index is in cfa_off
158
159     old_r14/r13/r12/r11/r7/ra
160         = case r14/r13/r12/r11/r7/ra_how of
161              CFIR_UNKNOWN   -> we don't know, sorry
162              CFIR_SAME      -> same as it was before (r14/r13/r12/r11/r7 only)
163              CFIR_CFAREL    -> cfa + r14/r13/r12/r11/r7/ra_off
164              CFIR_MEMCFAREL -> *( cfa + r14/r13/r12/r11/r7/ra_off )
165              CFIR_EXPR      -> expr whose index is in r14/r13/r12/r11/r7/ra_off
166
167   On ARM64:
168
169     cfa = case cfa_how of
170              CFIC_ARM64_SPREL  -> sp + cfa_off
171              CFIC_ARM64_X29REL -> x29 + cfa_off
172              CFIC_EXPR         -> expr whose index is in cfa_off
173
174     old_sp/x30/x29/ra
175         = case sp/x30/x29/ra_how of
176              CFIR_UNKNOWN   -> we don't know, sorry
177              CFIR_SAME      -> same as it was before
              CFIR_CFAREL    -> cfa + sp/x30/x29/ra_off
              CFIR_MEMCFAREL -> *( cfa + sp/x30/x29/ra_off )
180              CFIR_EXPR      -> expr whose index is in sp/x30/x29/ra_off
181
182   On s390x we have a similar logic as x86 or amd64. We need the stack pointer
183   (r15), the frame pointer r11 (like BP) and together with the instruction
184   address in the PSW we can calculate the previous values:
185     cfa = case cfa_how of
186              CFIC_IA_SPREL -> r15 + cfa_off
187              CFIC_IA_BPREL -> r11 + cfa_off
188              CFIC_EXPR     -> expr whose index is in cfa_off
189
190     old_sp/fp/ra
191         = case sp/fp/ra_how of
192              CFIR_UNKNOWN   -> we don't know, sorry
193              CFIR_SAME      -> same as it was before (sp/fp only)
194              CFIR_CFAREL    -> cfa + sp/fp/ra_off
195              CFIR_MEMCFAREL -> *( cfa + sp/fp/ra_off )
196              CFIR_EXPR      -> expr whose index is in sp/fp/ra_off
197*/
198
/* CFIC_ values: how to compute the CFA.  Stored in DiCfSI.cfa_how. */

/* x86 / amd64 (also used by s390x, with r15 as SP and r11 as BP) */
#define CFIC_IA_SPREL     ((UChar)1)   /* cfa = sp + cfa_off */
#define CFIC_IA_BPREL     ((UChar)2)   /* cfa = bp + cfa_off */

/* arm */
#define CFIC_ARM_R13REL   ((UChar)3)   /* cfa = r13 + cfa_off */
#define CFIC_ARM_R12REL   ((UChar)4)   /* cfa = r12 + cfa_off */
#define CFIC_ARM_R11REL   ((UChar)5)   /* cfa = r11 + cfa_off */
#define CFIC_ARM_R7REL    ((UChar)6)   /* cfa = r7  + cfa_off */

/* arm64 */
#define CFIC_ARM64_SPREL  ((UChar)7)   /* cfa = sp  + cfa_off */
#define CFIC_ARM64_X29REL ((UChar)8)   /* cfa = x29 + cfa_off */

/* all targets: cfa_off is the index of an expression */
#define CFIC_EXPR         ((UChar)9)

/* CFIR_ values: how to recover a register of the calling frame.
   Stored in the DiCfSI.*_how fields other than cfa_how. */
#define CFIR_UNKNOWN      ((UChar)64)  /* we don't know, sorry */
#define CFIR_SAME         ((UChar)65)  /* same as it was before */
#define CFIR_CFAREL       ((UChar)66)  /* cfa + <reg>_off */
#define CFIR_MEMCFAREL    ((UChar)67)  /* *( cfa + <reg>_off ) */
#define CFIR_EXPR         ((UChar)68)  /* expr whose index is in <reg>_off */
214
215#if defined(VGA_x86) || defined(VGA_amd64)
216typedef
217   struct {
218      Addr  base;
219      UInt  len;
220      UChar cfa_how; /* a CFIC_IA value */
221      UChar ra_how;  /* a CFIR_ value */
222      UChar sp_how;  /* a CFIR_ value */
223      UChar bp_how;  /* a CFIR_ value */
224      Int   cfa_off;
225      Int   ra_off;
226      Int   sp_off;
227      Int   bp_off;
228   }
229   DiCfSI;
230#elif defined(VGA_arm)
231typedef
232   struct {
233      Addr  base;
234      UInt  len;
235      UChar cfa_how; /* a CFIC_ value */
236      UChar ra_how;  /* a CFIR_ value */
237      UChar r14_how; /* a CFIR_ value */
238      UChar r13_how; /* a CFIR_ value */
239      UChar r12_how; /* a CFIR_ value */
240      UChar r11_how; /* a CFIR_ value */
241      UChar r7_how;  /* a CFIR_ value */
242      Int   cfa_off;
243      Int   ra_off;
244      Int   r14_off;
245      Int   r13_off;
246      Int   r12_off;
247      Int   r11_off;
248      Int   r7_off;
249   }
250   DiCfSI;
251#elif defined(VGA_arm64)
252typedef
253   struct {
254      Addr  base;
255      UInt  len;
256      UChar cfa_how; /* a CFIC_ value */
257      UChar ra_how;  /* a CFIR_ value */
258      UChar sp_how;  /* a CFIR_ value */ /*dw31=SP*/
259      UChar x30_how; /* a CFIR_ value */ /*dw30=LR*/
260      UChar x29_how; /* a CFIR_ value */ /*dw29=FP*/
261      Int   cfa_off;
262      Int   ra_off;
263      Int   sp_off;
264      Int   x30_off;
265      Int   x29_off;
266   }
267   DiCfSI;
268#elif defined(VGA_ppc32) || defined(VGA_ppc64)
269/* Just have a struct with the common fields in, so that code that
270   processes the common fields doesn't have to be ifdef'd against
271   VGP_/VGA_ symbols.  These are not used in any way on ppc32/64-linux
272   at the moment. */
273typedef
274   struct {
275      Addr  base;
276      UInt  len;
277      UChar cfa_how; /* a CFIC_ value */
278      UChar ra_how;  /* a CFIR_ value */
279      Int   cfa_off;
280      Int   ra_off;
281   }
282   DiCfSI;
283#elif defined(VGA_s390x)
284typedef
285   struct {
286      Addr  base;
287      UInt  len;
288      UChar cfa_how; /* a CFIC_ value */
289      UChar sp_how;  /* a CFIR_ value */
290      UChar ra_how;  /* a CFIR_ value */
291      UChar fp_how;  /* a CFIR_ value */
292      Int   cfa_off;
293      Int   sp_off;
294      Int   ra_off;
295      Int   fp_off;
296   }
297   DiCfSI;
298#elif defined(VGA_mips32) || defined(VGA_mips64)
299typedef
300   struct {
301      Addr  base;
302      UInt  len;
303      UChar cfa_how; /* a CFIC_ value */
304      UChar ra_how;  /* a CFIR_ value */
305      UChar sp_how;  /* a CFIR_ value */
306      UChar fp_how;  /* a CFIR_ value */
307      Int   cfa_off;
308      Int   ra_off;
309      Int   sp_off;
310      Int   fp_off;
311   }
312   DiCfSI;
313#else
314#  error "Unknown arch"
315#endif
316
317
/* Operator selector for the Unop form of a CfiExpr.  Numbering starts
   at 0x231 and continues consecutively. */
typedef enum {
   Cunop_Abs = 0x231,
   Cunop_Neg,
   Cunop_Not
} CfiUnop;
325
/* Operator selector for the Binop form of a CfiExpr.  Numbering starts
   at 0x321 and continues consecutively. */
typedef enum {
   Cbinop_Add = 0x321,
   Cbinop_Sub,
   Cbinop_And,
   Cbinop_Mul,
   Cbinop_Shl,
   Cbinop_Shr,
   Cbinop_Eq,
   Cbinop_Ge,
   Cbinop_Gt,
   Cbinop_Le,
   Cbinop_Lt,
   Cbinop_Ne
} CfiBinop;
342
/* Register selector for the CfiReg form of a CfiExpr.  The name
   prefixes (IA, ARM, ARM64, S390, MIPS) indicate the architecture each
   entry belongs to.  Numbering starts at 0x213 and continues
   consecutively. */
typedef enum {
   Creg_IA_SP = 0x213,
   Creg_IA_BP,
   Creg_IA_IP,
   Creg_ARM_R13,
   Creg_ARM_R12,
   Creg_ARM_R15,
   Creg_ARM_R14,
   Creg_ARM64_X30,
   Creg_S390_R14,
   Creg_MIPS_RA
} CfiReg;
357
/* Discriminator for CfiExpr: says which member of the CfiExpr.Cex
   union is in use.  Numbering starts at 0x123 and continues
   consecutively. */
typedef enum {
   Cex_Undef = 0x123,
   Cex_Deref,
   Cex_Const,
   Cex_Unop,
   Cex_Binop,
   Cex_CfiReg,
   Cex_DwReg
} CfiExprTag;
369
370typedef
371   struct {
372      CfiExprTag tag;
373      union {
374         struct {
375         } Undef;
376         struct {
377            Int ixAddr;
378         } Deref;
379         struct {
380            UWord con;
381         } Const;
382         struct {
383            CfiUnop op;
384            Int ix;
385         } Unop;
386         struct {
387            CfiBinop op;
388            Int ixL;
389            Int ixR;
390         } Binop;
391         struct {
392            CfiReg reg;
393         } CfiReg;
394         struct {
395            Int reg;
396         } DwReg;
397      }
398      Cex;
399   }
400   CfiExpr;
401
402extern Int ML_(CfiExpr_Undef) ( XArray* dst );
403extern Int ML_(CfiExpr_Deref) ( XArray* dst, Int ixAddr );
404extern Int ML_(CfiExpr_Const) ( XArray* dst, UWord con );
405extern Int ML_(CfiExpr_Unop)  ( XArray* dst, CfiUnop op, Int ix );
406extern Int ML_(CfiExpr_Binop) ( XArray* dst, CfiBinop op, Int ixL, Int ixR );
407extern Int ML_(CfiExpr_CfiReg)( XArray* dst, CfiReg reg );
408extern Int ML_(CfiExpr_DwReg) ( XArray* dst, Int reg );
409
410extern void ML_(ppCfiExpr)( XArray* src, Int ix );
411
412/* ---------------- FPO INFO (Windows PE) -------------- */
413
414/* for apps using Wine: MSVC++ PDB FramePointerOmitted: somewhat like
415   a primitive CFI */
416typedef
417   struct _FPO_DATA {  /* 16 bytes */
418      UInt   ulOffStart; /* offset of 1st byte of function code */
419      UInt   cbProcSize; /* # bytes in function */
420      UInt   cdwLocals;  /* # bytes/4 in locals */
421      UShort cdwParams;  /* # bytes/4 in params */
422      UChar  cbProlog;   /* # bytes in prolog */
423      UChar  cbRegs :3;  /* # regs saved */
424      UChar  fHasSEH:1;  /* Structured Exception Handling */
425      UChar  fUseBP :1;  /* EBP has been used */
426      UChar  reserved:1;
427      UChar  cbFrame:2;  /* frame type */
428   }
429   FPO_DATA;
430
/* Frame type codes.  NOTE(review): presumably the values carried by
   FPO_DATA.cbFrame ("frame type") -- confirm against the PDB reader. */
#define PDB_FRAME_FPO   0
#define PDB_FRAME_TRAP  1
#define PDB_FRAME_TSS   2
434
435/* --------------------- VARIABLES --------------------- */
436
437typedef
438   struct {
439      Addr    aMin;
440      Addr    aMax;
441      XArray* /* of DiVariable */ vars;
442   }
443   DiAddrRange;
444
445typedef
446   struct {
447      HChar* name;  /* in DebugInfo.strchunks */
448      UWord  typeR; /* a cuOff */
449      GExpr* gexpr; /* on DebugInfo.gexprs list */
450      GExpr* fbGX;  /* SHARED. */
451      HChar* fileName; /* where declared; may be NULL. in
452                          DebugInfo.strchunks */
453      Int    lineNo;   /* where declared; may be zero. */
454   }
455   DiVariable;
456
457Word
458ML_(cmp_for_DiAddrRange_range) ( const void* keyV, const void* elemV );
459
460/* --------------------- DEBUGINFO --------------------- */
461
462/* This is the top-level data type.  It's a structure which contains
463   information pertaining to one mapped ELF object.  This type is
464   exported only abstractly - in pub_tool_debuginfo.h. */
465
466/* First though, here's an auxiliary data structure.  It is only ever
467   used as part of a struct _DebugInfo.  We use it to record
468   observations about mappings and permission changes to the
469   associated file, so as to decide when to read debug info.  It's
470   essentially an ultra-trivial finite state machine which, when it
471   reaches an accept state, signals that we should now read debug info
472   from the object into the associated struct _DebugInfo.  The accept
473   state is arrived at when have_rx_map and have_rw_map both become
474   true.  The initial state is one in which we have no observations,
475   so have_rx_map and have_rw_map are both false.
476
477   This all started as a rather ad-hoc solution, but was further
478   expanded to handle weird object layouts, e.g. more than one rw
479   or rx mapping for one binary.
480
481   The normal sequence of events is one of
482
483   start  -->  r-x mapping  -->  rw- mapping  -->  accept
484   start  -->  rw- mapping  -->  r-x mapping  -->  accept
485
486   that is, take the first r-x and rw- mapping we see, and we're done.
487
488   On MacOSX 10.7, 32-bit, there appears to be a new variant:
489
490   start  -->  r-- mapping  -->  rw- mapping
491          -->  upgrade r-- mapping to r-x mapping  -->  accept
492
493   where the upgrade is done by a call to vm_protect.  Hence we
494   need to also track this possibility.
495*/
496
497struct _DebugInfoMapping
498{
499   Addr  avma; /* these fields record the file offset, length */
500   SizeT size; /* and map address of each mapping             */
501   OffT  foff;
502   Bool  rx, rw, ro;  /* memory access flags for this mapping */
503};
504
505struct _DebugInfoFSM
506{
507   HChar*  filename;  /* in mallocville (VG_AR_DINFO)               */
508   XArray* maps;      /* XArray of _DebugInfoMapping structs        */
509   Bool  have_rx_map; /* did we see a r?x mapping yet for the file? */
510   Bool  have_rw_map; /* did we see a rw? mapping yet for the file? */
511   Bool  have_ro_map; /* did we see a r-- mapping yet for the file? */
512};
513
514
515/* To do with the string table in struct _DebugInfo (::strchunks) */
#define SEGINFO_STRCHUNKSIZE (64 * 1024)   /* chars per strchunk */
517
518
519/* We may encounter more than one .eh_frame section in an object --
520   unusual but apparently allowed by ELF.  See
521   http://sourceware.org/bugzilla/show_bug.cgi?id=12675
522*/
#define N_EHFRAME_SECTS 2   /* length of ehframe_avma[] / ehframe_size[] */
524
525
526/* So, the main structure for holding debug info for one object. */
527
528struct _DebugInfo {
529
530   /* Admin stuff */
531
532   struct _DebugInfo* next;   /* list of DebugInfos */
533   Bool               mark;   /* marked for deletion? */
534
535   /* An abstract handle, which can be used by entities outside of
536      m_debuginfo to (in an abstract datatype sense) refer to this
537      struct _DebugInfo.  A .handle of zero is invalid; valid handles
538      are 1 and above.  The same handle is never issued twice (in any
539      given run of Valgrind), so a handle becomes invalid when the
540      associated struct _DebugInfo is discarded, and remains invalid
541      forever thereafter.  The .handle field is set as soon as this
542      structure is allocated. */
543   ULong handle;
544
545   /* Used for debugging only - indicate what stuff to dump whilst
546      reading stuff into the seginfo.  Are computed as early in the
547      lifetime of the DebugInfo as possible -- at the point when it is
548      created.  Use these when deciding what to spew out; do not use
549      the global VG_(clo_blah) flags. */
550
551   Bool trace_symtab; /* symbols, our style */
552   Bool trace_cfi;    /* dwarf frame unwind, our style */
553   Bool ddump_syms;   /* mimic /usr/bin/readelf --syms */
554   Bool ddump_line;   /* mimic /usr/bin/readelf --debug-dump=line */
555   Bool ddump_frames; /* mimic /usr/bin/readelf --debug-dump=frames */
556
557   /* The "decide when it is time to read debuginfo" state machine.
558      This structure must get filled in before we can start reading
559      anything from the ELF/MachO file.  This structure is filled in
560      by VG_(di_notify_mmap) and its immediate helpers. */
561   struct _DebugInfoFSM fsm;
562
563   /* Once the ::fsm has reached an accept state -- typically, when
564      both a rw? and r?x mapping for .filename have been observed --
565      we can go on to read the symbol tables and debug info.
566      .have_dinfo changes from False to True when the debug info has
567      been completely read in and postprocessed (canonicalised) and is
568      now suitable for querying. */
569   /* If have_dinfo is False, then all fields below this point are
570      invalid and should not be consulted. */
571   Bool  have_dinfo; /* initially False */
572
573   /* All the rest of the fields in this structure are filled in once
574      we have committed to reading the symbols and debug info (that
575      is, at the point where .have_dinfo is set to True). */
576
577   /* The file's soname. */
578   HChar* soname;
579
580   /* Description of some important mapped segments.  The presence or
581      absence of the mapping is denoted by the _present field, since
582      in some obscure circumstances (to do with data/sdata/bss) it is
583      possible for the mapping to be present but have zero size.
584      Certainly text_ is mandatory on all platforms; not sure about
585      the rest though.
586
587      --------------------------------------------------------
588
589      Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS: we require that
590
591      either (size of all rx maps == 0 && cfsi == NULL) (the degenerate case)
592
593      or the normal case, which is the AND of the following:
594      (0) size of at least one rx mapping > 0
595      (1) no two DebugInfos with some rx mapping of size > 0
596          have overlapping rx mappings
597      (2) [cfsi_minavma,cfsi_maxavma] does not extend beyond
598          [avma,+size) of one rx mapping; that is, the former
599          is a subrange or equal to the latter.
600      (3) all DiCfSI in the cfsi array all have ranges that fall within
601          [avma,+size) of that rx mapping.
602      (4) all DiCfSI in the cfsi array are non-overlapping
603
604      The cumulative effect of these restrictions is to ensure that
605      all the DiCfSI records in the entire system are non overlapping.
606      Hence any address falls into either exactly one DiCfSI record,
607      or none.  Hence it is safe to cache the results of searches for
608      DiCfSI records.  This is the whole point of these restrictions.
609      The caching of DiCfSI searches is done in VG_(use_CF_info).  The
610      cache is flushed after any change to debugInfo_list.  DiCfSI
611      searches are cached because they are central to stack unwinding
612      on amd64-linux.
613
614      Where are these invariants imposed and checked?
615
616      They are checked after a successful read of debuginfo into
617      a DebugInfo*, in check_CFSI_related_invariants.
618
619      (1) is not really imposed anywhere.  We simply assume that the
620      kernel will not map the text segments from two different objects
621      into the same space.  Sounds reasonable.
622
623      (2) follows from (4) and (3).  It is ensured by canonicaliseCFI.
624      (3) is ensured by ML_(addDiCfSI).
625      (4) is ensured by canonicaliseCFI.
626
627      --------------------------------------------------------
628
629      Comment_on_DEBUG_SVMA_and_DEBUG_BIAS_fields:
630
631      The _debug_{svma,bias} fields were added as part of a fix to
632      #185816.  The problem encompassed in that bug report was that it
633      wasn't correct to use apply the bias values deduced for a
634      primary object to its associated debuginfo object, because the
635      debuginfo object (or the primary) could have been prelinked to a
636      different SVMA.  Hence debuginfo and primary objects need to
637      have their own biases.
638
639      ------ JRS: (referring to r9329): ------
640      Let me see if I understand the workings correctly.  Initially
641      the _debug_ values are set to the same values as the "normal"
642      ones, as there's a bunch of bits of code like this (in
643      readelf.c)
644
645         di->text_svma = svma;
646         ...
647         di->text_bias = rx_bias;
648         di->text_debug_svma = svma;
649         di->text_debug_bias = rx_bias;
650
651      If a debuginfo object subsequently shows up then the
652      _debug_svma/bias are set for the debuginfo object.  Result is
653      that if there's no debuginfo object then the values are the same
654      as the primary-object values, and if there is a debuginfo object
655      then they will (or at least may) be different.
656
657      Then when we need to actually bias something, we'll have to
658      decide whether to use the primary bias or the debuginfo bias.
659      And the strategy is to use the primary bias for ELF symbols but
660      the debuginfo bias for anything pulled out of Dwarf.
661
662      ------ THH: ------
663      Correct - the debug_svma and bias values apply to any address
664      read from the debug data regardless of where that debug data is
665      stored and the other values are used for addresses from other
666      places (primarily the symbol table).
667
668      ------ JRS: ------
669      Ok; so this was my only area of concern.  Are there any
670      corner-case scenarios where this wouldn't be right?  It sounds
671      like we're assuming the ELF symbols come from the primary object
672      and, if there is a debug object, then all the Dwarf comes from
673      there.  But what if (eg) both symbols and Dwarf come from the
674      debug object?  Is that even possible or allowable?
675
676      ------ THH: ------
677      You may have a point...
678
679      The current logic is to try and take any one set of data from
680      either the base object or the debug object. There are four sets
681      of data we consider:
682
683         - Symbol Table
684         - Stabs
685         - DWARF1
686         - DWARF2
687
688      If we see the primary section for a given set in the base object
689      then we ignore all sections relating to that set in the debug
690      object.
691
692      Now in principle if we saw a secondary section (like debug_line
693      say) in the base object, but not the main section (debug_info in
694      this case) then we would take debug_info from the debug object
695      but would use the debug_line from the base object unless we saw
696      a replacement copy in the debug object. That's probably unlikely
697      however.
698
699      A bigger issue might be, as you say, the symbol table as we will
700      pick that up from the debug object if it isn't in the base. The
701      dynamic symbol table will always have to be in the base object
702      though so we will have to be careful when processing symbols to
703      know which table we are reading in that case.
704
705      What we probably need to do is tell read_elf_symtab which object
706      the symbols it is being asked to read came from.
707
708      (A followup patch to deal with this was committed in r9469).
709   */
710   /* .text */
711   Bool     text_present;
712   Addr     text_avma;
713   Addr     text_svma;
714   SizeT    text_size;
715   PtrdiffT text_bias;
716   Addr     text_debug_svma;
717   PtrdiffT text_debug_bias;
718   /* .data */
719   Bool     data_present;
720   Addr     data_svma;
721   Addr     data_avma;
722   SizeT    data_size;
723   PtrdiffT data_bias;
724   Addr     data_debug_svma;
725   PtrdiffT data_debug_bias;
726   /* .sdata */
727   Bool     sdata_present;
728   Addr     sdata_svma;
729   Addr     sdata_avma;
730   SizeT    sdata_size;
731   PtrdiffT sdata_bias;
732   Addr     sdata_debug_svma;
733   PtrdiffT sdata_debug_bias;
734   /* .rodata */
735   Bool     rodata_present;
736   Addr     rodata_svma;
737   Addr     rodata_avma;
738   SizeT    rodata_size;
739   PtrdiffT rodata_bias;
740   Addr     rodata_debug_svma;
741   PtrdiffT rodata_debug_bias;
742   /* .bss */
743   Bool     bss_present;
744   Addr     bss_svma;
745   Addr     bss_avma;
746   SizeT    bss_size;
747   PtrdiffT bss_bias;
748   Addr     bss_debug_svma;
749   PtrdiffT bss_debug_bias;
750   /* .sbss */
751   Bool     sbss_present;
752   Addr     sbss_svma;
753   Addr     sbss_avma;
754   SizeT    sbss_size;
755   PtrdiffT sbss_bias;
756   Addr     sbss_debug_svma;
757   PtrdiffT sbss_debug_bias;
758   /* .plt */
759   Bool   plt_present;
760   Addr	  plt_avma;
761   SizeT  plt_size;
762   /* .got */
763   Bool   got_present;
764   Addr   got_avma;
765   SizeT  got_size;
766   /* .got.plt */
767   Bool   gotplt_present;
768   Addr   gotplt_avma;
769   SizeT  gotplt_size;
770   /* .opd -- needed on ppc64-linux for finding symbols */
771   Bool   opd_present;
772   Addr   opd_avma;
773   SizeT  opd_size;
774   /* .ehframe -- needed on amd64-linux for stack unwinding.  We might
775      see more than one, hence the arrays. */
776   UInt   n_ehframe;  /* 0 .. N_EHFRAME_SECTS */
777   Addr   ehframe_avma[N_EHFRAME_SECTS];
778   SizeT  ehframe_size[N_EHFRAME_SECTS];
779
780   /* Sorted tables of stuff we snarfed from the file.  This is the
781      eventual product of reading the debug info.  All this stuff
782      lives in VG_AR_DINFO. */
783
784   /* An expandable array of symbols. */
785   DiSym*  symtab;
786   UWord   symtab_used;
787   UWord   symtab_size;
788   /* An expandable array of locations. */
789   DiLoc*  loctab;
790   UWord   loctab_used;
791   UWord   loctab_size;
792   /* An expandable array of CFI summary info records.  Also includes
793      summary address bounds, showing the min and max address covered
794      by any of the records, as an aid to fast searching.  And, if the
795      records require any expression nodes, they are stored in
796      cfsi_exprs. */
797   DiCfSI* cfsi;
798   UWord   cfsi_used;
799   UWord   cfsi_size;
800   Addr    cfsi_minavma;
801   Addr    cfsi_maxavma;
802   XArray* cfsi_exprs; /* XArray of CfiExpr */
803
804   /* Optimized code under Wine x86: MSVC++ PDB FramePointerOmitted
805      data.  Non-expandable array, hence .size == .used. */
806   FPO_DATA* fpo;
807   UWord     fpo_size;
808   Addr      fpo_minavma;
809   Addr      fpo_maxavma;
810   Addr      fpo_base_avma;
811
812   /* Expandable arrays of characters -- the string table.  Pointers
813      into this are stable (the arrays are not reallocated). */
814   struct strchunk {
815      UInt   strtab_used;
816      struct strchunk* next;
817      HChar  strtab[SEGINFO_STRCHUNKSIZE];
818   } *strchunks;
819
820   /* Variable scope information, as harvested from Dwarf3 files.
821
822      In short it's an
823
824         array of (array of PC address ranges and variables)
825
826      The outer array indexes over scopes, with Entry 0 containing
827      information on variables which exist for any value of the program
828      counter (PC) -- that is, the outermost scope.  Entries 1, 2, 3,
829      etc contain information on increasinly deeply nested variables.
830
831      Each inner array is an array of (an address range, and a set
832      of variables that are in scope over that address range).
833
834      The address ranges may not overlap.
835
836      Since Entry 0 in the outer array holds information on variables
837      that exist for any value of the PC (that is, global vars), it
838      follows that Entry 0's inner array can only have one address
839      range pair, one that covers the entire address space.
840   */
841   XArray* /* of OSet of DiAddrRange */varinfo;
842
843   /* These are arrays of the relevant typed objects, held here
844      partially for the purposes of visiting each object exactly once
845      when we need to delete them. */
846
847   /* An array of TyEnts.  These are needed to make sense of any types
848      in the .varinfo.  Also, when deleting this DebugInfo, we must
849      first traverse this array and throw away malloc'd stuff hanging
850      off it -- by calling ML_(TyEnt__make_EMPTY) on each entry. */
851   XArray* /* of TyEnt */ admin_tyents;
852
853   /* An array of guarded DWARF3 expressions. */
854   XArray* admin_gexprs;
855
856   /* Cached last rx mapping matched and returned by ML_(find_rx_mapping).
857      This helps performance a lot during ML_(addLineInfo) etc., which can
858      easily be invoked hundreds of thousands of times. */
859   struct _DebugInfoMapping* last_rx_map;
860};
861
862/* --------------------- functions --------------------- */
863
864/* ------ Adding ------ */
865
866/* Add a symbol to si's symbol table.  The contents of 'sym' are
867   copied.  It is assumed (and checked) that 'sym' only contains one
868   name, so there is no auxiliary ::sec_names vector to duplicate.
869   IOW, the copy is a shallow copy, and there are assertions in place
870   to ensure that's OK. */
871extern void ML_(addSym) ( struct _DebugInfo* di, DiSym* sym );
872
873/* Add a line-number record to a DebugInfo. */
874extern
875void ML_(addLineInfo) ( struct _DebugInfo* di,
876                        const HChar* filename,
877                        const HChar* dirname,  /* NULL is allowable */
878                        Addr this, Addr next, Int lineno, Int entry);
879
880/* Add a CFI summary record.  The supplied DiCfSI is copied. */
881extern void ML_(addDiCfSI) ( struct _DebugInfo* di, DiCfSI* cfsi );
882
883/* Add a string to the string table of a DebugInfo.  If len==-1,
884   ML_(addStr) will itself measure the length of the string. */
885extern HChar* ML_(addStr) ( struct _DebugInfo* di, const HChar* str, Int len );
886
887/* Add a string to the string table of a DebugInfo, by copying the
888   string from the given DiCursor.  Measures the length of the string
889   itself. */
890extern HChar* ML_(addStrFromCursor)( struct _DebugInfo* di, DiCursor c );
891
892extern void ML_(addVar)( struct _DebugInfo* di,
893                         Int    level,
894                         Addr   aMin,
895                         Addr   aMax,
896                         HChar* name,
897                         UWord  typeR, /* a cuOff */
898                         GExpr* gexpr,
899                         GExpr* fbGX, /* SHARED. */
900                         HChar* fileName, /* where decl'd - may be NULL */
901                         Int    lineNo, /* where decl'd - may be zero */
902                         Bool   show );
903
904/* Canonicalise the tables held by 'di', in preparation for use.  Call
905   this after finishing adding entries to these tables. */
906extern void ML_(canonicaliseTables) ( struct _DebugInfo* di );
907
908/* Canonicalise the call-frame-info table held by 'di', in preparation
909   for use. This is called by ML_(canonicaliseTables) but can also be
910   called on it's own to sort just this table. */
911extern void ML_(canonicaliseCFI) ( struct _DebugInfo* di );
912
913/* ------ Searching ------ */
914
915/* Find a symbol-table index containing the specified pointer, or -1
916   if not found.  Binary search.  */
917extern Word ML_(search_one_symtab) ( struct _DebugInfo* di, Addr ptr,
918                                     Bool match_anywhere_in_sym,
919                                     Bool findText );
920
921/* Find a location-table index containing the specified pointer, or -1
922   if not found.  Binary search.  */
923extern Word ML_(search_one_loctab) ( struct _DebugInfo* di, Addr ptr );
924
925/* Find a CFI-table index containing the specified pointer, or -1 if
926   not found.  Binary search.  */
927extern Word ML_(search_one_cfitab) ( struct _DebugInfo* di, Addr ptr );
928
929/* Find a FPO-table index containing the specified pointer, or -1
930   if not found.  Binary search.  */
931extern Word ML_(search_one_fpotab) ( struct _DebugInfo* di, Addr ptr );
932
933/* Helper function for the most often needed searching for an rx
934   mapping containing the specified address range.  The range must
935   fall entirely within the mapping to be considered to be within it.
936   Asserts if lo > hi; caller must ensure this doesn't happen. */
937extern struct _DebugInfoMapping* ML_(find_rx_mapping) ( struct _DebugInfo* di,
938                                                        Addr lo, Addr hi );
939
940/* ------ Misc ------ */
941
942/* Show a non-fatal debug info reading error.  Use vg_panic if
943   terminal.  'serious' errors are always shown, not 'serious' ones
944   are shown only at verbosity level 2 and above. */
945extern
946void ML_(symerr) ( struct _DebugInfo* di, Bool serious, const HChar* msg );
947
948/* Print a symbol. */
949extern void ML_(ppSym) ( Int idx, DiSym* sym );
950
951/* Print a call-frame-info summary. */
952extern void ML_(ppDiCfSI) ( XArray* /* of CfiExpr */ exprs, DiCfSI* si );
953
954
/* Symbol-table tracing helpers.  Both macros read di->trace_symtab,
   so a pointer variable named 'di' must be in scope wherever they are
   used.

   TRACE_SYMTAB_ENABLED evaluates to the tracing flag of 'di'.

   TRACE_SYMTAB(format, args...) passes its printf-style arguments to
   VG_(printf) when tracing is enabled, and does nothing otherwise.
   The expansion is wrapped in do { } while (0) so that an invocation
   followed by ';' is exactly one statement: it can be used as the
   unbraced body of an if/else, and a following 'else' can never bind
   to the 'if' hidden inside the macro. */
#define TRACE_SYMTAB_ENABLED (di->trace_symtab)
#define TRACE_SYMTAB(format, args...) \
   do { \
      if (TRACE_SYMTAB_ENABLED) { VG_(printf)(format, ## args); } \
   } while (0)
958
959
960#endif /* ndef __PRIV_STORAGE_H */
961
962/*--------------------------------------------------------------------*/
963/*--- end                                                          ---*/
964/*--------------------------------------------------------------------*/
965