priv_storage.h revision 2d3c75979dd0bbf2606bf1a8e11b72ae6220e5db
1
2/*--------------------------------------------------------------------*/
3/*--- Format-neutral storage of and querying of info acquired from ---*/
4/*--- ELF/XCOFF stabs/dwarf1/dwarf2 debug info.                    ---*/
5/*---                                               priv_storage.h ---*/
6/*--------------------------------------------------------------------*/
7
8/*
9   This file is part of Valgrind, a dynamic binary instrumentation
10   framework.
11
12   Copyright (C) 2000-2012 Julian Seward
13      jseward@acm.org
14
15   This program is free software; you can redistribute it and/or
16   modify it under the terms of the GNU General Public License as
17   published by the Free Software Foundation; either version 2 of the
18   License, or (at your option) any later version.
19
20   This program is distributed in the hope that it will be useful, but
21   WITHOUT ANY WARRANTY; without even the implied warranty of
22   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23   General Public License for more details.
24
25   You should have received a copy of the GNU General Public License
26   along with this program; if not, write to the Free Software
27   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28   02111-1307, USA.
29
30   The GNU General Public License is contained in the file COPYING.
31*/
32/*
33   Stabs reader greatly improved by Nick Nethercote, Apr 02.
34   This module was also extensively hacked on by Jeremy Fitzhardinge
35   and Tom Hughes.
36*/
37/* See comment at top of debuginfo.c for explanation of
38   the _svma / _avma / _image / _bias naming scheme.
39*/
40/* Note this is not freestanding; needs pub_core_xarray.h and
41   priv_tytypes.h to be included before it. */
42
43#ifndef __PRIV_STORAGE_H
44#define __PRIV_STORAGE_H
45
46/* --------------------- SYMBOLS --------------------- */
47
48/* A structure to hold an ELF/MachO symbol (very crudely).  Usually
49   the symbol only has one name, which is stored in ::pri_name, and
50   ::sec_names is NULL.  If there are other names, these are stored in
51   ::sec_names, which is a NULL terminated vector holding the names.
52   The vector is allocated in VG_AR_DINFO, the names themselves live
53   in DebugInfo::strchunks.
54
55   From the point of view of ELF, the primary vs secondary distinction
56   is artificial: they are all just names associated with the address,
57   none of which has higher precedence than any other.  However, from
58   the point of view of mapping an address to a name to display to the
59   user, we need to choose one "preferred" name, and so that might as
60   well be installed as the pri_name, whilst all others can live in
61   sec_names[].  This has the convenient side effect that, in the
62   common case where there is only one name for the address,
63   sec_names[] does not need to be allocated.
64*/
65typedef
66   struct {
67      Addr    addr;    /* lowest address of entity */
68      Addr    tocptr;  /* ppc64-linux only: value that R2 should have */
69      UChar*  pri_name;  /* primary name, never NULL */
70      UChar** sec_names; /* NULL, or a NULL term'd array of other names */
71      // XXX: this could be shrunk (on 32-bit platforms) by using 30
72      // bits for the size and 1 bit each for isText and isIFunc.  If you
73      // do this, make sure that all assignments to the latter two use
74      // 0 or 1 (or True or False), and that a positive number larger
75      // than 1 is never used to represent True.
76      UInt    size;    /* size in bytes */
77      Bool    isText;
78      Bool    isIFunc; /* symbol is an indirect function? */
79   }
80   DiSym;
81
82/* --------------------- SRCLOCS --------------------- */
83
/* Line count at which overflow happens, due to line numbers being
   stored as shorts in `struct nlist' in a.out.h. */
#define LINENO_OVERFLOW (1 << (sizeof(short) * 8))

/* Widths of the two bit-fields in a DiLoc: LINENO_BITS bits hold the
   source line number and the remaining LOC_SIZE_BITS bits (out of 32)
   hold the byte size of the line's instruction range.  MAX_LINENO is
   the largest line number representable in LINENO_BITS bits. */
#define LINENO_BITS     20
#define LOC_SIZE_BITS  (32 - LINENO_BITS)
#define MAX_LINENO     ((1 << LINENO_BITS) - 1)

/* Largest size representable in LOC_SIZE_BITS bits.  Unlikely to have
   any lines with instruction ranges > 4096 bytes */
#define MAX_LOC_SIZE   ((1 << LOC_SIZE_BITS) - 1)

/* Number used to detect line number overflows; if one line is
   60000-odd smaller than the previous, it was probably an overflow.
 */
#define OVERFLOW_DIFFERENCE     (LINENO_OVERFLOW - 5000)
99
100/* A structure to hold addr-to-source info for a single line.  There
101  can be a lot of these, hence the dense packing. */
102typedef
103   struct {
104      /* Word 1 */
105      Addr   addr;               /* lowest address for this line */
106      /* Word 2 */
107      UShort size:LOC_SIZE_BITS; /* # bytes; we catch overflows of this */
108      UInt   lineno:LINENO_BITS; /* source line number, or zero */
109      /* Word 3 */
110      UChar*  filename;          /* source filename */
111      /* Word 4 */
112      UChar*  dirname;           /* source directory name */
113   }
114   DiLoc;
115
116/* --------------------- CF INFO --------------------- */
117
/* DiCfSI: a structure to summarise DWARF2/3 CFA info for the code
   address range [base .. base+len-1].

   On x86 and amd64 ("IA"), if you know ({e,r}sp, {e,r}bp, {e,r}ip) at
   some point and {e,r}ip is in the range [base .. base+len-1], it
   tells you how to calculate ({e,r}sp, {e,r}bp) for the caller of the
   current frame and also ra, the return address of the current frame.

   First off, calculate CFA, the Canonical Frame Address, thusly:

     cfa = case cfa_how of
              CFIC_IA_SPREL -> {e,r}sp + cfa_off
              CFIC_IA_BPREL -> {e,r}bp + cfa_off
              CFIC_IA_EXPR  -> expr whose index is in cfa_off

   Once that is done, the previous frame's {e,r}sp/{e,r}bp values and
   this frame's {e,r}ra value can be calculated like this:

     old_{e,r}sp/{e,r}bp/ra
         = case {e,r}sp/{e,r}bp/ra_how of
              CFIR_UNKNOWN   -> we don't know, sorry
              CFIR_SAME      -> same as it was before (sp/fp only)
              CFIR_CFAREL    -> cfa + sp/bp/ra_off
              CFIR_MEMCFAREL -> *( cfa + sp/bp/ra_off )
              CFIR_EXPR      -> expr whose index is in sp/bp/ra_off

   On ARM it's pretty much the same, except we have more registers to
   keep track of:

     cfa = case cfa_how of
              CFIC_ARM_R13REL -> r13 + cfa_off
              CFIC_ARM_R12REL -> r12 + cfa_off
              CFIC_ARM_R11REL -> r11 + cfa_off
              CFIC_ARM_R7REL  -> r7  + cfa_off
              CFIC_EXPR       -> expr whose index is in cfa_off

     old_r14/r13/r12/r11/r7/ra
         = case r14/r13/r12/r11/r7/ra_how of
              CFIR_UNKNOWN   -> we don't know, sorry
              CFIR_SAME      -> same as it was before (r14/r13/r12/r11/r7 only)
              CFIR_CFAREL    -> cfa + r14/r13/r12/r11/r7/ra_off
              CFIR_MEMCFAREL -> *( cfa + r14/r13/r12/r11/r7/ra_off )
              CFIR_EXPR      -> expr whose index is in r14/r13/r12/r11/r7/ra_off

   On s390x we have a similar logic as x86 or amd64. We need the stack pointer
   (r15), the frame pointer r11 (like BP) and together with the instruction
   address in the PSW we can calculate the previous values:
     cfa = case cfa_how of
              CFIC_IA_SPREL -> r15 + cfa_off
              CFIC_IA_BPREL -> r11 + cfa_off
              CFIC_IA_EXPR  -> expr whose index is in cfa_off

     old_sp/fp/ra
         = case sp/fp/ra_how of
              CFIR_UNKNOWN   -> we don't know, sorry
              CFIR_SAME      -> same as it was before (sp/fp only)
              CFIR_CFAREL    -> cfa + sp/fp/ra_off
              CFIR_MEMCFAREL -> *( cfa + sp/fp/ra_off )
              CFIR_EXPR      -> expr whose index is in sp/fp/ra_off
*/
178
/* CFIC_ values: the possible contents of a DiCfSI's cfa_how field,
   i.e. the rule for computing the CFA.  The two families below are
   numbered in disjoint ranges (1..8 vs 64..68). */
#define CFIC_IA_SPREL     ((UChar)1)
#define CFIC_IA_BPREL     ((UChar)2)
#define CFIC_IA_EXPR      ((UChar)3)
#define CFIC_ARM_R13REL   ((UChar)4)
#define CFIC_ARM_R12REL   ((UChar)5)
#define CFIC_ARM_R11REL   ((UChar)6)
#define CFIC_ARM_R7REL    ((UChar)7)
#define CFIC_EXPR         ((UChar)8)  /* all targets */

/* CFIR_ values: the possible contents of a DiCfSI's per-register
   <reg>_how fields (ra_how, sp_how, ...), i.e. the rule for
   recovering that register's value in the caller. */
#define CFIR_UNKNOWN      ((UChar)64)
#define CFIR_SAME         ((UChar)65)
#define CFIR_CFAREL       ((UChar)66)
#define CFIR_MEMCFAREL    ((UChar)67)
#define CFIR_EXPR         ((UChar)68)
193
194#if defined(VGA_x86) || defined(VGA_amd64)
195typedef
196   struct {
197      Addr  base;
198      UInt  len;
199      UChar cfa_how; /* a CFIC_IA value */
200      UChar ra_how;  /* a CFIR_ value */
201      UChar sp_how;  /* a CFIR_ value */
202      UChar bp_how;  /* a CFIR_ value */
203      Int   cfa_off;
204      Int   ra_off;
205      Int   sp_off;
206      Int   bp_off;
207   }
208   DiCfSI;
209#elif defined(VGA_arm)
210typedef
211   struct {
212      Addr  base;
213      UInt  len;
214      UChar cfa_how; /* a CFIC_ value */
215      UChar ra_how;  /* a CFIR_ value */
216      UChar r14_how; /* a CFIR_ value */
217      UChar r13_how; /* a CFIR_ value */
218      UChar r12_how; /* a CFIR_ value */
219      UChar r11_how; /* a CFIR_ value */
220      UChar r7_how;  /* a CFIR_ value */
221      Int   cfa_off;
222      Int   ra_off;
223      Int   r14_off;
224      Int   r13_off;
225      Int   r12_off;
226      Int   r11_off;
227      Int   r7_off;
228   }
229   DiCfSI;
230#elif defined(VGA_ppc32) || defined(VGA_ppc64)
231/* Just have a struct with the common fields in, so that code that
232   processes the common fields doesn't have to be ifdef'd against
233   VGP_/VGA_ symbols.  These are not used in any way on ppc32/64-linux
234   at the moment. */
235typedef
236   struct {
237      Addr  base;
238      UInt  len;
239      UChar cfa_how; /* a CFIC_ value */
240      UChar ra_how;  /* a CFIR_ value */
241      Int   cfa_off;
242      Int   ra_off;
243   }
244   DiCfSI;
245#elif defined(VGA_s390x)
246typedef
247   struct {
248      Addr  base;
249      UInt  len;
250      UChar cfa_how; /* a CFIC_ value */
251      UChar sp_how;  /* a CFIR_ value */
252      UChar ra_how;  /* a CFIR_ value */
253      UChar fp_how;  /* a CFIR_ value */
254      Int   cfa_off;
255      Int   sp_off;
256      Int   ra_off;
257      Int   fp_off;
258   }
259   DiCfSI;
260#elif defined(VGA_mips32)
261typedef
262   struct {
263      Addr  base;
264      UInt  len;
265      UChar cfa_how; /* a CFIC_ value */
266      UChar ra_how;  /* a CFIR_ value */
267      UChar sp_how;  /* a CFIR_ value */
268      UChar fp_how;  /* a CFIR_ value */
269      Int   cfa_off;
270      Int   ra_off;
271      Int   sp_off;
272      Int   fp_off;
273   }
274   DiCfSI;
275#else
276#  error "Unknown arch"
277#endif
278
279
/* CfiOp: operator codes carried by the Binop variant of CfiExpr.
   Numbering starts at 0x321 and runs consecutively in the order
   listed.  NOTE(review): the non-zero base is presumably there so
   that zeroed or stray values do not alias a valid code -- confirm. */
typedef
   enum {
      Cop_Add=0x321,
      Cop_Sub,
      Cop_And,
      Cop_Mul,
      Cop_Shl,
      Cop_Shr,
      Cop_Eq,
      Cop_Ge,
      Cop_Gt,
      Cop_Le,
      Cop_Lt,
      Cop_Ne
   }
   CfiOp;
296
/* CfiReg: register identifiers carried by the CfiReg variant of
   CfiExpr.  The IA_, ARM_, S390_ and MIPS_ infixes name the
   architecture family each identifier belongs to.  Numbering starts
   at 0x213 and runs consecutively in the order listed. */
typedef
   enum {
      Creg_IA_SP=0x213,
      Creg_IA_BP,
      Creg_IA_IP,
      Creg_ARM_R13,
      Creg_ARM_R12,
      Creg_ARM_R15,
      Creg_ARM_R14,
      Creg_S390_R14,
      Creg_MIPS_RA
   }
   CfiReg;
310
/* CfiExprTag: discriminator for the CfiExpr union; each tag has a
   same-named member in CfiExpr's Cex union.  Numbering starts at
   0x123 and runs consecutively in the order listed. */
typedef
   enum {
      Cex_Undef=0x123,
      Cex_Deref,
      Cex_Const,
      Cex_Binop,
      Cex_CfiReg,
      Cex_DwReg
   }
   CfiExprTag;
321
322typedef
323   struct {
324      CfiExprTag tag;
325      union {
326         struct {
327         } Undef;
328         struct {
329            Int ixAddr;
330         } Deref;
331         struct {
332            UWord con;
333         } Const;
334         struct {
335            CfiOp op;
336            Int ixL;
337            Int ixR;
338         } Binop;
339         struct {
340            CfiReg reg;
341         } CfiReg;
342         struct {
343            Int reg;
344         } DwReg;
345      }
346      Cex;
347   }
348   CfiExpr;
349
350extern Int ML_(CfiExpr_Undef) ( XArray* dst );
351extern Int ML_(CfiExpr_Deref) ( XArray* dst, Int ixAddr );
352extern Int ML_(CfiExpr_Const) ( XArray* dst, UWord con );
353extern Int ML_(CfiExpr_Binop) ( XArray* dst, CfiOp op, Int ixL, Int ixR );
354extern Int ML_(CfiExpr_CfiReg)( XArray* dst, CfiReg reg );
355extern Int ML_(CfiExpr_DwReg) ( XArray* dst, Int reg );
356
357extern void ML_(ppCfiExpr)( XArray* src, Int ix );
358
359/* ---------------- FPO INFO (Windows PE) -------------- */
360
361/* for apps using Wine: MSVC++ PDB FramePointerOmitted: somewhat like
362   a primitive CFI */
363typedef
364   struct _FPO_DATA {  /* 16 bytes */
365      UInt   ulOffStart; /* offset of 1st byte of function code */
366      UInt   cbProcSize; /* # bytes in function */
367      UInt   cdwLocals;  /* # bytes/4 in locals */
368      UShort cdwParams;  /* # bytes/4 in params */
369      UChar  cbProlog;   /* # bytes in prolog */
370      UChar  cbRegs :3;  /* # regs saved */
371      UChar  fHasSEH:1;  /* Structured Exception Handling */
372      UChar  fUseBP :1;  /* EBP has been used */
373      UChar  reserved:1;
374      UChar  cbFrame:2;  /* frame type */
375   }
376   FPO_DATA;
377
/* PDB frame type codes.  NOTE(review): presumably these are the
   values stored in FPO_DATA.cbFrame ("frame type") -- confirm. */
#define PDB_FRAME_FPO  0
#define PDB_FRAME_TRAP 1
#define PDB_FRAME_TSS  2
381
382/* --------------------- VARIABLES --------------------- */
383
384typedef
385   struct {
386      Addr    aMin;
387      Addr    aMax;
388      XArray* /* of DiVariable */ vars;
389   }
390   DiAddrRange;
391
392typedef
393   struct {
394      UChar* name;  /* in DebugInfo.strchunks */
395      UWord  typeR; /* a cuOff */
396      GExpr* gexpr; /* on DebugInfo.gexprs list */
397      GExpr* fbGX;  /* SHARED. */
398      UChar* fileName; /* where declared; may be NULL. in
399                          DebugInfo.strchunks */
400      Int    lineNo;   /* where declared; may be zero. */
401   }
402   DiVariable;
403
404Word
405ML_(cmp_for_DiAddrRange_range) ( const void* keyV, const void* elemV );
406
407/* --------------------- DEBUGINFO --------------------- */
408
409/* This is the top-level data type.  It's a structure which contains
410   information pertaining to one mapped ELF object.  This type is
411   exported only abstractly - in pub_tool_debuginfo.h. */
412
413/* First though, here's an auxiliary data structure.  It is only ever
414   used as part of a struct _DebugInfo.  We use it to record
415   observations about mappings and permission changes to the
416   associated file, so as to decide when to read debug info.  It's
417   essentially an ultra-trivial finite state machine which, when it
418   reaches an accept state, signals that we should now read debug info
419   from the object into the associated struct _DebugInfo.  The accept
420   state is arrived at when have_rx_map and have_rw_map both become
421   true.  The initial state is one in which we have no observations,
422   so have_rx_map and have_rw_map are both false.
423
424   This all started as a rather ad-hoc solution, but was further
425   expanded to handle weird object layouts, e.g. more than one rw
426   or rx mapping for one binary.
427
428   The normal sequence of events is one of
429
430   start  -->  r-x mapping  -->  rw- mapping  -->  accept
431   start  -->  rw- mapping  -->  r-x mapping  -->  accept
432
433   that is, take the first r-x and rw- mapping we see, and we're done.
434
435   On MacOSX 10.7, 32-bit, there appears to be a new variant:
436
437   start  -->  r-- mapping  -->  rw- mapping
438          -->  upgrade r-- mapping to r-x mapping  -->  accept
439
440   where the upgrade is done by a call to vm_protect.  Hence we
441   need to also track this possibility.
442*/
443
444struct _DebugInfoMapping
445{
446   Addr  avma; /* these fields record the file offset, length */
447   SizeT size; /* and map address of each mapping             */
448   OffT  foff;
449   Bool  rx, rw, ro;  /* memory access flags for this mapping */
450};
451
452struct _DebugInfoFSM
453{
454   UChar*  filename;  /* in mallocville (VG_AR_DINFO)               */
455   XArray* maps;      /* XArray of _DebugInfoMapping structs        */
456   Bool  have_rx_map; /* did we see a r?x mapping yet for the file? */
457   Bool  have_rw_map; /* did we see a rw? mapping yet for the file? */
458   Bool  have_ro_map; /* did we see a r-- mapping yet for the file? */
459};
460
461
/* To do with the string table in struct _DebugInfo (::strchunks):
   the number of bytes of string storage in each chunk (64 KiB). */
#define SEGINFO_STRCHUNKSIZE (64*1024)
464
465
/* We may encounter more than one .eh_frame section in an object --
   unusual but apparently allowed by ELF.  See
   http://sourceware.org/bugzilla/show_bug.cgi?id=12675
   This is the maximum number of them recorded per object; it sizes
   the ehframe_avma[] and ehframe_size[] arrays in struct _DebugInfo.
*/
#define N_EHFRAME_SECTS 2
471
472
473/* So, the main structure for holding debug info for one object. */
474
475struct _DebugInfo {
476
477   /* Admin stuff */
478
479   struct _DebugInfo* next;   /* list of DebugInfos */
480   Bool               mark;   /* marked for deletion? */
481
482   /* An abstract handle, which can be used by entities outside of
483      m_debuginfo to (in an abstract datatype sense) refer to this
484      struct _DebugInfo.  A .handle of zero is invalid; valid handles
485      are 1 and above.  The same handle is never issued twice (in any
486      given run of Valgrind), so a handle becomes invalid when the
487      associated struct _DebugInfo is discarded, and remains invalid
488      forever thereafter.  The .handle field is set as soon as this
489      structure is allocated. */
490   ULong handle;
491
492   /* Used for debugging only - indicate what stuff to dump whilst
493      reading stuff into the seginfo.  Are computed as early in the
494      lifetime of the DebugInfo as possible -- at the point when it is
495      created.  Use these when deciding what to spew out; do not use
496      the global VG_(clo_blah) flags. */
497
498   Bool trace_symtab; /* symbols, our style */
499   Bool trace_cfi;    /* dwarf frame unwind, our style */
500   Bool ddump_syms;   /* mimic /usr/bin/readelf --syms */
501   Bool ddump_line;   /* mimic /usr/bin/readelf --debug-dump=line */
502   Bool ddump_frames; /* mimic /usr/bin/readelf --debug-dump=frames */
503
504   /* The "decide when it is time to read debuginfo" state machine.
505      This structure must get filled in before we can start reading
506      anything from the ELF/MachO file.  This structure is filled in
507      by VG_(di_notify_mmap) and its immediate helpers. */
508   struct _DebugInfoFSM fsm;
509
510   /* Once the ::fsm has reached an accept state -- typically, when
511      both a rw? and r?x mapping for .filename have been observed --
512      we can go on to read the symbol tables and debug info.
513      .have_dinfo changes from False to True when the debug info has
514      been completely read in and postprocessed (canonicalised) and is
515      now suitable for querying. */
516   /* If have_dinfo is False, then all fields below this point are
517      invalid and should not be consulted. */
518   Bool  have_dinfo; /* initially False */
519
520   /* All the rest of the fields in this structure are filled in once
521      we have committed to reading the symbols and debug info (that
522      is, at the point where .have_dinfo is set to True). */
523
524   /* The file's soname. */
525   UChar* soname;
526
527   /* Description of some important mapped segments.  The presence or
528      absence of the mapping is denoted by the _present field, since
529      in some obscure circumstances (to do with data/sdata/bss) it is
530      possible for the mapping to be present but have zero size.
531      Certainly text_ is mandatory on all platforms; not sure about
532      the rest though.
533
534      --------------------------------------------------------
535
536      Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS: we require that
537
538      either (size of all rx maps == 0 && cfsi == NULL) (the degenerate case)
539
540      or the normal case, which is the AND of the following:
541      (0) size of at least one rx mapping > 0
542      (1) no two DebugInfos with some rx mapping of size > 0
543          have overlapping rx mappings
544      (2) [cfsi_minavma,cfsi_maxavma] does not extend beyond
545          [avma,+size) of one rx mapping; that is, the former
546          is a subrange or equal to the latter.
547      (3) all DiCfSI in the cfsi array all have ranges that fall within
548          [avma,+size) of that rx mapping.
549      (4) all DiCfSI in the cfsi array are non-overlapping
550
551      The cumulative effect of these restrictions is to ensure that
552      all the DiCfSI records in the entire system are non overlapping.
553      Hence any address falls into either exactly one DiCfSI record,
554      or none.  Hence it is safe to cache the results of searches for
555      DiCfSI records.  This is the whole point of these restrictions.
556      The caching of DiCfSI searches is done in VG_(use_CF_info).  The
557      cache is flushed after any change to debugInfo_list.  DiCfSI
558      searches are cached because they are central to stack unwinding
559      on amd64-linux.
560
561      Where are these invariants imposed and checked?
562
563      They are checked after a successful read of debuginfo into
564      a DebugInfo*, in check_CFSI_related_invariants.
565
566      (1) is not really imposed anywhere.  We simply assume that the
567      kernel will not map the text segments from two different objects
568      into the same space.  Sounds reasonable.
569
570      (2) follows from (4) and (3).  It is ensured by canonicaliseCFI.
571      (3) is ensured by ML_(addDiCfSI).
572      (4) is ensured by canonicaliseCFI.
573
574      --------------------------------------------------------
575
576      Comment_on_DEBUG_SVMA_and_DEBUG_BIAS_fields:
577
578      The _debug_{svma,bias} fields were added as part of a fix to
579      #185816.  The problem encompassed in that bug report was that it
580      wasn't correct to use apply the bias values deduced for a
581      primary object to its associated debuginfo object, because the
582      debuginfo object (or the primary) could have been prelinked to a
583      different SVMA.  Hence debuginfo and primary objects need to
584      have their own biases.
585
586      ------ JRS: (referring to r9329): ------
587      Let me see if I understand the workings correctly.  Initially
588      the _debug_ values are set to the same values as the "normal"
589      ones, as there's a bunch of bits of code like this (in
590      readelf.c)
591
592         di->text_svma = svma;
593         ...
594         di->text_bias = rx_bias;
595         di->text_debug_svma = svma;
596         di->text_debug_bias = rx_bias;
597
598      If a debuginfo object subsequently shows up then the
599      _debug_svma/bias are set for the debuginfo object.  Result is
600      that if there's no debuginfo object then the values are the same
601      as the primary-object values, and if there is a debuginfo object
602      then they will (or at least may) be different.
603
604      Then when we need to actually bias something, we'll have to
605      decide whether to use the primary bias or the debuginfo bias.
606      And the strategy is to use the primary bias for ELF symbols but
607      the debuginfo bias for anything pulled out of Dwarf.
608
609      ------ THH: ------
610      Correct - the debug_svma and bias values apply to any address
611      read from the debug data regardless of where that debug data is
612      stored and the other values are used for addresses from other
613      places (primarily the symbol table).
614
615      ------ JRS: ------
616      Ok; so this was my only area of concern.  Are there any
617      corner-case scenarios where this wouldn't be right?  It sounds
618      like we're assuming the ELF symbols come from the primary object
619      and, if there is a debug object, then all the Dwarf comes from
620      there.  But what if (eg) both symbols and Dwarf come from the
621      debug object?  Is that even possible or allowable?
622
623      ------ THH: ------
624      You may have a point...
625
626      The current logic is to try and take any one set of data from
627      either the base object or the debug object. There are four sets
628      of data we consider:
629
630         - Symbol Table
631         - Stabs
632         - DWARF1
633         - DWARF2
634
635      If we see the primary section for a given set in the base object
636      then we ignore all sections relating to that set in the debug
637      object.
638
639      Now in principle if we saw a secondary section (like debug_line
640      say) in the base object, but not the main section (debug_info in
641      this case) then we would take debug_info from the debug object
642      but would use the debug_line from the base object unless we saw
643      a replacement copy in the debug object. That's probably unlikely
644      however.
645
646      A bigger issue might be, as you say, the symbol table as we will
647      pick that up from the debug object if it isn't in the base. The
648      dynamic symbol table will always have to be in the base object
649      though so we will have to be careful when processing symbols to
650      know which table we are reading in that case.
651
652      What we probably need to do is tell read_elf_symtab which object
653      the symbols it is being asked to read came from.
654
655      (A followup patch to deal with this was committed in r9469).
656   */
657   /* .text */
658   Bool     text_present;
659   Addr     text_avma;
660   Addr     text_svma;
661   SizeT    text_size;
662   PtrdiffT text_bias;
663   Addr     text_debug_svma;
664   PtrdiffT text_debug_bias;
665   /* .data */
666   Bool     data_present;
667   Addr     data_svma;
668   Addr     data_avma;
669   SizeT    data_size;
670   PtrdiffT data_bias;
671   Addr     data_debug_svma;
672   PtrdiffT data_debug_bias;
673   /* .sdata */
674   Bool     sdata_present;
675   Addr     sdata_svma;
676   Addr     sdata_avma;
677   SizeT    sdata_size;
678   PtrdiffT sdata_bias;
679   Addr     sdata_debug_svma;
680   PtrdiffT sdata_debug_bias;
681   /* .rodata */
682   Bool     rodata_present;
683   Addr     rodata_svma;
684   Addr     rodata_avma;
685   SizeT    rodata_size;
686   PtrdiffT rodata_bias;
687   Addr     rodata_debug_svma;
688   PtrdiffT rodata_debug_bias;
689   /* .bss */
690   Bool     bss_present;
691   Addr     bss_svma;
692   Addr     bss_avma;
693   SizeT    bss_size;
694   PtrdiffT bss_bias;
695   Addr     bss_debug_svma;
696   PtrdiffT bss_debug_bias;
697   /* .sbss */
698   Bool     sbss_present;
699   Addr     sbss_svma;
700   Addr     sbss_avma;
701   SizeT    sbss_size;
702   PtrdiffT sbss_bias;
703   Addr     sbss_debug_svma;
704   PtrdiffT sbss_debug_bias;
705   /* .plt */
706   Bool   plt_present;
707   Addr	  plt_avma;
708   SizeT  plt_size;
709   /* .got */
710   Bool   got_present;
711   Addr   got_avma;
712   SizeT  got_size;
713   /* .got.plt */
714   Bool   gotplt_present;
715   Addr   gotplt_avma;
716   SizeT  gotplt_size;
717   /* .opd -- needed on ppc64-linux for finding symbols */
718   Bool   opd_present;
719   Addr   opd_avma;
720   SizeT  opd_size;
721   /* .ehframe -- needed on amd64-linux for stack unwinding.  We might
722      see more than one, hence the arrays. */
723   UInt   n_ehframe;  /* 0 .. N_EHFRAME_SECTS */
724   Addr   ehframe_avma[N_EHFRAME_SECTS];
725   SizeT  ehframe_size[N_EHFRAME_SECTS];
726
727   /* Sorted tables of stuff we snarfed from the file.  This is the
728      eventual product of reading the debug info.  All this stuff
729      lives in VG_AR_DINFO. */
730
731   /* An expandable array of symbols. */
732   DiSym*  symtab;
733   UWord   symtab_used;
734   UWord   symtab_size;
735   /* An expandable array of locations. */
736   DiLoc*  loctab;
737   UWord   loctab_used;
738   UWord   loctab_size;
739   /* An expandable array of CFI summary info records.  Also includes
740      summary address bounds, showing the min and max address covered
741      by any of the records, as an aid to fast searching.  And, if the
742      records require any expression nodes, they are stored in
743      cfsi_exprs. */
744   DiCfSI* cfsi;
745   UWord   cfsi_used;
746   UWord   cfsi_size;
747   Addr    cfsi_minavma;
748   Addr    cfsi_maxavma;
749   XArray* cfsi_exprs; /* XArray of CfiExpr */
750
751   /* Optimized code under Wine x86: MSVC++ PDB FramePointerOmitted
752      data.  Non-expandable array, hence .size == .used. */
753   FPO_DATA* fpo;
754   UWord     fpo_size;
755   Addr      fpo_minavma;
756   Addr      fpo_maxavma;
757   Addr      fpo_base_avma;
758
759   /* Expandable arrays of characters -- the string table.  Pointers
760      into this are stable (the arrays are not reallocated). */
761   struct strchunk {
762      UInt   strtab_used;
763      struct strchunk* next;
764      UChar  strtab[SEGINFO_STRCHUNKSIZE];
765   } *strchunks;
766
767   /* Variable scope information, as harvested from Dwarf3 files.
768
769      In short it's an
770
771         array of (array of PC address ranges and variables)
772
773      The outer array indexes over scopes, with Entry 0 containing
774      information on variables which exist for any value of the program
775      counter (PC) -- that is, the outermost scope.  Entries 1, 2, 3,
776      etc contain information on increasinly deeply nested variables.
777
778      Each inner array is an array of (an address range, and a set
779      of variables that are in scope over that address range).
780
781      The address ranges may not overlap.
782
783      Since Entry 0 in the outer array holds information on variables
784      that exist for any value of the PC (that is, global vars), it
785      follows that Entry 0's inner array can only have one address
786      range pair, one that covers the entire address space.
787   */
788   XArray* /* of OSet of DiAddrRange */varinfo;
789
790   /* These are arrays of the relevant typed objects, held here
791      partially for the purposes of visiting each object exactly once
792      when we need to delete them. */
793
794   /* An array of TyEnts.  These are needed to make sense of any types
795      in the .varinfo.  Also, when deleting this DebugInfo, we must
796      first traverse this array and throw away malloc'd stuff hanging
797      off it -- by calling ML_(TyEnt__make_EMPTY) on each entry. */
798   XArray* /* of TyEnt */ admin_tyents;
799
800   /* An array of guarded DWARF3 expressions. */
801   XArray* admin_gexprs;
802
803   /* Cached last rx mapping matched and returned by ML_(find_rx_mapping).
804      This helps performance a lot during ML_(addLineInfo) etc., which can
805      easily be invoked hundreds of thousands of times. */
806   struct _DebugInfoMapping* last_rx_map;
807};
808
809/* --------------------- functions --------------------- */
810
811/* ------ Adding ------ */
812
813/* Add a symbol to si's symbol table.  The contents of 'sym' are
814   copied.  It is assumed (and checked) that 'sym' only contains one
815   name, so there is no auxiliary ::sec_names vector to duplicate.
816   IOW, the copy is a shallow copy, and there are assertions in place
817   to ensure that's OK. */
818extern void ML_(addSym) ( struct _DebugInfo* di, DiSym* sym );
819
820/* Add a line-number record to a DebugInfo. */
821extern
822void ML_(addLineInfo) ( struct _DebugInfo* di,
823                        UChar*   filename,
824                        UChar*   dirname,  /* NULL is allowable */
825                        Addr this, Addr next, Int lineno, Int entry);
826
827/* Add a CFI summary record.  The supplied DiCfSI is copied. */
828extern void ML_(addDiCfSI) ( struct _DebugInfo* di, DiCfSI* cfsi );
829
830/* Add a string to the string table of a DebugInfo.  If len==-1,
831   ML_(addStr) will itself measure the length of the string. */
832extern UChar* ML_(addStr) ( struct _DebugInfo* di, UChar* str, Int len );
833
834extern void ML_(addVar)( struct _DebugInfo* di,
835                         Int    level,
836                         Addr   aMin,
837                         Addr   aMax,
838                         UChar* name,
839                         UWord  typeR, /* a cuOff */
840                         GExpr* gexpr,
841                         GExpr* fbGX, /* SHARED. */
842                         UChar* fileName, /* where decl'd - may be NULL */
843                         Int    lineNo, /* where decl'd - may be zero */
844                         Bool   show );
845
846/* Canonicalise the tables held by 'di', in preparation for use.  Call
847   this after finishing adding entries to these tables. */
848extern void ML_(canonicaliseTables) ( struct _DebugInfo* di );
849
850/* Canonicalise the call-frame-info table held by 'di', in preparation
851   for use. This is called by ML_(canonicaliseTables) but can also be
852   called on it's own to sort just this table. */
853extern void ML_(canonicaliseCFI) ( struct _DebugInfo* di );
854
855/* ------ Searching ------ */
856
857/* Find a symbol-table index containing the specified pointer, or -1
858   if not found.  Binary search.  */
859extern Word ML_(search_one_symtab) ( struct _DebugInfo* di, Addr ptr,
860                                     Bool match_anywhere_in_sym,
861                                     Bool findText );
862
863/* Find a location-table index containing the specified pointer, or -1
864   if not found.  Binary search.  */
865extern Word ML_(search_one_loctab) ( struct _DebugInfo* di, Addr ptr );
866
867/* Find a CFI-table index containing the specified pointer, or -1 if
868   not found.  Binary search.  */
869extern Word ML_(search_one_cfitab) ( struct _DebugInfo* di, Addr ptr );
870
871/* Find a FPO-table index containing the specified pointer, or -1
872   if not found.  Binary search.  */
873extern Word ML_(search_one_fpotab) ( struct _DebugInfo* di, Addr ptr );
874
875/* Helper function for the most often needed searching for an rx
876   mapping containing the specified address range.  The range must
877   fall entirely within the mapping to be considered to be within it.
878   Asserts if lo > hi; caller must ensure this doesn't happen. */
879extern struct _DebugInfoMapping* ML_(find_rx_mapping) ( struct _DebugInfo* di,
880                                                        Addr lo, Addr hi );
881
882/* ------ Misc ------ */
883
884/* Show a non-fatal debug info reading error.  Use vg_panic if
885   terminal.  'serious' errors are always shown, not 'serious' ones
886   are shown only at verbosity level 2 and above. */
887extern
888void ML_(symerr) ( struct _DebugInfo* di, Bool serious, HChar* msg );
889
890/* Print a symbol. */
891extern void ML_(ppSym) ( Int idx, DiSym* sym );
892
893/* Print a call-frame-info summary. */
894extern void ML_(ppDiCfSI) ( XArray* /* of CfiExpr */ exprs, DiCfSI* si );
895
896
/* Print a printf-style trace message via VG_(printf), but only if
   di->trace_symtab is set.  There must be a variable named 'di' in
   scope at the point of use.

   The body is wrapped in do { ... } while (0) so that the macro
   expands to exactly one statement.  With a bare 'if', a use such as
      if (c) TRACE_SYMTAB("..."); else other();
   would not compile, and a following 'else' could bind to the hidden
   'if'.  Uses must end with a semicolon, as a function call would. */
#define TRACE_SYMTAB(format, args...) \
   do { \
      if (di->trace_symtab) { VG_(printf)(format, ## args); } \
   } while (0)
899
900
901#endif /* ndef __PRIV_STORAGE_H */
902
903/*--------------------------------------------------------------------*/
904/*--- end                                                          ---*/
905/*--------------------------------------------------------------------*/
906