readdwarf3.c revision 0b5bf911d9d40b8dd3130f6043ef7ba68a9f446e
1
2/*--------------------------------------------------------------------*/
3/*--- Read DWARF3 ".debug_info" sections (DIE trees).              ---*/
4/*---                                                 readdwarf3.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8   This file is part of Valgrind, a dynamic binary instrumentation
9   framework.
10
11   Copyright (C) 2008-2008 OpenWorks LLP
12      info@open-works.co.uk
13
14   This program is free software; you can redistribute it and/or
15   modify it under the terms of the GNU General Public License as
16   published by the Free Software Foundation; either version 2 of the
17   License, or (at your option) any later version.
18
19   This program is distributed in the hope that it will be useful, but
20   WITHOUT ANY WARRANTY; without even the implied warranty of
21   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22   General Public License for more details.
23
24   You should have received a copy of the GNU General Public License
25   along with this program; if not, write to the Free Software
26   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27   02111-1307, USA.
28
29   The GNU General Public License is contained in the file COPYING.
30
31   Neither the names of the U.S. Department of Energy nor the
32   University of California nor the names of its contributors may be
33   used to endorse or promote products derived from this software
34   without prior written permission.
35*/
36
37/* REFERENCE (without which this code will not make much sense):
38
39   DWARF Debugging Information Format, Version 3,
40   dated 20 December 2005 (the "D3 spec").
41
42   Available at http://www.dwarfstd.org/Dwarf3.pdf.  There's also a
43   .doc (MS Word) version, but for some reason the section numbers
44   between the Word and PDF versions differ by 1 in the first digit.
45   All section references in this code are to the PDF version.
46
47   CURRENT HACKS:
48
49   DW_TAG_{const,volatile}_type no DW_AT_type is allowed; it is
50      assumed to mean "const void" or "volatile void" respectively.
51      GDB appears to interpret them like this, anyway.
52
53   In many cases it is important to know the svma of a CU (the "base
54   address of the CU", as the D3 spec calls it).  There are some
55   situations in which the spec implies this value is unknown, but the
   Dwarf3 produced by gcc-4.1 seems to assume it is not unknown but
57   merely zero when not explicitly stated.  So we too have to make
58   that assumption.
59
60   TODO, 2008 Feb 17:
61
62   get rid of cu_svma_known and document the assumed-zero svma hack.
63
64   ML_(sizeOfType): differentiate between zero sized types and types
65   for which the size is unknown.  Is this important?  I don't know.
66
67   DW_AT_array_types: deal with explicit sizes (currently we compute
68   the size from the bounds and the element size, although that's
   fragile, if the bounds are incompletely specified, or completely
70   absent)
71
72   Document reason for difference (by 1) of stack preening depth in
73   parse_var_DIE vs parse_type_DIE.
74
75   Don't hand to ML_(addVars), vars whose locations are entirely in
76   registers (DW_OP_reg*).  This is merely a space-saving
77   optimisation, as ML_(evaluate_Dwarf3_Expr) should handle these
78   expressions correctly, by failing to evaluate them and hence
79   effectively ignoring the variable with which they are associated.
80
81   Deal with DW_AT_array_types which have element size != stride
82
83   In some cases, the info for a variable is split between two
84   different DIEs (generally a declarer and a definer).  We punt on
85   these.  Could do better here.
86
87   The 'data_bias' argument passed to the expression evaluator
88   (ML_(evaluate_Dwarf3_Expr)) should really be changed to a
89   MaybeUWord, to make it clear when we do vs don't know what it is
90   for the evaluation of an expression.  At the moment zero is passed
91   for this parameter in the don't know case.  That's a bit fragile
92   and obscure; using a MaybeUWord would be clearer.
93
94   POTENTIAL PERFORMANCE IMPROVEMENTS:
95
96   The number of type entities that end up in the list of TyAdmins
97   rapidly becomes huge (eg, for libQtGui.so.4.3.2 (amd64-linux, size
98   80729047 bytes), there are 786860 entries in the list).  Mostly
99   this seems to be caused by g++ adding type DIEs for all the basic
100   types once for each source file contributing to the compilation
101   unit, and for a large library they add up quickly.  That causes
102   both a lot of work for this reader module, and also wastes vast
103   amounts of memory storing this duplicated information.  We could
104   surely do a lot better here.
105
106   Handle interaction between read_DIE and parse_{var,type}_DIE
107   better.  Currently read_DIE reads the entire DIE just to find where
108   the end is (and for debug printing), so that it can later reliably
109   move the cursor to the end regardless of what parse_{var,type}_DIE
110   do.  This means many DIEs (most, even?) are read twice.  It would
111   be smarter to make parse_{var,type}_DIE return a Bool indicating
112   whether or not they advanced the DIE cursor, and only if they
113   didn't should read_DIE itself read through the DIE.
114
115   ML_(addVar) and add_var_to_arange: quite a lot of DiAddrRanges have
116   zero variables in their .vars XArray.  Rather than have an XArray
117   with zero elements (which uses 2 malloc'd blocks), allow the .vars
118   pointer to be NULL in this case.
119
120   More generally, reduce the amount of memory allocated and freed
121   while reading Dwarf3 type/variable information.  Even modest (20MB)
122   objects cause this module to allocate and free hundreds of
123   thousands of small blocks, and ML_(arena_malloc) and its various
124   groupies always show up at the top of performance profiles. */
125
126#include "pub_core_basics.h"
127#include "pub_core_libcbase.h"
128#include "pub_core_libcassert.h"
129#include "pub_core_libcprint.h"
130#include "pub_core_options.h"
131#include "pub_core_xarray.h"
132#include "priv_misc.h"             /* dinfo_zalloc/free */
133#include "priv_tytypes.h"
134#include "priv_d3basics.h"
135#include "priv_storage.h"
136#include "priv_readdwarf3.h"       /* self */
137
138
139/*------------------------------------------------------------*/
140/*---                                                      ---*/
141/*--- Basic machinery for parsing DIEs.                    ---*/
142/*---                                                      ---*/
143/*------------------------------------------------------------*/
144
/* Print a trace message with VG_(printf), but only when a variable
   named 'td3' that is in scope at the point of use is true.
   NOTE(review): this expands to a bare 'if' statement rather than a
   do { } while (0) wrapper, so take care when using it as the
   unbraced body of an if/else. */
#define TRACE_D3(format, args...) \
   if (td3) { VG_(printf)(format, ## args); }

/* Distinguished pointer-typed sentinel values.  Their users are not
   visible in this part of the file; presumably they stand for "no
   valid CU offset" and "the fake 'void' type" respectively -- TODO
   confirm against the code that uses them. */
#define D3_INVALID_CUOFF  ((void*)(-1UL))
#define D3_FAKEVOID_CUOFF ((void*)(-2UL))
150
/* A bounds-checked read position within a region of image memory.
   The get_* functions read at offset 'region_next', advance it, and
   call 'barf' with 'barfstr' if a read would run off the end of the
   region. */
typedef
   struct {
      /* Address of the first byte of the region. */
      UChar* region_start_img;
      /* Size of the region, in bytes. */
      UWord  region_szB;
      /* Offset, relative to region_start_img, of the next byte to be
         read. */
      UWord  region_next;
      /* Called when a read would overrun the region.  Must not
         return. */
      void (*barf)( HChar* ) __attribute__((noreturn));
      /* The message handed to 'barf' on an overrun. */
      HChar* barfstr;
   }
   Cursor;
160
161static inline Bool is_sane_Cursor ( Cursor* c ) {
162   if (!c)                return False;
163   if (!c->barf)          return False;
164   if (!c->barfstr)       return False;
165   return True;
166}
167
168static void init_Cursor ( Cursor* c,
169                          UChar*  region_start_img,
170                          UWord   region_szB,
171                          UWord   region_next,
172                          __attribute__((noreturn)) void (*barf)( HChar* ),
173                          HChar*  barfstr )
174{
175   vg_assert(c);
176   VG_(memset)(c, 0, sizeof(*c));
177   c->region_start_img = region_start_img;
178   c->region_szB       = region_szB;
179   c->region_next      = region_next;
180   c->barf             = barf;
181   c->barfstr          = barfstr;
182   vg_assert(is_sane_Cursor(c));
183}
184
185static Bool is_at_end_Cursor ( Cursor* c ) {
186   vg_assert(is_sane_Cursor(c));
187   return c->region_next >= c->region_szB;
188}
189
190static inline UWord get_position_of_Cursor ( Cursor* c ) {
191   vg_assert(is_sane_Cursor(c));
192   return c->region_next;
193}
194static inline void set_position_of_Cursor ( Cursor* c, UWord pos ) {
195   c->region_next = pos;
196   vg_assert(is_sane_Cursor(c));
197}
198
199static /*signed*/Word get_remaining_length_Cursor ( Cursor* c ) {
200   vg_assert(is_sane_Cursor(c));
201   return c->region_szB - c->region_next;
202}
203
204static UChar* get_address_of_Cursor ( Cursor* c ) {
205   vg_assert(is_sane_Cursor(c));
206   return &c->region_start_img[ c->region_next ];
207}
208
209__attribute__((noreturn))
210static void failWith ( Cursor* c, HChar* str ) {
211   vg_assert(c);
212   vg_assert(c->barf);
213   c->barf(str);
214   /*NOTREACHED*/
215   vg_assert(0);
216}
217
218/* FIXME: document assumptions on endianness for
219   get_UShort/UInt/ULong. */
220static inline UChar get_UChar ( Cursor* c ) {
221   UChar r;
222   /* vg_assert(is_sane_Cursor(c)); */
223   if (c->region_next + sizeof(UChar) > c->region_szB) {
224      c->barf(c->barfstr);
225      /*NOTREACHED*/
226      vg_assert(0);
227   }
228   r = * (UChar*) &c->region_start_img[ c->region_next ];
229   c->region_next += sizeof(UChar);
230   return r;
231}
232static UShort get_UShort ( Cursor* c ) {
233   UShort r;
234   vg_assert(is_sane_Cursor(c));
235   if (c->region_next + sizeof(UShort) > c->region_szB) {
236      c->barf(c->barfstr);
237      /*NOTREACHED*/
238      vg_assert(0);
239   }
240   r = * (UShort*) &c->region_start_img[ c->region_next ];
241   c->region_next += sizeof(UShort);
242   return r;
243}
244static UInt get_UInt ( Cursor* c ) {
245   UInt r;
246   vg_assert(is_sane_Cursor(c));
247   if (c->region_next + sizeof(UInt) > c->region_szB) {
248      c->barf(c->barfstr);
249      /*NOTREACHED*/
250      vg_assert(0);
251   }
252   r = * (UInt*) &c->region_start_img[ c->region_next ];
253   c->region_next += sizeof(UInt);
254   return r;
255}
256static ULong get_ULong ( Cursor* c ) {
257   ULong r;
258   vg_assert(is_sane_Cursor(c));
259   if (c->region_next + sizeof(ULong) > c->region_szB) {
260      c->barf(c->barfstr);
261      /*NOTREACHED*/
262      vg_assert(0);
263   }
264   r = * (ULong*) &c->region_start_img[ c->region_next ];
265   c->region_next += sizeof(ULong);
266   return r;
267}
268static inline ULong get_ULEB128 ( Cursor* c ) {
269   ULong result;
270   Int   shift;
271   UChar byte;
272   /* unroll first iteration */
273   byte = get_UChar( c );
274   result = (ULong)(byte & 0x7f);
275   if (LIKELY(!(byte & 0x80))) return result;
276   shift = 7;
277   /* end unroll first iteration */
278   do {
279      byte = get_UChar( c );
280      result |= ((ULong)(byte & 0x7f)) << shift;
281      shift += 7;
282   } while (byte & 0x80);
283   return result;
284}
285static Long get_SLEB128 ( Cursor* c ) {
286   ULong  result = 0;
287   Int    shift = 0;
288   UChar  byte;
289   do {
290      byte = get_UChar(c);
291      result |= ((ULong)(byte & 0x7f)) << shift;
292      shift += 7;
293   } while (byte & 0x80);
294   if (shift < 64 && (byte & 0x40))
295      result |= -(1ULL << shift);
296   return result;
297}
298
299/* Assume 'c' points to the start of a string.  Return the absolute
300   address of whatever it points at, and advance it past the
301   terminating zero.  This makes it safe for the caller to then copy
302   the string with ML_(addStr), since (w.r.t. image overruns) the
303   process of advancing past the terminating zero will already have
304   "vetted" the string. */
305static UChar* get_AsciiZ ( Cursor* c ) {
306   UChar  uc;
307   UChar* res = get_address_of_Cursor(c);
308   do { uc = get_UChar(c); } while (uc != 0);
309   return res;
310}
311
312static ULong peek_ULEB128 ( Cursor* c ) {
313   Word here = c->region_next;
314   ULong r = get_ULEB128( c );
315   c->region_next = here;
316   return r;
317}
318static UChar peek_UChar ( Cursor* c ) {
319   Word here = c->region_next;
320   UChar r = get_UChar( c );
321   c->region_next = here;
322   return r;
323}
324
325static ULong get_Dwarfish_UWord ( Cursor* c, Bool is_dw64 ) {
326   return is_dw64 ? get_ULong(c) : (ULong) get_UInt(c);
327}
328
329static UWord get_UWord ( Cursor* c ) {
330   vg_assert(sizeof(UWord) == sizeof(void*));
331   if (sizeof(UWord) == 4) return get_UInt(c);
332   if (sizeof(UWord) == 8) return get_ULong(c);
333   vg_assert(0);
334}
335
336
337/* Read a DWARF3 'Initial Length' field */
338static ULong get_Initial_Length ( /*OUT*/Bool* is64,
339                                  Cursor* c,
340                                  HChar* barfMsg )
341{
342   ULong w64;
343   UInt  w32;
344   *is64 = False;
345   w32 = get_UInt( c );
346   if (w32 >= 0xFFFFFFF0 && w32 < 0xFFFFFFFF) {
347      c->barf( barfMsg );
348   }
349   else if (w32 == 0xFFFFFFFF) {
350      *is64 = True;
351      w64   = get_ULong( c );
352   } else {
353      *is64 = False;
354      w64 = (ULong)w32;
355   }
356   return w64;
357}
358
359
360/*------------------------------------------------------------*/
361/*---                                                      ---*/
362/*--- "CUConst" structure                                  ---*/
363/*---                                                      ---*/
364/*------------------------------------------------------------*/
365
/* Number of entries in the per-CU cache (CUConst.saC_cache) that
   set_abbv_Cursor uses to avoid rescanning the abbreviations
   table. */
#define N_ABBV_CACHE 32

/* Holds information that is constant through the parsing of a
   Compilation Unit.  This is basically plumbed through to
   everywhere. */
typedef
   struct {
      /* Call here if anything goes wrong.  Does not return. */
      void (*barf)( HChar* ) __attribute__((noreturn));
      /* Is this 64-bit DWARF ? */
      Bool   is_dw64;
      /* Which DWARF version ?  (2 or 3) */
      UShort version;
      /* Length of this Compilation Unit, excluding its Header */
      ULong  unit_length;
      /* Offset of start of this unit in .debug_info */
      UWord  cu_start_offset;
      /* SVMA for this CU.  In the D3 spec, is known as the "base
         address of the compilation unit" (last para sec 3.1.1).
         Needed for (amongst things) interpretation of location-list
         values. */
      Addr   cu_svma;
      Bool   cu_svma_known;
      /* The debug_abbreviations table to be used for this Unit */
      UChar* debug_abbv;
      /* Upper bound on size thereof (an overestimate, in general) */
      UWord  debug_abbv_maxszB;
      /* Where is .debug_str ? */
      UChar* debug_str_img;
      UWord  debug_str_sz;
      /* Where is .debug_ranges ? */
      UChar* debug_ranges_img;
      UWord  debug_ranges_sz;
      /* Where is .debug_loc ? */
      UChar* debug_loc_img;
      UWord  debug_loc_sz;
      /* Where is .debug_line? */
      UChar* debug_line_img;
      UWord  debug_line_sz;
      /* --- Needed so we can add stuff to the string table. --- */
      struct _DebugInfo* di;
      /* --- a cache for set_abbv_Cursor --- */
      /* abbv_code == (ULong)-1 for an unused entry.  For a used
         entry, posn is the offset within debug_abbv at which parsing
         of that abbreviation entry resumes, that is, just after its
         code. */
      struct { ULong abbv_code; UWord posn; } saC_cache[N_ABBV_CACHE];
      /* Statistics: number of lookups made by set_abbv_Cursor, and
         how many of them were not satisfied by the cache. */
      UWord saC_cache_queries;
      UWord saC_cache_misses;
   }
   CUConst;
414
415
416/*------------------------------------------------------------*/
417/*---                                                      ---*/
418/*--- Helper functions for Guarded Expressions             ---*/
419/*---                                                      ---*/
420/*------------------------------------------------------------*/
421
422/* Parse the location list starting at img-offset 'debug_loc_offset'
423   in .debug_loc.  Results are biased with 'svma_of_referencing_CU'
424   and so I believe are correct SVMAs for the object as a whole.  This
425   function allocates the UChar*, and the caller must deallocate it.
426   The resulting block is in so-called Guarded-Expression format.
427
428   Guarded-Expression format is similar but not identical to the DWARF3
429   location-list format.  The format of each returned block is:
430
431      UChar biasMe;
432      UChar isEnd;
433      followed by zero or more of
434
435      (Addr aMin;  Addr aMax;  UShort nbytes;  ..bytes..;  UChar isEnd)
436
   '..bytes..' is a standard DWARF3 location expression which is
438   valid when aMin <= pc <= aMax (possibly after suitable biasing).
439
440   The number of bytes in '..bytes..' is nbytes.
441
442   The end of the sequence is marked by an isEnd == 1 value.  All
443   previous isEnd values must be zero.
444
   biasMe is 1 if the aMin/aMax fields need this DebugInfo's
   text_bias added before use, and 0 if this is not necessary (the
   GX is ready to go).
448
449   Hence the block can be quickly parsed and is self-describing.  Note
450   that aMax is 1 less than the corresponding value in a DWARF3
451   location list.  Zero length ranges, with aMax == aMin-1, are not
452   allowed.
453*/
454void ML_(pp_GX) ( GExpr* gx ) {
455   Addr   aMin, aMax;
456   UChar  uc;
457   UShort nbytes;
458   UChar* p = &gx->payload[0];
459   uc = *p++;
460   VG_(printf)("GX(%s){", uc == 0 ? "final" : "Breqd" );
461   vg_assert(uc == 0 || uc == 1);
462   while (True) {
463      uc = *p++;
464      if (uc == 1)
465         break; /*isEnd*/
466      vg_assert(uc == 0);
467      aMin   = * (Addr*)p;  p += sizeof(Addr);
468      aMax   = * (Addr*)p;  p += sizeof(Addr);
469      nbytes = * (UShort*)p; p += sizeof(UShort);
470      VG_(printf)("[%p,%p]=", aMin, aMax);
471      while (nbytes > 0) {
472         VG_(printf)("%02x", (UInt)*p++);
473         nbytes--;
474      }
475      if (*p == 0)
476         VG_(printf)(",");
477   }
478   VG_(printf)("}");
479}
480
481static void bias_GX ( /*MOD*/GExpr* gx, Addr bias )
482{
483   UShort nbytes;
484   UChar* p = &gx->payload[0];
485   UChar  uc;
486   uc = *p++; /*biasMe*/
487   if (uc == 0)
488      return;
489   vg_assert(uc == 1);
490   p[-1] = 0; /* mark it as done */
491   while (True) {
492      uc = *p++;
493      if (uc == 1)
494         break; /*isEnd*/
495      vg_assert(uc == 0);
496      * ((Addr*)p) += bias; /*aMin*/  p += sizeof(Addr);
497      * ((Addr*)p) += bias; /*aMax*/  p += sizeof(Addr);
498      nbytes = * (UShort*)p; p += sizeof(UShort);
499      p += nbytes;
500   }
501}
502
503__attribute__((noinline))
504static GExpr* make_singleton_GX ( UChar* block, UWord nbytes )
505{
506   SizeT  bytesReqd;
507   GExpr* gx;
508   UChar *p, *pstart;
509
510   vg_assert(sizeof(UWord) == sizeof(Addr));
511   vg_assert(nbytes <= 0xFFFF); /* else we overflow the nbytes field */
512   bytesReqd
513      =   sizeof(UChar)  /*biasMe*/    + sizeof(UChar) /*!isEnd*/
514        + sizeof(UWord)  /*aMin*/      + sizeof(UWord) /*aMax*/
515        + sizeof(UShort) /*nbytes*/    + nbytes
516        + sizeof(UChar); /*isEnd*/
517
518   gx = ML_(dinfo_zalloc)( sizeof(GExpr) + bytesReqd );
519   vg_assert(gx);
520
521   p = pstart = &gx->payload[0];
522
523   * ((UChar*)p)  = 0;          /*biasMe*/ p += sizeof(UChar);
524   * ((UChar*)p)  = 0;          /*!isEnd*/ p += sizeof(UChar);
525   * ((Addr*)p)   = 0;          /*aMin*/   p += sizeof(Addr);
526   * ((Addr*)p)   = ~((Addr)0); /*aMax */  p += sizeof(Addr);
527   * ((UShort*)p) = (UShort)nbytes; /*nbytes*/ p += sizeof(UShort);
528   VG_(memcpy)(p, block, nbytes); p += nbytes;
529   * ((UChar*)p)  = 1;          /*isEnd*/  p += sizeof(UChar);
530
531   vg_assert( (SizeT)(p - pstart) == bytesReqd);
532   vg_assert( &gx->payload[bytesReqd]
533              == ((UChar*)gx) + sizeof(GExpr) + bytesReqd );
534
535   gx->next = NULL;
536   return gx;
537}
538
539__attribute__((noinline))
540static GExpr* make_general_GX ( CUConst* cc,
541                                Bool     td3,
542                                UWord    debug_loc_offset,
543                                Addr     svma_of_referencing_CU )
544{
545   Addr      base;
546   Cursor    loc;
547   XArray*   xa; /* XArray of UChar */
548   GExpr*    gx;
549   Word      nbytes;
550
551   vg_assert(sizeof(UWord) == sizeof(Addr));
552   if (cc->debug_loc_sz == 0)
553      cc->barf("make_general_GX: .debug_loc is empty/missing");
554
555   init_Cursor( &loc, cc->debug_loc_img,
556                cc->debug_loc_sz, 0, cc->barf,
557                "Overrun whilst reading .debug_loc section(2)" );
558   set_position_of_Cursor( &loc, debug_loc_offset );
559
560   TRACE_D3("make_general_GX (.debug_loc_offset = %lu, img = %p) {\n",
561            debug_loc_offset, get_address_of_Cursor( &loc ) );
562
563   /* Who frees this xa?  It is freed before this fn exits. */
564   xa = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free),
565                    sizeof(UChar) );
566
567   { UChar c = 1; /*biasMe*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
568
569   base = 0;
570   while (True) {
571      Bool  acquire;
572      UWord len;
573      /* Read a (host-)word pair.  This is something of a hack since
574         the word size to read is really dictated by the ELF file;
575         however, we assume we're reading a file with the same
576         word-sizeness as the host.  Reasonably enough. */
577      UWord w1 = get_UWord( &loc );
578      UWord w2 = get_UWord( &loc );
579
580      TRACE_D3("   %08lx %08lx\n", w1, w2);
581      if (w1 == 0 && w2 == 0)
582         break; /* end of list */
583
584      if (w1 == -1UL) {
585         /* new value for 'base' */
586         base = w2;
587         continue;
588      }
589
590      /* else a location expression follows */
591      /* else enumerate [w1+base, w2+base) */
592      /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
593         (sec 2.17.2) */
594      if (w1 > w2) {
595         TRACE_D3("negative range is for .debug_loc expr at "
596                  "file offset %lu\n",
597                  debug_loc_offset);
598         cc->barf( "negative range in .debug_loc section" );
599      }
600
601      /* ignore zero length ranges */
602      acquire = w1 < w2;
603      len     = (UWord)get_UShort( &loc );
604
605      if (acquire) {
606         UWord  w;
607         UShort s;
608         UChar  c;
609         c = 0; /* !isEnd*/
610         VG_(addBytesToXA)( xa, &c, sizeof(c) );
611         w = w1    + base + svma_of_referencing_CU;
612         VG_(addBytesToXA)( xa, &w, sizeof(w) );
613         w = w2 -1 + base + svma_of_referencing_CU;
614         VG_(addBytesToXA)( xa, &w, sizeof(w) );
615         s = (UShort)len;
616         VG_(addBytesToXA)( xa, &s, sizeof(s) );
617      }
618
619      while (len > 0) {
620         UChar byte = get_UChar( &loc );
621         TRACE_D3("%02x", (UInt)byte);
622         if (acquire)
623            VG_(addBytesToXA)( xa, &byte, 1 );
624         len--;
625      }
626      TRACE_D3("\n");
627   }
628
629   { UChar c = 1; /*isEnd*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
630
631   nbytes = VG_(sizeXA)( xa );
632   vg_assert(nbytes >= 1);
633
634   gx = ML_(dinfo_zalloc)( sizeof(GExpr) + nbytes );
635   vg_assert(gx);
636   VG_(memcpy)( &gx->payload[0], (UChar*)VG_(indexXA)(xa,0), nbytes );
637   vg_assert( &gx->payload[nbytes]
638              == ((UChar*)gx) + sizeof(GExpr) + nbytes );
639
640   VG_(deleteXA)( xa );
641
642   gx->next = NULL;
643
644   TRACE_D3("}\n");
645
646   return gx;
647}
648
649
650/*------------------------------------------------------------*/
651/*---                                                      ---*/
652/*--- Helper functions for range lists and CU headers      ---*/
653/*---                                                      ---*/
654/*------------------------------------------------------------*/
655
/* Denotes an address range.  Both aMin and aMax are included in the
   range; hence a complete range is (0, ~0) and an empty range is any
   (X, X-1) for X > 0. */
typedef
   struct { Addr aMin; Addr aMax; }
   AddrRange;
662
663
664__attribute__((noinline))
665static XArray* /* of AddrRange */ empty_range_list ( void )
666{
667   XArray* xa; /* XArray of AddrRange */
668   /* Who frees this xa?  varstack_preen() does. */
669   xa = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free),
670                    sizeof(AddrRange) );
671   return xa;
672}
673
674
675static XArray* unitary_range_list ( Addr aMin, Addr aMax )
676{
677   XArray*   xa;
678   AddrRange pair;
679   vg_assert(aMin <= aMax);
680   /* Who frees this xa?  varstack_preen() does. */
681   xa = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free),
682                    sizeof(AddrRange) );
683   pair.aMin = aMin;
684   pair.aMax = aMax;
685   VG_(addToXA)( xa, &pair );
686   return xa;
687}
688
689
690/* Enumerate the address ranges starting at img-offset
691   'debug_ranges_offset' in .debug_ranges.  Results are biased with
692   'svma_of_referencing_CU' and so I believe are correct SVMAs for the
693   object as a whole.  This function allocates the XArray, and the
694   caller must deallocate it. */
695__attribute__((noinline))
696static XArray* /* of AddrRange */
697       get_range_list ( CUConst* cc,
698                        Bool     td3,
699                        UWord    debug_ranges_offset,
700                        Addr     svma_of_referencing_CU )
701{
702   Addr      base;
703   Cursor    ranges;
704   XArray*   xa; /* XArray of AddrRange */
705   AddrRange pair;
706
707   if (cc->debug_ranges_sz == 0)
708      cc->barf("get_range_list: .debug_ranges is empty/missing");
709
710   init_Cursor( &ranges, cc->debug_ranges_img,
711                cc->debug_ranges_sz, 0, cc->barf,
712                "Overrun whilst reading .debug_ranges section(2)" );
713   set_position_of_Cursor( &ranges, debug_ranges_offset );
714
715   /* Who frees this xa?  varstack_preen() does. */
716   xa = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free),
717                    sizeof(AddrRange) );
718   base = 0;
719   while (True) {
720      /* Read a (host-)word pair.  This is something of a hack since
721         the word size to read is really dictated by the ELF file;
722         however, we assume we're reading a file with the same
723         word-sizeness as the host.  Reasonably enough. */
724      UWord w1 = get_UWord( &ranges );
725      UWord w2 = get_UWord( &ranges );
726
727      if (w1 == 0 && w2 == 0)
728         break; /* end of list. */
729
730      if (w1 == -1UL) {
731         /* new value for 'base' */
732         base = w2;
733         continue;
734      }
735
736      /* else enumerate [w1+base, w2+base) */
737      /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
738         (sec 2.17.2) */
739      if (w1 > w2)
740         cc->barf( "negative range in .debug_ranges section" );
741      if (w1 < w2) {
742         pair.aMin = w1     + base + svma_of_referencing_CU;
743         pair.aMax = w2 - 1 + base + svma_of_referencing_CU;
744         vg_assert(pair.aMin <= pair.aMax);
745         VG_(addToXA)( xa, &pair );
746      }
747   }
748   return xa;
749}
750
751
752/* Parse the Compilation Unit header indicated at 'c' and
753   initialise 'cc' accordingly. */
754static __attribute__((noinline))
755void parse_CU_Header ( /*OUT*/CUConst* cc,
756                       Bool td3,
757                       Cursor* c,
758                       UChar* debug_abbv_img, UWord debug_abbv_sz )
759{
760   UChar  address_size;
761   UWord  debug_abbrev_offset;
762   Int    i;
763
764   VG_(memset)(cc, 0, sizeof(*cc));
765   vg_assert(c && c->barf);
766   cc->barf = c->barf;
767
768   /* initial_length field */
769   cc->unit_length
770      = get_Initial_Length( &cc->is_dw64, c,
771           "parse_CU_Header: invalid initial-length field" );
772
773   TRACE_D3("   Length:        %lld\n", cc->unit_length );
774
775   /* version */
776   cc->version = get_UShort( c );
777   if (cc->version != 2 && cc->version != 3)
778      cc->barf( "parse_CU_Header: is neither DWARF2 nor DWARF3" );
779   TRACE_D3("   Version:       %d\n", (Int)cc->version );
780
781   /* debug_abbrev_offset */
782   debug_abbrev_offset = get_Dwarfish_UWord( c, cc->is_dw64 );
783   if (debug_abbrev_offset >= debug_abbv_sz)
784      cc->barf( "parse_CU_Header: invalid debug_abbrev_offset" );
785   TRACE_D3("   Abbrev Offset: %ld\n", debug_abbrev_offset );
786
787   /* address size.  If this isn't equal to the host word size, just
788      give up.  This makes it safe to assume elsewhere that
789      DW_FORM_addr can be treated as a host word. */
790   address_size = get_UChar( c );
791   if (address_size != sizeof(void*))
792      cc->barf( "parse_CU_Header: invalid address_size" );
793   TRACE_D3("   Pointer Size:  %d\n", (Int)address_size );
794
795   /* Set up so that cc->debug_abbv points to the relevant table for
796      this CU.  Set the szB so that at least we can't read off the end
797      of the debug_abbrev section -- potentially (and quite likely)
798      too big, if this isn't the last table in the section, but at
799      least it's safe. */
800   cc->debug_abbv        = debug_abbv_img + debug_abbrev_offset;
801   cc->debug_abbv_maxszB = debug_abbv_sz  - debug_abbrev_offset;
802   /* and empty out the set_abbv_Cursor cache */
803   if (0) VG_(printf)("XXXXXX initialise set_abbv_Cursor cache\n");
804   for (i = 0; i < N_ABBV_CACHE; i++) {
805      cc->saC_cache[i].abbv_code = (ULong)-1; /* unused */
806      cc->saC_cache[i].posn = 0;
807   }
808   cc->saC_cache_queries = 0;
809   cc->saC_cache_misses = 0;
810}
811
812
813/* Set up 'c' so it is ready to parse the abbv table entry code
814   'abbv_code' for this compilation unit.  */
815static __attribute__((noinline))
816void set_abbv_Cursor ( /*OUT*/Cursor* c, Bool td3,
817                       CUConst* cc, ULong abbv_code )
818{
819   Int   i;
820   ULong acode;
821
822   if (abbv_code == 0)
823      cc->barf("set_abbv_Cursor: abbv_code == 0" );
824
825   /* (ULong)-1 is used to represent an empty cache slot.  So we can't
826      allow it.  In any case no valid DWARF3 should make a reference
827      to a negative abbreviation code.  [at least, they always seem to
828      be numbered upwards from zero as far as I have seen] */
829   vg_assert(abbv_code != (ULong)-1);
830
831   /* First search the cache. */
832   if (0) VG_(printf)("XXXXXX search set_abbv_Cursor cache\n");
833   cc->saC_cache_queries++;
834   for (i = 0; i < N_ABBV_CACHE; i++) {
835      /* No need to test the cached abbv_codes for -1 (empty), since
836         we just asserted that abbv_code is not -1. */
837     if (cc->saC_cache[i].abbv_code == abbv_code) {
838        /* Found it.  Cool.  Set up the parser using the cached
839           position, and move this cache entry 1 step closer to the
840           front. */
841        if (0) VG_(printf)("XXXXXX found in set_abbv_Cursor cache\n");
842        init_Cursor( c, cc->debug_abbv,
843                     cc->debug_abbv_maxszB, cc->saC_cache[i].posn,
844                     cc->barf,
845                     "Overrun whilst parsing .debug_abbrev section(1)" );
846        if (i > 0) {
847           ULong t_abbv_code = cc->saC_cache[i].abbv_code;
848           UWord t_posn = cc->saC_cache[i].posn;
849           while (i > 0) {
850              cc->saC_cache[i] = cc->saC_cache[i-1];
851              cc->saC_cache[0].abbv_code = t_abbv_code;
852              cc->saC_cache[0].posn = t_posn;
853              i--;
854           }
855        }
856        return;
857     }
858   }
859
860   /* No.  It's not in the cache.  We have to search through
861      .debug_abbrev, of course taking care to update the cache
862      when done. */
863
864   cc->saC_cache_misses++;
865   init_Cursor( c, cc->debug_abbv, cc->debug_abbv_maxszB, 0, cc->barf,
866               "Overrun whilst parsing .debug_abbrev section(2)" );
867
868   /* Now iterate though the table until we find the requested
869      entry. */
870   while (True) {
871      ULong atag;
872      UInt  has_children;
873      acode = get_ULEB128( c );
874      if (acode == 0) break; /* end of the table */
875      if (acode == abbv_code) break; /* found it */
876      atag         = get_ULEB128( c );
877      has_children = get_UChar( c );
878      //TRACE_D3("   %llu      %s    [%s]\n",
879      //         acode, pp_DW_TAG(atag), pp_DW_children(has_children));
880      while (True) {
881         ULong at_name = get_ULEB128( c );
882         ULong at_form = get_ULEB128( c );
883         if (at_name == 0 && at_form == 0) break;
884         //TRACE_D3("    %18s %s\n",
885         //         pp_DW_AT(at_name), pp_DW_FORM(at_form));
886      }
887   }
888
889   if (acode == 0) {
890      /* Not found.  This is fatal. */
891      cc->barf("set_abbv_Cursor: abbv_code not found");
892   }
893
894   /* Otherwise, 'c' is now set correctly to parse the relevant entry,
895      starting from the abbreviation entry's tag.  So just cache
896      the result, and return. */
897   for (i = N_ABBV_CACHE-1; i > N_ABBV_CACHE/2; i--) {
898      cc->saC_cache[i] = cc->saC_cache[i-1];
899   }
900   if (0) VG_(printf)("XXXXXX update set_abbv_Cursor cache\n");
901   cc->saC_cache[N_ABBV_CACHE/2].abbv_code = abbv_code;
902   cc->saC_cache[N_ABBV_CACHE/2].posn = get_position_of_Cursor(c);
903}
904
905
906/* From 'c', get the Form data into the lowest 1/2/4/8 bytes of *cts.
907
908   If *cts itself contains the entire result, then *ctsSzB is set to
909   1,2,4 or 8 accordingly and *ctsMemSzB is set to zero.
910
911   Alternatively, the result can be a block of data (in the
912   transiently mapped-in object, so-called "image" space).  If so then
913   the lowest sizeof(void*)/8 bytes of *cts hold a pointer to said
914   image, *ctsSzB is zero, and *ctsMemSzB is the size of the block.
915
916   Unfortunately this means it is impossible to represent a zero-size
917   image block since that would have *ctsSzB == 0 and *ctsMemSzB == 0
918   and so is ambiguous (which case it is?)
919
920   Invariant on successful return:
921      (*ctsSzB > 0 && *ctsMemSzB == 0)
922      || (*ctsSzB == 0 && *ctsMemSzB > 0)
923*/
924static
925void get_Form_contents ( /*OUT*/ULong* cts,
926                         /*OUT*/Int*   ctsSzB,
927                         /*OUT*/UWord* ctsMemSzB,
928                         CUConst* cc, Cursor* c,
929                         Bool td3, DW_FORM form )
930{
931   *cts       = 0;
932   *ctsSzB    = 0;
933   *ctsMemSzB = 0;
934   switch (form) {
935      case DW_FORM_data1:
936         *cts = (ULong)(UChar)get_UChar(c);
937         *ctsSzB = 1;
938         TRACE_D3("%u", (UInt)*cts);
939         break;
940      case DW_FORM_data2:
941         *cts = (ULong)(UShort)get_UShort(c);
942         *ctsSzB = 2;
943         TRACE_D3("%u", (UInt)*cts);
944         break;
945      case DW_FORM_data4:
946         *cts = (ULong)(UInt)get_UInt(c);
947         *ctsSzB = 4;
948         TRACE_D3("%u", (UInt)*cts);
949         break;
950      case DW_FORM_data8:
951         *cts = get_ULong(c);
952         *ctsSzB = 8;
953         TRACE_D3("%llu", *cts);
954         break;
955      case DW_FORM_sdata:
956         *cts = (ULong)(Long)get_SLEB128(c);
957         *ctsSzB = 8;
958         TRACE_D3("%lld", (Long)*cts);
959         break;
960      case DW_FORM_addr:
961         /* note, this is a hack.  DW_FORM_addr is defined as getting
962            a word the size of the target machine as defined by the
963            address_size field in the CU Header.  However,
964            parse_CU_Header() rejects all inputs except those for
965            which address_size == sizeof(Word), hence we can just
966            treat it as a (host) Word.  */
967         *cts = (ULong)(UWord)get_UWord(c);
968         *ctsSzB = sizeof(UWord);
969         TRACE_D3("0x%lx", (UWord)*cts);
970         break;
971      case DW_FORM_strp: {
972         /* this is an offset into .debug_str */
973         UChar* str;
974         UWord uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
975         if (cc->debug_str_img == NULL || uw >= cc->debug_str_sz)
976            cc->barf("read_and_show_Form: DW_FORM_strp "
977                     "points outside .debug_str");
978         /* FIXME: check the entire string lies inside debug_str,
979            not just the first byte of it. */
980         str = (UChar*)cc->debug_str_img + uw;
981         TRACE_D3("(indirect string, offset: 0x%lx): %s", uw, str);
982         *cts = (ULong)(UWord)str;
983         *ctsMemSzB = 1 + (ULong)VG_(strlen)(str);
984         break;
985      }
986      case DW_FORM_string: {
987         UChar* str = get_AsciiZ(c);
988         TRACE_D3("%s", str);
989         *cts = (ULong)(UWord)str;
990         /* strlen is safe because get_AsciiZ already 'vetted' the
991            entire string */
992         *ctsMemSzB = 1 + (ULong)VG_(strlen)(str);
993         break;
994      }
995      case DW_FORM_ref4: {
996         UInt  u32 = get_UInt(c);
997         UWord res = cc->cu_start_offset + (UWord)u32;
998         *cts = (ULong)res;
999         *ctsSzB = sizeof(UWord);
1000         TRACE_D3("<%lx>", res);
1001         break;
1002      }
1003      case DW_FORM_flag: {
1004         UChar u8 = get_UChar(c);
1005         TRACE_D3("%u", (UInt)u8);
1006         *cts = (ULong)u8;
1007         *ctsSzB = 1;
1008         break;
1009      }
1010      case DW_FORM_block1: {
1011         ULong  u64b;
1012         ULong  u64 = (ULong)get_UChar(c);
1013         UChar* block = get_address_of_Cursor(c);
1014         TRACE_D3("%llu byte block: ", u64);
1015         for (u64b = u64; u64b > 0; u64b--) {
1016            UChar u8 = get_UChar(c);
1017            TRACE_D3("%x ", (UInt)u8);
1018         }
1019         *cts = (ULong)(UWord)block;
1020         *ctsMemSzB = (UWord)u64;
1021         break;
1022      }
1023      default:
1024         VG_(printf)("get_Form_contents: unhandled %lld (%s)\n",
1025                     form, ML_(pp_DW_FORM)(form));
1026         c->barf("get_Form_contents: unhandled DW_FORM");
1027   }
1028}
1029
1030
1031/*------------------------------------------------------------*/
1032/*---                                                      ---*/
1033/*--- Parsing of variable-related DIEs                     ---*/
1034/*---                                                      ---*/
1035/*------------------------------------------------------------*/
1036
/* A variable (or formal parameter) collected by parse_var_DIE.
   Entries are chained through .next into a singly linked list. */
typedef
   struct _TempVar {
      struct _TempVar* next;
      UChar*  name; /* in DebugInfo's .strchunks */
      /* Represent ranges economically.  nRanges is the number of
         ranges.  Cases:
         0:    .rngOneMin .rngOneMax .rngMany are all zero
         1:    .rngOneMin .rngOneMax hold the range; .rngMany is NULL
         >= 2: .rngOneMin .rngOneMax are zero; .rngMany holds the
               ranges.
         This is merely an optimisation to avoid having to allocate
         and free the XArray in the common (98%) of cases where there
         is zero or one address ranges. */
      UWord   nRanges;
      Addr    rngOneMin;
      Addr    rngOneMax;
      XArray* rngMany; /* of AddrRange.  UNIQUE PTR in AR_DINFO. */
      /* --- */
      Int     level; /* as set by parse_var_DIE: 0 for 'external'
                        variables, else the scope stack index
                        (parser->sp) in effect when it was seen */
      Type*   typeR; /* from DW_AT_type, or D3_INVALID_CUOFF if none
                        was seen */
      GExpr*  gexpr; /* for this variable */
      GExpr*  fbGX;  /* to find the frame base of the enclosing fn, if
                        any */
      UChar*  fName; /* declaring file name, or NULL */
      Int     fLine; /* declaring file line number, or zero */
      /* offset in .debug_info, so that abstract instances can be
         found to satisfy references from concrete instances. */
      UWord   dioff;
      UWord   absOri; /* so the absOri fields refer to dioff fields
                         in some other, related TempVar.  Holds
                         (UWord)D3_INVALID_CUOFF if no
                         DW_AT_abstract_origin was seen. */
   }
   TempVar;
1068
/* Maximum depth of the scope stack held in a D3VarParser.
   varstack_push barfs if a push would exceed it. */
#define N_D3_VAR_STACK 24

typedef
   struct {
      /* Contains the range stack: a stack of address ranges, one
         stack entry for each nested scope.

         Some scope entries are created by function definitions
         (DW_TAG_subprogram), and for those, we also note the GExpr
         derived from its DW_AT_frame_base attribute, if any.
         Consequently it should be possible to find, for any
         variable's DIE, the GExpr for the containing function's
         DW_AT_frame_base by scanning back through the stack to find
         the nearest entry associated with a function.  This somewhat
         elaborate scheme is provided so as to make it possible to
         obtain the correct DW_AT_frame_base expression even in the
         presence of nested functions (or to be more precise, in the
         presence of nested DW_TAG_subprogram DIEs).
      */
      Int     sp; /* [sp] is innermost active entry; sp==-1 for empty
                     stack */
      XArray* ranges[N_D3_VAR_STACK]; /* XArray of AddrRange */
      Int     level[N_D3_VAR_STACK];  /* D3 DIE levels */
      Bool    isFunc[N_D3_VAR_STACK]; /* from DW_TAG_subprogram? */
      GExpr*  fbGX[N_D3_VAR_STACK];   /* if isFunc, contains the FB
                                         expr, else NULL */
      /* The file name table.  Is a mapping from integer index to the
         (permanent) copy of the string, iow a non-img area. */
      XArray* /* of UChar* */ filenameTable;
   }
   D3VarParser;
1100
1101static void varstack_show ( D3VarParser* parser, HChar* str ) {
1102   Word i, j;
1103   VG_(printf)("  varstack (%s) {\n", str);
1104   for (i = 0; i <= parser->sp; i++) {
1105      XArray* xa = parser->ranges[i];
1106      vg_assert(xa);
1107      VG_(printf)("    [%ld] (level %d)", i, parser->level[i]);
1108      if (parser->isFunc[i]) {
1109         VG_(printf)(" (fbGX=%p)", parser->fbGX[i]);
1110      } else {
1111         vg_assert(parser->fbGX[i] == NULL);
1112      }
1113      VG_(printf)(": ");
1114      if (VG_(sizeXA)( xa ) == 0) {
1115         VG_(printf)("** empty PC range array **");
1116      } else {
1117         for (j = 0; j < VG_(sizeXA)( xa ); j++) {
1118            AddrRange* range = (AddrRange*) VG_(indexXA)( xa, j );
1119            vg_assert(range);
1120            VG_(printf)("[%p,%p] ", range->aMin, range->aMax);
1121         }
1122      }
1123      VG_(printf)("\n");
1124   }
1125   VG_(printf)("  }\n");
1126}
1127
1128/* Remove from the stack, all entries with .level > 'level' */
1129static
1130void varstack_preen ( D3VarParser* parser, Bool td3, Int level )
1131{
1132   Bool changed = False;
1133   vg_assert(parser->sp < N_D3_VAR_STACK);
1134   while (True) {
1135      vg_assert(parser->sp >= -1);
1136      if (parser->sp == -1) break;
1137      if (parser->level[parser->sp] <= level) break;
1138      if (0)
1139         TRACE_D3("BBBBAAAA varstack_pop [newsp=%d]\n", parser->sp-1);
1140      vg_assert(parser->ranges[parser->sp]);
1141      /* Who allocated this xa?  get_range_list() or
1142         unitary_range_list(). */
1143      VG_(deleteXA)( parser->ranges[parser->sp] );
1144      parser->ranges[parser->sp] = NULL;
1145      parser->level[parser->sp]  = 0;
1146      parser->isFunc[parser->sp] = False;
1147      parser->fbGX[parser->sp]   = NULL;
1148      parser->sp--;
1149      changed = True;
1150   }
1151   if (changed && td3)
1152      varstack_show( parser, "after preen" );
1153}
1154
1155static void varstack_push ( CUConst* cc,
1156                            D3VarParser* parser,
1157                            Bool td3,
1158                            XArray* ranges, Int level,
1159                            Bool    isFunc, GExpr* fbGX ) {
1160   if (0)
1161   TRACE_D3("BBBBAAAA varstack_push[newsp=%d]: %d  %p\n",
1162            parser->sp+1, level, ranges);
1163
1164   /* First we need to zap everything >= 'level', as we are about to
1165      replace any previous entry at 'level', so .. */
1166   varstack_preen(parser, /*td3*/False, level-1);
1167
1168   vg_assert(parser->sp >= -1);
1169   vg_assert(parser->sp < N_D3_VAR_STACK);
1170   if (parser->sp == N_D3_VAR_STACK-1)
1171      cc->barf("varstack_push: N_D3_VAR_STACK is too low; "
1172               "increase and recompile");
1173   if (parser->sp >= 0)
1174      vg_assert(parser->level[parser->sp] < level);
1175   parser->sp++;
1176   vg_assert(parser->ranges[parser->sp] == NULL);
1177   vg_assert(parser->level[parser->sp]  == 0);
1178   vg_assert(parser->isFunc[parser->sp] == False);
1179   vg_assert(parser->fbGX[parser->sp]   == NULL);
1180   vg_assert(ranges != NULL);
1181   if (!isFunc) vg_assert(fbGX == NULL);
1182   parser->ranges[parser->sp] = ranges;
1183   parser->level[parser->sp]  = level;
1184   parser->isFunc[parser->sp] = isFunc;
1185   parser->fbGX[parser->sp]   = fbGX;
1186   if (td3)
1187      varstack_show( parser, "after push" );
1188}
1189
1190
1191/* cts, ctsSzB, ctsMemSzB are derived from a DW_AT_location and so
1192   refer either to a location expression or to a location list.
1193   Figure out which, and in both cases bundle the expression or
1194   location list into a so-called GExpr (guarded expression). */
1195__attribute__((noinline))
1196static GExpr* get_GX ( CUConst* cc, Bool td3,
1197                       ULong cts, Int ctsSzB, UWord ctsMemSzB )
1198{
1199   GExpr* gexpr = NULL;
1200   if (ctsMemSzB > 0 && ctsSzB == 0) {
1201      /* represents an in-line location expression, and cts points
1202         right at it */
1203      gexpr = make_singleton_GX( (UChar*)(UWord)cts, ctsMemSzB );
1204   }
1205   else
1206   if (ctsMemSzB == 0 && ctsSzB > 0) {
1207      /* represents location list.  cts is the offset of it in
1208         .debug_loc. */
1209      if (!cc->cu_svma_known)
1210         cc->barf("get_GX: location list, but CU svma is unknown");
1211      gexpr = make_general_GX( cc, td3, (UWord)cts, cc->cu_svma );
1212   }
1213   else {
1214      vg_assert(0); /* else caller is bogus */
1215   }
1216   return gexpr;
1217}
1218
1219
1220static
1221void read_filename_table( /*MOD*/D3VarParser* parser,
1222                          CUConst* cc, UWord debug_line_offset,
1223                          Bool td3 )
1224{
1225   Bool   is_dw64;
1226   Cursor c;
1227   Word   i;
1228   ULong  unit_length;
1229   UShort version;
1230   ULong  header_length;
1231   UChar  minimum_instruction_length;
1232   UChar  default_is_stmt;
1233   Char   line_base;
1234   UChar  line_range;
1235   UChar  opcode_base;
1236   UChar* str;
1237
1238   vg_assert(parser && cc && cc->barf);
1239   if ((!cc->debug_line_img)
1240       || cc->debug_line_sz <= debug_line_offset)
1241      cc->barf("read_filename_table: .debug_line is missing?");
1242
1243   init_Cursor( &c, cc->debug_line_img,
1244                cc->debug_line_sz, debug_line_offset, cc->barf,
1245                "Overrun whilst reading .debug_line section(1)" );
1246
1247   unit_length
1248      = get_Initial_Length( &is_dw64, &c,
1249           "read_filename_table: invalid initial-length field" );
1250   version = get_UShort( &c );
1251   if (version != 2)
1252     cc->barf("read_filename_table: Only DWARF version 2 line info "
1253              "is currently supported.");
1254   header_length = (ULong)get_Dwarfish_UWord( &c, is_dw64 );
1255   minimum_instruction_length = get_UChar( &c );
1256   default_is_stmt            = get_UChar( &c );
1257   line_base                  = (Char)get_UChar( &c );
1258   line_range                 = get_UChar( &c );
1259   opcode_base                = get_UChar( &c );
1260   /* skip over "standard_opcode_lengths" */
1261   for (i = 1; i < (Word)opcode_base; i++)
1262     (void)get_UChar( &c );
1263
1264   /* skip over the directory names table */
1265   while (peek_UChar(&c) != 0) {
1266     (void)get_AsciiZ(&c);
1267   }
1268   (void)get_UChar(&c); /* skip terminating zero */
1269
1270   /* Read and record the file names table */
1271   vg_assert(parser->filenameTable);
1272   vg_assert( VG_(sizeXA)( parser->filenameTable ) == 0 );
1273   /* Add a dummy index-zero entry.  DWARF3 numbers its files
1274      from 1, for some reason. */
1275   str = ML_(addStr)( cc->di, "<unknown_file>", -1 );
1276   VG_(addToXA)( parser->filenameTable, &str );
1277   while (peek_UChar(&c) != 0) {
1278      str = get_AsciiZ(&c);
1279      TRACE_D3("  read_filename_table: %ld %s\n",
1280               VG_(sizeXA)(parser->filenameTable), str);
1281      str = ML_(addStr)( cc->di, str, -1 );
1282      VG_(addToXA)( parser->filenameTable, &str );
1283      (void)get_ULEB128( &c ); /* skip directory index # */
1284      (void)get_ULEB128( &c ); /* skip last mod time */
1285      (void)get_ULEB128( &c ); /* file size */
1286   }
1287   /* We're done!  The rest of it is not interesting. */
1288}
1289
1290
1291__attribute__((noinline))
1292static void parse_var_DIE ( /*OUT*/TempVar** tempvars,
1293                            /*OUT*/GExpr** gexprs,
1294                            /*MOD*/D3VarParser* parser,
1295                            DW_TAG dtag,
1296                            UWord posn,
1297                            Int level,
1298                            Cursor* c_die,
1299                            Cursor* c_abbv,
1300                            CUConst* cc,
1301                            Bool td3 )
1302{
1303   ULong       cts;
1304   Int         ctsSzB;
1305   UWord       ctsMemSzB;
1306
1307   UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
1308   UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv );
1309
1310   varstack_preen( parser, td3, level-1 );
1311
1312   if (dtag == DW_TAG_compile_unit) {
1313      Bool have_lo    = False;
1314      Bool have_hi1   = False;
1315      Bool have_range = False;
1316      Addr ip_lo    = 0;
1317      Addr ip_hi1   = 0;
1318      Addr rangeoff = 0;
1319      while (True) {
1320         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
1321         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
1322         if (attr == 0 && form == 0) break;
1323         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
1324                            cc, c_die, False/*td3*/, form );
1325         if (attr == DW_AT_low_pc && ctsSzB > 0) {
1326            ip_lo   = cts;
1327            have_lo = True;
1328         }
1329         if (attr == DW_AT_high_pc && ctsSzB > 0) {
1330            ip_hi1   = cts;
1331            have_hi1 = True;
1332         }
1333         if (attr == DW_AT_ranges && ctsSzB > 0) {
1334            rangeoff = cts;
1335            have_range = True;
1336         }
1337         if (attr == DW_AT_stmt_list && ctsSzB > 0) {
1338            read_filename_table( parser, cc, (UWord)cts, td3 );
1339         }
1340      }
1341      /* Now, does this give us an opportunity to find this
1342         CU's svma? */
1343#if 0
1344      if (level == 0 && have_lo) {
1345         vg_assert(!cc->cu_svma_known); /* if this fails, it must be
1346         because we've already seen a DW_TAG_compile_unit DIE at level
1347         0.  But that can't happen, because DWARF3 only allows exactly
1348         one top level DIE per CU. */
1349         cc->cu_svma_known = True;
1350         cc->cu_svma = ip_lo;
1351         if (1)
1352            TRACE_D3("BBBBAAAA acquire CU_SVMA of %p\n", cc->cu_svma);
1353         /* Now, it may be that this DIE doesn't tell us the CU's
1354            SVMA, by way of not having a DW_AT_low_pc.  That's OK --
1355            the CU doesn't *have* to have its SVMA specified.
1356
1357            But as per last para D3 spec sec 3.1.1 ("Normal and
1358            Partial Compilation Unit Entries", "If the base address
1359            (viz, the SVMA) is undefined, then any DWARF entry of
1360            structure defined interms of the base address of that
1361            compilation unit is not valid.".  So that means, if whilst
1362            processing the children of this top level DIE (or their
1363            children, etc) we see a DW_AT_range, and cu_svma_known is
1364            False, then the DIE that contains it is (per the spec)
1365            invalid, and we can legitimately stop and complain. */
1366      }
1367#else
1368      /* .. whereas The Reality is, simply assume the SVMA is zero
1369         if it isn't specified. */
1370      if (level == 0) {
1371         vg_assert(!cc->cu_svma_known);
1372         cc->cu_svma_known = True;
1373         if (have_lo)
1374            cc->cu_svma = ip_lo;
1375         else
1376            cc->cu_svma = 0;
1377      }
1378#endif
1379      /* Do we have something that looks sane? */
1380      if (have_lo && have_hi1 && (!have_range)) {
1381         if (ip_lo < ip_hi1)
1382            varstack_push( cc, parser, td3,
1383                           unitary_range_list(ip_lo, ip_hi1 - 1),
1384                           level,
1385                           False/*isFunc*/, NULL/*fbGX*/ );
1386      } else
1387      if ((!have_lo) && (!have_hi1) && have_range) {
1388         varstack_push( cc, parser, td3,
1389                        get_range_list( cc, td3,
1390                                        rangeoff, cc->cu_svma ),
1391                        level,
1392                        False/*isFunc*/, NULL/*fbGX*/ );
1393      } else
1394      if ((!have_lo) && (!have_hi1) && (!have_range)) {
1395         /* CU has no code, presumably? */
1396         varstack_push( cc, parser, td3,
1397                        empty_range_list(),
1398                        level,
1399                        False/*isFunc*/, NULL/*fbGX*/ );
1400      } else
1401         goto bad_DIE;
1402   }
1403
1404   if (dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram) {
1405      Bool   have_lo    = False;
1406      Bool   have_hi1   = False;
1407      Bool   have_range = False;
1408      Addr   ip_lo      = 0;
1409      Addr   ip_hi1     = 0;
1410      Addr   rangeoff   = 0;
1411      Bool   isFunc     = dtag == DW_TAG_subprogram;
1412      GExpr* fbGX       = NULL;
1413      while (True) {
1414         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
1415         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
1416         if (attr == 0 && form == 0) break;
1417         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
1418                            cc, c_die, False/*td3*/, form );
1419         if (attr == DW_AT_low_pc && ctsSzB > 0) {
1420            ip_lo   = cts;
1421            have_lo = True;
1422         }
1423         if (attr == DW_AT_high_pc && ctsSzB > 0) {
1424            ip_hi1   = cts;
1425            have_hi1 = True;
1426         }
1427         if (attr == DW_AT_ranges && ctsSzB > 0) {
1428            rangeoff = cts;
1429            have_range = True;
1430         }
1431         if (isFunc
1432             && attr == DW_AT_frame_base
1433             && ((ctsMemSzB > 0 && ctsSzB == 0)
1434                 || (ctsMemSzB == 0 && ctsSzB > 0))) {
1435            fbGX = get_GX( cc, False/*td3*/, cts, ctsSzB, ctsMemSzB );
1436            vg_assert(fbGX);
1437            vg_assert(!fbGX->next);
1438            fbGX->next = *gexprs;
1439            *gexprs = fbGX;
1440         }
1441      }
1442      /* Do we have something that looks sane? */
1443      if (dtag == DW_TAG_subprogram
1444          && (!have_lo) && (!have_hi1) && (!have_range)) {
1445         /* This is legit - ignore it. Sec 3.3.3: "A subroutine entry
1446            representing a subroutine declaration that is not also a
1447            definition does not have code address or range
1448            attributes." */
1449      } else
1450      if (dtag == DW_TAG_lexical_block
1451          && (!have_lo) && (!have_hi1) && (!have_range)) {
1452         /* I believe this is legit, and means the lexical block
1453            contains no insns (whatever that might mean).  Ignore. */
1454      } else
1455      if (have_lo && have_hi1 && (!have_range)) {
1456         /* This scope supplies just a single address range. */
1457         if (ip_lo < ip_hi1)
1458            varstack_push( cc, parser, td3,
1459                           unitary_range_list(ip_lo, ip_hi1 - 1),
1460                           level, isFunc, fbGX );
1461      } else
1462      if ((!have_lo) && (!have_hi1) && have_range) {
1463         /* This scope supplies multiple address ranges via the use of
1464            a range list. */
1465         varstack_push( cc, parser, td3,
1466                        get_range_list( cc, td3,
1467                                        rangeoff, cc->cu_svma ),
1468                        level, isFunc, fbGX );
1469      } else
1470      if (have_lo && (!have_hi1) && (!have_range)) {
1471         /* This scope is bogus.  The D3 spec sec 3.4 (Lexical Block
1472            Entries) says fairly clearly that a scope must have either
1473            _range or (_low_pc and _high_pc). */
1474         /* The spec is a bit ambiguous though.  Perhaps a single byte
1475            range is intended?  See sec 2.17 (Code Addresses And Ranges) */
1476         /* This case is here because icc9 produced this:
1477         <2><13bd>: DW_TAG_lexical_block
1478            DW_AT_decl_line   : 5229
1479            DW_AT_decl_column : 37
1480            DW_AT_decl_file   : 1
1481            DW_AT_low_pc      : 0x401b03
1482         */
1483         /* Ignore (seems safe than pushing a single byte range) */
1484      } else
1485         goto bad_DIE;
1486   }
1487
1488   if (dtag == DW_TAG_variable || dtag == DW_TAG_formal_parameter) {
1489      UChar* name        = NULL;
1490      Type*  typeR       = D3_INVALID_CUOFF;
1491      Bool   external    = False;
1492      GExpr* gexpr       = NULL;
1493      Int    n_attrs     = 0;
1494      UWord  abs_ori     = (UWord)D3_INVALID_CUOFF;
1495      Bool   declaration = False;
1496      Int    lineNo      = 0;
1497      UChar* fileName    = NULL;
1498      while (True) {
1499         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
1500         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
1501         if (attr == 0 && form == 0) break;
1502         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
1503                            cc, c_die, False/*td3*/, form );
1504         n_attrs++;
1505         if (attr == DW_AT_name && ctsMemSzB > 0) {
1506            name = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 );
1507         }
1508         if (attr == DW_AT_location
1509             && ((ctsMemSzB > 0 && ctsSzB == 0)
1510                 || (ctsMemSzB == 0 && ctsSzB > 0))) {
1511            gexpr = get_GX( cc, False/*td3*/, cts, ctsSzB, ctsMemSzB );
1512            vg_assert(gexpr);
1513            vg_assert(!gexpr->next);
1514            gexpr->next = *gexprs;
1515            *gexprs = gexpr;
1516         }
1517         if (attr == DW_AT_type && ctsSzB > 0) {
1518            typeR = (Type*)(UWord)cts;
1519         }
1520         if (attr == DW_AT_external && ctsSzB > 0 && cts > 0) {
1521            external = True;
1522         }
1523         if (attr == DW_AT_abstract_origin && ctsSzB > 0) {
1524            abs_ori = (UWord)cts;
1525         }
1526         if (attr == DW_AT_declaration && ctsSzB > 0 && cts > 0) {
1527            declaration = True;
1528         }
1529         if (attr == DW_AT_decl_line && ctsSzB > 0) {
1530            lineNo = (Int)cts;
1531         }
1532         if (attr == DW_AT_decl_file && ctsSzB > 0) {
1533            Int ftabIx = (Int)cts;
1534            if (ftabIx >= 1
1535                && ftabIx < VG_(sizeXA)( parser->filenameTable )) {
1536               fileName = *(UChar**)
1537                          VG_(indexXA)( parser->filenameTable, ftabIx );
1538               vg_assert(fileName);
1539            }
1540            if (0) VG_(printf)("XXX filename = %s\n", fileName);
1541         }
1542      }
1543      /* We'll collect it under if one of the following three
1544         conditions holds:
1545         (1) has location and type    -> completed
1546         (2) has type only            -> is an abstract instance
1547         (3) has location and abs_ori -> is a concrete instance
1548         Name, filename and line number are all option frills.
1549      */
1550      if ( /* 1 */ (gexpr && typeR != D3_INVALID_CUOFF)
1551           /* 2 */ || (typeR != D3_INVALID_CUOFF)
1552           /* 3 */ || (gexpr && abs_ori != (UWord)D3_INVALID_CUOFF) ) {
1553
1554         /* Add this variable to the list of interesting looking
1555            variables.  Crucially, note along with it the address
1556            range(s) associated with the variable, which for locals
1557            will be the address ranges at the top of the varparser's
1558            stack. */
1559         GExpr*   fbGX = NULL;
1560         Word     i, nRanges;
1561         XArray*  /* of AddrRange */ xa;
1562         TempVar* tv;
1563         /* Stack can't be empty; we put a dummy entry on it for the
1564            entire address range before starting with the DIEs for
1565            this CU. */
1566         vg_assert(parser->sp >= 0);
1567
1568         /* If this is a local variable (non-external), try to find
1569            the GExpr for the DW_AT_frame_base of the containing
1570            function.  It should have been pushed on the stack at the
1571            time we encountered its DW_TAG_subprogram DIE, so the way
1572            to find it is to scan back down the stack looking for it.
1573            If there isn't an enclosing stack entry marked 'isFunc'
1574            then we must be seeing variable or formal param DIEs
1575            outside of a function, so we deem the Dwarf to be
1576            malformed if that happens.  Note that the fbGX may be NULL
1577            if the containing DT_TAG_subprogram didn't supply a
1578            DW_AT_frame_base -- that's OK, but there must actually be
1579            a containing DW_TAG_subprogram. */
1580         if (!external) {
1581            Bool found = False;
1582            for (i = parser->sp; i >= 0; i--) {
1583               if (parser->isFunc[i]) {
1584                  fbGX = parser->fbGX[i];
1585                  found = True;
1586                  break;
1587               }
1588            }
1589            if (!found) {
1590               if (0 && VG_(clo_verbosity) >= 0) {
1591                  VG_(message)(Vg_DebugMsg,
1592                     "warning: parse_var_DIE: non-external variable "
1593                     "outside DW_TAG_subprogram");
1594               }
1595               /* goto bad_DIE; */
1596               /* This seems to happen a lot.  Just ignore it -- if,
1597                  when we come to evaluation of the location (guarded)
1598                  expression, it requires a frame base value, and
1599                  there's no expression for that, then evaluation as a
1600                  whole will fail.  Harmless - a bit of a waste of
1601                  cycles but nothing more. */
1602            }
1603         }
1604
1605         /* re "external ? 0 : parser->sp" (twice), if the var is
1606            marked 'external' then we must put it at the global scope,
1607            as only the global scope (level 0) covers the entire PC
1608            address space.  It is asserted elsewhere that level 0
1609            always covers the entire address space. */
1610         xa = parser->ranges[external ? 0 : parser->sp];
1611         nRanges = VG_(sizeXA)(xa);
1612         vg_assert(nRanges >= 0);
1613
1614         tv = ML_(dinfo_zalloc)( sizeof(TempVar) );
1615         tv->name   = name;
1616         tv->level  = external ? 0 : parser->sp;
1617         tv->typeR  = typeR;
1618         tv->gexpr  = gexpr;
1619         tv->fbGX   = fbGX;
1620         tv->fName  = fileName;
1621         tv->fLine  = lineNo;
1622         tv->dioff  = posn;
1623         tv->absOri = abs_ori;
1624
1625         /* See explanation on definition of type TempVar for the
1626            reason for this elaboration. */
1627         tv->nRanges = nRanges;
1628         tv->rngOneMin = 0;
1629         tv->rngOneMax = 0;
1630         tv->rngMany = NULL;
1631         if (nRanges == 1) {
1632            AddrRange* range = VG_(indexXA)(xa, 0);
1633            tv->rngOneMin = range->aMin;
1634            tv->rngOneMax = range->aMax;
1635         }
1636         else if (nRanges > 1) {
1637            tv->rngMany = VG_(cloneXA)( xa ); /* free when 'tv' freed */
1638         }
1639
1640         tv->next  = *tempvars;
1641         *tempvars = tv;
1642
1643         TRACE_D3("  Recording this variable, with %ld PC range(s)\n",
1644                  VG_(sizeXA)(xa) );
1645         /* collect stats on how effective the ->ranges special
1646            casing is */
1647         if (0) {
1648           static Int ntot=0, ngt=0;
1649           ntot++;
1650           if (tv->rngMany) ngt++;
1651           if (0 == (ntot % 100000))
1652              VG_(printf)("XXXX %d tot, %d cloned\n", ntot, ngt);
1653         }
1654
1655      }
1656
1657      /* Here are some other weird cases seen in the wild:
1658
1659            We have a variable with a name and a type, but no
1660            location.  I guess that's a sign that it has been
1661            optimised away.  Ignore it.  Here's an example:
1662
1663            static Int lc_compar(void* n1, void* n2) {
1664               MC_Chunk* mc1 = *(MC_Chunk**)n1;
1665               MC_Chunk* mc2 = *(MC_Chunk**)n2;
1666               return (mc1->data < mc2->data ? -1 : 1);
1667            }
1668
1669            Both mc1 and mc2 are like this
1670            <2><5bc>: Abbrev Number: 21 (DW_TAG_variable)
1671                DW_AT_name        : mc1
1672                DW_AT_decl_file   : 1
1673                DW_AT_decl_line   : 216
1674                DW_AT_type        : <5d3>
1675
1676            whereas n1 and n2 do have locations specified.
1677
1678            ---------------------------------------------
1679
1680            We see a DW_TAG_formal_parameter with a type, but
1681            no name and no location.  It's probably part of a function type
1682            construction, thusly, hence ignore it:
1683         <1><2b4>: Abbrev Number: 12 (DW_TAG_subroutine_type)
1684             DW_AT_sibling     : <2c9>
1685             DW_AT_prototyped  : 1
1686             DW_AT_type        : <114>
1687         <2><2be>: Abbrev Number: 13 (DW_TAG_formal_parameter)
1688             DW_AT_type        : <13e>
1689         <2><2c3>: Abbrev Number: 13 (DW_TAG_formal_parameter)
1690             DW_AT_type        : <133>
1691
1692            ---------------------------------------------
1693
1694            Is very minimal, like this:
1695            <4><81d>: Abbrev Number: 44 (DW_TAG_variable)
1696                DW_AT_abstract_origin: <7ba>
1697            What that signifies I have no idea.  Ignore.
1698
1699            ----------------------------------------------
1700
1701            Is very minimal, like this:
1702            <200f>: DW_TAG_formal_parameter
1703                DW_AT_abstract_ori: <1f4c>
1704                DW_AT_location    : 13440
1705            What that signifies I have no idea.  Ignore.
1706            It might be significant, though: the variable at least
1707            has a location and so might exist somewhere.
1708            Maybe we should handle this.
1709
1710            ---------------------------------------------
1711
1712            <22407>: DW_TAG_variable
1713              DW_AT_name        : (indirect string, offset: 0x6579):
1714                                  vgPlain_trampoline_stuff_start
1715              DW_AT_decl_file   : 29
1716              DW_AT_decl_line   : 56
1717              DW_AT_external    : 1
1718              DW_AT_declaration : 1
1719
1720            Nameless and typeless variable that has a location?  Who
1721            knows.  Not me.
1722            <2><3d178>: Abbrev Number: 22 (DW_TAG_variable)
1723                 DW_AT_location    : 9 byte block: 3 c0 c7 13 38 0 0 0 0
1724                                     (DW_OP_addr: 3813c7c0)
1725
1726            No, really.  Check it out.  gcc is quite simply borked.
1727            <3><168cc>: Abbrev Number: 141 (DW_TAG_variable)
1728            // followed by no attributes, and the next DIE is a sibling,
1729            // not a child
1730            */
1731   }
1732   return;
1733
1734  bad_DIE:
1735   set_position_of_Cursor( c_die,  saved_die_c_offset );
1736   set_position_of_Cursor( c_abbv, saved_abbv_c_offset );
1737   VG_(printf)("\nparse_var_DIE: confused by:\n");
1738   VG_(printf)(" <%d><%lx>: %s\n", level, posn, ML_(pp_DW_TAG)( dtag ) );
1739   while (True) {
1740      DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
1741      DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
1742      if (attr == 0 && form == 0) break;
1743      VG_(printf)("     %18s: ", ML_(pp_DW_AT)(attr));
1744      /* Get the form contents, so as to print them */
1745      get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
1746                         cc, c_die, True, form );
1747      VG_(printf)("\t\n");
1748   }
1749   VG_(printf)("\n");
1750   cc->barf("parse_var_DIE: confused by the above DIE");
1751   /*NOTREACHED*/
1752}
1753
1754
1755/*------------------------------------------------------------*/
1756/*---                                                      ---*/
1757/*--- Parsing of type-related DIEs                         ---*/
1758/*---                                                      ---*/
1759/*------------------------------------------------------------*/
1760
1761#define N_D3_TYPE_STACK 16
1762
1763typedef
1764   struct {
1765      /* What source language?  'C'=C/C++, 'F'=Fortran, '?'=other
1766         Established once per compilation unit. */
1767      UChar language;
1768      /* A stack of types which are currently under construction */
1769      Int   sp; /* [sp] is innermost active entry; sp==-1 for empty
1770                   stack */
1771      Type* qparent[N_D3_TYPE_STACK];
1772      Int   qlevel[N_D3_TYPE_STACK];
1773
1774   }
1775   D3TypeParser;
1776
1777static void typestack_show ( D3TypeParser* parser, HChar* str ) {
1778   Word i;
1779   VG_(printf)("  typestack (%s) {\n", str);
1780   for (i = 0; i <= parser->sp; i++) {
1781      VG_(printf)("    [%ld] (level %d): ", i, parser->qlevel[i]);
1782      ML_(pp_Type)( parser->qparent[i] );
1783      VG_(printf)("\n");
1784   }
1785   VG_(printf)("  }\n");
1786}
1787
1788/* Remove from the stack, all entries with .level > 'level' */
1789static
1790void typestack_preen ( D3TypeParser* parser, Bool td3, Int level )
1791{
1792   Bool changed = False;
1793   vg_assert(parser->sp < N_D3_TYPE_STACK);
1794   while (True) {
1795      vg_assert(parser->sp >= -1);
1796      if (parser->sp == -1) break;
1797      if (parser->qlevel[parser->sp] <= level) break;
1798      if (0)
1799         TRACE_D3("BBBBAAAA typestack_pop [newsp=%d]\n", parser->sp-1);
1800      vg_assert(parser->qparent[parser->sp]);
1801      parser->qparent[parser->sp] = NULL;
1802      parser->qlevel[parser->sp]  = 0;
1803      parser->sp--;
1804      changed = True;
1805   }
1806   if (changed && td3)
1807      typestack_show( parser, "after preen" );
1808}
1809
1810static Bool typestack_is_empty ( D3TypeParser* parser ) {
1811   vg_assert(parser->sp >= -1 && parser->sp < N_D3_TYPE_STACK);
1812   return parser->sp == -1;
1813}
1814
1815static void typestack_push ( CUConst* cc,
1816                             D3TypeParser* parser,
1817                             Bool td3,
1818                             Type* parent, Int level ) {
1819   if (0)
1820   TRACE_D3("BBBBAAAA typestack_push[newsp=%d]: %d  %p\n",
1821            parser->sp+1, level, parent);
1822
1823   /* First we need to zap everything >= 'level', as we are about to
1824      replace any previous entry at 'level', so .. */
1825   typestack_preen(parser, /*td3*/False, level-1);
1826
1827   vg_assert(parser->sp >= -1);
1828   vg_assert(parser->sp < N_D3_TYPE_STACK);
1829   if (parser->sp == N_D3_TYPE_STACK-1)
1830      cc->barf("typestack_push: N_D3_TYPE_STACK is too low; "
1831               "increase and recompile");
1832   if (parser->sp >= 0)
1833      vg_assert(parser->qlevel[parser->sp] < level);
1834   parser->sp++;
1835   vg_assert(parser->qparent[parser->sp] == NULL);
1836   vg_assert(parser->qlevel[parser->sp]  == 0);
1837   vg_assert(parent != NULL);
1838   parser->qparent[parser->sp] = parent;
1839   parser->qlevel[parser->sp]  = level;
1840   if (td3)
1841      typestack_show( parser, "after push" );
1842}
1843
1844
1845/* Parse a type-related DIE.  'parser' holds the current parser state.
1846   'admin' is where the completed types are dumped.  'dtag' is the tag
1847   for this DIE.  'c_die' points to the start of the data fields (FORM
1848   stuff) for the DIE.  c_abbv points to the start of the (name,form)
1849   pairs which describe the DIE.
1850
1851   We may find the DIE uninteresting, in which case we should ignore
1852   it.
1853*/
1854__attribute__((noinline))
1855static void parse_type_DIE ( /*OUT*/TyAdmin** admin,
1856                             /*MOD*/D3TypeParser* parser,
1857                             DW_TAG dtag,
1858                             UWord posn,
1859                             Int level,
1860                             Cursor* c_die,
1861                             Cursor* c_abbv,
1862                             CUConst* cc,
1863                             Bool td3 )
1864{
1865   ULong     cts;
1866   Int       ctsSzB;
1867   UWord     ctsMemSzB;
1868   Type*     type   = NULL;
1869   TyAtom*   atom   = NULL;
1870   TyField*  field  = NULL;
1871   D3Expr*   expr   = NULL;
1872   TyBounds* bounds = NULL;
1873
1874   UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
1875   UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv );
1876
1877   /* If we've returned to a level at or above any previously noted
1878      parent, un-note it, so we don't believe we're still collecting
1879      its children. */
1880   typestack_preen( parser, td3, level-1 );
1881
1882   if (dtag == DW_TAG_compile_unit) {
1883      /* See if we can find DW_AT_language, since it is important for
1884         establishing array bounds (see DW_TAG_subrange_type below in
1885         this fn) */
1886      while (True) {
1887         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
1888         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
1889         if (attr == 0 && form == 0) break;
1890         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
1891                            cc, c_die, False/*td3*/, form );
1892         if (attr != DW_AT_language)
1893            continue;
1894         if (ctsSzB == 0)
1895           goto bad_DIE;
1896         switch (cts) {
1897            case DW_LANG_C89: case DW_LANG_C:
1898            case DW_LANG_C_plus_plus: case DW_LANG_ObjC:
1899            case DW_LANG_ObjC_plus_plus: case DW_LANG_UPC:
1900            case DW_LANG_Upc:
1901               parser->language = 'C'; break;
1902            case DW_LANG_Fortran77: case DW_LANG_Fortran90:
1903            case DW_LANG_Fortran95:
1904               parser->language = 'F'; break;
1905            case DW_LANG_Ada83: case DW_LANG_Cobol74:
1906            case DW_LANG_Cobol85: case DW_LANG_Pascal83:
1907            case DW_LANG_Modula2: case DW_LANG_Java:
1908            case DW_LANG_C99: case DW_LANG_Ada95:
1909            case DW_LANG_PLI: case DW_LANG_D:
1910            case DW_LANG_Mips_Assembler:
1911               parser->language = '?'; break;
1912            default:
1913               goto bad_DIE;
1914         }
1915      }
1916   }
1917
1918   if (dtag == DW_TAG_base_type) {
1919      /* We can pick up a new base type any time. */
1920      type = ML_(new_Type)();
1921      type->tag = Ty_Base;
1922      while (True) {
1923         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
1924         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
1925         if (attr == 0 && form == 0) break;
1926         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
1927                            cc, c_die, False/*td3*/, form );
1928         if (attr == DW_AT_name && ctsMemSzB > 0) {
1929            type->Ty.Base.name
1930               = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 );
1931         }
1932         if (attr == DW_AT_byte_size && ctsSzB > 0) {
1933            type->Ty.Base.szB = cts;
1934         }
1935         if (attr == DW_AT_encoding && ctsSzB > 0) {
1936            switch (cts) {
1937               case DW_ATE_unsigned: case DW_ATE_unsigned_char:
1938               case DW_ATE_boolean:/* FIXME - is this correct? */
1939                  type->Ty.Base.enc = 'U'; break;
1940               case DW_ATE_signed: case DW_ATE_signed_char:
1941                  type->Ty.Base.enc = 'S'; break;
1942               case DW_ATE_float:
1943                  type->Ty.Base.enc = 'F'; break;
1944               case DW_ATE_complex_float:
1945                  type->Ty.Base.enc = 'C'; break;
1946               default:
1947                  goto bad_DIE;
1948            }
1949         }
1950      }
1951
1952      /* Invent a name if it doesn't have one.  gcc-4.3
1953         -ftree-vectorize is observed to emit nameless base types. */
1954      if (!type->Ty.Base.name)
1955         type->Ty.Base.name
1956            = ML_(addStr)( cc->di, "<anon_base_type>", -1 );
1957
1958      /* Do we have something that looks sane? */
1959      if (/* must have a name */
1960          type->Ty.Base.name == NULL
1961          /* and a plausible size.  Yes, really 32: "complex long
1962             double" apparently has size=32 */
1963          || type->Ty.Base.szB < 0 || type->Ty.Base.szB > 32
1964          /* and a plausible encoding */
1965          || (type->Ty.Base.enc != 'U'
1966              && type->Ty.Base.enc != 'S'
1967              && type->Ty.Base.enc != 'F'
1968              && type->Ty.Base.enc != 'C'))
1969         goto bad_DIE;
1970      /* Last minute hack: if we see this
1971         <1><515>: DW_TAG_base_type
1972             DW_AT_byte_size   : 0
1973             DW_AT_encoding    : 5
1974             DW_AT_name        : void
1975         convert it into a real Void type. */
1976      if (type->Ty.Base.szB == 0
1977          && 0 == VG_(strcmp)("void", type->Ty.Base.name)) {
1978         VG_(memset)(type, 0, sizeof(*type));
1979         type->tag = Ty_Void;
1980         type->Ty.Void.isFake = False; /* it's a real one! */
1981      }
1982      goto acquire_Type;
1983   }
1984
1985   if (dtag == DW_TAG_pointer_type || dtag == DW_TAG_reference_type
1986       || dtag == DW_TAG_ptr_to_member_type) {
1987      /* This seems legit for _pointer_type and _reference_type.  I
1988         don't know if rolling _ptr_to_member_type in here really is
1989         legit, but it's better than not handling it at all. */
1990      type = ML_(new_Type)();
1991      type->tag = Ty_PorR;
1992      /* target type defaults to void */
1993      type->Ty.PorR.typeR = D3_FAKEVOID_CUOFF;
1994      type->Ty.PorR.isPtr = dtag == DW_TAG_pointer_type
1995                            || dtag == DW_TAG_ptr_to_member_type;
1996      /* Pointer types don't *have* to specify their size, in which
1997         case we assume it's a machine word.  But if they do specify
1998         it, it must be a machine word :-) This probably assumes that
1999         the word size of the Dwarf3 we're reading is the same size as
2000         that on the machine.  gcc appears to give a size whereas icc9
2001         doesn't. */
2002      if (type->Ty.PorR.isPtr)
2003         type->Ty.PorR.szB = sizeof(Word);
2004      while (True) {
2005         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2006         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2007         if (attr == 0 && form == 0) break;
2008         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2009                            cc, c_die, False/*td3*/, form );
2010         if (attr == DW_AT_byte_size && ctsSzB > 0) {
2011            type->Ty.PorR.szB = cts;
2012         }
2013         if (attr == DW_AT_type && ctsSzB > 0) {
2014            type->Ty.PorR.typeR = (Type*)(UWord)cts;
2015         }
2016      }
2017      /* Do we have something that looks sane? */
2018      if (type->Ty.PorR.szB != sizeof(Word))
2019         goto bad_DIE;
2020      else
2021         goto acquire_Type;
2022   }
2023
2024   if (dtag == DW_TAG_enumeration_type) {
2025      /* Create a new Type to hold the results. */
2026      type = ML_(new_Type)();
2027      type->tag = Ty_Enum;
2028      type->Ty.Enum.name = NULL;
2029      type->Ty.Enum.atomRs
2030         = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free),
2031                       sizeof(TyAtom*) );
2032      while (True) {
2033         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2034         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2035         if (attr == 0 && form == 0) break;
2036         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2037                            cc, c_die, False/*td3*/, form );
2038         if (attr == DW_AT_name && ctsMemSzB > 0) {
2039            type->Ty.Enum.name
2040               = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 );
2041         }
2042         if (attr == DW_AT_byte_size && ctsSzB > 0) {
2043            type->Ty.Enum.szB = cts;
2044         }
2045      }
2046      /* Do we have something that looks sane? */
2047      if (type->Ty.Enum.szB == 0 /* we must know the size */
2048          /* But the name can be present, or not */)
2049         goto bad_DIE;
2050      /* On't stack! */
2051      typestack_push( cc, parser, td3, type, level );
2052      goto acquire_Type;
2053   }
2054
2055   if (dtag == DW_TAG_enumerator) {
2056      Bool have_value = False;
2057      atom = ML_(new_TyAtom)( NULL, 0 );
2058      while (True) {
2059         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2060         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2061         if (attr == 0 && form == 0) break;
2062         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2063                            cc, c_die, False/*td3*/, form );
2064         if (attr == DW_AT_name && ctsMemSzB > 0) {
2065            atom->name = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 );
2066         }
2067         if (attr == DW_AT_const_value && ctsSzB > 0) {
2068            atom->value = cts;
2069            have_value = True;
2070         }
2071      }
2072      /* Do we have something that looks sane? */
2073      if ((!have_value) || atom->name == NULL)
2074         goto bad_DIE;
2075      /* Do we have a plausible parent? */
2076      if (typestack_is_empty(parser)) goto bad_DIE;
2077      vg_assert(parser->qparent[parser->sp]);
2078      if (level != parser->qlevel[parser->sp]+1) goto bad_DIE;
2079      if (parser->qparent[parser->sp]->tag != Ty_Enum) goto bad_DIE;
2080      /* Record this child in the parent */
2081      vg_assert(parser->qparent[parser->sp]->Ty.Enum.atomRs);
2082      VG_(addToXA)( parser->qparent[parser->sp]->Ty.Enum.atomRs, &atom );
2083      /* And record the child itself */
2084      goto acquire_Atom;
2085   }
2086
2087   if (dtag == DW_TAG_structure_type || dtag == DW_TAG_union_type) {
2088      Bool have_szB = False;
2089      Bool is_decl  = False;
2090      Bool is_spec  = False;
2091      /* Create a new Type to hold the results. */
2092      type = ML_(new_Type)();
2093      type->tag = Ty_StOrUn;
2094      type->Ty.StOrUn.name = NULL;
2095      type->Ty.StOrUn.fields
2096         = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free),
2097                       sizeof(TyAtom*) );
2098      type->Ty.StOrUn.complete = True;
2099      type->Ty.StOrUn.isStruct = dtag == DW_TAG_structure_type;
2100      while (True) {
2101         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2102         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2103         if (attr == 0 && form == 0) break;
2104         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2105                            cc, c_die, False/*td3*/, form );
2106         if (attr == DW_AT_name && ctsMemSzB > 0) {
2107            type->Ty.StOrUn.name
2108               = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 );
2109         }
2110         if (attr == DW_AT_byte_size && ctsSzB >= 0) {
2111            type->Ty.StOrUn.szB = cts;
2112            have_szB = True;
2113         }
2114         if (attr == DW_AT_declaration && ctsSzB > 0 && cts > 0) {
2115            is_decl = True;
2116         }
2117         if (attr == DW_AT_specification && ctsSzB > 0 && cts > 0) {
2118            is_spec = True;
2119         }
2120      }
2121      /* Do we have something that looks sane? */
2122      if (is_decl && (!is_spec)) {
2123         /* It's a DW_AT_declaration.  We require the name but
2124            nothing else. */
2125         if (type->Ty.StOrUn.name == NULL)
2126            goto bad_DIE;
2127         type->Ty.StOrUn.complete = False;
2128         goto acquire_Type;
2129      }
2130      if ((!is_decl) /* && (!is_spec) */) {
2131         /* this is the common, ordinary case */
2132         if ((!have_szB) /* we must know the size */
2133             /* But the name can be present, or not */)
2134            goto bad_DIE;
2135         /* On't stack! */
2136         typestack_push( cc, parser, td3, type, level );
2137         goto acquire_Type;
2138      }
2139      else {
2140         /* don't know how to handle any other variants just now */
2141         goto bad_DIE;
2142      }
2143   }
2144
2145   if (dtag == DW_TAG_member) {
2146      /* Acquire member entries for both DW_TAG_structure_type and
2147         DW_TAG_union_type.  They differ minorly, in that struct
2148         members must have a DW_AT_data_member_location expression
2149         whereas union members must not. */
2150      Bool parent_is_struct;
2151      field = ML_(new_TyField)( NULL, NULL, NULL );
2152      field->typeR = D3_INVALID_CUOFF;
2153      expr  = NULL;
2154      while (True) {
2155         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2156         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2157         if (attr == 0 && form == 0) break;
2158         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2159                            cc, c_die, False/*td3*/, form );
2160         if (attr == DW_AT_name && ctsMemSzB > 0) {
2161            field->name = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 );
2162         }
2163         if (attr == DW_AT_type && ctsSzB > 0) {
2164            field->typeR = (Type*)(UWord)cts;
2165         }
2166         if (attr == DW_AT_data_member_location && ctsMemSzB > 0) {
2167            UChar* copy = ML_(addStr)( cc->di, (UChar*)(UWord)cts,
2168                                               (Int)ctsMemSzB );
2169            expr = ML_(new_D3Expr)( copy, (UWord)ctsMemSzB );
2170         }
2171      }
2172      /* Do we have a plausible parent? */
2173      if (typestack_is_empty(parser)) goto bad_DIE;
2174      vg_assert(parser->qparent[parser->sp]);
2175      if (level != parser->qlevel[parser->sp]+1) goto bad_DIE;
2176      if (parser->qparent[parser->sp]->tag != Ty_StOrUn) goto bad_DIE;
2177      /* Do we have something that looks sane?  If this a member of a
2178         struct, we must have a location expression; but if a member
2179         of a union that is irrelevant (D3 spec sec 5.6.6).  We ought
2180         to reject in the latter case, but some compilers have been
2181         observed to emit constant-zero expressions.  So just ignore
2182         them. */
2183      parent_is_struct
2184         = parser->qparent[parser->sp]->Ty.StOrUn.isStruct;
2185      if (!field->name)
2186         field->name = ML_(addStr)(cc->di, "<anon_field>", -1);
2187      if ((!field->name) || (field->typeR == D3_INVALID_CUOFF))
2188         goto bad_DIE;
2189      if (parent_is_struct && (!expr))
2190         goto bad_DIE;
2191      if ((!parent_is_struct) && expr) {
2192         /* If this is a union type, pretend we haven't seen the data
2193            member location expression, as it is by definition
2194            redundant (it must be zero). */
2195         expr = NULL;
2196      }
2197      /* Record this child in the parent */
2198      field->isStruct = parent_is_struct;
2199      if (expr)
2200         field->loc = expr;
2201      vg_assert(parser->qparent[parser->sp]->Ty.StOrUn.fields);
2202      VG_(addToXA)( parser->qparent[parser->sp]->Ty.StOrUn.fields,
2203                    &field );
2204      /* And record the child itself */
2205      goto acquire_Field_and_Expr;
2206   }
2207
2208   if (dtag == DW_TAG_array_type) {
2209      type = ML_(new_Type)();
2210      type->tag = Ty_Array;
2211      type->Ty.Array.typeR = D3_INVALID_CUOFF;
2212      type->Ty.Array.bounds
2213         = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free),
2214                       sizeof(TyBounds*) );
2215      while (True) {
2216         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2217         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2218         if (attr == 0 && form == 0) break;
2219         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2220                            cc, c_die, False/*td3*/, form );
2221         if (attr == DW_AT_type && ctsSzB > 0) {
2222            type->Ty.Array.typeR = (Type*)(UWord)cts;
2223         }
2224      }
2225      if (type->Ty.Array.typeR == D3_INVALID_CUOFF)
2226         goto bad_DIE;
2227      /* On't stack! */
2228      typestack_push( cc, parser, td3, type, level );
2229      goto acquire_Type;
2230   }
2231
2232   if (dtag == DW_TAG_subrange_type) {
2233      Bool have_lower = False;
2234      Bool have_upper = False;
2235      Bool have_count = False;
2236      Long lower = 0;
2237      Long upper = 0;
2238      Long count = 0;
2239
2240      switch (parser->language) {
2241         case 'C': have_lower = True;  lower = 0; break;
2242         case 'F': have_lower = True;  lower = 1; break;
2243         case '?': have_lower = False; break;
2244         default:  vg_assert(0); /* assured us by handling of
2245                                    DW_TAG_compile_unit in this fn */
2246      }
2247      bounds = ML_(new_TyBounds)();
2248      while (True) {
2249         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2250         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2251         if (attr == 0 && form == 0) break;
2252         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2253                            cc, c_die, False/*td3*/, form );
2254         if (attr == DW_AT_lower_bound && ctsSzB > 0) {
2255            lower      = (Long)cts;
2256            have_lower = True;
2257         }
2258         if (attr == DW_AT_upper_bound && ctsSzB > 0) {
2259            upper      = (Long)cts;
2260            have_upper = True;
2261         }
2262         if (attr == DW_AT_count && ctsSzB > 0) {
2263            count      = cts;
2264            have_count = True;
2265         }
2266      }
2267      /* FIXME: potentially skip the rest if no parent present, since
2268         it could be the case that this subrange type is free-standing
2269         (not being used to describe the bounds of a containing array
2270         type) */
2271      /* Do we have a plausible parent? */
2272      if (typestack_is_empty(parser)) goto bad_DIE;
2273      vg_assert(parser->qparent[parser->sp]);
2274      if (level != parser->qlevel[parser->sp]+1) goto bad_DIE;
2275      if (parser->qparent[parser->sp]->tag != Ty_Array) goto bad_DIE;
2276
2277      /* Figure out if we have a definite range or not */
2278      if (have_lower && have_upper && (!have_count)) {
2279         bounds->knownL = True;
2280         bounds->knownU = True;
2281         bounds->boundL = lower;
2282         bounds->boundU = upper;
2283      }
2284      else if (have_lower && (!have_upper) && (!have_count)) {
2285         bounds->knownL = True;
2286         bounds->knownU = False;
2287         bounds->boundL = lower;
2288         bounds->boundU = 0;
2289      } else {
2290         /* FIXME: handle more cases */
2291         goto bad_DIE;
2292      }
2293
2294      /* Record this bound in the parent */
2295      vg_assert(parser->qparent[parser->sp]->Ty.Array.bounds);
2296      VG_(addToXA)( parser->qparent[parser->sp]->Ty.Array.bounds,
2297                    &bounds );
2298      /* And record the child itself */
2299      goto acquire_Bounds;
2300   }
2301
2302   if (dtag == DW_TAG_typedef) {
2303      /* We can pick up a new base type any time. */
2304      type = ML_(new_Type)();
2305      type->tag = Ty_TyDef;
2306      type->Ty.TyDef.name = NULL;
2307      type->Ty.TyDef.typeR = D3_INVALID_CUOFF;
2308      while (True) {
2309         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2310         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2311         if (attr == 0 && form == 0) break;
2312         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2313                            cc, c_die, False/*td3*/, form );
2314         if (attr == DW_AT_name && ctsMemSzB > 0) {
2315            type->Ty.TyDef.name
2316               = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 );
2317         }
2318         if (attr == DW_AT_type && ctsSzB > 0) {
2319            type->Ty.TyDef.typeR = (Type*)(UWord)cts;
2320         }
2321      }
2322      /* Do we have something that looks sane? */
2323      if (/* must have a name */
2324          type->Ty.TyDef.name == NULL
2325          /* but the referred-to type can be absent */)
2326         goto bad_DIE;
2327      else
2328         goto acquire_Type;
2329   }
2330
2331   if (dtag == DW_TAG_subroutine_type) {
2332      /* function type? just record that one fact and ask no
2333         further questions. */
2334      type = ML_(new_Type)();
2335      type->tag = Ty_Fn;
2336      goto acquire_Type;
2337   }
2338
2339   if (dtag == DW_TAG_volatile_type || dtag == DW_TAG_const_type) {
2340      Int have_ty = 0;
2341      type = ML_(new_Type)();
2342      type->tag = Ty_Qual;
2343      type->Ty.Qual.qual
2344         = dtag == DW_TAG_volatile_type ? 'V' : 'C';
2345      /* target type defaults to 'void' */
2346      type->Ty.Qual.typeR = D3_FAKEVOID_CUOFF;
2347      while (True) {
2348         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2349         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2350         if (attr == 0 && form == 0) break;
2351         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2352                            cc, c_die, False/*td3*/, form );
2353         if (attr == DW_AT_type && ctsSzB > 0) {
2354            type->Ty.Qual.typeR = (Type*)(UWord)cts;
2355            have_ty++;
2356         }
2357      }
2358      /* gcc sometimes generates DW_TAG_const/volatile_type without
2359         DW_AT_type and GDB appears to interpret the type as 'const
2360         void' (resp. 'volatile void').  So just allow it .. */
2361      if (have_ty == 1 || have_ty == 0)
2362         goto acquire_Type;
2363      else
2364         goto bad_DIE;
2365   }
2366
2367   /* else ignore this DIE */
2368   return;
2369   /*NOTREACHED*/
2370
2371  acquire_Type:
2372   if (0) VG_(printf)("YYYY Acquire Type\n");
2373   vg_assert(type); vg_assert(!atom); vg_assert(!field);
2374   vg_assert(!expr); vg_assert(!bounds);
2375   *admin            = ML_(new_TyAdmin)( posn, *admin );
2376   (*admin)->payload = type;
2377   (*admin)->tag     = TyA_Type;
2378   return;
2379   /*NOTREACHED*/
2380
2381  acquire_Atom:
2382   if (0) VG_(printf)("YYYY Acquire Atom\n");
2383   vg_assert(!type); vg_assert(atom); vg_assert(!field);
2384   vg_assert(!expr); vg_assert(!bounds);
2385   *admin            = ML_(new_TyAdmin)( posn, *admin );
2386   (*admin)->payload = atom;
2387   (*admin)->tag     = TyA_Atom;
2388   return;
2389   /*NOTREACHED*/
2390
2391  acquire_Field_and_Expr:
2392   /* For union members, Expr should be absent */
2393   if (0) VG_(printf)("YYYY Acquire Field and Expr\n");
2394   vg_assert(!type); vg_assert(!atom); vg_assert(field);
2395   /*vg_assert(expr);*/ vg_assert(!bounds);
2396   if (expr) {
2397      *admin            = ML_(new_TyAdmin)( (UWord)D3_INVALID_CUOFF,
2398                                            *admin );
2399      (*admin)->payload = expr;
2400      (*admin)->tag     = TyA_Expr;
2401   }
2402   *admin            = ML_(new_TyAdmin)( posn, *admin );
2403   (*admin)->payload = field;
2404   (*admin)->tag     = TyA_Field;
2405   return;
2406   /*NOTREACHED*/
2407
2408  acquire_Bounds:
2409   if (0) VG_(printf)("YYYY Acquire Bounds\n");
2410   vg_assert(!type); vg_assert(!atom); vg_assert(!field);
2411   vg_assert(!expr); vg_assert(bounds);
2412   *admin            = ML_(new_TyAdmin)( posn, *admin );
2413   (*admin)->payload = bounds;
2414   (*admin)->tag     = TyA_Bounds;
2415   return;
2416   /*NOTREACHED*/
2417
2418  bad_DIE:
2419   set_position_of_Cursor( c_die,  saved_die_c_offset );
2420   set_position_of_Cursor( c_abbv, saved_abbv_c_offset );
2421   VG_(printf)("\nparse_type_DIE: confused by:\n");
2422   VG_(printf)(" <%d><%lx>: %s\n", level, posn, ML_(pp_DW_TAG)( dtag ) );
2423   while (True) {
2424      DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2425      DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2426      if (attr == 0 && form == 0) break;
2427      VG_(printf)("     %18s: ", ML_(pp_DW_AT)(attr));
2428      /* Get the form contents, so as to print them */
2429      get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2430                         cc, c_die, True, form );
2431      VG_(printf)("\t\n");
2432   }
2433   VG_(printf)("\n");
2434   cc->barf("parse_type_DIE: confused by the above DIE");
2435   /*NOTREACHED*/
2436}
2437
2438
2439/*------------------------------------------------------------*/
2440/*---                                                      ---*/
2441/*--- Resolution of references to type DIEs                ---*/
2442/*---                                                      ---*/
2443/*------------------------------------------------------------*/
2444
2445static Int cmp_D3TyAdmin_by_cuOff ( void* v1, void* v2 ) {
2446   TyAdmin* a1 = *(TyAdmin**)v1;
2447   TyAdmin* a2 = *(TyAdmin**)v2;
2448   if (a1->cuOff < a2->cuOff) return -1;
2449   if (a1->cuOff > a2->cuOff) return 1;
2450   return 0;
2451}
2452
2453/* Look up 'cuOff' in 'map', to find the associated D3TyAdmin*.  Check
2454   that the found D3TyAdmin has tag 'adtag'.  Sets *payload to be the
2455   resulting payload pointer and returns True on success.
2456
2457   Also, if 'allow_invalid' is True, then if cuOff is
2458   D3_INVALID_CUOFF, return NULL in *payload.
2459
2460   Otherwise (conceptually fails) and returns False. */
2461__attribute__((noinline))
2462static Bool resolve_binding ( /*OUT*/void** payload,
2463                              XArray* map, void* cuOff,
2464                              TyAdminTag tag,
2465                              Bool allow_invalid ) {
2466   Bool    found;
2467   Word    ixLo, ixHi;
2468   TyAdmin dummy, *dummyP, *admin;
2469
2470   if (cuOff == D3_INVALID_CUOFF && allow_invalid) {
2471      *payload = NULL;
2472      return True;
2473   }
2474
2475   VG_(memset)(&dummy, 0, sizeof(dummy));
2476   dummy.cuOff = (UWord)cuOff;
2477   dummyP = &dummy;
2478   found = VG_(lookupXA)( map, &dummyP, &ixLo, &ixHi );
2479   if (!found)
2480      return False;
2481   /* If this doesn't hold, we must have seen more than one DIE with
2482      the same cuOff(set).  Which isn't possible. */
2483   vg_assert(ixLo == ixHi);
2484   admin = *(TyAdmin**)VG_(indexXA)( map, ixLo );
2485   /* All payload pointers should be non-NULL.  Ensured by assertion in
2486      loop in resolve_type_entities that creates 'map'.  Hence it is
2487      safe to return NULL to indicate 'not found'. */
2488   vg_assert(admin->payload);
2489   vg_assert(admin->cuOff == (UWord)cuOff); /* stay sane */
2490
2491   if (admin->tag != tag)
2492      return False;
2493
2494   *payload = admin->payload;
2495   return True;
2496}
2497
2498__attribute__((noinline))
2499static void resolve_type_entities ( /*MOD*/TyAdmin* admin,
2500                                    /*MOD*/TempVar* vars )
2501{
2502   Bool     ok;
2503   void*    payload;
2504   TyAdmin* adp;
2505   XArray* /* of D3TyAdmin* */ map;
2506
2507   map = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free),
2508                     sizeof(TyAdmin*) );
2509   for (adp = admin; adp; adp = adp->next) {
2510      vg_assert(adp);
2511      vg_assert(adp->payload != NULL);
2512      if (adp->cuOff != (UWord)D3_INVALID_CUOFF) {
2513         VG_(addToXA)( map, &adp );
2514      }
2515   }
2516
2517   VG_(setCmpFnXA)( map, cmp_D3TyAdmin_by_cuOff );
2518   if (0)
2519      VG_(printf)("XXXXXX sorting map with %d entries\n",
2520                  (Int)VG_(sizeXA)(map));
2521   VG_(sortXA)( map );
2522
2523   for (adp = admin; adp; adp = adp->next) {
2524      vg_assert(adp->payload);
2525      switch (adp->tag) {
2526      case TyA_Bounds: {
2527         TyBounds* bounds = (TyBounds*)adp->payload;
2528         if (bounds->knownL && bounds->knownU
2529             && bounds->knownL > bounds->knownU) goto baaad;
2530         break;
2531      }
2532      case TyA_Atom: {
2533         TyAtom* atom = (TyAtom*)adp->payload;
2534         if (!atom->name) goto baaad;
2535         break;
2536      }
2537      case TyA_Expr: {
2538         D3Expr* expr = (D3Expr*)adp->payload;
2539         if (!expr->bytes) goto baaad;
2540         break;
2541      }
2542      case TyA_Field: {
2543         TyField* field = (TyField*)adp->payload;
2544         if (!field->name) goto baaad;
2545         if ( (field->isStruct && (!field->loc))
2546              || ((!field->isStruct) && field->loc))
2547            goto baaad;
2548         ok = resolve_binding( &payload, map, field->typeR,
2549                               TyA_Type, False/*!allow_invalid*/ );
2550         if (!ok) goto baaad;
2551         field->typeR = payload;
2552         break;
2553      }
2554      case TyA_Type: {
2555         UChar   enc;
2556         XArray* xa;
2557         Type* ty = (Type*)adp->payload;
2558         switch (ty->tag) {
2559            case Ty_Base:
2560               enc = ty->Ty.Base.enc;
2561               if ((!ty->Ty.Base.name)
2562                   || ty->Ty.Base.szB < 1 || ty->Ty.Base.szB > 32
2563                   || (enc != 'S' && enc != 'U' && enc != 'F' && enc != 'C'))
2564                  goto baaad;
2565               break;
2566            case Ty_TyDef:
2567               if (!ty->Ty.TyDef.name) goto baaad;
2568               ok = resolve_binding( &payload, map,
2569                                     ty->Ty.TyDef.typeR,
2570                                     TyA_Type,
2571                                     True/*allow_invalid*/ );
2572               if (!ok) goto baaad;
2573               ty->Ty.TyDef.typeR = payload;
2574               break;
2575            case Ty_PorR:
2576               if (ty->Ty.PorR.szB != sizeof(Word)) goto baaad;
2577               ok = resolve_binding( &payload, map,
2578                                     ty->Ty.PorR.typeR,
2579                                     TyA_Type,
2580                                     False/*!allow_invalid*/ );
2581               if (!ok) goto baaad;
2582               ty->Ty.PorR.typeR = payload;
2583               break;
2584            case Ty_Array:
2585               if (!ty->Ty.Array.bounds) goto baaad;
2586               ok = resolve_binding( &payload, map,
2587                                     ty->Ty.Array.typeR,
2588                                     TyA_Type,
2589                                     False/*!allow_invalid*/ );
2590               if (!ok) goto baaad;
2591               ty->Ty.Array.typeR = payload;
2592               break;
2593            case Ty_Enum:
2594               if ((!ty->Ty.Enum.atomRs)
2595                   || ty->Ty.Enum.szB < 1
2596                   || ty->Ty.Enum.szB > 8) goto baaad;
2597               xa = ty->Ty.Enum.atomRs;
2598               break;
2599            case Ty_StOrUn:
2600               xa = ty->Ty.StOrUn.fields;
2601               if (!xa) goto baaad;
2602               break;
2603            case Ty_Fn:
2604               break;
2605            case Ty_Qual:
2606               if (ty->Ty.Qual.qual != 'C'
2607                   && ty->Ty.Qual.qual != 'V') goto baaad;
2608               ok = resolve_binding( &payload, map,
2609                                     ty->Ty.Qual.typeR,
2610                                     TyA_Type,
2611                                     False/*!allow_invalid*/ );
2612               if (!ok) goto baaad;
2613               ty->Ty.Qual.typeR = payload;
2614               break;
2615            case Ty_Void:
2616               if (ty->Ty.Void.isFake != False
2617                   && ty->Ty.Void.isFake != True) goto baaad;
2618               break;
2619            default:
2620               goto baaad;
2621         }
2622         break;
2623      }
2624      baaad:
2625      default:
2626         VG_(printf)("valgrind: bad D3TyAdmin: ");
2627         ML_(pp_TyAdmin)(adp);
2628         VG_(printf)("\n");
2629      }
2630   }
2631
2632   /* Now resolve the variables list */
2633   for (; vars; vars = vars->next) {
2634      payload = NULL;
2635      ok = resolve_binding( &payload, map, vars->typeR,
2636                            TyA_Type, True/*allow_invalid*/ );
2637
2638      if (0 && !ok)
2639         VG_(printf)("Can't resolve type reference 0x%lx\n",
2640                     (UWord)vars->typeR);
2641      //vg_assert(ok);
2642      vars->typeR = payload;
2643   }
2644
2645   VG_(deleteXA)( map );
2646}
2647
2648
2649/*------------------------------------------------------------*/
2650/*---                                                      ---*/
2651/*--- Parsing of Compilation Units                         ---*/
2652/*---                                                      ---*/
2653/*------------------------------------------------------------*/
2654
2655static Int cmp_TempVar_by_dioff ( void* v1, void* v2 ) {
2656   TempVar* t1 = *(TempVar**)v1;
2657   TempVar* t2 = *(TempVar**)v2;
2658   if (t1->dioff < t2->dioff) return -1;
2659   if (t1->dioff > t2->dioff) return 1;
2660   return 0;
2661}
2662
2663static void read_DIE ( /*OUT*/TyAdmin** admin,
2664                       /*OUT*/TempVar** tempvars,
2665                       /*OUT*/GExpr** gexprs,
2666                       /*MOD*/D3TypeParser* typarser,
2667                       /*MOD*/D3VarParser* varparser,
2668                       Cursor* c, Bool td3, CUConst* cc, Int level )
2669{
2670   Cursor abbv;
2671   ULong  atag, abbv_code;
2672   UWord  posn;
2673   UInt   has_children;
2674   UWord  start_die_c_offset, start_abbv_c_offset;
2675   UWord  after_die_c_offset, after_abbv_c_offset;
2676
2677   /* --- Deal with this DIE --- */
2678   posn      = get_position_of_Cursor( c );
2679   abbv_code = get_ULEB128( c );
2680   set_abbv_Cursor( &abbv, td3, cc, abbv_code );
2681   atag      = get_ULEB128( &abbv );
2682   TRACE_D3("\n");
2683   TRACE_D3(" <%d><%lx>: Abbrev Number: %llu (%s)\n",
2684            level, posn, abbv_code, ML_(pp_DW_TAG)( atag ) );
2685
2686   if (atag == 0)
2687      cc->barf("read_DIE: invalid zero tag on DIE");
2688
2689   has_children = get_UChar( &abbv );
2690   if (has_children != DW_children_no && has_children != DW_children_yes)
2691      cc->barf("read_DIE: invalid has_children value");
2692
2693   /* We're set up to look at the fields of this DIE.  Hand it off to
2694      any parser(s) that want to see it.  Since they will in general
2695      advance both the DIE and abbrev cursors, remember their current
2696      settings so that we can then back up and do one final pass over
2697      the DIE, to print out its contents. */
2698
2699   start_die_c_offset  = get_position_of_Cursor( c );
2700   start_abbv_c_offset = get_position_of_Cursor( &abbv );
2701
2702   while (True) {
2703      ULong cts;
2704      Int   ctsSzB;
2705      UWord ctsMemSzB;
2706      ULong at_name = get_ULEB128( &abbv );
2707      ULong at_form = get_ULEB128( &abbv );
2708      if (at_name == 0 && at_form == 0) break;
2709      TRACE_D3("     %18s: ", ML_(pp_DW_AT)(at_name));
2710      /* Get the form contents, but ignore them; the only purpose is
2711         to print them, if td3 is True */
2712      get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2713                         cc, c, td3, (DW_FORM)at_form );
2714      TRACE_D3("\t");
2715      TRACE_D3("\n");
2716   }
2717
2718   after_die_c_offset  = get_position_of_Cursor( c );
2719   after_abbv_c_offset = get_position_of_Cursor( &abbv );
2720
2721   set_position_of_Cursor( c,     start_die_c_offset );
2722   set_position_of_Cursor( &abbv, start_abbv_c_offset );
2723
2724   parse_type_DIE( admin,
2725                   typarser,
2726                   (DW_TAG)atag,
2727                   posn,
2728                   level,
2729                   c,     /* DIE cursor */
2730                   &abbv, /* abbrev cursor */
2731                   cc,
2732                   td3 );
2733
2734   set_position_of_Cursor( c,     start_die_c_offset );
2735   set_position_of_Cursor( &abbv, start_abbv_c_offset );
2736
2737   parse_var_DIE( tempvars,
2738                  gexprs,
2739                  varparser,
2740                  (DW_TAG)atag,
2741                  posn,
2742                  level,
2743                  c,     /* DIE cursor */
2744                  &abbv, /* abbrev cursor */
2745                  cc,
2746                  td3 );
2747
2748   set_position_of_Cursor( c,     after_die_c_offset );
2749   set_position_of_Cursor( &abbv, after_abbv_c_offset );
2750
2751   /* --- Now recurse into its children, if any --- */
2752   if (has_children == DW_children_yes) {
2753      if (0) TRACE_D3("BEGIN children of level %d\n", level);
2754      while (True) {
2755         atag = peek_ULEB128( c );
2756         if (atag == 0) break;
2757         read_DIE( admin, tempvars, gexprs, typarser, varparser,
2758                   c, td3, cc, level+1 );
2759      }
2760      /* Now we need to eat the terminating zero */
2761      atag = get_ULEB128( c );
2762      vg_assert(atag == 0);
2763      if (0) TRACE_D3("END children of level %d\n", level);
2764   }
2765
2766}
2767
2768
2769static
2770void new_dwarf3_reader_wrk (
2771   struct _DebugInfo* di,
2772   __attribute__((noreturn))
2773   void (*barf)( HChar* ),
2774   UChar* debug_info_img,   SizeT debug_info_sz,
2775   UChar* debug_abbv_img,   SizeT debug_abbv_sz,
2776   UChar* debug_line_img,   SizeT debug_line_sz,
2777   UChar* debug_str_img,    SizeT debug_str_sz,
2778   UChar* debug_ranges_img, SizeT debug_ranges_sz,
2779   UChar* debug_loc_img,    SizeT debug_loc_sz
2780)
2781{
2782   TyAdmin *admin, *adminp;
2783   TempVar *tempvars, *varp, *varp2;
2784   GExpr *gexprs, *gexpr;
2785   Cursor abbv; /* for showing .debug_abbrev */
2786   Cursor info; /* primary cursor for parsing .debug_info */
2787   Cursor ranges; /* for showing .debug_ranges */
2788   D3TypeParser typarser;
2789   D3VarParser varparser;
2790   Addr  dr_base;
2791   UWord dr_offset;
2792   Word  i;
2793   Bool td3 = di->trace_symtab;
2794   XArray* /* of TempVar* */ dioff_lookup_tab;
2795
2796#if 0
2797   /* This doesn't work properly because it assumes all entries are
2798      packed end to end, with no holes.  But that doesn't always
2799      appear to be the case, so it loses sync.  And the D3 spec
2800      doesn't appear to require a no-hole situation either. */
2801   /* Display .debug_loc */
2802   Addr  dl_base;
2803   UWord dl_offset;
2804   Cursor loc; /* for showing .debug_loc */
2805   TRACE_SYMTAB("\n");
2806   TRACE_SYMTAB("\n------ The contents of .debug_loc ------\n");
2807   TRACE_SYMTAB("    Offset   Begin    End      Expression\n");
2808   init_Cursor( &loc, debug_loc_img,
2809                debug_loc_sz, 0, barf,
2810                "Overrun whilst reading .debug_loc section(1)" );
2811   dl_base = 0;
2812   dl_offset = 0;
2813   while (True) {
2814      UWord  w1, w2;
2815      UWord  len;
2816      if (is_at_end_Cursor( &loc ))
2817         break;
2818
2819      /* Read a (host-)word pair.  This is something of a hack since
2820         the word size to read is really dictated by the ELF file;
2821         however, we assume we're reading a file with the same
2822         word-sizeness as the host.  Reasonably enough. */
2823      w1 = get_UWord( &loc );
2824      w2 = get_UWord( &loc );
2825
2826      if (w1 == 0 && w2 == 0) {
2827         /* end of list.  reset 'base' */
2828         TRACE_D3("    %08lx <End of list>\n", dl_offset);
2829         dl_base = 0;
2830         dl_offset = get_position_of_Cursor( &loc );
2831         continue;
2832      }
2833
2834      if (w1 == -1UL) {
2835         /* new value for 'base' */
2836         TRACE_D3("    %08lx %16lx %08lx (base address)\n",
2837                  dl_offset, w1, w2);
2838         dl_base = w2;
2839         continue;
2840      }
2841
2842      /* else a location expression follows */
2843      TRACE_D3("    %08lx %08lx %08lx ",
2844               dl_offset, w1 + dl_base, w2 + dl_base);
2845      len = (UWord)get_UShort( &loc );
2846      while (len > 0) {
2847         UChar byte = get_UChar( &loc );
2848         TRACE_D3("%02x", (UInt)byte);
2849         len--;
2850      }
2851      TRACE_SYMTAB("\n");
2852   }
2853#endif
2854
2855   /* Display .debug_ranges */
2856   TRACE_SYMTAB("\n");
2857   TRACE_SYMTAB("\n------ The contents of .debug_ranges ------\n");
2858   TRACE_SYMTAB("    Offset   Begin    End\n");
2859   init_Cursor( &ranges, debug_ranges_img,
2860                debug_ranges_sz, 0, barf,
2861                "Overrun whilst reading .debug_ranges section(1)" );
2862   dr_base = 0;
2863   dr_offset = 0;
2864   while (True) {
2865      UWord  w1, w2;
2866
2867      if (is_at_end_Cursor( &ranges ))
2868         break;
2869
2870      /* Read a (host-)word pair.  This is something of a hack since
2871         the word size to read is really dictated by the ELF file;
2872         however, we assume we're reading a file with the same
2873         word-sizeness as the host.  Reasonably enough. */
2874      w1 = get_UWord( &ranges );
2875      w2 = get_UWord( &ranges );
2876
2877      if (w1 == 0 && w2 == 0) {
2878         /* end of list.  reset 'base' */
2879         TRACE_D3("    %08lx <End of list>\n", dr_offset);
2880         dr_base = 0;
2881         dr_offset = get_position_of_Cursor( &ranges );
2882         continue;
2883      }
2884
2885      if (w1 == -1UL) {
2886         /* new value for 'base' */
2887         TRACE_D3("    %08lx %16lx %08lx (base address)\n",
2888                  dr_offset, w1, w2);
2889         dr_base = w2;
2890         continue;
2891      }
2892
2893      /* else a range [w1+base, w2+base) is denoted */
2894      TRACE_D3("    %08lx %08lx %08lx\n",
2895               dr_offset, w1 + dr_base, w2 + dr_base);
2896   }
2897
2898
2899   /* Display .debug_abbrev */
2900   init_Cursor( &abbv, debug_abbv_img, debug_abbv_sz, 0, barf,
2901                "Overrun whilst reading .debug_abbrev section" );
2902   TRACE_SYMTAB("\n");
2903   TRACE_SYMTAB("\n------ The contents of .debug_abbrev ------\n");
2904   while (True) {
2905      if (is_at_end_Cursor( &abbv ))
2906         break;
2907      /* Read one abbreviation table */
2908      TRACE_D3("  Number TAG\n");
2909      while (True) {
2910         ULong atag;
2911         UInt  has_children;
2912         ULong acode = get_ULEB128( &abbv );
2913         if (acode == 0) break; /* end of the table */
2914         atag = get_ULEB128( &abbv );
2915         has_children = get_UChar( &abbv );
2916         TRACE_D3("   %llu      %s    [%s]\n",
2917                  acode, ML_(pp_DW_TAG)(atag),
2918                         ML_(pp_DW_children)(has_children));
2919         while (True) {
2920            ULong at_name = get_ULEB128( &abbv );
2921            ULong at_form = get_ULEB128( &abbv );
2922            if (at_name == 0 && at_form == 0) break;
2923            TRACE_D3("    %18s %s\n",
2924                     ML_(pp_DW_AT)(at_name), ML_(pp_DW_FORM)(at_form));
2925         }
2926      }
2927   }
2928   TRACE_SYMTAB("\n");
2929
2930   /* Now loop over the Compilation Units listed in the .debug_info
2931      section (see D3SPEC sec 7.5) paras 1 and 2.  Each compilation
2932      unit contains a Compilation Unit Header followed by precisely
2933      one DW_TAG_compile_unit or DW_TAG_partial_unit DIE. */
2934   init_Cursor( &info, debug_info_img, debug_info_sz, 0, barf,
2935                "Overrun whilst reading .debug_info section" );
2936
2937   /* We'll park the harvested type information in here.  Also create
2938      a fake "void" entry with offset D3_FAKEVOID_CUOFF, so we always
2939      have at least one type entry to refer to.  D3_FAKEVOID_CUOFF is
2940      huge and presumably will not occur in any valid DWARF3 file --
2941      it would need to have a .debug_info section 4GB long for that to
2942      happen.  These type entries end up in the DebugInfo. */
2943   admin = NULL;
2944   { Type* tVoid = ML_(new_Type)();
2945     tVoid->tag = Ty_Void;
2946     tVoid->Ty.Void.isFake = True;
2947     admin = ML_(new_TyAdmin)( (UWord)D3_FAKEVOID_CUOFF, admin );
2948     admin->payload = tVoid;
2949     admin->tag     = TyA_Type;
2950   }
2951
2952   /* List of variables we're accumulating.  These don't end up in the
2953      DebugInfo; instead their contents are handed to ML_(addVar) and
2954      the list elements are then deleted. */
2955   tempvars = NULL;
2956
2957   /* List of GExprs we're accumulating.  These wind up in the
2958      DebugInfo. */
2959   gexprs = NULL;
2960
2961   /* We need a D3TypeParser to keep track of partially constructed
2962      types.  It'll be discarded as soon as we've completed the CU,
2963      since the resulting information is tipped in to 'admin' as it is
2964      generated. */
2965   VG_(memset)( &typarser, 0, sizeof(typarser) );
2966   typarser.sp = -1;
2967   typarser.language = '?';
2968
2969   VG_(memset)( &varparser, 0, sizeof(varparser) );
2970   varparser.sp = -1;
2971
2972   TRACE_D3("\n------ Parsing .debug_info section ------\n");
2973   while (True) {
2974      UWord   cu_start_offset, cu_offset_now;
2975      CUConst cc;
2976
2977      /* It seems icc9 finishes the DIE info before debug_info_sz
2978         bytes have been used up.  So be flexible, and declare the
2979         sequence complete if there is not enough remaining bytes to
2980         hold even the smallest conceivable CU header.  (11 bytes I
2981         reckon). */
2982      Word avail = get_remaining_length_Cursor( &info );
2983      if (avail < 11) {
2984         if (avail > 0)
2985            TRACE_D3("new_dwarf3_reader_wrk: warning: "
2986                     "%ld unused bytes after end of DIEs\n", avail);
2987         break;
2988      }
2989
2990      /* Check the varparser's stack is in a sane state. */
2991      vg_assert(varparser.sp == -1);
2992      for (i = 0; i < N_D3_VAR_STACK; i++) {
2993         vg_assert(varparser.ranges[i] == NULL);
2994         vg_assert(varparser.level[i] == 0);
2995      }
2996      for (i = 0; i < N_D3_TYPE_STACK; i++) {
2997         vg_assert(typarser.qparent[i] == NULL);
2998         vg_assert(typarser.qlevel[i] == 0);
2999      }
3000
3001      cu_start_offset = get_position_of_Cursor( &info );
3002      TRACE_D3("\n");
3003      TRACE_D3("  Compilation Unit @ offset 0x%lx:\n", cu_start_offset);
3004      /* parse_CU_header initialises the CU's set_abbv_Cursor cache
3005         (saC_cache) */
3006      parse_CU_Header( &cc, td3, &info,
3007                       (UChar*)debug_abbv_img, debug_abbv_sz );
3008      cc.debug_str_img    = debug_str_img;
3009      cc.debug_str_sz     = debug_str_sz;
3010      cc.debug_ranges_img = debug_ranges_img;
3011      cc.debug_ranges_sz  = debug_ranges_sz;
3012      cc.debug_loc_img    = debug_loc_img;
3013      cc.debug_loc_sz     = debug_loc_sz;
3014      cc.debug_line_img   = debug_line_img;
3015      cc.debug_line_sz    = debug_line_sz;
3016      cc.cu_start_offset  = cu_start_offset;
3017      cc.di = di;
3018      /* The CU's svma can be deduced by looking at the AT_low_pc
3019         value in the top level TAG_compile_unit, which is the topmost
3020         DIE.  We'll leave it for the 'varparser' to acquire that info
3021         and fill it in -- since it is the only party to want to know
3022         it. */
3023      cc.cu_svma_known = False;
3024      cc.cu_svma       = 0;
3025
3026      /* Create a fake outermost-level range covering the entire
3027         address range.  So we always have *something* to catch all
3028         variable declarations. */
3029      varstack_push( &cc, &varparser, td3,
3030                     unitary_range_list(0UL, ~0UL),
3031                     -1, False/*isFunc*/, NULL/*fbGX*/ );
3032
3033      /* And set up the file name table.  When we come across the top
3034         level DIE for this CU (which is what the next call to
3035         read_DIE should process) we will copy all the file names out
3036         of the .debug_line img area and use this table to look up the
3037         copies when we later see filename numbers in DW_TAG_variables
3038         etc. */
3039      vg_assert(!varparser.filenameTable );
3040      varparser.filenameTable
3041         = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free),
3042                       sizeof(UChar*) );
3043      vg_assert(varparser.filenameTable );
3044
3045      /* Now read the one-and-only top-level DIE for this CU. */
3046      vg_assert(varparser.sp == 0);
3047      read_DIE( &admin, &tempvars, &gexprs, &typarser, &varparser,
3048                &info, td3, &cc, 0 );
3049
3050      cu_offset_now = get_position_of_Cursor( &info );
3051      if (1) TRACE_D3("offset now %ld, d-i-size %ld\n",
3052                      cu_offset_now, debug_info_sz);
3053      if (cu_offset_now > debug_info_sz)
3054         barf("toplevel DIEs beyond end of CU");
3055      if (cu_offset_now == debug_info_sz)
3056         break;
3057
3058      /* Preen to level -2.  DIEs have level >= 0 so -2 cannot occur
3059         anywhere else at all.  Our fake the-entire-address-space
3060         range is at level -1, so preening to -2 should completely
3061         empty the stack out. */
3062      TRACE_D3("\n");
3063      varstack_preen( &varparser, td3, -2 );
3064      /* Similarly, empty the type stack out. */
3065      typestack_preen( &typarser, td3, -2 );
3066      /* else keep going */
3067
3068      TRACE_D3("set_abbv_Cursor cache: %lu queries, %lu misses\n",
3069               cc.saC_cache_queries, cc.saC_cache_misses);
3070
3071      vg_assert(varparser.filenameTable );
3072      VG_(deleteXA)( varparser.filenameTable );
3073      varparser.filenameTable = NULL;
3074   }
3075
3076   /* Put the type entry list the right way round.  Not strictly
3077      necessary, but makes it easier to read. */
3078   vg_assert(admin);
3079   if (admin) {
3080      TyAdmin *next, *prev = NULL;
3081      for (adminp = admin; adminp; adminp = next) {
3082         next = adminp->next;
3083         adminp->next = prev;
3084         prev = adminp;
3085      }
3086      admin = prev;
3087   }
3088
3089   /* Put the variable list the right way round.  Not strictly
3090      necessary, but makes it easier to read. */
3091   if (tempvars) {
3092      TempVar *next, *prev = NULL;
3093      for (varp = tempvars; varp; varp = next) {
3094         next = varp->next;
3095         varp->next = prev;
3096         prev = varp;
3097      }
3098      tempvars = prev;
3099   }
3100
3101   TRACE_D3("\n");
3102   TRACE_D3("------ Acquired the following type entities: ------\n");
3103   for (adminp = admin; adminp; adminp = adminp->next) {
3104      TRACE_D3("   ");
3105      if (td3) ML_(pp_TyAdmin)( adminp );
3106      TRACE_D3("\n");
3107   }
3108   TRACE_D3("\n");
3109   TRACE_D3("------ Resolving type entries ------\n");
3110
3111   resolve_type_entities( admin, tempvars );
3112   for (gexpr = gexprs; gexpr; gexpr = gexpr->next) {
3113      bias_GX( gexpr, di->text_bias );
3114   }
3115
3116   TRACE_D3("\n");
3117   TRACE_D3("------ Acquired the following variables: ------\n\n");
3118
   /* Park (pointers to) all the vars in an XArray, so we can look up
      abstract origins quickly.  The array is sorted by (and hence
      looked up by) the .dioff fields.  Since the .dioffs should be in
      strictly ascending order, there is no need to sort the array
      after construction.  The ascendingness is however asserted for. */
3124   dioff_lookup_tab
3125      = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free),
3126                    sizeof(TempVar*) );
3127   vg_assert(dioff_lookup_tab);
3128   varp2 = NULL;
3129   for (varp = tempvars; varp; varp = varp->next) {
3130      if (varp2)
3131         vg_assert(varp2->dioff < varp->dioff);
3132      VG_(addToXA)( dioff_lookup_tab, &varp );
3133      varp2 = varp;
3134   }
3135   VG_(setCmpFnXA)( dioff_lookup_tab, cmp_TempVar_by_dioff );
3136   VG_(sortXA)( dioff_lookup_tab ); /* POINTLESS; FIXME: rm */
3137
3138   /* Now visit each var.  Collect up as much info as possible for
3139      each var and hand it to ML_(addVar). */
3140   for (varp = tempvars; varp; varp = varp->next) {
3141
3142      /* Possibly show .. */
3143      if (td3) {
3144         VG_(printf)("<%lx> addVar: level %d: %s :: ",
3145                     varp->dioff,
3146                     varp->level,
3147                     varp->name ? varp->name : (UChar*)"<anon_var>" );
3148         if (varp->typeR) {
3149            ML_(pp_Type_C_ishly)( varp->typeR );
3150         } else {
3151            VG_(printf)("NULL");
3152         }
3153         VG_(printf)("\n  Loc=");
3154         if (varp->gexpr) {
3155            ML_(pp_GX)(varp->gexpr);
3156         } else {
3157            VG_(printf)("NULL");
3158         }
3159         VG_(printf)("\n");
3160         if (varp->fbGX) {
3161            VG_(printf)("  FrB=");
3162            ML_(pp_GX)( varp->fbGX );
3163            VG_(printf)("\n");
3164         } else {
3165            VG_(printf)("  FrB=none\n");
3166         }
3167         VG_(printf)("  declared at: %s:%d\n",
3168                     varp->fName ? varp->fName : (UChar*)"NULL",
3169                     varp->fLine );
3170         if (varp->absOri != (UWord)D3_INVALID_CUOFF)
3171            VG_(printf)("  abstract origin: <%lx>\n", varp->absOri);
3172      }
3173
3174      /* Skip variables which have no location.  These must be
3175         abstract instances; they are useless as-is since with no
3176         location they have no specified memory location.  They will
3177         presumably be referred to via the absOri fields of other
3178         variables. */
3179      if (!varp->gexpr) {
3180         TRACE_D3("  SKIP (no location)\n\n");
3181         continue;
3182      }
3183
3184      /* So it has a location, at least.  If it refers to some other
3185         entry through its absOri field, pull in further info through
3186         that. */
3187      if (varp->absOri != (UWord)D3_INVALID_CUOFF) {
3188         Bool found;
3189         Word ixFirst, ixLast;
3190         TempVar key;
3191         TempVar* keyp = &key;
3192         TempVar *varAI;
3193         VG_(memset)(&key, 0, sizeof(key)); /* not necessary */
3194         key.dioff = varp->absOri; /* this is what we want to find */
3195         found = VG_(lookupXA)( dioff_lookup_tab, &keyp,
3196                                &ixFirst, &ixLast );
3197         if (!found)
3198            barf("DW_AT_abstract_origin can't be resolved");
3199         /* If the following fails, there is more than one entry with
3200            the same dioff.  Which can't happen. */
3201         vg_assert(ixFirst == ixLast);
3202         varAI = *(TempVar**)VG_(indexXA)( dioff_lookup_tab, ixFirst );
3203         /* stay sane */
3204         vg_assert(varAI);
3205         vg_assert(varAI->dioff == varp->absOri);
3206
3207         /* Copy what useful info we can. */
3208         if (varAI->typeR && !varp->typeR)
3209            varp->typeR = varAI->typeR;
3210         if (varAI->name && !varp->name)
3211            varp->name = varAI->name;
3212         if (varAI->fName && !varp->fName)
3213            varp->fName = varAI->fName;
3214         if (varAI->fLine > 0 && varp->fLine == 0)
3215            varp->fLine = varAI->fLine;
3216      }
3217
3218      /* Give it a name if it doesn't have one. */
3219      if (!varp->name)
3220         varp->name = ML_(addStr)( di, "<anon_var>", -1 );
3221
3222      /* So now does it have enough info to be useful? */
3223      /* NOTE: re typeR: this is a hack.  If typeR is NULL then the
3224         type didn't get resolved.  Really, in that case something's
3225         broken earlier on, and should be fixed, rather than just
3226         skipping the variable. */
3227      if (!varp->typeR) continue;
3228      vg_assert(varp->gexpr);
3229      vg_assert(varp->name);
3230      vg_assert(varp->typeR);
3231      vg_assert(varp->level >= 0);
3232
3233      /* Ok.  So we're going to keep it.  Call ML_(addVar) once for
3234         each address range in which the variable exists. */
3235      TRACE_D3("  ACQUIRE for range(s) ");
3236      { AddrRange  oneRange;
3237        AddrRange* varPcRanges;
3238        Word       nVarPcRanges;
3239        /* Set up to iterate over address ranges, however
3240           represented. */
3241        if (varp->nRanges == 0 || varp->nRanges == 1) {
3242           vg_assert(!varp->rngMany);
3243           if (varp->nRanges == 0) {
3244              vg_assert(varp->rngOneMin == 0);
3245              vg_assert(varp->rngOneMax == 0);
3246           }
3247           nVarPcRanges = varp->nRanges;
3248           oneRange.aMin = varp->rngOneMin;
3249           oneRange.aMax = varp->rngOneMax;
3250           varPcRanges = &oneRange;
3251        } else {
3252           vg_assert(varp->rngMany);
3253           vg_assert(varp->rngOneMin == 0);
3254           vg_assert(varp->rngOneMax == 0);
3255           nVarPcRanges = VG_(sizeXA)(varp->rngMany);
3256           vg_assert(nVarPcRanges >= 2);
3257           vg_assert(nVarPcRanges == (Word)varp->nRanges);
3258           varPcRanges = VG_(indexXA)(varp->rngMany, 0);
3259        }
3260        if (varp->level == 0)
3261           vg_assert( nVarPcRanges == 1 );
3262        /* and iterate */
3263        for (i = 0; i < nVarPcRanges; i++) {
3264           Addr pcMin = varPcRanges[i].aMin;
3265           Addr pcMax = varPcRanges[i].aMax;
3266           vg_assert(pcMin <= pcMax);
3267           /* Level 0 is the global address range.  So at level 0 we
3268              don't want to bias pcMin/pcMax; but at all other levels
3269              we do since those are derived from svmas in the Dwarf
3270              we're reading.  Be paranoid ... */
3271           if (varp->level == 0) {
3272              vg_assert(pcMin == (Addr)0);
3273              vg_assert(pcMax == ~(Addr)0);
3274           } else {
3275              /* vg_assert(pcMin > (Addr)0);
3276                 No .. we can legitimately expect to see ranges like
3277                 0x0-0x11D (pre-biasing, of course). */
3278              vg_assert(pcMax < ~(Addr)0);
3279           }
3280
3281           if (i > 0 && (i%2) == 0) TRACE_D3("\n                       ");
3282           TRACE_D3("[%p,%p] ", pcMin, pcMax );
3283
3284           ML_(addVar)(
3285              di, varp->level,
3286                  pcMin + (varp->level==0 ? 0 : di->text_bias),
3287                  pcMax + (varp->level==0 ? 0 : di->text_bias),
3288                  varp->name, (void*)varp->typeR,
3289                  varp->gexpr, varp->fbGX,
3290                  varp->fName, varp->fLine, td3
3291           );
3292        }
3293      }
3294
3295      TRACE_D3("\n\n");
3296      /* and move on to the next var */
3297   }
3298
3299   /* Now free all the TempVars */
3300   for (varp = tempvars; varp; varp = varp2) {
3301      varp2 = varp->next;
3302      if (varp->rngMany)
3303         VG_(deleteXA)(varp->rngMany);
3304      ML_(dinfo_free)(varp);
3305   }
3306   tempvars = NULL;
3307
3308   /* And get rid of the temporary mapping table. */
3309   VG_(deleteXA)( dioff_lookup_tab );
3310
3311   /* record the TyAdmins and the GExprs in di so they can be freed
3312      later */
3313   vg_assert(!di->admin_tyadmins);
3314   di->admin_tyadmins = admin;
3315   vg_assert(!di->admin_gexprs);
3316   di->admin_gexprs = gexprs;
3317}
3318
3319
3320/*------------------------------------------------------------*/
3321/*---                                                      ---*/
3322/*--- The "new" DWARF3 reader -- top level control logic   ---*/
3323/*---                                                      ---*/
3324/*------------------------------------------------------------*/
3325
3326/* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
3327#include <setjmp.h>   /* For jmp_buf */
3328/* --- !!! --- EXTERNAL HEADERS end --- !!! --- */
3329
3330static Bool    d3rd_jmpbuf_valid  = False;
3331static HChar*  d3rd_jmpbuf_reason = NULL;
3332static jmp_buf d3rd_jmpbuf;
3333
3334static __attribute__((noreturn)) void barf ( HChar* reason ) {
3335   vg_assert(d3rd_jmpbuf_valid);
3336   d3rd_jmpbuf_reason = reason;
3337   __builtin_longjmp(&d3rd_jmpbuf, 1);
3338   /*NOTREACHED*/
3339   vg_assert(0);
3340}
3341
3342
3343void
3344ML_(new_dwarf3_reader) (
3345   struct _DebugInfo* di,
3346   UChar* debug_info_img,   SizeT debug_info_sz,
3347   UChar* debug_abbv_img,   SizeT debug_abbv_sz,
3348   UChar* debug_line_img,   SizeT debug_line_sz,
3349   UChar* debug_str_img,    SizeT debug_str_sz,
3350   UChar* debug_ranges_img, SizeT debug_ranges_sz,
3351   UChar* debug_loc_img,    SizeT debug_loc_sz
3352)
3353{
3354   volatile Int  jumped;
3355   volatile Bool td3 = di->trace_symtab;
3356
3357   /* Run the _wrk function to read the dwarf3.  If it succeeds, it
3358      just returns normally.  If there is any failure, it longjmp's
3359      back here, having first set d3rd_jmpbuf_reason to something
3360      useful. */
3361   vg_assert(d3rd_jmpbuf_valid  == False);
3362   vg_assert(d3rd_jmpbuf_reason == NULL);
3363
3364   d3rd_jmpbuf_valid = True;
3365   jumped = __builtin_setjmp(&d3rd_jmpbuf);
3366   if (jumped == 0) {
3367      /* try this ... */
3368      new_dwarf3_reader_wrk( di, barf,
3369                             debug_info_img,   debug_info_sz,
3370                             debug_abbv_img,   debug_abbv_sz,
3371                             debug_line_img,   debug_line_sz,
3372                             debug_str_img,    debug_str_sz,
3373                             debug_ranges_img, debug_ranges_sz,
3374                             debug_loc_img,    debug_loc_sz );
3375      d3rd_jmpbuf_valid = False;
3376      TRACE_D3("\n------ .debug_info reading was successful ------\n");
3377   } else {
3378      /* It longjmp'd. */
3379      d3rd_jmpbuf_valid = False;
3380      /* Can't longjump without giving some sort of reason. */
3381      vg_assert(d3rd_jmpbuf_reason != NULL);
3382
3383      TRACE_D3("\n------ .debug_info reading failed ------\n");
3384
3385      ML_(symerr)(di, True, d3rd_jmpbuf_reason);
3386   }
3387
3388   d3rd_jmpbuf_valid  = False;
3389   d3rd_jmpbuf_reason = NULL;
3390}
3391
3392
3393
/* --- Unused code fragments which might be useful one day. --- */

/* The fragment below is compiled out.  It is a loop that walks the
   .debug_aranges section table by table, tracing each header and
   then each (address, length) pair up to and including the
   terminating (0, 0) pair.  To be usable it would need to be placed
   in a function that has 'aranges' (a Cursor), 'debug_aranges_img',
   'debug_aranges_sz', 'barf' and 'td3' in scope. */
#if 0
   /* Read the arange tables */
   TRACE_SYMTAB("\n");
   TRACE_SYMTAB("\n------ The contents of .debug_arange ------\n");
   init_Cursor( &aranges, debug_aranges_img,
                debug_aranges_sz, 0, barf,
                "Overrun whilst reading .debug_aranges section" );
   while (True) {
      ULong  len, d_i_offset;
      Bool   is64;
      UShort version;
      UChar  asize, segsize;

      if (is_at_end_Cursor( &aranges ))
         break;
      /* Read one arange thingy */
      /* initial_length field */
      len = get_Initial_Length( &is64, &aranges,
               "in .debug_aranges: invalid initial-length field" );
      version    = get_UShort( &aranges );
      d_i_offset = get_Dwarfish_UWord( &aranges, is64 );
      asize      = get_UChar( &aranges );
      segsize    = get_UChar( &aranges );
      TRACE_D3("  Length:                   %llu\n", len);
      TRACE_D3("  Version:                  %d\n", (Int)version);
      TRACE_D3("  Offset into .debug_info:  %llx\n", d_i_offset);
      TRACE_D3("  Pointer Size:             %d\n", (Int)asize);
      TRACE_D3("  Segment Size:             %d\n", (Int)segsize);
      TRACE_D3("\n");
      TRACE_D3("    Address            Length\n");

      /* asize comes straight from the file.  It is used as a modulus
         immediately below, so a zero value would divide by zero; and
         the tuple reads further down only distinguish 8-byte
         addresses from everything else.  Bail out on anything other
         than 4 or 8 rather than misparse the table. */
      if (asize != 4 && asize != 8)
         barf("in .debug_aranges: unsupported address size");

      /* Skip padding: the first tuple starts at a multiple of the
         tuple size, which is twice the address size. */
      while ((get_position_of_Cursor( &aranges ) % (2 * asize)) > 0) {
         (void)get_UChar( & aranges );
      }
      /* Tuples run until an all-zero (address, length) pair, which
         is itself traced before we stop. */
      while (True) {
         ULong address = get_Dwarfish_UWord( &aranges, asize==8 );
         ULong length = get_Dwarfish_UWord( &aranges, asize==8 );
         TRACE_D3("    0x%016llx 0x%llx\n", address, length);
         if (address == 0 && length == 0) break;
      }
   }
   TRACE_SYMTAB("\n");
#endif
3439
3440/*--------------------------------------------------------------------*/
3441/*--- end                                             readdwarf3.c ---*/
3442/*--------------------------------------------------------------------*/
3443