readdwarf3.c revision 50c5093772c2b23fd0897d3590dcfaec1c92ac83
1
2/*--------------------------------------------------------------------*/
3/*--- Read DWARF3/4 ".debug_info" sections (DIE trees).            ---*/
4/*---                                                 readdwarf3.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8   This file is part of Valgrind, a dynamic binary instrumentation
9   framework.
10
11   Copyright (C) 2008-2010 OpenWorks LLP
12      info@open-works.co.uk
13
14   This program is free software; you can redistribute it and/or
15   modify it under the terms of the GNU General Public License as
16   published by the Free Software Foundation; either version 2 of the
17   License, or (at your option) any later version.
18
19   This program is distributed in the hope that it will be useful, but
20   WITHOUT ANY WARRANTY; without even the implied warranty of
21   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22   General Public License for more details.
23
24   You should have received a copy of the GNU General Public License
25   along with this program; if not, write to the Free Software
26   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27   02111-1307, USA.
28
29   The GNU General Public License is contained in the file COPYING.
30
31   Neither the names of the U.S. Department of Energy nor the
32   University of California nor the names of its contributors may be
33   used to endorse or promote products derived from this software
34   without prior written permission.
35*/
36
37#if defined(VGO_linux) || defined(VGO_darwin)
38
39/* REFERENCE (without which this code will not make much sense):
40
41   DWARF Debugging Information Format, Version 3,
42   dated 20 December 2005 (the "D3 spec").
43
44   Available at http://www.dwarfstd.org/Dwarf3.pdf.  There's also a
45   .doc (MS Word) version, but for some reason the section numbers
46   between the Word and PDF versions differ by 1 in the first digit.
47   All section references in this code are to the PDF version.
48
49   CURRENT HACKS:
50
51   DW_TAG_{const,volatile}_type no DW_AT_type is allowed; it is
52      assumed to mean "const void" or "volatile void" respectively.
53      GDB appears to interpret them like this, anyway.
54
55   In many cases it is important to know the svma of a CU (the "base
56   address of the CU", as the D3 spec calls it).  There are some
57   situations in which the spec implies this value is unknown, but the
   Dwarf3 produced by gcc-4.1 seems to assume it is not unknown but
   merely zero when not explicitly stated.  So we too have to make
   that assumption.
61
62   POTENTIAL BUG?  Spotted 6 Sept 08.  Why doesn't
63   unitary_range_list() bias the resulting range list in the same way
64   that its more general cousin, get_range_list(), does?  I don't
65   know.
66
67   TODO, 2008 Feb 17:
68
69   get rid of cu_svma_known and document the assumed-zero svma hack.
70
71   ML_(sizeOfType): differentiate between zero sized types and types
72   for which the size is unknown.  Is this important?  I don't know.
73
74   DW_AT_array_types: deal with explicit sizes (currently we compute
75   the size from the bounds and the element size, although that's
76   fragile, if the bounds incompletely specified, or completely
77   absent)
78
79   Document reason for difference (by 1) of stack preening depth in
80   parse_var_DIE vs parse_type_DIE.
81
82   Don't hand to ML_(addVars), vars whose locations are entirely in
83   registers (DW_OP_reg*).  This is merely a space-saving
84   optimisation, as ML_(evaluate_Dwarf3_Expr) should handle these
85   expressions correctly, by failing to evaluate them and hence
86   effectively ignoring the variable with which they are associated.
87
88   Deal with DW_AT_array_types which have element size != stride
89
90   In some cases, the info for a variable is split between two
91   different DIEs (generally a declarer and a definer).  We punt on
92   these.  Could do better here.
93
94   The 'data_bias' argument passed to the expression evaluator
95   (ML_(evaluate_Dwarf3_Expr)) should really be changed to a
96   MaybeUWord, to make it clear when we do vs don't know what it is
97   for the evaluation of an expression.  At the moment zero is passed
98   for this parameter in the don't know case.  That's a bit fragile
99   and obscure; using a MaybeUWord would be clearer.
100
101   POTENTIAL PERFORMANCE IMPROVEMENTS:
102
103   Currently, duplicate removal and all other queries for the type
104   entities array is done using cuOffset-based pointing, which
105   involves a binary search (VG_(lookupXA)) for each access.  This is
106   wildly inefficient, although simple.  It would be better to
107   translate all the cuOffset-based references (iow, all the "R" and
108   "Rs" fields in the TyEnts in 'tyents') to direct index numbers in
109   'tyents' right at the start of dedup_types(), and use direct
110   indexing (VG_(indexXA)) wherever possible after that.
111
112   cmp__XArrays_of_AddrRange is also a performance bottleneck.  Move
113   VG_(indexXA) into pub_tool_xarray.h so it can be inlined at all use
114   points, and possibly also make an _UNCHECKED version which skips
115   the range checks in performance-critical situations such as this.
116
117   Handle interaction between read_DIE and parse_{var,type}_DIE
118   better.  Currently read_DIE reads the entire DIE just to find where
119   the end is (and for debug printing), so that it can later reliably
120   move the cursor to the end regardless of what parse_{var,type}_DIE
121   do.  This means many DIEs (most, even?) are read twice.  It would
122   be smarter to make parse_{var,type}_DIE return a Bool indicating
123   whether or not they advanced the DIE cursor, and only if they
124   didn't should read_DIE itself read through the DIE.
125
126   ML_(addVar) and add_var_to_arange: quite a lot of DiAddrRanges have
127   zero variables in their .vars XArray.  Rather than have an XArray
128   with zero elements (which uses 2 malloc'd blocks), allow the .vars
129   pointer to be NULL in this case.
130
131   More generally, reduce the amount of memory allocated and freed
132   while reading Dwarf3 type/variable information.  Even modest (20MB)
133   objects cause this module to allocate and free hundreds of
134   thousands of small blocks, and ML_(arena_malloc) and its various
135   groupies always show up at the top of performance profiles. */
136
137#include "pub_core_basics.h"
138#include "pub_core_debuginfo.h"
139#include "pub_core_libcbase.h"
140#include "pub_core_libcassert.h"
141#include "pub_core_libcprint.h"
142#include "pub_core_options.h"
143#include "pub_core_tooliface.h"    /* VG_(needs) */
144#include "pub_core_xarray.h"
145#include "pub_core_wordfm.h"
146#include "priv_misc.h"             /* dinfo_zalloc/free */
147#include "priv_tytypes.h"
148#include "priv_d3basics.h"
149#include "priv_storage.h"
150#include "priv_readdwarf3.h"       /* self */
151
152
153/*------------------------------------------------------------*/
154/*---                                                      ---*/
155/*--- Basic machinery for parsing DIEs.                    ---*/
156/*---                                                      ---*/
157/*------------------------------------------------------------*/
158
/* Debug tracing.  Prints via VG_(printf) only when 'td3' is true;
   'td3' is not a macro parameter, so a variable of that name must be
   in scope at every point of use.  NOTE: this expands to a bare 'if'
   statement followed by the caller's ';', so it must not be used as
   the unbraced body of an 'if' that has an 'else'. */
#define TRACE_D3(format, args...) \
   if (td3) { VG_(printf)(format, ## args); }

/* Two reserved UWord values at the very top of the range (-1 and -2).
   NOTE(review): by their names these are sentinel cuOffsets meaning
   "invalid" and "fake void type" -- confirm against their users. */
#define D3_INVALID_CUOFF  ((UWord)(-1UL))
#define D3_FAKEVOID_CUOFF ((UWord)(-2UL))
164
/* A read cursor over a region of bytes.  The get_* functions below
   read from region_start_img[region_next], advance region_next, and
   call barf(barfstr) instead of reading past region_szB. */
typedef
   struct {
      /* Address of the first byte of the region. */
      UChar* region_start_img;
      /* Size of the region, in bytes. */
      UWord  region_szB;
      /* Offset (from region_start_img) of the next byte to be read. */
      UWord  region_next;
      /* Called, with barfstr, when a read would overrun the region.
         Must not return. */
      void (*barf)( HChar* ) __attribute__((noreturn));
      /* The message handed to barf. */
      HChar* barfstr;
   }
   Cursor;
174
175static inline Bool is_sane_Cursor ( Cursor* c ) {
176   if (!c)                return False;
177   if (!c->barf)          return False;
178   if (!c->barfstr)       return False;
179   return True;
180}
181
182static void init_Cursor ( Cursor* c,
183                          UChar*  region_start_img,
184                          UWord   region_szB,
185                          UWord   region_next,
186                          __attribute__((noreturn)) void (*barf)( HChar* ),
187                          HChar*  barfstr )
188{
189   vg_assert(c);
190   VG_(memset)(c, 0, sizeof(*c));
191   c->region_start_img = region_start_img;
192   c->region_szB       = region_szB;
193   c->region_next      = region_next;
194   c->barf             = barf;
195   c->barfstr          = barfstr;
196   vg_assert(is_sane_Cursor(c));
197}
198
199static Bool is_at_end_Cursor ( Cursor* c ) {
200   vg_assert(is_sane_Cursor(c));
201   return c->region_next >= c->region_szB;
202}
203
204static inline UWord get_position_of_Cursor ( Cursor* c ) {
205   vg_assert(is_sane_Cursor(c));
206   return c->region_next;
207}
208static inline void set_position_of_Cursor ( Cursor* c, UWord pos ) {
209   c->region_next = pos;
210   vg_assert(is_sane_Cursor(c));
211}
212
213static /*signed*/Word get_remaining_length_Cursor ( Cursor* c ) {
214   vg_assert(is_sane_Cursor(c));
215   return c->region_szB - c->region_next;
216}
217
218static UChar* get_address_of_Cursor ( Cursor* c ) {
219   vg_assert(is_sane_Cursor(c));
220   return &c->region_start_img[ c->region_next ];
221}
222
223/* FIXME: document assumptions on endianness for
224   get_UShort/UInt/ULong. */
225static inline UChar get_UChar ( Cursor* c ) {
226   UChar r;
227   /* vg_assert(is_sane_Cursor(c)); */
228   if (c->region_next + sizeof(UChar) > c->region_szB) {
229      c->barf(c->barfstr);
230      /*NOTREACHED*/
231      vg_assert(0);
232   }
233   r = * (UChar*) &c->region_start_img[ c->region_next ];
234   c->region_next += sizeof(UChar);
235   return r;
236}
237static UShort get_UShort ( Cursor* c ) {
238   UShort r;
239   vg_assert(is_sane_Cursor(c));
240   if (c->region_next + sizeof(UShort) > c->region_szB) {
241      c->barf(c->barfstr);
242      /*NOTREACHED*/
243      vg_assert(0);
244   }
245   r = * (UShort*) &c->region_start_img[ c->region_next ];
246   c->region_next += sizeof(UShort);
247   return r;
248}
249static UInt get_UInt ( Cursor* c ) {
250   UInt r;
251   vg_assert(is_sane_Cursor(c));
252   if (c->region_next + sizeof(UInt) > c->region_szB) {
253      c->barf(c->barfstr);
254      /*NOTREACHED*/
255      vg_assert(0);
256   }
257   r = * (UInt*) &c->region_start_img[ c->region_next ];
258   c->region_next += sizeof(UInt);
259   return r;
260}
261static ULong get_ULong ( Cursor* c ) {
262   ULong r;
263   vg_assert(is_sane_Cursor(c));
264   if (c->region_next + sizeof(ULong) > c->region_szB) {
265      c->barf(c->barfstr);
266      /*NOTREACHED*/
267      vg_assert(0);
268   }
269   r = * (ULong*) &c->region_start_img[ c->region_next ];
270   c->region_next += sizeof(ULong);
271   return r;
272}
273static inline ULong get_ULEB128 ( Cursor* c ) {
274   ULong result;
275   Int   shift;
276   UChar byte;
277   /* unroll first iteration */
278   byte = get_UChar( c );
279   result = (ULong)(byte & 0x7f);
280   if (LIKELY(!(byte & 0x80))) return result;
281   shift = 7;
282   /* end unroll first iteration */
283   do {
284      byte = get_UChar( c );
285      result |= ((ULong)(byte & 0x7f)) << shift;
286      shift += 7;
287   } while (byte & 0x80);
288   return result;
289}
290static Long get_SLEB128 ( Cursor* c ) {
291   ULong  result = 0;
292   Int    shift = 0;
293   UChar  byte;
294   do {
295      byte = get_UChar(c);
296      result |= ((ULong)(byte & 0x7f)) << shift;
297      shift += 7;
298   } while (byte & 0x80);
299   if (shift < 64 && (byte & 0x40))
300      result |= -(1ULL << shift);
301   return result;
302}
303
304/* Assume 'c' points to the start of a string.  Return the absolute
305   address of whatever it points at, and advance it past the
306   terminating zero.  This makes it safe for the caller to then copy
307   the string with ML_(addStr), since (w.r.t. image overruns) the
308   process of advancing past the terminating zero will already have
309   "vetted" the string. */
310static UChar* get_AsciiZ ( Cursor* c ) {
311   UChar  uc;
312   UChar* res = get_address_of_Cursor(c);
313   do { uc = get_UChar(c); } while (uc != 0);
314   return res;
315}
316
317static ULong peek_ULEB128 ( Cursor* c ) {
318   Word here = c->region_next;
319   ULong r = get_ULEB128( c );
320   c->region_next = here;
321   return r;
322}
323static UChar peek_UChar ( Cursor* c ) {
324   Word here = c->region_next;
325   UChar r = get_UChar( c );
326   c->region_next = here;
327   return r;
328}
329
330static ULong get_Dwarfish_UWord ( Cursor* c, Bool is_dw64 ) {
331   return is_dw64 ? get_ULong(c) : (ULong) get_UInt(c);
332}
333
334static UWord get_UWord ( Cursor* c ) {
335   vg_assert(sizeof(UWord) == sizeof(void*));
336   if (sizeof(UWord) == 4) return get_UInt(c);
337   if (sizeof(UWord) == 8) return get_ULong(c);
338   vg_assert(0);
339}
340
341/* Read a DWARF3 'Initial Length' field */
342static ULong get_Initial_Length ( /*OUT*/Bool* is64,
343                                  Cursor* c,
344                                  HChar* barfMsg )
345{
346   ULong w64;
347   UInt  w32;
348   *is64 = False;
349   w32 = get_UInt( c );
350   if (w32 >= 0xFFFFFFF0 && w32 < 0xFFFFFFFF) {
351      c->barf( barfMsg );
352   }
353   else if (w32 == 0xFFFFFFFF) {
354      *is64 = True;
355      w64   = get_ULong( c );
356   } else {
357      *is64 = False;
358      w64 = (ULong)w32;
359   }
360   return w64;
361}
362
363
364/*------------------------------------------------------------*/
365/*---                                                      ---*/
366/*--- "CUConst" structure                                  ---*/
367/*---                                                      ---*/
368/*------------------------------------------------------------*/
369
/* Number of entries in CUConst.saC_cache (the set_abbv_Cursor
   cache). */
#define N_ABBV_CACHE 32

/* Holds information that is constant through the parsing of a
   Compilation Unit.  This is basically plumbed through to
   everywhere.  parse_CU_Header zeroes the whole structure and then
   fills in the fields it reads from the CU header, plus the
   abbreviation-table fields and the cache. */
typedef
   struct {
      /* Call here if anything goes wrong.  Must not return. */
      void (*barf)( HChar* ) __attribute__((noreturn));
      /* Is this 64-bit DWARF ? */
      Bool   is_dw64;
      /* Which DWARF version ?  (2, 3 or 4) */
      UShort version;
      /* Length of this Compilation Unit, as stated in the
         .unit_length :: InitialLength field of the CU Header.
         However, this size (as specified by the D3 spec) does not
         include the size of the .unit_length field itself, which is
         either 4 or 12 bytes (32-bit or 64-bit Dwarf3).  That value
         can be obtained through the expression ".is_dw64 ? 12 : 4". */
      ULong  unit_length;
      /* Offset of start of this unit in .debug_info */
      UWord  cu_start_offset;
      /* SVMA for this CU.  In the D3 spec, is known as the "base
         address of the compilation unit" (last para sec 3.1.1).
         Needed for (amongst things) interpretation of location-list
         values. */
      Addr   cu_svma;
      /* Whether cu_svma holds a known value. */
      Bool   cu_svma_known;
      /* The debug_abbreviations table to be used for this Unit */
      UChar* debug_abbv;
      /* Upper bound on size thereof (an overestimate, in general) */
      UWord  debug_abbv_maxszB;
      /* Where is .debug_str ? */
      UChar* debug_str_img;
      UWord  debug_str_sz;
      /* Where is .debug_ranges ? */
      UChar* debug_ranges_img;
      UWord  debug_ranges_sz;
      /* Where is .debug_loc ? */
      UChar* debug_loc_img;
      UWord  debug_loc_sz;
      /* Where is .debug_line? */
      UChar* debug_line_img;
      UWord  debug_line_sz;
      /* Where is .debug_info? */
      UChar* debug_info_img;
      UWord  debug_info_sz;
      /* --- Needed so we can add stuff to the string table. --- */
      struct _DebugInfo* di;
      /* --- a cache for set_abbv_Cursor --- */
      /* abbv_code == (ULong)-1 for an unused entry.  posn is the
         cursor position within debug_abbv that set_abbv_Cursor
         restores on a hit. */
      struct { ULong abbv_code; UWord posn; } saC_cache[N_ABBV_CACHE];
      /* Statistics: number of set_abbv_Cursor lookups, and how many
         of those were not satisfied from the cache. */
      UWord saC_cache_queries;
      UWord saC_cache_misses;
   }
   CUConst;
426
427
428/*------------------------------------------------------------*/
429/*---                                                      ---*/
430/*--- Helper functions for Guarded Expressions             ---*/
431/*---                                                      ---*/
432/*------------------------------------------------------------*/
433
434/* Parse the location list starting at img-offset 'debug_loc_offset'
435   in .debug_loc.  Results are biased with 'svma_of_referencing_CU'
436   and so I believe are correct SVMAs for the object as a whole.  This
437   function allocates the UChar*, and the caller must deallocate it.
438   The resulting block is in so-called Guarded-Expression format.
439
440   Guarded-Expression format is similar but not identical to the DWARF3
441   location-list format.  The format of each returned block is:
442
443      UChar biasMe;
444      UChar isEnd;
445      followed by zero or more of
446
447      (Addr aMin;  Addr aMax;  UShort nbytes;  ..bytes..;  UChar isEnd)
448
   '..bytes..' is a standard DWARF3 location expression which is
   valid when aMin <= pc <= aMax (possibly after suitable biasing).
451
452   The number of bytes in '..bytes..' is nbytes.
453
454   The end of the sequence is marked by an isEnd == 1 value.  All
455   previous isEnd values must be zero.
456
   biasMe is 1 if the aMin/aMax fields need this DebugInfo's
   text_bias added before use, and 0 if this is not necessary (the
   GX is ready to go).
460
461   Hence the block can be quickly parsed and is self-describing.  Note
462   that aMax is 1 less than the corresponding value in a DWARF3
463   location list.  Zero length ranges, with aMax == aMin-1, are not
464   allowed.
465*/
466/* 2008-sept-12: moved ML_(pp_GX) from here to d3basics.c, where
467   it more logically belongs. */
468
469
470/* Apply a text bias to a GX. */
471static void bias_GX ( /*MOD*/GExpr* gx, struct _DebugInfo* di )
472{
473   UShort nbytes;
474   Addr*  pA;
475   UChar* p = &gx->payload[0];
476   UChar  uc;
477   uc = *p++; /*biasMe*/
478   if (uc == 0)
479      return;
480   vg_assert(uc == 1);
481   p[-1] = 0; /* mark it as done */
482   while (True) {
483      uc = *p++;
484      if (uc == 1)
485         break; /*isEnd*/
486      vg_assert(uc == 0);
487      /* t-bias aMin */
488      pA = (Addr*)p;
489      *pA += di->text_debug_bias;
490      p += sizeof(Addr);
491      /* t-bias aMax */
492      pA = (Addr*)p;
493      *pA += di->text_debug_bias;
494      p += sizeof(Addr);
495      /* nbytes, and actual expression */
496      nbytes = * (UShort*)p; p += sizeof(UShort);
497      p += nbytes;
498   }
499}
500
501__attribute__((noinline))
502static GExpr* make_singleton_GX ( UChar* block, UWord nbytes )
503{
504   SizeT  bytesReqd;
505   GExpr* gx;
506   UChar *p, *pstart;
507
508   vg_assert(sizeof(UWord) == sizeof(Addr));
509   vg_assert(nbytes <= 0xFFFF); /* else we overflow the nbytes field */
510   bytesReqd
511      =   sizeof(UChar)  /*biasMe*/    + sizeof(UChar) /*!isEnd*/
512        + sizeof(UWord)  /*aMin*/      + sizeof(UWord) /*aMax*/
513        + sizeof(UShort) /*nbytes*/    + nbytes
514        + sizeof(UChar); /*isEnd*/
515
516   gx = ML_(dinfo_zalloc)( "di.readdwarf3.msGX.1",
517                           sizeof(GExpr) + bytesReqd );
518   vg_assert(gx);
519
520   p = pstart = &gx->payload[0];
521
522   * ((UChar*)p)  = 0;          /*biasMe*/ p += sizeof(UChar);
523   * ((UChar*)p)  = 0;          /*!isEnd*/ p += sizeof(UChar);
524   * ((Addr*)p)   = 0;          /*aMin*/   p += sizeof(Addr);
525   * ((Addr*)p)   = ~((Addr)0); /*aMax */  p += sizeof(Addr);
526   * ((UShort*)p) = (UShort)nbytes; /*nbytes*/ p += sizeof(UShort);
527   VG_(memcpy)(p, block, nbytes); p += nbytes;
528   * ((UChar*)p)  = 1;          /*isEnd*/  p += sizeof(UChar);
529
530   vg_assert( (SizeT)(p - pstart) == bytesReqd);
531   vg_assert( &gx->payload[bytesReqd]
532              == ((UChar*)gx) + sizeof(GExpr) + bytesReqd );
533
534   return gx;
535}
536
537__attribute__((noinline))
538static GExpr* make_general_GX ( CUConst* cc,
539                                Bool     td3,
540                                UWord    debug_loc_offset,
541                                Addr     svma_of_referencing_CU )
542{
543   Addr      base;
544   Cursor    loc;
545   XArray*   xa; /* XArray of UChar */
546   GExpr*    gx;
547   Word      nbytes;
548
549   vg_assert(sizeof(UWord) == sizeof(Addr));
550   if (cc->debug_loc_sz == 0)
551      cc->barf("make_general_GX: .debug_loc is empty/missing");
552
553   init_Cursor( &loc, cc->debug_loc_img,
554                cc->debug_loc_sz, 0, cc->barf,
555                "Overrun whilst reading .debug_loc section(2)" );
556   set_position_of_Cursor( &loc, debug_loc_offset );
557
558   TRACE_D3("make_general_GX (.debug_loc_offset = %lu, img = %p) {\n",
559            debug_loc_offset, get_address_of_Cursor( &loc ) );
560
561   /* Who frees this xa?  It is freed before this fn exits. */
562   xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.mgGX.1",
563                    ML_(dinfo_free),
564                    sizeof(UChar) );
565
566   { UChar c = 1; /*biasMe*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
567
568   base = 0;
569   while (True) {
570      Bool  acquire;
571      UWord len;
572      /* Read a (host-)word pair.  This is something of a hack since
573         the word size to read is really dictated by the ELF file;
574         however, we assume we're reading a file with the same
575         word-sizeness as the host.  Reasonably enough. */
576      UWord w1 = get_UWord( &loc );
577      UWord w2 = get_UWord( &loc );
578
579      TRACE_D3("   %08lx %08lx\n", w1, w2);
580      if (w1 == 0 && w2 == 0)
581         break; /* end of list */
582
583      if (w1 == -1UL) {
584         /* new value for 'base' */
585         base = w2;
586         continue;
587      }
588
589      /* else a location expression follows */
590      /* else enumerate [w1+base, w2+base) */
591      /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
592         (sec 2.17.2) */
593      if (w1 > w2) {
594         TRACE_D3("negative range is for .debug_loc expr at "
595                  "file offset %lu\n",
596                  debug_loc_offset);
597         cc->barf( "negative range in .debug_loc section" );
598      }
599
600      /* ignore zero length ranges */
601      acquire = w1 < w2;
602      len     = (UWord)get_UShort( &loc );
603
604      if (acquire) {
605         UWord  w;
606         UShort s;
607         UChar  c;
608         c = 0; /* !isEnd*/
609         VG_(addBytesToXA)( xa, &c, sizeof(c) );
610         w = w1    + base + svma_of_referencing_CU;
611         VG_(addBytesToXA)( xa, &w, sizeof(w) );
612         w = w2 -1 + base + svma_of_referencing_CU;
613         VG_(addBytesToXA)( xa, &w, sizeof(w) );
614         s = (UShort)len;
615         VG_(addBytesToXA)( xa, &s, sizeof(s) );
616      }
617
618      while (len > 0) {
619         UChar byte = get_UChar( &loc );
620         TRACE_D3("%02x", (UInt)byte);
621         if (acquire)
622            VG_(addBytesToXA)( xa, &byte, 1 );
623         len--;
624      }
625      TRACE_D3("\n");
626   }
627
628   { UChar c = 1; /*isEnd*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
629
630   nbytes = VG_(sizeXA)( xa );
631   vg_assert(nbytes >= 1);
632
633   gx = ML_(dinfo_zalloc)( "di.readdwarf3.mgGX.2", sizeof(GExpr) + nbytes );
634   vg_assert(gx);
635   VG_(memcpy)( &gx->payload[0], (UChar*)VG_(indexXA)(xa,0), nbytes );
636   vg_assert( &gx->payload[nbytes]
637              == ((UChar*)gx) + sizeof(GExpr) + nbytes );
638
639   VG_(deleteXA)( xa );
640
641   TRACE_D3("}\n");
642
643   return gx;
644}
645
646
647/*------------------------------------------------------------*/
648/*---                                                      ---*/
649/*--- Helper functions for range lists and CU headers      ---*/
650/*---                                                      ---*/
651/*------------------------------------------------------------*/
652
/* Denotes an address range, by its lowest (aMin) and highest (aMax)
   addresses.  Both aMin and aMax are included in the range; hence a
   complete range is (0, ~0) and an empty range is any (X, X-1) for
   X > 0. */
typedef
   struct { Addr aMin; Addr aMax; }
   AddrRange;
659
660
661/* Generate an arbitrary structural total ordering on
662   XArray* of AddrRange. */
663static Word cmp__XArrays_of_AddrRange ( XArray* rngs1, XArray* rngs2 )
664{
665   Word n1, n2, i;
666   tl_assert(rngs1 && rngs2);
667   n1 = VG_(sizeXA)( rngs1 );
668   n2 = VG_(sizeXA)( rngs2 );
669   if (n1 < n2) return -1;
670   if (n1 > n2) return 1;
671   for (i = 0; i < n1; i++) {
672      AddrRange* rng1 = (AddrRange*)VG_(indexXA)( rngs1, i );
673      AddrRange* rng2 = (AddrRange*)VG_(indexXA)( rngs2, i );
674      if (rng1->aMin < rng2->aMin) return -1;
675      if (rng1->aMin > rng2->aMin) return 1;
676      if (rng1->aMax < rng2->aMax) return -1;
677      if (rng1->aMax > rng2->aMax) return 1;
678   }
679   return 0;
680}
681
682
683__attribute__((noinline))
684static XArray* /* of AddrRange */ empty_range_list ( void )
685{
686   XArray* xa; /* XArray of AddrRange */
687   /* Who frees this xa?  varstack_preen() does. */
688   xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.erl.1",
689                    ML_(dinfo_free),
690                    sizeof(AddrRange) );
691   return xa;
692}
693
694
695__attribute__((noinline))
696static XArray* unitary_range_list ( Addr aMin, Addr aMax )
697{
698   XArray*   xa;
699   AddrRange pair;
700   vg_assert(aMin <= aMax);
701   /* Who frees this xa?  varstack_preen() does. */
702   xa = VG_(newXA)( ML_(dinfo_zalloc),  "di.readdwarf3.url.1",
703                    ML_(dinfo_free),
704                    sizeof(AddrRange) );
705   pair.aMin = aMin;
706   pair.aMax = aMax;
707   VG_(addToXA)( xa, &pair );
708   return xa;
709}
710
711
712/* Enumerate the address ranges starting at img-offset
713   'debug_ranges_offset' in .debug_ranges.  Results are biased with
714   'svma_of_referencing_CU' and so I believe are correct SVMAs for the
715   object as a whole.  This function allocates the XArray, and the
716   caller must deallocate it. */
717__attribute__((noinline))
718static XArray* /* of AddrRange */
719       get_range_list ( CUConst* cc,
720                        Bool     td3,
721                        UWord    debug_ranges_offset,
722                        Addr     svma_of_referencing_CU )
723{
724   Addr      base;
725   Cursor    ranges;
726   XArray*   xa; /* XArray of AddrRange */
727   AddrRange pair;
728
729   if (cc->debug_ranges_sz == 0)
730      cc->barf("get_range_list: .debug_ranges is empty/missing");
731
732   init_Cursor( &ranges, cc->debug_ranges_img,
733                cc->debug_ranges_sz, 0, cc->barf,
734                "Overrun whilst reading .debug_ranges section(2)" );
735   set_position_of_Cursor( &ranges, debug_ranges_offset );
736
737   /* Who frees this xa?  varstack_preen() does. */
738   xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.grl.1", ML_(dinfo_free),
739                    sizeof(AddrRange) );
740   base = 0;
741   while (True) {
742      /* Read a (host-)word pair.  This is something of a hack since
743         the word size to read is really dictated by the ELF file;
744         however, we assume we're reading a file with the same
745         word-sizeness as the host.  Reasonably enough. */
746      UWord w1 = get_UWord( &ranges );
747      UWord w2 = get_UWord( &ranges );
748
749      if (w1 == 0 && w2 == 0)
750         break; /* end of list. */
751
752      if (w1 == -1UL) {
753         /* new value for 'base' */
754         base = w2;
755         continue;
756      }
757
758      /* else enumerate [w1+base, w2+base) */
759      /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
760         (sec 2.17.2) */
761      if (w1 > w2)
762         cc->barf( "negative range in .debug_ranges section" );
763      if (w1 < w2) {
764         pair.aMin = w1     + base + svma_of_referencing_CU;
765         pair.aMax = w2 - 1 + base + svma_of_referencing_CU;
766         vg_assert(pair.aMin <= pair.aMax);
767         VG_(addToXA)( xa, &pair );
768      }
769   }
770   return xa;
771}
772
773
774/* Parse the Compilation Unit header indicated at 'c' and
775   initialise 'cc' accordingly. */
776static __attribute__((noinline))
777void parse_CU_Header ( /*OUT*/CUConst* cc,
778                       Bool td3,
779                       Cursor* c,
780                       UChar* debug_abbv_img, UWord debug_abbv_sz )
781{
782   UChar  address_size;
783   UWord  debug_abbrev_offset;
784   Int    i;
785
786   VG_(memset)(cc, 0, sizeof(*cc));
787   vg_assert(c && c->barf);
788   cc->barf = c->barf;
789
790   /* initial_length field */
791   cc->unit_length
792      = get_Initial_Length( &cc->is_dw64, c,
793           "parse_CU_Header: invalid initial-length field" );
794
795   TRACE_D3("   Length:        %lld\n", cc->unit_length );
796
797   /* version */
798   cc->version = get_UShort( c );
799   if (cc->version != 2 && cc->version != 3 && cc->version != 4)
800      cc->barf( "parse_CU_Header: is neither DWARF2 nor DWARF3 nor DWARF4" );
801   TRACE_D3("   Version:       %d\n", (Int)cc->version );
802
803   /* debug_abbrev_offset */
804   debug_abbrev_offset = get_Dwarfish_UWord( c, cc->is_dw64 );
805   if (debug_abbrev_offset >= debug_abbv_sz)
806      cc->barf( "parse_CU_Header: invalid debug_abbrev_offset" );
807   TRACE_D3("   Abbrev Offset: %ld\n", debug_abbrev_offset );
808
809   /* address size.  If this isn't equal to the host word size, just
810      give up.  This makes it safe to assume elsewhere that
811      DW_FORM_addr and DW_FORM_ref_addr can be treated as a host
812      word. */
813   address_size = get_UChar( c );
814   if (address_size != sizeof(void*))
815      cc->barf( "parse_CU_Header: invalid address_size" );
816   TRACE_D3("   Pointer Size:  %d\n", (Int)address_size );
817
818   /* Set up so that cc->debug_abbv points to the relevant table for
819      this CU.  Set the szB so that at least we can't read off the end
820      of the debug_abbrev section -- potentially (and quite likely)
821      too big, if this isn't the last table in the section, but at
822      least it's safe. */
823   cc->debug_abbv        = debug_abbv_img + debug_abbrev_offset;
824   cc->debug_abbv_maxszB = debug_abbv_sz  - debug_abbrev_offset;
825   /* and empty out the set_abbv_Cursor cache */
826   if (0) VG_(printf)("XXXXXX initialise set_abbv_Cursor cache\n");
827   for (i = 0; i < N_ABBV_CACHE; i++) {
828      cc->saC_cache[i].abbv_code = (ULong)-1; /* unused */
829      cc->saC_cache[i].posn = 0;
830   }
831   cc->saC_cache_queries = 0;
832   cc->saC_cache_misses = 0;
833}
834
835
836/* Set up 'c' so it is ready to parse the abbv table entry code
837   'abbv_code' for this compilation unit.  */
838static __attribute__((noinline))
839void set_abbv_Cursor ( /*OUT*/Cursor* c, Bool td3,
840                       CUConst* cc, ULong abbv_code )
841{
842   Int   i;
843   ULong acode;
844
845   if (abbv_code == 0)
846      cc->barf("set_abbv_Cursor: abbv_code == 0" );
847
848   /* (ULong)-1 is used to represent an empty cache slot.  So we can't
849      allow it.  In any case no valid DWARF3 should make a reference
850      to a negative abbreviation code.  [at least, they always seem to
851      be numbered upwards from zero as far as I have seen] */
852   vg_assert(abbv_code != (ULong)-1);
853
854   /* First search the cache. */
855   if (0) VG_(printf)("XXXXXX search set_abbv_Cursor cache\n");
856   cc->saC_cache_queries++;
857   for (i = 0; i < N_ABBV_CACHE; i++) {
858      /* No need to test the cached abbv_codes for -1 (empty), since
859         we just asserted that abbv_code is not -1. */
860     if (cc->saC_cache[i].abbv_code == abbv_code) {
861        /* Found it.  Cool.  Set up the parser using the cached
862           position, and move this cache entry 1 step closer to the
863           front. */
864        if (0) VG_(printf)("XXXXXX found in set_abbv_Cursor cache\n");
865        init_Cursor( c, cc->debug_abbv,
866                     cc->debug_abbv_maxszB, cc->saC_cache[i].posn,
867                     cc->barf,
868                     "Overrun whilst parsing .debug_abbrev section(1)" );
869        if (i > 0) {
870           ULong t_abbv_code = cc->saC_cache[i].abbv_code;
871           UWord t_posn = cc->saC_cache[i].posn;
872           while (i > 0) {
873              cc->saC_cache[i] = cc->saC_cache[i-1];
874              cc->saC_cache[0].abbv_code = t_abbv_code;
875              cc->saC_cache[0].posn = t_posn;
876              i--;
877           }
878        }
879        return;
880     }
881   }
882
883   /* No.  It's not in the cache.  We have to search through
884      .debug_abbrev, of course taking care to update the cache
885      when done. */
886
887   cc->saC_cache_misses++;
888   init_Cursor( c, cc->debug_abbv, cc->debug_abbv_maxszB, 0, cc->barf,
889               "Overrun whilst parsing .debug_abbrev section(2)" );
890
891   /* Now iterate though the table until we find the requested
892      entry. */
893   while (True) {
894      //ULong atag;
895      //UInt  has_children;
896      acode = get_ULEB128( c );
897      if (acode == 0) break; /* end of the table */
898      if (acode == abbv_code) break; /* found it */
899      /*atag         = */ get_ULEB128( c );
900      /*has_children = */ get_UChar( c );
901      //TRACE_D3("   %llu      %s    [%s]\n",
902      //         acode, pp_DW_TAG(atag), pp_DW_children(has_children));
903      while (True) {
904         ULong at_name = get_ULEB128( c );
905         ULong at_form = get_ULEB128( c );
906         if (at_name == 0 && at_form == 0) break;
907         //TRACE_D3("    %18s %s\n",
908         //         pp_DW_AT(at_name), pp_DW_FORM(at_form));
909      }
910   }
911
912   if (acode == 0) {
913      /* Not found.  This is fatal. */
914      cc->barf("set_abbv_Cursor: abbv_code not found");
915   }
916
917   /* Otherwise, 'c' is now set correctly to parse the relevant entry,
918      starting from the abbreviation entry's tag.  So just cache
919      the result, and return. */
920   for (i = N_ABBV_CACHE-1; i > N_ABBV_CACHE/2; i--) {
921      cc->saC_cache[i] = cc->saC_cache[i-1];
922   }
923   if (0) VG_(printf)("XXXXXX update set_abbv_Cursor cache\n");
924   cc->saC_cache[N_ABBV_CACHE/2].abbv_code = abbv_code;
925   cc->saC_cache[N_ABBV_CACHE/2].posn = get_position_of_Cursor(c);
926}
927
928
929/* From 'c', get the Form data into the lowest 1/2/4/8 bytes of *cts.
930
931   If *cts itself contains the entire result, then *ctsSzB is set to
932   1,2,4 or 8 accordingly and *ctsMemSzB is set to zero.
933
934   Alternatively, the result can be a block of data (in the
935   transiently mapped-in object, so-called "image" space).  If so then
936   the lowest sizeof(void*)/8 bytes of *cts hold a pointer to said
937   image, *ctsSzB is zero, and *ctsMemSzB is the size of the block.
938
939   Unfortunately this means it is impossible to represent a zero-size
940   image block since that would have *ctsSzB == 0 and *ctsMemSzB == 0
941   and so is ambiguous (which case it is?)
942
943   Invariant on successful return:
944      (*ctsSzB > 0 && *ctsMemSzB == 0)
945      || (*ctsSzB == 0 && *ctsMemSzB > 0)
946*/
947static
948void get_Form_contents ( /*OUT*/ULong* cts,
949                         /*OUT*/Int*   ctsSzB,
950                         /*OUT*/UWord* ctsMemSzB,
951                         CUConst* cc, Cursor* c,
952                         Bool td3, DW_FORM form )
953{
954   *cts       = 0;
955   *ctsSzB    = 0;
956   *ctsMemSzB = 0;
957   switch (form) {
958      case DW_FORM_data1:
959         *cts = (ULong)(UChar)get_UChar(c);
960         *ctsSzB = 1;
961         TRACE_D3("%u", (UInt)*cts);
962         break;
963      case DW_FORM_data2:
964         *cts = (ULong)(UShort)get_UShort(c);
965         *ctsSzB = 2;
966         TRACE_D3("%u", (UInt)*cts);
967         break;
968      case DW_FORM_data4:
969         *cts = (ULong)(UInt)get_UInt(c);
970         *ctsSzB = 4;
971         TRACE_D3("%u", (UInt)*cts);
972         break;
973      case DW_FORM_data8:
974         *cts = get_ULong(c);
975         *ctsSzB = 8;
976         TRACE_D3("%llu", *cts);
977         break;
978      case DW_FORM_sec_offset:
979         *cts = (ULong)get_Dwarfish_UWord( c, cc->is_dw64 );
980         *ctsSzB = cc->is_dw64 ? 8 : 4;
981         TRACE_D3("%llu", *cts);
982         break;
983      case DW_FORM_sdata:
984         *cts = (ULong)(Long)get_SLEB128(c);
985         *ctsSzB = 8;
986         TRACE_D3("%lld", (Long)*cts);
987         break;
988      case DW_FORM_udata:
989         *cts = (ULong)(Long)get_ULEB128(c);
990         *ctsSzB = 8;
991         TRACE_D3("%llu", (Long)*cts);
992         break;
993      case DW_FORM_addr:
994         /* note, this is a hack.  DW_FORM_addr is defined as getting
995            a word the size of the target machine as defined by the
996            address_size field in the CU Header.  However,
997            parse_CU_Header() rejects all inputs except those for
998            which address_size == sizeof(Word), hence we can just
999            treat it as a (host) Word.  */
1000         *cts = (ULong)(UWord)get_UWord(c);
1001         *ctsSzB = sizeof(UWord);
1002         TRACE_D3("0x%lx", (UWord)*cts);
1003         break;
1004
1005      case DW_FORM_ref_addr:
1006         /* We make the same word-size assumption as DW_FORM_addr. */
1007         /* What does this really mean?  From D3 Sec 7.5.4,
1008            description of "reference", it would appear to reference
1009            some other DIE, by specifying the offset from the
1010            beginning of a .debug_info section.  The D3 spec mentions
1011            that this might be in some other shared object and
1012            executable.  But I don't see how the name of the other
1013            object/exe is specified.
1014
1015            At least for the DW_FORM_ref_addrs created by icc11, the
1016            references seem to be within the same object/executable.
1017            So for the moment we merely range-check, to see that they
1018            actually do specify a plausible offset within this
1019            object's .debug_info, and return the value unchanged.
1020         */
1021         *cts = (ULong)(UWord)get_UWord(c);
1022         *ctsSzB = sizeof(UWord);
1023         TRACE_D3("0x%lx", (UWord)*cts);
1024         if (0) VG_(printf)("DW_FORM_ref_addr 0x%lx\n", (UWord)*cts);
1025         if (/* the following 2 are surely impossible, but ... */
1026             cc->debug_info_img == NULL || cc->debug_info_sz == 0
1027             || *cts >= (ULong)cc->debug_info_sz) {
1028            /* Hmm.  Offset is nonsensical for this object's .debug_info
1029               section.  Be safe and reject it. */
1030            cc->barf("get_Form_contents: DW_FORM_ref_addr points "
1031                     "outside .debug_info");
1032         }
1033         break;
1034
1035      case DW_FORM_strp: {
1036         /* this is an offset into .debug_str */
1037         UChar* str;
1038         UWord uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
1039         if (cc->debug_str_img == NULL || uw >= cc->debug_str_sz)
1040            cc->barf("get_Form_contents: DW_FORM_strp "
1041                     "points outside .debug_str");
1042         /* FIXME: check the entire string lies inside debug_str,
1043            not just the first byte of it. */
1044         str = (UChar*)cc->debug_str_img + uw;
1045         TRACE_D3("(indirect string, offset: 0x%lx): %s", uw, str);
1046         *cts = (ULong)(UWord)str;
1047         *ctsMemSzB = 1 + (ULong)VG_(strlen)(str);
1048         break;
1049      }
1050      case DW_FORM_string: {
1051         UChar* str = get_AsciiZ(c);
1052         TRACE_D3("%s", str);
1053         *cts = (ULong)(UWord)str;
1054         /* strlen is safe because get_AsciiZ already 'vetted' the
1055            entire string */
1056         *ctsMemSzB = 1 + (ULong)VG_(strlen)(str);
1057         break;
1058      }
1059      case DW_FORM_ref1: {
1060         UChar  u8 = get_UChar(c);
1061         UWord res = cc->cu_start_offset + (UWord)u8;
1062         *cts = (ULong)res;
1063         *ctsSzB = sizeof(UWord);
1064         TRACE_D3("<%lx>", res);
1065         break;
1066      }
1067      case DW_FORM_ref2: {
1068         UShort  u16 = get_UShort(c);
1069         UWord res = cc->cu_start_offset + (UWord)u16;
1070         *cts = (ULong)res;
1071         *ctsSzB = sizeof(UWord);
1072         TRACE_D3("<%lx>", res);
1073         break;
1074      }
1075      case DW_FORM_ref4: {
1076         UInt  u32 = get_UInt(c);
1077         UWord res = cc->cu_start_offset + (UWord)u32;
1078         *cts = (ULong)res;
1079         *ctsSzB = sizeof(UWord);
1080         TRACE_D3("<%lx>", res);
1081         break;
1082      }
1083      case DW_FORM_ref8: {
1084         ULong  u64 = get_ULong(c);
1085         UWord res = cc->cu_start_offset + (UWord)u64;
1086         *cts = (ULong)res;
1087         *ctsSzB = sizeof(UWord);
1088         TRACE_D3("<%lx>", res);
1089         break;
1090      }
1091      case DW_FORM_ref_udata: {
1092         ULong  u64 = get_ULEB128(c);
1093         UWord res = cc->cu_start_offset + (UWord)u64;
1094         *cts = (ULong)res;
1095         *ctsSzB = sizeof(UWord);
1096         TRACE_D3("<%lx>", res);
1097         break;
1098      }
1099      case DW_FORM_flag: {
1100         UChar u8 = get_UChar(c);
1101         TRACE_D3("%u", (UInt)u8);
1102         *cts = (ULong)u8;
1103         *ctsSzB = 1;
1104         break;
1105      }
1106      case DW_FORM_flag_present:
1107         TRACE_D3("1");
1108         *cts = 1;
1109         *ctsSzB = 1;
1110         break;
1111      case DW_FORM_block1: {
1112         ULong  u64b;
1113         ULong  u64 = (ULong)get_UChar(c);
1114         UChar* block = get_address_of_Cursor(c);
1115         TRACE_D3("%llu byte block: ", u64);
1116         for (u64b = u64; u64b > 0; u64b--) {
1117            UChar u8 = get_UChar(c);
1118            TRACE_D3("%x ", (UInt)u8);
1119         }
1120         *cts = (ULong)(UWord)block;
1121         *ctsMemSzB = (UWord)u64;
1122         break;
1123      }
1124      case DW_FORM_block2: {
1125         ULong  u64b;
1126         ULong  u64 = (ULong)get_UShort(c);
1127         UChar* block = get_address_of_Cursor(c);
1128         TRACE_D3("%llu byte block: ", u64);
1129         for (u64b = u64; u64b > 0; u64b--) {
1130            UChar u8 = get_UChar(c);
1131            TRACE_D3("%x ", (UInt)u8);
1132         }
1133         *cts = (ULong)(UWord)block;
1134         *ctsMemSzB = (UWord)u64;
1135         break;
1136      }
1137      case DW_FORM_block4: {
1138         ULong  u64b;
1139         ULong  u64 = (ULong)get_UInt(c);
1140         UChar* block = get_address_of_Cursor(c);
1141         TRACE_D3("%llu byte block: ", u64);
1142         for (u64b = u64; u64b > 0; u64b--) {
1143            UChar u8 = get_UChar(c);
1144            TRACE_D3("%x ", (UInt)u8);
1145         }
1146         *cts = (ULong)(UWord)block;
1147         *ctsMemSzB = (UWord)u64;
1148         break;
1149      }
1150      case DW_FORM_exprloc:
1151      case DW_FORM_block: {
1152         ULong  u64b;
1153         ULong  u64 = (ULong)get_ULEB128(c);
1154         UChar* block = get_address_of_Cursor(c);
1155         TRACE_D3("%llu byte block: ", u64);
1156         for (u64b = u64; u64b > 0; u64b--) {
1157            UChar u8 = get_UChar(c);
1158            TRACE_D3("%x ", (UInt)u8);
1159         }
1160         *cts = (ULong)(UWord)block;
1161         *ctsMemSzB = (UWord)u64;
1162         break;
1163      }
1164      case DW_FORM_ref_sig8: {
1165         ULong  u64b;
1166         UChar* block = get_address_of_Cursor(c);
1167         TRACE_D3("8 byte signature: ");
1168         for (u64b = 8; u64b > 0; u64b--) {
1169            UChar u8 = get_UChar(c);
1170            TRACE_D3("%x ", (UInt)u8);
1171         }
1172         *cts = (ULong)(UWord)block;
1173         *ctsMemSzB = 8;
1174         break;
1175      }
1176      case DW_FORM_indirect:
1177         get_Form_contents (cts, ctsSzB, ctsMemSzB, cc, c, td3,
1178                            (DW_FORM)get_ULEB128(c));
1179         return;
1180
1181      default:
1182         VG_(printf)(
1183            "get_Form_contents: unhandled %d (%s) at <%lx>\n",
1184            form, ML_(pp_DW_FORM)(form), get_position_of_Cursor(c));
1185         c->barf("get_Form_contents: unhandled DW_FORM");
1186   }
1187}
1188
1189
1190/*------------------------------------------------------------*/
1191/*---                                                      ---*/
1192/*--- Parsing of variable-related DIEs                     ---*/
1193/*---                                                      ---*/
1194/*------------------------------------------------------------*/
1195
1196typedef
1197   struct _TempVar {
1198      UChar*  name; /* in DebugInfo's .strchunks */
1199      /* Represent ranges economically.  nRanges is the number of
1200         ranges.  Cases:
1201         0: .rngOneMin .rngOneMax .manyRanges are all zero
1202         1: .rngOneMin .rngOneMax hold the range; .rngMany is NULL
1203         2: .rngOneMin .rngOneMax are zero; .rngMany holds the ranges.
1204         This is merely an optimisation to avoid having to allocate
1205         and free the XArray in the common (98%) of cases where there
1206         is zero or one address ranges. */
1207      UWord   nRanges;
1208      Addr    rngOneMin;
1209      Addr    rngOneMax;
1210      XArray* rngMany; /* of AddrRange.  NON-UNIQUE PTR in AR_DINFO. */
1211      /* Do not free .rngMany, since many TempVars will have the same
1212         value.  Instead the associated storage is to be freed by
1213         deleting 'rangetree', which stores a single copy of each
1214         range. */
1215      /* --- */
1216      Int     level;
1217      UWord   typeR; /* a cuOff */
1218      GExpr*  gexpr; /* for this variable */
1219      GExpr*  fbGX;  /* to find the frame base of the enclosing fn, if
1220                        any */
1221      UChar*  fName; /* declaring file name, or NULL */
1222      Int     fLine; /* declaring file line number, or zero */
1223      /* offset in .debug_info, so that abstract instances can be
1224         found to satisfy references from concrete instances. */
1225      UWord   dioff;
1226      UWord   absOri; /* so the absOri fields refer to dioff fields
1227                         in some other, related TempVar. */
1228   }
1229   TempVar;
1230
1231#define N_D3_VAR_STACK 48
1232
1233typedef
1234   struct {
1235      /* Contains the range stack: a stack of address ranges, one
1236         stack entry for each nested scope.
1237
1238         Some scope entries are created by function definitions
1239         (DW_AT_subprogram), and for those, we also note the GExpr
1240         derived from its DW_AT_frame_base attribute, if any.
1241         Consequently it should be possible to find, for any
1242         variable's DIE, the GExpr for the the containing function's
1243         DW_AT_frame_base by scanning back through the stack to find
1244         the nearest entry associated with a function.  This somewhat
1245         elaborate scheme is provided so as to make it possible to
1246         obtain the correct DW_AT_frame_base expression even in the
1247         presence of nested functions (or to be more precise, in the
1248         presence of nested DW_AT_subprogram DIEs).
1249      */
1250      Int     sp; /* [sp] is innermost active entry; sp==-1 for empty
1251                     stack */
1252      XArray* ranges[N_D3_VAR_STACK]; /* XArray of AddrRange */
1253      Int     level[N_D3_VAR_STACK];  /* D3 DIE levels */
1254      Bool    isFunc[N_D3_VAR_STACK]; /* from DW_AT_subprogram? */
1255      GExpr*  fbGX[N_D3_VAR_STACK];   /* if isFunc, contains the FB
1256                                         expr, else NULL */
1257      /* The file name table.  Is a mapping from integer index to the
1258         (permanent) copy of the string, iow a non-img area. */
1259      XArray* /* of UChar* */ filenameTable;
1260   }
1261   D3VarParser;
1262
1263static void varstack_show ( D3VarParser* parser, HChar* str ) {
1264   Word i, j;
1265   VG_(printf)("  varstack (%s) {\n", str);
1266   for (i = 0; i <= parser->sp; i++) {
1267      XArray* xa = parser->ranges[i];
1268      vg_assert(xa);
1269      VG_(printf)("    [%ld] (level %d)", i, parser->level[i]);
1270      if (parser->isFunc[i]) {
1271         VG_(printf)(" (fbGX=%p)", parser->fbGX[i]);
1272      } else {
1273         vg_assert(parser->fbGX[i] == NULL);
1274      }
1275      VG_(printf)(": ");
1276      if (VG_(sizeXA)( xa ) == 0) {
1277         VG_(printf)("** empty PC range array **");
1278      } else {
1279         for (j = 0; j < VG_(sizeXA)( xa ); j++) {
1280            AddrRange* range = (AddrRange*) VG_(indexXA)( xa, j );
1281            vg_assert(range);
1282            VG_(printf)("[%#lx,%#lx] ", range->aMin, range->aMax);
1283         }
1284      }
1285      VG_(printf)("\n");
1286   }
1287   VG_(printf)("  }\n");
1288}
1289
1290/* Remove from the stack, all entries with .level > 'level' */
1291static
1292void varstack_preen ( D3VarParser* parser, Bool td3, Int level )
1293{
1294   Bool changed = False;
1295   vg_assert(parser->sp < N_D3_VAR_STACK);
1296   while (True) {
1297      vg_assert(parser->sp >= -1);
1298      if (parser->sp == -1) break;
1299      if (parser->level[parser->sp] <= level) break;
1300      if (0)
1301         TRACE_D3("BBBBAAAA varstack_pop [newsp=%d]\n", parser->sp-1);
1302      vg_assert(parser->ranges[parser->sp]);
1303      /* Who allocated this xa?  get_range_list() or
1304         unitary_range_list(). */
1305      VG_(deleteXA)( parser->ranges[parser->sp] );
1306      parser->ranges[parser->sp] = NULL;
1307      parser->level[parser->sp]  = 0;
1308      parser->isFunc[parser->sp] = False;
1309      parser->fbGX[parser->sp]   = NULL;
1310      parser->sp--;
1311      changed = True;
1312   }
1313   if (changed && td3)
1314      varstack_show( parser, "after preen" );
1315}
1316
1317static void varstack_push ( CUConst* cc,
1318                            D3VarParser* parser,
1319                            Bool td3,
1320                            XArray* ranges, Int level,
1321                            Bool    isFunc, GExpr* fbGX ) {
1322   if (0)
1323   TRACE_D3("BBBBAAAA varstack_push[newsp=%d]: %d  %p\n",
1324            parser->sp+1, level, ranges);
1325
1326   /* First we need to zap everything >= 'level', as we are about to
1327      replace any previous entry at 'level', so .. */
1328   varstack_preen(parser, /*td3*/False, level-1);
1329
1330   vg_assert(parser->sp >= -1);
1331   vg_assert(parser->sp < N_D3_VAR_STACK);
1332   if (parser->sp == N_D3_VAR_STACK-1)
1333      cc->barf("varstack_push: N_D3_VAR_STACK is too low; "
1334               "increase and recompile");
1335   if (parser->sp >= 0)
1336      vg_assert(parser->level[parser->sp] < level);
1337   parser->sp++;
1338   vg_assert(parser->ranges[parser->sp] == NULL);
1339   vg_assert(parser->level[parser->sp]  == 0);
1340   vg_assert(parser->isFunc[parser->sp] == False);
1341   vg_assert(parser->fbGX[parser->sp]   == NULL);
1342   vg_assert(ranges != NULL);
1343   if (!isFunc) vg_assert(fbGX == NULL);
1344   parser->ranges[parser->sp] = ranges;
1345   parser->level[parser->sp]  = level;
1346   parser->isFunc[parser->sp] = isFunc;
1347   parser->fbGX[parser->sp]   = fbGX;
1348   if (td3)
1349      varstack_show( parser, "after push" );
1350}
1351
1352
1353/* cts, ctsSzB, ctsMemSzB are derived from a DW_AT_location and so
1354   refer either to a location expression or to a location list.
1355   Figure out which, and in both cases bundle the expression or
1356   location list into a so-called GExpr (guarded expression). */
1357__attribute__((noinline))
1358static GExpr* get_GX ( CUConst* cc, Bool td3,
1359                       ULong cts, Int ctsSzB, UWord ctsMemSzB )
1360{
1361   GExpr* gexpr = NULL;
1362   if (ctsMemSzB > 0 && ctsSzB == 0) {
1363      /* represents an in-line location expression, and cts points
1364         right at it */
1365      gexpr = make_singleton_GX( (UChar*)(UWord)cts, ctsMemSzB );
1366   }
1367   else
1368   if (ctsMemSzB == 0 && ctsSzB > 0) {
1369      /* represents location list.  cts is the offset of it in
1370         .debug_loc. */
1371      if (!cc->cu_svma_known)
1372         cc->barf("get_GX: location list, but CU svma is unknown");
1373      gexpr = make_general_GX( cc, td3, (UWord)cts, cc->cu_svma );
1374   }
1375   else {
1376      vg_assert(0); /* else caller is bogus */
1377   }
1378   return gexpr;
1379}
1380
1381
1382static
1383void read_filename_table( /*MOD*/D3VarParser* parser,
1384                          CUConst* cc, UWord debug_line_offset,
1385                          Bool td3 )
1386{
1387   Bool   is_dw64;
1388   Cursor c;
1389   Word   i;
1390   UShort version;
1391   UChar  opcode_base;
1392   UChar* str;
1393
1394   vg_assert(parser && cc && cc->barf);
1395   if ((!cc->debug_line_img)
1396       || cc->debug_line_sz <= debug_line_offset)
1397      cc->barf("read_filename_table: .debug_line is missing?");
1398
1399   init_Cursor( &c, cc->debug_line_img,
1400                cc->debug_line_sz, debug_line_offset, cc->barf,
1401                "Overrun whilst reading .debug_line section(1)" );
1402
1403   /* unit_length = */
1404      get_Initial_Length( &is_dw64, &c,
1405           "read_filename_table: invalid initial-length field" );
1406   version = get_UShort( &c );
1407   if (version != 2 && version != 3 && version != 4)
1408     cc->barf("read_filename_table: Only DWARF version 2, 3 and 4 line info "
1409              "is currently supported.");
1410   /*header_length              = (ULong)*/ get_Dwarfish_UWord( &c, is_dw64 );
1411   /*minimum_instruction_length = */ get_UChar( &c );
1412   if (version >= 4)
1413      /*maximum_operations_per_insn = */ get_UChar( &c );
1414   /*default_is_stmt            = */ get_UChar( &c );
1415   /*line_base                  = (Char)*/ get_UChar( &c );
1416   /*line_range                 = */ get_UChar( &c );
1417   opcode_base                = get_UChar( &c );
1418   /* skip over "standard_opcode_lengths" */
1419   for (i = 1; i < (Word)opcode_base; i++)
1420     (void)get_UChar( &c );
1421
1422   /* skip over the directory names table */
1423   while (peek_UChar(&c) != 0) {
1424     (void)get_AsciiZ(&c);
1425   }
1426   (void)get_UChar(&c); /* skip terminating zero */
1427
1428   /* Read and record the file names table */
1429   vg_assert(parser->filenameTable);
1430   vg_assert( VG_(sizeXA)( parser->filenameTable ) == 0 );
1431   /* Add a dummy index-zero entry.  DWARF3 numbers its files
1432      from 1, for some reason. */
1433   str = ML_(addStr)( cc->di, "<unknown_file>", -1 );
1434   VG_(addToXA)( parser->filenameTable, &str );
1435   while (peek_UChar(&c) != 0) {
1436      str = get_AsciiZ(&c);
1437      TRACE_D3("  read_filename_table: %ld %s\n",
1438               VG_(sizeXA)(parser->filenameTable), str);
1439      str = ML_(addStr)( cc->di, str, -1 );
1440      VG_(addToXA)( parser->filenameTable, &str );
1441      (void)get_ULEB128( &c ); /* skip directory index # */
1442      (void)get_ULEB128( &c ); /* skip last mod time */
1443      (void)get_ULEB128( &c ); /* file size */
1444   }
1445   /* We're done!  The rest of it is not interesting. */
1446}
1447
1448
1449__attribute__((noinline))
1450static void parse_var_DIE (
1451   /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
1452   /*MOD*/XArray* /* of TempVar* */ tempvars,
1453   /*MOD*/XArray* /* of GExpr* */ gexprs,
1454   /*MOD*/D3VarParser* parser,
1455   DW_TAG dtag,
1456   UWord posn,
1457   Int level,
1458   Cursor* c_die,
1459   Cursor* c_abbv,
1460   CUConst* cc,
1461   Bool td3
1462)
1463{
1464   ULong       cts;
1465   Int         ctsSzB;
1466   UWord       ctsMemSzB;
1467
1468   UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
1469   UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv );
1470
1471   varstack_preen( parser, td3, level-1 );
1472
1473   if (dtag == DW_TAG_compile_unit) {
1474      Bool have_lo    = False;
1475      Bool have_hi1   = False;
1476      Bool have_range = False;
1477      Addr ip_lo    = 0;
1478      Addr ip_hi1   = 0;
1479      Addr rangeoff = 0;
1480      while (True) {
1481         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
1482         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
1483         if (attr == 0 && form == 0) break;
1484         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
1485                            cc, c_die, False/*td3*/, form );
1486         if (attr == DW_AT_low_pc && ctsSzB > 0) {
1487            ip_lo   = cts;
1488            have_lo = True;
1489         }
1490         if (attr == DW_AT_high_pc && ctsSzB > 0) {
1491            ip_hi1   = cts;
1492            have_hi1 = True;
1493         }
1494         if (attr == DW_AT_ranges && ctsSzB > 0) {
1495            rangeoff = cts;
1496            have_range = True;
1497         }
1498         if (attr == DW_AT_stmt_list && ctsSzB > 0) {
1499            read_filename_table( parser, cc, (UWord)cts, td3 );
1500         }
1501      }
1502      /* Now, does this give us an opportunity to find this
1503         CU's svma? */
1504#if 0
1505      if (level == 0 && have_lo) {
1506         vg_assert(!cc->cu_svma_known); /* if this fails, it must be
1507         because we've already seen a DW_TAG_compile_unit DIE at level
1508         0.  But that can't happen, because DWARF3 only allows exactly
1509         one top level DIE per CU. */
1510         cc->cu_svma_known = True;
1511         cc->cu_svma = ip_lo;
1512         if (1)
1513            TRACE_D3("BBBBAAAA acquire CU_SVMA of %p\n", cc->cu_svma);
1514         /* Now, it may be that this DIE doesn't tell us the CU's
1515            SVMA, by way of not having a DW_AT_low_pc.  That's OK --
1516            the CU doesn't *have* to have its SVMA specified.
1517
1518            But as per last para D3 spec sec 3.1.1 ("Normal and
1519            Partial Compilation Unit Entries", "If the base address
1520            (viz, the SVMA) is undefined, then any DWARF entry of
1521            structure defined interms of the base address of that
1522            compilation unit is not valid.".  So that means, if whilst
1523            processing the children of this top level DIE (or their
1524            children, etc) we see a DW_AT_range, and cu_svma_known is
1525            False, then the DIE that contains it is (per the spec)
1526            invalid, and we can legitimately stop and complain. */
1527      }
1528#else
1529      /* .. whereas The Reality is, simply assume the SVMA is zero
1530         if it isn't specified. */
1531      if (level == 0) {
1532         vg_assert(!cc->cu_svma_known);
1533         cc->cu_svma_known = True;
1534         if (have_lo)
1535            cc->cu_svma = ip_lo;
1536         else
1537            cc->cu_svma = 0;
1538      }
1539#endif
1540      /* Do we have something that looks sane? */
1541      if (have_lo && have_hi1 && (!have_range)) {
1542         if (ip_lo < ip_hi1)
1543            varstack_push( cc, parser, td3,
1544                           unitary_range_list(ip_lo, ip_hi1 - 1),
1545                           level,
1546                           False/*isFunc*/, NULL/*fbGX*/ );
1547      } else
1548      if ((!have_lo) && (!have_hi1) && have_range) {
1549         varstack_push( cc, parser, td3,
1550                        get_range_list( cc, td3,
1551                                        rangeoff, cc->cu_svma ),
1552                        level,
1553                        False/*isFunc*/, NULL/*fbGX*/ );
1554      } else
1555      if ((!have_lo) && (!have_hi1) && (!have_range)) {
1556         /* CU has no code, presumably? */
1557         varstack_push( cc, parser, td3,
1558                        empty_range_list(),
1559                        level,
1560                        False/*isFunc*/, NULL/*fbGX*/ );
1561      } else
1562      if (have_lo && (!have_hi1) && have_range && ip_lo == 0) {
1563         /* broken DIE created by gcc-4.3.X ?  Ignore the
1564            apparently-redundant DW_AT_low_pc and use the DW_AT_ranges
1565            instead. */
1566         varstack_push( cc, parser, td3,
1567                        get_range_list( cc, td3,
1568                                        rangeoff, cc->cu_svma ),
1569                        level,
1570                        False/*isFunc*/, NULL/*fbGX*/ );
1571      } else {
1572         if (0) VG_(printf)("I got hlo %d hhi1 %d hrange %d\n",
1573                            (Int)have_lo, (Int)have_hi1, (Int)have_range);
1574         goto bad_DIE;
1575      }
1576   }
1577
1578   if (dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram) {
1579      Bool   have_lo    = False;
1580      Bool   have_hi1   = False;
1581      Bool   have_range = False;
1582      Addr   ip_lo      = 0;
1583      Addr   ip_hi1     = 0;
1584      Addr   rangeoff   = 0;
1585      Bool   isFunc     = dtag == DW_TAG_subprogram;
1586      GExpr* fbGX       = NULL;
1587      while (True) {
1588         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
1589         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
1590         if (attr == 0 && form == 0) break;
1591         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
1592                            cc, c_die, False/*td3*/, form );
1593         if (attr == DW_AT_low_pc && ctsSzB > 0) {
1594            ip_lo   = cts;
1595            have_lo = True;
1596         }
1597         if (attr == DW_AT_high_pc && ctsSzB > 0) {
1598            ip_hi1   = cts;
1599            have_hi1 = True;
1600         }
1601         if (attr == DW_AT_ranges && ctsSzB > 0) {
1602            rangeoff = cts;
1603            have_range = True;
1604         }
1605         if (isFunc
1606             && attr == DW_AT_frame_base
1607             && ((ctsMemSzB > 0 && ctsSzB == 0)
1608                 || (ctsMemSzB == 0 && ctsSzB > 0))) {
1609            fbGX = get_GX( cc, False/*td3*/, cts, ctsSzB, ctsMemSzB );
1610            vg_assert(fbGX);
1611            VG_(addToXA)(gexprs, &fbGX);
1612         }
1613      }
1614      /* Do we have something that looks sane? */
1615      if (dtag == DW_TAG_subprogram
1616          && (!have_lo) && (!have_hi1) && (!have_range)) {
1617         /* This is legit - ignore it. Sec 3.3.3: "A subroutine entry
1618            representing a subroutine declaration that is not also a
1619            definition does not have code address or range
1620            attributes." */
1621      } else
1622      if (dtag == DW_TAG_lexical_block
1623          && (!have_lo) && (!have_hi1) && (!have_range)) {
1624         /* I believe this is legit, and means the lexical block
1625            contains no insns (whatever that might mean).  Ignore. */
1626      } else
1627      if (have_lo && have_hi1 && (!have_range)) {
1628         /* This scope supplies just a single address range. */
1629         if (ip_lo < ip_hi1)
1630            varstack_push( cc, parser, td3,
1631                           unitary_range_list(ip_lo, ip_hi1 - 1),
1632                           level, isFunc, fbGX );
1633      } else
1634      if ((!have_lo) && (!have_hi1) && have_range) {
1635         /* This scope supplies multiple address ranges via the use of
1636            a range list. */
1637         varstack_push( cc, parser, td3,
1638                        get_range_list( cc, td3,
1639                                        rangeoff, cc->cu_svma ),
1640                        level, isFunc, fbGX );
1641      } else
1642      if (have_lo && (!have_hi1) && (!have_range)) {
1643         /* This scope is bogus.  The D3 spec sec 3.4 (Lexical Block
1644            Entries) says fairly clearly that a scope must have either
1645            _range or (_low_pc and _high_pc). */
1646         /* The spec is a bit ambiguous though.  Perhaps a single byte
1647            range is intended?  See sec 2.17 (Code Addresses And Ranges) */
1648         /* This case is here because icc9 produced this:
1649         <2><13bd>: DW_TAG_lexical_block
1650            DW_AT_decl_line   : 5229
1651            DW_AT_decl_column : 37
1652            DW_AT_decl_file   : 1
1653            DW_AT_low_pc      : 0x401b03
1654         */
         /* Ignore (seems safer than pushing a single byte range) */
1656      } else
1657         goto bad_DIE;
1658   }
1659
1660   if (dtag == DW_TAG_variable || dtag == DW_TAG_formal_parameter) {
1661      UChar* name        = NULL;
1662      UWord  typeR       = D3_INVALID_CUOFF;
1663      Bool   external    = False;
1664      GExpr* gexpr       = NULL;
1665      Int    n_attrs     = 0;
1666      UWord  abs_ori     = (UWord)D3_INVALID_CUOFF;
1667      Int    lineNo      = 0;
1668      UChar* fileName    = NULL;
1669      while (True) {
1670         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
1671         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
1672         if (attr == 0 && form == 0) break;
1673         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
1674                            cc, c_die, False/*td3*/, form );
1675         n_attrs++;
1676         if (attr == DW_AT_name && ctsMemSzB > 0) {
1677            name = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 );
1678         }
1679         if (attr == DW_AT_location
1680             && ((ctsMemSzB > 0 && ctsSzB == 0)
1681                 || (ctsMemSzB == 0 && ctsSzB > 0))) {
1682            gexpr = get_GX( cc, False/*td3*/, cts, ctsSzB, ctsMemSzB );
1683            vg_assert(gexpr);
1684            VG_(addToXA)(gexprs, &gexpr);
1685         }
1686         if (attr == DW_AT_type && ctsSzB > 0) {
1687            typeR = (UWord)cts;
1688         }
1689         if (attr == DW_AT_external && ctsSzB > 0 && cts > 0) {
1690            external = True;
1691         }
1692         if (attr == DW_AT_abstract_origin && ctsSzB > 0) {
1693            abs_ori = (UWord)cts;
1694         }
1695         if (attr == DW_AT_declaration && ctsSzB > 0 && cts > 0) {
1696            /*declaration = True;*/
1697         }
1698         if (attr == DW_AT_decl_line && ctsSzB > 0) {
1699            lineNo = (Int)cts;
1700         }
1701         if (attr == DW_AT_decl_file && ctsSzB > 0) {
1702            Int ftabIx = (Int)cts;
1703            if (ftabIx >= 1
1704                && ftabIx < VG_(sizeXA)( parser->filenameTable )) {
1705               fileName = *(UChar**)
1706                          VG_(indexXA)( parser->filenameTable, ftabIx );
1707               vg_assert(fileName);
1708            }
1709            if (0) VG_(printf)("XXX filename = %s\n", fileName);
1710         }
1711      }
1712      /* We'll collect it under if one of the following three
1713         conditions holds:
1714         (1) has location and type    -> completed
1715         (2) has type only            -> is an abstract instance
1716         (3) has location and abs_ori -> is a concrete instance
1717         Name, filename and line number are all optional frills.
1718      */
1719      if ( /* 1 */ (gexpr && typeR != D3_INVALID_CUOFF)
1720           /* 2 */ || (typeR != D3_INVALID_CUOFF)
1721           /* 3 */ || (gexpr && abs_ori != (UWord)D3_INVALID_CUOFF) ) {
1722
1723         /* Add this variable to the list of interesting looking
1724            variables.  Crucially, note along with it the address
1725            range(s) associated with the variable, which for locals
1726            will be the address ranges at the top of the varparser's
1727            stack. */
1728         GExpr*   fbGX = NULL;
1729         Word     i, nRanges;
1730         XArray*  /* of AddrRange */ xa;
1731         TempVar* tv;
1732         /* Stack can't be empty; we put a dummy entry on it for the
1733            entire address range before starting with the DIEs for
1734            this CU. */
1735         vg_assert(parser->sp >= 0);
1736
1737         /* If this is a local variable (non-external), try to find
1738            the GExpr for the DW_AT_frame_base of the containing
1739            function.  It should have been pushed on the stack at the
1740            time we encountered its DW_TAG_subprogram DIE, so the way
1741            to find it is to scan back down the stack looking for it.
1742            If there isn't an enclosing stack entry marked 'isFunc'
1743            then we must be seeing variable or formal param DIEs
1744            outside of a function, so we deem the Dwarf to be
1745            malformed if that happens.  Note that the fbGX may be NULL
1746            if the containing DT_TAG_subprogram didn't supply a
1747            DW_AT_frame_base -- that's OK, but there must actually be
1748            a containing DW_TAG_subprogram. */
1749         if (!external) {
1750            Bool found = False;
1751            for (i = parser->sp; i >= 0; i--) {
1752               if (parser->isFunc[i]) {
1753                  fbGX = parser->fbGX[i];
1754                  found = True;
1755                  break;
1756               }
1757            }
1758            if (!found) {
1759               if (0 && VG_(clo_verbosity) >= 0) {
1760                  VG_(message)(Vg_DebugMsg,
1761                     "warning: parse_var_DIE: non-external variable "
1762                     "outside DW_TAG_subprogram\n");
1763               }
1764               /* goto bad_DIE; */
1765               /* This seems to happen a lot.  Just ignore it -- if,
1766                  when we come to evaluation of the location (guarded)
1767                  expression, it requires a frame base value, and
1768                  there's no expression for that, then evaluation as a
1769                  whole will fail.  Harmless - a bit of a waste of
1770                  cycles but nothing more. */
1771            }
1772         }
1773
1774         /* re "external ? 0 : parser->sp" (twice), if the var is
1775            marked 'external' then we must put it at the global scope,
1776            as only the global scope (level 0) covers the entire PC
1777            address space.  It is asserted elsewhere that level 0
1778            always covers the entire address space. */
1779         xa = parser->ranges[external ? 0 : parser->sp];
1780         nRanges = VG_(sizeXA)(xa);
1781         vg_assert(nRanges >= 0);
1782
1783         tv = ML_(dinfo_zalloc)( "di.readdwarf3.pvD.1", sizeof(TempVar) );
1784         tv->name   = name;
1785         tv->level  = external ? 0 : parser->sp;
1786         tv->typeR  = typeR;
1787         tv->gexpr  = gexpr;
1788         tv->fbGX   = fbGX;
1789         tv->fName  = fileName;
1790         tv->fLine  = lineNo;
1791         tv->dioff  = posn;
1792         tv->absOri = abs_ori;
1793
1794         /* See explanation on definition of type TempVar for the
1795            reason for this elaboration. */
1796         tv->nRanges = nRanges;
1797         tv->rngOneMin = 0;
1798         tv->rngOneMax = 0;
1799         tv->rngMany = NULL;
1800         if (nRanges == 1) {
1801            AddrRange* range = VG_(indexXA)(xa, 0);
1802            tv->rngOneMin = range->aMin;
1803            tv->rngOneMax = range->aMax;
1804         }
1805         else if (nRanges > 1) {
1806            /* See if we already have a range list which is
1807               structurally identical.  If so, use that; if not, clone
1808               this one, and add it to our collection. */
1809            UWord keyW, valW;
1810            if (VG_(lookupFM)( rangestree, &keyW, &valW, (UWord)xa )) {
1811               XArray* old = (XArray*)keyW;
1812               tl_assert(valW == 0);
1813               tl_assert(old != xa);
1814               tv->rngMany = old;
1815            } else {
1816               XArray* cloned = VG_(cloneXA)( "di.readdwarf3.pvD.2", xa );
1817               tv->rngMany = cloned;
1818               VG_(addToFM)( rangestree, (UWord)cloned, 0 );
1819            }
1820         }
1821
1822         VG_(addToXA)( tempvars, &tv );
1823
1824         TRACE_D3("  Recording this variable, with %ld PC range(s)\n",
1825                  VG_(sizeXA)(xa) );
1826         /* collect stats on how effective the ->ranges special
1827            casing is */
1828         if (0) {
1829            static Int ntot=0, ngt=0;
1830            ntot++;
1831            if (tv->rngMany) ngt++;
1832            if (0 == (ntot % 100000))
1833               VG_(printf)("XXXX %d tot, %d cloned\n", ntot, ngt);
1834         }
1835
1836      }
1837
1838      /* Here are some other weird cases seen in the wild:
1839
1840            We have a variable with a name and a type, but no
1841            location.  I guess that's a sign that it has been
1842            optimised away.  Ignore it.  Here's an example:
1843
1844            static Int lc_compar(void* n1, void* n2) {
1845               MC_Chunk* mc1 = *(MC_Chunk**)n1;
1846               MC_Chunk* mc2 = *(MC_Chunk**)n2;
1847               return (mc1->data < mc2->data ? -1 : 1);
1848            }
1849
1850            Both mc1 and mc2 are like this
1851            <2><5bc>: Abbrev Number: 21 (DW_TAG_variable)
1852                DW_AT_name        : mc1
1853                DW_AT_decl_file   : 1
1854                DW_AT_decl_line   : 216
1855                DW_AT_type        : <5d3>
1856
1857            whereas n1 and n2 do have locations specified.
1858
1859            ---------------------------------------------
1860
1861            We see a DW_TAG_formal_parameter with a type, but
1862            no name and no location.  It's probably part of a function type
1863            construction, thusly, hence ignore it:
1864         <1><2b4>: Abbrev Number: 12 (DW_TAG_subroutine_type)
1865             DW_AT_sibling     : <2c9>
1866             DW_AT_prototyped  : 1
1867             DW_AT_type        : <114>
1868         <2><2be>: Abbrev Number: 13 (DW_TAG_formal_parameter)
1869             DW_AT_type        : <13e>
1870         <2><2c3>: Abbrev Number: 13 (DW_TAG_formal_parameter)
1871             DW_AT_type        : <133>
1872
1873            ---------------------------------------------
1874
1875            Is very minimal, like this:
1876            <4><81d>: Abbrev Number: 44 (DW_TAG_variable)
1877                DW_AT_abstract_origin: <7ba>
1878            What that signifies I have no idea.  Ignore.
1879
1880            ----------------------------------------------
1881
1882            Is very minimal, like this:
1883            <200f>: DW_TAG_formal_parameter
1884                DW_AT_abstract_ori: <1f4c>
1885                DW_AT_location    : 13440
1886            What that signifies I have no idea.  Ignore.
1887            It might be significant, though: the variable at least
1888            has a location and so might exist somewhere.
1889            Maybe we should handle this.
1890
1891            ---------------------------------------------
1892
1893            <22407>: DW_TAG_variable
1894              DW_AT_name        : (indirect string, offset: 0x6579):
1895                                  vgPlain_trampoline_stuff_start
1896              DW_AT_decl_file   : 29
1897              DW_AT_decl_line   : 56
1898              DW_AT_external    : 1
1899              DW_AT_declaration : 1
1900
1901            Nameless and typeless variable that has a location?  Who
1902            knows.  Not me.
1903            <2><3d178>: Abbrev Number: 22 (DW_TAG_variable)
1904                 DW_AT_location    : 9 byte block: 3 c0 c7 13 38 0 0 0 0
1905                                     (DW_OP_addr: 3813c7c0)
1906
1907            No, really.  Check it out.  gcc is quite simply borked.
1908            <3><168cc>: Abbrev Number: 141 (DW_TAG_variable)
1909            // followed by no attributes, and the next DIE is a sibling,
1910            // not a child
1911            */
1912   }
1913   return;
1914
1915  bad_DIE:
1916   set_position_of_Cursor( c_die,  saved_die_c_offset );
1917   set_position_of_Cursor( c_abbv, saved_abbv_c_offset );
1918   VG_(printf)("\nparse_var_DIE: confused by:\n");
1919   VG_(printf)(" <%d><%lx>: %s\n", level, posn, ML_(pp_DW_TAG)( dtag ) );
1920   while (True) {
1921      DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
1922      DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
1923      if (attr == 0 && form == 0) break;
1924      VG_(printf)("     %18s: ", ML_(pp_DW_AT)(attr));
1925      /* Get the form contents, so as to print them */
1926      get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
1927                         cc, c_die, True, form );
1928      VG_(printf)("\t\n");
1929   }
1930   VG_(printf)("\n");
1931   cc->barf("parse_var_DIE: confused by the above DIE");
1932   /*NOTREACHED*/
1933}
1934
1935
1936/*------------------------------------------------------------*/
1937/*---                                                      ---*/
1938/*--- Parsing of type-related DIEs                         ---*/
1939/*---                                                      ---*/
1940/*------------------------------------------------------------*/
1941
1942#define N_D3_TYPE_STACK 16
1943
1944typedef
1945   struct {
1946      /* What source language?  'C'=C/C++, 'F'=Fortran, '?'=other
1947         Established once per compilation unit. */
1948      UChar language;
1949      /* A stack of types which are currently under construction */
1950      Int   sp; /* [sp] is innermost active entry; sp==-1 for empty
1951                   stack */
1952      /* Note that the TyEnts in qparentE are temporary copies of the
1953         ones accumulating in the main tyent array.  So it is not safe
1954         to free up anything on them when popping them off the stack
1955         (iow, it isn't safe to use TyEnt__make_EMPTY on them).  Just
1956         memset them to zero when done. */
1957      TyEnt qparentE[N_D3_TYPE_STACK]; /* parent TyEnts */
1958      Int   qlevel[N_D3_TYPE_STACK];
1959
1960   }
1961   D3TypeParser;
1962
1963static void typestack_show ( D3TypeParser* parser, HChar* str ) {
1964   Word i;
1965   VG_(printf)("  typestack (%s) {\n", str);
1966   for (i = 0; i <= parser->sp; i++) {
1967      VG_(printf)("    [%ld] (level %d): ", i, parser->qlevel[i]);
1968      ML_(pp_TyEnt)( &parser->qparentE[i] );
1969      VG_(printf)("\n");
1970   }
1971   VG_(printf)("  }\n");
1972}
1973
1974/* Remove from the stack, all entries with .level > 'level' */
1975static
1976void typestack_preen ( D3TypeParser* parser, Bool td3, Int level )
1977{
1978   Bool changed = False;
1979   vg_assert(parser->sp < N_D3_TYPE_STACK);
1980   while (True) {
1981      vg_assert(parser->sp >= -1);
1982      if (parser->sp == -1) break;
1983      if (parser->qlevel[parser->sp] <= level) break;
1984      if (0)
1985         TRACE_D3("BBBBAAAA typestack_pop [newsp=%d]\n", parser->sp-1);
1986      vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
1987      VG_(memset)(&parser->qparentE[parser->sp], 0, sizeof(TyEnt));
1988      parser->qparentE[parser->sp].cuOff = D3_INVALID_CUOFF;
1989      parser->qparentE[parser->sp].tag = Te_EMPTY;
1990      parser->qlevel[parser->sp] = 0;
1991      parser->sp--;
1992      changed = True;
1993   }
1994   if (changed && td3)
1995      typestack_show( parser, "after preen" );
1996}
1997
1998static Bool typestack_is_empty ( D3TypeParser* parser ) {
1999   vg_assert(parser->sp >= -1 && parser->sp < N_D3_TYPE_STACK);
2000   return parser->sp == -1;
2001}
2002
2003static void typestack_push ( CUConst* cc,
2004                             D3TypeParser* parser,
2005                             Bool td3,
2006                             TyEnt* parentE, Int level ) {
2007   if (0)
2008   TRACE_D3("BBBBAAAA typestack_push[newsp=%d]: %d  %05lx\n",
2009            parser->sp+1, level, parentE->cuOff);
2010
2011   /* First we need to zap everything >= 'level', as we are about to
2012      replace any previous entry at 'level', so .. */
2013   typestack_preen(parser, /*td3*/False, level-1);
2014
2015   vg_assert(parser->sp >= -1);
2016   vg_assert(parser->sp < N_D3_TYPE_STACK);
2017   if (parser->sp == N_D3_TYPE_STACK-1)
2018      cc->barf("typestack_push: N_D3_TYPE_STACK is too low; "
2019               "increase and recompile");
2020   if (parser->sp >= 0)
2021      vg_assert(parser->qlevel[parser->sp] < level);
2022   parser->sp++;
2023   vg_assert(parser->qparentE[parser->sp].tag == Te_EMPTY);
2024   vg_assert(parser->qlevel[parser->sp]  == 0);
2025   vg_assert(parentE);
2026   vg_assert(ML_(TyEnt__is_type)(parentE));
2027   vg_assert(parentE->cuOff != D3_INVALID_CUOFF);
2028   parser->qparentE[parser->sp] = *parentE;
2029   parser->qlevel[parser->sp]  = level;
2030   if (td3)
2031      typestack_show( parser, "after push" );
2032}
2033
2034
2035/* Parse a type-related DIE.  'parser' holds the current parser state.
2036   'admin' is where the completed types are dumped.  'dtag' is the tag
2037   for this DIE.  'c_die' points to the start of the data fields (FORM
2038   stuff) for the DIE.  c_abbv points to the start of the (name,form)
2039   pairs which describe the DIE.
2040
2041   We may find the DIE uninteresting, in which case we should ignore
2042   it.
2043
2044   What happens: the DIE is examined.  If uninteresting, it is ignored.
2045   Otherwise, the DIE gives rise to two things:
2046
2047   (1) the offset of this DIE in the CU -- the cuOffset, a UWord
2048   (2) a TyAdmin structure, which holds the type, or related stuff
2049
2050   (2) is added at the end of 'tyadmins', at some index, say 'i'.
2051
2052   A pair (cuOffset, i) is added to 'tydict'.
2053
   Hence 'tyadmins' holds the actual type entities, and 'tydict' holds
   a mapping from cuOffset to the index of the corresponding entry in
   'tyadmins'.
2057
2058   When resolving a cuOffset to a TyAdmin, first look up the cuOffset
2059   in the tydict (by binary search).  This gives an index into
2060   tyadmins, and the required entity lives in tyadmins at that index.
2061*/
2062__attribute__((noinline))
2063static void parse_type_DIE ( /*MOD*/XArray* /* of TyEnt */ tyents,
2064                             /*MOD*/D3TypeParser* parser,
2065                             DW_TAG dtag,
2066                             UWord posn,
2067                             Int level,
2068                             Cursor* c_die,
2069                             Cursor* c_abbv,
2070                             CUConst* cc,
2071                             Bool td3 )
2072{
2073   ULong cts;
2074   Int   ctsSzB;
2075   UWord ctsMemSzB;
2076   TyEnt typeE;
2077   TyEnt atomE;
2078   TyEnt fieldE;
2079   TyEnt boundE;
2080
2081   UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
2082   UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv );
2083
2084   VG_(memset)( &typeE,  0xAA, sizeof(typeE) );
2085   VG_(memset)( &atomE,  0xAA, sizeof(atomE) );
2086   VG_(memset)( &fieldE, 0xAA, sizeof(fieldE) );
2087   VG_(memset)( &boundE, 0xAA, sizeof(boundE) );
2088
2089   /* If we've returned to a level at or above any previously noted
2090      parent, un-note it, so we don't believe we're still collecting
2091      its children. */
2092   typestack_preen( parser, td3, level-1 );
2093
2094   if (dtag == DW_TAG_compile_unit) {
2095      /* See if we can find DW_AT_language, since it is important for
2096         establishing array bounds (see DW_TAG_subrange_type below in
2097         this fn) */
2098      while (True) {
2099         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2100         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2101         if (attr == 0 && form == 0) break;
2102         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2103                            cc, c_die, False/*td3*/, form );
2104         if (attr != DW_AT_language)
2105            continue;
2106         if (ctsSzB == 0)
2107           goto bad_DIE;
2108         switch (cts) {
2109            case DW_LANG_C89: case DW_LANG_C:
2110            case DW_LANG_C_plus_plus: case DW_LANG_ObjC:
2111            case DW_LANG_ObjC_plus_plus: case DW_LANG_UPC:
2112            case DW_LANG_Upc: case DW_LANG_C99:
2113               parser->language = 'C'; break;
2114            case DW_LANG_Fortran77: case DW_LANG_Fortran90:
2115            case DW_LANG_Fortran95:
2116               parser->language = 'F'; break;
2117            case DW_LANG_Ada83: case DW_LANG_Cobol74:
2118            case DW_LANG_Cobol85: case DW_LANG_Pascal83:
2119            case DW_LANG_Modula2: case DW_LANG_Java:
2120            case DW_LANG_Ada95: case DW_LANG_PLI:
2121            case DW_LANG_D: case DW_LANG_Python:
2122            case DW_LANG_Mips_Assembler:
2123               parser->language = '?'; break;
2124            default:
2125               goto bad_DIE;
2126         }
2127      }
2128   }
2129
2130   if (dtag == DW_TAG_base_type) {
2131      /* We can pick up a new base type any time. */
2132      VG_(memset)(&typeE, 0, sizeof(typeE));
2133      typeE.cuOff = D3_INVALID_CUOFF;
2134      typeE.tag   = Te_TyBase;
2135      while (True) {
2136         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2137         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2138         if (attr == 0 && form == 0) break;
2139         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2140                            cc, c_die, False/*td3*/, form );
2141         if (attr == DW_AT_name && ctsMemSzB > 0) {
2142            typeE.Te.TyBase.name
2143               = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.1",
2144                                    (UChar*)(UWord)cts );
2145         }
2146         if (attr == DW_AT_byte_size && ctsSzB > 0) {
2147            typeE.Te.TyBase.szB = cts;
2148         }
2149         if (attr == DW_AT_encoding && ctsSzB > 0) {
2150            switch (cts) {
2151               case DW_ATE_unsigned: case DW_ATE_unsigned_char:
2152               case DW_ATE_UTF: /* since DWARF4, e.g. char16_t from C++ */
2153               case DW_ATE_boolean:/* FIXME - is this correct? */
2154                  typeE.Te.TyBase.enc = 'U'; break;
2155               case DW_ATE_signed: case DW_ATE_signed_char:
2156                  typeE.Te.TyBase.enc = 'S'; break;
2157               case DW_ATE_float:
2158                  typeE.Te.TyBase.enc = 'F'; break;
2159               case DW_ATE_complex_float:
2160                  typeE.Te.TyBase.enc = 'C'; break;
2161               default:
2162                  goto bad_DIE;
2163            }
2164         }
2165      }
2166
2167      /* Invent a name if it doesn't have one.  gcc-4.3
2168         -ftree-vectorize is observed to emit nameless base types. */
2169      if (!typeE.Te.TyBase.name)
2170         typeE.Te.TyBase.name
2171            = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.2",
2172                                 "<anon_base_type>" );
2173
2174      /* Do we have something that looks sane? */
2175      if (/* must have a name */
2176          typeE.Te.TyBase.name == NULL
2177          /* and a plausible size.  Yes, really 32: "complex long
2178             double" apparently has size=32 */
2179          || typeE.Te.TyBase.szB < 0 || typeE.Te.TyBase.szB > 32
2180          /* and a plausible encoding */
2181          || (typeE.Te.TyBase.enc != 'U'
2182              && typeE.Te.TyBase.enc != 'S'
2183              && typeE.Te.TyBase.enc != 'F'
2184              && typeE.Te.TyBase.enc != 'C'))
2185         goto bad_DIE;
2186      /* Last minute hack: if we see this
2187         <1><515>: DW_TAG_base_type
2188             DW_AT_byte_size   : 0
2189             DW_AT_encoding    : 5
2190             DW_AT_name        : void
2191         convert it into a real Void type. */
2192      if (typeE.Te.TyBase.szB == 0
2193          && 0 == VG_(strcmp)("void", typeE.Te.TyBase.name)) {
2194         ML_(TyEnt__make_EMPTY)(&typeE);
2195         typeE.tag = Te_TyVoid;
2196         typeE.Te.TyVoid.isFake = False; /* it's a real one! */
2197      }
2198
2199      goto acquire_Type;
2200   }
2201
2202   if (dtag == DW_TAG_pointer_type || dtag == DW_TAG_reference_type
2203       || dtag == DW_TAG_ptr_to_member_type) {
2204      /* This seems legit for _pointer_type and _reference_type.  I
2205         don't know if rolling _ptr_to_member_type in here really is
2206         legit, but it's better than not handling it at all. */
2207      VG_(memset)(&typeE, 0, sizeof(typeE));
2208      typeE.cuOff = D3_INVALID_CUOFF;
2209      typeE.tag   = Te_TyPorR;
2210      /* target type defaults to void */
2211      typeE.Te.TyPorR.typeR = D3_FAKEVOID_CUOFF;
2212      typeE.Te.TyPorR.isPtr = dtag == DW_TAG_pointer_type
2213                              || dtag == DW_TAG_ptr_to_member_type;
2214      /* These three type kinds don't *have* to specify their size, in
2215         which case we assume it's a machine word.  But if they do
2216         specify it, it must be a machine word :-)  This probably
2217         assumes that the word size of the Dwarf3 we're reading is the
2218         same size as that on the machine.  gcc appears to give a size
2219         whereas icc9 doesn't. */
2220      typeE.Te.TyPorR.szB = sizeof(UWord);
2221      while (True) {
2222         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2223         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2224         if (attr == 0 && form == 0) break;
2225         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2226                            cc, c_die, False/*td3*/, form );
2227         if (attr == DW_AT_byte_size && ctsSzB > 0) {
2228            typeE.Te.TyPorR.szB = cts;
2229         }
2230         if (attr == DW_AT_type && ctsSzB > 0) {
2231            typeE.Te.TyPorR.typeR = (UWord)cts;
2232         }
2233      }
2234      /* Do we have something that looks sane? */
2235      if (typeE.Te.TyPorR.szB != sizeof(UWord))
2236         goto bad_DIE;
2237      else
2238         goto acquire_Type;
2239   }
2240
2241   if (dtag == DW_TAG_enumeration_type) {
2242      /* Create a new Type to hold the results. */
2243      VG_(memset)(&typeE, 0, sizeof(typeE));
2244      typeE.cuOff = posn;
2245      typeE.tag   = Te_TyEnum;
2246      typeE.Te.TyEnum.atomRs
2247         = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.enum_type.1",
2248                       ML_(dinfo_free),
2249                       sizeof(UWord) );
2250      while (True) {
2251         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2252         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2253         if (attr == 0 && form == 0) break;
2254         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2255                            cc, c_die, False/*td3*/, form );
2256         if (attr == DW_AT_name && ctsMemSzB > 0) {
2257            typeE.Te.TyEnum.name
2258              = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.2",
2259                                   (UChar*)(UWord)cts );
2260         }
2261         if (attr == DW_AT_byte_size && ctsSzB > 0) {
2262            typeE.Te.TyEnum.szB = cts;
2263         }
2264      }
2265
2266      if (!typeE.Te.TyEnum.name)
2267         typeE.Te.TyEnum.name
2268            = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.3",
2269                                 "<anon_enum_type>" );
2270
2271      /* Do we have something that looks sane? */
2272      if (typeE.Te.TyEnum.szB == 0 /* we must know the size */)
2273         goto bad_DIE;
2274      /* On't stack! */
2275      typestack_push( cc, parser, td3, &typeE, level );
2276      goto acquire_Type;
2277   }
2278
2279   /* gcc (GCC) 4.4.0 20081017 (experimental) occasionally produces
2280      DW_TAG_enumerator with only a DW_AT_name but no
2281      DW_AT_const_value.  This is in violation of the Dwarf3 standard,
2282      and appears to be a new "feature" of gcc - versions 4.3.x and
2283      earlier do not appear to do this.  So accept DW_TAG_enumerator
2284      which only have a name but no value.  An example:
2285
2286      <1><180>: Abbrev Number: 6 (DW_TAG_enumeration_type)
2287         <181>   DW_AT_name        : (indirect string, offset: 0xda70):
2288                                     QtMsgType
2289         <185>   DW_AT_byte_size   : 4
2290         <186>   DW_AT_decl_file   : 14
2291         <187>   DW_AT_decl_line   : 1480
2292         <189>   DW_AT_sibling     : <0x1a7>
2293      <2><18d>: Abbrev Number: 7 (DW_TAG_enumerator)
2294         <18e>   DW_AT_name        : (indirect string, offset: 0x9e18):
2295                                     QtDebugMsg
2296      <2><192>: Abbrev Number: 7 (DW_TAG_enumerator)
2297         <193>   DW_AT_name        : (indirect string, offset: 0x1505f):
2298                                     QtWarningMsg
2299      <2><197>: Abbrev Number: 7 (DW_TAG_enumerator)
2300         <198>   DW_AT_name        : (indirect string, offset: 0x16f4a):
2301                                     QtCriticalMsg
2302      <2><19c>: Abbrev Number: 7 (DW_TAG_enumerator)
2303         <19d>   DW_AT_name        : (indirect string, offset: 0x156dd):
2304                                     QtFatalMsg
2305      <2><1a1>: Abbrev Number: 7 (DW_TAG_enumerator)
2306         <1a2>   DW_AT_name        : (indirect string, offset: 0x13660):
2307                                     QtSystemMsg
2308   */
2309   if (dtag == DW_TAG_enumerator) {
2310      VG_(memset)( &atomE, 0, sizeof(atomE) );
2311      atomE.cuOff = posn;
2312      atomE.tag   = Te_Atom;
2313      while (True) {
2314         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2315         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2316         if (attr == 0 && form == 0) break;
2317         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2318                            cc, c_die, False/*td3*/, form );
2319         if (attr == DW_AT_name && ctsMemSzB > 0) {
2320            atomE.Te.Atom.name
2321              = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enumerator.1",
2322                                   (UChar*)(UWord)cts );
2323         }
2324         if (attr == DW_AT_const_value && ctsSzB > 0) {
2325            atomE.Te.Atom.value = cts;
2326            atomE.Te.Atom.valueKnown = True;
2327         }
2328      }
2329      /* Do we have something that looks sane? */
2330      if (atomE.Te.Atom.name == NULL)
2331         goto bad_DIE;
2332      /* Do we have a plausible parent? */
2333      if (typestack_is_empty(parser)) goto bad_DIE;
2334      vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
2335      vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
2336      if (level != parser->qlevel[parser->sp]+1) goto bad_DIE;
2337      if (parser->qparentE[parser->sp].tag != Te_TyEnum) goto bad_DIE;
2338      /* Record this child in the parent */
2339      vg_assert(parser->qparentE[parser->sp].Te.TyEnum.atomRs);
2340      VG_(addToXA)( parser->qparentE[parser->sp].Te.TyEnum.atomRs,
2341                    &atomE );
2342      /* And record the child itself */
2343      goto acquire_Atom;
2344   }
2345
2346   /* Treat DW_TAG_class_type as if it was a DW_TAG_structure_type.  I
2347      don't know if this is correct, but it at least makes this reader
2348      usable for gcc-4.3 produced Dwarf3. */
2349   if (dtag == DW_TAG_structure_type || dtag == DW_TAG_class_type
2350       || dtag == DW_TAG_union_type) {
2351      Bool have_szB = False;
2352      Bool is_decl  = False;
2353      Bool is_spec  = False;
2354      /* Create a new Type to hold the results. */
2355      VG_(memset)(&typeE, 0, sizeof(typeE));
2356      typeE.cuOff = posn;
2357      typeE.tag   = Te_TyStOrUn;
2358      typeE.Te.TyStOrUn.name = NULL;
2359      typeE.Te.TyStOrUn.fieldRs
2360         = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.pTD.struct_type.1",
2361                       ML_(dinfo_free),
2362                       sizeof(UWord) );
2363      typeE.Te.TyStOrUn.complete = True;
2364      typeE.Te.TyStOrUn.isStruct = dtag == DW_TAG_structure_type
2365                                   || dtag == DW_TAG_class_type;
2366      while (True) {
2367         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2368         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2369         if (attr == 0 && form == 0) break;
2370         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2371                            cc, c_die, False/*td3*/, form );
2372         if (attr == DW_AT_name && ctsMemSzB > 0) {
2373            typeE.Te.TyStOrUn.name
2374               = ML_(dinfo_strdup)( "di.readdwarf3.ptD.struct_type.2",
2375                                    (UChar*)(UWord)cts );
2376         }
2377         if (attr == DW_AT_byte_size && ctsSzB >= 0) {
2378            typeE.Te.TyStOrUn.szB = cts;
2379            have_szB = True;
2380         }
2381         if (attr == DW_AT_declaration && ctsSzB > 0 && cts > 0) {
2382            is_decl = True;
2383         }
2384         if (attr == DW_AT_specification && ctsSzB > 0 && cts > 0) {
2385            is_spec = True;
2386         }
2387      }
2388      /* Do we have something that looks sane? */
2389      if (is_decl && (!is_spec)) {
2390         /* It's a DW_AT_declaration.  We require the name but
2391            nothing else. */
2392         if (typeE.Te.TyStOrUn.name == NULL)
2393            goto bad_DIE;
2394         typeE.Te.TyStOrUn.complete = False;
2395         /* JRS 2009 Aug 10: <possible kludge>? */
2396         /* Push this tyent on the stack, even though it's incomplete.
2397            It appears that gcc-4.4 on Fedora 11 will sometimes create
2398            DW_TAG_member entries for it, and so we need to have a
2399            plausible parent present in order for that to work.  See
2400            #200029 comments 8 and 9. */
2401         typestack_push( cc, parser, td3, &typeE, level );
2402         /* </possible kludge> */
2403         goto acquire_Type;
2404      }
2405      if ((!is_decl) /* && (!is_spec) */) {
2406         /* this is the common, ordinary case */
2407         if ((!have_szB) /* we must know the size */
2408             /* But the name can be present, or not */)
2409            goto bad_DIE;
2410         /* On't stack! */
2411         typestack_push( cc, parser, td3, &typeE, level );
2412         goto acquire_Type;
2413      }
2414      else {
2415         /* don't know how to handle any other variants just now */
2416         goto bad_DIE;
2417      }
2418   }
2419
2420   if (dtag == DW_TAG_member) {
2421      /* Acquire member entries for both DW_TAG_structure_type and
2422         DW_TAG_union_type.  They differ minorly, in that struct
2423         members must have a DW_AT_data_member_location expression
2424         whereas union members must not. */
2425      Bool parent_is_struct;
2426      VG_(memset)( &fieldE, 0, sizeof(fieldE) );
2427      fieldE.cuOff = posn;
2428      fieldE.tag   = Te_Field;
2429      fieldE.Te.Field.typeR = D3_INVALID_CUOFF;
2430      while (True) {
2431         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2432         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2433         if (attr == 0 && form == 0) break;
2434         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2435                            cc, c_die, False/*td3*/, form );
2436         if (attr == DW_AT_name && ctsMemSzB > 0) {
2437            fieldE.Te.Field.name
2438               = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.1",
2439                                    (UChar*)(UWord)cts );
2440         }
2441         if (attr == DW_AT_type && ctsSzB > 0) {
2442            fieldE.Te.Field.typeR = (UWord)cts;
2443         }
2444         /* There are 2 different cases for DW_AT_data_member_location.
2445            If it is a constant class attribute, it contains byte offset
2446            from the beginning of the containing entity.
2447            Otherwise it is a location expression.  */
2448         if (attr == DW_AT_data_member_location && ctsSzB > 0) {
2449            fieldE.Te.Field.nLoc = -1;
2450            fieldE.Te.Field.pos.offset = cts;
2451         } else if (attr == DW_AT_data_member_location && ctsMemSzB > 0) {
2452            fieldE.Te.Field.nLoc = (UWord)ctsMemSzB;
2453            fieldE.Te.Field.pos.loc
2454               = ML_(dinfo_memdup)( "di.readdwarf3.ptD.member.2",
2455                                    (UChar*)(UWord)cts,
2456                                    (SizeT)fieldE.Te.Field.nLoc );
2457         }
2458      }
2459      /* Do we have a plausible parent? */
2460      if (typestack_is_empty(parser)) goto bad_DIE;
2461      vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
2462      vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
2463      if (level != parser->qlevel[parser->sp]+1) goto bad_DIE;
2464      if (parser->qparentE[parser->sp].tag != Te_TyStOrUn) goto bad_DIE;
2465      /* Do we have something that looks sane?  If this a member of a
2466         struct, we must have a location expression; but if a member
2467         of a union that is irrelevant (D3 spec sec 5.6.6).  We ought
2468         to reject in the latter case, but some compilers have been
2469         observed to emit constant-zero expressions.  So just ignore
2470         them. */
2471      parent_is_struct
2472         = parser->qparentE[parser->sp].Te.TyStOrUn.isStruct;
2473      if (!fieldE.Te.Field.name)
2474         fieldE.Te.Field.name
2475            = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.3",
2476                                 "<anon_field>" );
2477      vg_assert(fieldE.Te.Field.name);
2478      if (fieldE.Te.Field.typeR == D3_INVALID_CUOFF)
2479         goto bad_DIE;
2480      if (fieldE.Te.Field.nLoc) {
2481         if (!parent_is_struct) {
2482            /* If this is a union type, pretend we haven't seen the data
2483               member location expression, as it is by definition
2484               redundant (it must be zero). */
2485            if (fieldE.Te.Field.nLoc > 0)
2486               ML_(dinfo_free)(fieldE.Te.Field.pos.loc);
2487            fieldE.Te.Field.pos.loc = NULL;
2488            fieldE.Te.Field.nLoc = 0;
2489         }
2490         /* Record this child in the parent */
2491         fieldE.Te.Field.isStruct = parent_is_struct;
2492         vg_assert(parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs);
2493         VG_(addToXA)( parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs,
2494                       &posn );
2495         /* And record the child itself */
2496         goto acquire_Field;
2497      } else {
2498         /* Member with no location - this can happen with static
2499            const members in C++ code which are compile time constants
2500            that do no exist in the class. They're not of any interest
2501            to us so we ignore them. */
2502      }
2503   }
2504
2505   if (dtag == DW_TAG_array_type) {
2506      VG_(memset)(&typeE, 0, sizeof(typeE));
2507      typeE.cuOff = posn;
2508      typeE.tag   = Te_TyArray;
2509      typeE.Te.TyArray.typeR = D3_INVALID_CUOFF;
2510      typeE.Te.TyArray.boundRs
2511         = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.array_type.1",
2512                       ML_(dinfo_free),
2513                       sizeof(UWord) );
2514      while (True) {
2515         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2516         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2517         if (attr == 0 && form == 0) break;
2518         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2519                            cc, c_die, False/*td3*/, form );
2520         if (attr == DW_AT_type && ctsSzB > 0) {
2521            typeE.Te.TyArray.typeR = (UWord)cts;
2522         }
2523      }
2524      if (typeE.Te.TyArray.typeR == D3_INVALID_CUOFF)
2525         goto bad_DIE;
2526      /* On't stack! */
2527      typestack_push( cc, parser, td3, &typeE, level );
2528      goto acquire_Type;
2529   }
2530
2531   if (dtag == DW_TAG_subrange_type) {
2532      Bool have_lower = False;
2533      Bool have_upper = False;
2534      Bool have_count = False;
2535      Long lower = 0;
2536      Long upper = 0;
2537
2538      switch (parser->language) {
2539         case 'C': have_lower = True;  lower = 0; break;
2540         case 'F': have_lower = True;  lower = 1; break;
2541         case '?': have_lower = False; break;
2542         default:  vg_assert(0); /* assured us by handling of
2543                                    DW_TAG_compile_unit in this fn */
2544      }
2545
2546      VG_(memset)( &boundE, 0, sizeof(boundE) );
2547      boundE.cuOff = D3_INVALID_CUOFF;
2548      boundE.tag   = Te_Bound;
2549      while (True) {
2550         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2551         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2552         if (attr == 0 && form == 0) break;
2553         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2554                            cc, c_die, False/*td3*/, form );
2555         if (attr == DW_AT_lower_bound && ctsSzB > 0) {
2556            lower      = (Long)cts;
2557            have_lower = True;
2558         }
2559         if (attr == DW_AT_upper_bound && ctsSzB > 0) {
2560            upper      = (Long)cts;
2561            have_upper = True;
2562         }
2563         if (attr == DW_AT_count && ctsSzB > 0) {
2564            /*count    = (Long)cts;*/
2565            have_count = True;
2566         }
2567      }
2568      /* FIXME: potentially skip the rest if no parent present, since
2569         it could be the case that this subrange type is free-standing
2570         (not being used to describe the bounds of a containing array
2571         type) */
2572      /* Do we have a plausible parent? */
2573      if (typestack_is_empty(parser)) goto bad_DIE;
2574      vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
2575      vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
2576      if (level != parser->qlevel[parser->sp]+1) goto bad_DIE;
2577      if (parser->qparentE[parser->sp].tag != Te_TyArray) goto bad_DIE;
2578
2579      /* Figure out if we have a definite range or not */
2580      if (have_lower && have_upper && (!have_count)) {
2581         boundE.Te.Bound.knownL = True;
2582         boundE.Te.Bound.knownU = True;
2583         boundE.Te.Bound.boundL = lower;
2584         boundE.Te.Bound.boundU = upper;
2585      }
2586      else if (have_lower && (!have_upper) && (!have_count)) {
2587         boundE.Te.Bound.knownL = True;
2588         boundE.Te.Bound.knownU = False;
2589         boundE.Te.Bound.boundL = lower;
2590         boundE.Te.Bound.boundU = 0;
2591      }
2592      else if ((!have_lower) && have_upper && (!have_count)) {
2593         boundE.Te.Bound.knownL = False;
2594         boundE.Te.Bound.knownU = True;
2595         boundE.Te.Bound.boundL = 0;
2596         boundE.Te.Bound.boundU = upper;
2597      }
2598      else if ((!have_lower) && (!have_upper) && (!have_count)) {
2599         boundE.Te.Bound.knownL = False;
2600         boundE.Te.Bound.knownU = False;
2601         boundE.Te.Bound.boundL = 0;
2602         boundE.Te.Bound.boundU = 0;
2603      } else {
2604         /* FIXME: handle more cases */
2605         goto bad_DIE;
2606      }
2607
2608      /* Record this bound in the parent */
2609      boundE.cuOff = posn;
2610      vg_assert(parser->qparentE[parser->sp].Te.TyArray.boundRs);
2611      VG_(addToXA)( parser->qparentE[parser->sp].Te.TyArray.boundRs,
2612                    &boundE );
2613      /* And record the child itself */
2614      goto acquire_Bound;
2615   }
2616
2617   if (dtag == DW_TAG_typedef) {
2618      /* We can pick up a new typedef any time. */
2619      VG_(memset)(&typeE, 0, sizeof(typeE));
2620      typeE.cuOff = D3_INVALID_CUOFF;
2621      typeE.tag   = Te_TyTyDef;
2622      typeE.Te.TyTyDef.name = NULL;
2623      typeE.Te.TyTyDef.typeR = D3_INVALID_CUOFF;
2624      while (True) {
2625         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2626         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2627         if (attr == 0 && form == 0) break;
2628         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2629                            cc, c_die, False/*td3*/, form );
2630         if (attr == DW_AT_name && ctsMemSzB > 0) {
2631            typeE.Te.TyTyDef.name
2632               = ML_(dinfo_strdup)( "di.readdwarf3.ptD.typedef.1",
2633                                    (UChar*)(UWord)cts );
2634         }
2635         if (attr == DW_AT_type && ctsSzB > 0) {
2636            typeE.Te.TyTyDef.typeR = (UWord)cts;
2637         }
2638      }
2639      /* Do we have something that looks sane? */
2640      if (/* must have a name */
2641          typeE.Te.TyTyDef.name == NULL
2642          /* but the referred-to type can be absent */)
2643         goto bad_DIE;
2644      else
2645         goto acquire_Type;
2646   }
2647
2648   if (dtag == DW_TAG_subroutine_type) {
2649      /* function type? just record that one fact and ask no
2650         further questions. */
2651      VG_(memset)(&typeE, 0, sizeof(typeE));
2652      typeE.cuOff = D3_INVALID_CUOFF;
2653      typeE.tag   = Te_TyFn;
2654      goto acquire_Type;
2655   }
2656
2657   if (dtag == DW_TAG_volatile_type || dtag == DW_TAG_const_type) {
2658      Int have_ty = 0;
2659      VG_(memset)(&typeE, 0, sizeof(typeE));
2660      typeE.cuOff = D3_INVALID_CUOFF;
2661      typeE.tag   = Te_TyQual;
2662      typeE.Te.TyQual.qual
2663         = dtag == DW_TAG_volatile_type ? 'V' : 'C';
2664      /* target type defaults to 'void' */
2665      typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF;
2666      while (True) {
2667         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2668         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2669         if (attr == 0 && form == 0) break;
2670         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2671                            cc, c_die, False/*td3*/, form );
2672         if (attr == DW_AT_type && ctsSzB > 0) {
2673            typeE.Te.TyQual.typeR = (UWord)cts;
2674            have_ty++;
2675         }
2676      }
2677      /* gcc sometimes generates DW_TAG_const/volatile_type without
2678         DW_AT_type and GDB appears to interpret the type as 'const
2679         void' (resp. 'volatile void').  So just allow it .. */
2680      if (have_ty == 1 || have_ty == 0)
2681         goto acquire_Type;
2682      else
2683         goto bad_DIE;
2684   }
2685
2686   /* else ignore this DIE */
2687   return;
2688   /*NOTREACHED*/
2689
2690  acquire_Type:
2691   if (0) VG_(printf)("YYYY Acquire Type\n");
2692   vg_assert(ML_(TyEnt__is_type)( &typeE ));
2693   vg_assert(typeE.cuOff == D3_INVALID_CUOFF || typeE.cuOff == posn);
2694   typeE.cuOff = posn;
2695   VG_(addToXA)( tyents, &typeE );
2696   return;
2697   /*NOTREACHED*/
2698
2699  acquire_Atom:
2700   if (0) VG_(printf)("YYYY Acquire Atom\n");
2701   vg_assert(atomE.tag == Te_Atom);
2702   vg_assert(atomE.cuOff == D3_INVALID_CUOFF || atomE.cuOff == posn);
2703   atomE.cuOff = posn;
2704   VG_(addToXA)( tyents, &atomE );
2705   return;
2706   /*NOTREACHED*/
2707
2708  acquire_Field:
2709   /* For union members, Expr should be absent */
2710   if (0) VG_(printf)("YYYY Acquire Field\n");
2711   vg_assert(fieldE.tag == Te_Field);
2712   vg_assert(fieldE.Te.Field.nLoc <= 0 || fieldE.Te.Field.pos.loc != NULL);
2713   vg_assert(fieldE.Te.Field.nLoc != 0 || fieldE.Te.Field.pos.loc == NULL);
2714   if (fieldE.Te.Field.isStruct) {
2715      vg_assert(fieldE.Te.Field.nLoc != 0);
2716   } else {
2717      vg_assert(fieldE.Te.Field.nLoc == 0);
2718   }
2719   vg_assert(fieldE.cuOff == D3_INVALID_CUOFF || fieldE.cuOff == posn);
2720   fieldE.cuOff = posn;
2721   VG_(addToXA)( tyents, &fieldE );
2722   return;
2723   /*NOTREACHED*/
2724
2725  acquire_Bound:
2726   if (0) VG_(printf)("YYYY Acquire Bound\n");
2727   vg_assert(boundE.tag == Te_Bound);
2728   vg_assert(boundE.cuOff == D3_INVALID_CUOFF || boundE.cuOff == posn);
2729   boundE.cuOff = posn;
2730   VG_(addToXA)( tyents, &boundE );
2731   return;
2732   /*NOTREACHED*/
2733
2734  bad_DIE:
2735   set_position_of_Cursor( c_die,  saved_die_c_offset );
2736   set_position_of_Cursor( c_abbv, saved_abbv_c_offset );
2737   VG_(printf)("\nparse_type_DIE: confused by:\n");
2738   VG_(printf)(" <%d><%lx>: %s\n", level, posn, ML_(pp_DW_TAG)( dtag ) );
2739   while (True) {
2740      DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2741      DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2742      if (attr == 0 && form == 0) break;
2743      VG_(printf)("     %18s: ", ML_(pp_DW_AT)(attr));
2744      /* Get the form contents, so as to print them */
2745      get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2746                         cc, c_die, True, form );
2747      VG_(printf)("\t\n");
2748   }
2749   VG_(printf)("\n");
2750   cc->barf("parse_type_DIE: confused by the above DIE");
2751   /*NOTREACHED*/
2752}
2753
2754
2755/*------------------------------------------------------------*/
2756/*---                                                      ---*/
2757/*--- Compression of type DIE information                  ---*/
2758/*---                                                      ---*/
2759/*------------------------------------------------------------*/
2760
2761static UWord chase_cuOff ( Bool* changed,
2762                           XArray* /* of TyEnt */ ents,
2763                           TyEntIndexCache* ents_cache,
2764                           UWord cuOff )
2765{
2766   TyEnt* ent;
2767   ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, cuOff );
2768
2769   if (!ent) {
2770      VG_(printf)("chase_cuOff: no entry for 0x%05lx\n", cuOff);
2771      *changed = False;
2772      return cuOff;
2773   }
2774
2775   vg_assert(ent->tag != Te_EMPTY);
2776   if (ent->tag != Te_INDIR) {
2777      *changed = False;
2778      return cuOff;
2779   } else {
2780      vg_assert(ent->Te.INDIR.indR < cuOff);
2781      *changed = True;
2782      return ent->Te.INDIR.indR;
2783   }
2784}
2785
2786static
2787void chase_cuOffs_in_XArray ( Bool* changed,
2788                              XArray* /* of TyEnt */ ents,
2789                              TyEntIndexCache* ents_cache,
2790                              /*MOD*/XArray* /* of UWord */ cuOffs )
2791{
2792   Bool b2 = False;
2793   Word i, n = VG_(sizeXA)( cuOffs );
2794   for (i = 0; i < n; i++) {
2795      Bool   b = False;
2796      UWord* p = VG_(indexXA)( cuOffs, i );
2797      *p = chase_cuOff( &b, ents, ents_cache, *p );
2798      if (b)
2799         b2 = True;
2800   }
2801   *changed = b2;
2802}
2803
2804static Bool TyEnt__subst_R_fields ( XArray* /* of TyEnt */ ents,
2805                                    TyEntIndexCache* ents_cache,
2806                                    /*MOD*/TyEnt* te )
2807{
2808   Bool b, changed = False;
2809   switch (te->tag) {
2810      case Te_EMPTY:
2811         break;
2812      case Te_INDIR:
2813         te->Te.INDIR.indR
2814            = chase_cuOff( &b, ents, ents_cache, te->Te.INDIR.indR );
2815         if (b) changed = True;
2816         break;
2817      case Te_UNKNOWN:
2818         break;
2819      case Te_Atom:
2820         break;
2821      case Te_Field:
2822         te->Te.Field.typeR
2823            = chase_cuOff( &b, ents, ents_cache, te->Te.Field.typeR );
2824         if (b) changed = True;
2825         break;
2826      case Te_Bound:
2827         break;
2828      case Te_TyBase:
2829         break;
2830      case Te_TyPorR:
2831         te->Te.TyPorR.typeR
2832            = chase_cuOff( &b, ents, ents_cache, te->Te.TyPorR.typeR );
2833         if (b) changed = True;
2834         break;
2835      case Te_TyTyDef:
2836         te->Te.TyTyDef.typeR
2837            = chase_cuOff( &b, ents, ents_cache, te->Te.TyTyDef.typeR );
2838         if (b) changed = True;
2839         break;
2840      case Te_TyStOrUn:
2841         chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyStOrUn.fieldRs );
2842         if (b) changed = True;
2843         break;
2844      case Te_TyEnum:
2845         chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyEnum.atomRs );
2846         if (b) changed = True;
2847         break;
2848      case Te_TyArray:
2849         te->Te.TyArray.typeR
2850            = chase_cuOff( &b, ents, ents_cache, te->Te.TyArray.typeR );
2851         if (b) changed = True;
2852         chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyArray.boundRs );
2853         if (b) changed = True;
2854         break;
2855      case Te_TyFn:
2856         break;
2857      case Te_TyQual:
2858         te->Te.TyQual.typeR
2859            = chase_cuOff( &b, ents, ents_cache, te->Te.TyQual.typeR );
2860         if (b) changed = True;
2861         break;
2862      case Te_TyVoid:
2863         break;
2864      default:
2865         ML_(pp_TyEnt)(te);
2866         vg_assert(0);
2867   }
2868   return changed;
2869}
2870
2871/* Make a pass over 'ents'.  For each tyent, inspect the target of any
2872   'R' or 'Rs' fields (those which refer to other tyents), and replace
2873   any which point to INDIR nodes with the target of the indirection
2874   (which should not itself be an indirection).  In summary, this
2875   routine shorts out all references to indirection nodes. */
2876static
2877Word dedup_types_substitution_pass ( /*MOD*/XArray* /* of TyEnt */ ents,
2878                                     TyEntIndexCache* ents_cache )
2879{
2880   Word i, n, nChanged = 0;
2881   Bool b;
2882   n = VG_(sizeXA)( ents );
2883   for (i = 0; i < n; i++) {
2884      TyEnt* ent = VG_(indexXA)( ents, i );
2885      vg_assert(ent->tag != Te_EMPTY);
2886      /* We have to substitute everything, even indirections, so as to
2887         ensure that chains of indirections don't build up. */
2888      b = TyEnt__subst_R_fields( ents, ents_cache, ent );
2889      if (b)
2890         nChanged++;
2891   }
2892
2893   return nChanged;
2894}
2895
2896
2897/* Make a pass over 'ents', building a dictionary of TyEnts as we go.
2898   Look up each new tyent in the dictionary in turn.  If it is already
2899   in the dictionary, replace this tyent with an indirection to the
2900   existing one, and delete any malloc'd stuff hanging off this one.
2901   In summary, this routine commons up all tyents that are identical
2902   as defined by TyEnt__cmp_by_all_except_cuOff. */
2903static
2904Word dedup_types_commoning_pass ( /*MOD*/XArray* /* of TyEnt */ ents )
2905{
2906   Word    n, i, nDeleted;
2907   WordFM* dict; /* TyEnt* -> void */
2908   TyEnt*  ent;
2909   UWord   keyW, valW;
2910
2911   dict = VG_(newFM)(
2912             ML_(dinfo_zalloc), "di.readdwarf3.dtcp.1",
2913             ML_(dinfo_free),
2914             (Word(*)(UWord,UWord)) ML_(TyEnt__cmp_by_all_except_cuOff)
2915          );
2916
2917   nDeleted = 0;
2918   n = VG_(sizeXA)( ents );
2919   for (i = 0; i < n; i++) {
2920      ent = VG_(indexXA)( ents, i );
2921      vg_assert(ent->tag != Te_EMPTY);
2922
2923      /* Ignore indirections, although check that they are
2924         not forming a cycle. */
2925      if (ent->tag == Te_INDIR) {
2926         vg_assert(ent->Te.INDIR.indR < ent->cuOff);
2927         continue;
2928      }
2929
2930      keyW = valW = 0;
2931      if (VG_(lookupFM)( dict, &keyW, &valW, (UWord)ent )) {
2932         /* it's already in the dictionary. */
2933         TyEnt* old = (TyEnt*)keyW;
2934         vg_assert(valW == 0);
2935         vg_assert(old != ent);
2936         vg_assert(old->tag != Te_INDIR);
2937         /* since we are traversing the array in increasing order of
2938            cuOff: */
2939         vg_assert(old->cuOff < ent->cuOff);
2940         /* So anyway, dump this entry and replace it with an
2941            indirection to the one in the dictionary.  Note that the
2942            assertion above guarantees that we cannot create cycles of
2943            indirections, since we are always creating an indirection
2944            to a tyent with a cuOff lower than this one. */
2945         ML_(TyEnt__make_EMPTY)( ent );
2946         ent->tag = Te_INDIR;
2947         ent->Te.INDIR.indR = old->cuOff;
2948         nDeleted++;
2949      } else {
2950         /* not in dictionary; add it and keep going. */
2951         VG_(addToFM)( dict, (UWord)ent, 0 );
2952      }
2953   }
2954
2955   VG_(deleteFM)( dict, NULL, NULL );
2956
2957   return nDeleted;
2958}
2959
2960
2961static
2962void dedup_types ( Bool td3,
2963                   /*MOD*/XArray* /* of TyEnt */ ents,
2964                   TyEntIndexCache* ents_cache )
2965{
2966   Word m, n, i, nDel, nSubst, nThresh;
2967   if (0) td3 = True;
2968
2969   n = VG_(sizeXA)( ents );
2970
2971   /* If a commoning pass and a substitution pass both make fewer than
2972      this many changes, just stop.  It's pointless to burn up CPU
2973      time trying to compress the last 1% or so out of the array. */
2974   nThresh = n / 200;
2975
2976   /* First we must sort .ents by its .cuOff fields, so we
2977      can index into it. */
2978   VG_(setCmpFnXA)(
2979      ents,
2980      (Int(*)(void*,void*)) ML_(TyEnt__cmp_by_cuOff_only)
2981   );
2982   VG_(sortXA)( ents );
2983
2984   /* Now repeatedly do commoning and substitution passes over
2985      the array, until there are no more changes. */
2986   do {
2987      nDel   = dedup_types_commoning_pass ( ents );
2988      nSubst = dedup_types_substitution_pass ( ents, ents_cache );
2989      vg_assert(nDel >= 0 && nSubst >= 0);
2990      TRACE_D3("   %ld deletions, %ld substitutions\n", nDel, nSubst);
2991   } while (nDel > nThresh || nSubst > nThresh);
2992
2993   /* Sanity check: all INDIR nodes should point at a non-INDIR thing.
2994      In fact this should be true at the end of every loop iteration
2995      above (a commoning pass followed by a substitution pass), but
2996      checking it on every iteration is excessively expensive.  Note,
2997      this loop also computes 'm' for the stats printing below it. */
2998   m = 0;
2999   n = VG_(sizeXA)( ents );
3000   for (i = 0; i < n; i++) {
3001      TyEnt *ent, *ind;
3002      ent = VG_(indexXA)( ents, i );
3003      if (ent->tag != Te_INDIR) continue;
3004      m++;
3005      ind = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
3006                                         ent->Te.INDIR.indR );
3007      vg_assert(ind);
3008      vg_assert(ind->tag != Te_INDIR);
3009   }
3010
3011   TRACE_D3("Overall: %ld before, %ld after\n", n, n-m);
3012}
3013
3014
3015/*------------------------------------------------------------*/
3016/*---                                                      ---*/
3017/*--- Resolution of references to type DIEs                ---*/
3018/*---                                                      ---*/
3019/*------------------------------------------------------------*/
3020
3021/* Make a pass through the (temporary) variables array.  Examine the
3022   type of each variable, check is it found, and chase any Te_INDIRs.
3023   Postcondition is: each variable has a typeR field that refers to a
3024   valid type in tyents, or a Te_UNKNOWN, and is certainly guaranteed
3025   not to refer to a Te_INDIR.  (This is so that we can throw all the
3026   Te_INDIRs away later). */
3027
3028__attribute__((noinline))
3029static void resolve_variable_types (
3030               void (*barf)( HChar* ) __attribute__((noreturn)),
3031               /*R-O*/XArray* /* of TyEnt */ ents,
3032               /*MOD*/TyEntIndexCache* ents_cache,
3033               /*MOD*/XArray* /* of TempVar* */ vars
3034            )
3035{
3036   Word i, n;
3037   n = VG_(sizeXA)( vars );
3038   for (i = 0; i < n; i++) {
3039      TempVar* var = *(TempVar**)VG_(indexXA)( vars, i );
3040      /* This is the stated type of the variable.  But it might be
3041         an indirection, so be careful. */
3042      TyEnt* ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
3043                                                var->typeR );
3044      if (ent && ent->tag == Te_INDIR) {
3045         ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
3046                                            ent->Te.INDIR.indR );
3047         vg_assert(ent);
3048         vg_assert(ent->tag != Te_INDIR);
3049      }
3050
3051      /* Deal first with "normal" cases */
3052      if (ent && ML_(TyEnt__is_type)(ent)) {
3053         var->typeR = ent->cuOff;
3054         continue;
3055      }
3056
3057      /* If there's no ent, it probably we did not manage to read a
3058         type at the cuOffset which is stated as being this variable's
3059         type.  Maybe a deficiency in parse_type_DIE.  Complain. */
3060      if (ent == NULL) {
3061         VG_(printf)("\n: Invalid cuOff = 0x%05lx\n", var->typeR );
3062         barf("resolve_variable_types: "
3063              "cuOff does not refer to a known type");
3064      }
3065      vg_assert(ent);
3066      /* If ent has any other tag, something bad happened, along the
3067         lines of var->typeR not referring to a type at all. */
3068      vg_assert(ent->tag == Te_UNKNOWN);
3069      /* Just accept it; the type will be useless, but at least keep
3070         going. */
3071      var->typeR = ent->cuOff;
3072   }
3073}
3074
3075
3076/*------------------------------------------------------------*/
3077/*---                                                      ---*/
3078/*--- Parsing of Compilation Units                         ---*/
3079/*---                                                      ---*/
3080/*------------------------------------------------------------*/
3081
3082static Int cmp_TempVar_by_dioff ( void* v1, void* v2 ) {
3083   TempVar* t1 = *(TempVar**)v1;
3084   TempVar* t2 = *(TempVar**)v2;
3085   if (t1->dioff < t2->dioff) return -1;
3086   if (t1->dioff > t2->dioff) return 1;
3087   return 0;
3088}
3089
3090static void read_DIE (
3091   /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
3092   /*MOD*/XArray* /* of TyEnt */ tyents,
3093   /*MOD*/XArray* /* of TempVar* */ tempvars,
3094   /*MOD*/XArray* /* of GExpr* */ gexprs,
3095   /*MOD*/D3TypeParser* typarser,
3096   /*MOD*/D3VarParser* varparser,
3097   Cursor* c, Bool td3, CUConst* cc, Int level
3098)
3099{
3100   Cursor abbv;
3101   ULong  atag, abbv_code;
3102   UWord  posn;
3103   UInt   has_children;
3104   UWord  start_die_c_offset, start_abbv_c_offset;
3105   UWord  after_die_c_offset, after_abbv_c_offset;
3106
3107   /* --- Deal with this DIE --- */
3108   posn      = get_position_of_Cursor( c );
3109   abbv_code = get_ULEB128( c );
3110   set_abbv_Cursor( &abbv, td3, cc, abbv_code );
3111   atag      = get_ULEB128( &abbv );
3112   TRACE_D3("\n");
3113   TRACE_D3(" <%d><%lx>: Abbrev Number: %llu (%s)\n",
3114            level, posn, abbv_code, ML_(pp_DW_TAG)( atag ) );
3115
3116   if (atag == 0)
3117      cc->barf("read_DIE: invalid zero tag on DIE");
3118
3119   has_children = get_UChar( &abbv );
3120   if (has_children != DW_children_no && has_children != DW_children_yes)
3121      cc->barf("read_DIE: invalid has_children value");
3122
3123   /* We're set up to look at the fields of this DIE.  Hand it off to
3124      any parser(s) that want to see it.  Since they will in general
3125      advance both the DIE and abbrev cursors, remember their current
3126      settings so that we can then back up and do one final pass over
3127      the DIE, to print out its contents. */
3128
3129   start_die_c_offset  = get_position_of_Cursor( c );
3130   start_abbv_c_offset = get_position_of_Cursor( &abbv );
3131
3132   while (True) {
3133      ULong cts;
3134      Int   ctsSzB;
3135      UWord ctsMemSzB;
3136      ULong at_name = get_ULEB128( &abbv );
3137      ULong at_form = get_ULEB128( &abbv );
3138      if (at_name == 0 && at_form == 0) break;
3139      TRACE_D3("     %18s: ", ML_(pp_DW_AT)(at_name));
3140      /* Get the form contents, but ignore them; the only purpose is
3141         to print them, if td3 is True */
3142      get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
3143                         cc, c, td3, (DW_FORM)at_form );
3144      TRACE_D3("\t");
3145      TRACE_D3("\n");
3146   }
3147
3148   after_die_c_offset  = get_position_of_Cursor( c );
3149   after_abbv_c_offset = get_position_of_Cursor( &abbv );
3150
3151   set_position_of_Cursor( c,     start_die_c_offset );
3152   set_position_of_Cursor( &abbv, start_abbv_c_offset );
3153
3154   parse_type_DIE( tyents,
3155                   typarser,
3156                   (DW_TAG)atag,
3157                   posn,
3158                   level,
3159                   c,     /* DIE cursor */
3160                   &abbv, /* abbrev cursor */
3161                   cc,
3162                   td3 );
3163
3164   set_position_of_Cursor( c,     start_die_c_offset );
3165   set_position_of_Cursor( &abbv, start_abbv_c_offset );
3166
3167   parse_var_DIE( rangestree,
3168                  tempvars,
3169                  gexprs,
3170                  varparser,
3171                  (DW_TAG)atag,
3172                  posn,
3173                  level,
3174                  c,     /* DIE cursor */
3175                  &abbv, /* abbrev cursor */
3176                  cc,
3177                  td3 );
3178
3179   set_position_of_Cursor( c,     after_die_c_offset );
3180   set_position_of_Cursor( &abbv, after_abbv_c_offset );
3181
3182   /* --- Now recurse into its children, if any --- */
3183   if (has_children == DW_children_yes) {
3184      if (0) TRACE_D3("BEGIN children of level %d\n", level);
3185      while (True) {
3186         atag = peek_ULEB128( c );
3187         if (atag == 0) break;
3188         read_DIE( rangestree, tyents, tempvars, gexprs,
3189                   typarser, varparser,
3190                   c, td3, cc, level+1 );
3191      }
3192      /* Now we need to eat the terminating zero */
3193      atag = get_ULEB128( c );
3194      vg_assert(atag == 0);
3195      if (0) TRACE_D3("END children of level %d\n", level);
3196   }
3197
3198}
3199
3200
/* Worker for the DWARF3 variable/type reader.  In order, it:

   - walks .debug_ranges and .debug_abbrev from start to end, emitting
     their contents through TRACE_SYMTAB / TRACE_D3;
   - loops over the compilation units in .debug_info, calling
     parse_CU_Header and then read_DIE on each unit's top-level DIE,
     which accumulates into 'tyents', 'tempvars' and 'gexprs';
   - de-duplicates the type entries (dedup_types), resolves the types
     of the variables (resolve_variable_types), and stores the
     non-Te_INDIR entries in di->admin_tyents;
   - biases every GExpr (bias_GX) and hands each usable variable to
     ML_(addVar), once per address range;
   - frees the temporary structures and stores 'gexprs' in
     di->admin_gexprs.

   'barf' is the error-reporting function; it is declared noreturn,
   is passed to init_Cursor and resolve_variable_types, and is also
   called directly below.  Both di->admin_tyents and di->admin_gexprs
   are asserted to be NULL before they are set. */
3201static
3202void new_dwarf3_reader_wrk (
3203   struct _DebugInfo* di,
3204   __attribute__((noreturn)) void (*barf)( HChar* ),
3205   UChar* debug_info_img,   SizeT debug_info_sz,
3206   UChar* debug_abbv_img,   SizeT debug_abbv_sz,
3207   UChar* debug_line_img,   SizeT debug_line_sz,
3208   UChar* debug_str_img,    SizeT debug_str_sz,
3209   UChar* debug_ranges_img, SizeT debug_ranges_sz,
3210   UChar* debug_loc_img,    SizeT debug_loc_sz
3211)
3212{
3213   XArray* /* of TyEnt */     tyents;
3214   XArray* /* of TyEnt */     tyents_to_keep;
3215   XArray* /* of GExpr* */    gexprs;
3216   XArray* /* of TempVar* */  tempvars;
3217   WordFM* /* of (XArray* of AddrRange, void) */ rangestree;
3218   TyEntIndexCache* tyents_cache = NULL;
3219   TyEntIndexCache* tyents_to_keep_cache = NULL;
3220   TempVar *varp, *varp2;
3221   GExpr* gexpr;
3222   Cursor abbv; /* for showing .debug_abbrev */
3223   Cursor info; /* primary cursor for parsing .debug_info */
3224   Cursor ranges; /* for showing .debug_ranges */
3225   D3TypeParser typarser;
3226   D3VarParser varparser;
3227   Addr  dr_base;
3228   UWord dr_offset;
3229   Word  i, j, n;
3230   Bool td3 = di->trace_symtab;
3231   XArray* /* of TempVar* */ dioff_lookup_tab;
3232#if 0
3233   /* This doesn't work properly because it assumes all entries are
3234      packed end to end, with no holes.  But that doesn't always
3235      appear to be the case, so it loses sync.  And the D3 spec
3236      doesn't appear to require a no-hole situation either. */
3237   /* Display .debug_loc */
3238   Addr  dl_base;
3239   UWord dl_offset;
3240   Cursor loc; /* for showing .debug_loc */
3241   TRACE_SYMTAB("\n");
3242   TRACE_SYMTAB("\n------ The contents of .debug_loc ------\n");
3243   TRACE_SYMTAB("    Offset   Begin    End      Expression\n");
3244   init_Cursor( &loc, debug_loc_img,
3245                debug_loc_sz, 0, barf,
3246                "Overrun whilst reading .debug_loc section(1)" );
3247   dl_base = 0;
3248   dl_offset = 0;
3249   while (True) {
3250      UWord  w1, w2;
3251      UWord  len;
3252      if (is_at_end_Cursor( &loc ))
3253         break;
3254
3255      /* Read a (host-)word pair.  This is something of a hack since
3256         the word size to read is really dictated by the ELF file;
3257         however, we assume we're reading a file with the same
3258         word-sizeness as the host.  Reasonably enough. */
3259      w1 = get_UWord( &loc );
3260      w2 = get_UWord( &loc );
3261
3262      if (w1 == 0 && w2 == 0) {
3263         /* end of list.  reset 'base' */
3264         TRACE_D3("    %08lx <End of list>\n", dl_offset);
3265         dl_base = 0;
3266         dl_offset = get_position_of_Cursor( &loc );
3267         continue;
3268      }
3269
3270      if (w1 == -1UL) {
3271         /* new value for 'base' */
3272         TRACE_D3("    %08lx %16lx %08lx (base address)\n",
3273                  dl_offset, w1, w2);
3274         dl_base = w2;
3275         continue;
3276      }
3277
3278      /* else a location expression follows */
3279      TRACE_D3("    %08lx %08lx %08lx ",
3280               dl_offset, w1 + dl_base, w2 + dl_base);
3281      len = (UWord)get_UShort( &loc );
3282      while (len > 0) {
3283         UChar byte = get_UChar( &loc );
3284         TRACE_D3("%02x", (UInt)byte);
3285         len--;
3286      }
3287      TRACE_SYMTAB("\n");
3288   }
3289#endif
3290
3291   /* Display .debug_ranges */
3292   TRACE_SYMTAB("\n");
3293   TRACE_SYMTAB("\n------ The contents of .debug_ranges ------\n");
3294   TRACE_SYMTAB("    Offset   Begin    End\n");
3295   init_Cursor( &ranges, debug_ranges_img,
3296                debug_ranges_sz, 0, barf,
3297                "Overrun whilst reading .debug_ranges section(1)" );
   /* Note: dr_offset is only refreshed when an end-of-list pair is
      seen, so every entry of a list is shown against the offset at
      which that list starts. */
3298   dr_base = 0;
3299   dr_offset = 0;
3300   while (True) {
3301      UWord  w1, w2;
3302
3303      if (is_at_end_Cursor( &ranges ))
3304         break;
3305
3306      /* Read a (host-)word pair.  This is something of a hack since
3307         the word size to read is really dictated by the ELF file;
3308         however, we assume we're reading a file with the same
3309         word-sizeness as the host.  Reasonably enough. */
3310      w1 = get_UWord( &ranges );
3311      w2 = get_UWord( &ranges );
3312
3313      if (w1 == 0 && w2 == 0) {
3314         /* end of list.  reset 'base' */
3315         TRACE_D3("    %08lx <End of list>\n", dr_offset);
3316         dr_base = 0;
3317         dr_offset = get_position_of_Cursor( &ranges );
3318         continue;
3319      }
3320
3321      if (w1 == -1UL) {
3322         /* new value for 'base' */
3323         TRACE_D3("    %08lx %16lx %08lx (base address)\n",
3324                  dr_offset, w1, w2);
3325         dr_base = w2;
3326         continue;
3327      }
3328
3329      /* else a range [w1+base, w2+base) is denoted */
3330      TRACE_D3("    %08lx %08lx %08lx\n",
3331               dr_offset, w1 + dr_base, w2 + dr_base);
3332   }
3333
3334   /* Display .debug_abbrev */
3335   init_Cursor( &abbv, debug_abbv_img, debug_abbv_sz, 0, barf,
3336                "Overrun whilst reading .debug_abbrev section" );
3337   TRACE_SYMTAB("\n");
3338   TRACE_SYMTAB("\n------ The contents of .debug_abbrev ------\n");
3339   while (True) {
3340      if (is_at_end_Cursor( &abbv ))
3341         break;
3342      /* Read one abbreviation table */
3343      TRACE_D3("  Number TAG\n");
3344      while (True) {
3345         ULong atag;
3346         UInt  has_children;
3347         ULong acode = get_ULEB128( &abbv );
3348         if (acode == 0) break; /* end of the table */
3349         atag = get_ULEB128( &abbv );
3350         has_children = get_UChar( &abbv );
3351         TRACE_D3("   %llu      %s    [%s]\n",
3352                  acode, ML_(pp_DW_TAG)(atag),
3353                         ML_(pp_DW_children)(has_children));
         /* The attribute list of one abbreviation ends with a
            (0,0) name/form pair. */
3354         while (True) {
3355            ULong at_name = get_ULEB128( &abbv );
3356            ULong at_form = get_ULEB128( &abbv );
3357            if (at_name == 0 && at_form == 0) break;
3358            TRACE_D3("    %18s %s\n",
3359                     ML_(pp_DW_AT)(at_name), ML_(pp_DW_FORM)(at_form));
3360         }
3361      }
3362   }
3363   TRACE_SYMTAB("\n");
3364
3365   /* Now loop over the Compilation Units listed in the .debug_info
3366      section (see D3SPEC sec 7.5) paras 1 and 2.  Each compilation
3367      unit contains a Compilation Unit Header followed by precisely
3368      one DW_TAG_compile_unit or DW_TAG_partial_unit DIE. */
3369   init_Cursor( &info, debug_info_img, debug_info_sz, 0, barf,
3370                "Overrun whilst reading .debug_info section" );
3371
3372   /* We'll park the harvested type information in here.  Also create
3373      a fake "void" entry with offset D3_FAKEVOID_CUOFF, so we always
3374      have at least one type entry to refer to.  D3_FAKEVOID_CUOFF is
3375      huge and presumably will not occur in any valid DWARF3 file --
3376      it would need to have a .debug_info section 4GB long for that to
3377      happen.  These type entries end up in the DebugInfo. */
3378   tyents = VG_(newXA)( ML_(dinfo_zalloc),
3379                        "di.readdwarf3.ndrw.1 (TyEnt temp array)",
3380                        ML_(dinfo_free), sizeof(TyEnt) );
3381   { TyEnt tyent;
3382     VG_(memset)(&tyent, 0, sizeof(tyent));
3383     tyent.tag   = Te_TyVoid;
3384     tyent.cuOff = D3_FAKEVOID_CUOFF;
3385     tyent.Te.TyVoid.isFake = True;
3386     VG_(addToXA)( tyents, &tyent );
3387   }
   /* Likewise add a Te_UNKNOWN entry, at offset D3_INVALID_CUOFF. */
3388   { TyEnt tyent;
3389     VG_(memset)(&tyent, 0, sizeof(tyent));
3390     tyent.tag   = Te_UNKNOWN;
3391     tyent.cuOff = D3_INVALID_CUOFF;
3392     VG_(addToXA)( tyents, &tyent );
3393   }
3394
3395   /* This is a tree used to unique-ify the range lists that are
3396      manufactured by parse_var_DIE.  References to the keys in the
3397      tree wind up in .rngMany fields in TempVars.  We'll need to
3398      delete this tree, and the XArrays attached to it, at the end of
3399      this function. */
3400   rangestree = VG_(newFM)( ML_(dinfo_zalloc),
3401                            "di.readdwarf3.ndrw.2 (rangestree)",
3402                            ML_(dinfo_free),
3403                            (Word(*)(UWord,UWord))cmp__XArrays_of_AddrRange );
3404
3405   /* List of variables we're accumulating.  These don't end up in the
3406      DebugInfo; instead their contents are handed to ML_(addVar) and
3407      the list elements are then deleted. */
3408   tempvars = VG_(newXA)( ML_(dinfo_zalloc),
3409                          "di.readdwarf3.ndrw.3 (TempVar*s array)",
3410                          ML_(dinfo_free),
3411                          sizeof(TempVar*) );
3412
3413   /* List of GExprs we're accumulating.  These wind up in the
3414      DebugInfo. */
3415   gexprs = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.4",
3416                        ML_(dinfo_free), sizeof(GExpr*) );
3417
3418   /* We need a D3TypeParser to keep track of partially constructed
3419      types.  It'll be discarded as soon as we've completed the CU,
3420      since the resulting information is tipped in to 'tyents' as it
3421      is generated. */
3422   VG_(memset)( &typarser, 0, sizeof(typarser) );
3423   typarser.sp = -1;
3424   typarser.language = '?';
3425   for (i = 0; i < N_D3_TYPE_STACK; i++) {
3426      typarser.qparentE[i].tag   = Te_EMPTY;
3427      typarser.qparentE[i].cuOff = D3_INVALID_CUOFF;
3428   }
3429
   /* And a D3VarParser, zeroed and with an empty stack (sp == -1). */
3430   VG_(memset)( &varparser, 0, sizeof(varparser) );
3431   varparser.sp = -1;
3432
3433   TRACE_D3("\n------ Parsing .debug_info section ------\n");
3434   while (True) {
3435      UWord   cu_start_offset, cu_offset_now;
3436      CUConst cc;
3437      /* It may be that the stated size of this CU is larger than the
3438         amount of stuff actually in it.  icc9 seems to generate CUs
3439         thusly.  We use these variables to figure out if this is
3440         indeed the case, and if so how many bytes we need to skip to
3441         get to the start of the next CU.  Not skipping those bytes
3442         causes us to misidentify the start of the next CU, and it all
3443         goes badly wrong after that (not surprisingly). */
3444      UWord cu_size_including_IniLen, cu_amount_used;
3445
3446      /* It seems icc9 finishes the DIE info before debug_info_sz
3447         bytes have been used up.  So be flexible, and declare the
3448         sequence complete if there is not enough remaining bytes to
3449         hold even the smallest conceivable CU header.  (11 bytes I
3450         reckon). */
3451      /* JRS 23Jan09: I suspect this is no longer necessary now that
3452         the code below contains a 'while (cu_amount_used <
3453         cu_size_including_IniLen ...'  style loop, which skips over
3454         any leftover bytes at the end of a CU in the case where the
3455         CU's stated size is larger than its actual size (as
3456         determined by reading all its DIEs).  However, for prudence,
3457         I'll leave the following test in place.  I can't see that a
3458         CU header can be smaller than 11 bytes, so I don't think
3459         there's any harm possible through the test -- it just adds
3460         robustness. */
3461      Word avail = get_remaining_length_Cursor( &info );
3462      if (avail < 11) {
3463         if (avail > 0)
3464            TRACE_D3("new_dwarf3_reader_wrk: warning: "
3465                     "%ld unused bytes after end of DIEs\n", avail);
3466         break;
3467      }
3468
3469      /* Check that the varparser's and typarser's stacks are in a
         sane state. */
3470      vg_assert(varparser.sp == -1);
3471      for (i = 0; i < N_D3_VAR_STACK; i++) {
3472         vg_assert(varparser.ranges[i] == NULL);
3473         vg_assert(varparser.level[i] == 0);
3474      }
3475      for (i = 0; i < N_D3_TYPE_STACK; i++) {
3476         vg_assert(typarser.qparentE[i].cuOff == D3_INVALID_CUOFF);
3477         vg_assert(typarser.qparentE[i].tag   == Te_EMPTY);
3478         vg_assert(typarser.qlevel[i] == 0);
3479      }
3480
3481      cu_start_offset = get_position_of_Cursor( &info );
3482      TRACE_D3("\n");
3483      TRACE_D3("  Compilation Unit @ offset 0x%lx:\n", cu_start_offset);
3484      /* parse_CU_header initialises the CU's set_abbv_Cursor cache
3485         (saC_cache) */
3486      parse_CU_Header( &cc, td3, &info,
3487                       (UChar*)debug_abbv_img, debug_abbv_sz );
      /* Fill in the remaining CUConst fields: the section images and
         sizes, where this CU starts, and the owning DebugInfo. */
3488      cc.debug_str_img    = debug_str_img;
3489      cc.debug_str_sz     = debug_str_sz;
3490      cc.debug_ranges_img = debug_ranges_img;
3491      cc.debug_ranges_sz  = debug_ranges_sz;
3492      cc.debug_loc_img    = debug_loc_img;
3493      cc.debug_loc_sz     = debug_loc_sz;
3494      cc.debug_line_img   = debug_line_img;
3495      cc.debug_line_sz    = debug_line_sz;
3496      cc.debug_info_img   = debug_info_img;
3497      cc.debug_info_sz    = debug_info_sz;
3498      cc.cu_start_offset  = cu_start_offset;
3499      cc.di = di;
3500      /* The CU's svma can be deduced by looking at the AT_low_pc
3501         value in the top level TAG_compile_unit, which is the topmost
3502         DIE.  We'll leave it for the 'varparser' to acquire that info
3503         and fill it in -- since it is the only party to want to know
3504         it. */
3505      cc.cu_svma_known = False;
3506      cc.cu_svma       = 0;
3507
3508      /* Create a fake outermost-level range covering the entire
3509         address range.  So we always have *something* to catch all
3510         variable declarations. */
3511      varstack_push( &cc, &varparser, td3,
3512                     unitary_range_list(0UL, ~0UL),
3513                     -1, False/*isFunc*/, NULL/*fbGX*/ );
3514
3515      /* And set up the file name table.  When we come across the top
3516         level DIE for this CU (which is what the next call to
3517         read_DIE should process) we will copy all the file names out
3518         of the .debug_line img area and use this table to look up the
3519         copies when we later see filename numbers in DW_TAG_variables
3520         etc. */
3521      vg_assert(!varparser.filenameTable );
3522      varparser.filenameTable
3523         = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.5",
3524                       ML_(dinfo_free),
3525                       sizeof(UChar*) );
3526      vg_assert(varparser.filenameTable);
3527
3528      /* Now read the one-and-only top-level DIE for this CU. */
3529      vg_assert(varparser.sp == 0);
3530      read_DIE( rangestree,
3531                tyents, tempvars, gexprs,
3532                &typarser, &varparser,
3533                &info, td3, &cc, 0 );
3534
3535      cu_offset_now = get_position_of_Cursor( &info );
3536
3537      if (0) VG_(printf)("Travelled: %lu  size %llu\n",
3538                         cu_offset_now - cc.cu_start_offset,
3539                         cc.unit_length + (cc.is_dw64 ? 12 : 4));
3540
3541      /* How big the CU claims it is .. */
3542      cu_size_including_IniLen = cc.unit_length + (cc.is_dw64 ? 12 : 4);
3543      /* .. vs how big we have found it to be */
3544      cu_amount_used = cu_offset_now - cc.cu_start_offset;
3545
      /* NOTE(review): cu_offset_now (UWord) and debug_info_sz (SizeT)
         are presumably unsigned but are formatted with %ld here --
         confirm, and consider %lu. */
3546      if (1) TRACE_D3("offset now %ld, d-i-size %ld\n",
3547                      cu_offset_now, debug_info_sz);
3548      if (cu_offset_now > debug_info_sz)
3549         barf("toplevel DIEs beyond end of CU");
3550
3551      /* If the CU is bigger than it claims to be, we've got a serious
3552         problem. */
3553      if (cu_amount_used > cu_size_including_IniLen)
3554         barf("CU's actual size appears to be larger than it claims it is");
3555
3556      /* If the CU is smaller than it claims to be, we need to skip some
3557         bytes.  Loop updates cu_offset_now and cu_amount_used. */
3558      while (cu_amount_used < cu_size_including_IniLen
3559             && get_remaining_length_Cursor( &info ) > 0) {
3560         if (0) VG_(printf)("SKIP\n");
3561         (void)get_UChar( &info );
3562         cu_offset_now = get_position_of_Cursor( &info );
3563         cu_amount_used = cu_offset_now - cc.cu_start_offset;
3564      }
3565
      /* NOTE(review): leaving the loop here bypasses the two preen
         calls and the deletion of varparser.filenameTable just
         below; the table is instead deleted by the cleanup code at
         the end of this function.  Whether skipping the preens for
         the final CU matters depends on varstack_preen and
         typestack_preen -- confirm. */
3566      if (cu_offset_now == debug_info_sz)
3567         break;
3568
3569      /* Preen to level -2.  DIEs have level >= 0 so -2 cannot occur
3570         anywhere else at all.  Our fake the-entire-address-space
3571         range is at level -1, so preening to -2 should completely
3572         empty the stack out. */
3573      TRACE_D3("\n");
3574      varstack_preen( &varparser, td3, -2 );
3575      /* Similarly, empty the type stack out. */
3576      typestack_preen( &typarser, td3, -2 );
3577      /* else keep going */
3578
3579      TRACE_D3("set_abbv_Cursor cache: %lu queries, %lu misses\n",
3580               cc.saC_cache_queries, cc.saC_cache_misses);
3581
      /* Drop this CU's file name table; the next iteration asserts
         that it is NULL before creating a new one. */
3582      vg_assert(varparser.filenameTable );
3583      VG_(deleteXA)( varparser.filenameTable );
3584      varparser.filenameTable = NULL;
3585   }
3586
3587   /* From here on we're post-processing the stuff we got
3588      out of the .debug_info section. */
3589   if (td3) {
3590      TRACE_D3("\n");
3591      ML_(pp_TyEnts)(tyents, "Initial type entity (TyEnt) array");
3592      TRACE_D3("\n");
3593      TRACE_D3("------ Compressing type entries ------\n");
3594   }
3595
3596   tyents_cache = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.6",
3597                                     sizeof(TyEntIndexCache) );
3598   ML_(TyEntIndexCache__invalidate)( tyents_cache );
3599   dedup_types( td3, tyents, tyents_cache );
3600   if (td3) {
3601      TRACE_D3("\n");
3602      ML_(pp_TyEnts)(tyents, "After type entity (TyEnt) compression");
3603   }
3604
3605   TRACE_D3("\n");
3606   TRACE_D3("------ Resolving the types of variables ------\n" );
3607   resolve_variable_types( barf, tyents, tyents_cache, tempvars );
3608
3609   /* Copy all the non-INDIR tyents into a new table.  For large
3610      .so's, about 90% of the tyents will by now have been resolved to
3611      INDIRs, and we no longer need them, and so don't need to store
3612      them. */
3613   tyents_to_keep
3614      = VG_(newXA)( ML_(dinfo_zalloc),
3615                    "di.readdwarf3.ndrw.7 (TyEnt to-keep array)",
3616                    ML_(dinfo_free), sizeof(TyEnt) );
3617   n = VG_(sizeXA)( tyents );
3618   for (i = 0; i < n; i++) {
3619      TyEnt* ent = VG_(indexXA)( tyents, i );
3620      if (ent->tag != Te_INDIR)
3621         VG_(addToXA)( tyents_to_keep, ent );
3622   }
3623
   /* The original array and its cache are no longer needed. */
3624   VG_(deleteXA)( tyents );
3625   tyents = NULL;
3626   ML_(dinfo_free)( tyents_cache );
3627   tyents_cache = NULL;
3628
3629   /* Sort tyents_to_keep so we can lookup in it.  A complete (if
3630      minor) waste of time, since tyents itself is sorted, but
3631      necessary since VG_(lookupXA) refuses to cooperate if we
3632      don't. */
3633   VG_(setCmpFnXA)(
3634      tyents_to_keep,
3635      (Int(*)(void*,void*)) ML_(TyEnt__cmp_by_cuOff_only)
3636   );
3637   VG_(sortXA)( tyents_to_keep );
3638
3639   /* Enable cacheing on tyents_to_keep */
3640   tyents_to_keep_cache
3641      = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.8",
3642                           sizeof(TyEntIndexCache) );
3643   ML_(TyEntIndexCache__invalidate)( tyents_to_keep_cache );
3644
3645   /* And record the tyents in the DebugInfo.  We do this before
3646      starting to hand variables to ML_(addVar), since if ML_(addVar)
3647      wants to do debug printing (of the types of said vars) then it
3648      will need the tyents.*/
3649   vg_assert(!di->admin_tyents);
3650   di->admin_tyents = tyents_to_keep;
3651
3652   /* Bias all the location expressions. */
3653   TRACE_D3("\n");
3654   TRACE_D3("------ Biasing the location expressions ------\n" );
3655
3656   n = VG_(sizeXA)( gexprs );
3657   for (i = 0; i < n; i++) {
3658      gexpr = *(GExpr**)VG_(indexXA)( gexprs, i );
3659      bias_GX( gexpr, di );
3660   }
3661
3662   TRACE_D3("\n");
3663   TRACE_D3("------ Acquired the following variables: ------\n\n");
3664
3665   /* Park (pointers to) all the vars in an XArray, so we can look up
3666      abstract origins quickly.  The array is sorted (hence, looked-up
3667      by) the .dioff fields.  Since the .dioffs should be in strictly
3668      ascending order, there is no need to sort the array after
3669      construction.  The ascendingness is however asserted for. */
3670   dioff_lookup_tab
3671      = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.9",
3672                    ML_(dinfo_free),
3673                    sizeof(TempVar*) );
3674   vg_assert(dioff_lookup_tab);
3675
3676   n = VG_(sizeXA)( tempvars );
3677   for (i = 0; i < n; i++) {
3678      varp = *(TempVar**)VG_(indexXA)( tempvars, i );
3679      if (i > 0) {
3680         varp2 = *(TempVar**)VG_(indexXA)( tempvars, i-1 );
3681         /* why should this hold?  Only, I think, because we've
3682            constructed the array by reading .debug_info sequentially,
3683            and so the array .dioff fields should reflect that, and be
3684            strictly ascending. */
3685         vg_assert(varp2->dioff < varp->dioff);
3686      }
3687      VG_(addToXA)( dioff_lookup_tab, &varp );
3688   }
3689   VG_(setCmpFnXA)( dioff_lookup_tab, cmp_TempVar_by_dioff );
   /* The elements are already in ascending .dioff order (asserted in
      the loop above), so this sort should not move anything.
      NOTE(review): the comment on tyents_to_keep above says
      VG_(lookupXA) refuses to work on an array that has not been
      sorted, so this call is probably still required -- confirm
      before acting on the FIXME. */
3690   VG_(sortXA)( dioff_lookup_tab ); /* POINTLESS? FIXME: rm (see note) */
3691
3692   /* Now visit each var.  Collect up as much info as possible for
3693      each var and hand it to ML_(addVar). */
3694   n = VG_(sizeXA)( tempvars );
3695   for (j = 0; j < n; j++) {
3696      TyEnt* ent;
3697      varp = *(TempVar**)VG_(indexXA)( tempvars, j );
3698
3699      /* Possibly show .. */
3700      if (td3) {
3701         VG_(printf)("<%lx> addVar: level %d: %s :: ",
3702                     varp->dioff,
3703                     varp->level,
3704                     varp->name ? varp->name : (UChar*)"<anon_var>" );
3705         if (varp->typeR) {
3706            ML_(pp_TyEnt_C_ishly)( tyents_to_keep, varp->typeR );
3707         } else {
3708            VG_(printf)("NULL");
3709         }
3710         VG_(printf)("\n  Loc=");
3711         if (varp->gexpr) {
3712            ML_(pp_GX)(varp->gexpr);
3713         } else {
3714            VG_(printf)("NULL");
3715         }
3716         VG_(printf)("\n");
3717         if (varp->fbGX) {
3718            VG_(printf)("  FrB=");
3719            ML_(pp_GX)( varp->fbGX );
3720            VG_(printf)("\n");
3721         } else {
3722            VG_(printf)("  FrB=none\n");
3723         }
3724         VG_(printf)("  declared at: %s:%d\n",
3725                     varp->fName ? varp->fName : (UChar*)"NULL",
3726                     varp->fLine );
3727         if (varp->absOri != (UWord)D3_INVALID_CUOFF)
3728            VG_(printf)("  abstract origin: <%lx>\n", varp->absOri);
3729      }
3730
3731      /* Skip variables which have no location.  These must be
3732         abstract instances; they are useless as-is since with no
3733         location they have no specified memory location.  They will
3734         presumably be referred to via the absOri fields of other
3735         variables. */
3736      if (!varp->gexpr) {
3737         TRACE_D3("  SKIP (no location)\n\n");
3738         continue;
3739      }
3740
3741      /* So it has a location, at least.  If it refers to some other
3742         entry through its absOri field, pull in further info through
3743         that. */
3744      if (varp->absOri != (UWord)D3_INVALID_CUOFF) {
3745         Bool found;
3746         Word ixFirst, ixLast;
3747         TempVar key;
3748         TempVar* keyp = &key;
3749         TempVar *varAI;
3750         VG_(memset)(&key, 0, sizeof(key)); /* not necessary */
3751         key.dioff = varp->absOri; /* this is what we want to find */
         /* The table holds TempVar* elements, so the key handed to
            the lookup is the address of a TempVar*. */
3752         found = VG_(lookupXA)( dioff_lookup_tab, &keyp,
3753                                &ixFirst, &ixLast );
3754         if (!found) {
3755            /* barf("DW_AT_abstract_origin can't be resolved"); */
3756            TRACE_D3("  SKIP (DW_AT_abstract_origin can't be resolved)\n\n");
3757            continue;
3758         }
3759         /* If the following fails, there is more than one entry with
3760            the same dioff.  Which can't happen. */
3761         vg_assert(ixFirst == ixLast);
3762         varAI = *(TempVar**)VG_(indexXA)( dioff_lookup_tab, ixFirst );
3763         /* stay sane */
3764         vg_assert(varAI);
3765         vg_assert(varAI->dioff == varp->absOri);
3766
3767         /* Copy what useful info we can. */
3768         if (varAI->typeR && !varp->typeR)
3769            varp->typeR = varAI->typeR;
3770         if (varAI->name && !varp->name)
3771            varp->name = varAI->name;
3772         if (varAI->fName && !varp->fName)
3773            varp->fName = varAI->fName;
3774         if (varAI->fLine > 0 && varp->fLine == 0)
3775            varp->fLine = varAI->fLine;
3776      }
3777
3778      /* Give it a name if it doesn't have one. */
3779      if (!varp->name)
3780         varp->name = ML_(addStr)( di, "<anon_var>", -1 );
3781
3782      /* So now does it have enough info to be useful? */
3783      /* NOTE: re typeR: this is a hack.  If typeR is Te_UNKNOWN then
3784         the type didn't get resolved.  Really, in that case
3785         something's broken earlier on, and should be fixed, rather
3786         than just skipping the variable. */
3787      ent = ML_(TyEnts__index_by_cuOff)( tyents_to_keep,
3788                                         tyents_to_keep_cache,
3789                                         varp->typeR );
3790      /* The next two assertions should be guaranteed by
3791         our previous call to resolve_variable_types. */
3792      vg_assert(ent);
3793      vg_assert(ML_(TyEnt__is_type)(ent) || ent->tag == Te_UNKNOWN);
3794
3795      if (ent->tag == Te_UNKNOWN) continue;
3796
3797      vg_assert(varp->gexpr);
3798      vg_assert(varp->name);
3799      vg_assert(varp->typeR);
3800      vg_assert(varp->level >= 0);
3801
3802      /* Ok.  So we're going to keep it.  Call ML_(addVar) once for
3803         each address range in which the variable exists. */
3804      TRACE_D3("  ACQUIRE for range(s) ");
3805      { AddrRange  oneRange;
3806        AddrRange* varPcRanges;
3807        Word       nVarPcRanges;
3808        /* Set up to iterate over address ranges, however
3809           represented. */
3810        if (varp->nRanges == 0 || varp->nRanges == 1) {
           /* Zero or one range: it lives in rngOneMin/rngOneMax and
              rngMany must be unset. */
3811           vg_assert(!varp->rngMany);
3812           if (varp->nRanges == 0) {
3813              vg_assert(varp->rngOneMin == 0);
3814              vg_assert(varp->rngOneMax == 0);
3815           }
3816           nVarPcRanges = varp->nRanges;
3817           oneRange.aMin = varp->rngOneMin;
3818           oneRange.aMax = varp->rngOneMax;
3819           varPcRanges = &oneRange;
3820        } else {
           /* Two or more ranges: they live in the rngMany XArray and
              the rngOne fields must be zero. */
3821           vg_assert(varp->rngMany);
3822           vg_assert(varp->rngOneMin == 0);
3823           vg_assert(varp->rngOneMax == 0);
3824           nVarPcRanges = VG_(sizeXA)(varp->rngMany);
3825           vg_assert(nVarPcRanges >= 2);
3826           vg_assert(nVarPcRanges == (Word)varp->nRanges);
3827           varPcRanges = VG_(indexXA)(varp->rngMany, 0);
3828        }
3829        if (varp->level == 0)
3830           vg_assert( nVarPcRanges == 1 );
3831        /* and iterate */
3832        for (i = 0; i < nVarPcRanges; i++) {
3833           Addr pcMin = varPcRanges[i].aMin;
3834           Addr pcMax = varPcRanges[i].aMax;
3835           vg_assert(pcMin <= pcMax);
3836           /* Level 0 is the global address range.  So at level 0 we
3837              don't want to bias pcMin/pcMax; but at all other levels
3838              we do since those are derived from svmas in the Dwarf
3839              we're reading.  Be paranoid ... */
3840           if (varp->level == 0) {
3841              vg_assert(pcMin == (Addr)0);
3842              vg_assert(pcMax == ~(Addr)0);
3843           } else {
3844              /* vg_assert(pcMin > (Addr)0);
3845                 No .. we can legitimately expect to see ranges like
3846                 0x0-0x11D (pre-biasing, of course). */
3847              vg_assert(pcMax < ~(Addr)0);
3848           }
3849
3850           /* Apply text biasing, for non-global variables. */
3851           if (varp->level > 0) {
3852              pcMin += di->text_debug_bias;
3853              pcMax += di->text_debug_bias;
3854           }
3855
           /* Trace output only: start a fresh line after every two
              ranges. */
3856           if (i > 0 && (i%2) == 0)
3857              TRACE_D3("\n                       ");
3858           TRACE_D3("[%#lx,%#lx] ", pcMin, pcMax );
3859
3860           ML_(addVar)(
3861              di, varp->level,
3862                  pcMin, pcMax,
3863                  varp->name,  varp->typeR,
3864                  varp->gexpr, varp->fbGX,
3865                  varp->fName, varp->fLine, td3
3866           );
3867        }
3868      }
3869
3870      TRACE_D3("\n\n");
3871      /* and move on to the next var */
3872   }
3873
3874   /* Now free all the TempVars */
3875   n = VG_(sizeXA)( tempvars );
3876   for (i = 0; i < n; i++) {
3877      varp = *(TempVar**)VG_(indexXA)( tempvars, i );
3878      ML_(dinfo_free)(varp);
3879   }
3880   VG_(deleteXA)( tempvars );
3881   tempvars = NULL;
3882
3883   /* and the temp lookup table */
3884   VG_(deleteXA)( dioff_lookup_tab );
3885
3886   /* and the ranges tree.  Note that we need to also free the XArrays
3887      which constitute the keys, hence pass VG_(deleteXA) as a
3888      key-finalizer. */
3889   VG_(deleteFM)( rangestree, (void(*)(UWord))VG_(deleteXA), NULL );
3890
3891   /* and the tyents_to_keep cache */
3892   ML_(dinfo_free)( tyents_to_keep_cache );
3893   tyents_to_keep_cache = NULL;
3894
3895   /* and the file name table (just the array, not the entries
3896      themselves).  (Apparently, 2008-Oct-23, varparser.filenameTable
3897      can be NULL here, for icc9 generated Dwarf3.  Not sure what that
3898      signifies (a deeper problem with the reader?)) */
   /* Reading the CU loop above: the table is still non-NULL if the
      loop was left via the 'cu_offset_now == debug_info_sz' break,
      and NULL if it was left via the 'avail < 11' break. */
3899   if (varparser.filenameTable) {
3900      VG_(deleteXA)( varparser.filenameTable );
3901      varparser.filenameTable = NULL;
3902   }
3903
3904   /* record the GExprs in di so they can be freed later */
3905   vg_assert(!di->admin_gexprs);
3906   di->admin_gexprs = gexprs;
3907}
3908
3909
3910/*------------------------------------------------------------*/
3911/*---                                                      ---*/
3912/*--- The "new" DWARF3 reader -- top level control logic   ---*/
3913/*---                                                      ---*/
3914/*------------------------------------------------------------*/
3915
3916/* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
3917#include <setjmp.h>   /* For jmp_buf */
3918/* --- !!! --- EXTERNAL HEADERS end --- !!! --- */
3919
3920static Bool    d3rd_jmpbuf_valid  = False;
3921static HChar*  d3rd_jmpbuf_reason = NULL;
3922static jmp_buf d3rd_jmpbuf;
3923
3924static __attribute__((noreturn)) void barf ( HChar* reason ) {
3925   vg_assert(d3rd_jmpbuf_valid);
3926   d3rd_jmpbuf_reason = reason;
3927   __builtin_longjmp(&d3rd_jmpbuf, 1);
3928   /*NOTREACHED*/
3929   vg_assert(0);
3930}
3931
3932
3933void
3934ML_(new_dwarf3_reader) (
3935   struct _DebugInfo* di,
3936   UChar* debug_info_img,   SizeT debug_info_sz,
3937   UChar* debug_abbv_img,   SizeT debug_abbv_sz,
3938   UChar* debug_line_img,   SizeT debug_line_sz,
3939   UChar* debug_str_img,    SizeT debug_str_sz,
3940   UChar* debug_ranges_img, SizeT debug_ranges_sz,
3941   UChar* debug_loc_img,    SizeT debug_loc_sz
3942)
3943{
3944   volatile Int  jumped;
3945   volatile Bool td3 = di->trace_symtab;
3946
3947   /* Run the _wrk function to read the dwarf3.  If it succeeds, it
3948      just returns normally.  If there is any failure, it longjmp's
3949      back here, having first set d3rd_jmpbuf_reason to something
3950      useful. */
3951   vg_assert(d3rd_jmpbuf_valid  == False);
3952   vg_assert(d3rd_jmpbuf_reason == NULL);
3953
3954   d3rd_jmpbuf_valid = True;
3955   jumped = __builtin_setjmp(&d3rd_jmpbuf);
3956   if (jumped == 0) {
3957      /* try this ... */
3958      new_dwarf3_reader_wrk( di, barf,
3959                             debug_info_img,   debug_info_sz,
3960                             debug_abbv_img,   debug_abbv_sz,
3961                             debug_line_img,   debug_line_sz,
3962                             debug_str_img,    debug_str_sz,
3963                             debug_ranges_img, debug_ranges_sz,
3964                             debug_loc_img,    debug_loc_sz );
3965      d3rd_jmpbuf_valid = False;
3966      TRACE_D3("\n------ .debug_info reading was successful ------\n");
3967   } else {
3968      /* It longjmp'd. */
3969      d3rd_jmpbuf_valid = False;
3970      /* Can't longjump without giving some sort of reason. */
3971      vg_assert(d3rd_jmpbuf_reason != NULL);
3972
3973      TRACE_D3("\n------ .debug_info reading failed ------\n");
3974
3975      ML_(symerr)(di, True, d3rd_jmpbuf_reason);
3976   }
3977
3978   d3rd_jmpbuf_valid  = False;
3979   d3rd_jmpbuf_reason = NULL;
3980}
3981
3982
3983
3984/* --- Unused code fragments which might be useful one day. --- */
3985
#if 0
   /* Disabled fragment: walk the .debug_aranges section and trace
      the header of each set followed by its (address, length)
      pairs.  This is not a complete function; it expects 'aranges'
      (the cursor handed to init_Cursor), debug_aranges_img,
      debug_aranges_sz and barf to be in scope wherever it is pasted
      in. */
   TRACE_SYMTAB("\n");
   TRACE_SYMTAB("\n------ The contents of .debug_arange ------\n");
   init_Cursor( &aranges, debug_aranges_img,
                debug_aranges_sz, 0, barf,
                "Overrun whilst reading .debug_aranges section" );
   while (True) {
      ULong  len, d_i_offset;
      Bool   is64;
      UShort version;
      UChar  asize, segsize;

      if (is_at_end_Cursor( &aranges ))
         break;
      /* Read one arange thingy */
      /* initial_length field */
      len = get_Initial_Length( &is64, &aranges,
               "in .debug_aranges: invalid initial-length field" );
      version    = get_UShort( &aranges );
      d_i_offset = get_Dwarfish_UWord( &aranges, is64 );
      asize      = get_UChar( &aranges );
      segsize    = get_UChar( &aranges );
      TRACE_D3("  Length:                   %llu\n", len);
      TRACE_D3("  Version:                  %d\n", (Int)version);
      TRACE_D3("  Offset into .debug_info:  %llx\n", d_i_offset);
      TRACE_D3("  Pointer Size:             %d\n", (Int)asize);
      TRACE_D3("  Segment Size:             %d\n", (Int)segsize);
      TRACE_D3("\n");
      TRACE_D3("    Address            Length\n");

      /* asize comes straight from the section contents.  A zero
         value would make the alignment loop below compute a
         remainder modulo zero, so give up on such input instead. */
      if (asize == 0)
         barf( "in .debug_aranges: zero address size" );

      /* Skip padding bytes until the cursor position is a multiple
         of twice the address size. */
      while ((get_position_of_Cursor( &aranges ) % (2 * asize)) > 0) {
         (void)get_UChar( & aranges );
      }
      /* Trace (address, length) pairs up to and including the
         all-zero pair that ends the set. */
      while (True) {
         ULong address = get_Dwarfish_UWord( &aranges, asize==8 );
         ULong length = get_Dwarfish_UWord( &aranges, asize==8 );
         TRACE_D3("    0x%016llx 0x%llx\n", address, length);
         if (address == 0 && length == 0) break;
      }
   }
   TRACE_SYMTAB("\n");
#endif
4029
4030#endif // defined(VGO_linux) || defined(VGO_darwin)
4031
4032/*--------------------------------------------------------------------*/
4033/*--- end                                                          ---*/
4034/*--------------------------------------------------------------------*/
4035