readdwarf3.c revision bdee918842b4b2d4a09146a4642e999dc71b3652
1
2/*--------------------------------------------------------------------*/
3/*--- Read DWARF3/4 ".debug_info" sections (DIE trees).            ---*/
4/*---                                                 readdwarf3.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8   This file is part of Valgrind, a dynamic binary instrumentation
9   framework.
10
11   Copyright (C) 2008-2010 OpenWorks LLP
12      info@open-works.co.uk
13
14   This program is free software; you can redistribute it and/or
15   modify it under the terms of the GNU General Public License as
16   published by the Free Software Foundation; either version 2 of the
17   License, or (at your option) any later version.
18
19   This program is distributed in the hope that it will be useful, but
20   WITHOUT ANY WARRANTY; without even the implied warranty of
21   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22   General Public License for more details.
23
24   You should have received a copy of the GNU General Public License
25   along with this program; if not, write to the Free Software
26   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27   02111-1307, USA.
28
29   The GNU General Public License is contained in the file COPYING.
30
31   Neither the names of the U.S. Department of Energy nor the
32   University of California nor the names of its contributors may be
33   used to endorse or promote products derived from this software
34   without prior written permission.
35*/
36
37#if defined(VGO_linux) || defined(VGO_darwin)
38
39/* REFERENCE (without which this code will not make much sense):
40
41   DWARF Debugging Information Format, Version 3,
42   dated 20 December 2005 (the "D3 spec").
43
44   Available at http://www.dwarfstd.org/Dwarf3.pdf.  There's also a
45   .doc (MS Word) version, but for some reason the section numbers
46   between the Word and PDF versions differ by 1 in the first digit.
47   All section references in this code are to the PDF version.
48
49   CURRENT HACKS:
50
   DW_TAG_{const,volatile}_type with no DW_AT_type is allowed; it is
52      assumed to mean "const void" or "volatile void" respectively.
53      GDB appears to interpret them like this, anyway.
54
55   In many cases it is important to know the svma of a CU (the "base
56   address of the CU", as the D3 spec calls it).  There are some
57   situations in which the spec implies this value is unknown, but the
   Dwarf3 produced by gcc-4.1 seems to assume it is not unknown but
59   merely zero when not explicitly stated.  So we too have to make
60   that assumption.
61
62   POTENTIAL BUG?  Spotted 6 Sept 08.  Why doesn't
63   unitary_range_list() bias the resulting range list in the same way
64   that its more general cousin, get_range_list(), does?  I don't
65   know.
66
67   TODO, 2008 Feb 17:
68
69   get rid of cu_svma_known and document the assumed-zero svma hack.
70
71   ML_(sizeOfType): differentiate between zero sized types and types
72   for which the size is unknown.  Is this important?  I don't know.
73
74   DW_AT_array_types: deal with explicit sizes (currently we compute
75   the size from the bounds and the element size, although that's
76   fragile, if the bounds incompletely specified, or completely
77   absent)
78
79   Document reason for difference (by 1) of stack preening depth in
80   parse_var_DIE vs parse_type_DIE.
81
82   Don't hand to ML_(addVars), vars whose locations are entirely in
83   registers (DW_OP_reg*).  This is merely a space-saving
84   optimisation, as ML_(evaluate_Dwarf3_Expr) should handle these
85   expressions correctly, by failing to evaluate them and hence
86   effectively ignoring the variable with which they are associated.
87
88   Deal with DW_AT_array_types which have element size != stride
89
90   In some cases, the info for a variable is split between two
91   different DIEs (generally a declarer and a definer).  We punt on
92   these.  Could do better here.
93
94   The 'data_bias' argument passed to the expression evaluator
95   (ML_(evaluate_Dwarf3_Expr)) should really be changed to a
96   MaybeUWord, to make it clear when we do vs don't know what it is
97   for the evaluation of an expression.  At the moment zero is passed
98   for this parameter in the don't know case.  That's a bit fragile
99   and obscure; using a MaybeUWord would be clearer.
100
101   POTENTIAL PERFORMANCE IMPROVEMENTS:
102
103   Currently, duplicate removal and all other queries for the type
104   entities array is done using cuOffset-based pointing, which
105   involves a binary search (VG_(lookupXA)) for each access.  This is
106   wildly inefficient, although simple.  It would be better to
107   translate all the cuOffset-based references (iow, all the "R" and
108   "Rs" fields in the TyEnts in 'tyents') to direct index numbers in
109   'tyents' right at the start of dedup_types(), and use direct
110   indexing (VG_(indexXA)) wherever possible after that.
111
112   cmp__XArrays_of_AddrRange is also a performance bottleneck.  Move
113   VG_(indexXA) into pub_tool_xarray.h so it can be inlined at all use
114   points, and possibly also make an _UNCHECKED version which skips
115   the range checks in performance-critical situations such as this.
116
117   Handle interaction between read_DIE and parse_{var,type}_DIE
118   better.  Currently read_DIE reads the entire DIE just to find where
119   the end is (and for debug printing), so that it can later reliably
120   move the cursor to the end regardless of what parse_{var,type}_DIE
121   do.  This means many DIEs (most, even?) are read twice.  It would
122   be smarter to make parse_{var,type}_DIE return a Bool indicating
123   whether or not they advanced the DIE cursor, and only if they
124   didn't should read_DIE itself read through the DIE.
125
126   ML_(addVar) and add_var_to_arange: quite a lot of DiAddrRanges have
127   zero variables in their .vars XArray.  Rather than have an XArray
128   with zero elements (which uses 2 malloc'd blocks), allow the .vars
129   pointer to be NULL in this case.
130
131   More generally, reduce the amount of memory allocated and freed
132   while reading Dwarf3 type/variable information.  Even modest (20MB)
133   objects cause this module to allocate and free hundreds of
134   thousands of small blocks, and ML_(arena_malloc) and its various
135   groupies always show up at the top of performance profiles. */
136
137#include "pub_core_basics.h"
138#include "pub_core_debuginfo.h"
139#include "pub_core_libcbase.h"
140#include "pub_core_libcassert.h"
141#include "pub_core_libcprint.h"
142#include "pub_core_options.h"
143#include "pub_core_tooliface.h"    /* VG_(needs) */
144#include "pub_core_xarray.h"
145#include "pub_core_wordfm.h"
146#include "priv_misc.h"             /* dinfo_zalloc/free */
147#include "priv_tytypes.h"
148#include "priv_d3basics.h"
149#include "priv_storage.h"
150#include "priv_readdwarf3.h"       /* self */
151
152
153/*------------------------------------------------------------*/
154/*---                                                      ---*/
155/*--- Basic machinery for parsing DIEs.                    ---*/
156/*---                                                      ---*/
157/*------------------------------------------------------------*/
158
/* Print a trace message with VG_(printf), but only when 'td3' is
   true.  NOTE: 'td3' is not a macro argument; a variable of that name
   must be in scope at every use site.  The expansion is a bare 'if'
   statement, so do not use this as the unbraced body of an if/else. */
#define TRACE_D3(format, args...) \
   if (td3) { VG_(printf)(format, ## args); }

/* Two reserved UWord values: -1UL and -2UL, cast to UWord.  Judging by
   the names they denote "no valid CU offset" and a fake "void" entry
   respectively -- TODO confirm against the code that uses them. */
#define D3_INVALID_CUOFF  ((UWord)(-1UL))
#define D3_FAKEVOID_CUOFF ((UWord)(-2UL))
164
165typedef
166   struct {
167      UChar* region_start_img;
168      UWord  region_szB;
169      UWord  region_next;
170      void (*barf)( HChar* ) __attribute__((noreturn));
171      HChar* barfstr;
172   }
173   Cursor;
174
175static inline Bool is_sane_Cursor ( Cursor* c ) {
176   if (!c)                return False;
177   if (!c->barf)          return False;
178   if (!c->barfstr)       return False;
179   return True;
180}
181
182static void init_Cursor ( Cursor* c,
183                          UChar*  region_start_img,
184                          UWord   region_szB,
185                          UWord   region_next,
186                          __attribute__((noreturn)) void (*barf)( HChar* ),
187                          HChar*  barfstr )
188{
189   vg_assert(c);
190   VG_(memset)(c, 0, sizeof(*c));
191   c->region_start_img = region_start_img;
192   c->region_szB       = region_szB;
193   c->region_next      = region_next;
194   c->barf             = barf;
195   c->barfstr          = barfstr;
196   vg_assert(is_sane_Cursor(c));
197}
198
199static Bool is_at_end_Cursor ( Cursor* c ) {
200   vg_assert(is_sane_Cursor(c));
201   return c->region_next >= c->region_szB;
202}
203
204static inline UWord get_position_of_Cursor ( Cursor* c ) {
205   vg_assert(is_sane_Cursor(c));
206   return c->region_next;
207}
208static inline void set_position_of_Cursor ( Cursor* c, UWord pos ) {
209   c->region_next = pos;
210   vg_assert(is_sane_Cursor(c));
211}
212
213static /*signed*/Word get_remaining_length_Cursor ( Cursor* c ) {
214   vg_assert(is_sane_Cursor(c));
215   return c->region_szB - c->region_next;
216}
217
218static UChar* get_address_of_Cursor ( Cursor* c ) {
219   vg_assert(is_sane_Cursor(c));
220   return &c->region_start_img[ c->region_next ];
221}
222
223__attribute__((noreturn))
224static void failWith ( Cursor* c, HChar* str ) {
225   vg_assert(c);
226   vg_assert(c->barf);
227   c->barf(str);
228   /*NOTREACHED*/
229   vg_assert(0);
230}
231
232/* FIXME: document assumptions on endianness for
233   get_UShort/UInt/ULong. */
234static inline UChar get_UChar ( Cursor* c ) {
235   UChar r;
236   /* vg_assert(is_sane_Cursor(c)); */
237   if (c->region_next + sizeof(UChar) > c->region_szB) {
238      c->barf(c->barfstr);
239      /*NOTREACHED*/
240      vg_assert(0);
241   }
242   r = * (UChar*) &c->region_start_img[ c->region_next ];
243   c->region_next += sizeof(UChar);
244   return r;
245}
246static UShort get_UShort ( Cursor* c ) {
247   UShort r;
248   vg_assert(is_sane_Cursor(c));
249   if (c->region_next + sizeof(UShort) > c->region_szB) {
250      c->barf(c->barfstr);
251      /*NOTREACHED*/
252      vg_assert(0);
253   }
254   r = * (UShort*) &c->region_start_img[ c->region_next ];
255   c->region_next += sizeof(UShort);
256   return r;
257}
258static UInt get_UInt ( Cursor* c ) {
259   UInt r;
260   vg_assert(is_sane_Cursor(c));
261   if (c->region_next + sizeof(UInt) > c->region_szB) {
262      c->barf(c->barfstr);
263      /*NOTREACHED*/
264      vg_assert(0);
265   }
266   r = * (UInt*) &c->region_start_img[ c->region_next ];
267   c->region_next += sizeof(UInt);
268   return r;
269}
270static ULong get_ULong ( Cursor* c ) {
271   ULong r;
272   vg_assert(is_sane_Cursor(c));
273   if (c->region_next + sizeof(ULong) > c->region_szB) {
274      c->barf(c->barfstr);
275      /*NOTREACHED*/
276      vg_assert(0);
277   }
278   r = * (ULong*) &c->region_start_img[ c->region_next ];
279   c->region_next += sizeof(ULong);
280   return r;
281}
282static inline ULong get_ULEB128 ( Cursor* c ) {
283   ULong result;
284   Int   shift;
285   UChar byte;
286   /* unroll first iteration */
287   byte = get_UChar( c );
288   result = (ULong)(byte & 0x7f);
289   if (LIKELY(!(byte & 0x80))) return result;
290   shift = 7;
291   /* end unroll first iteration */
292   do {
293      byte = get_UChar( c );
294      result |= ((ULong)(byte & 0x7f)) << shift;
295      shift += 7;
296   } while (byte & 0x80);
297   return result;
298}
299static Long get_SLEB128 ( Cursor* c ) {
300   ULong  result = 0;
301   Int    shift = 0;
302   UChar  byte;
303   do {
304      byte = get_UChar(c);
305      result |= ((ULong)(byte & 0x7f)) << shift;
306      shift += 7;
307   } while (byte & 0x80);
308   if (shift < 64 && (byte & 0x40))
309      result |= -(1ULL << shift);
310   return result;
311}
312
313/* Assume 'c' points to the start of a string.  Return the absolute
314   address of whatever it points at, and advance it past the
315   terminating zero.  This makes it safe for the caller to then copy
316   the string with ML_(addStr), since (w.r.t. image overruns) the
317   process of advancing past the terminating zero will already have
318   "vetted" the string. */
319static UChar* get_AsciiZ ( Cursor* c ) {
320   UChar  uc;
321   UChar* res = get_address_of_Cursor(c);
322   do { uc = get_UChar(c); } while (uc != 0);
323   return res;
324}
325
326static ULong peek_ULEB128 ( Cursor* c ) {
327   Word here = c->region_next;
328   ULong r = get_ULEB128( c );
329   c->region_next = here;
330   return r;
331}
332static UChar peek_UChar ( Cursor* c ) {
333   Word here = c->region_next;
334   UChar r = get_UChar( c );
335   c->region_next = here;
336   return r;
337}
338
339static ULong get_Dwarfish_UWord ( Cursor* c, Bool is_dw64 ) {
340   return is_dw64 ? get_ULong(c) : (ULong) get_UInt(c);
341}
342
343static UWord get_UWord ( Cursor* c ) {
344   vg_assert(sizeof(UWord) == sizeof(void*));
345   if (sizeof(UWord) == 4) return get_UInt(c);
346   if (sizeof(UWord) == 8) return get_ULong(c);
347   vg_assert(0);
348}
349
350/* Read a DWARF3 'Initial Length' field */
351static ULong get_Initial_Length ( /*OUT*/Bool* is64,
352                                  Cursor* c,
353                                  HChar* barfMsg )
354{
355   ULong w64;
356   UInt  w32;
357   *is64 = False;
358   w32 = get_UInt( c );
359   if (w32 >= 0xFFFFFFF0 && w32 < 0xFFFFFFFF) {
360      c->barf( barfMsg );
361   }
362   else if (w32 == 0xFFFFFFFF) {
363      *is64 = True;
364      w64   = get_ULong( c );
365   } else {
366      *is64 = False;
367      w64 = (ULong)w32;
368   }
369   return w64;
370}
371
372
373/*------------------------------------------------------------*/
374/*---                                                      ---*/
375/*--- "CUConst" structure                                  ---*/
376/*---                                                      ---*/
377/*------------------------------------------------------------*/
378
379#define N_ABBV_CACHE 32
380
381/* Holds information that is constant through the parsing of a
382   Compilation Unit.  This is basically plumbed through to
383   everywhere. */
384typedef
385   struct {
386      /* Call here if anything goes wrong */
387      void (*barf)( HChar* ) __attribute__((noreturn));
388      /* Is this 64-bit DWARF ? */
389      Bool   is_dw64;
390      /* Which DWARF version ?  (2, 3 or 4) */
391      UShort version;
392      /* Length of this Compilation Unit, as stated in the
393         .unit_length :: InitialLength field of the CU Header.
394         However, this size (as specified by the D3 spec) does not
395         include the size of the .unit_length field itself, which is
396         either 4 or 12 bytes (32-bit or 64-bit Dwarf3).  That value
397         can be obtained through the expression ".is_dw64 ? 12 : 4". */
398      ULong  unit_length;
399      /* Offset of start of this unit in .debug_info */
400      UWord  cu_start_offset;
401      /* SVMA for this CU.  In the D3 spec, is known as the "base
402         address of the compilation unit (last para sec 3.1.1).
403         Needed for (amongst things) interpretation of location-list
404         values. */
405      Addr   cu_svma;
406      Bool   cu_svma_known;
407      /* The debug_abbreviations table to be used for this Unit */
408      UChar* debug_abbv;
409      /* Upper bound on size thereof (an overestimate, in general) */
410      UWord  debug_abbv_maxszB;
411      /* Where is .debug_str ? */
412      UChar* debug_str_img;
413      UWord  debug_str_sz;
414      /* Where is .debug_ranges ? */
415      UChar* debug_ranges_img;
416      UWord  debug_ranges_sz;
417      /* Where is .debug_loc ? */
418      UChar* debug_loc_img;
419      UWord  debug_loc_sz;
420      /* Where is .debug_line? */
421      UChar* debug_line_img;
422      UWord  debug_line_sz;
423      /* Where is .debug_info? */
424      UChar* debug_info_img;
425      UWord  debug_info_sz;
426      /* --- Needed so we can add stuff to the string table. --- */
427      struct _DebugInfo* di;
428      /* --- a cache for set_abbv_Cursor --- */
429      /* abbv_code == (ULong)-1 for an unused entry. */
430      struct { ULong abbv_code; UWord posn; } saC_cache[N_ABBV_CACHE];
431      UWord saC_cache_queries;
432      UWord saC_cache_misses;
433   }
434   CUConst;
435
436
437/*------------------------------------------------------------*/
438/*---                                                      ---*/
439/*--- Helper functions for Guarded Expressions             ---*/
440/*---                                                      ---*/
441/*------------------------------------------------------------*/
442
443/* Parse the location list starting at img-offset 'debug_loc_offset'
444   in .debug_loc.  Results are biased with 'svma_of_referencing_CU'
445   and so I believe are correct SVMAs for the object as a whole.  This
446   function allocates the UChar*, and the caller must deallocate it.
447   The resulting block is in so-called Guarded-Expression format.
448
449   Guarded-Expression format is similar but not identical to the DWARF3
450   location-list format.  The format of each returned block is:
451
452      UChar biasMe;
453      UChar isEnd;
454      followed by zero or more of
455
456      (Addr aMin;  Addr aMax;  UShort nbytes;  ..bytes..;  UChar isEnd)
457
   '..bytes..' is a standard DWARF3 location expression which is
459   valid when aMin <= pc <= aMax (possibly after suitable biasing).
460
461   The number of bytes in '..bytes..' is nbytes.
462
463   The end of the sequence is marked by an isEnd == 1 value.  All
464   previous isEnd values must be zero.
465
   biasMe is 1 if the aMin/aMax fields need this DebugInfo's
   text_bias added before use, and 0 if this is not necessary (the GX
   is ready to go).
469
470   Hence the block can be quickly parsed and is self-describing.  Note
471   that aMax is 1 less than the corresponding value in a DWARF3
472   location list.  Zero length ranges, with aMax == aMin-1, are not
473   allowed.
474*/
475/* 2008-sept-12: moved ML_(pp_GX) from here to d3basics.c, where
476   it more logically belongs. */
477
478
479/* Apply a text bias to a GX. */
480static void bias_GX ( /*MOD*/GExpr* gx, struct _DebugInfo* di )
481{
482   UShort nbytes;
483   Addr*  pA;
484   UChar* p = &gx->payload[0];
485   UChar  uc;
486   uc = *p++; /*biasMe*/
487   if (uc == 0)
488      return;
489   vg_assert(uc == 1);
490   p[-1] = 0; /* mark it as done */
491   while (True) {
492      uc = *p++;
493      if (uc == 1)
494         break; /*isEnd*/
495      vg_assert(uc == 0);
496      /* t-bias aMin */
497      pA = (Addr*)p;
498      *pA += di->text_debug_bias;
499      p += sizeof(Addr);
500      /* t-bias aMax */
501      pA = (Addr*)p;
502      *pA += di->text_debug_bias;
503      p += sizeof(Addr);
504      /* nbytes, and actual expression */
505      nbytes = * (UShort*)p; p += sizeof(UShort);
506      p += nbytes;
507   }
508}
509
510__attribute__((noinline))
511static GExpr* make_singleton_GX ( UChar* block, UWord nbytes )
512{
513   SizeT  bytesReqd;
514   GExpr* gx;
515   UChar *p, *pstart;
516
517   vg_assert(sizeof(UWord) == sizeof(Addr));
518   vg_assert(nbytes <= 0xFFFF); /* else we overflow the nbytes field */
519   bytesReqd
520      =   sizeof(UChar)  /*biasMe*/    + sizeof(UChar) /*!isEnd*/
521        + sizeof(UWord)  /*aMin*/      + sizeof(UWord) /*aMax*/
522        + sizeof(UShort) /*nbytes*/    + nbytes
523        + sizeof(UChar); /*isEnd*/
524
525   gx = ML_(dinfo_zalloc)( "di.readdwarf3.msGX.1",
526                           sizeof(GExpr) + bytesReqd );
527   vg_assert(gx);
528
529   p = pstart = &gx->payload[0];
530
531   * ((UChar*)p)  = 0;          /*biasMe*/ p += sizeof(UChar);
532   * ((UChar*)p)  = 0;          /*!isEnd*/ p += sizeof(UChar);
533   * ((Addr*)p)   = 0;          /*aMin*/   p += sizeof(Addr);
534   * ((Addr*)p)   = ~((Addr)0); /*aMax */  p += sizeof(Addr);
535   * ((UShort*)p) = (UShort)nbytes; /*nbytes*/ p += sizeof(UShort);
536   VG_(memcpy)(p, block, nbytes); p += nbytes;
537   * ((UChar*)p)  = 1;          /*isEnd*/  p += sizeof(UChar);
538
539   vg_assert( (SizeT)(p - pstart) == bytesReqd);
540   vg_assert( &gx->payload[bytesReqd]
541              == ((UChar*)gx) + sizeof(GExpr) + bytesReqd );
542
543   return gx;
544}
545
546__attribute__((noinline))
547static GExpr* make_general_GX ( CUConst* cc,
548                                Bool     td3,
549                                UWord    debug_loc_offset,
550                                Addr     svma_of_referencing_CU )
551{
552   Addr      base;
553   Cursor    loc;
554   XArray*   xa; /* XArray of UChar */
555   GExpr*    gx;
556   Word      nbytes;
557
558   vg_assert(sizeof(UWord) == sizeof(Addr));
559   if (cc->debug_loc_sz == 0)
560      cc->barf("make_general_GX: .debug_loc is empty/missing");
561
562   init_Cursor( &loc, cc->debug_loc_img,
563                cc->debug_loc_sz, 0, cc->barf,
564                "Overrun whilst reading .debug_loc section(2)" );
565   set_position_of_Cursor( &loc, debug_loc_offset );
566
567   TRACE_D3("make_general_GX (.debug_loc_offset = %lu, img = %p) {\n",
568            debug_loc_offset, get_address_of_Cursor( &loc ) );
569
570   /* Who frees this xa?  It is freed before this fn exits. */
571   xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.mgGX.1",
572                    ML_(dinfo_free),
573                    sizeof(UChar) );
574
575   { UChar c = 1; /*biasMe*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
576
577   base = 0;
578   while (True) {
579      Bool  acquire;
580      UWord len;
581      /* Read a (host-)word pair.  This is something of a hack since
582         the word size to read is really dictated by the ELF file;
583         however, we assume we're reading a file with the same
584         word-sizeness as the host.  Reasonably enough. */
585      UWord w1 = get_UWord( &loc );
586      UWord w2 = get_UWord( &loc );
587
588      TRACE_D3("   %08lx %08lx\n", w1, w2);
589      if (w1 == 0 && w2 == 0)
590         break; /* end of list */
591
592      if (w1 == -1UL) {
593         /* new value for 'base' */
594         base = w2;
595         continue;
596      }
597
598      /* else a location expression follows */
599      /* else enumerate [w1+base, w2+base) */
600      /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
601         (sec 2.17.2) */
602      if (w1 > w2) {
603         TRACE_D3("negative range is for .debug_loc expr at "
604                  "file offset %lu\n",
605                  debug_loc_offset);
606         cc->barf( "negative range in .debug_loc section" );
607      }
608
609      /* ignore zero length ranges */
610      acquire = w1 < w2;
611      len     = (UWord)get_UShort( &loc );
612
613      if (acquire) {
614         UWord  w;
615         UShort s;
616         UChar  c;
617         c = 0; /* !isEnd*/
618         VG_(addBytesToXA)( xa, &c, sizeof(c) );
619         w = w1    + base + svma_of_referencing_CU;
620         VG_(addBytesToXA)( xa, &w, sizeof(w) );
621         w = w2 -1 + base + svma_of_referencing_CU;
622         VG_(addBytesToXA)( xa, &w, sizeof(w) );
623         s = (UShort)len;
624         VG_(addBytesToXA)( xa, &s, sizeof(s) );
625      }
626
627      while (len > 0) {
628         UChar byte = get_UChar( &loc );
629         TRACE_D3("%02x", (UInt)byte);
630         if (acquire)
631            VG_(addBytesToXA)( xa, &byte, 1 );
632         len--;
633      }
634      TRACE_D3("\n");
635   }
636
637   { UChar c = 1; /*isEnd*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
638
639   nbytes = VG_(sizeXA)( xa );
640   vg_assert(nbytes >= 1);
641
642   gx = ML_(dinfo_zalloc)( "di.readdwarf3.mgGX.2", sizeof(GExpr) + nbytes );
643   vg_assert(gx);
644   VG_(memcpy)( &gx->payload[0], (UChar*)VG_(indexXA)(xa,0), nbytes );
645   vg_assert( &gx->payload[nbytes]
646              == ((UChar*)gx) + sizeof(GExpr) + nbytes );
647
648   VG_(deleteXA)( xa );
649
650   TRACE_D3("}\n");
651
652   return gx;
653}
654
655
656/*------------------------------------------------------------*/
657/*---                                                      ---*/
658/*--- Helper functions for range lists and CU headers      ---*/
659/*---                                                      ---*/
660/*------------------------------------------------------------*/
661
662/* Denotes an address range.  Both aMin and aMax are included in the
663   range; hence a complete range is (0, ~0) and an empty range is any
664   (X, X-1) for X > 0.*/
665typedef
666   struct { Addr aMin; Addr aMax; }
667   AddrRange;
668
669
670/* Generate an arbitrary structural total ordering on
671   XArray* of AddrRange. */
672static Word cmp__XArrays_of_AddrRange ( XArray* rngs1, XArray* rngs2 )
673{
674   Word n1, n2, i;
675   tl_assert(rngs1 && rngs2);
676   n1 = VG_(sizeXA)( rngs1 );
677   n2 = VG_(sizeXA)( rngs2 );
678   if (n1 < n2) return -1;
679   if (n1 > n2) return 1;
680   for (i = 0; i < n1; i++) {
681      AddrRange* rng1 = (AddrRange*)VG_(indexXA)( rngs1, i );
682      AddrRange* rng2 = (AddrRange*)VG_(indexXA)( rngs2, i );
683      if (rng1->aMin < rng2->aMin) return -1;
684      if (rng1->aMin > rng2->aMin) return 1;
685      if (rng1->aMax < rng2->aMax) return -1;
686      if (rng1->aMax > rng2->aMax) return 1;
687   }
688   return 0;
689}
690
691
692__attribute__((noinline))
693static XArray* /* of AddrRange */ empty_range_list ( void )
694{
695   XArray* xa; /* XArray of AddrRange */
696   /* Who frees this xa?  varstack_preen() does. */
697   xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.erl.1",
698                    ML_(dinfo_free),
699                    sizeof(AddrRange) );
700   return xa;
701}
702
703
704__attribute__((noinline))
705static XArray* unitary_range_list ( Addr aMin, Addr aMax )
706{
707   XArray*   xa;
708   AddrRange pair;
709   vg_assert(aMin <= aMax);
710   /* Who frees this xa?  varstack_preen() does. */
711   xa = VG_(newXA)( ML_(dinfo_zalloc),  "di.readdwarf3.url.1",
712                    ML_(dinfo_free),
713                    sizeof(AddrRange) );
714   pair.aMin = aMin;
715   pair.aMax = aMax;
716   VG_(addToXA)( xa, &pair );
717   return xa;
718}
719
720
721/* Enumerate the address ranges starting at img-offset
722   'debug_ranges_offset' in .debug_ranges.  Results are biased with
723   'svma_of_referencing_CU' and so I believe are correct SVMAs for the
724   object as a whole.  This function allocates the XArray, and the
725   caller must deallocate it. */
726__attribute__((noinline))
727static XArray* /* of AddrRange */
728       get_range_list ( CUConst* cc,
729                        Bool     td3,
730                        UWord    debug_ranges_offset,
731                        Addr     svma_of_referencing_CU )
732{
733   Addr      base;
734   Cursor    ranges;
735   XArray*   xa; /* XArray of AddrRange */
736   AddrRange pair;
737
738   if (cc->debug_ranges_sz == 0)
739      cc->barf("get_range_list: .debug_ranges is empty/missing");
740
741   init_Cursor( &ranges, cc->debug_ranges_img,
742                cc->debug_ranges_sz, 0, cc->barf,
743                "Overrun whilst reading .debug_ranges section(2)" );
744   set_position_of_Cursor( &ranges, debug_ranges_offset );
745
746   /* Who frees this xa?  varstack_preen() does. */
747   xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.grl.1", ML_(dinfo_free),
748                    sizeof(AddrRange) );
749   base = 0;
750   while (True) {
751      /* Read a (host-)word pair.  This is something of a hack since
752         the word size to read is really dictated by the ELF file;
753         however, we assume we're reading a file with the same
754         word-sizeness as the host.  Reasonably enough. */
755      UWord w1 = get_UWord( &ranges );
756      UWord w2 = get_UWord( &ranges );
757
758      if (w1 == 0 && w2 == 0)
759         break; /* end of list. */
760
761      if (w1 == -1UL) {
762         /* new value for 'base' */
763         base = w2;
764         continue;
765      }
766
767      /* else enumerate [w1+base, w2+base) */
768      /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
769         (sec 2.17.2) */
770      if (w1 > w2)
771         cc->barf( "negative range in .debug_ranges section" );
772      if (w1 < w2) {
773         pair.aMin = w1     + base + svma_of_referencing_CU;
774         pair.aMax = w2 - 1 + base + svma_of_referencing_CU;
775         vg_assert(pair.aMin <= pair.aMax);
776         VG_(addToXA)( xa, &pair );
777      }
778   }
779   return xa;
780}
781
782
783/* Parse the Compilation Unit header indicated at 'c' and
784   initialise 'cc' accordingly. */
785static __attribute__((noinline))
786void parse_CU_Header ( /*OUT*/CUConst* cc,
787                       Bool td3,
788                       Cursor* c,
789                       UChar* debug_abbv_img, UWord debug_abbv_sz )
790{
791   UChar  address_size;
792   UWord  debug_abbrev_offset;
793   Int    i;
794
795   VG_(memset)(cc, 0, sizeof(*cc));
796   vg_assert(c && c->barf);
797   cc->barf = c->barf;
798
799   /* initial_length field */
800   cc->unit_length
801      = get_Initial_Length( &cc->is_dw64, c,
802           "parse_CU_Header: invalid initial-length field" );
803
804   TRACE_D3("   Length:        %lld\n", cc->unit_length );
805
806   /* version */
807   cc->version = get_UShort( c );
808   if (cc->version != 2 && cc->version != 3 && cc->version != 4)
809      cc->barf( "parse_CU_Header: is neither DWARF2 nor DWARF3 nor DWARF4" );
810   TRACE_D3("   Version:       %d\n", (Int)cc->version );
811
812   /* debug_abbrev_offset */
813   debug_abbrev_offset = get_Dwarfish_UWord( c, cc->is_dw64 );
814   if (debug_abbrev_offset >= debug_abbv_sz)
815      cc->barf( "parse_CU_Header: invalid debug_abbrev_offset" );
816   TRACE_D3("   Abbrev Offset: %ld\n", debug_abbrev_offset );
817
818   /* address size.  If this isn't equal to the host word size, just
819      give up.  This makes it safe to assume elsewhere that
820      DW_FORM_addr and DW_FORM_ref_addr can be treated as a host
821      word. */
822   address_size = get_UChar( c );
823   if (address_size != sizeof(void*))
824      cc->barf( "parse_CU_Header: invalid address_size" );
825   TRACE_D3("   Pointer Size:  %d\n", (Int)address_size );
826
827   /* Set up so that cc->debug_abbv points to the relevant table for
828      this CU.  Set the szB so that at least we can't read off the end
829      of the debug_abbrev section -- potentially (and quite likely)
830      too big, if this isn't the last table in the section, but at
831      least it's safe. */
832   cc->debug_abbv        = debug_abbv_img + debug_abbrev_offset;
833   cc->debug_abbv_maxszB = debug_abbv_sz  - debug_abbrev_offset;
834   /* and empty out the set_abbv_Cursor cache */
835   if (0) VG_(printf)("XXXXXX initialise set_abbv_Cursor cache\n");
836   for (i = 0; i < N_ABBV_CACHE; i++) {
837      cc->saC_cache[i].abbv_code = (ULong)-1; /* unused */
838      cc->saC_cache[i].posn = 0;
839   }
840   cc->saC_cache_queries = 0;
841   cc->saC_cache_misses = 0;
842}
843
844
845/* Set up 'c' so it is ready to parse the abbv table entry code
846   'abbv_code' for this compilation unit.  */
847static __attribute__((noinline))
848void set_abbv_Cursor ( /*OUT*/Cursor* c, Bool td3,
849                       CUConst* cc, ULong abbv_code )
850{
851   Int   i;
852   ULong acode;
853
854   if (abbv_code == 0)
855      cc->barf("set_abbv_Cursor: abbv_code == 0" );
856
857   /* (ULong)-1 is used to represent an empty cache slot.  So we can't
858      allow it.  In any case no valid DWARF3 should make a reference
859      to a negative abbreviation code.  [at least, they always seem to
860      be numbered upwards from zero as far as I have seen] */
861   vg_assert(abbv_code != (ULong)-1);
862
863   /* First search the cache. */
864   if (0) VG_(printf)("XXXXXX search set_abbv_Cursor cache\n");
865   cc->saC_cache_queries++;
866   for (i = 0; i < N_ABBV_CACHE; i++) {
867      /* No need to test the cached abbv_codes for -1 (empty), since
868         we just asserted that abbv_code is not -1. */
869     if (cc->saC_cache[i].abbv_code == abbv_code) {
870        /* Found it.  Cool.  Set up the parser using the cached
871           position, and move this cache entry 1 step closer to the
872           front. */
873        if (0) VG_(printf)("XXXXXX found in set_abbv_Cursor cache\n");
874        init_Cursor( c, cc->debug_abbv,
875                     cc->debug_abbv_maxszB, cc->saC_cache[i].posn,
876                     cc->barf,
877                     "Overrun whilst parsing .debug_abbrev section(1)" );
878        if (i > 0) {
879           ULong t_abbv_code = cc->saC_cache[i].abbv_code;
880           UWord t_posn = cc->saC_cache[i].posn;
881           while (i > 0) {
882              cc->saC_cache[i] = cc->saC_cache[i-1];
883              cc->saC_cache[0].abbv_code = t_abbv_code;
884              cc->saC_cache[0].posn = t_posn;
885              i--;
886           }
887        }
888        return;
889     }
890   }
891
892   /* No.  It's not in the cache.  We have to search through
893      .debug_abbrev, of course taking care to update the cache
894      when done. */
895
896   cc->saC_cache_misses++;
897   init_Cursor( c, cc->debug_abbv, cc->debug_abbv_maxszB, 0, cc->barf,
898               "Overrun whilst parsing .debug_abbrev section(2)" );
899
900   /* Now iterate though the table until we find the requested
901      entry. */
902   while (True) {
903      //ULong atag;
904      //UInt  has_children;
905      acode = get_ULEB128( c );
906      if (acode == 0) break; /* end of the table */
907      if (acode == abbv_code) break; /* found it */
908      /*atag         = */ get_ULEB128( c );
909      /*has_children = */ get_UChar( c );
910      //TRACE_D3("   %llu      %s    [%s]\n",
911      //         acode, pp_DW_TAG(atag), pp_DW_children(has_children));
912      while (True) {
913         ULong at_name = get_ULEB128( c );
914         ULong at_form = get_ULEB128( c );
915         if (at_name == 0 && at_form == 0) break;
916         //TRACE_D3("    %18s %s\n",
917         //         pp_DW_AT(at_name), pp_DW_FORM(at_form));
918      }
919   }
920
921   if (acode == 0) {
922      /* Not found.  This is fatal. */
923      cc->barf("set_abbv_Cursor: abbv_code not found");
924   }
925
926   /* Otherwise, 'c' is now set correctly to parse the relevant entry,
927      starting from the abbreviation entry's tag.  So just cache
928      the result, and return. */
929   for (i = N_ABBV_CACHE-1; i > N_ABBV_CACHE/2; i--) {
930      cc->saC_cache[i] = cc->saC_cache[i-1];
931   }
932   if (0) VG_(printf)("XXXXXX update set_abbv_Cursor cache\n");
933   cc->saC_cache[N_ABBV_CACHE/2].abbv_code = abbv_code;
934   cc->saC_cache[N_ABBV_CACHE/2].posn = get_position_of_Cursor(c);
935}
936
937
938/* From 'c', get the Form data into the lowest 1/2/4/8 bytes of *cts.
939
940   If *cts itself contains the entire result, then *ctsSzB is set to
941   1,2,4 or 8 accordingly and *ctsMemSzB is set to zero.
942
943   Alternatively, the result can be a block of data (in the
944   transiently mapped-in object, so-called "image" space).  If so then
945   the lowest sizeof(void*)/8 bytes of *cts hold a pointer to said
946   image, *ctsSzB is zero, and *ctsMemSzB is the size of the block.
947
948   Unfortunately this means it is impossible to represent a zero-size
949   image block since that would have *ctsSzB == 0 and *ctsMemSzB == 0
950   and so is ambiguous (which case it is?)
951
952   Invariant on successful return:
953      (*ctsSzB > 0 && *ctsMemSzB == 0)
954      || (*ctsSzB == 0 && *ctsMemSzB > 0)
955*/
956static
957void get_Form_contents ( /*OUT*/ULong* cts,
958                         /*OUT*/Int*   ctsSzB,
959                         /*OUT*/UWord* ctsMemSzB,
960                         CUConst* cc, Cursor* c,
961                         Bool td3, DW_FORM form )
962{
963   *cts       = 0;
964   *ctsSzB    = 0;
965   *ctsMemSzB = 0;
966   switch (form) {
967      case DW_FORM_data1:
968         *cts = (ULong)(UChar)get_UChar(c);
969         *ctsSzB = 1;
970         TRACE_D3("%u", (UInt)*cts);
971         break;
972      case DW_FORM_data2:
973         *cts = (ULong)(UShort)get_UShort(c);
974         *ctsSzB = 2;
975         TRACE_D3("%u", (UInt)*cts);
976         break;
977      case DW_FORM_data4:
978         *cts = (ULong)(UInt)get_UInt(c);
979         *ctsSzB = 4;
980         TRACE_D3("%u", (UInt)*cts);
981         break;
982      case DW_FORM_data8:
983         *cts = get_ULong(c);
984         *ctsSzB = 8;
985         TRACE_D3("%llu", *cts);
986         break;
987      case DW_FORM_sec_offset:
988         *cts = (ULong)get_Dwarfish_UWord( c, cc->is_dw64 );
989         *ctsSzB = cc->is_dw64 ? 8 : 4;
990         TRACE_D3("%llu", *cts);
991         break;
992      case DW_FORM_sdata:
993         *cts = (ULong)(Long)get_SLEB128(c);
994         *ctsSzB = 8;
995         TRACE_D3("%lld", (Long)*cts);
996         break;
997      case DW_FORM_udata:
998         *cts = (ULong)(Long)get_ULEB128(c);
999         *ctsSzB = 8;
1000         TRACE_D3("%llu", (Long)*cts);
1001         break;
1002      case DW_FORM_addr:
1003         /* note, this is a hack.  DW_FORM_addr is defined as getting
1004            a word the size of the target machine as defined by the
1005            address_size field in the CU Header.  However,
1006            parse_CU_Header() rejects all inputs except those for
1007            which address_size == sizeof(Word), hence we can just
1008            treat it as a (host) Word.  */
1009         *cts = (ULong)(UWord)get_UWord(c);
1010         *ctsSzB = sizeof(UWord);
1011         TRACE_D3("0x%lx", (UWord)*cts);
1012         break;
1013
1014      case DW_FORM_ref_addr:
1015         /* We make the same word-size assumption as DW_FORM_addr. */
1016         /* What does this really mean?  From D3 Sec 7.5.4,
1017            description of "reference", it would appear to reference
1018            some other DIE, by specifying the offset from the
1019            beginning of a .debug_info section.  The D3 spec mentions
1020            that this might be in some other shared object and
1021            executable.  But I don't see how the name of the other
1022            object/exe is specified.
1023
1024            At least for the DW_FORM_ref_addrs created by icc11, the
1025            references seem to be within the same object/executable.
1026            So for the moment we merely range-check, to see that they
1027            actually do specify a plausible offset within this
1028            object's .debug_info, and return the value unchanged.
1029         */
1030         *cts = (ULong)(UWord)get_UWord(c);
1031         *ctsSzB = sizeof(UWord);
1032         TRACE_D3("0x%lx", (UWord)*cts);
1033         if (0) VG_(printf)("DW_FORM_ref_addr 0x%lx\n", (UWord)*cts);
1034         if (/* the following 2 are surely impossible, but ... */
1035             cc->debug_info_img == NULL || cc->debug_info_sz == 0
1036             || *cts >= (ULong)cc->debug_info_sz) {
1037            /* Hmm.  Offset is nonsensical for this object's .debug_info
1038               section.  Be safe and reject it. */
1039            cc->barf("get_Form_contents: DW_FORM_ref_addr points "
1040                     "outside .debug_info");
1041         }
1042         break;
1043
1044      case DW_FORM_strp: {
1045         /* this is an offset into .debug_str */
1046         UChar* str;
1047         UWord uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
1048         if (cc->debug_str_img == NULL || uw >= cc->debug_str_sz)
1049            cc->barf("get_Form_contents: DW_FORM_strp "
1050                     "points outside .debug_str");
1051         /* FIXME: check the entire string lies inside debug_str,
1052            not just the first byte of it. */
1053         str = (UChar*)cc->debug_str_img + uw;
1054         TRACE_D3("(indirect string, offset: 0x%lx): %s", uw, str);
1055         *cts = (ULong)(UWord)str;
1056         *ctsMemSzB = 1 + (ULong)VG_(strlen)(str);
1057         break;
1058      }
1059      case DW_FORM_string: {
1060         UChar* str = get_AsciiZ(c);
1061         TRACE_D3("%s", str);
1062         *cts = (ULong)(UWord)str;
1063         /* strlen is safe because get_AsciiZ already 'vetted' the
1064            entire string */
1065         *ctsMemSzB = 1 + (ULong)VG_(strlen)(str);
1066         break;
1067      }
1068      case DW_FORM_ref1: {
1069         UChar  u8 = get_UChar(c);
1070         UWord res = cc->cu_start_offset + (UWord)u8;
1071         *cts = (ULong)res;
1072         *ctsSzB = sizeof(UWord);
1073         TRACE_D3("<%lx>", res);
1074         break;
1075      }
1076      case DW_FORM_ref2: {
1077         UShort  u16 = get_UShort(c);
1078         UWord res = cc->cu_start_offset + (UWord)u16;
1079         *cts = (ULong)res;
1080         *ctsSzB = sizeof(UWord);
1081         TRACE_D3("<%lx>", res);
1082         break;
1083      }
1084      case DW_FORM_ref4: {
1085         UInt  u32 = get_UInt(c);
1086         UWord res = cc->cu_start_offset + (UWord)u32;
1087         *cts = (ULong)res;
1088         *ctsSzB = sizeof(UWord);
1089         TRACE_D3("<%lx>", res);
1090         break;
1091      }
1092      case DW_FORM_ref8: {
1093         ULong  u64 = get_ULong(c);
1094         UWord res = cc->cu_start_offset + (UWord)u64;
1095         *cts = (ULong)res;
1096         *ctsSzB = sizeof(UWord);
1097         TRACE_D3("<%lx>", res);
1098         break;
1099      }
1100      case DW_FORM_ref_udata: {
1101         ULong  u64 = get_ULEB128(c);
1102         UWord res = cc->cu_start_offset + (UWord)u64;
1103         *cts = (ULong)res;
1104         *ctsSzB = sizeof(UWord);
1105         TRACE_D3("<%lx>", res);
1106         break;
1107      }
1108      case DW_FORM_flag: {
1109         UChar u8 = get_UChar(c);
1110         TRACE_D3("%u", (UInt)u8);
1111         *cts = (ULong)u8;
1112         *ctsSzB = 1;
1113         break;
1114      }
1115      case DW_FORM_flag_present:
1116         TRACE_D3("1");
1117         *cts = 1;
1118         *ctsSzB = 1;
1119         break;
1120      case DW_FORM_block1: {
1121         ULong  u64b;
1122         ULong  u64 = (ULong)get_UChar(c);
1123         UChar* block = get_address_of_Cursor(c);
1124         TRACE_D3("%llu byte block: ", u64);
1125         for (u64b = u64; u64b > 0; u64b--) {
1126            UChar u8 = get_UChar(c);
1127            TRACE_D3("%x ", (UInt)u8);
1128         }
1129         *cts = (ULong)(UWord)block;
1130         *ctsMemSzB = (UWord)u64;
1131         break;
1132      }
1133      case DW_FORM_block2: {
1134         ULong  u64b;
1135         ULong  u64 = (ULong)get_UShort(c);
1136         UChar* block = get_address_of_Cursor(c);
1137         TRACE_D3("%llu byte block: ", u64);
1138         for (u64b = u64; u64b > 0; u64b--) {
1139            UChar u8 = get_UChar(c);
1140            TRACE_D3("%x ", (UInt)u8);
1141         }
1142         *cts = (ULong)(UWord)block;
1143         *ctsMemSzB = (UWord)u64;
1144         break;
1145      }
1146      case DW_FORM_block4: {
1147         ULong  u64b;
1148         ULong  u64 = (ULong)get_UInt(c);
1149         UChar* block = get_address_of_Cursor(c);
1150         TRACE_D3("%llu byte block: ", u64);
1151         for (u64b = u64; u64b > 0; u64b--) {
1152            UChar u8 = get_UChar(c);
1153            TRACE_D3("%x ", (UInt)u8);
1154         }
1155         *cts = (ULong)(UWord)block;
1156         *ctsMemSzB = (UWord)u64;
1157         break;
1158      }
1159      case DW_FORM_exprloc:
1160      case DW_FORM_block: {
1161         ULong  u64b;
1162         ULong  u64 = (ULong)get_ULEB128(c);
1163         UChar* block = get_address_of_Cursor(c);
1164         TRACE_D3("%llu byte block: ", u64);
1165         for (u64b = u64; u64b > 0; u64b--) {
1166            UChar u8 = get_UChar(c);
1167            TRACE_D3("%x ", (UInt)u8);
1168         }
1169         *cts = (ULong)(UWord)block;
1170         *ctsMemSzB = (UWord)u64;
1171         break;
1172      }
1173      case DW_FORM_ref_sig8: {
1174         ULong  u64b;
1175         UChar* block = get_address_of_Cursor(c);
1176         TRACE_D3("8 byte signature: ");
1177         for (u64b = 8; u64b > 0; u64b--) {
1178            UChar u8 = get_UChar(c);
1179            TRACE_D3("%x ", (UInt)u8);
1180         }
1181         *cts = (ULong)(UWord)block;
1182         *ctsMemSzB = 8;
1183         break;
1184      }
1185      case DW_FORM_indirect:
1186         get_Form_contents (cts, ctsSzB, ctsMemSzB, cc, c, td3,
1187                            (DW_FORM)get_ULEB128(c));
1188         return;
1189
1190      default:
1191         VG_(printf)(
1192            "get_Form_contents: unhandled %d (%s) at <%lx>\n",
1193            form, ML_(pp_DW_FORM)(form), get_position_of_Cursor(c));
1194         c->barf("get_Form_contents: unhandled DW_FORM");
1195   }
1196}
1197
1198
1199/*------------------------------------------------------------*/
1200/*---                                                      ---*/
1201/*--- Parsing of variable-related DIEs                     ---*/
1202/*---                                                      ---*/
1203/*------------------------------------------------------------*/
1204
1205typedef
1206   struct _TempVar {
1207      UChar*  name; /* in DebugInfo's .strchunks */
1208      /* Represent ranges economically.  nRanges is the number of
1209         ranges.  Cases:
1210         0: .rngOneMin .rngOneMax .manyRanges are all zero
1211         1: .rngOneMin .rngOneMax hold the range; .rngMany is NULL
1212         2: .rngOneMin .rngOneMax are zero; .rngMany holds the ranges.
1213         This is merely an optimisation to avoid having to allocate
1214         and free the XArray in the common (98%) of cases where there
1215         is zero or one address ranges. */
1216      UWord   nRanges;
1217      Addr    rngOneMin;
1218      Addr    rngOneMax;
1219      XArray* rngMany; /* of AddrRange.  NON-UNIQUE PTR in AR_DINFO. */
1220      /* Do not free .rngMany, since many TempVars will have the same
1221         value.  Instead the associated storage is to be freed by
1222         deleting 'rangetree', which stores a single copy of each
1223         range. */
1224      /* --- */
1225      Int     level;
1226      UWord   typeR; /* a cuOff */
1227      GExpr*  gexpr; /* for this variable */
1228      GExpr*  fbGX;  /* to find the frame base of the enclosing fn, if
1229                        any */
1230      UChar*  fName; /* declaring file name, or NULL */
1231      Int     fLine; /* declaring file line number, or zero */
1232      /* offset in .debug_info, so that abstract instances can be
1233         found to satisfy references from concrete instances. */
1234      UWord   dioff;
1235      UWord   absOri; /* so the absOri fields refer to dioff fields
1236                         in some other, related TempVar. */
1237   }
1238   TempVar;
1239
1240#define N_D3_VAR_STACK 48
1241
1242typedef
1243   struct {
1244      /* Contains the range stack: a stack of address ranges, one
1245         stack entry for each nested scope.
1246
1247         Some scope entries are created by function definitions
1248         (DW_AT_subprogram), and for those, we also note the GExpr
1249         derived from its DW_AT_frame_base attribute, if any.
1250         Consequently it should be possible to find, for any
1251         variable's DIE, the GExpr for the the containing function's
1252         DW_AT_frame_base by scanning back through the stack to find
1253         the nearest entry associated with a function.  This somewhat
1254         elaborate scheme is provided so as to make it possible to
1255         obtain the correct DW_AT_frame_base expression even in the
1256         presence of nested functions (or to be more precise, in the
1257         presence of nested DW_AT_subprogram DIEs).
1258      */
1259      Int     sp; /* [sp] is innermost active entry; sp==-1 for empty
1260                     stack */
1261      XArray* ranges[N_D3_VAR_STACK]; /* XArray of AddrRange */
1262      Int     level[N_D3_VAR_STACK];  /* D3 DIE levels */
1263      Bool    isFunc[N_D3_VAR_STACK]; /* from DW_AT_subprogram? */
1264      GExpr*  fbGX[N_D3_VAR_STACK];   /* if isFunc, contains the FB
1265                                         expr, else NULL */
1266      /* The file name table.  Is a mapping from integer index to the
1267         (permanent) copy of the string, iow a non-img area. */
1268      XArray* /* of UChar* */ filenameTable;
1269   }
1270   D3VarParser;
1271
1272static void varstack_show ( D3VarParser* parser, HChar* str ) {
1273   Word i, j;
1274   VG_(printf)("  varstack (%s) {\n", str);
1275   for (i = 0; i <= parser->sp; i++) {
1276      XArray* xa = parser->ranges[i];
1277      vg_assert(xa);
1278      VG_(printf)("    [%ld] (level %d)", i, parser->level[i]);
1279      if (parser->isFunc[i]) {
1280         VG_(printf)(" (fbGX=%p)", parser->fbGX[i]);
1281      } else {
1282         vg_assert(parser->fbGX[i] == NULL);
1283      }
1284      VG_(printf)(": ");
1285      if (VG_(sizeXA)( xa ) == 0) {
1286         VG_(printf)("** empty PC range array **");
1287      } else {
1288         for (j = 0; j < VG_(sizeXA)( xa ); j++) {
1289            AddrRange* range = (AddrRange*) VG_(indexXA)( xa, j );
1290            vg_assert(range);
1291            VG_(printf)("[%#lx,%#lx] ", range->aMin, range->aMax);
1292         }
1293      }
1294      VG_(printf)("\n");
1295   }
1296   VG_(printf)("  }\n");
1297}
1298
1299/* Remove from the stack, all entries with .level > 'level' */
1300static
1301void varstack_preen ( D3VarParser* parser, Bool td3, Int level )
1302{
1303   Bool changed = False;
1304   vg_assert(parser->sp < N_D3_VAR_STACK);
1305   while (True) {
1306      vg_assert(parser->sp >= -1);
1307      if (parser->sp == -1) break;
1308      if (parser->level[parser->sp] <= level) break;
1309      if (0)
1310         TRACE_D3("BBBBAAAA varstack_pop [newsp=%d]\n", parser->sp-1);
1311      vg_assert(parser->ranges[parser->sp]);
1312      /* Who allocated this xa?  get_range_list() or
1313         unitary_range_list(). */
1314      VG_(deleteXA)( parser->ranges[parser->sp] );
1315      parser->ranges[parser->sp] = NULL;
1316      parser->level[parser->sp]  = 0;
1317      parser->isFunc[parser->sp] = False;
1318      parser->fbGX[parser->sp]   = NULL;
1319      parser->sp--;
1320      changed = True;
1321   }
1322   if (changed && td3)
1323      varstack_show( parser, "after preen" );
1324}
1325
1326static void varstack_push ( CUConst* cc,
1327                            D3VarParser* parser,
1328                            Bool td3,
1329                            XArray* ranges, Int level,
1330                            Bool    isFunc, GExpr* fbGX ) {
1331   if (0)
1332   TRACE_D3("BBBBAAAA varstack_push[newsp=%d]: %d  %p\n",
1333            parser->sp+1, level, ranges);
1334
1335   /* First we need to zap everything >= 'level', as we are about to
1336      replace any previous entry at 'level', so .. */
1337   varstack_preen(parser, /*td3*/False, level-1);
1338
1339   vg_assert(parser->sp >= -1);
1340   vg_assert(parser->sp < N_D3_VAR_STACK);
1341   if (parser->sp == N_D3_VAR_STACK-1)
1342      cc->barf("varstack_push: N_D3_VAR_STACK is too low; "
1343               "increase and recompile");
1344   if (parser->sp >= 0)
1345      vg_assert(parser->level[parser->sp] < level);
1346   parser->sp++;
1347   vg_assert(parser->ranges[parser->sp] == NULL);
1348   vg_assert(parser->level[parser->sp]  == 0);
1349   vg_assert(parser->isFunc[parser->sp] == False);
1350   vg_assert(parser->fbGX[parser->sp]   == NULL);
1351   vg_assert(ranges != NULL);
1352   if (!isFunc) vg_assert(fbGX == NULL);
1353   parser->ranges[parser->sp] = ranges;
1354   parser->level[parser->sp]  = level;
1355   parser->isFunc[parser->sp] = isFunc;
1356   parser->fbGX[parser->sp]   = fbGX;
1357   if (td3)
1358      varstack_show( parser, "after push" );
1359}
1360
1361
1362/* cts, ctsSzB, ctsMemSzB are derived from a DW_AT_location and so
1363   refer either to a location expression or to a location list.
1364   Figure out which, and in both cases bundle the expression or
1365   location list into a so-called GExpr (guarded expression). */
1366__attribute__((noinline))
1367static GExpr* get_GX ( CUConst* cc, Bool td3,
1368                       ULong cts, Int ctsSzB, UWord ctsMemSzB )
1369{
1370   GExpr* gexpr = NULL;
1371   if (ctsMemSzB > 0 && ctsSzB == 0) {
1372      /* represents an in-line location expression, and cts points
1373         right at it */
1374      gexpr = make_singleton_GX( (UChar*)(UWord)cts, ctsMemSzB );
1375   }
1376   else
1377   if (ctsMemSzB == 0 && ctsSzB > 0) {
1378      /* represents location list.  cts is the offset of it in
1379         .debug_loc. */
1380      if (!cc->cu_svma_known)
1381         cc->barf("get_GX: location list, but CU svma is unknown");
1382      gexpr = make_general_GX( cc, td3, (UWord)cts, cc->cu_svma );
1383   }
1384   else {
1385      vg_assert(0); /* else caller is bogus */
1386   }
1387   return gexpr;
1388}
1389
1390
1391static
1392void read_filename_table( /*MOD*/D3VarParser* parser,
1393                          CUConst* cc, UWord debug_line_offset,
1394                          Bool td3 )
1395{
1396   Bool   is_dw64;
1397   Cursor c;
1398   Word   i;
1399   UShort version;
1400   UChar  opcode_base;
1401   UChar* str;
1402
1403   vg_assert(parser && cc && cc->barf);
1404   if ((!cc->debug_line_img)
1405       || cc->debug_line_sz <= debug_line_offset)
1406      cc->barf("read_filename_table: .debug_line is missing?");
1407
1408   init_Cursor( &c, cc->debug_line_img,
1409                cc->debug_line_sz, debug_line_offset, cc->barf,
1410                "Overrun whilst reading .debug_line section(1)" );
1411
1412   /* unit_length = */
1413      get_Initial_Length( &is_dw64, &c,
1414           "read_filename_table: invalid initial-length field" );
1415   version = get_UShort( &c );
1416   if (version != 2 && version != 3 && version != 4)
1417     cc->barf("read_filename_table: Only DWARF version 2, 3 and 4 line info "
1418              "is currently supported.");
1419   /*header_length              = (ULong)*/ get_Dwarfish_UWord( &c, is_dw64 );
1420   /*minimum_instruction_length = */ get_UChar( &c );
1421   if (version >= 4)
1422      /*maximum_operations_per_insn = */ get_UChar( &c );
1423   /*default_is_stmt            = */ get_UChar( &c );
1424   /*line_base                  = (Char)*/ get_UChar( &c );
1425   /*line_range                 = */ get_UChar( &c );
1426   opcode_base                = get_UChar( &c );
1427   /* skip over "standard_opcode_lengths" */
1428   for (i = 1; i < (Word)opcode_base; i++)
1429     (void)get_UChar( &c );
1430
1431   /* skip over the directory names table */
1432   while (peek_UChar(&c) != 0) {
1433     (void)get_AsciiZ(&c);
1434   }
1435   (void)get_UChar(&c); /* skip terminating zero */
1436
1437   /* Read and record the file names table */
1438   vg_assert(parser->filenameTable);
1439   vg_assert( VG_(sizeXA)( parser->filenameTable ) == 0 );
1440   /* Add a dummy index-zero entry.  DWARF3 numbers its files
1441      from 1, for some reason. */
1442   str = ML_(addStr)( cc->di, "<unknown_file>", -1 );
1443   VG_(addToXA)( parser->filenameTable, &str );
1444   while (peek_UChar(&c) != 0) {
1445      str = get_AsciiZ(&c);
1446      TRACE_D3("  read_filename_table: %ld %s\n",
1447               VG_(sizeXA)(parser->filenameTable), str);
1448      str = ML_(addStr)( cc->di, str, -1 );
1449      VG_(addToXA)( parser->filenameTable, &str );
1450      (void)get_ULEB128( &c ); /* skip directory index # */
1451      (void)get_ULEB128( &c ); /* skip last mod time */
1452      (void)get_ULEB128( &c ); /* file size */
1453   }
1454   /* We're done!  The rest of it is not interesting. */
1455}
1456
1457
1458__attribute__((noinline))
1459static void parse_var_DIE (
1460   /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
1461   /*MOD*/XArray* /* of TempVar* */ tempvars,
1462   /*MOD*/XArray* /* of GExpr* */ gexprs,
1463   /*MOD*/D3VarParser* parser,
1464   DW_TAG dtag,
1465   UWord posn,
1466   Int level,
1467   Cursor* c_die,
1468   Cursor* c_abbv,
1469   CUConst* cc,
1470   Bool td3
1471)
1472{
1473   ULong       cts;
1474   Int         ctsSzB;
1475   UWord       ctsMemSzB;
1476
1477   UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
1478   UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv );
1479
1480   varstack_preen( parser, td3, level-1 );
1481
1482   if (dtag == DW_TAG_compile_unit) {
1483      Bool have_lo    = False;
1484      Bool have_hi1   = False;
1485      Bool have_range = False;
1486      Addr ip_lo    = 0;
1487      Addr ip_hi1   = 0;
1488      Addr rangeoff = 0;
1489      while (True) {
1490         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
1491         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
1492         if (attr == 0 && form == 0) break;
1493         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
1494                            cc, c_die, False/*td3*/, form );
1495         if (attr == DW_AT_low_pc && ctsSzB > 0) {
1496            ip_lo   = cts;
1497            have_lo = True;
1498         }
1499         if (attr == DW_AT_high_pc && ctsSzB > 0) {
1500            ip_hi1   = cts;
1501            have_hi1 = True;
1502         }
1503         if (attr == DW_AT_ranges && ctsSzB > 0) {
1504            rangeoff = cts;
1505            have_range = True;
1506         }
1507         if (attr == DW_AT_stmt_list && ctsSzB > 0) {
1508            read_filename_table( parser, cc, (UWord)cts, td3 );
1509         }
1510      }
1511      /* Now, does this give us an opportunity to find this
1512         CU's svma? */
1513#if 0
1514      if (level == 0 && have_lo) {
1515         vg_assert(!cc->cu_svma_known); /* if this fails, it must be
1516         because we've already seen a DW_TAG_compile_unit DIE at level
1517         0.  But that can't happen, because DWARF3 only allows exactly
1518         one top level DIE per CU. */
1519         cc->cu_svma_known = True;
1520         cc->cu_svma = ip_lo;
1521         if (1)
1522            TRACE_D3("BBBBAAAA acquire CU_SVMA of %p\n", cc->cu_svma);
1523         /* Now, it may be that this DIE doesn't tell us the CU's
1524            SVMA, by way of not having a DW_AT_low_pc.  That's OK --
1525            the CU doesn't *have* to have its SVMA specified.
1526
1527            But as per last para D3 spec sec 3.1.1 ("Normal and
1528            Partial Compilation Unit Entries", "If the base address
1529            (viz, the SVMA) is undefined, then any DWARF entry of
1530            structure defined interms of the base address of that
1531            compilation unit is not valid.".  So that means, if whilst
1532            processing the children of this top level DIE (or their
1533            children, etc) we see a DW_AT_range, and cu_svma_known is
1534            False, then the DIE that contains it is (per the spec)
1535            invalid, and we can legitimately stop and complain. */
1536      }
1537#else
1538      /* .. whereas The Reality is, simply assume the SVMA is zero
1539         if it isn't specified. */
1540      if (level == 0) {
1541         vg_assert(!cc->cu_svma_known);
1542         cc->cu_svma_known = True;
1543         if (have_lo)
1544            cc->cu_svma = ip_lo;
1545         else
1546            cc->cu_svma = 0;
1547      }
1548#endif
1549      /* Do we have something that looks sane? */
1550      if (have_lo && have_hi1 && (!have_range)) {
1551         if (ip_lo < ip_hi1)
1552            varstack_push( cc, parser, td3,
1553                           unitary_range_list(ip_lo, ip_hi1 - 1),
1554                           level,
1555                           False/*isFunc*/, NULL/*fbGX*/ );
1556      } else
1557      if ((!have_lo) && (!have_hi1) && have_range) {
1558         varstack_push( cc, parser, td3,
1559                        get_range_list( cc, td3,
1560                                        rangeoff, cc->cu_svma ),
1561                        level,
1562                        False/*isFunc*/, NULL/*fbGX*/ );
1563      } else
1564      if ((!have_lo) && (!have_hi1) && (!have_range)) {
1565         /* CU has no code, presumably? */
1566         varstack_push( cc, parser, td3,
1567                        empty_range_list(),
1568                        level,
1569                        False/*isFunc*/, NULL/*fbGX*/ );
1570      } else
1571      if (have_lo && (!have_hi1) && have_range && ip_lo == 0) {
1572         /* broken DIE created by gcc-4.3.X ?  Ignore the
1573            apparently-redundant DW_AT_low_pc and use the DW_AT_ranges
1574            instead. */
1575         varstack_push( cc, parser, td3,
1576                        get_range_list( cc, td3,
1577                                        rangeoff, cc->cu_svma ),
1578                        level,
1579                        False/*isFunc*/, NULL/*fbGX*/ );
1580      } else {
1581         if (0) VG_(printf)("I got hlo %d hhi1 %d hrange %d\n",
1582                            (Int)have_lo, (Int)have_hi1, (Int)have_range);
1583         goto bad_DIE;
1584      }
1585   }
1586
1587   if (dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram) {
1588      Bool   have_lo    = False;
1589      Bool   have_hi1   = False;
1590      Bool   have_range = False;
1591      Addr   ip_lo      = 0;
1592      Addr   ip_hi1     = 0;
1593      Addr   rangeoff   = 0;
1594      Bool   isFunc     = dtag == DW_TAG_subprogram;
1595      GExpr* fbGX       = NULL;
1596      while (True) {
1597         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
1598         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
1599         if (attr == 0 && form == 0) break;
1600         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
1601                            cc, c_die, False/*td3*/, form );
1602         if (attr == DW_AT_low_pc && ctsSzB > 0) {
1603            ip_lo   = cts;
1604            have_lo = True;
1605         }
1606         if (attr == DW_AT_high_pc && ctsSzB > 0) {
1607            ip_hi1   = cts;
1608            have_hi1 = True;
1609         }
1610         if (attr == DW_AT_ranges && ctsSzB > 0) {
1611            rangeoff = cts;
1612            have_range = True;
1613         }
1614         if (isFunc
1615             && attr == DW_AT_frame_base
1616             && ((ctsMemSzB > 0 && ctsSzB == 0)
1617                 || (ctsMemSzB == 0 && ctsSzB > 0))) {
1618            fbGX = get_GX( cc, False/*td3*/, cts, ctsSzB, ctsMemSzB );
1619            vg_assert(fbGX);
1620            VG_(addToXA)(gexprs, &fbGX);
1621         }
1622      }
1623      /* Do we have something that looks sane? */
1624      if (dtag == DW_TAG_subprogram
1625          && (!have_lo) && (!have_hi1) && (!have_range)) {
1626         /* This is legit - ignore it. Sec 3.3.3: "A subroutine entry
1627            representing a subroutine declaration that is not also a
1628            definition does not have code address or range
1629            attributes." */
1630      } else
1631      if (dtag == DW_TAG_lexical_block
1632          && (!have_lo) && (!have_hi1) && (!have_range)) {
1633         /* I believe this is legit, and means the lexical block
1634            contains no insns (whatever that might mean).  Ignore. */
1635      } else
1636      if (have_lo && have_hi1 && (!have_range)) {
1637         /* This scope supplies just a single address range. */
1638         if (ip_lo < ip_hi1)
1639            varstack_push( cc, parser, td3,
1640                           unitary_range_list(ip_lo, ip_hi1 - 1),
1641                           level, isFunc, fbGX );
1642      } else
1643      if ((!have_lo) && (!have_hi1) && have_range) {
1644         /* This scope supplies multiple address ranges via the use of
1645            a range list. */
1646         varstack_push( cc, parser, td3,
1647                        get_range_list( cc, td3,
1648                                        rangeoff, cc->cu_svma ),
1649                        level, isFunc, fbGX );
1650      } else
1651      if (have_lo && (!have_hi1) && (!have_range)) {
1652         /* This scope is bogus.  The D3 spec sec 3.4 (Lexical Block
1653            Entries) says fairly clearly that a scope must have either
1654            _range or (_low_pc and _high_pc). */
1655         /* The spec is a bit ambiguous though.  Perhaps a single byte
1656            range is intended?  See sec 2.17 (Code Addresses And Ranges) */
1657         /* This case is here because icc9 produced this:
1658         <2><13bd>: DW_TAG_lexical_block
1659            DW_AT_decl_line   : 5229
1660            DW_AT_decl_column : 37
1661            DW_AT_decl_file   : 1
1662            DW_AT_low_pc      : 0x401b03
1663         */
         /* Ignore (seems safer than pushing a single byte range) */
1665      } else
1666         goto bad_DIE;
1667   }
1668
1669   if (dtag == DW_TAG_variable || dtag == DW_TAG_formal_parameter) {
1670      UChar* name        = NULL;
1671      UWord  typeR       = D3_INVALID_CUOFF;
1672      Bool   external    = False;
1673      GExpr* gexpr       = NULL;
1674      Int    n_attrs     = 0;
1675      UWord  abs_ori     = (UWord)D3_INVALID_CUOFF;
1676      Int    lineNo      = 0;
1677      UChar* fileName    = NULL;
1678      while (True) {
1679         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
1680         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
1681         if (attr == 0 && form == 0) break;
1682         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
1683                            cc, c_die, False/*td3*/, form );
1684         n_attrs++;
1685         if (attr == DW_AT_name && ctsMemSzB > 0) {
1686            name = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 );
1687         }
1688         if (attr == DW_AT_location
1689             && ((ctsMemSzB > 0 && ctsSzB == 0)
1690                 || (ctsMemSzB == 0 && ctsSzB > 0))) {
1691            gexpr = get_GX( cc, False/*td3*/, cts, ctsSzB, ctsMemSzB );
1692            vg_assert(gexpr);
1693            VG_(addToXA)(gexprs, &gexpr);
1694         }
1695         if (attr == DW_AT_type && ctsSzB > 0) {
1696            typeR = (UWord)cts;
1697         }
1698         if (attr == DW_AT_external && ctsSzB > 0 && cts > 0) {
1699            external = True;
1700         }
1701         if (attr == DW_AT_abstract_origin && ctsSzB > 0) {
1702            abs_ori = (UWord)cts;
1703         }
1704         if (attr == DW_AT_declaration && ctsSzB > 0 && cts > 0) {
1705            /*declaration = True;*/
1706         }
1707         if (attr == DW_AT_decl_line && ctsSzB > 0) {
1708            lineNo = (Int)cts;
1709         }
1710         if (attr == DW_AT_decl_file && ctsSzB > 0) {
1711            Int ftabIx = (Int)cts;
1712            if (ftabIx >= 1
1713                && ftabIx < VG_(sizeXA)( parser->filenameTable )) {
1714               fileName = *(UChar**)
1715                          VG_(indexXA)( parser->filenameTable, ftabIx );
1716               vg_assert(fileName);
1717            }
1718            if (0) VG_(printf)("XXX filename = %s\n", fileName);
1719         }
1720      }
      /* We'll collect it if one of the following three
1722         conditions holds:
1723         (1) has location and type    -> completed
1724         (2) has type only            -> is an abstract instance
1725         (3) has location and abs_ori -> is a concrete instance
1726         Name, filename and line number are all optional frills.
1727      */
1728      if ( /* 1 */ (gexpr && typeR != D3_INVALID_CUOFF)
1729           /* 2 */ || (typeR != D3_INVALID_CUOFF)
1730           /* 3 */ || (gexpr && abs_ori != (UWord)D3_INVALID_CUOFF) ) {
1731
1732         /* Add this variable to the list of interesting looking
1733            variables.  Crucially, note along with it the address
1734            range(s) associated with the variable, which for locals
1735            will be the address ranges at the top of the varparser's
1736            stack. */
1737         GExpr*   fbGX = NULL;
1738         Word     i, nRanges;
1739         XArray*  /* of AddrRange */ xa;
1740         TempVar* tv;
1741         /* Stack can't be empty; we put a dummy entry on it for the
1742            entire address range before starting with the DIEs for
1743            this CU. */
1744         vg_assert(parser->sp >= 0);
1745
1746         /* If this is a local variable (non-external), try to find
1747            the GExpr for the DW_AT_frame_base of the containing
1748            function.  It should have been pushed on the stack at the
1749            time we encountered its DW_TAG_subprogram DIE, so the way
1750            to find it is to scan back down the stack looking for it.
1751            If there isn't an enclosing stack entry marked 'isFunc'
1752            then we must be seeing variable or formal param DIEs
1753            outside of a function, so we deem the Dwarf to be
1754            malformed if that happens.  Note that the fbGX may be NULL
            if the containing DW_TAG_subprogram didn't supply a
1756            DW_AT_frame_base -- that's OK, but there must actually be
1757            a containing DW_TAG_subprogram. */
1758         if (!external) {
1759            Bool found = False;
1760            for (i = parser->sp; i >= 0; i--) {
1761               if (parser->isFunc[i]) {
1762                  fbGX = parser->fbGX[i];
1763                  found = True;
1764                  break;
1765               }
1766            }
1767            if (!found) {
1768               if (0 && VG_(clo_verbosity) >= 0) {
1769                  VG_(message)(Vg_DebugMsg,
1770                     "warning: parse_var_DIE: non-external variable "
1771                     "outside DW_TAG_subprogram\n");
1772               }
1773               /* goto bad_DIE; */
1774               /* This seems to happen a lot.  Just ignore it -- if,
1775                  when we come to evaluation of the location (guarded)
1776                  expression, it requires a frame base value, and
1777                  there's no expression for that, then evaluation as a
1778                  whole will fail.  Harmless - a bit of a waste of
1779                  cycles but nothing more. */
1780            }
1781         }
1782
1783         /* re "external ? 0 : parser->sp" (twice), if the var is
1784            marked 'external' then we must put it at the global scope,
1785            as only the global scope (level 0) covers the entire PC
1786            address space.  It is asserted elsewhere that level 0
1787            always covers the entire address space. */
1788         xa = parser->ranges[external ? 0 : parser->sp];
1789         nRanges = VG_(sizeXA)(xa);
1790         vg_assert(nRanges >= 0);
1791
1792         tv = ML_(dinfo_zalloc)( "di.readdwarf3.pvD.1", sizeof(TempVar) );
1793         tv->name   = name;
1794         tv->level  = external ? 0 : parser->sp;
1795         tv->typeR  = typeR;
1796         tv->gexpr  = gexpr;
1797         tv->fbGX   = fbGX;
1798         tv->fName  = fileName;
1799         tv->fLine  = lineNo;
1800         tv->dioff  = posn;
1801         tv->absOri = abs_ori;
1802
1803         /* See explanation on definition of type TempVar for the
1804            reason for this elaboration. */
1805         tv->nRanges = nRanges;
1806         tv->rngOneMin = 0;
1807         tv->rngOneMax = 0;
1808         tv->rngMany = NULL;
1809         if (nRanges == 1) {
1810            AddrRange* range = VG_(indexXA)(xa, 0);
1811            tv->rngOneMin = range->aMin;
1812            tv->rngOneMax = range->aMax;
1813         }
1814         else if (nRanges > 1) {
1815            /* See if we already have a range list which is
1816               structurally identical.  If so, use that; if not, clone
1817               this one, and add it to our collection. */
1818            UWord keyW, valW;
1819            if (VG_(lookupFM)( rangestree, &keyW, &valW, (UWord)xa )) {
1820               XArray* old = (XArray*)keyW;
1821               tl_assert(valW == 0);
1822               tl_assert(old != xa);
1823               tv->rngMany = old;
1824            } else {
1825               XArray* cloned = VG_(cloneXA)( "di.readdwarf3.pvD.2", xa );
1826               tv->rngMany = cloned;
1827               VG_(addToFM)( rangestree, (UWord)cloned, 0 );
1828            }
1829         }
1830
1831         VG_(addToXA)( tempvars, &tv );
1832
1833         TRACE_D3("  Recording this variable, with %ld PC range(s)\n",
1834                  VG_(sizeXA)(xa) );
1835         /* collect stats on how effective the ->ranges special
1836            casing is */
1837         if (0) {
1838            static Int ntot=0, ngt=0;
1839            ntot++;
1840            if (tv->rngMany) ngt++;
1841            if (0 == (ntot % 100000))
1842               VG_(printf)("XXXX %d tot, %d cloned\n", ntot, ngt);
1843         }
1844
1845      }
1846
1847      /* Here are some other weird cases seen in the wild:
1848
1849            We have a variable with a name and a type, but no
1850            location.  I guess that's a sign that it has been
1851            optimised away.  Ignore it.  Here's an example:
1852
1853            static Int lc_compar(void* n1, void* n2) {
1854               MC_Chunk* mc1 = *(MC_Chunk**)n1;
1855               MC_Chunk* mc2 = *(MC_Chunk**)n2;
1856               return (mc1->data < mc2->data ? -1 : 1);
1857            }
1858
1859            Both mc1 and mc2 are like this
1860            <2><5bc>: Abbrev Number: 21 (DW_TAG_variable)
1861                DW_AT_name        : mc1
1862                DW_AT_decl_file   : 1
1863                DW_AT_decl_line   : 216
1864                DW_AT_type        : <5d3>
1865
1866            whereas n1 and n2 do have locations specified.
1867
1868            ---------------------------------------------
1869
1870            We see a DW_TAG_formal_parameter with a type, but
1871            no name and no location.  It's probably part of a function type
1872            construction, thusly, hence ignore it:
1873         <1><2b4>: Abbrev Number: 12 (DW_TAG_subroutine_type)
1874             DW_AT_sibling     : <2c9>
1875             DW_AT_prototyped  : 1
1876             DW_AT_type        : <114>
1877         <2><2be>: Abbrev Number: 13 (DW_TAG_formal_parameter)
1878             DW_AT_type        : <13e>
1879         <2><2c3>: Abbrev Number: 13 (DW_TAG_formal_parameter)
1880             DW_AT_type        : <133>
1881
1882            ---------------------------------------------
1883
1884            Is very minimal, like this:
1885            <4><81d>: Abbrev Number: 44 (DW_TAG_variable)
1886                DW_AT_abstract_origin: <7ba>
1887            What that signifies I have no idea.  Ignore.
1888
1889            ----------------------------------------------
1890
1891            Is very minimal, like this:
1892            <200f>: DW_TAG_formal_parameter
1893                DW_AT_abstract_ori: <1f4c>
1894                DW_AT_location    : 13440
1895            What that signifies I have no idea.  Ignore.
1896            It might be significant, though: the variable at least
1897            has a location and so might exist somewhere.
1898            Maybe we should handle this.
1899
1900            ---------------------------------------------
1901
1902            <22407>: DW_TAG_variable
1903              DW_AT_name        : (indirect string, offset: 0x6579):
1904                                  vgPlain_trampoline_stuff_start
1905              DW_AT_decl_file   : 29
1906              DW_AT_decl_line   : 56
1907              DW_AT_external    : 1
1908              DW_AT_declaration : 1
1909
1910            Nameless and typeless variable that has a location?  Who
1911            knows.  Not me.
1912            <2><3d178>: Abbrev Number: 22 (DW_TAG_variable)
1913                 DW_AT_location    : 9 byte block: 3 c0 c7 13 38 0 0 0 0
1914                                     (DW_OP_addr: 3813c7c0)
1915
1916            No, really.  Check it out.  gcc is quite simply borked.
1917            <3><168cc>: Abbrev Number: 141 (DW_TAG_variable)
1918            // followed by no attributes, and the next DIE is a sibling,
1919            // not a child
1920            */
1921   }
1922   return;
1923
1924  bad_DIE:
1925   set_position_of_Cursor( c_die,  saved_die_c_offset );
1926   set_position_of_Cursor( c_abbv, saved_abbv_c_offset );
1927   VG_(printf)("\nparse_var_DIE: confused by:\n");
1928   VG_(printf)(" <%d><%lx>: %s\n", level, posn, ML_(pp_DW_TAG)( dtag ) );
1929   while (True) {
1930      DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
1931      DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
1932      if (attr == 0 && form == 0) break;
1933      VG_(printf)("     %18s: ", ML_(pp_DW_AT)(attr));
1934      /* Get the form contents, so as to print them */
1935      get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
1936                         cc, c_die, True, form );
1937      VG_(printf)("\t\n");
1938   }
1939   VG_(printf)("\n");
1940   cc->barf("parse_var_DIE: confused by the above DIE");
1941   /*NOTREACHED*/
1942}
1943
1944
1945/*------------------------------------------------------------*/
1946/*---                                                      ---*/
1947/*--- Parsing of type-related DIEs                         ---*/
1948/*---                                                      ---*/
1949/*------------------------------------------------------------*/
1950
1951#define N_D3_TYPE_STACK 16
1952
1953typedef
1954   struct {
1955      /* What source language?  'C'=C/C++, 'F'=Fortran, '?'=other
1956         Established once per compilation unit. */
1957      UChar language;
1958      /* A stack of types which are currently under construction */
1959      Int   sp; /* [sp] is innermost active entry; sp==-1 for empty
1960                   stack */
1961      /* Note that the TyEnts in qparentE are temporary copies of the
1962         ones accumulating in the main tyent array.  So it is not safe
1963         to free up anything on them when popping them off the stack
1964         (iow, it isn't safe to use TyEnt__make_EMPTY on them).  Just
1965         memset them to zero when done. */
1966      TyEnt qparentE[N_D3_TYPE_STACK]; /* parent TyEnts */
1967      Int   qlevel[N_D3_TYPE_STACK];
1968
1969   }
1970   D3TypeParser;
1971
1972static void typestack_show ( D3TypeParser* parser, HChar* str ) {
1973   Word i;
1974   VG_(printf)("  typestack (%s) {\n", str);
1975   for (i = 0; i <= parser->sp; i++) {
1976      VG_(printf)("    [%ld] (level %d): ", i, parser->qlevel[i]);
1977      ML_(pp_TyEnt)( &parser->qparentE[i] );
1978      VG_(printf)("\n");
1979   }
1980   VG_(printf)("  }\n");
1981}
1982
1983/* Remove from the stack, all entries with .level > 'level' */
1984static
1985void typestack_preen ( D3TypeParser* parser, Bool td3, Int level )
1986{
1987   Bool changed = False;
1988   vg_assert(parser->sp < N_D3_TYPE_STACK);
1989   while (True) {
1990      vg_assert(parser->sp >= -1);
1991      if (parser->sp == -1) break;
1992      if (parser->qlevel[parser->sp] <= level) break;
1993      if (0)
1994         TRACE_D3("BBBBAAAA typestack_pop [newsp=%d]\n", parser->sp-1);
1995      vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
1996      VG_(memset)(&parser->qparentE[parser->sp], 0, sizeof(TyEnt));
1997      parser->qparentE[parser->sp].cuOff = D3_INVALID_CUOFF;
1998      parser->qparentE[parser->sp].tag = Te_EMPTY;
1999      parser->qlevel[parser->sp] = 0;
2000      parser->sp--;
2001      changed = True;
2002   }
2003   if (changed && td3)
2004      typestack_show( parser, "after preen" );
2005}
2006
2007static Bool typestack_is_empty ( D3TypeParser* parser ) {
2008   vg_assert(parser->sp >= -1 && parser->sp < N_D3_TYPE_STACK);
2009   return parser->sp == -1;
2010}
2011
2012static void typestack_push ( CUConst* cc,
2013                             D3TypeParser* parser,
2014                             Bool td3,
2015                             TyEnt* parentE, Int level ) {
2016   if (0)
2017   TRACE_D3("BBBBAAAA typestack_push[newsp=%d]: %d  %05lx\n",
2018            parser->sp+1, level, parentE->cuOff);
2019
2020   /* First we need to zap everything >= 'level', as we are about to
2021      replace any previous entry at 'level', so .. */
2022   typestack_preen(parser, /*td3*/False, level-1);
2023
2024   vg_assert(parser->sp >= -1);
2025   vg_assert(parser->sp < N_D3_TYPE_STACK);
2026   if (parser->sp == N_D3_TYPE_STACK-1)
2027      cc->barf("typestack_push: N_D3_TYPE_STACK is too low; "
2028               "increase and recompile");
2029   if (parser->sp >= 0)
2030      vg_assert(parser->qlevel[parser->sp] < level);
2031   parser->sp++;
2032   vg_assert(parser->qparentE[parser->sp].tag == Te_EMPTY);
2033   vg_assert(parser->qlevel[parser->sp]  == 0);
2034   vg_assert(parentE);
2035   vg_assert(ML_(TyEnt__is_type)(parentE));
2036   vg_assert(parentE->cuOff != D3_INVALID_CUOFF);
2037   parser->qparentE[parser->sp] = *parentE;
2038   parser->qlevel[parser->sp]  = level;
2039   if (td3)
2040      typestack_show( parser, "after push" );
2041}
2042
2043
2044/* Parse a type-related DIE.  'parser' holds the current parser state.
2045   'admin' is where the completed types are dumped.  'dtag' is the tag
2046   for this DIE.  'c_die' points to the start of the data fields (FORM
2047   stuff) for the DIE.  c_abbv points to the start of the (name,form)
2048   pairs which describe the DIE.
2049
2050   We may find the DIE uninteresting, in which case we should ignore
2051   it.
2052
2053   What happens: the DIE is examined.  If uninteresting, it is ignored.
2054   Otherwise, the DIE gives rise to two things:
2055
2056   (1) the offset of this DIE in the CU -- the cuOffset, a UWord
2057   (2) a TyAdmin structure, which holds the type, or related stuff
2058
2059   (2) is added at the end of 'tyadmins', at some index, say 'i'.
2060
2061   A pair (cuOffset, i) is added to 'tydict'.
2062
2063   Hence 'tyadmins' holds the actual type entities, and 'tydict' holds
2064   a mapping from cuOffset to the index of the corresponding entry in
2065   'tyadmin'.
2066
2067   When resolving a cuOffset to a TyAdmin, first look up the cuOffset
2068   in the tydict (by binary search).  This gives an index into
2069   tyadmins, and the required entity lives in tyadmins at that index.
2070*/
2071__attribute__((noinline))
2072static void parse_type_DIE ( /*MOD*/XArray* /* of TyEnt */ tyents,
2073                             /*MOD*/D3TypeParser* parser,
2074                             DW_TAG dtag,
2075                             UWord posn,
2076                             Int level,
2077                             Cursor* c_die,
2078                             Cursor* c_abbv,
2079                             CUConst* cc,
2080                             Bool td3 )
2081{
2082   ULong cts;
2083   Int   ctsSzB;
2084   UWord ctsMemSzB;
2085   TyEnt typeE;
2086   TyEnt atomE;
2087   TyEnt fieldE;
2088   TyEnt boundE;
2089
2090   UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
2091   UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv );
2092
2093   VG_(memset)( &typeE,  0xAA, sizeof(typeE) );
2094   VG_(memset)( &atomE,  0xAA, sizeof(atomE) );
2095   VG_(memset)( &fieldE, 0xAA, sizeof(fieldE) );
2096   VG_(memset)( &boundE, 0xAA, sizeof(boundE) );
2097
2098   /* If we've returned to a level at or above any previously noted
2099      parent, un-note it, so we don't believe we're still collecting
2100      its children. */
2101   typestack_preen( parser, td3, level-1 );
2102
2103   if (dtag == DW_TAG_compile_unit) {
2104      /* See if we can find DW_AT_language, since it is important for
2105         establishing array bounds (see DW_TAG_subrange_type below in
2106         this fn) */
2107      while (True) {
2108         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2109         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2110         if (attr == 0 && form == 0) break;
2111         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2112                            cc, c_die, False/*td3*/, form );
2113         if (attr != DW_AT_language)
2114            continue;
2115         if (ctsSzB == 0)
2116           goto bad_DIE;
2117         switch (cts) {
2118            case DW_LANG_C89: case DW_LANG_C:
2119            case DW_LANG_C_plus_plus: case DW_LANG_ObjC:
2120            case DW_LANG_ObjC_plus_plus: case DW_LANG_UPC:
2121            case DW_LANG_Upc: case DW_LANG_C99:
2122               parser->language = 'C'; break;
2123            case DW_LANG_Fortran77: case DW_LANG_Fortran90:
2124            case DW_LANG_Fortran95:
2125               parser->language = 'F'; break;
2126            case DW_LANG_Ada83: case DW_LANG_Cobol74:
2127            case DW_LANG_Cobol85: case DW_LANG_Pascal83:
2128            case DW_LANG_Modula2: case DW_LANG_Java:
2129            case DW_LANG_Ada95: case DW_LANG_PLI:
2130            case DW_LANG_D: case DW_LANG_Python:
2131            case DW_LANG_Mips_Assembler:
2132               parser->language = '?'; break;
2133            default:
2134               goto bad_DIE;
2135         }
2136      }
2137   }
2138
2139   if (dtag == DW_TAG_base_type) {
2140      /* We can pick up a new base type any time. */
2141      VG_(memset)(&typeE, 0, sizeof(typeE));
2142      typeE.cuOff = D3_INVALID_CUOFF;
2143      typeE.tag   = Te_TyBase;
2144      while (True) {
2145         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2146         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2147         if (attr == 0 && form == 0) break;
2148         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2149                            cc, c_die, False/*td3*/, form );
2150         if (attr == DW_AT_name && ctsMemSzB > 0) {
2151            typeE.Te.TyBase.name
2152               = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.1",
2153                                    (UChar*)(UWord)cts );
2154         }
2155         if (attr == DW_AT_byte_size && ctsSzB > 0) {
2156            typeE.Te.TyBase.szB = cts;
2157         }
2158         if (attr == DW_AT_encoding && ctsSzB > 0) {
2159            switch (cts) {
2160               case DW_ATE_unsigned: case DW_ATE_unsigned_char:
2161               case DW_ATE_boolean:/* FIXME - is this correct? */
2162                  typeE.Te.TyBase.enc = 'U'; break;
2163               case DW_ATE_signed: case DW_ATE_signed_char:
2164                  typeE.Te.TyBase.enc = 'S'; break;
2165               case DW_ATE_float:
2166                  typeE.Te.TyBase.enc = 'F'; break;
2167               case DW_ATE_complex_float:
2168                  typeE.Te.TyBase.enc = 'C'; break;
2169               default:
2170                  goto bad_DIE;
2171            }
2172         }
2173      }
2174
2175      /* Invent a name if it doesn't have one.  gcc-4.3
2176         -ftree-vectorize is observed to emit nameless base types. */
2177      if (!typeE.Te.TyBase.name)
2178         typeE.Te.TyBase.name
2179            = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.2",
2180                                 "<anon_base_type>" );
2181
2182      /* Do we have something that looks sane? */
2183      if (/* must have a name */
2184          typeE.Te.TyBase.name == NULL
2185          /* and a plausible size.  Yes, really 32: "complex long
2186             double" apparently has size=32 */
2187          || typeE.Te.TyBase.szB < 0 || typeE.Te.TyBase.szB > 32
2188          /* and a plausible encoding */
2189          || (typeE.Te.TyBase.enc != 'U'
2190              && typeE.Te.TyBase.enc != 'S'
2191              && typeE.Te.TyBase.enc != 'F'
2192              && typeE.Te.TyBase.enc != 'C'))
2193         goto bad_DIE;
2194      /* Last minute hack: if we see this
2195         <1><515>: DW_TAG_base_type
2196             DW_AT_byte_size   : 0
2197             DW_AT_encoding    : 5
2198             DW_AT_name        : void
2199         convert it into a real Void type. */
2200      if (typeE.Te.TyBase.szB == 0
2201          && 0 == VG_(strcmp)("void", typeE.Te.TyBase.name)) {
2202         ML_(TyEnt__make_EMPTY)(&typeE);
2203         typeE.tag = Te_TyVoid;
2204         typeE.Te.TyVoid.isFake = False; /* it's a real one! */
2205      }
2206
2207      goto acquire_Type;
2208   }
2209
2210   if (dtag == DW_TAG_pointer_type || dtag == DW_TAG_reference_type
2211       || dtag == DW_TAG_ptr_to_member_type) {
2212      /* This seems legit for _pointer_type and _reference_type.  I
2213         don't know if rolling _ptr_to_member_type in here really is
2214         legit, but it's better than not handling it at all. */
2215      VG_(memset)(&typeE, 0, sizeof(typeE));
2216      typeE.cuOff = D3_INVALID_CUOFF;
2217      typeE.tag   = Te_TyPorR;
2218      /* target type defaults to void */
2219      typeE.Te.TyPorR.typeR = D3_FAKEVOID_CUOFF;
2220      typeE.Te.TyPorR.isPtr = dtag == DW_TAG_pointer_type
2221                              || dtag == DW_TAG_ptr_to_member_type;
2222      /* These three type kinds don't *have* to specify their size, in
2223         which case we assume it's a machine word.  But if they do
2224         specify it, it must be a machine word :-)  This probably
2225         assumes that the word size of the Dwarf3 we're reading is the
2226         same size as that on the machine.  gcc appears to give a size
2227         whereas icc9 doesn't. */
2228      typeE.Te.TyPorR.szB = sizeof(UWord);
2229      while (True) {
2230         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2231         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2232         if (attr == 0 && form == 0) break;
2233         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2234                            cc, c_die, False/*td3*/, form );
2235         if (attr == DW_AT_byte_size && ctsSzB > 0) {
2236            typeE.Te.TyPorR.szB = cts;
2237         }
2238         if (attr == DW_AT_type && ctsSzB > 0) {
2239            typeE.Te.TyPorR.typeR = (UWord)cts;
2240         }
2241      }
2242      /* Do we have something that looks sane? */
2243      if (typeE.Te.TyPorR.szB != sizeof(UWord))
2244         goto bad_DIE;
2245      else
2246         goto acquire_Type;
2247   }
2248
2249   if (dtag == DW_TAG_enumeration_type) {
2250      /* Create a new Type to hold the results. */
2251      VG_(memset)(&typeE, 0, sizeof(typeE));
2252      typeE.cuOff = posn;
2253      typeE.tag   = Te_TyEnum;
2254      typeE.Te.TyEnum.atomRs
2255         = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.enum_type.1",
2256                       ML_(dinfo_free),
2257                       sizeof(UWord) );
2258      while (True) {
2259         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2260         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2261         if (attr == 0 && form == 0) break;
2262         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2263                            cc, c_die, False/*td3*/, form );
2264         if (attr == DW_AT_name && ctsMemSzB > 0) {
2265            typeE.Te.TyEnum.name
2266              = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.2",
2267                                   (UChar*)(UWord)cts );
2268         }
2269         if (attr == DW_AT_byte_size && ctsSzB > 0) {
2270            typeE.Te.TyEnum.szB = cts;
2271         }
2272      }
2273
2274      if (!typeE.Te.TyEnum.name)
2275         typeE.Te.TyEnum.name
2276            = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.3",
2277                                 "<anon_enum_type>" );
2278
2279      /* Do we have something that looks sane? */
2280      if (typeE.Te.TyEnum.szB == 0 /* we must know the size */)
2281         goto bad_DIE;
2282      /* On't stack! */
2283      typestack_push( cc, parser, td3, &typeE, level );
2284      goto acquire_Type;
2285   }
2286
2287   /* gcc (GCC) 4.4.0 20081017 (experimental) occasionally produces
2288      DW_TAG_enumerator with only a DW_AT_name but no
2289      DW_AT_const_value.  This is in violation of the Dwarf3 standard,
2290      and appears to be a new "feature" of gcc - versions 4.3.x and
2291      earlier do not appear to do this.  So accept DW_TAG_enumerator
2292      which only have a name but no value.  An example:
2293
2294      <1><180>: Abbrev Number: 6 (DW_TAG_enumeration_type)
2295         <181>   DW_AT_name        : (indirect string, offset: 0xda70):
2296                                     QtMsgType
2297         <185>   DW_AT_byte_size   : 4
2298         <186>   DW_AT_decl_file   : 14
2299         <187>   DW_AT_decl_line   : 1480
2300         <189>   DW_AT_sibling     : <0x1a7>
2301      <2><18d>: Abbrev Number: 7 (DW_TAG_enumerator)
2302         <18e>   DW_AT_name        : (indirect string, offset: 0x9e18):
2303                                     QtDebugMsg
2304      <2><192>: Abbrev Number: 7 (DW_TAG_enumerator)
2305         <193>   DW_AT_name        : (indirect string, offset: 0x1505f):
2306                                     QtWarningMsg
2307      <2><197>: Abbrev Number: 7 (DW_TAG_enumerator)
2308         <198>   DW_AT_name        : (indirect string, offset: 0x16f4a):
2309                                     QtCriticalMsg
2310      <2><19c>: Abbrev Number: 7 (DW_TAG_enumerator)
2311         <19d>   DW_AT_name        : (indirect string, offset: 0x156dd):
2312                                     QtFatalMsg
2313      <2><1a1>: Abbrev Number: 7 (DW_TAG_enumerator)
2314         <1a2>   DW_AT_name        : (indirect string, offset: 0x13660):
2315                                     QtSystemMsg
2316   */
2317   if (dtag == DW_TAG_enumerator) {
2318      VG_(memset)( &atomE, 0, sizeof(atomE) );
2319      atomE.cuOff = posn;
2320      atomE.tag   = Te_Atom;
2321      while (True) {
2322         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2323         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2324         if (attr == 0 && form == 0) break;
2325         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2326                            cc, c_die, False/*td3*/, form );
2327         if (attr == DW_AT_name && ctsMemSzB > 0) {
2328            atomE.Te.Atom.name
2329              = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enumerator.1",
2330                                   (UChar*)(UWord)cts );
2331         }
2332         if (attr == DW_AT_const_value && ctsSzB > 0) {
2333            atomE.Te.Atom.value = cts;
2334            atomE.Te.Atom.valueKnown = True;
2335         }
2336      }
2337      /* Do we have something that looks sane? */
2338      if (atomE.Te.Atom.name == NULL)
2339         goto bad_DIE;
2340      /* Do we have a plausible parent? */
2341      if (typestack_is_empty(parser)) goto bad_DIE;
2342      vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
2343      vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
2344      if (level != parser->qlevel[parser->sp]+1) goto bad_DIE;
2345      if (parser->qparentE[parser->sp].tag != Te_TyEnum) goto bad_DIE;
2346      /* Record this child in the parent */
2347      vg_assert(parser->qparentE[parser->sp].Te.TyEnum.atomRs);
2348      VG_(addToXA)( parser->qparentE[parser->sp].Te.TyEnum.atomRs,
2349                    &atomE );
2350      /* And record the child itself */
2351      goto acquire_Atom;
2352   }
2353
2354   /* Treat DW_TAG_class_type as if it was a DW_TAG_structure_type.  I
2355      don't know if this is correct, but it at least makes this reader
2356      usable for gcc-4.3 produced Dwarf3. */
2357   if (dtag == DW_TAG_structure_type || dtag == DW_TAG_class_type
2358       || dtag == DW_TAG_union_type) {
2359      Bool have_szB = False;
2360      Bool is_decl  = False;
2361      Bool is_spec  = False;
2362      /* Create a new Type to hold the results. */
2363      VG_(memset)(&typeE, 0, sizeof(typeE));
2364      typeE.cuOff = posn;
2365      typeE.tag   = Te_TyStOrUn;
2366      typeE.Te.TyStOrUn.name = NULL;
2367      typeE.Te.TyStOrUn.fieldRs
2368         = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.pTD.struct_type.1",
2369                       ML_(dinfo_free),
2370                       sizeof(UWord) );
2371      typeE.Te.TyStOrUn.complete = True;
2372      typeE.Te.TyStOrUn.isStruct = dtag == DW_TAG_structure_type
2373                                   || dtag == DW_TAG_class_type;
2374      while (True) {
2375         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2376         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2377         if (attr == 0 && form == 0) break;
2378         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2379                            cc, c_die, False/*td3*/, form );
2380         if (attr == DW_AT_name && ctsMemSzB > 0) {
2381            typeE.Te.TyStOrUn.name
2382               = ML_(dinfo_strdup)( "di.readdwarf3.ptD.struct_type.2",
2383                                    (UChar*)(UWord)cts );
2384         }
2385         if (attr == DW_AT_byte_size && ctsSzB >= 0) {
2386            typeE.Te.TyStOrUn.szB = cts;
2387            have_szB = True;
2388         }
2389         if (attr == DW_AT_declaration && ctsSzB > 0 && cts > 0) {
2390            is_decl = True;
2391         }
2392         if (attr == DW_AT_specification && ctsSzB > 0 && cts > 0) {
2393            is_spec = True;
2394         }
2395      }
2396      /* Do we have something that looks sane? */
2397      if (is_decl && (!is_spec)) {
2398         /* It's a DW_AT_declaration.  We require the name but
2399            nothing else. */
2400         if (typeE.Te.TyStOrUn.name == NULL)
2401            goto bad_DIE;
2402         typeE.Te.TyStOrUn.complete = False;
2403         /* JRS 2009 Aug 10: <possible kludge>? */
2404         /* Push this tyent on the stack, even though it's incomplete.
2405            It appears that gcc-4.4 on Fedora 11 will sometimes create
2406            DW_TAG_member entries for it, and so we need to have a
2407            plausible parent present in order for that to work.  See
2408            #200029 comments 8 and 9. */
2409         typestack_push( cc, parser, td3, &typeE, level );
2410         /* </possible kludge> */
2411         goto acquire_Type;
2412      }
2413      if ((!is_decl) /* && (!is_spec) */) {
2414         /* this is the common, ordinary case */
2415         if ((!have_szB) /* we must know the size */
2416             /* But the name can be present, or not */)
2417            goto bad_DIE;
2418         /* On't stack! */
2419         typestack_push( cc, parser, td3, &typeE, level );
2420         goto acquire_Type;
2421      }
2422      else {
2423         /* don't know how to handle any other variants just now */
2424         goto bad_DIE;
2425      }
2426   }
2427
2428   if (dtag == DW_TAG_member) {
2429      /* Acquire member entries for both DW_TAG_structure_type and
2430         DW_TAG_union_type.  They differ minorly, in that struct
2431         members must have a DW_AT_data_member_location expression
2432         whereas union members must not. */
2433      Bool parent_is_struct;
2434      VG_(memset)( &fieldE, 0, sizeof(fieldE) );
2435      fieldE.cuOff = posn;
2436      fieldE.tag   = Te_Field;
2437      fieldE.Te.Field.typeR = D3_INVALID_CUOFF;
2438      while (True) {
2439         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2440         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2441         if (attr == 0 && form == 0) break;
2442         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2443                            cc, c_die, False/*td3*/, form );
2444         if (attr == DW_AT_name && ctsMemSzB > 0) {
2445            fieldE.Te.Field.name
2446               = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.1",
2447                                    (UChar*)(UWord)cts );
2448         }
2449         if (attr == DW_AT_type && ctsSzB > 0) {
2450            fieldE.Te.Field.typeR = (UWord)cts;
2451         }
2452         /* There are 2 different cases for DW_AT_data_member_location.
2453            If it is a constant class attribute, it contains byte offset
2454            from the beginning of the containing entity.
2455            Otherwise it is a location expression.  */
2456         if (attr == DW_AT_data_member_location && ctsSzB > 0) {
2457            fieldE.Te.Field.nLoc = -1;
2458            fieldE.Te.Field.pos.offset = cts;
2459         } else if (attr == DW_AT_data_member_location && ctsMemSzB > 0) {
2460            fieldE.Te.Field.nLoc = (UWord)ctsMemSzB;
2461            fieldE.Te.Field.pos.loc
2462               = ML_(dinfo_memdup)( "di.readdwarf3.ptD.member.2",
2463                                    (UChar*)(UWord)cts,
2464                                    (SizeT)fieldE.Te.Field.nLoc );
2465         }
2466      }
2467      /* Do we have a plausible parent? */
2468      if (typestack_is_empty(parser)) goto bad_DIE;
2469      vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
2470      vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
2471      if (level != parser->qlevel[parser->sp]+1) goto bad_DIE;
2472      if (parser->qparentE[parser->sp].tag != Te_TyStOrUn) goto bad_DIE;
2473      /* Do we have something that looks sane?  If this a member of a
2474         struct, we must have a location expression; but if a member
2475         of a union that is irrelevant (D3 spec sec 5.6.6).  We ought
2476         to reject in the latter case, but some compilers have been
2477         observed to emit constant-zero expressions.  So just ignore
2478         them. */
2479      parent_is_struct
2480         = parser->qparentE[parser->sp].Te.TyStOrUn.isStruct;
2481      if (!fieldE.Te.Field.name)
2482         fieldE.Te.Field.name
2483            = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.3",
2484                                 "<anon_field>" );
2485      vg_assert(fieldE.Te.Field.name);
2486      if (fieldE.Te.Field.typeR == D3_INVALID_CUOFF)
2487         goto bad_DIE;
2488      if (fieldE.Te.Field.nLoc) {
2489         if (!parent_is_struct) {
2490            /* If this is a union type, pretend we haven't seen the data
2491               member location expression, as it is by definition
2492               redundant (it must be zero). */
2493            if (fieldE.Te.Field.nLoc > 0)
2494               ML_(dinfo_free)(fieldE.Te.Field.pos.loc);
2495            fieldE.Te.Field.pos.loc = NULL;
2496            fieldE.Te.Field.nLoc = 0;
2497         }
2498         /* Record this child in the parent */
2499         fieldE.Te.Field.isStruct = parent_is_struct;
2500         vg_assert(parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs);
2501         VG_(addToXA)( parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs,
2502                       &posn );
2503         /* And record the child itself */
2504         goto acquire_Field;
2505      } else {
2506         /* Member with no location - this can happen with static
2507            const members in C++ code which are compile time constants
2508            that do no exist in the class. They're not of any interest
2509            to us so we ignore them. */
2510      }
2511   }
2512
2513   if (dtag == DW_TAG_array_type) {
2514      VG_(memset)(&typeE, 0, sizeof(typeE));
2515      typeE.cuOff = posn;
2516      typeE.tag   = Te_TyArray;
2517      typeE.Te.TyArray.typeR = D3_INVALID_CUOFF;
2518      typeE.Te.TyArray.boundRs
2519         = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.array_type.1",
2520                       ML_(dinfo_free),
2521                       sizeof(UWord) );
2522      while (True) {
2523         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2524         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2525         if (attr == 0 && form == 0) break;
2526         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2527                            cc, c_die, False/*td3*/, form );
2528         if (attr == DW_AT_type && ctsSzB > 0) {
2529            typeE.Te.TyArray.typeR = (UWord)cts;
2530         }
2531      }
2532      if (typeE.Te.TyArray.typeR == D3_INVALID_CUOFF)
2533         goto bad_DIE;
2534      /* On't stack! */
2535      typestack_push( cc, parser, td3, &typeE, level );
2536      goto acquire_Type;
2537   }
2538
2539   if (dtag == DW_TAG_subrange_type) {
2540      Bool have_lower = False;
2541      Bool have_upper = False;
2542      Bool have_count = False;
2543      Long lower = 0;
2544      Long upper = 0;
2545
2546      switch (parser->language) {
2547         case 'C': have_lower = True;  lower = 0; break;
2548         case 'F': have_lower = True;  lower = 1; break;
2549         case '?': have_lower = False; break;
2550         default:  vg_assert(0); /* assured us by handling of
2551                                    DW_TAG_compile_unit in this fn */
2552      }
2553
2554      VG_(memset)( &boundE, 0, sizeof(boundE) );
2555      boundE.cuOff = D3_INVALID_CUOFF;
2556      boundE.tag   = Te_Bound;
2557      while (True) {
2558         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2559         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2560         if (attr == 0 && form == 0) break;
2561         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2562                            cc, c_die, False/*td3*/, form );
2563         if (attr == DW_AT_lower_bound && ctsSzB > 0) {
2564            lower      = (Long)cts;
2565            have_lower = True;
2566         }
2567         if (attr == DW_AT_upper_bound && ctsSzB > 0) {
2568            upper      = (Long)cts;
2569            have_upper = True;
2570         }
2571         if (attr == DW_AT_count && ctsSzB > 0) {
2572            /*count    = (Long)cts;*/
2573            have_count = True;
2574         }
2575      }
2576      /* FIXME: potentially skip the rest if no parent present, since
2577         it could be the case that this subrange type is free-standing
2578         (not being used to describe the bounds of a containing array
2579         type) */
2580      /* Do we have a plausible parent? */
2581      if (typestack_is_empty(parser)) goto bad_DIE;
2582      vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
2583      vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
2584      if (level != parser->qlevel[parser->sp]+1) goto bad_DIE;
2585      if (parser->qparentE[parser->sp].tag != Te_TyArray) goto bad_DIE;
2586
2587      /* Figure out if we have a definite range or not */
2588      if (have_lower && have_upper && (!have_count)) {
2589         boundE.Te.Bound.knownL = True;
2590         boundE.Te.Bound.knownU = True;
2591         boundE.Te.Bound.boundL = lower;
2592         boundE.Te.Bound.boundU = upper;
2593      }
2594      else if (have_lower && (!have_upper) && (!have_count)) {
2595         boundE.Te.Bound.knownL = True;
2596         boundE.Te.Bound.knownU = False;
2597         boundE.Te.Bound.boundL = lower;
2598         boundE.Te.Bound.boundU = 0;
2599      }
2600      else if ((!have_lower) && have_upper && (!have_count)) {
2601         boundE.Te.Bound.knownL = False;
2602         boundE.Te.Bound.knownU = True;
2603         boundE.Te.Bound.boundL = 0;
2604         boundE.Te.Bound.boundU = upper;
2605      }
2606      else if ((!have_lower) && (!have_upper) && (!have_count)) {
2607         boundE.Te.Bound.knownL = False;
2608         boundE.Te.Bound.knownU = False;
2609         boundE.Te.Bound.boundL = 0;
2610         boundE.Te.Bound.boundU = 0;
2611      } else {
2612         /* FIXME: handle more cases */
2613         goto bad_DIE;
2614      }
2615
2616      /* Record this bound in the parent */
2617      boundE.cuOff = posn;
2618      vg_assert(parser->qparentE[parser->sp].Te.TyArray.boundRs);
2619      VG_(addToXA)( parser->qparentE[parser->sp].Te.TyArray.boundRs,
2620                    &boundE );
2621      /* And record the child itself */
2622      goto acquire_Bound;
2623   }
2624
2625   if (dtag == DW_TAG_typedef) {
2626      /* We can pick up a new typedef any time. */
2627      VG_(memset)(&typeE, 0, sizeof(typeE));
2628      typeE.cuOff = D3_INVALID_CUOFF;
2629      typeE.tag   = Te_TyTyDef;
2630      typeE.Te.TyTyDef.name = NULL;
2631      typeE.Te.TyTyDef.typeR = D3_INVALID_CUOFF;
2632      while (True) {
2633         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2634         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2635         if (attr == 0 && form == 0) break;
2636         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2637                            cc, c_die, False/*td3*/, form );
2638         if (attr == DW_AT_name && ctsMemSzB > 0) {
2639            typeE.Te.TyTyDef.name
2640               = ML_(dinfo_strdup)( "di.readdwarf3.ptD.typedef.1",
2641                                    (UChar*)(UWord)cts );
2642         }
2643         if (attr == DW_AT_type && ctsSzB > 0) {
2644            typeE.Te.TyTyDef.typeR = (UWord)cts;
2645         }
2646      }
2647      /* Do we have something that looks sane? */
2648      if (/* must have a name */
2649          typeE.Te.TyTyDef.name == NULL
2650          /* but the referred-to type can be absent */)
2651         goto bad_DIE;
2652      else
2653         goto acquire_Type;
2654   }
2655
2656   if (dtag == DW_TAG_subroutine_type) {
2657      /* function type? just record that one fact and ask no
2658         further questions. */
2659      VG_(memset)(&typeE, 0, sizeof(typeE));
2660      typeE.cuOff = D3_INVALID_CUOFF;
2661      typeE.tag   = Te_TyFn;
2662      goto acquire_Type;
2663   }
2664
2665   if (dtag == DW_TAG_volatile_type || dtag == DW_TAG_const_type) {
2666      Int have_ty = 0;
2667      VG_(memset)(&typeE, 0, sizeof(typeE));
2668      typeE.cuOff = D3_INVALID_CUOFF;
2669      typeE.tag   = Te_TyQual;
2670      typeE.Te.TyQual.qual
2671         = dtag == DW_TAG_volatile_type ? 'V' : 'C';
2672      /* target type defaults to 'void' */
2673      typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF;
2674      while (True) {
2675         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2676         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2677         if (attr == 0 && form == 0) break;
2678         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2679                            cc, c_die, False/*td3*/, form );
2680         if (attr == DW_AT_type && ctsSzB > 0) {
2681            typeE.Te.TyQual.typeR = (UWord)cts;
2682            have_ty++;
2683         }
2684      }
2685      /* gcc sometimes generates DW_TAG_const/volatile_type without
2686         DW_AT_type and GDB appears to interpret the type as 'const
2687         void' (resp. 'volatile void').  So just allow it .. */
2688      if (have_ty == 1 || have_ty == 0)
2689         goto acquire_Type;
2690      else
2691         goto bad_DIE;
2692   }
2693
2694   /* else ignore this DIE */
2695   return;
2696   /*NOTREACHED*/
2697
2698  acquire_Type:
2699   if (0) VG_(printf)("YYYY Acquire Type\n");
2700   vg_assert(ML_(TyEnt__is_type)( &typeE ));
2701   vg_assert(typeE.cuOff == D3_INVALID_CUOFF || typeE.cuOff == posn);
2702   typeE.cuOff = posn;
2703   VG_(addToXA)( tyents, &typeE );
2704   return;
2705   /*NOTREACHED*/
2706
2707  acquire_Atom:
2708   if (0) VG_(printf)("YYYY Acquire Atom\n");
2709   vg_assert(atomE.tag == Te_Atom);
2710   vg_assert(atomE.cuOff == D3_INVALID_CUOFF || atomE.cuOff == posn);
2711   atomE.cuOff = posn;
2712   VG_(addToXA)( tyents, &atomE );
2713   return;
2714   /*NOTREACHED*/
2715
2716  acquire_Field:
2717   /* For union members, Expr should be absent */
2718   if (0) VG_(printf)("YYYY Acquire Field\n");
2719   vg_assert(fieldE.tag == Te_Field);
2720   vg_assert(fieldE.Te.Field.nLoc <= 0 || fieldE.Te.Field.pos.loc != NULL);
2721   vg_assert(fieldE.Te.Field.nLoc != 0 || fieldE.Te.Field.pos.loc == NULL);
2722   if (fieldE.Te.Field.isStruct) {
2723      vg_assert(fieldE.Te.Field.nLoc != 0);
2724   } else {
2725      vg_assert(fieldE.Te.Field.nLoc == 0);
2726   }
2727   vg_assert(fieldE.cuOff == D3_INVALID_CUOFF || fieldE.cuOff == posn);
2728   fieldE.cuOff = posn;
2729   VG_(addToXA)( tyents, &fieldE );
2730   return;
2731   /*NOTREACHED*/
2732
2733  acquire_Bound:
2734   if (0) VG_(printf)("YYYY Acquire Bound\n");
2735   vg_assert(boundE.tag == Te_Bound);
2736   vg_assert(boundE.cuOff == D3_INVALID_CUOFF || boundE.cuOff == posn);
2737   boundE.cuOff = posn;
2738   VG_(addToXA)( tyents, &boundE );
2739   return;
2740   /*NOTREACHED*/
2741
2742  bad_DIE:
2743   set_position_of_Cursor( c_die,  saved_die_c_offset );
2744   set_position_of_Cursor( c_abbv, saved_abbv_c_offset );
2745   VG_(printf)("\nparse_type_DIE: confused by:\n");
2746   VG_(printf)(" <%d><%lx>: %s\n", level, posn, ML_(pp_DW_TAG)( dtag ) );
2747   while (True) {
2748      DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2749      DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2750      if (attr == 0 && form == 0) break;
2751      VG_(printf)("     %18s: ", ML_(pp_DW_AT)(attr));
2752      /* Get the form contents, so as to print them */
2753      get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2754                         cc, c_die, True, form );
2755      VG_(printf)("\t\n");
2756   }
2757   VG_(printf)("\n");
2758   cc->barf("parse_type_DIE: confused by the above DIE");
2759   /*NOTREACHED*/
2760}
2761
2762
2763/*------------------------------------------------------------*/
2764/*---                                                      ---*/
2765/*--- Compression of type DIE information                  ---*/
2766/*---                                                      ---*/
2767/*------------------------------------------------------------*/
2768
2769static UWord chase_cuOff ( Bool* changed,
2770                           XArray* /* of TyEnt */ ents,
2771                           TyEntIndexCache* ents_cache,
2772                           UWord cuOff )
2773{
2774   TyEnt* ent;
2775   ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, cuOff );
2776
2777   if (!ent) {
2778      VG_(printf)("chase_cuOff: no entry for 0x%05lx\n", cuOff);
2779      *changed = False;
2780      return cuOff;
2781   }
2782
2783   vg_assert(ent->tag != Te_EMPTY);
2784   if (ent->tag != Te_INDIR) {
2785      *changed = False;
2786      return cuOff;
2787   } else {
2788      vg_assert(ent->Te.INDIR.indR < cuOff);
2789      *changed = True;
2790      return ent->Te.INDIR.indR;
2791   }
2792}
2793
2794static
2795void chase_cuOffs_in_XArray ( Bool* changed,
2796                              XArray* /* of TyEnt */ ents,
2797                              TyEntIndexCache* ents_cache,
2798                              /*MOD*/XArray* /* of UWord */ cuOffs )
2799{
2800   Bool b2 = False;
2801   Word i, n = VG_(sizeXA)( cuOffs );
2802   for (i = 0; i < n; i++) {
2803      Bool   b = False;
2804      UWord* p = VG_(indexXA)( cuOffs, i );
2805      *p = chase_cuOff( &b, ents, ents_cache, *p );
2806      if (b)
2807         b2 = True;
2808   }
2809   *changed = b2;
2810}
2811
2812static Bool TyEnt__subst_R_fields ( XArray* /* of TyEnt */ ents,
2813                                    TyEntIndexCache* ents_cache,
2814                                    /*MOD*/TyEnt* te )
2815{
2816   Bool b, changed = False;
2817   switch (te->tag) {
2818      case Te_EMPTY:
2819         break;
2820      case Te_INDIR:
2821         te->Te.INDIR.indR
2822            = chase_cuOff( &b, ents, ents_cache, te->Te.INDIR.indR );
2823         if (b) changed = True;
2824         break;
2825      case Te_UNKNOWN:
2826         break;
2827      case Te_Atom:
2828         break;
2829      case Te_Field:
2830         te->Te.Field.typeR
2831            = chase_cuOff( &b, ents, ents_cache, te->Te.Field.typeR );
2832         if (b) changed = True;
2833         break;
2834      case Te_Bound:
2835         break;
2836      case Te_TyBase:
2837         break;
2838      case Te_TyPorR:
2839         te->Te.TyPorR.typeR
2840            = chase_cuOff( &b, ents, ents_cache, te->Te.TyPorR.typeR );
2841         if (b) changed = True;
2842         break;
2843      case Te_TyTyDef:
2844         te->Te.TyTyDef.typeR
2845            = chase_cuOff( &b, ents, ents_cache, te->Te.TyTyDef.typeR );
2846         if (b) changed = True;
2847         break;
2848      case Te_TyStOrUn:
2849         chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyStOrUn.fieldRs );
2850         if (b) changed = True;
2851         break;
2852      case Te_TyEnum:
2853         chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyEnum.atomRs );
2854         if (b) changed = True;
2855         break;
2856      case Te_TyArray:
2857         te->Te.TyArray.typeR
2858            = chase_cuOff( &b, ents, ents_cache, te->Te.TyArray.typeR );
2859         if (b) changed = True;
2860         chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyArray.boundRs );
2861         if (b) changed = True;
2862         break;
2863      case Te_TyFn:
2864         break;
2865      case Te_TyQual:
2866         te->Te.TyQual.typeR
2867            = chase_cuOff( &b, ents, ents_cache, te->Te.TyQual.typeR );
2868         if (b) changed = True;
2869         break;
2870      case Te_TyVoid:
2871         break;
2872      default:
2873         ML_(pp_TyEnt)(te);
2874         vg_assert(0);
2875   }
2876   return changed;
2877}
2878
2879/* Make a pass over 'ents'.  For each tyent, inspect the target of any
2880   'R' or 'Rs' fields (those which refer to other tyents), and replace
2881   any which point to INDIR nodes with the target of the indirection
2882   (which should not itself be an indirection).  In summary, this
2883   routine shorts out all references to indirection nodes. */
2884static
2885Word dedup_types_substitution_pass ( /*MOD*/XArray* /* of TyEnt */ ents,
2886                                     TyEntIndexCache* ents_cache )
2887{
2888   Word i, n, nChanged = 0;
2889   Bool b;
2890   n = VG_(sizeXA)( ents );
2891   for (i = 0; i < n; i++) {
2892      TyEnt* ent = VG_(indexXA)( ents, i );
2893      vg_assert(ent->tag != Te_EMPTY);
2894      /* We have to substitute everything, even indirections, so as to
2895         ensure that chains of indirections don't build up. */
2896      b = TyEnt__subst_R_fields( ents, ents_cache, ent );
2897      if (b)
2898         nChanged++;
2899   }
2900
2901   return nChanged;
2902}
2903
2904
2905/* Make a pass over 'ents', building a dictionary of TyEnts as we go.
2906   Look up each new tyent in the dictionary in turn.  If it is already
2907   in the dictionary, replace this tyent with an indirection to the
2908   existing one, and delete any malloc'd stuff hanging off this one.
2909   In summary, this routine commons up all tyents that are identical
2910   as defined by TyEnt__cmp_by_all_except_cuOff. */
2911static
2912Word dedup_types_commoning_pass ( /*MOD*/XArray* /* of TyEnt */ ents )
2913{
2914   Word    n, i, nDeleted;
2915   WordFM* dict; /* TyEnt* -> void */
2916   TyEnt*  ent;
2917   UWord   keyW, valW;
2918
2919   dict = VG_(newFM)(
2920             ML_(dinfo_zalloc), "di.readdwarf3.dtcp.1",
2921             ML_(dinfo_free),
2922             (Word(*)(UWord,UWord)) ML_(TyEnt__cmp_by_all_except_cuOff)
2923          );
2924
2925   nDeleted = 0;
2926   n = VG_(sizeXA)( ents );
2927   for (i = 0; i < n; i++) {
2928      ent = VG_(indexXA)( ents, i );
2929      vg_assert(ent->tag != Te_EMPTY);
2930
2931      /* Ignore indirections, although check that they are
2932         not forming a cycle. */
2933      if (ent->tag == Te_INDIR) {
2934         vg_assert(ent->Te.INDIR.indR < ent->cuOff);
2935         continue;
2936      }
2937
2938      keyW = valW = 0;
2939      if (VG_(lookupFM)( dict, &keyW, &valW, (UWord)ent )) {
2940         /* it's already in the dictionary. */
2941         TyEnt* old = (TyEnt*)keyW;
2942         vg_assert(valW == 0);
2943         vg_assert(old != ent);
2944         vg_assert(old->tag != Te_INDIR);
2945         /* since we are traversing the array in increasing order of
2946            cuOff: */
2947         vg_assert(old->cuOff < ent->cuOff);
2948         /* So anyway, dump this entry and replace it with an
2949            indirection to the one in the dictionary.  Note that the
2950            assertion above guarantees that we cannot create cycles of
2951            indirections, since we are always creating an indirection
2952            to a tyent with a cuOff lower than this one. */
2953         ML_(TyEnt__make_EMPTY)( ent );
2954         ent->tag = Te_INDIR;
2955         ent->Te.INDIR.indR = old->cuOff;
2956         nDeleted++;
2957      } else {
2958         /* not in dictionary; add it and keep going. */
2959         VG_(addToFM)( dict, (UWord)ent, 0 );
2960      }
2961   }
2962
2963   VG_(deleteFM)( dict, NULL, NULL );
2964
2965   return nDeleted;
2966}
2967
2968
2969static
2970void dedup_types ( Bool td3,
2971                   /*MOD*/XArray* /* of TyEnt */ ents,
2972                   TyEntIndexCache* ents_cache )
2973{
2974   Word m, n, i, nDel, nSubst, nThresh;
2975   if (0) td3 = True;
2976
2977   n = VG_(sizeXA)( ents );
2978
2979   /* If a commoning pass and a substitution pass both make fewer than
2980      this many changes, just stop.  It's pointless to burn up CPU
2981      time trying to compress the last 1% or so out of the array. */
2982   nThresh = n / 200;
2983
2984   /* First we must sort .ents by its .cuOff fields, so we
2985      can index into it. */
2986   VG_(setCmpFnXA)(
2987      ents,
2988      (Int(*)(void*,void*)) ML_(TyEnt__cmp_by_cuOff_only)
2989   );
2990   VG_(sortXA)( ents );
2991
2992   /* Now repeatedly do commoning and substitution passes over
2993      the array, until there are no more changes. */
2994   do {
2995      nDel   = dedup_types_commoning_pass ( ents );
2996      nSubst = dedup_types_substitution_pass ( ents, ents_cache );
2997      vg_assert(nDel >= 0 && nSubst >= 0);
2998      TRACE_D3("   %ld deletions, %ld substitutions\n", nDel, nSubst);
2999   } while (nDel > nThresh || nSubst > nThresh);
3000
3001   /* Sanity check: all INDIR nodes should point at a non-INDIR thing.
3002      In fact this should be true at the end of every loop iteration
3003      above (a commoning pass followed by a substitution pass), but
3004      checking it on every iteration is excessively expensive.  Note,
3005      this loop also computes 'm' for the stats printing below it. */
3006   m = 0;
3007   n = VG_(sizeXA)( ents );
3008   for (i = 0; i < n; i++) {
3009      TyEnt *ent, *ind;
3010      ent = VG_(indexXA)( ents, i );
3011      if (ent->tag != Te_INDIR) continue;
3012      m++;
3013      ind = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
3014                                         ent->Te.INDIR.indR );
3015      vg_assert(ind);
3016      vg_assert(ind->tag != Te_INDIR);
3017   }
3018
3019   TRACE_D3("Overall: %ld before, %ld after\n", n, n-m);
3020}
3021
3022
3023/*------------------------------------------------------------*/
3024/*---                                                      ---*/
3025/*--- Resolution of references to type DIEs                ---*/
3026/*---                                                      ---*/
3027/*------------------------------------------------------------*/
3028
3029/* Make a pass through the (temporary) variables array.  Examine the
3030   type of each variable, check is it found, and chase any Te_INDIRs.
3031   Postcondition is: each variable has a typeR field that refers to a
3032   valid type in tyents, or a Te_UNKNOWN, and is certainly guaranteed
3033   not to refer to a Te_INDIR.  (This is so that we can throw all the
3034   Te_INDIRs away later). */
3035
3036__attribute__((noinline))
3037static void resolve_variable_types (
3038               void (*barf)( HChar* ) __attribute__((noreturn)),
3039               /*R-O*/XArray* /* of TyEnt */ ents,
3040               /*MOD*/TyEntIndexCache* ents_cache,
3041               /*MOD*/XArray* /* of TempVar* */ vars
3042            )
3043{
3044   Word i, n;
3045   n = VG_(sizeXA)( vars );
3046   for (i = 0; i < n; i++) {
3047      TempVar* var = *(TempVar**)VG_(indexXA)( vars, i );
3048      /* This is the stated type of the variable.  But it might be
3049         an indirection, so be careful. */
3050      TyEnt* ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
3051                                                var->typeR );
3052      if (ent && ent->tag == Te_INDIR) {
3053         ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
3054                                            ent->Te.INDIR.indR );
3055         vg_assert(ent);
3056         vg_assert(ent->tag != Te_INDIR);
3057      }
3058
3059      /* Deal first with "normal" cases */
3060      if (ent && ML_(TyEnt__is_type)(ent)) {
3061         var->typeR = ent->cuOff;
3062         continue;
3063      }
3064
3065      /* If there's no ent, it probably we did not manage to read a
3066         type at the cuOffset which is stated as being this variable's
3067         type.  Maybe a deficiency in parse_type_DIE.  Complain. */
3068      if (ent == NULL) {
3069         VG_(printf)("\n: Invalid cuOff = 0x%05lx\n", var->typeR );
3070         barf("resolve_variable_types: "
3071              "cuOff does not refer to a known type");
3072      }
3073      vg_assert(ent);
3074      /* If ent has any other tag, something bad happened, along the
3075         lines of var->typeR not referring to a type at all. */
3076      vg_assert(ent->tag == Te_UNKNOWN);
3077      /* Just accept it; the type will be useless, but at least keep
3078         going. */
3079      var->typeR = ent->cuOff;
3080   }
3081}
3082
3083
3084/*------------------------------------------------------------*/
3085/*---                                                      ---*/
3086/*--- Parsing of Compilation Units                         ---*/
3087/*---                                                      ---*/
3088/*------------------------------------------------------------*/
3089
3090static Int cmp_TempVar_by_dioff ( void* v1, void* v2 ) {
3091   TempVar* t1 = *(TempVar**)v1;
3092   TempVar* t2 = *(TempVar**)v2;
3093   if (t1->dioff < t2->dioff) return -1;
3094   if (t1->dioff > t2->dioff) return 1;
3095   return 0;
3096}
3097
3098static void read_DIE (
3099   /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
3100   /*MOD*/XArray* /* of TyEnt */ tyents,
3101   /*MOD*/XArray* /* of TempVar* */ tempvars,
3102   /*MOD*/XArray* /* of GExpr* */ gexprs,
3103   /*MOD*/D3TypeParser* typarser,
3104   /*MOD*/D3VarParser* varparser,
3105   Cursor* c, Bool td3, CUConst* cc, Int level
3106)
3107{
3108   Cursor abbv;
3109   ULong  atag, abbv_code;
3110   UWord  posn;
3111   UInt   has_children;
3112   UWord  start_die_c_offset, start_abbv_c_offset;
3113   UWord  after_die_c_offset, after_abbv_c_offset;
3114
3115   /* --- Deal with this DIE --- */
3116   posn      = get_position_of_Cursor( c );
3117   abbv_code = get_ULEB128( c );
3118   set_abbv_Cursor( &abbv, td3, cc, abbv_code );
3119   atag      = get_ULEB128( &abbv );
3120   TRACE_D3("\n");
3121   TRACE_D3(" <%d><%lx>: Abbrev Number: %llu (%s)\n",
3122            level, posn, abbv_code, ML_(pp_DW_TAG)( atag ) );
3123
3124   if (atag == 0)
3125      cc->barf("read_DIE: invalid zero tag on DIE");
3126
3127   has_children = get_UChar( &abbv );
3128   if (has_children != DW_children_no && has_children != DW_children_yes)
3129      cc->barf("read_DIE: invalid has_children value");
3130
3131   /* We're set up to look at the fields of this DIE.  Hand it off to
3132      any parser(s) that want to see it.  Since they will in general
3133      advance both the DIE and abbrev cursors, remember their current
3134      settings so that we can then back up and do one final pass over
3135      the DIE, to print out its contents. */
3136
3137   start_die_c_offset  = get_position_of_Cursor( c );
3138   start_abbv_c_offset = get_position_of_Cursor( &abbv );
3139
3140   while (True) {
3141      ULong cts;
3142      Int   ctsSzB;
3143      UWord ctsMemSzB;
3144      ULong at_name = get_ULEB128( &abbv );
3145      ULong at_form = get_ULEB128( &abbv );
3146      if (at_name == 0 && at_form == 0) break;
3147      TRACE_D3("     %18s: ", ML_(pp_DW_AT)(at_name));
3148      /* Get the form contents, but ignore them; the only purpose is
3149         to print them, if td3 is True */
3150      get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
3151                         cc, c, td3, (DW_FORM)at_form );
3152      TRACE_D3("\t");
3153      TRACE_D3("\n");
3154   }
3155
3156   after_die_c_offset  = get_position_of_Cursor( c );
3157   after_abbv_c_offset = get_position_of_Cursor( &abbv );
3158
3159   set_position_of_Cursor( c,     start_die_c_offset );
3160   set_position_of_Cursor( &abbv, start_abbv_c_offset );
3161
3162   parse_type_DIE( tyents,
3163                   typarser,
3164                   (DW_TAG)atag,
3165                   posn,
3166                   level,
3167                   c,     /* DIE cursor */
3168                   &abbv, /* abbrev cursor */
3169                   cc,
3170                   td3 );
3171
3172   set_position_of_Cursor( c,     start_die_c_offset );
3173   set_position_of_Cursor( &abbv, start_abbv_c_offset );
3174
3175   parse_var_DIE( rangestree,
3176                  tempvars,
3177                  gexprs,
3178                  varparser,
3179                  (DW_TAG)atag,
3180                  posn,
3181                  level,
3182                  c,     /* DIE cursor */
3183                  &abbv, /* abbrev cursor */
3184                  cc,
3185                  td3 );
3186
3187   set_position_of_Cursor( c,     after_die_c_offset );
3188   set_position_of_Cursor( &abbv, after_abbv_c_offset );
3189
3190   /* --- Now recurse into its children, if any --- */
3191   if (has_children == DW_children_yes) {
3192      if (0) TRACE_D3("BEGIN children of level %d\n", level);
3193      while (True) {
3194         atag = peek_ULEB128( c );
3195         if (atag == 0) break;
3196         read_DIE( rangestree, tyents, tempvars, gexprs,
3197                   typarser, varparser,
3198                   c, td3, cc, level+1 );
3199      }
3200      /* Now we need to eat the terminating zero */
3201      atag = get_ULEB128( c );
3202      vg_assert(atag == 0);
3203      if (0) TRACE_D3("END children of level %d\n", level);
3204   }
3205
3206}
3207
3208
3209static
3210void new_dwarf3_reader_wrk (
3211   struct _DebugInfo* di,
3212   __attribute__((noreturn)) void (*barf)( HChar* ),
3213   UChar* debug_info_img,   SizeT debug_info_sz,
3214   UChar* debug_abbv_img,   SizeT debug_abbv_sz,
3215   UChar* debug_line_img,   SizeT debug_line_sz,
3216   UChar* debug_str_img,    SizeT debug_str_sz,
3217   UChar* debug_ranges_img, SizeT debug_ranges_sz,
3218   UChar* debug_loc_img,    SizeT debug_loc_sz
3219)
3220{
3221   XArray* /* of TyEnt */     tyents;
3222   XArray* /* of TyEnt */     tyents_to_keep;
3223   XArray* /* of GExpr* */    gexprs;
3224   XArray* /* of TempVar* */  tempvars;
3225   WordFM* /* of (XArray* of AddrRange, void) */ rangestree;
3226   TyEntIndexCache* tyents_cache = NULL;
3227   TyEntIndexCache* tyents_to_keep_cache = NULL;
3228   TempVar *varp, *varp2;
3229   GExpr* gexpr;
3230   Cursor abbv; /* for showing .debug_abbrev */
3231   Cursor info; /* primary cursor for parsing .debug_info */
3232   Cursor ranges; /* for showing .debug_ranges */
3233   D3TypeParser typarser;
3234   D3VarParser varparser;
3235   Addr  dr_base;
3236   UWord dr_offset;
3237   Word  i, j, n;
3238   Bool td3 = di->trace_symtab;
3239   XArray* /* of TempVar* */ dioff_lookup_tab;
3240#if 0
3241   /* This doesn't work properly because it assumes all entries are
3242      packed end to end, with no holes.  But that doesn't always
3243      appear to be the case, so it loses sync.  And the D3 spec
3244      doesn't appear to require a no-hole situation either. */
3245   /* Display .debug_loc */
3246   Addr  dl_base;
3247   UWord dl_offset;
3248   Cursor loc; /* for showing .debug_loc */
3249   TRACE_SYMTAB("\n");
3250   TRACE_SYMTAB("\n------ The contents of .debug_loc ------\n");
3251   TRACE_SYMTAB("    Offset   Begin    End      Expression\n");
3252   init_Cursor( &loc, debug_loc_img,
3253                debug_loc_sz, 0, barf,
3254                "Overrun whilst reading .debug_loc section(1)" );
3255   dl_base = 0;
3256   dl_offset = 0;
3257   while (True) {
3258      UWord  w1, w2;
3259      UWord  len;
3260      if (is_at_end_Cursor( &loc ))
3261         break;
3262
3263      /* Read a (host-)word pair.  This is something of a hack since
3264         the word size to read is really dictated by the ELF file;
3265         however, we assume we're reading a file with the same
3266         word-sizeness as the host.  Reasonably enough. */
3267      w1 = get_UWord( &loc );
3268      w2 = get_UWord( &loc );
3269
3270      if (w1 == 0 && w2 == 0) {
3271         /* end of list.  reset 'base' */
3272         TRACE_D3("    %08lx <End of list>\n", dl_offset);
3273         dl_base = 0;
3274         dl_offset = get_position_of_Cursor( &loc );
3275         continue;
3276      }
3277
3278      if (w1 == -1UL) {
3279         /* new value for 'base' */
3280         TRACE_D3("    %08lx %16lx %08lx (base address)\n",
3281                  dl_offset, w1, w2);
3282         dl_base = w2;
3283         continue;
3284      }
3285
3286      /* else a location expression follows */
3287      TRACE_D3("    %08lx %08lx %08lx ",
3288               dl_offset, w1 + dl_base, w2 + dl_base);
3289      len = (UWord)get_UShort( &loc );
3290      while (len > 0) {
3291         UChar byte = get_UChar( &loc );
3292         TRACE_D3("%02x", (UInt)byte);
3293         len--;
3294      }
3295      TRACE_SYMTAB("\n");
3296   }
3297#endif
3298
3299   /* Display .debug_ranges */
3300   TRACE_SYMTAB("\n");
3301   TRACE_SYMTAB("\n------ The contents of .debug_ranges ------\n");
3302   TRACE_SYMTAB("    Offset   Begin    End\n");
3303   init_Cursor( &ranges, debug_ranges_img,
3304                debug_ranges_sz, 0, barf,
3305                "Overrun whilst reading .debug_ranges section(1)" );
3306   dr_base = 0;
3307   dr_offset = 0;
3308   while (True) {
3309      UWord  w1, w2;
3310
3311      if (is_at_end_Cursor( &ranges ))
3312         break;
3313
3314      /* Read a (host-)word pair.  This is something of a hack since
3315         the word size to read is really dictated by the ELF file;
3316         however, we assume we're reading a file with the same
3317         word-sizeness as the host.  Reasonably enough. */
3318      w1 = get_UWord( &ranges );
3319      w2 = get_UWord( &ranges );
3320
3321      if (w1 == 0 && w2 == 0) {
3322         /* end of list.  reset 'base' */
3323         TRACE_D3("    %08lx <End of list>\n", dr_offset);
3324         dr_base = 0;
3325         dr_offset = get_position_of_Cursor( &ranges );
3326         continue;
3327      }
3328
3329      if (w1 == -1UL) {
3330         /* new value for 'base' */
3331         TRACE_D3("    %08lx %16lx %08lx (base address)\n",
3332                  dr_offset, w1, w2);
3333         dr_base = w2;
3334         continue;
3335      }
3336
3337      /* else a range [w1+base, w2+base) is denoted */
3338      TRACE_D3("    %08lx %08lx %08lx\n",
3339               dr_offset, w1 + dr_base, w2 + dr_base);
3340   }
3341
3342   /* Display .debug_abbrev */
3343   init_Cursor( &abbv, debug_abbv_img, debug_abbv_sz, 0, barf,
3344                "Overrun whilst reading .debug_abbrev section" );
3345   TRACE_SYMTAB("\n");
3346   TRACE_SYMTAB("\n------ The contents of .debug_abbrev ------\n");
3347   while (True) {
3348      if (is_at_end_Cursor( &abbv ))
3349         break;
3350      /* Read one abbreviation table */
3351      TRACE_D3("  Number TAG\n");
3352      while (True) {
3353         ULong atag;
3354         UInt  has_children;
3355         ULong acode = get_ULEB128( &abbv );
3356         if (acode == 0) break; /* end of the table */
3357         atag = get_ULEB128( &abbv );
3358         has_children = get_UChar( &abbv );
3359         TRACE_D3("   %llu      %s    [%s]\n",
3360                  acode, ML_(pp_DW_TAG)(atag),
3361                         ML_(pp_DW_children)(has_children));
3362         while (True) {
3363            ULong at_name = get_ULEB128( &abbv );
3364            ULong at_form = get_ULEB128( &abbv );
3365            if (at_name == 0 && at_form == 0) break;
3366            TRACE_D3("    %18s %s\n",
3367                     ML_(pp_DW_AT)(at_name), ML_(pp_DW_FORM)(at_form));
3368         }
3369      }
3370   }
3371   TRACE_SYMTAB("\n");
3372
3373   /* Now loop over the Compilation Units listed in the .debug_info
3374      section (see D3SPEC sec 7.5) paras 1 and 2.  Each compilation
3375      unit contains a Compilation Unit Header followed by precisely
3376      one DW_TAG_compile_unit or DW_TAG_partial_unit DIE. */
3377   init_Cursor( &info, debug_info_img, debug_info_sz, 0, barf,
3378                "Overrun whilst reading .debug_info section" );
3379
3380   /* We'll park the harvested type information in here.  Also create
3381      a fake "void" entry with offset D3_FAKEVOID_CUOFF, so we always
3382      have at least one type entry to refer to.  D3_FAKEVOID_CUOFF is
3383      huge and presumably will not occur in any valid DWARF3 file --
3384      it would need to have a .debug_info section 4GB long for that to
3385      happen.  These type entries end up in the DebugInfo. */
3386   tyents = VG_(newXA)( ML_(dinfo_zalloc),
3387                        "di.readdwarf3.ndrw.1 (TyEnt temp array)",
3388                        ML_(dinfo_free), sizeof(TyEnt) );
3389   { TyEnt tyent;
3390     VG_(memset)(&tyent, 0, sizeof(tyent));
3391     tyent.tag   = Te_TyVoid;
3392     tyent.cuOff = D3_FAKEVOID_CUOFF;
3393     tyent.Te.TyVoid.isFake = True;
3394     VG_(addToXA)( tyents, &tyent );
3395   }
3396   { TyEnt tyent;
3397     VG_(memset)(&tyent, 0, sizeof(tyent));
3398     tyent.tag   = Te_UNKNOWN;
3399     tyent.cuOff = D3_INVALID_CUOFF;
3400     VG_(addToXA)( tyents, &tyent );
3401   }
3402
3403   /* This is a tree used to unique-ify the range lists that are
3404      manufactured by parse_var_DIE.  References to the keys in the
3405      tree wind up in .rngMany fields in TempVars.  We'll need to
3406      delete this tree, and the XArrays attached to it, at the end of
3407      this function. */
3408   rangestree = VG_(newFM)( ML_(dinfo_zalloc),
3409                            "di.readdwarf3.ndrw.2 (rangestree)",
3410                            ML_(dinfo_free),
3411                            (Word(*)(UWord,UWord))cmp__XArrays_of_AddrRange );
3412
3413   /* List of variables we're accumulating.  These don't end up in the
3414      DebugInfo; instead their contents are handed to ML_(addVar) and
3415      the list elements are then deleted. */
3416   tempvars = VG_(newXA)( ML_(dinfo_zalloc),
3417                          "di.readdwarf3.ndrw.3 (TempVar*s array)",
3418                          ML_(dinfo_free),
3419                          sizeof(TempVar*) );
3420
3421   /* List of GExprs we're accumulating.  These wind up in the
3422      DebugInfo. */
3423   gexprs = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.4",
3424                        ML_(dinfo_free), sizeof(GExpr*) );
3425
3426   /* We need a D3TypeParser to keep track of partially constructed
3427      types.  It'll be discarded as soon as we've completed the CU,
3428      since the resulting information is tipped in to 'tyents' as it
3429      is generated. */
3430   VG_(memset)( &typarser, 0, sizeof(typarser) );
3431   typarser.sp = -1;
3432   typarser.language = '?';
3433   for (i = 0; i < N_D3_TYPE_STACK; i++) {
3434      typarser.qparentE[i].tag   = Te_EMPTY;
3435      typarser.qparentE[i].cuOff = D3_INVALID_CUOFF;
3436   }
3437
3438   VG_(memset)( &varparser, 0, sizeof(varparser) );
3439   varparser.sp = -1;
3440
3441   TRACE_D3("\n------ Parsing .debug_info section ------\n");
3442   while (True) {
3443      UWord   cu_start_offset, cu_offset_now;
3444      CUConst cc;
3445      /* It may be that the stated size of this CU is larger than the
3446         amount of stuff actually in it.  icc9 seems to generate CUs
3447         thusly.  We use these variables to figure out if this is
3448         indeed the case, and if so how many bytes we need to skip to
3449         get to the start of the next CU.  Not skipping those bytes
3450         causes us to misidentify the start of the next CU, and it all
3451         goes badly wrong after that (not surprisingly). */
3452      UWord cu_size_including_IniLen, cu_amount_used;
3453
3454      /* It seems icc9 finishes the DIE info before debug_info_sz
3455         bytes have been used up.  So be flexible, and declare the
3456         sequence complete if there is not enough remaining bytes to
3457         hold even the smallest conceivable CU header.  (11 bytes I
3458         reckon). */
3459      /* JRS 23Jan09: I suspect this is no longer necessary now that
3460         the code below contains a 'while (cu_amount_used <
3461         cu_size_including_IniLen ...'  style loop, which skips over
3462         any leftover bytes at the end of a CU in the case where the
3463         CU's stated size is larger than its actual size (as
3464         determined by reading all its DIEs).  However, for prudence,
3465         I'll leave the following test in place.  I can't see that a
3466         CU header can be smaller than 11 bytes, so I don't think
3467         there's any harm possible through the test -- it just adds
3468         robustness. */
3469      Word avail = get_remaining_length_Cursor( &info );
3470      if (avail < 11) {
3471         if (avail > 0)
3472            TRACE_D3("new_dwarf3_reader_wrk: warning: "
3473                     "%ld unused bytes after end of DIEs\n", avail);
3474         break;
3475      }
3476
3477      /* Check the varparser's stack is in a sane state. */
3478      vg_assert(varparser.sp == -1);
3479      for (i = 0; i < N_D3_VAR_STACK; i++) {
3480         vg_assert(varparser.ranges[i] == NULL);
3481         vg_assert(varparser.level[i] == 0);
3482      }
3483      for (i = 0; i < N_D3_TYPE_STACK; i++) {
3484         vg_assert(typarser.qparentE[i].cuOff == D3_INVALID_CUOFF);
3485         vg_assert(typarser.qparentE[i].tag   == Te_EMPTY);
3486         vg_assert(typarser.qlevel[i] == 0);
3487      }
3488
3489      cu_start_offset = get_position_of_Cursor( &info );
3490      TRACE_D3("\n");
3491      TRACE_D3("  Compilation Unit @ offset 0x%lx:\n", cu_start_offset);
3492      /* parse_CU_header initialises the CU's set_abbv_Cursor cache
3493         (saC_cache) */
3494      parse_CU_Header( &cc, td3, &info,
3495                       (UChar*)debug_abbv_img, debug_abbv_sz );
3496      cc.debug_str_img    = debug_str_img;
3497      cc.debug_str_sz     = debug_str_sz;
3498      cc.debug_ranges_img = debug_ranges_img;
3499      cc.debug_ranges_sz  = debug_ranges_sz;
3500      cc.debug_loc_img    = debug_loc_img;
3501      cc.debug_loc_sz     = debug_loc_sz;
3502      cc.debug_line_img   = debug_line_img;
3503      cc.debug_line_sz    = debug_line_sz;
3504      cc.debug_info_img   = debug_info_img;
3505      cc.debug_info_sz    = debug_info_sz;
3506      cc.cu_start_offset  = cu_start_offset;
3507      cc.di = di;
3508      /* The CU's svma can be deduced by looking at the AT_low_pc
3509         value in the top level TAG_compile_unit, which is the topmost
3510         DIE.  We'll leave it for the 'varparser' to acquire that info
3511         and fill it in -- since it is the only party to want to know
3512         it. */
3513      cc.cu_svma_known = False;
3514      cc.cu_svma       = 0;
3515
3516      /* Create a fake outermost-level range covering the entire
3517         address range.  So we always have *something* to catch all
3518         variable declarations. */
3519      varstack_push( &cc, &varparser, td3,
3520                     unitary_range_list(0UL, ~0UL),
3521                     -1, False/*isFunc*/, NULL/*fbGX*/ );
3522
3523      /* And set up the file name table.  When we come across the top
3524         level DIE for this CU (which is what the next call to
3525         read_DIE should process) we will copy all the file names out
3526         of the .debug_line img area and use this table to look up the
3527         copies when we later see filename numbers in DW_TAG_variables
3528         etc. */
3529      vg_assert(!varparser.filenameTable );
3530      varparser.filenameTable
3531         = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.5",
3532                       ML_(dinfo_free),
3533                       sizeof(UChar*) );
3534      vg_assert(varparser.filenameTable);
3535
3536      /* Now read the one-and-only top-level DIE for this CU. */
3537      vg_assert(varparser.sp == 0);
3538      read_DIE( rangestree,
3539                tyents, tempvars, gexprs,
3540                &typarser, &varparser,
3541                &info, td3, &cc, 0 );
3542
3543      cu_offset_now = get_position_of_Cursor( &info );
3544
3545      if (0) VG_(printf)("Travelled: %lu  size %llu\n",
3546                         cu_offset_now - cc.cu_start_offset,
3547                         cc.unit_length + (cc.is_dw64 ? 12 : 4));
3548
3549      /* How big the CU claims it is .. */
3550      cu_size_including_IniLen = cc.unit_length + (cc.is_dw64 ? 12 : 4);
3551      /* .. vs how big we have found it to be */
3552      cu_amount_used = cu_offset_now - cc.cu_start_offset;
3553
3554      if (1) TRACE_D3("offset now %ld, d-i-size %ld\n",
3555                      cu_offset_now, debug_info_sz);
3556      if (cu_offset_now > debug_info_sz)
3557         barf("toplevel DIEs beyond end of CU");
3558
3559      /* If the CU is bigger than it claims to be, we've got a serious
3560         problem. */
3561      if (cu_amount_used > cu_size_including_IniLen)
3562         barf("CU's actual size appears to be larger than it claims it is");
3563
3564      /* If the CU is smaller than it claims to be, we need to skip some
3565         bytes.  Loop updates cu_offset_new and cu_amount_used. */
3566      while (cu_amount_used < cu_size_including_IniLen
3567             && get_remaining_length_Cursor( &info ) > 0) {
3568         if (0) VG_(printf)("SKIP\n");
3569         (void)get_UChar( &info );
3570         cu_offset_now = get_position_of_Cursor( &info );
3571         cu_amount_used = cu_offset_now - cc.cu_start_offset;
3572      }
3573
3574      if (cu_offset_now == debug_info_sz)
3575         break;
3576
3577      /* Preen to level -2.  DIEs have level >= 0 so -2 cannot occur
3578         anywhere else at all.  Our fake the-entire-address-space
3579         range is at level -1, so preening to -2 should completely
3580         empty the stack out. */
3581      TRACE_D3("\n");
3582      varstack_preen( &varparser, td3, -2 );
3583      /* Similarly, empty the type stack out. */
3584      typestack_preen( &typarser, td3, -2 );
3585      /* else keep going */
3586
3587      TRACE_D3("set_abbv_Cursor cache: %lu queries, %lu misses\n",
3588               cc.saC_cache_queries, cc.saC_cache_misses);
3589
3590      vg_assert(varparser.filenameTable );
3591      VG_(deleteXA)( varparser.filenameTable );
3592      varparser.filenameTable = NULL;
3593   }
3594
3595   /* From here on we're post-processing the stuff we got
3596      out of the .debug_info section. */
3597   if (td3) {
3598      TRACE_D3("\n");
3599      ML_(pp_TyEnts)(tyents, "Initial type entity (TyEnt) array");
3600      TRACE_D3("\n");
3601      TRACE_D3("------ Compressing type entries ------\n");
3602   }
3603
3604   tyents_cache = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.6",
3605                                     sizeof(TyEntIndexCache) );
3606   ML_(TyEntIndexCache__invalidate)( tyents_cache );
3607   dedup_types( td3, tyents, tyents_cache );
3608   if (td3) {
3609      TRACE_D3("\n");
3610      ML_(pp_TyEnts)(tyents, "After type entity (TyEnt) compression");
3611   }
3612
3613   TRACE_D3("\n");
3614   TRACE_D3("------ Resolving the types of variables ------\n" );
3615   resolve_variable_types( barf, tyents, tyents_cache, tempvars );
3616
3617   /* Copy all the non-INDIR tyents into a new table.  For large
3618      .so's, about 90% of the tyents will by now have been resolved to
3619      INDIRs, and we no longer need them, and so don't need to store
3620      them. */
3621   tyents_to_keep
3622      = VG_(newXA)( ML_(dinfo_zalloc),
3623                    "di.readdwarf3.ndrw.7 (TyEnt to-keep array)",
3624                    ML_(dinfo_free), sizeof(TyEnt) );
3625   n = VG_(sizeXA)( tyents );
3626   for (i = 0; i < n; i++) {
3627      TyEnt* ent = VG_(indexXA)( tyents, i );
3628      if (ent->tag != Te_INDIR)
3629         VG_(addToXA)( tyents_to_keep, ent );
3630   }
3631
3632   VG_(deleteXA)( tyents );
3633   tyents = NULL;
3634   ML_(dinfo_free)( tyents_cache );
3635   tyents_cache = NULL;
3636
3637   /* Sort tyents_to_keep so we can lookup in it.  A complete (if
3638      minor) waste of time, since tyents itself is sorted, but
3639      necessary since VG_(lookupXA) refuses to cooperate if we
3640      don't. */
3641   VG_(setCmpFnXA)(
3642      tyents_to_keep,
3643      (Int(*)(void*,void*)) ML_(TyEnt__cmp_by_cuOff_only)
3644   );
3645   VG_(sortXA)( tyents_to_keep );
3646
3647   /* Enable cacheing on tyents_to_keep */
3648   tyents_to_keep_cache
3649      = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.8",
3650                           sizeof(TyEntIndexCache) );
3651   ML_(TyEntIndexCache__invalidate)( tyents_to_keep_cache );
3652
3653   /* And record the tyents in the DebugInfo.  We do this before
3654      starting to hand variables to ML_(addVar), since if ML_(addVar)
3655      wants to do debug printing (of the types of said vars) then it
3656      will need the tyents.*/
3657   vg_assert(!di->admin_tyents);
3658   di->admin_tyents = tyents_to_keep;
3659
3660   /* Bias all the location expressions. */
3661   TRACE_D3("\n");
3662   TRACE_D3("------ Biasing the location expressions ------\n" );
3663
3664   n = VG_(sizeXA)( gexprs );
3665   for (i = 0; i < n; i++) {
3666      gexpr = *(GExpr**)VG_(indexXA)( gexprs, i );
3667      bias_GX( gexpr, di );
3668   }
3669
3670   TRACE_D3("\n");
3671   TRACE_D3("------ Acquired the following variables: ------\n\n");
3672
3673   /* Park (pointers to) all the vars in an XArray, so we can look up
3674      abstract origins quickly.  The array is sorted (hence, looked-up
3675      by) the .dioff fields.  Since the .dioffs should be in strictly
3676      ascending order, there is no need to sort the array after
3677      construction.  The ascendingness is however asserted for. */
3678   dioff_lookup_tab
3679      = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.9",
3680                    ML_(dinfo_free),
3681                    sizeof(TempVar*) );
3682   vg_assert(dioff_lookup_tab);
3683
3684   n = VG_(sizeXA)( tempvars );
3685   for (i = 0; i < n; i++) {
3686      varp = *(TempVar**)VG_(indexXA)( tempvars, i );
3687      if (i > 0) {
3688         varp2 = *(TempVar**)VG_(indexXA)( tempvars, i-1 );
3689         /* why should this hold?  Only, I think, because we've
3690            constructed the array by reading .debug_info sequentially,
3691            and so the array .dioff fields should reflect that, and be
3692            strictly ascending. */
3693         vg_assert(varp2->dioff < varp->dioff);
3694      }
3695      VG_(addToXA)( dioff_lookup_tab, &varp );
3696   }
3697   VG_(setCmpFnXA)( dioff_lookup_tab, cmp_TempVar_by_dioff );
3698   VG_(sortXA)( dioff_lookup_tab ); /* POINTLESS; FIXME: rm */
3699
3700   /* Now visit each var.  Collect up as much info as possible for
3701      each var and hand it to ML_(addVar). */
3702   n = VG_(sizeXA)( tempvars );
3703   for (j = 0; j < n; j++) {
3704      TyEnt* ent;
3705      varp = *(TempVar**)VG_(indexXA)( tempvars, j );
3706
3707      /* Possibly show .. */
3708      if (td3) {
3709         VG_(printf)("<%lx> addVar: level %d: %s :: ",
3710                     varp->dioff,
3711                     varp->level,
3712                     varp->name ? varp->name : (UChar*)"<anon_var>" );
3713         if (varp->typeR) {
3714            ML_(pp_TyEnt_C_ishly)( tyents_to_keep, varp->typeR );
3715         } else {
3716            VG_(printf)("NULL");
3717         }
3718         VG_(printf)("\n  Loc=");
3719         if (varp->gexpr) {
3720            ML_(pp_GX)(varp->gexpr);
3721         } else {
3722            VG_(printf)("NULL");
3723         }
3724         VG_(printf)("\n");
3725         if (varp->fbGX) {
3726            VG_(printf)("  FrB=");
3727            ML_(pp_GX)( varp->fbGX );
3728            VG_(printf)("\n");
3729         } else {
3730            VG_(printf)("  FrB=none\n");
3731         }
3732         VG_(printf)("  declared at: %s:%d\n",
3733                     varp->fName ? varp->fName : (UChar*)"NULL",
3734                     varp->fLine );
3735         if (varp->absOri != (UWord)D3_INVALID_CUOFF)
3736            VG_(printf)("  abstract origin: <%lx>\n", varp->absOri);
3737      }
3738
3739      /* Skip variables which have no location.  These must be
3740         abstract instances; they are useless as-is since with no
3741         location they have no specified memory location.  They will
3742         presumably be referred to via the absOri fields of other
3743         variables. */
3744      if (!varp->gexpr) {
3745         TRACE_D3("  SKIP (no location)\n\n");
3746         continue;
3747      }
3748
3749      /* So it has a location, at least.  If it refers to some other
3750         entry through its absOri field, pull in further info through
3751         that. */
3752      if (varp->absOri != (UWord)D3_INVALID_CUOFF) {
3753         Bool found;
3754         Word ixFirst, ixLast;
3755         TempVar key;
3756         TempVar* keyp = &key;
3757         TempVar *varAI;
3758         VG_(memset)(&key, 0, sizeof(key)); /* not necessary */
3759         key.dioff = varp->absOri; /* this is what we want to find */
3760         found = VG_(lookupXA)( dioff_lookup_tab, &keyp,
3761                                &ixFirst, &ixLast );
3762         if (!found) {
3763            /* barf("DW_AT_abstract_origin can't be resolved"); */
3764            TRACE_D3("  SKIP (DW_AT_abstract_origin can't be resolved)\n\n");
3765            continue;
3766         }
3767         /* If the following fails, there is more than one entry with
3768            the same dioff.  Which can't happen. */
3769         vg_assert(ixFirst == ixLast);
3770         varAI = *(TempVar**)VG_(indexXA)( dioff_lookup_tab, ixFirst );
3771         /* stay sane */
3772         vg_assert(varAI);
3773         vg_assert(varAI->dioff == varp->absOri);
3774
3775         /* Copy what useful info we can. */
3776         if (varAI->typeR && !varp->typeR)
3777            varp->typeR = varAI->typeR;
3778         if (varAI->name && !varp->name)
3779            varp->name = varAI->name;
3780         if (varAI->fName && !varp->fName)
3781            varp->fName = varAI->fName;
3782         if (varAI->fLine > 0 && varp->fLine == 0)
3783            varp->fLine = varAI->fLine;
3784      }
3785
3786      /* Give it a name if it doesn't have one. */
3787      if (!varp->name)
3788         varp->name = ML_(addStr)( di, "<anon_var>", -1 );
3789
3790      /* So now does it have enough info to be useful? */
3791      /* NOTE: re typeR: this is a hack.  If typeR is Te_UNKNOWN then
3792         the type didn't get resolved.  Really, in that case
3793         something's broken earlier on, and should be fixed, rather
3794         than just skipping the variable. */
3795      ent = ML_(TyEnts__index_by_cuOff)( tyents_to_keep,
3796                                         tyents_to_keep_cache,
3797                                         varp->typeR );
3798      /* The next two assertions should be guaranteed by
3799         our previous call to resolve_variable_types. */
3800      vg_assert(ent);
3801      vg_assert(ML_(TyEnt__is_type)(ent) || ent->tag == Te_UNKNOWN);
3802
3803      if (ent->tag == Te_UNKNOWN) continue;
3804
3805      vg_assert(varp->gexpr);
3806      vg_assert(varp->name);
3807      vg_assert(varp->typeR);
3808      vg_assert(varp->level >= 0);
3809
3810      /* Ok.  So we're going to keep it.  Call ML_(addVar) once for
3811         each address range in which the variable exists. */
3812      TRACE_D3("  ACQUIRE for range(s) ");
3813      { AddrRange  oneRange;
3814        AddrRange* varPcRanges;
3815        Word       nVarPcRanges;
3816        /* Set up to iterate over address ranges, however
3817           represented. */
3818        if (varp->nRanges == 0 || varp->nRanges == 1) {
3819           vg_assert(!varp->rngMany);
3820           if (varp->nRanges == 0) {
3821              vg_assert(varp->rngOneMin == 0);
3822              vg_assert(varp->rngOneMax == 0);
3823           }
3824           nVarPcRanges = varp->nRanges;
3825           oneRange.aMin = varp->rngOneMin;
3826           oneRange.aMax = varp->rngOneMax;
3827           varPcRanges = &oneRange;
3828        } else {
3829           vg_assert(varp->rngMany);
3830           vg_assert(varp->rngOneMin == 0);
3831           vg_assert(varp->rngOneMax == 0);
3832           nVarPcRanges = VG_(sizeXA)(varp->rngMany);
3833           vg_assert(nVarPcRanges >= 2);
3834           vg_assert(nVarPcRanges == (Word)varp->nRanges);
3835           varPcRanges = VG_(indexXA)(varp->rngMany, 0);
3836        }
3837        if (varp->level == 0)
3838           vg_assert( nVarPcRanges == 1 );
3839        /* and iterate */
3840        for (i = 0; i < nVarPcRanges; i++) {
3841           Addr pcMin = varPcRanges[i].aMin;
3842           Addr pcMax = varPcRanges[i].aMax;
3843           vg_assert(pcMin <= pcMax);
3844           /* Level 0 is the global address range.  So at level 0 we
3845              don't want to bias pcMin/pcMax; but at all other levels
3846              we do since those are derived from svmas in the Dwarf
3847              we're reading.  Be paranoid ... */
3848           if (varp->level == 0) {
3849              vg_assert(pcMin == (Addr)0);
3850              vg_assert(pcMax == ~(Addr)0);
3851           } else {
3852              /* vg_assert(pcMin > (Addr)0);
3853                 No .. we can legitimately expect to see ranges like
3854                 0x0-0x11D (pre-biasing, of course). */
3855              vg_assert(pcMax < ~(Addr)0);
3856           }
3857
3858           /* Apply text biasing, for non-global variables. */
3859           if (varp->level > 0) {
3860              pcMin += di->text_debug_bias;
3861              pcMax += di->text_debug_bias;
3862           }
3863
3864           if (i > 0 && (i%2) == 0)
3865              TRACE_D3("\n                       ");
3866           TRACE_D3("[%#lx,%#lx] ", pcMin, pcMax );
3867
3868           ML_(addVar)(
3869              di, varp->level,
3870                  pcMin, pcMax,
3871                  varp->name,  varp->typeR,
3872                  varp->gexpr, varp->fbGX,
3873                  varp->fName, varp->fLine, td3
3874           );
3875        }
3876      }
3877
3878      TRACE_D3("\n\n");
3879      /* and move on to the next var */
3880   }
3881
3882   /* Now free all the TempVars */
3883   n = VG_(sizeXA)( tempvars );
3884   for (i = 0; i < n; i++) {
3885      varp = *(TempVar**)VG_(indexXA)( tempvars, i );
3886      ML_(dinfo_free)(varp);
3887   }
3888   VG_(deleteXA)( tempvars );
3889   tempvars = NULL;
3890
3891   /* and the temp lookup table */
3892   VG_(deleteXA)( dioff_lookup_tab );
3893
3894   /* and the ranges tree.  Note that we need to also free the XArrays
3895      which constitute the keys, hence pass VG_(deleteXA) as a
3896      key-finalizer. */
3897   VG_(deleteFM)( rangestree, (void(*)(UWord))VG_(deleteXA), NULL );
3898
3899   /* and the tyents_to_keep cache */
3900   ML_(dinfo_free)( tyents_to_keep_cache );
3901   tyents_to_keep_cache = NULL;
3902
3903   /* and the file name table (just the array, not the entries
3904      themselves).  (Apparently, 2008-Oct-23, varparser.filenameTable
3905      can be NULL here, for icc9 generated Dwarf3.  Not sure what that
3906      signifies (a deeper problem with the reader?)) */
3907   if (varparser.filenameTable) {
3908      VG_(deleteXA)( varparser.filenameTable );
3909      varparser.filenameTable = NULL;
3910   }
3911
3912   /* record the GExprs in di so they can be freed later */
3913   vg_assert(!di->admin_gexprs);
3914   di->admin_gexprs = gexprs;
3915}
3916
3917
3918/*------------------------------------------------------------*/
3919/*---                                                      ---*/
3920/*--- The "new" DWARF3 reader -- top level control logic   ---*/
3921/*---                                                      ---*/
3922/*------------------------------------------------------------*/
3923
3924/* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
3925#include <setjmp.h>   /* For jmp_buf */
3926/* --- !!! --- EXTERNAL HEADERS end --- !!! --- */
3927
3928static Bool    d3rd_jmpbuf_valid  = False;
3929static HChar*  d3rd_jmpbuf_reason = NULL;
3930static jmp_buf d3rd_jmpbuf;
3931
3932static __attribute__((noreturn)) void barf ( HChar* reason ) {
3933   vg_assert(d3rd_jmpbuf_valid);
3934   d3rd_jmpbuf_reason = reason;
3935   __builtin_longjmp(&d3rd_jmpbuf, 1);
3936   /*NOTREACHED*/
3937   vg_assert(0);
3938}
3939
3940
3941void
3942ML_(new_dwarf3_reader) (
3943   struct _DebugInfo* di,
3944   UChar* debug_info_img,   SizeT debug_info_sz,
3945   UChar* debug_abbv_img,   SizeT debug_abbv_sz,
3946   UChar* debug_line_img,   SizeT debug_line_sz,
3947   UChar* debug_str_img,    SizeT debug_str_sz,
3948   UChar* debug_ranges_img, SizeT debug_ranges_sz,
3949   UChar* debug_loc_img,    SizeT debug_loc_sz
3950)
3951{
3952   volatile Int  jumped;
3953   volatile Bool td3 = di->trace_symtab;
3954
3955   /* Run the _wrk function to read the dwarf3.  If it succeeds, it
3956      just returns normally.  If there is any failure, it longjmp's
3957      back here, having first set d3rd_jmpbuf_reason to something
3958      useful. */
3959   vg_assert(d3rd_jmpbuf_valid  == False);
3960   vg_assert(d3rd_jmpbuf_reason == NULL);
3961
3962   d3rd_jmpbuf_valid = True;
3963   jumped = __builtin_setjmp(&d3rd_jmpbuf);
3964   if (jumped == 0) {
3965      /* try this ... */
3966      new_dwarf3_reader_wrk( di, barf,
3967                             debug_info_img,   debug_info_sz,
3968                             debug_abbv_img,   debug_abbv_sz,
3969                             debug_line_img,   debug_line_sz,
3970                             debug_str_img,    debug_str_sz,
3971                             debug_ranges_img, debug_ranges_sz,
3972                             debug_loc_img,    debug_loc_sz );
3973      d3rd_jmpbuf_valid = False;
3974      TRACE_D3("\n------ .debug_info reading was successful ------\n");
3975   } else {
3976      /* It longjmp'd. */
3977      d3rd_jmpbuf_valid = False;
3978      /* Can't longjump without giving some sort of reason. */
3979      vg_assert(d3rd_jmpbuf_reason != NULL);
3980
3981      TRACE_D3("\n------ .debug_info reading failed ------\n");
3982
3983      ML_(symerr)(di, True, d3rd_jmpbuf_reason);
3984   }
3985
3986   d3rd_jmpbuf_valid  = False;
3987   d3rd_jmpbuf_reason = NULL;
3988}
3989
3990
3991
3992/* --- Unused code fragments which might be useful one day. --- */
3993
#if 0
   /* Read the arange tables.  This is a disabled fragment, not a
      complete function: it relies on 'aranges' (a Cursor),
      'debug_aranges_img', 'debug_aranges_sz' and 'barf' being
      supplied by whatever code it is pasted into.  It only walks and
      traces the section; nothing is stored. */
   TRACE_SYMTAB("\n");
   TRACE_SYMTAB("\n------ The contents of .debug_arange ------\n");
   init_Cursor( &aranges, debug_aranges_img,
                debug_aranges_sz, 0, barf,
                "Overrun whilst reading .debug_aranges section" );
   while (True) {
      ULong  len, d_i_offset;
      Bool   is64;
      UShort version;
      UChar  asize, segsize;

      if (is_at_end_Cursor( &aranges ))
         break;
      /* Read one arange thingy */
      /* initial_length field */
      len = get_Initial_Length( &is64, &aranges,
               "in .debug_aranges: invalid initial-length field" );
      version    = get_UShort( &aranges );
      d_i_offset = get_Dwarfish_UWord( &aranges, is64 );
      asize      = get_UChar( &aranges );
      segsize    = get_UChar( &aranges );
      TRACE_D3("  Length:                   %llu\n", len);
      TRACE_D3("  Version:                  %d\n", (Int)version);
      TRACE_D3("  Offset into .debug_info:  %llx\n", d_i_offset);
      TRACE_D3("  Pointer Size:             %d\n", (Int)asize);
      TRACE_D3("  Segment Size:             %d\n", (Int)segsize);
      TRACE_D3("\n");
      TRACE_D3("    Address            Length\n");

      /* 'asize' comes straight from the file.  It is used below as a
         divisor (asize == 0 would be a modulo by zero) and, via
         'asize==8', as the only selector of the tuple word width.
         Give up on anything other than 4 or 8 rather than misparse
         the tuples. */
      if (asize != 4 && asize != 8)
         barf( "in .debug_aranges: unsupported address size" );

      /* Skip padding up to the next multiple of twice the address
         size, which is where the (address, length) tuples start. */
      while ((get_position_of_Cursor( &aranges ) % (2 * asize)) > 0) {
         (void)get_UChar( & aranges );
      }
      /* Trace tuples up to and including the all-zero terminator. */
      while (True) {
         ULong address = get_Dwarfish_UWord( &aranges, asize==8 );
         ULong length = get_Dwarfish_UWord( &aranges, asize==8 );
         TRACE_D3("    0x%016llx 0x%llx\n", address, length);
         if (address == 0 && length == 0) break;
      }
   }
   TRACE_SYMTAB("\n");
#endif
4037
4038#endif // defined(VGO_linux) || defined(VGO_darwin)
4039
4040/*--------------------------------------------------------------------*/
4041/*--- end                                                          ---*/
4042/*--------------------------------------------------------------------*/
4043