readdwarf3.c revision c4431bfe04c7490ea2d74939d222d87f13f30960
1
2/*--------------------------------------------------------------------*/
3/*--- Read DWARF3 ".debug_info" sections (DIE trees).              ---*/
4/*---                                                 readdwarf3.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8   This file is part of Valgrind, a dynamic binary instrumentation
9   framework.
10
11   Copyright (C) 2008-2008 OpenWorks LLP
12      info@open-works.co.uk
13
14   This program is free software; you can redistribute it and/or
15   modify it under the terms of the GNU General Public License as
16   published by the Free Software Foundation; either version 2 of the
17   License, or (at your option) any later version.
18
19   This program is distributed in the hope that it will be useful, but
20   WITHOUT ANY WARRANTY; without even the implied warranty of
21   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22   General Public License for more details.
23
24   You should have received a copy of the GNU General Public License
25   along with this program; if not, write to the Free Software
26   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27   02111-1307, USA.
28
29   The GNU General Public License is contained in the file COPYING.
30
31   Neither the names of the U.S. Department of Energy nor the
32   University of California nor the names of its contributors may be
33   used to endorse or promote products derived from this software
34   without prior written permission.
35*/
36
37/* REFERENCE (without which this code will not make much sense):
38
39   DWARF Debugging Information Format, Version 3,
40   dated 20 December 2005 (the "D3 spec").
41
42   Available at http://www.dwarfstd.org/Dwarf3.pdf.  There's also a
43   .doc (MS Word) version, but for some reason the section numbers
44   between the Word and PDF versions differ by 1 in the first digit.
45   All section references in this code are to the PDF version.
46
47   CURRENT HACKS:
48
49   DW_TAG_{const,volatile}_type no DW_AT_type is allowed; it is
50      assumed to mean "const void" or "volatile void" respectively.
51      GDB appears to interpret them like this, anyway.
52
53   In many cases it is important to know the svma of a CU (the "base
54   address of the CU", as the D3 spec calls it).  There are some
55   situations in which the spec implies this value is unknown, but the
56   Dwarf3 produced by gcc-4.1 seems to assume is not unknown but
57   merely zero when not explicitly stated.  So we too have to make
58   that assumption.
59
60   POTENTIAL BUG?  Spotted 6 Sept 08.  Why doesn't
61   unitary_range_list() bias the resulting range list in the same way
62   that its more general cousin, get_range_list(), does?  I don't
63   know.
64
65   TODO, 2008 Feb 17:
66
67   get rid of cu_svma_known and document the assumed-zero svma hack.
68
69   ML_(sizeOfType): differentiate between zero sized types and types
70   for which the size is unknown.  Is this important?  I don't know.
71
72   DW_AT_array_types: deal with explicit sizes (currently we compute
73   the size from the bounds and the element size, although that's
74   fragile, if the bounds incompletely specified, or completely
75   absent)
76
77   Document reason for difference (by 1) of stack preening depth in
78   parse_var_DIE vs parse_type_DIE.
79
80   Don't hand to ML_(addVars), vars whose locations are entirely in
81   registers (DW_OP_reg*).  This is merely a space-saving
82   optimisation, as ML_(evaluate_Dwarf3_Expr) should handle these
83   expressions correctly, by failing to evaluate them and hence
84   effectively ignoring the variable with which they are associated.
85
86   Deal with DW_AT_array_types which have element size != stride
87
88   In some cases, the info for a variable is split between two
89   different DIEs (generally a declarer and a definer).  We punt on
90   these.  Could do better here.
91
92   The 'data_bias' argument passed to the expression evaluator
93   (ML_(evaluate_Dwarf3_Expr)) should really be changed to a
94   MaybeUWord, to make it clear when we do vs don't know what it is
95   for the evaluation of an expression.  At the moment zero is passed
96   for this parameter in the don't know case.  That's a bit fragile
97   and obscure; using a MaybeUWord would be clearer.
98
99   POTENTIAL PERFORMANCE IMPROVEMENTS:
100
101   Currently, duplicate removal and all other queries for the type
102   entities array is done using cuOffset-based pointing, which
103   involves a binary search (VG_(lookupXA)) for each access.  This is
104   wildly inefficient, although simple.  It would be better to
105   translate all the cuOffset-based references (iow, all the "R" and
106   "Rs" fields in the TyEnts in 'tyents') to direct index numbers in
107   'tyents' right at the start of dedup_types(), and use direct
108   indexing (VG_(indexXA)) wherever possible after that.
109
110   cmp__XArrays_of_AddrRange is also a performance bottleneck.  Move
111   VG_(indexXA) into pub_tool_xarray.h so it can be inlined at all use
112   points, and possibly also make an _UNCHECKED version which skips
113   the range checks in performance-critical situations such as this.
114
115   Handle interaction between read_DIE and parse_{var,type}_DIE
116   better.  Currently read_DIE reads the entire DIE just to find where
117   the end is (and for debug printing), so that it can later reliably
118   move the cursor to the end regardless of what parse_{var,type}_DIE
119   do.  This means many DIEs (most, even?) are read twice.  It would
120   be smarter to make parse_{var,type}_DIE return a Bool indicating
121   whether or not they advanced the DIE cursor, and only if they
122   didn't should read_DIE itself read through the DIE.
123
124   ML_(addVar) and add_var_to_arange: quite a lot of DiAddrRanges have
125   zero variables in their .vars XArray.  Rather than have an XArray
126   with zero elements (which uses 2 malloc'd blocks), allow the .vars
127   pointer to be NULL in this case.
128
129   More generally, reduce the amount of memory allocated and freed
130   while reading Dwarf3 type/variable information.  Even modest (20MB)
131   objects cause this module to allocate and free hundreds of
132   thousands of small blocks, and ML_(arena_malloc) and its various
133   groupies always show up at the top of performance profiles. */
134
135#include "pub_core_basics.h"
136#include "pub_core_libcbase.h"
137#include "pub_core_libcassert.h"
138#include "pub_core_libcprint.h"
139#include "pub_core_options.h"
140#include "pub_core_xarray.h"
141#include "pub_core_wordfm.h"
142#include "priv_misc.h"             /* dinfo_zalloc/free */
143#include "priv_tytypes.h"
144#include "priv_d3basics.h"
145#include "priv_storage.h"
146#include "priv_readdwarf3.h"       /* self */
147
148
149/*------------------------------------------------------------*/
150/*---                                                      ---*/
151/*--- Basic machinery for parsing DIEs.                    ---*/
152/*---                                                      ---*/
153/*------------------------------------------------------------*/
154
155#define TRACE_D3(format, args...) \
156   if (td3) { VG_(printf)(format, ## args); }
157
158#define D3_INVALID_CUOFF  ((UWord)(-1UL))
159#define D3_FAKEVOID_CUOFF ((UWord)(-2UL))
160
161typedef
162   struct {
163      UChar* region_start_img;
164      UWord  region_szB;
165      UWord  region_next;
166      void (*barf)( HChar* ) __attribute__((noreturn));
167      HChar* barfstr;
168   }
169   Cursor;
170
171static inline Bool is_sane_Cursor ( Cursor* c ) {
172   if (!c)                return False;
173   if (!c->barf)          return False;
174   if (!c->barfstr)       return False;
175   return True;
176}
177
178static void init_Cursor ( Cursor* c,
179                          UChar*  region_start_img,
180                          UWord   region_szB,
181                          UWord   region_next,
182                          __attribute__((noreturn)) void (*barf)( HChar* ),
183                          HChar*  barfstr )
184{
185   vg_assert(c);
186   VG_(memset)(c, 0, sizeof(*c));
187   c->region_start_img = region_start_img;
188   c->region_szB       = region_szB;
189   c->region_next      = region_next;
190   c->barf             = barf;
191   c->barfstr          = barfstr;
192   vg_assert(is_sane_Cursor(c));
193}
194
195static Bool is_at_end_Cursor ( Cursor* c ) {
196   vg_assert(is_sane_Cursor(c));
197   return c->region_next >= c->region_szB;
198}
199
200static inline UWord get_position_of_Cursor ( Cursor* c ) {
201   vg_assert(is_sane_Cursor(c));
202   return c->region_next;
203}
204static inline void set_position_of_Cursor ( Cursor* c, UWord pos ) {
205   c->region_next = pos;
206   vg_assert(is_sane_Cursor(c));
207}
208
209static /*signed*/Word get_remaining_length_Cursor ( Cursor* c ) {
210   vg_assert(is_sane_Cursor(c));
211   return c->region_szB - c->region_next;
212}
213
214static UChar* get_address_of_Cursor ( Cursor* c ) {
215   vg_assert(is_sane_Cursor(c));
216   return &c->region_start_img[ c->region_next ];
217}
218
219__attribute__((noreturn))
220static void failWith ( Cursor* c, HChar* str ) {
221   vg_assert(c);
222   vg_assert(c->barf);
223   c->barf(str);
224   /*NOTREACHED*/
225   vg_assert(0);
226}
227
228/* FIXME: document assumptions on endianness for
229   get_UShort/UInt/ULong. */
230static inline UChar get_UChar ( Cursor* c ) {
231   UChar r;
232   /* vg_assert(is_sane_Cursor(c)); */
233   if (c->region_next + sizeof(UChar) > c->region_szB) {
234      c->barf(c->barfstr);
235      /*NOTREACHED*/
236      vg_assert(0);
237   }
238   r = * (UChar*) &c->region_start_img[ c->region_next ];
239   c->region_next += sizeof(UChar);
240   return r;
241}
242static UShort get_UShort ( Cursor* c ) {
243   UShort r;
244   vg_assert(is_sane_Cursor(c));
245   if (c->region_next + sizeof(UShort) > c->region_szB) {
246      c->barf(c->barfstr);
247      /*NOTREACHED*/
248      vg_assert(0);
249   }
250   r = * (UShort*) &c->region_start_img[ c->region_next ];
251   c->region_next += sizeof(UShort);
252   return r;
253}
254static UInt get_UInt ( Cursor* c ) {
255   UInt r;
256   vg_assert(is_sane_Cursor(c));
257   if (c->region_next + sizeof(UInt) > c->region_szB) {
258      c->barf(c->barfstr);
259      /*NOTREACHED*/
260      vg_assert(0);
261   }
262   r = * (UInt*) &c->region_start_img[ c->region_next ];
263   c->region_next += sizeof(UInt);
264   return r;
265}
266static ULong get_ULong ( Cursor* c ) {
267   ULong r;
268   vg_assert(is_sane_Cursor(c));
269   if (c->region_next + sizeof(ULong) > c->region_szB) {
270      c->barf(c->barfstr);
271      /*NOTREACHED*/
272      vg_assert(0);
273   }
274   r = * (ULong*) &c->region_start_img[ c->region_next ];
275   c->region_next += sizeof(ULong);
276   return r;
277}
278static inline ULong get_ULEB128 ( Cursor* c ) {
279   ULong result;
280   Int   shift;
281   UChar byte;
282   /* unroll first iteration */
283   byte = get_UChar( c );
284   result = (ULong)(byte & 0x7f);
285   if (LIKELY(!(byte & 0x80))) return result;
286   shift = 7;
287   /* end unroll first iteration */
288   do {
289      byte = get_UChar( c );
290      result |= ((ULong)(byte & 0x7f)) << shift;
291      shift += 7;
292   } while (byte & 0x80);
293   return result;
294}
295static Long get_SLEB128 ( Cursor* c ) {
296   ULong  result = 0;
297   Int    shift = 0;
298   UChar  byte;
299   do {
300      byte = get_UChar(c);
301      result |= ((ULong)(byte & 0x7f)) << shift;
302      shift += 7;
303   } while (byte & 0x80);
304   if (shift < 64 && (byte & 0x40))
305      result |= -(1ULL << shift);
306   return result;
307}
308
309/* Assume 'c' points to the start of a string.  Return the absolute
310   address of whatever it points at, and advance it past the
311   terminating zero.  This makes it safe for the caller to then copy
312   the string with ML_(addStr), since (w.r.t. image overruns) the
313   process of advancing past the terminating zero will already have
314   "vetted" the string. */
315static UChar* get_AsciiZ ( Cursor* c ) {
316   UChar  uc;
317   UChar* res = get_address_of_Cursor(c);
318   do { uc = get_UChar(c); } while (uc != 0);
319   return res;
320}
321
322static ULong peek_ULEB128 ( Cursor* c ) {
323   Word here = c->region_next;
324   ULong r = get_ULEB128( c );
325   c->region_next = here;
326   return r;
327}
328static UChar peek_UChar ( Cursor* c ) {
329   Word here = c->region_next;
330   UChar r = get_UChar( c );
331   c->region_next = here;
332   return r;
333}
334
335static ULong get_Dwarfish_UWord ( Cursor* c, Bool is_dw64 ) {
336   return is_dw64 ? get_ULong(c) : (ULong) get_UInt(c);
337}
338
339static UWord get_UWord ( Cursor* c ) {
340   vg_assert(sizeof(UWord) == sizeof(void*));
341   if (sizeof(UWord) == 4) return get_UInt(c);
342   if (sizeof(UWord) == 8) return get_ULong(c);
343   vg_assert(0);
344}
345
346
347/* Read a DWARF3 'Initial Length' field */
348static ULong get_Initial_Length ( /*OUT*/Bool* is64,
349                                  Cursor* c,
350                                  HChar* barfMsg )
351{
352   ULong w64;
353   UInt  w32;
354   *is64 = False;
355   w32 = get_UInt( c );
356   if (w32 >= 0xFFFFFFF0 && w32 < 0xFFFFFFFF) {
357      c->barf( barfMsg );
358   }
359   else if (w32 == 0xFFFFFFFF) {
360      *is64 = True;
361      w64   = get_ULong( c );
362   } else {
363      *is64 = False;
364      w64 = (ULong)w32;
365   }
366   return w64;
367}
368
369
370/*------------------------------------------------------------*/
371/*---                                                      ---*/
372/*--- "CUConst" structure                                  ---*/
373/*---                                                      ---*/
374/*------------------------------------------------------------*/
375
376#define N_ABBV_CACHE 32
377
378/* Holds information that is constant through the parsing of a
379   Compilation Unit.  This is basically plumbed through to
380   everywhere. */
381typedef
382   struct {
383      /* Call here if anything goes wrong */
384      void (*barf)( HChar* ) __attribute__((noreturn));
385      /* Is this 64-bit DWARF ? */
386      Bool   is_dw64;
387      /* Which DWARF version ?  (2 or 3) */
388      UShort version;
389      /* Length of this Compilation Unit, excluding its Header */
390      ULong  unit_length;
391      /* Offset of start of this unit in .debug_info */
392      UWord  cu_start_offset;
393      /* SVMA for this CU.  In the D3 spec, is known as the "base
394         address of the compilation unit (last para sec 3.1.1).
395         Needed for (amongst things) interpretation of location-list
396         values. */
397      Addr   cu_svma;
398      Bool   cu_svma_known;
399      /* The debug_abbreviations table to be used for this Unit */
400      UChar* debug_abbv;
401      /* Upper bound on size thereof (an overestimate, in general) */
402      UWord  debug_abbv_maxszB;
403      /* Where is .debug_str ? */
404      UChar* debug_str_img;
405      UWord  debug_str_sz;
406      /* Where is .debug_ranges ? */
407      UChar* debug_ranges_img;
408      UWord  debug_ranges_sz;
409      /* Where is .debug_loc ? */
410      UChar* debug_loc_img;
411      UWord  debug_loc_sz;
412      /* Where is .debug_line? */
413      UChar* debug_line_img;
414      UWord  debug_line_sz;
415      /* --- Needed so we can add stuff to the string table. --- */
416      struct _DebugInfo* di;
417      /* --- a cache for set_abbv_Cursor --- */
418      /* abbv_code == (ULong)-1 for an unused entry. */
419      struct { ULong abbv_code; UWord posn; } saC_cache[N_ABBV_CACHE];
420      UWord saC_cache_queries;
421      UWord saC_cache_misses;
422   }
423   CUConst;
424
425
426/*------------------------------------------------------------*/
427/*---                                                      ---*/
428/*--- Helper functions for Guarded Expressions             ---*/
429/*---                                                      ---*/
430/*------------------------------------------------------------*/
431
432/* Parse the location list starting at img-offset 'debug_loc_offset'
433   in .debug_loc.  Results are biased with 'svma_of_referencing_CU'
434   and so I believe are correct SVMAs for the object as a whole.  This
435   function allocates the UChar*, and the caller must deallocate it.
436   The resulting block is in so-called Guarded-Expression format.
437
438   Guarded-Expression format is similar but not identical to the DWARF3
439   location-list format.  The format of each returned block is:
440
441      UChar biasMe;
442      UChar isEnd;
443      followed by zero or more of
444
445      (Addr aMin;  Addr aMax;  UShort nbytes;  ..bytes..;  UChar isEnd)
446
447   '..bytes..' is an standard DWARF3 location expression which is
448   valid when aMin <= pc <= aMax (possibly after suitable biasing).
449
450   The number of bytes in '..bytes..' is nbytes.
451
452   The end of the sequence is marked by an isEnd == 1 value.  All
453   previous isEnd values must be zero.
454
455   biasMe is 1 if the aMin/aMax fields need this DebugInfo's
456   text_bias added before use, and 0 if the GX is this is not
457   necessary (is ready to go).
458
459   Hence the block can be quickly parsed and is self-describing.  Note
460   that aMax is 1 less than the corresponding value in a DWARF3
461   location list.  Zero length ranges, with aMax == aMin-1, are not
462   allowed.
463*/
464/* 2008-sept-12: moved ML_(pp_GX) from here to d3basics.c, where
465   it more logically belongs. */
466
467
468/* "Comment_Regarding_DWARF3_Text_Biasing" (is referred to elsewhere)
469    -----------------------------------------------------------------
470    apply_kludgey_text_bias() is our mechanism for biasing text
471    addresses found in DWARF3 .debug_info, .debug_ranges, .debug_loc
472    sections.  This is a nasty and unprincipled hack.
473
474    Biasing the text svmas, so as to obtain text avmas, should be
475    straightforward, right?  We just add on di->text_bias, as
476    carefully computed by readelf.c.
477
478    That works OK most of the time.  But in the following case it fails:
479    1. The object is made in the usual way (gcc -g, etc)
480    2. The DWARF3 stuff removed from it and parked in a .debuginfo object
481    3. The remaining (base) object is then prelinked.
482
483    Prelinking changes the text svmas throughout an object by some
484    constant amount, including the DWARF3 stuff.  So if the DWARF3
485    stuff remains attached to the original object, then there is no
486    problem.  However, if the DWARF3 stuff is detached, and the
487    remaining object is prelinked and the debuginfo object isn't, then
488    we have a problem: the text bias computed for the main object
489    isn't correct for the debuginfo object.
490
491    So the following kludged is used to bias text svmas.
492
493    1. First, try with the text bias computed for the main object.  If
494       that gives an avma inside the area in which the text segment is
495       known to have been mapped, then all well and good.
496
497    2. If not, try using the avma of the text mapped area as a bias.
498       Again, if that works out, fine.  This is the heart of the
499       kludge.  It implicitly treats the svma-s to be biased as if
500       they had been prelinked to zero.
501
502    3. If even that doesn't work, just return the avma unchanged.
503
504    For each object/object-pair, we count the number of times each
505    case occurs.  We flag an error (which the user gets to see) if (3)
506    ever occurs, or if a mixture of (1) and (2) occurs.  That should
507    at least catch the most obvious snafus.
508
509    Caveats: the main remaining worry is whether this problem somehow
510    also affects the data-biasing done for case DW_OP_addr in
511    ML_(evaluate_Dwarf3_Expr) in d3basics.c.  This is currently
512    unknown.
513
514    Possible sources of info: canonical description seems to be:
515
516       http://people.redhat.com/jakub/prelink.pdf
517
518    See para at line 337 starting "DWARF 2 debugging information ..."
519
520    This thread looks like the gdb people hitting the same issue:
521
522       http://sourceware.org/ml/gdb-patches/2007-01/msg00278.html
523*/
524typedef
525   struct {
526      /* FIXED */
527      Addr     rx_map_avma;
528      SizeT    rx_map_size;
529      PtrdiffT text_bias;
530      /* VARIABLE -- count stats */
531      UWord n_straightforward_biasings;
532      UWord n_kludgey_biasings;
533      UWord n_failed_biasings;
534   }
535   KludgeyTextBiaser;
536
537static Addr apply_kludgey_text_bias ( KludgeyTextBiaser* ktb,
538                                      Addr allegedly_text_svma ) {
539   Addr res;
540   res = allegedly_text_svma + ktb->text_bias;
541   if (res >= ktb->rx_map_avma
542       && res < ktb->rx_map_avma + ktb->rx_map_size) {
543      ktb->n_straightforward_biasings++;
544      return res;
545   }
546   res = allegedly_text_svma + ktb->rx_map_avma;
547   if (res >= ktb->rx_map_avma
548       && res < ktb->rx_map_avma + ktb->rx_map_size) {
549      ktb->n_kludgey_biasings++;
550      return res;
551   }
552   ktb->n_failed_biasings++;
553   return allegedly_text_svma; /* this svma is a luzer */
554}
555
556
557/* Apply a text bias to a GX.  Kludgily :-( */
558static void bias_GX ( /*MOD*/GExpr* gx, KludgeyTextBiaser* ktb )
559{
560   UShort nbytes;
561   Addr*  pA;
562   UChar* p = &gx->payload[0];
563   UChar  uc;
564   uc = *p++; /*biasMe*/
565   if (uc == 0)
566      return;
567   vg_assert(uc == 1);
568   p[-1] = 0; /* mark it as done */
569   while (True) {
570      uc = *p++;
571      if (uc == 1)
572         break; /*isEnd*/
573      vg_assert(uc == 0);
574      /* t-bias aMin */
575      pA = (Addr*)p;
576      *pA = apply_kludgey_text_bias( ktb, *pA );
577      p += sizeof(Addr);
578      /* t-bias aMax */
579      pA = (Addr*)p;
580      *pA = apply_kludgey_text_bias( ktb, *pA );
581      p += sizeof(Addr);
582      /* nbytes, and actual expression */
583      nbytes = * (UShort*)p; p += sizeof(UShort);
584      p += nbytes;
585   }
586}
587
588__attribute__((noinline))
589static GExpr* make_singleton_GX ( UChar* block, UWord nbytes )
590{
591   SizeT  bytesReqd;
592   GExpr* gx;
593   UChar *p, *pstart;
594
595   vg_assert(sizeof(UWord) == sizeof(Addr));
596   vg_assert(nbytes <= 0xFFFF); /* else we overflow the nbytes field */
597   bytesReqd
598      =   sizeof(UChar)  /*biasMe*/    + sizeof(UChar) /*!isEnd*/
599        + sizeof(UWord)  /*aMin*/      + sizeof(UWord) /*aMax*/
600        + sizeof(UShort) /*nbytes*/    + nbytes
601        + sizeof(UChar); /*isEnd*/
602
603   gx = ML_(dinfo_zalloc)( "di.readdwarf3.msGX.1",
604                           sizeof(GExpr) + bytesReqd );
605   vg_assert(gx);
606
607   p = pstart = &gx->payload[0];
608
609   * ((UChar*)p)  = 0;          /*biasMe*/ p += sizeof(UChar);
610   * ((UChar*)p)  = 0;          /*!isEnd*/ p += sizeof(UChar);
611   * ((Addr*)p)   = 0;          /*aMin*/   p += sizeof(Addr);
612   * ((Addr*)p)   = ~((Addr)0); /*aMax */  p += sizeof(Addr);
613   * ((UShort*)p) = (UShort)nbytes; /*nbytes*/ p += sizeof(UShort);
614   VG_(memcpy)(p, block, nbytes); p += nbytes;
615   * ((UChar*)p)  = 1;          /*isEnd*/  p += sizeof(UChar);
616
617   vg_assert( (SizeT)(p - pstart) == bytesReqd);
618   vg_assert( &gx->payload[bytesReqd]
619              == ((UChar*)gx) + sizeof(GExpr) + bytesReqd );
620
621   return gx;
622}
623
624__attribute__((noinline))
625static GExpr* make_general_GX ( CUConst* cc,
626                                Bool     td3,
627                                UWord    debug_loc_offset,
628                                Addr     svma_of_referencing_CU )
629{
630   Addr      base;
631   Cursor    loc;
632   XArray*   xa; /* XArray of UChar */
633   GExpr*    gx;
634   Word      nbytes;
635
636   vg_assert(sizeof(UWord) == sizeof(Addr));
637   if (cc->debug_loc_sz == 0)
638      cc->barf("make_general_GX: .debug_loc is empty/missing");
639
640   init_Cursor( &loc, cc->debug_loc_img,
641                cc->debug_loc_sz, 0, cc->barf,
642                "Overrun whilst reading .debug_loc section(2)" );
643   set_position_of_Cursor( &loc, debug_loc_offset );
644
645   TRACE_D3("make_general_GX (.debug_loc_offset = %lu, img = %p) {\n",
646            debug_loc_offset, get_address_of_Cursor( &loc ) );
647
648   /* Who frees this xa?  It is freed before this fn exits. */
649   xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.mgGX.1",
650                    ML_(dinfo_free),
651                    sizeof(UChar) );
652
653   { UChar c = 1; /*biasMe*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
654
655   base = 0;
656   while (True) {
657      Bool  acquire;
658      UWord len;
659      /* Read a (host-)word pair.  This is something of a hack since
660         the word size to read is really dictated by the ELF file;
661         however, we assume we're reading a file with the same
662         word-sizeness as the host.  Reasonably enough. */
663      UWord w1 = get_UWord( &loc );
664      UWord w2 = get_UWord( &loc );
665
666      TRACE_D3("   %08lx %08lx\n", w1, w2);
667      if (w1 == 0 && w2 == 0)
668         break; /* end of list */
669
670      if (w1 == -1UL) {
671         /* new value for 'base' */
672         base = w2;
673         continue;
674      }
675
676      /* else a location expression follows */
677      /* else enumerate [w1+base, w2+base) */
678      /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
679         (sec 2.17.2) */
680      if (w1 > w2) {
681         TRACE_D3("negative range is for .debug_loc expr at "
682                  "file offset %lu\n",
683                  debug_loc_offset);
684         cc->barf( "negative range in .debug_loc section" );
685      }
686
687      /* ignore zero length ranges */
688      acquire = w1 < w2;
689      len     = (UWord)get_UShort( &loc );
690
691      if (acquire) {
692         UWord  w;
693         UShort s;
694         UChar  c;
695         c = 0; /* !isEnd*/
696         VG_(addBytesToXA)( xa, &c, sizeof(c) );
697         w = w1    + base + svma_of_referencing_CU;
698         VG_(addBytesToXA)( xa, &w, sizeof(w) );
699         w = w2 -1 + base + svma_of_referencing_CU;
700         VG_(addBytesToXA)( xa, &w, sizeof(w) );
701         s = (UShort)len;
702         VG_(addBytesToXA)( xa, &s, sizeof(s) );
703      }
704
705      while (len > 0) {
706         UChar byte = get_UChar( &loc );
707         TRACE_D3("%02x", (UInt)byte);
708         if (acquire)
709            VG_(addBytesToXA)( xa, &byte, 1 );
710         len--;
711      }
712      TRACE_D3("\n");
713   }
714
715   { UChar c = 1; /*isEnd*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
716
717   nbytes = VG_(sizeXA)( xa );
718   vg_assert(nbytes >= 1);
719
720   gx = ML_(dinfo_zalloc)( "di.readdwarf3.mgGX.2", sizeof(GExpr) + nbytes );
721   vg_assert(gx);
722   VG_(memcpy)( &gx->payload[0], (UChar*)VG_(indexXA)(xa,0), nbytes );
723   vg_assert( &gx->payload[nbytes]
724              == ((UChar*)gx) + sizeof(GExpr) + nbytes );
725
726   VG_(deleteXA)( xa );
727
728   TRACE_D3("}\n");
729
730   return gx;
731}
732
733
734/*------------------------------------------------------------*/
735/*---                                                      ---*/
736/*--- Helper functions for range lists and CU headers      ---*/
737/*---                                                      ---*/
738/*------------------------------------------------------------*/
739
740/* Denotes an address range.  Both aMin and aMax are included in the
741   range; hence a complete range is (0, ~0) and an empty range is any
742   (X, X-1) for X > 0.*/
743typedef
744   struct { Addr aMin; Addr aMax; }
745   AddrRange;
746
747
748/* Generate an arbitrary structural total ordering on
749   XArray* of AddrRange. */
750static Word cmp__XArrays_of_AddrRange ( XArray* rngs1, XArray* rngs2 )
751{
752   Word n1, n2, i;
753   tl_assert(rngs1 && rngs2);
754   n1 = VG_(sizeXA)( rngs1 );
755   n2 = VG_(sizeXA)( rngs2 );
756   if (n1 < n2) return -1;
757   if (n1 > n2) return 1;
758   for (i = 0; i < n1; i++) {
759      AddrRange* rng1 = (AddrRange*)VG_(indexXA)( rngs1, i );
760      AddrRange* rng2 = (AddrRange*)VG_(indexXA)( rngs2, i );
761      if (rng1->aMin < rng2->aMin) return -1;
762      if (rng1->aMin > rng2->aMin) return 1;
763      if (rng1->aMax < rng2->aMax) return -1;
764      if (rng1->aMax > rng2->aMax) return 1;
765   }
766   return 0;
767}
768
769
770__attribute__((noinline))
771static XArray* /* of AddrRange */ empty_range_list ( void )
772{
773   XArray* xa; /* XArray of AddrRange */
774   /* Who frees this xa?  varstack_preen() does. */
775   xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.erl.1",
776                    ML_(dinfo_free),
777                    sizeof(AddrRange) );
778   return xa;
779}
780
781
782__attribute__((noinline))
783static XArray* unitary_range_list ( Addr aMin, Addr aMax )
784{
785   XArray*   xa;
786   AddrRange pair;
787   vg_assert(aMin <= aMax);
788   /* Who frees this xa?  varstack_preen() does. */
789   xa = VG_(newXA)( ML_(dinfo_zalloc),  "di.readdwarf3.url.1",
790                    ML_(dinfo_free),
791                    sizeof(AddrRange) );
792   pair.aMin = aMin;
793   pair.aMax = aMax;
794   VG_(addToXA)( xa, &pair );
795   return xa;
796}
797
798
799/* Enumerate the address ranges starting at img-offset
800   'debug_ranges_offset' in .debug_ranges.  Results are biased with
801   'svma_of_referencing_CU' and so I believe are correct SVMAs for the
802   object as a whole.  This function allocates the XArray, and the
803   caller must deallocate it. */
804__attribute__((noinline))
805static XArray* /* of AddrRange */
806       get_range_list ( CUConst* cc,
807                        Bool     td3,
808                        UWord    debug_ranges_offset,
809                        Addr     svma_of_referencing_CU )
810{
811   Addr      base;
812   Cursor    ranges;
813   XArray*   xa; /* XArray of AddrRange */
814   AddrRange pair;
815
816   if (cc->debug_ranges_sz == 0)
817      cc->barf("get_range_list: .debug_ranges is empty/missing");
818
819   init_Cursor( &ranges, cc->debug_ranges_img,
820                cc->debug_ranges_sz, 0, cc->barf,
821                "Overrun whilst reading .debug_ranges section(2)" );
822   set_position_of_Cursor( &ranges, debug_ranges_offset );
823
824   /* Who frees this xa?  varstack_preen() does. */
825   xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.grl.1", ML_(dinfo_free),
826                    sizeof(AddrRange) );
827   base = 0;
828   while (True) {
829      /* Read a (host-)word pair.  This is something of a hack since
830         the word size to read is really dictated by the ELF file;
831         however, we assume we're reading a file with the same
832         word-sizeness as the host.  Reasonably enough. */
833      UWord w1 = get_UWord( &ranges );
834      UWord w2 = get_UWord( &ranges );
835
836      if (w1 == 0 && w2 == 0)
837         break; /* end of list. */
838
839      if (w1 == -1UL) {
840         /* new value for 'base' */
841         base = w2;
842         continue;
843      }
844
845      /* else enumerate [w1+base, w2+base) */
846      /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
847         (sec 2.17.2) */
848      if (w1 > w2)
849         cc->barf( "negative range in .debug_ranges section" );
850      if (w1 < w2) {
851         pair.aMin = w1     + base + svma_of_referencing_CU;
852         pair.aMax = w2 - 1 + base + svma_of_referencing_CU;
853         vg_assert(pair.aMin <= pair.aMax);
854         VG_(addToXA)( xa, &pair );
855      }
856   }
857   return xa;
858}
859
860
861/* Parse the Compilation Unit header indicated at 'c' and
862   initialise 'cc' accordingly. */
863static __attribute__((noinline))
864void parse_CU_Header ( /*OUT*/CUConst* cc,
865                       Bool td3,
866                       Cursor* c,
867                       UChar* debug_abbv_img, UWord debug_abbv_sz )
868{
869   UChar  address_size;
870   UWord  debug_abbrev_offset;
871   Int    i;
872
873   VG_(memset)(cc, 0, sizeof(*cc));
874   vg_assert(c && c->barf);
875   cc->barf = c->barf;
876
877   /* initial_length field */
878   cc->unit_length
879      = get_Initial_Length( &cc->is_dw64, c,
880           "parse_CU_Header: invalid initial-length field" );
881
882   TRACE_D3("   Length:        %lld\n", cc->unit_length );
883
884   /* version */
885   cc->version = get_UShort( c );
886   if (cc->version != 2 && cc->version != 3)
887      cc->barf( "parse_CU_Header: is neither DWARF2 nor DWARF3" );
888   TRACE_D3("   Version:       %d\n", (Int)cc->version );
889
890   /* debug_abbrev_offset */
891   debug_abbrev_offset = get_Dwarfish_UWord( c, cc->is_dw64 );
892   if (debug_abbrev_offset >= debug_abbv_sz)
893      cc->barf( "parse_CU_Header: invalid debug_abbrev_offset" );
894   TRACE_D3("   Abbrev Offset: %ld\n", debug_abbrev_offset );
895
896   /* address size.  If this isn't equal to the host word size, just
897      give up.  This makes it safe to assume elsewhere that
898      DW_FORM_addr can be treated as a host word. */
899   address_size = get_UChar( c );
900   if (address_size != sizeof(void*))
901      cc->barf( "parse_CU_Header: invalid address_size" );
902   TRACE_D3("   Pointer Size:  %d\n", (Int)address_size );
903
904   /* Set up so that cc->debug_abbv points to the relevant table for
905      this CU.  Set the szB so that at least we can't read off the end
906      of the debug_abbrev section -- potentially (and quite likely)
907      too big, if this isn't the last table in the section, but at
908      least it's safe. */
909   cc->debug_abbv        = debug_abbv_img + debug_abbrev_offset;
910   cc->debug_abbv_maxszB = debug_abbv_sz  - debug_abbrev_offset;
911   /* and empty out the set_abbv_Cursor cache */
912   if (0) VG_(printf)("XXXXXX initialise set_abbv_Cursor cache\n");
913   for (i = 0; i < N_ABBV_CACHE; i++) {
914      cc->saC_cache[i].abbv_code = (ULong)-1; /* unused */
915      cc->saC_cache[i].posn = 0;
916   }
917   cc->saC_cache_queries = 0;
918   cc->saC_cache_misses = 0;
919}
920
921
922/* Set up 'c' so it is ready to parse the abbv table entry code
923   'abbv_code' for this compilation unit.  */
924static __attribute__((noinline))
925void set_abbv_Cursor ( /*OUT*/Cursor* c, Bool td3,
926                       CUConst* cc, ULong abbv_code )
927{
928   Int   i;
929   ULong acode;
930
931   if (abbv_code == 0)
932      cc->barf("set_abbv_Cursor: abbv_code == 0" );
933
934   /* (ULong)-1 is used to represent an empty cache slot.  So we can't
935      allow it.  In any case no valid DWARF3 should make a reference
936      to a negative abbreviation code.  [at least, they always seem to
937      be numbered upwards from zero as far as I have seen] */
938   vg_assert(abbv_code != (ULong)-1);
939
940   /* First search the cache. */
941   if (0) VG_(printf)("XXXXXX search set_abbv_Cursor cache\n");
942   cc->saC_cache_queries++;
943   for (i = 0; i < N_ABBV_CACHE; i++) {
944      /* No need to test the cached abbv_codes for -1 (empty), since
945         we just asserted that abbv_code is not -1. */
946     if (cc->saC_cache[i].abbv_code == abbv_code) {
947        /* Found it.  Cool.  Set up the parser using the cached
948           position, and move this cache entry 1 step closer to the
949           front. */
950        if (0) VG_(printf)("XXXXXX found in set_abbv_Cursor cache\n");
951        init_Cursor( c, cc->debug_abbv,
952                     cc->debug_abbv_maxszB, cc->saC_cache[i].posn,
953                     cc->barf,
954                     "Overrun whilst parsing .debug_abbrev section(1)" );
955        if (i > 0) {
956           ULong t_abbv_code = cc->saC_cache[i].abbv_code;
957           UWord t_posn = cc->saC_cache[i].posn;
958           while (i > 0) {
959              cc->saC_cache[i] = cc->saC_cache[i-1];
960              cc->saC_cache[0].abbv_code = t_abbv_code;
961              cc->saC_cache[0].posn = t_posn;
962              i--;
963           }
964        }
965        return;
966     }
967   }
968
969   /* No.  It's not in the cache.  We have to search through
970      .debug_abbrev, of course taking care to update the cache
971      when done. */
972
973   cc->saC_cache_misses++;
974   init_Cursor( c, cc->debug_abbv, cc->debug_abbv_maxszB, 0, cc->barf,
975               "Overrun whilst parsing .debug_abbrev section(2)" );
976
977   /* Now iterate though the table until we find the requested
978      entry. */
979   while (True) {
980      ULong atag;
981      UInt  has_children;
982      acode = get_ULEB128( c );
983      if (acode == 0) break; /* end of the table */
984      if (acode == abbv_code) break; /* found it */
985      atag         = get_ULEB128( c );
986      has_children = get_UChar( c );
987      //TRACE_D3("   %llu      %s    [%s]\n",
988      //         acode, pp_DW_TAG(atag), pp_DW_children(has_children));
989      while (True) {
990         ULong at_name = get_ULEB128( c );
991         ULong at_form = get_ULEB128( c );
992         if (at_name == 0 && at_form == 0) break;
993         //TRACE_D3("    %18s %s\n",
994         //         pp_DW_AT(at_name), pp_DW_FORM(at_form));
995      }
996   }
997
998   if (acode == 0) {
999      /* Not found.  This is fatal. */
1000      cc->barf("set_abbv_Cursor: abbv_code not found");
1001   }
1002
1003   /* Otherwise, 'c' is now set correctly to parse the relevant entry,
1004      starting from the abbreviation entry's tag.  So just cache
1005      the result, and return. */
1006   for (i = N_ABBV_CACHE-1; i > N_ABBV_CACHE/2; i--) {
1007      cc->saC_cache[i] = cc->saC_cache[i-1];
1008   }
1009   if (0) VG_(printf)("XXXXXX update set_abbv_Cursor cache\n");
1010   cc->saC_cache[N_ABBV_CACHE/2].abbv_code = abbv_code;
1011   cc->saC_cache[N_ABBV_CACHE/2].posn = get_position_of_Cursor(c);
1012}
1013
1014
1015/* From 'c', get the Form data into the lowest 1/2/4/8 bytes of *cts.
1016
1017   If *cts itself contains the entire result, then *ctsSzB is set to
1018   1,2,4 or 8 accordingly and *ctsMemSzB is set to zero.
1019
1020   Alternatively, the result can be a block of data (in the
1021   transiently mapped-in object, so-called "image" space).  If so then
1022   the lowest sizeof(void*)/8 bytes of *cts hold a pointer to said
1023   image, *ctsSzB is zero, and *ctsMemSzB is the size of the block.
1024
1025   Unfortunately this means it is impossible to represent a zero-size
1026   image block since that would have *ctsSzB == 0 and *ctsMemSzB == 0
1027   and so is ambiguous (which case it is?)
1028
1029   Invariant on successful return:
1030      (*ctsSzB > 0 && *ctsMemSzB == 0)
1031      || (*ctsSzB == 0 && *ctsMemSzB > 0)
1032*/
1033static
1034void get_Form_contents ( /*OUT*/ULong* cts,
1035                         /*OUT*/Int*   ctsSzB,
1036                         /*OUT*/UWord* ctsMemSzB,
1037                         CUConst* cc, Cursor* c,
1038                         Bool td3, DW_FORM form )
1039{
1040   *cts       = 0;
1041   *ctsSzB    = 0;
1042   *ctsMemSzB = 0;
1043   switch (form) {
1044      case DW_FORM_data1:
1045         *cts = (ULong)(UChar)get_UChar(c);
1046         *ctsSzB = 1;
1047         TRACE_D3("%u", (UInt)*cts);
1048         break;
1049      case DW_FORM_data2:
1050         *cts = (ULong)(UShort)get_UShort(c);
1051         *ctsSzB = 2;
1052         TRACE_D3("%u", (UInt)*cts);
1053         break;
1054      case DW_FORM_data4:
1055         *cts = (ULong)(UInt)get_UInt(c);
1056         *ctsSzB = 4;
1057         TRACE_D3("%u", (UInt)*cts);
1058         break;
1059      case DW_FORM_data8:
1060         *cts = get_ULong(c);
1061         *ctsSzB = 8;
1062         TRACE_D3("%llu", *cts);
1063         break;
1064      case DW_FORM_sdata:
1065         *cts = (ULong)(Long)get_SLEB128(c);
1066         *ctsSzB = 8;
1067         TRACE_D3("%lld", (Long)*cts);
1068         break;
1069      case DW_FORM_addr:
1070         /* note, this is a hack.  DW_FORM_addr is defined as getting
1071            a word the size of the target machine as defined by the
1072            address_size field in the CU Header.  However,
1073            parse_CU_Header() rejects all inputs except those for
1074            which address_size == sizeof(Word), hence we can just
1075            treat it as a (host) Word.  */
1076         *cts = (ULong)(UWord)get_UWord(c);
1077         *ctsSzB = sizeof(UWord);
1078         TRACE_D3("0x%lx", (UWord)*cts);
1079         break;
1080      case DW_FORM_strp: {
1081         /* this is an offset into .debug_str */
1082         UChar* str;
1083         UWord uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
1084         if (cc->debug_str_img == NULL || uw >= cc->debug_str_sz)
1085            cc->barf("read_and_show_Form: DW_FORM_strp "
1086                     "points outside .debug_str");
1087         /* FIXME: check the entire string lies inside debug_str,
1088            not just the first byte of it. */
1089         str = (UChar*)cc->debug_str_img + uw;
1090         TRACE_D3("(indirect string, offset: 0x%lx): %s", uw, str);
1091         *cts = (ULong)(UWord)str;
1092         *ctsMemSzB = 1 + (ULong)VG_(strlen)(str);
1093         break;
1094      }
1095      case DW_FORM_string: {
1096         UChar* str = get_AsciiZ(c);
1097         TRACE_D3("%s", str);
1098         *cts = (ULong)(UWord)str;
1099         /* strlen is safe because get_AsciiZ already 'vetted' the
1100            entire string */
1101         *ctsMemSzB = 1 + (ULong)VG_(strlen)(str);
1102         break;
1103      }
1104      case DW_FORM_ref4: {
1105         UInt  u32 = get_UInt(c);
1106         UWord res = cc->cu_start_offset + (UWord)u32;
1107         *cts = (ULong)res;
1108         *ctsSzB = sizeof(UWord);
1109         TRACE_D3("<%lx>", res);
1110         break;
1111      }
1112      case DW_FORM_flag: {
1113         UChar u8 = get_UChar(c);
1114         TRACE_D3("%u", (UInt)u8);
1115         *cts = (ULong)u8;
1116         *ctsSzB = 1;
1117         break;
1118      }
1119      case DW_FORM_block1: {
1120         ULong  u64b;
1121         ULong  u64 = (ULong)get_UChar(c);
1122         UChar* block = get_address_of_Cursor(c);
1123         TRACE_D3("%llu byte block: ", u64);
1124         for (u64b = u64; u64b > 0; u64b--) {
1125            UChar u8 = get_UChar(c);
1126            TRACE_D3("%x ", (UInt)u8);
1127         }
1128         *cts = (ULong)(UWord)block;
1129         *ctsMemSzB = (UWord)u64;
1130         break;
1131      }
1132      case DW_FORM_block2: {
1133         ULong  u64b;
1134         ULong  u64 = (ULong)get_UShort(c);
1135         UChar* block = get_address_of_Cursor(c);
1136         TRACE_D3("%llu byte block: ", u64);
1137         for (u64b = u64; u64b > 0; u64b--) {
1138            UChar u8 = get_UChar(c);
1139            TRACE_D3("%x ", (UInt)u8);
1140         }
1141         *cts = (ULong)(UWord)block;
1142         *ctsMemSzB = (UWord)u64;
1143         break;
1144      }
1145      default:
1146         VG_(printf)("get_Form_contents: unhandled %d (%s)\n",
1147                     form, ML_(pp_DW_FORM)(form));
1148         c->barf("get_Form_contents: unhandled DW_FORM");
1149   }
1150}
1151
1152
1153/*------------------------------------------------------------*/
1154/*---                                                      ---*/
1155/*--- Parsing of variable-related DIEs                     ---*/
1156/*---                                                      ---*/
1157/*------------------------------------------------------------*/
1158
1159typedef
1160   struct _TempVar {
1161      UChar*  name; /* in DebugInfo's .strchunks */
1162      /* Represent ranges economically.  nRanges is the number of
1163         ranges.  Cases:
1164         0: .rngOneMin .rngOneMax .manyRanges are all zero
1165         1: .rngOneMin .rngOneMax hold the range; .rngMany is NULL
1166         2: .rngOneMin .rngOneMax are zero; .rngMany holds the ranges.
1167         This is merely an optimisation to avoid having to allocate
1168         and free the XArray in the common (98%) of cases where there
1169         is zero or one address ranges. */
1170      UWord   nRanges;
1171      Addr    rngOneMin;
1172      Addr    rngOneMax;
1173      XArray* rngMany; /* of AddrRange.  NON-UNIQUE PTR in AR_DINFO. */
1174      /* Do not free .rngMany, since many TempVars will have the same
1175         value.  Instead the associated storage is to be freed by
1176         deleting 'rangetree', which stores a single copy of each
1177         range. */
1178      /* --- */
1179      Int     level;
1180      UWord   typeR; /* a cuOff */
1181      GExpr*  gexpr; /* for this variable */
1182      GExpr*  fbGX;  /* to find the frame base of the enclosing fn, if
1183                        any */
1184      UChar*  fName; /* declaring file name, or NULL */
1185      Int     fLine; /* declaring file line number, or zero */
1186      /* offset in .debug_info, so that abstract instances can be
1187         found to satisfy references from concrete instances. */
1188      UWord   dioff;
1189      UWord   absOri; /* so the absOri fields refer to dioff fields
1190                         in some other, related TempVar. */
1191   }
1192   TempVar;
1193
1194#define N_D3_VAR_STACK 48
1195
1196typedef
1197   struct {
1198      /* Contains the range stack: a stack of address ranges, one
1199         stack entry for each nested scope.
1200
1201         Some scope entries are created by function definitions
1202         (DW_AT_subprogram), and for those, we also note the GExpr
1203         derived from its DW_AT_frame_base attribute, if any.
1204         Consequently it should be possible to find, for any
1205         variable's DIE, the GExpr for the the containing function's
1206         DW_AT_frame_base by scanning back through the stack to find
1207         the nearest entry associated with a function.  This somewhat
1208         elaborate scheme is provided so as to make it possible to
1209         obtain the correct DW_AT_frame_base expression even in the
1210         presence of nested functions (or to be more precise, in the
1211         presence of nested DW_AT_subprogram DIEs).
1212      */
1213      Int     sp; /* [sp] is innermost active entry; sp==-1 for empty
1214                     stack */
1215      XArray* ranges[N_D3_VAR_STACK]; /* XArray of AddrRange */
1216      Int     level[N_D3_VAR_STACK];  /* D3 DIE levels */
1217      Bool    isFunc[N_D3_VAR_STACK]; /* from DW_AT_subprogram? */
1218      GExpr*  fbGX[N_D3_VAR_STACK];   /* if isFunc, contains the FB
1219                                         expr, else NULL */
1220      /* The file name table.  Is a mapping from integer index to the
1221         (permanent) copy of the string, iow a non-img area. */
1222      XArray* /* of UChar* */ filenameTable;
1223   }
1224   D3VarParser;
1225
1226static void varstack_show ( D3VarParser* parser, HChar* str ) {
1227   Word i, j;
1228   VG_(printf)("  varstack (%s) {\n", str);
1229   for (i = 0; i <= parser->sp; i++) {
1230      XArray* xa = parser->ranges[i];
1231      vg_assert(xa);
1232      VG_(printf)("    [%ld] (level %d)", i, parser->level[i]);
1233      if (parser->isFunc[i]) {
1234         VG_(printf)(" (fbGX=%p)", parser->fbGX[i]);
1235      } else {
1236         vg_assert(parser->fbGX[i] == NULL);
1237      }
1238      VG_(printf)(": ");
1239      if (VG_(sizeXA)( xa ) == 0) {
1240         VG_(printf)("** empty PC range array **");
1241      } else {
1242         for (j = 0; j < VG_(sizeXA)( xa ); j++) {
1243            AddrRange* range = (AddrRange*) VG_(indexXA)( xa, j );
1244            vg_assert(range);
1245            VG_(printf)("[%#lx,%#lx] ", range->aMin, range->aMax);
1246         }
1247      }
1248      VG_(printf)("\n");
1249   }
1250   VG_(printf)("  }\n");
1251}
1252
1253/* Remove from the stack, all entries with .level > 'level' */
1254static
1255void varstack_preen ( D3VarParser* parser, Bool td3, Int level )
1256{
1257   Bool changed = False;
1258   vg_assert(parser->sp < N_D3_VAR_STACK);
1259   while (True) {
1260      vg_assert(parser->sp >= -1);
1261      if (parser->sp == -1) break;
1262      if (parser->level[parser->sp] <= level) break;
1263      if (0)
1264         TRACE_D3("BBBBAAAA varstack_pop [newsp=%d]\n", parser->sp-1);
1265      vg_assert(parser->ranges[parser->sp]);
1266      /* Who allocated this xa?  get_range_list() or
1267         unitary_range_list(). */
1268      VG_(deleteXA)( parser->ranges[parser->sp] );
1269      parser->ranges[parser->sp] = NULL;
1270      parser->level[parser->sp]  = 0;
1271      parser->isFunc[parser->sp] = False;
1272      parser->fbGX[parser->sp]   = NULL;
1273      parser->sp--;
1274      changed = True;
1275   }
1276   if (changed && td3)
1277      varstack_show( parser, "after preen" );
1278}
1279
1280static void varstack_push ( CUConst* cc,
1281                            D3VarParser* parser,
1282                            Bool td3,
1283                            XArray* ranges, Int level,
1284                            Bool    isFunc, GExpr* fbGX ) {
1285   if (0)
1286   TRACE_D3("BBBBAAAA varstack_push[newsp=%d]: %d  %p\n",
1287            parser->sp+1, level, ranges);
1288
1289   /* First we need to zap everything >= 'level', as we are about to
1290      replace any previous entry at 'level', so .. */
1291   varstack_preen(parser, /*td3*/False, level-1);
1292
1293   vg_assert(parser->sp >= -1);
1294   vg_assert(parser->sp < N_D3_VAR_STACK);
1295   if (parser->sp == N_D3_VAR_STACK-1)
1296      cc->barf("varstack_push: N_D3_VAR_STACK is too low; "
1297               "increase and recompile");
1298   if (parser->sp >= 0)
1299      vg_assert(parser->level[parser->sp] < level);
1300   parser->sp++;
1301   vg_assert(parser->ranges[parser->sp] == NULL);
1302   vg_assert(parser->level[parser->sp]  == 0);
1303   vg_assert(parser->isFunc[parser->sp] == False);
1304   vg_assert(parser->fbGX[parser->sp]   == NULL);
1305   vg_assert(ranges != NULL);
1306   if (!isFunc) vg_assert(fbGX == NULL);
1307   parser->ranges[parser->sp] = ranges;
1308   parser->level[parser->sp]  = level;
1309   parser->isFunc[parser->sp] = isFunc;
1310   parser->fbGX[parser->sp]   = fbGX;
1311   if (td3)
1312      varstack_show( parser, "after push" );
1313}
1314
1315
1316/* cts, ctsSzB, ctsMemSzB are derived from a DW_AT_location and so
1317   refer either to a location expression or to a location list.
1318   Figure out which, and in both cases bundle the expression or
1319   location list into a so-called GExpr (guarded expression). */
1320__attribute__((noinline))
1321static GExpr* get_GX ( CUConst* cc, Bool td3,
1322                       ULong cts, Int ctsSzB, UWord ctsMemSzB )
1323{
1324   GExpr* gexpr = NULL;
1325   if (ctsMemSzB > 0 && ctsSzB == 0) {
1326      /* represents an in-line location expression, and cts points
1327         right at it */
1328      gexpr = make_singleton_GX( (UChar*)(UWord)cts, ctsMemSzB );
1329   }
1330   else
1331   if (ctsMemSzB == 0 && ctsSzB > 0) {
1332      /* represents location list.  cts is the offset of it in
1333         .debug_loc. */
1334      if (!cc->cu_svma_known)
1335         cc->barf("get_GX: location list, but CU svma is unknown");
1336      gexpr = make_general_GX( cc, td3, (UWord)cts, cc->cu_svma );
1337   }
1338   else {
1339      vg_assert(0); /* else caller is bogus */
1340   }
1341   return gexpr;
1342}
1343
1344
1345static
1346void read_filename_table( /*MOD*/D3VarParser* parser,
1347                          CUConst* cc, UWord debug_line_offset,
1348                          Bool td3 )
1349{
1350   Bool   is_dw64;
1351   Cursor c;
1352   Word   i;
1353   ULong  unit_length;
1354   UShort version;
1355   ULong  header_length;
1356   UChar  minimum_instruction_length;
1357   UChar  default_is_stmt;
1358   Char   line_base;
1359   UChar  line_range;
1360   UChar  opcode_base;
1361   UChar* str;
1362
1363   vg_assert(parser && cc && cc->barf);
1364   if ((!cc->debug_line_img)
1365       || cc->debug_line_sz <= debug_line_offset)
1366      cc->barf("read_filename_table: .debug_line is missing?");
1367
1368   init_Cursor( &c, cc->debug_line_img,
1369                cc->debug_line_sz, debug_line_offset, cc->barf,
1370                "Overrun whilst reading .debug_line section(1)" );
1371
1372   unit_length
1373      = get_Initial_Length( &is_dw64, &c,
1374           "read_filename_table: invalid initial-length field" );
1375   version = get_UShort( &c );
1376   if (version != 2)
1377     cc->barf("read_filename_table: Only DWARF version 2 line info "
1378              "is currently supported.");
1379   header_length = (ULong)get_Dwarfish_UWord( &c, is_dw64 );
1380   minimum_instruction_length = get_UChar( &c );
1381   default_is_stmt            = get_UChar( &c );
1382   line_base                  = (Char)get_UChar( &c );
1383   line_range                 = get_UChar( &c );
1384   opcode_base                = get_UChar( &c );
1385   /* skip over "standard_opcode_lengths" */
1386   for (i = 1; i < (Word)opcode_base; i++)
1387     (void)get_UChar( &c );
1388
1389   /* skip over the directory names table */
1390   while (peek_UChar(&c) != 0) {
1391     (void)get_AsciiZ(&c);
1392   }
1393   (void)get_UChar(&c); /* skip terminating zero */
1394
1395   /* Read and record the file names table */
1396   vg_assert(parser->filenameTable);
1397   vg_assert( VG_(sizeXA)( parser->filenameTable ) == 0 );
1398   /* Add a dummy index-zero entry.  DWARF3 numbers its files
1399      from 1, for some reason. */
1400   str = ML_(addStr)( cc->di, "<unknown_file>", -1 );
1401   VG_(addToXA)( parser->filenameTable, &str );
1402   while (peek_UChar(&c) != 0) {
1403      str = get_AsciiZ(&c);
1404      TRACE_D3("  read_filename_table: %ld %s\n",
1405               VG_(sizeXA)(parser->filenameTable), str);
1406      str = ML_(addStr)( cc->di, str, -1 );
1407      VG_(addToXA)( parser->filenameTable, &str );
1408      (void)get_ULEB128( &c ); /* skip directory index # */
1409      (void)get_ULEB128( &c ); /* skip last mod time */
1410      (void)get_ULEB128( &c ); /* file size */
1411   }
1412   /* We're done!  The rest of it is not interesting. */
1413}
1414
1415
1416__attribute__((noinline))
1417static void parse_var_DIE (
1418   /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
1419   /*MOD*/XArray* /* of TempVar* */ tempvars,
1420   /*MOD*/XArray* /* of GExpr* */ gexprs,
1421   /*MOD*/D3VarParser* parser,
1422   DW_TAG dtag,
1423   UWord posn,
1424   Int level,
1425   Cursor* c_die,
1426   Cursor* c_abbv,
1427   CUConst* cc,
1428   Bool td3
1429)
1430{
1431   ULong       cts;
1432   Int         ctsSzB;
1433   UWord       ctsMemSzB;
1434
1435   UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
1436   UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv );
1437
1438   varstack_preen( parser, td3, level-1 );
1439
1440   if (dtag == DW_TAG_compile_unit) {
1441      Bool have_lo    = False;
1442      Bool have_hi1   = False;
1443      Bool have_range = False;
1444      Addr ip_lo    = 0;
1445      Addr ip_hi1   = 0;
1446      Addr rangeoff = 0;
1447      while (True) {
1448         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
1449         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
1450         if (attr == 0 && form == 0) break;
1451         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
1452                            cc, c_die, False/*td3*/, form );
1453         if (attr == DW_AT_low_pc && ctsSzB > 0) {
1454            ip_lo   = cts;
1455            have_lo = True;
1456         }
1457         if (attr == DW_AT_high_pc && ctsSzB > 0) {
1458            ip_hi1   = cts;
1459            have_hi1 = True;
1460         }
1461         if (attr == DW_AT_ranges && ctsSzB > 0) {
1462            rangeoff = cts;
1463            have_range = True;
1464         }
1465         if (attr == DW_AT_stmt_list && ctsSzB > 0) {
1466            read_filename_table( parser, cc, (UWord)cts, td3 );
1467         }
1468      }
1469      /* Now, does this give us an opportunity to find this
1470         CU's svma? */
1471#if 0
1472      if (level == 0 && have_lo) {
1473         vg_assert(!cc->cu_svma_known); /* if this fails, it must be
1474         because we've already seen a DW_TAG_compile_unit DIE at level
1475         0.  But that can't happen, because DWARF3 only allows exactly
1476         one top level DIE per CU. */
1477         cc->cu_svma_known = True;
1478         cc->cu_svma = ip_lo;
1479         if (1)
1480            TRACE_D3("BBBBAAAA acquire CU_SVMA of %p\n", cc->cu_svma);
1481         /* Now, it may be that this DIE doesn't tell us the CU's
1482            SVMA, by way of not having a DW_AT_low_pc.  That's OK --
1483            the CU doesn't *have* to have its SVMA specified.
1484
1485            But as per last para D3 spec sec 3.1.1 ("Normal and
1486            Partial Compilation Unit Entries", "If the base address
1487            (viz, the SVMA) is undefined, then any DWARF entry of
1488            structure defined interms of the base address of that
1489            compilation unit is not valid.".  So that means, if whilst
1490            processing the children of this top level DIE (or their
1491            children, etc) we see a DW_AT_range, and cu_svma_known is
1492            False, then the DIE that contains it is (per the spec)
1493            invalid, and we can legitimately stop and complain. */
1494      }
1495#else
1496      /* .. whereas The Reality is, simply assume the SVMA is zero
1497         if it isn't specified. */
1498      if (level == 0) {
1499         vg_assert(!cc->cu_svma_known);
1500         cc->cu_svma_known = True;
1501         if (have_lo)
1502            cc->cu_svma = ip_lo;
1503         else
1504            cc->cu_svma = 0;
1505      }
1506#endif
1507      /* Do we have something that looks sane? */
1508      if (have_lo && have_hi1 && (!have_range)) {
1509         if (ip_lo < ip_hi1)
1510            varstack_push( cc, parser, td3,
1511                           unitary_range_list(ip_lo, ip_hi1 - 1),
1512                           level,
1513                           False/*isFunc*/, NULL/*fbGX*/ );
1514      } else
1515      if ((!have_lo) && (!have_hi1) && have_range) {
1516         varstack_push( cc, parser, td3,
1517                        get_range_list( cc, td3,
1518                                        rangeoff, cc->cu_svma ),
1519                        level,
1520                        False/*isFunc*/, NULL/*fbGX*/ );
1521      } else
1522      if ((!have_lo) && (!have_hi1) && (!have_range)) {
1523         /* CU has no code, presumably? */
1524         varstack_push( cc, parser, td3,
1525                        empty_range_list(),
1526                        level,
1527                        False/*isFunc*/, NULL/*fbGX*/ );
1528      } else
1529      if (have_lo && (!have_hi1) && have_range && ip_lo == 0) {
1530         /* broken DIE created by gcc-4.3.X ?  Ignore the
1531            apparently-redundant DW_AT_low_pc and use the DW_AT_ranges
1532            instead. */
1533         varstack_push( cc, parser, td3,
1534                        get_range_list( cc, td3,
1535                                        rangeoff, cc->cu_svma ),
1536                        level,
1537                        False/*isFunc*/, NULL/*fbGX*/ );
1538      } else {
1539         if (0) VG_(printf)("I got hlo %d hhi1 %d hrange %d\n",
1540                            (Int)have_lo, (Int)have_hi1, (Int)have_range);
1541         goto bad_DIE;
1542      }
1543   }
1544
1545   if (dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram) {
1546      Bool   have_lo    = False;
1547      Bool   have_hi1   = False;
1548      Bool   have_range = False;
1549      Addr   ip_lo      = 0;
1550      Addr   ip_hi1     = 0;
1551      Addr   rangeoff   = 0;
1552      Bool   isFunc     = dtag == DW_TAG_subprogram;
1553      GExpr* fbGX       = NULL;
1554      while (True) {
1555         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
1556         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
1557         if (attr == 0 && form == 0) break;
1558         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
1559                            cc, c_die, False/*td3*/, form );
1560         if (attr == DW_AT_low_pc && ctsSzB > 0) {
1561            ip_lo   = cts;
1562            have_lo = True;
1563         }
1564         if (attr == DW_AT_high_pc && ctsSzB > 0) {
1565            ip_hi1   = cts;
1566            have_hi1 = True;
1567         }
1568         if (attr == DW_AT_ranges && ctsSzB > 0) {
1569            rangeoff = cts;
1570            have_range = True;
1571         }
1572         if (isFunc
1573             && attr == DW_AT_frame_base
1574             && ((ctsMemSzB > 0 && ctsSzB == 0)
1575                 || (ctsMemSzB == 0 && ctsSzB > 0))) {
1576            fbGX = get_GX( cc, False/*td3*/, cts, ctsSzB, ctsMemSzB );
1577            vg_assert(fbGX);
1578            VG_(addToXA)(gexprs, &fbGX);
1579         }
1580      }
1581      /* Do we have something that looks sane? */
1582      if (dtag == DW_TAG_subprogram
1583          && (!have_lo) && (!have_hi1) && (!have_range)) {
1584         /* This is legit - ignore it. Sec 3.3.3: "A subroutine entry
1585            representing a subroutine declaration that is not also a
1586            definition does not have code address or range
1587            attributes." */
1588      } else
1589      if (dtag == DW_TAG_lexical_block
1590          && (!have_lo) && (!have_hi1) && (!have_range)) {
1591         /* I believe this is legit, and means the lexical block
1592            contains no insns (whatever that might mean).  Ignore. */
1593      } else
1594      if (have_lo && have_hi1 && (!have_range)) {
1595         /* This scope supplies just a single address range. */
1596         if (ip_lo < ip_hi1)
1597            varstack_push( cc, parser, td3,
1598                           unitary_range_list(ip_lo, ip_hi1 - 1),
1599                           level, isFunc, fbGX );
1600      } else
1601      if ((!have_lo) && (!have_hi1) && have_range) {
1602         /* This scope supplies multiple address ranges via the use of
1603            a range list. */
1604         varstack_push( cc, parser, td3,
1605                        get_range_list( cc, td3,
1606                                        rangeoff, cc->cu_svma ),
1607                        level, isFunc, fbGX );
1608      } else
1609      if (have_lo && (!have_hi1) && (!have_range)) {
1610         /* This scope is bogus.  The D3 spec sec 3.4 (Lexical Block
1611            Entries) says fairly clearly that a scope must have either
1612            _range or (_low_pc and _high_pc). */
1613         /* The spec is a bit ambiguous though.  Perhaps a single byte
1614            range is intended?  See sec 2.17 (Code Addresses And Ranges) */
1615         /* This case is here because icc9 produced this:
1616         <2><13bd>: DW_TAG_lexical_block
1617            DW_AT_decl_line   : 5229
1618            DW_AT_decl_column : 37
1619            DW_AT_decl_file   : 1
1620            DW_AT_low_pc      : 0x401b03
1621         */
1622         /* Ignore (seems safe than pushing a single byte range) */
1623      } else
1624         goto bad_DIE;
1625   }
1626
1627   if (dtag == DW_TAG_variable || dtag == DW_TAG_formal_parameter) {
1628      UChar* name        = NULL;
1629      UWord  typeR       = D3_INVALID_CUOFF;
1630      Bool   external    = False;
1631      GExpr* gexpr       = NULL;
1632      Int    n_attrs     = 0;
1633      UWord  abs_ori     = (UWord)D3_INVALID_CUOFF;
1634      Bool   declaration = False;
1635      Int    lineNo      = 0;
1636      UChar* fileName    = NULL;
1637      while (True) {
1638         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
1639         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
1640         if (attr == 0 && form == 0) break;
1641         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
1642                            cc, c_die, False/*td3*/, form );
1643         n_attrs++;
1644         if (attr == DW_AT_name && ctsMemSzB > 0) {
1645            name = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 );
1646         }
1647         if (attr == DW_AT_location
1648             && ((ctsMemSzB > 0 && ctsSzB == 0)
1649                 || (ctsMemSzB == 0 && ctsSzB > 0))) {
1650            gexpr = get_GX( cc, False/*td3*/, cts, ctsSzB, ctsMemSzB );
1651            vg_assert(gexpr);
1652            VG_(addToXA)(gexprs, &gexpr);
1653         }
1654         if (attr == DW_AT_type && ctsSzB > 0) {
1655            typeR = (UWord)cts;
1656         }
1657         if (attr == DW_AT_external && ctsSzB > 0 && cts > 0) {
1658            external = True;
1659         }
1660         if (attr == DW_AT_abstract_origin && ctsSzB > 0) {
1661            abs_ori = (UWord)cts;
1662         }
1663         if (attr == DW_AT_declaration && ctsSzB > 0 && cts > 0) {
1664            declaration = True;
1665         }
1666         if (attr == DW_AT_decl_line && ctsSzB > 0) {
1667            lineNo = (Int)cts;
1668         }
1669         if (attr == DW_AT_decl_file && ctsSzB > 0) {
1670            Int ftabIx = (Int)cts;
1671            if (ftabIx >= 1
1672                && ftabIx < VG_(sizeXA)( parser->filenameTable )) {
1673               fileName = *(UChar**)
1674                          VG_(indexXA)( parser->filenameTable, ftabIx );
1675               vg_assert(fileName);
1676            }
1677            if (0) VG_(printf)("XXX filename = %s\n", fileName);
1678         }
1679      }
1680      /* We'll collect it under if one of the following three
1681         conditions holds:
1682         (1) has location and type    -> completed
1683         (2) has type only            -> is an abstract instance
1684         (3) has location and abs_ori -> is a concrete instance
1685         Name, filename and line number are all optional frills.
1686      */
1687      if ( /* 1 */ (gexpr && typeR != D3_INVALID_CUOFF)
1688           /* 2 */ || (typeR != D3_INVALID_CUOFF)
1689           /* 3 */ || (gexpr && abs_ori != (UWord)D3_INVALID_CUOFF) ) {
1690
1691         /* Add this variable to the list of interesting looking
1692            variables.  Crucially, note along with it the address
1693            range(s) associated with the variable, which for locals
1694            will be the address ranges at the top of the varparser's
1695            stack. */
1696         GExpr*   fbGX = NULL;
1697         Word     i, nRanges;
1698         XArray*  /* of AddrRange */ xa;
1699         TempVar* tv;
1700         /* Stack can't be empty; we put a dummy entry on it for the
1701            entire address range before starting with the DIEs for
1702            this CU. */
1703         vg_assert(parser->sp >= 0);
1704
1705         /* If this is a local variable (non-external), try to find
1706            the GExpr for the DW_AT_frame_base of the containing
1707            function.  It should have been pushed on the stack at the
1708            time we encountered its DW_TAG_subprogram DIE, so the way
1709            to find it is to scan back down the stack looking for it.
1710            If there isn't an enclosing stack entry marked 'isFunc'
1711            then we must be seeing variable or formal param DIEs
1712            outside of a function, so we deem the Dwarf to be
1713            malformed if that happens.  Note that the fbGX may be NULL
1714            if the containing DT_TAG_subprogram didn't supply a
1715            DW_AT_frame_base -- that's OK, but there must actually be
1716            a containing DW_TAG_subprogram. */
1717         if (!external) {
1718            Bool found = False;
1719            for (i = parser->sp; i >= 0; i--) {
1720               if (parser->isFunc[i]) {
1721                  fbGX = parser->fbGX[i];
1722                  found = True;
1723                  break;
1724               }
1725            }
1726            if (!found) {
1727               if (0 && VG_(clo_verbosity) >= 0) {
1728                  VG_(message)(Vg_DebugMsg,
1729                     "warning: parse_var_DIE: non-external variable "
1730                     "outside DW_TAG_subprogram");
1731               }
1732               /* goto bad_DIE; */
1733               /* This seems to happen a lot.  Just ignore it -- if,
1734                  when we come to evaluation of the location (guarded)
1735                  expression, it requires a frame base value, and
1736                  there's no expression for that, then evaluation as a
1737                  whole will fail.  Harmless - a bit of a waste of
1738                  cycles but nothing more. */
1739            }
1740         }
1741
1742         /* re "external ? 0 : parser->sp" (twice), if the var is
1743            marked 'external' then we must put it at the global scope,
1744            as only the global scope (level 0) covers the entire PC
1745            address space.  It is asserted elsewhere that level 0
1746            always covers the entire address space. */
1747         xa = parser->ranges[external ? 0 : parser->sp];
1748         nRanges = VG_(sizeXA)(xa);
1749         vg_assert(nRanges >= 0);
1750
1751         tv = ML_(dinfo_zalloc)( "di.readdwarf3.pvD.1", sizeof(TempVar) );
1752         tv->name   = name;
1753         tv->level  = external ? 0 : parser->sp;
1754         tv->typeR  = typeR;
1755         tv->gexpr  = gexpr;
1756         tv->fbGX   = fbGX;
1757         tv->fName  = fileName;
1758         tv->fLine  = lineNo;
1759         tv->dioff  = posn;
1760         tv->absOri = abs_ori;
1761
1762         /* See explanation on definition of type TempVar for the
1763            reason for this elaboration. */
1764         tv->nRanges = nRanges;
1765         tv->rngOneMin = 0;
1766         tv->rngOneMax = 0;
1767         tv->rngMany = NULL;
1768         if (nRanges == 1) {
1769            AddrRange* range = VG_(indexXA)(xa, 0);
1770            tv->rngOneMin = range->aMin;
1771            tv->rngOneMax = range->aMax;
1772         }
1773         else if (nRanges > 1) {
1774            /* See if we already have a range list which is
1775               structurally identical.  If so, use that; if not, clone
1776               this one, and add it to our collection. */
1777            UWord keyW, valW;
1778            if (VG_(lookupFM)( rangestree, &keyW, &valW, (UWord)xa )) {
1779               XArray* old = (XArray*)keyW;
1780               tl_assert(valW == 0);
1781               tl_assert(old != xa);
1782               tv->rngMany = old;
1783            } else {
1784               XArray* cloned = VG_(cloneXA)( "di.readdwarf3.pvD.2", xa );
1785               tv->rngMany = cloned;
1786               VG_(addToFM)( rangestree, (UWord)cloned, 0 );
1787            }
1788         }
1789
1790         VG_(addToXA)( tempvars, &tv );
1791
1792         TRACE_D3("  Recording this variable, with %ld PC range(s)\n",
1793                  VG_(sizeXA)(xa) );
1794         /* collect stats on how effective the ->ranges special
1795            casing is */
1796         if (0) {
1797            static Int ntot=0, ngt=0;
1798            ntot++;
1799            if (tv->rngMany) ngt++;
1800            if (0 == (ntot % 100000))
1801               VG_(printf)("XXXX %d tot, %d cloned\n", ntot, ngt);
1802         }
1803
1804      }
1805
1806      /* Here are some other weird cases seen in the wild:
1807
1808            We have a variable with a name and a type, but no
1809            location.  I guess that's a sign that it has been
1810            optimised away.  Ignore it.  Here's an example:
1811
1812            static Int lc_compar(void* n1, void* n2) {
1813               MC_Chunk* mc1 = *(MC_Chunk**)n1;
1814               MC_Chunk* mc2 = *(MC_Chunk**)n2;
1815               return (mc1->data < mc2->data ? -1 : 1);
1816            }
1817
1818            Both mc1 and mc2 are like this
1819            <2><5bc>: Abbrev Number: 21 (DW_TAG_variable)
1820                DW_AT_name        : mc1
1821                DW_AT_decl_file   : 1
1822                DW_AT_decl_line   : 216
1823                DW_AT_type        : <5d3>
1824
1825            whereas n1 and n2 do have locations specified.
1826
1827            ---------------------------------------------
1828
1829            We see a DW_TAG_formal_parameter with a type, but
1830            no name and no location.  It's probably part of a function type
1831            construction, thusly, hence ignore it:
1832         <1><2b4>: Abbrev Number: 12 (DW_TAG_subroutine_type)
1833             DW_AT_sibling     : <2c9>
1834             DW_AT_prototyped  : 1
1835             DW_AT_type        : <114>
1836         <2><2be>: Abbrev Number: 13 (DW_TAG_formal_parameter)
1837             DW_AT_type        : <13e>
1838         <2><2c3>: Abbrev Number: 13 (DW_TAG_formal_parameter)
1839             DW_AT_type        : <133>
1840
1841            ---------------------------------------------
1842
1843            Is very minimal, like this:
1844            <4><81d>: Abbrev Number: 44 (DW_TAG_variable)
1845                DW_AT_abstract_origin: <7ba>
1846            What that signifies I have no idea.  Ignore.
1847
1848            ----------------------------------------------
1849
1850            Is very minimal, like this:
1851            <200f>: DW_TAG_formal_parameter
1852                DW_AT_abstract_ori: <1f4c>
1853                DW_AT_location    : 13440
1854            What that signifies I have no idea.  Ignore.
1855            It might be significant, though: the variable at least
1856            has a location and so might exist somewhere.
1857            Maybe we should handle this.
1858
1859            ---------------------------------------------
1860
1861            <22407>: DW_TAG_variable
1862              DW_AT_name        : (indirect string, offset: 0x6579):
1863                                  vgPlain_trampoline_stuff_start
1864              DW_AT_decl_file   : 29
1865              DW_AT_decl_line   : 56
1866              DW_AT_external    : 1
1867              DW_AT_declaration : 1
1868
1869            Nameless and typeless variable that has a location?  Who
1870            knows.  Not me.
1871            <2><3d178>: Abbrev Number: 22 (DW_TAG_variable)
1872                 DW_AT_location    : 9 byte block: 3 c0 c7 13 38 0 0 0 0
1873                                     (DW_OP_addr: 3813c7c0)
1874
1875            No, really.  Check it out.  gcc is quite simply borked.
1876            <3><168cc>: Abbrev Number: 141 (DW_TAG_variable)
1877            // followed by no attributes, and the next DIE is a sibling,
1878            // not a child
1879            */
1880   }
1881   return;
1882
1883  bad_DIE:
1884   set_position_of_Cursor( c_die,  saved_die_c_offset );
1885   set_position_of_Cursor( c_abbv, saved_abbv_c_offset );
1886   VG_(printf)("\nparse_var_DIE: confused by:\n");
1887   VG_(printf)(" <%d><%lx>: %s\n", level, posn, ML_(pp_DW_TAG)( dtag ) );
1888   while (True) {
1889      DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
1890      DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
1891      if (attr == 0 && form == 0) break;
1892      VG_(printf)("     %18s: ", ML_(pp_DW_AT)(attr));
1893      /* Get the form contents, so as to print them */
1894      get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
1895                         cc, c_die, True, form );
1896      VG_(printf)("\t\n");
1897   }
1898   VG_(printf)("\n");
1899   cc->barf("parse_var_DIE: confused by the above DIE");
1900   /*NOTREACHED*/
1901}
1902
1903
1904/*------------------------------------------------------------*/
1905/*---                                                      ---*/
1906/*--- Parsing of type-related DIEs                         ---*/
1907/*---                                                      ---*/
1908/*------------------------------------------------------------*/
1909
1910#define N_D3_TYPE_STACK 16
1911
1912typedef
1913   struct {
1914      /* What source language?  'C'=C/C++, 'F'=Fortran, '?'=other
1915         Established once per compilation unit. */
1916      UChar language;
1917      /* A stack of types which are currently under construction */
1918      Int   sp; /* [sp] is innermost active entry; sp==-1 for empty
1919                   stack */
1920      /* Note that the TyEnts in qparentE are temporary copies of the
1921         ones accumulating in the main tyent array.  So it is not safe
1922         to free up anything on them when popping them off the stack
1923         (iow, it isn't safe to use TyEnt__make_EMPTY on them).  Just
1924         memset them to zero when done. */
1925      TyEnt qparentE[N_D3_TYPE_STACK]; /* parent TyEnts */
1926      Int   qlevel[N_D3_TYPE_STACK];
1927
1928   }
1929   D3TypeParser;
1930
1931static void typestack_show ( D3TypeParser* parser, HChar* str ) {
1932   Word i;
1933   VG_(printf)("  typestack (%s) {\n", str);
1934   for (i = 0; i <= parser->sp; i++) {
1935      VG_(printf)("    [%ld] (level %d): ", i, parser->qlevel[i]);
1936      ML_(pp_TyEnt)( &parser->qparentE[i] );
1937      VG_(printf)("\n");
1938   }
1939   VG_(printf)("  }\n");
1940}
1941
1942/* Remove from the stack, all entries with .level > 'level' */
1943static
1944void typestack_preen ( D3TypeParser* parser, Bool td3, Int level )
1945{
1946   Bool changed = False;
1947   vg_assert(parser->sp < N_D3_TYPE_STACK);
1948   while (True) {
1949      vg_assert(parser->sp >= -1);
1950      if (parser->sp == -1) break;
1951      if (parser->qlevel[parser->sp] <= level) break;
1952      if (0)
1953         TRACE_D3("BBBBAAAA typestack_pop [newsp=%d]\n", parser->sp-1);
1954      vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
1955      VG_(memset)(&parser->qparentE[parser->sp], 0, sizeof(TyEnt));
1956      parser->qparentE[parser->sp].cuOff = D3_INVALID_CUOFF;
1957      parser->qparentE[parser->sp].tag = Te_EMPTY;
1958      parser->qlevel[parser->sp] = 0;
1959      parser->sp--;
1960      changed = True;
1961   }
1962   if (changed && td3)
1963      typestack_show( parser, "after preen" );
1964}
1965
1966static Bool typestack_is_empty ( D3TypeParser* parser ) {
1967   vg_assert(parser->sp >= -1 && parser->sp < N_D3_TYPE_STACK);
1968   return parser->sp == -1;
1969}
1970
1971static void typestack_push ( CUConst* cc,
1972                             D3TypeParser* parser,
1973                             Bool td3,
1974                             TyEnt* parentE, Int level ) {
1975   if (0)
1976   TRACE_D3("BBBBAAAA typestack_push[newsp=%d]: %d  %05lx\n",
1977            parser->sp+1, level, parentE->cuOff);
1978
1979   /* First we need to zap everything >= 'level', as we are about to
1980      replace any previous entry at 'level', so .. */
1981   typestack_preen(parser, /*td3*/False, level-1);
1982
1983   vg_assert(parser->sp >= -1);
1984   vg_assert(parser->sp < N_D3_TYPE_STACK);
1985   if (parser->sp == N_D3_TYPE_STACK-1)
1986      cc->barf("typestack_push: N_D3_TYPE_STACK is too low; "
1987               "increase and recompile");
1988   if (parser->sp >= 0)
1989      vg_assert(parser->qlevel[parser->sp] < level);
1990   parser->sp++;
1991   vg_assert(parser->qparentE[parser->sp].tag == Te_EMPTY);
1992   vg_assert(parser->qlevel[parser->sp]  == 0);
1993   vg_assert(parentE);
1994   vg_assert(ML_(TyEnt__is_type)(parentE));
1995   vg_assert(parentE->cuOff != D3_INVALID_CUOFF);
1996   parser->qparentE[parser->sp] = *parentE;
1997   parser->qlevel[parser->sp]  = level;
1998   if (td3)
1999      typestack_show( parser, "after push" );
2000}
2001
2002
2003/* Parse a type-related DIE.  'parser' holds the current parser state.
2004   'admin' is where the completed types are dumped.  'dtag' is the tag
2005   for this DIE.  'c_die' points to the start of the data fields (FORM
2006   stuff) for the DIE.  c_abbv points to the start of the (name,form)
2007   pairs which describe the DIE.
2008
2009   We may find the DIE uninteresting, in which case we should ignore
2010   it.
2011
2012   What happens: the DIE is examined.  If uninteresting, it is ignored.
2013   Otherwise, the DIE gives rise to two things:
2014
2015   (1) the offset of this DIE in the CU -- the cuOffset, a UWord
2016   (2) a TyAdmin structure, which holds the type, or related stuff
2017
2018   (2) is added at the end of 'tyadmins', at some index, say 'i'.
2019
2020   A pair (cuOffset, i) is added to 'tydict'.
2021
2022   Hence 'tyadmins' holds the actual type entities, and 'tydict' holds
2023   a mapping from cuOffset to the index of the corresponding entry in
2024   'tyadmin'.
2025
2026   When resolving a cuOffset to a TyAdmin, first look up the cuOffset
2027   in the tydict (by binary search).  This gives an index into
2028   tyadmins, and the required entity lives in tyadmins at that index.
2029*/
2030__attribute__((noinline))
2031static void parse_type_DIE ( /*MOD*/XArray* /* of TyEnt */ tyents,
2032                             /*MOD*/D3TypeParser* parser,
2033                             DW_TAG dtag,
2034                             UWord posn,
2035                             Int level,
2036                             Cursor* c_die,
2037                             Cursor* c_abbv,
2038                             CUConst* cc,
2039                             Bool td3 )
2040{
2041   ULong cts;
2042   Int   ctsSzB;
2043   UWord ctsMemSzB;
2044   TyEnt typeE;
2045   TyEnt atomE;
2046   TyEnt fieldE;
2047   TyEnt boundE;
2048
2049   UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
2050   UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv );
2051
2052   VG_(memset)( &typeE,  0xAA, sizeof(typeE) );
2053   VG_(memset)( &atomE,  0xAA, sizeof(atomE) );
2054   VG_(memset)( &fieldE, 0xAA, sizeof(fieldE) );
2055   VG_(memset)( &boundE, 0xAA, sizeof(boundE) );
2056
2057   /* If we've returned to a level at or above any previously noted
2058      parent, un-note it, so we don't believe we're still collecting
2059      its children. */
2060   typestack_preen( parser, td3, level-1 );
2061
2062   if (dtag == DW_TAG_compile_unit) {
2063      /* See if we can find DW_AT_language, since it is important for
2064         establishing array bounds (see DW_TAG_subrange_type below in
2065         this fn) */
2066      while (True) {
2067         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2068         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2069         if (attr == 0 && form == 0) break;
2070         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2071                            cc, c_die, False/*td3*/, form );
2072         if (attr != DW_AT_language)
2073            continue;
2074         if (ctsSzB == 0)
2075           goto bad_DIE;
2076         switch (cts) {
2077            case DW_LANG_C89: case DW_LANG_C:
2078            case DW_LANG_C_plus_plus: case DW_LANG_ObjC:
2079            case DW_LANG_ObjC_plus_plus: case DW_LANG_UPC:
2080            case DW_LANG_Upc:
2081               parser->language = 'C'; break;
2082            case DW_LANG_Fortran77: case DW_LANG_Fortran90:
2083            case DW_LANG_Fortran95:
2084               parser->language = 'F'; break;
2085            case DW_LANG_Ada83: case DW_LANG_Cobol74:
2086            case DW_LANG_Cobol85: case DW_LANG_Pascal83:
2087            case DW_LANG_Modula2: case DW_LANG_Java:
2088            case DW_LANG_C99: case DW_LANG_Ada95:
2089            case DW_LANG_PLI: case DW_LANG_D:
2090            case DW_LANG_Mips_Assembler:
2091               parser->language = '?'; break;
2092            default:
2093               goto bad_DIE;
2094         }
2095      }
2096   }
2097
2098   if (dtag == DW_TAG_base_type) {
2099      /* We can pick up a new base type any time. */
2100      VG_(memset)(&typeE, 0, sizeof(typeE));
2101      typeE.cuOff = D3_INVALID_CUOFF;
2102      typeE.tag   = Te_TyBase;
2103      while (True) {
2104         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2105         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2106         if (attr == 0 && form == 0) break;
2107         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2108                            cc, c_die, False/*td3*/, form );
2109         if (attr == DW_AT_name && ctsMemSzB > 0) {
2110            typeE.Te.TyBase.name
2111               = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.1",
2112                                    (UChar*)(UWord)cts );
2113         }
2114         if (attr == DW_AT_byte_size && ctsSzB > 0) {
2115            typeE.Te.TyBase.szB = cts;
2116         }
2117         if (attr == DW_AT_encoding && ctsSzB > 0) {
2118            switch (cts) {
2119               case DW_ATE_unsigned: case DW_ATE_unsigned_char:
2120               case DW_ATE_boolean:/* FIXME - is this correct? */
2121                  typeE.Te.TyBase.enc = 'U'; break;
2122               case DW_ATE_signed: case DW_ATE_signed_char:
2123                  typeE.Te.TyBase.enc = 'S'; break;
2124               case DW_ATE_float:
2125                  typeE.Te.TyBase.enc = 'F'; break;
2126               case DW_ATE_complex_float:
2127                  typeE.Te.TyBase.enc = 'C'; break;
2128               default:
2129                  goto bad_DIE;
2130            }
2131         }
2132      }
2133
2134      /* Invent a name if it doesn't have one.  gcc-4.3
2135         -ftree-vectorize is observed to emit nameless base types. */
2136      if (!typeE.Te.TyBase.name)
2137         typeE.Te.TyBase.name
2138            = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.2",
2139                                 "<anon_base_type>" );
2140
2141      /* Do we have something that looks sane? */
2142      if (/* must have a name */
2143          typeE.Te.TyBase.name == NULL
2144          /* and a plausible size.  Yes, really 32: "complex long
2145             double" apparently has size=32 */
2146          || typeE.Te.TyBase.szB < 0 || typeE.Te.TyBase.szB > 32
2147          /* and a plausible encoding */
2148          || (typeE.Te.TyBase.enc != 'U'
2149              && typeE.Te.TyBase.enc != 'S'
2150              && typeE.Te.TyBase.enc != 'F'
2151              && typeE.Te.TyBase.enc != 'C'))
2152         goto bad_DIE;
2153      /* Last minute hack: if we see this
2154         <1><515>: DW_TAG_base_type
2155             DW_AT_byte_size   : 0
2156             DW_AT_encoding    : 5
2157             DW_AT_name        : void
2158         convert it into a real Void type. */
2159      if (typeE.Te.TyBase.szB == 0
2160          && 0 == VG_(strcmp)("void", typeE.Te.TyBase.name)) {
2161         ML_(TyEnt__make_EMPTY)(&typeE);
2162         typeE.tag = Te_TyVoid;
2163         typeE.Te.TyVoid.isFake = False; /* it's a real one! */
2164      }
2165
2166      goto acquire_Type;
2167   }
2168
2169   if (dtag == DW_TAG_pointer_type || dtag == DW_TAG_reference_type
2170       || dtag == DW_TAG_ptr_to_member_type) {
2171      /* This seems legit for _pointer_type and _reference_type.  I
2172         don't know if rolling _ptr_to_member_type in here really is
2173         legit, but it's better than not handling it at all. */
2174      VG_(memset)(&typeE, 0, sizeof(typeE));
2175      typeE.cuOff = D3_INVALID_CUOFF;
2176      typeE.tag   = Te_TyPorR;
2177      /* target type defaults to void */
2178      typeE.Te.TyPorR.typeR = D3_FAKEVOID_CUOFF;
2179      typeE.Te.TyPorR.isPtr = dtag == DW_TAG_pointer_type
2180                              || dtag == DW_TAG_ptr_to_member_type;
2181      /* Pointer types don't *have* to specify their size, in which
2182         case we assume it's a machine word.  But if they do specify
2183         it, it must be a machine word :-) This probably assumes that
2184         the word size of the Dwarf3 we're reading is the same size as
2185         that on the machine.  gcc appears to give a size whereas icc9
2186         doesn't. */
2187      if (typeE.Te.TyPorR.isPtr)
2188         typeE.Te.TyPorR.szB = sizeof(Word);
2189      while (True) {
2190         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2191         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2192         if (attr == 0 && form == 0) break;
2193         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2194                            cc, c_die, False/*td3*/, form );
2195         if (attr == DW_AT_byte_size && ctsSzB > 0) {
2196            typeE.Te.TyPorR.szB = cts;
2197         }
2198         if (attr == DW_AT_type && ctsSzB > 0) {
2199            typeE.Te.TyPorR.typeR = (UWord)cts;
2200         }
2201      }
2202      /* Do we have something that looks sane? */
2203      if (typeE.Te.TyPorR.szB != sizeof(Word))
2204         goto bad_DIE;
2205      else
2206         goto acquire_Type;
2207   }
2208
2209   if (dtag == DW_TAG_enumeration_type) {
2210      /* Create a new Type to hold the results. */
2211      VG_(memset)(&typeE, 0, sizeof(typeE));
2212      typeE.cuOff = posn;
2213      typeE.tag   = Te_TyEnum;
2214      typeE.Te.TyEnum.atomRs
2215         = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.enum_type.1",
2216                       ML_(dinfo_free),
2217                       sizeof(UWord) );
2218      while (True) {
2219         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2220         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2221         if (attr == 0 && form == 0) break;
2222         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2223                            cc, c_die, False/*td3*/, form );
2224         if (attr == DW_AT_name && ctsMemSzB > 0) {
2225            typeE.Te.TyEnum.name
2226              = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.2",
2227                                   (UChar*)(UWord)cts );
2228         }
2229         if (attr == DW_AT_byte_size && ctsSzB > 0) {
2230            typeE.Te.TyEnum.szB = cts;
2231         }
2232      }
2233      /* Do we have something that looks sane? */
2234      if (typeE.Te.TyEnum.szB == 0 /* we must know the size */
2235         /* But the name can be present, or not */)
2236         goto bad_DIE;
2237      /* On't stack! */
2238      typestack_push( cc, parser, td3, &typeE, level );
2239      goto acquire_Type;
2240   }
2241
2242   /* gcc (GCC) 4.4.0 20081017 (experimental) occasionally produces
2243      DW_TAG_enumerator with only a DW_AT_name but no
2244      DW_AT_const_value.  This is in violation of the Dwarf3 standard,
2245      and appears to be a new "feature" of gcc - versions 4.3.x and
2246      earlier do not appear to do this.  So accept DW_TAG_enumerator
2247      which only have a name but no value.  An example:
2248
2249      <1><180>: Abbrev Number: 6 (DW_TAG_enumeration_type)
2250         <181>   DW_AT_name        : (indirect string, offset: 0xda70):
2251                                     QtMsgType
2252         <185>   DW_AT_byte_size   : 4
2253         <186>   DW_AT_decl_file   : 14
2254         <187>   DW_AT_decl_line   : 1480
2255         <189>   DW_AT_sibling     : <0x1a7>
2256      <2><18d>: Abbrev Number: 7 (DW_TAG_enumerator)
2257         <18e>   DW_AT_name        : (indirect string, offset: 0x9e18):
2258                                     QtDebugMsg
2259      <2><192>: Abbrev Number: 7 (DW_TAG_enumerator)
2260         <193>   DW_AT_name        : (indirect string, offset: 0x1505f):
2261                                     QtWarningMsg
2262      <2><197>: Abbrev Number: 7 (DW_TAG_enumerator)
2263         <198>   DW_AT_name        : (indirect string, offset: 0x16f4a):
2264                                     QtCriticalMsg
2265      <2><19c>: Abbrev Number: 7 (DW_TAG_enumerator)
2266         <19d>   DW_AT_name        : (indirect string, offset: 0x156dd):
2267                                     QtFatalMsg
2268      <2><1a1>: Abbrev Number: 7 (DW_TAG_enumerator)
2269         <1a2>   DW_AT_name        : (indirect string, offset: 0x13660):
2270                                     QtSystemMsg
2271   */
2272   if (dtag == DW_TAG_enumerator) {
2273      VG_(memset)( &atomE, 0, sizeof(atomE) );
2274      atomE.cuOff = posn;
2275      atomE.tag   = Te_Atom;
2276      while (True) {
2277         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2278         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2279         if (attr == 0 && form == 0) break;
2280         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2281                            cc, c_die, False/*td3*/, form );
2282         if (attr == DW_AT_name && ctsMemSzB > 0) {
2283            atomE.Te.Atom.name
2284              = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enumerator.1",
2285                                   (UChar*)(UWord)cts );
2286         }
2287         if (attr == DW_AT_const_value && ctsSzB > 0) {
2288            atomE.Te.Atom.value = cts;
2289            atomE.Te.Atom.valueKnown = True;
2290         }
2291      }
2292      /* Do we have something that looks sane? */
2293      if (atomE.Te.Atom.name == NULL)
2294         goto bad_DIE;
2295      /* Do we have a plausible parent? */
2296      if (typestack_is_empty(parser)) goto bad_DIE;
2297      vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
2298      vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
2299      if (level != parser->qlevel[parser->sp]+1) goto bad_DIE;
2300      if (parser->qparentE[parser->sp].tag != Te_TyEnum) goto bad_DIE;
2301      /* Record this child in the parent */
2302      vg_assert(parser->qparentE[parser->sp].Te.TyEnum.atomRs);
2303      VG_(addToXA)( parser->qparentE[parser->sp].Te.TyEnum.atomRs,
2304                    &atomE );
2305      /* And record the child itself */
2306      goto acquire_Atom;
2307   }
2308
2309   /* Treat DW_TAG_class_type as if it was a DW_TAG_structure_type.  I
2310      don't know if this is correct, but it at least makes this reader
2311      usable for gcc-4.3 produced Dwarf3. */
2312   if (dtag == DW_TAG_structure_type || dtag == DW_TAG_class_type
2313       || dtag == DW_TAG_union_type) {
2314      Bool have_szB = False;
2315      Bool is_decl  = False;
2316      Bool is_spec  = False;
2317      /* Create a new Type to hold the results. */
2318      VG_(memset)(&typeE, 0, sizeof(typeE));
2319      typeE.cuOff = posn;
2320      typeE.tag   = Te_TyStOrUn;
2321      typeE.Te.TyStOrUn.name = NULL;
2322      typeE.Te.TyStOrUn.fieldRs
2323         = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.pTD.struct_type.1",
2324                       ML_(dinfo_free),
2325                       sizeof(UWord) );
2326      typeE.Te.TyStOrUn.complete = True;
2327      typeE.Te.TyStOrUn.isStruct = dtag == DW_TAG_structure_type
2328                                   || dtag == DW_TAG_class_type;
2329      while (True) {
2330         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2331         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2332         if (attr == 0 && form == 0) break;
2333         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2334                            cc, c_die, False/*td3*/, form );
2335         if (attr == DW_AT_name && ctsMemSzB > 0) {
2336            typeE.Te.TyStOrUn.name
2337               = ML_(dinfo_strdup)( "di.readdwarf3.ptD.struct_type.2",
2338                                    (UChar*)(UWord)cts );
2339         }
2340         if (attr == DW_AT_byte_size && ctsSzB >= 0) {
2341            typeE.Te.TyStOrUn.szB = cts;
2342            have_szB = True;
2343         }
2344         if (attr == DW_AT_declaration && ctsSzB > 0 && cts > 0) {
2345            is_decl = True;
2346         }
2347         if (attr == DW_AT_specification && ctsSzB > 0 && cts > 0) {
2348            is_spec = True;
2349         }
2350      }
2351      /* Do we have something that looks sane? */
2352      if (is_decl && (!is_spec)) {
2353         /* It's a DW_AT_declaration.  We require the name but
2354            nothing else. */
2355         if (typeE.Te.TyStOrUn.name == NULL)
2356            goto bad_DIE;
2357         typeE.Te.TyStOrUn.complete = False;
2358         goto acquire_Type;
2359      }
2360      if ((!is_decl) /* && (!is_spec) */) {
2361         /* this is the common, ordinary case */
2362         if ((!have_szB) /* we must know the size */
2363             /* But the name can be present, or not */)
2364            goto bad_DIE;
2365         /* On't stack! */
2366         typestack_push( cc, parser, td3, &typeE, level );
2367         goto acquire_Type;
2368      }
2369      else {
2370         /* don't know how to handle any other variants just now */
2371         goto bad_DIE;
2372      }
2373   }
2374
2375   if (dtag == DW_TAG_member) {
2376      /* Acquire member entries for both DW_TAG_structure_type and
2377         DW_TAG_union_type.  They differ minorly, in that struct
2378         members must have a DW_AT_data_member_location expression
2379         whereas union members must not. */
2380      Bool parent_is_struct;
2381      VG_(memset)( &fieldE, 0, sizeof(fieldE) );
2382      fieldE.cuOff = posn;
2383      fieldE.tag   = Te_Field;
2384      fieldE.Te.Field.typeR = D3_INVALID_CUOFF;
2385      while (True) {
2386         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2387         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2388         if (attr == 0 && form == 0) break;
2389         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2390                            cc, c_die, False/*td3*/, form );
2391         if (attr == DW_AT_name && ctsMemSzB > 0) {
2392            fieldE.Te.Field.name
2393               = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.1",
2394                                    (UChar*)(UWord)cts );
2395         }
2396         if (attr == DW_AT_type && ctsSzB > 0) {
2397            fieldE.Te.Field.typeR = (UWord)cts;
2398         }
2399         if (attr == DW_AT_data_member_location && ctsMemSzB > 0) {
2400            fieldE.Te.Field.nLoc = (UWord)ctsMemSzB;
2401            fieldE.Te.Field.loc
2402               = ML_(dinfo_memdup)( "di.readdwarf3.ptD.member.2",
2403                                    (UChar*)(UWord)cts,
2404                                    (SizeT)fieldE.Te.Field.nLoc );
2405         }
2406      }
2407      /* Do we have a plausible parent? */
2408      if (typestack_is_empty(parser)) goto bad_DIE;
2409      vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
2410      vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
2411      if (level != parser->qlevel[parser->sp]+1) goto bad_DIE;
2412      if (parser->qparentE[parser->sp].tag != Te_TyStOrUn) goto bad_DIE;
2413      /* Do we have something that looks sane?  If this a member of a
2414         struct, we must have a location expression; but if a member
2415         of a union that is irrelevant (D3 spec sec 5.6.6).  We ought
2416         to reject in the latter case, but some compilers have been
2417         observed to emit constant-zero expressions.  So just ignore
2418         them. */
2419      parent_is_struct
2420         = parser->qparentE[parser->sp].Te.TyStOrUn.isStruct;
2421      if (!fieldE.Te.Field.name)
2422         fieldE.Te.Field.name
2423            = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.3",
2424                                 "<anon_field>" );
2425      vg_assert(fieldE.Te.Field.name);
2426      if (fieldE.Te.Field.typeR == D3_INVALID_CUOFF)
2427         goto bad_DIE;
2428      if (parent_is_struct && (!fieldE.Te.Field.loc))
2429         goto bad_DIE;
2430      if ((!parent_is_struct) && fieldE.Te.Field.loc) {
2431         /* If this is a union type, pretend we haven't seen the data
2432            member location expression, as it is by definition
2433            redundant (it must be zero). */
2434         ML_(dinfo_free)(fieldE.Te.Field.loc);
2435         fieldE.Te.Field.loc  = NULL;
2436         fieldE.Te.Field.nLoc = 0;
2437      }
2438      /* Record this child in the parent */
2439      fieldE.Te.Field.isStruct = parent_is_struct;
2440      vg_assert(parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs);
2441      VG_(addToXA)( parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs,
2442                    &posn );
2443      /* And record the child itself */
2444      goto acquire_Field;
2445   }
2446
2447   if (dtag == DW_TAG_array_type) {
2448      VG_(memset)(&typeE, 0, sizeof(typeE));
2449      typeE.cuOff = posn;
2450      typeE.tag   = Te_TyArray;
2451      typeE.Te.TyArray.typeR = D3_INVALID_CUOFF;
2452      typeE.Te.TyArray.boundRs
2453         = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.array_type.1",
2454                       ML_(dinfo_free),
2455                       sizeof(UWord) );
2456      while (True) {
2457         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2458         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2459         if (attr == 0 && form == 0) break;
2460         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2461                            cc, c_die, False/*td3*/, form );
2462         if (attr == DW_AT_type && ctsSzB > 0) {
2463            typeE.Te.TyArray.typeR = (UWord)cts;
2464         }
2465      }
2466      if (typeE.Te.TyArray.typeR == D3_INVALID_CUOFF)
2467         goto bad_DIE;
2468      /* On't stack! */
2469      typestack_push( cc, parser, td3, &typeE, level );
2470      goto acquire_Type;
2471   }
2472
2473   if (dtag == DW_TAG_subrange_type) {
2474      Bool have_lower = False;
2475      Bool have_upper = False;
2476      Bool have_count = False;
2477      Long lower = 0;
2478      Long upper = 0;
2479      Long count = 0;
2480
2481      switch (parser->language) {
2482         case 'C': have_lower = True;  lower = 0; break;
2483         case 'F': have_lower = True;  lower = 1; break;
2484         case '?': have_lower = False; break;
2485         default:  vg_assert(0); /* assured us by handling of
2486                                    DW_TAG_compile_unit in this fn */
2487      }
2488
2489      VG_(memset)( &boundE, 0, sizeof(boundE) );
2490      boundE.cuOff = D3_INVALID_CUOFF;
2491      boundE.tag   = Te_Bound;
2492      while (True) {
2493         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2494         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2495         if (attr == 0 && form == 0) break;
2496         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2497                            cc, c_die, False/*td3*/, form );
2498         if (attr == DW_AT_lower_bound && ctsSzB > 0) {
2499            lower      = (Long)cts;
2500            have_lower = True;
2501         }
2502         if (attr == DW_AT_upper_bound && ctsSzB > 0) {
2503            upper      = (Long)cts;
2504            have_upper = True;
2505         }
2506         if (attr == DW_AT_count && ctsSzB > 0) {
2507            count      = cts;
2508            have_count = True;
2509         }
2510      }
2511      /* FIXME: potentially skip the rest if no parent present, since
2512         it could be the case that this subrange type is free-standing
2513         (not being used to describe the bounds of a containing array
2514         type) */
2515      /* Do we have a plausible parent? */
2516      if (typestack_is_empty(parser)) goto bad_DIE;
2517      vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
2518      vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
2519      if (level != parser->qlevel[parser->sp]+1) goto bad_DIE;
2520      if (parser->qparentE[parser->sp].tag != Te_TyArray) goto bad_DIE;
2521
2522      /* Figure out if we have a definite range or not */
2523      if (have_lower && have_upper && (!have_count)) {
2524         boundE.Te.Bound.knownL = True;
2525         boundE.Te.Bound.knownU = True;
2526         boundE.Te.Bound.boundL = lower;
2527         boundE.Te.Bound.boundU = upper;
2528      }
2529      else if (have_lower && (!have_upper) && (!have_count)) {
2530         boundE.Te.Bound.knownL = True;
2531         boundE.Te.Bound.knownU = False;
2532         boundE.Te.Bound.boundL = lower;
2533         boundE.Te.Bound.boundU = 0;
2534      } else {
2535         /* FIXME: handle more cases */
2536         goto bad_DIE;
2537      }
2538
2539      /* Record this bound in the parent */
2540      boundE.cuOff = posn;
2541      vg_assert(parser->qparentE[parser->sp].Te.TyArray.boundRs);
2542      VG_(addToXA)( parser->qparentE[parser->sp].Te.TyArray.boundRs,
2543                    &boundE );
2544      /* And record the child itself */
2545      goto acquire_Bound;
2546   }
2547
2548   if (dtag == DW_TAG_typedef) {
2549      /* We can pick up a new typedef any time. */
2550      VG_(memset)(&typeE, 0, sizeof(typeE));
2551      typeE.cuOff = D3_INVALID_CUOFF;
2552      typeE.tag   = Te_TyTyDef;
2553      typeE.Te.TyTyDef.name = NULL;
2554      typeE.Te.TyTyDef.typeR = D3_INVALID_CUOFF;
2555      while (True) {
2556         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2557         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2558         if (attr == 0 && form == 0) break;
2559         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2560                            cc, c_die, False/*td3*/, form );
2561         if (attr == DW_AT_name && ctsMemSzB > 0) {
2562            typeE.Te.TyTyDef.name
2563               = ML_(dinfo_strdup)( "di.readdwarf3.ptD.typedef.1",
2564                                    (UChar*)(UWord)cts );
2565         }
2566         if (attr == DW_AT_type && ctsSzB > 0) {
2567            typeE.Te.TyTyDef.typeR = (UWord)cts;
2568         }
2569      }
2570      /* Do we have something that looks sane? */
2571      if (/* must have a name */
2572          typeE.Te.TyTyDef.name == NULL
2573          /* but the referred-to type can be absent */)
2574         goto bad_DIE;
2575      else
2576         goto acquire_Type;
2577   }
2578
2579   if (dtag == DW_TAG_subroutine_type) {
2580      /* function type? just record that one fact and ask no
2581         further questions. */
2582      VG_(memset)(&typeE, 0, sizeof(typeE));
2583      typeE.cuOff = D3_INVALID_CUOFF;
2584      typeE.tag   = Te_TyFn;
2585      goto acquire_Type;
2586   }
2587
2588   if (dtag == DW_TAG_volatile_type || dtag == DW_TAG_const_type) {
2589      Int have_ty = 0;
2590      VG_(memset)(&typeE, 0, sizeof(typeE));
2591      typeE.cuOff = D3_INVALID_CUOFF;
2592      typeE.tag   = Te_TyQual;
2593      typeE.Te.TyQual.qual
2594         = dtag == DW_TAG_volatile_type ? 'V' : 'C';
2595      /* target type defaults to 'void' */
2596      typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF;
2597      while (True) {
2598         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2599         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2600         if (attr == 0 && form == 0) break;
2601         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2602                            cc, c_die, False/*td3*/, form );
2603         if (attr == DW_AT_type && ctsSzB > 0) {
2604            typeE.Te.TyQual.typeR = (UWord)cts;
2605            have_ty++;
2606         }
2607      }
2608      /* gcc sometimes generates DW_TAG_const/volatile_type without
2609         DW_AT_type and GDB appears to interpret the type as 'const
2610         void' (resp. 'volatile void').  So just allow it .. */
2611      if (have_ty == 1 || have_ty == 0)
2612         goto acquire_Type;
2613      else
2614         goto bad_DIE;
2615   }
2616
2617   /* else ignore this DIE */
2618   return;
2619   /*NOTREACHED*/
2620
2621  acquire_Type:
2622   if (0) VG_(printf)("YYYY Acquire Type\n");
2623   vg_assert(ML_(TyEnt__is_type)( &typeE ));
2624   vg_assert(typeE.cuOff == D3_INVALID_CUOFF || typeE.cuOff == posn);
2625   typeE.cuOff = posn;
2626   VG_(addToXA)( tyents, &typeE );
2627   return;
2628   /*NOTREACHED*/
2629
2630  acquire_Atom:
2631   if (0) VG_(printf)("YYYY Acquire Atom\n");
2632   vg_assert(atomE.tag == Te_Atom);
2633   vg_assert(atomE.cuOff == D3_INVALID_CUOFF || atomE.cuOff == posn);
2634   atomE.cuOff = posn;
2635   VG_(addToXA)( tyents, &atomE );
2636   return;
2637   /*NOTREACHED*/
2638
2639  acquire_Field:
2640   /* For union members, Expr should be absent */
2641   if (0) VG_(printf)("YYYY Acquire Field\n");
2642   vg_assert(fieldE.tag == Te_Field);
2643   vg_assert( (fieldE.Te.Field.nLoc > 0 && fieldE.Te.Field.loc != NULL)
2644              || (fieldE.Te.Field.nLoc == 0 && fieldE.Te.Field.loc == NULL) );
2645   if (fieldE.Te.Field.isStruct) {
2646      vg_assert(fieldE.Te.Field.nLoc > 0);
2647   } else {
2648      vg_assert(fieldE.Te.Field.nLoc == 0);
2649   }
2650   vg_assert(fieldE.cuOff == D3_INVALID_CUOFF || fieldE.cuOff == posn);
2651   fieldE.cuOff = posn;
2652   VG_(addToXA)( tyents, &fieldE );
2653   return;
2654   /*NOTREACHED*/
2655
2656  acquire_Bound:
2657   if (0) VG_(printf)("YYYY Acquire Bound\n");
2658   vg_assert(boundE.tag == Te_Bound);
2659   vg_assert(boundE.cuOff == D3_INVALID_CUOFF || boundE.cuOff == posn);
2660   boundE.cuOff = posn;
2661   VG_(addToXA)( tyents, &boundE );
2662   return;
2663   /*NOTREACHED*/
2664
2665  bad_DIE:
2666   set_position_of_Cursor( c_die,  saved_die_c_offset );
2667   set_position_of_Cursor( c_abbv, saved_abbv_c_offset );
2668   VG_(printf)("\nparse_type_DIE: confused by:\n");
2669   VG_(printf)(" <%d><%lx>: %s\n", level, posn, ML_(pp_DW_TAG)( dtag ) );
2670   while (True) {
2671      DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2672      DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2673      if (attr == 0 && form == 0) break;
2674      VG_(printf)("     %18s: ", ML_(pp_DW_AT)(attr));
2675      /* Get the form contents, so as to print them */
2676      get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2677                         cc, c_die, True, form );
2678      VG_(printf)("\t\n");
2679   }
2680   VG_(printf)("\n");
2681   cc->barf("parse_type_DIE: confused by the above DIE");
2682   /*NOTREACHED*/
2683}
2684
2685
2686/*------------------------------------------------------------*/
2687/*---                                                      ---*/
2688/*--- Compression of type DIE information                  ---*/
2689/*---                                                      ---*/
2690/*------------------------------------------------------------*/
2691
2692static UWord chase_cuOff ( Bool* changed,
2693                           XArray* /* of TyEnt */ ents,
2694                           TyEntIndexCache* ents_cache,
2695                           UWord cuOff )
2696{
2697   TyEnt* ent;
2698   ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, cuOff );
2699
2700   if (!ent) {
2701      VG_(printf)("chase_cuOff: no entry for 0x%05lx\n", cuOff);
2702      *changed = False;
2703      return cuOff;
2704   }
2705
2706   vg_assert(ent->tag != Te_EMPTY);
2707   if (ent->tag != Te_INDIR) {
2708      *changed = False;
2709      return cuOff;
2710   } else {
2711      vg_assert(ent->Te.INDIR.indR < cuOff);
2712      *changed = True;
2713      return ent->Te.INDIR.indR;
2714   }
2715}
2716
2717static
2718void chase_cuOffs_in_XArray ( Bool* changed,
2719                              XArray* /* of TyEnt */ ents,
2720                              TyEntIndexCache* ents_cache,
2721                              /*MOD*/XArray* /* of UWord */ cuOffs )
2722{
2723   Bool b2 = False;
2724   Word i, n = VG_(sizeXA)( cuOffs );
2725   for (i = 0; i < n; i++) {
2726      Bool   b = False;
2727      UWord* p = VG_(indexXA)( cuOffs, i );
2728      *p = chase_cuOff( &b, ents, ents_cache, *p );
2729      if (b)
2730         b2 = True;
2731   }
2732   *changed = b2;
2733}
2734
2735static Bool TyEnt__subst_R_fields ( XArray* /* of TyEnt */ ents,
2736                                    TyEntIndexCache* ents_cache,
2737                                    /*MOD*/TyEnt* te )
2738{
2739   Bool b, changed = False;
2740   switch (te->tag) {
2741      case Te_EMPTY:
2742         break;
2743      case Te_INDIR:
2744         te->Te.INDIR.indR
2745            = chase_cuOff( &b, ents, ents_cache, te->Te.INDIR.indR );
2746         if (b) changed = True;
2747         break;
2748      case Te_UNKNOWN:
2749         break;
2750      case Te_Atom:
2751         break;
2752      case Te_Field:
2753         te->Te.Field.typeR
2754            = chase_cuOff( &b, ents, ents_cache, te->Te.Field.typeR );
2755         if (b) changed = True;
2756         break;
2757      case Te_Bound:
2758         break;
2759      case Te_TyBase:
2760         break;
2761      case Te_TyPorR:
2762         te->Te.TyPorR.typeR
2763            = chase_cuOff( &b, ents, ents_cache, te->Te.TyPorR.typeR );
2764         if (b) changed = True;
2765         break;
2766      case Te_TyTyDef:
2767         te->Te.TyTyDef.typeR
2768            = chase_cuOff( &b, ents, ents_cache, te->Te.TyTyDef.typeR );
2769         if (b) changed = True;
2770         break;
2771      case Te_TyStOrUn:
2772         chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyStOrUn.fieldRs );
2773         if (b) changed = True;
2774         break;
2775      case Te_TyEnum:
2776         chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyEnum.atomRs );
2777         if (b) changed = True;
2778         break;
2779      case Te_TyArray:
2780         te->Te.TyArray.typeR
2781            = chase_cuOff( &b, ents, ents_cache, te->Te.TyArray.typeR );
2782         if (b) changed = True;
2783         chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyArray.boundRs );
2784         if (b) changed = True;
2785         break;
2786      case Te_TyFn:
2787         break;
2788      case Te_TyQual:
2789         te->Te.TyQual.typeR
2790            = chase_cuOff( &b, ents, ents_cache, te->Te.TyQual.typeR );
2791         if (b) changed = True;
2792         break;
2793      case Te_TyVoid:
2794         break;
2795      default:
2796         ML_(pp_TyEnt)(te);
2797         vg_assert(0);
2798   }
2799   return changed;
2800}
2801
2802/* Make a pass over 'ents'.  For each tyent, inspect the target of any
2803   'R' or 'Rs' fields (those which refer to other tyents), and replace
2804   any which point to INDIR nodes with the target of the indirection
2805   (which should not itself be an indirection).  In summary, this
2806   routine shorts out all references to indirection nodes. */
2807static
2808Word dedup_types_substitution_pass ( /*MOD*/XArray* /* of TyEnt */ ents,
2809                                     TyEntIndexCache* ents_cache )
2810{
2811   Word i, n, nChanged = 0;
2812   Bool b;
2813   n = VG_(sizeXA)( ents );
2814   for (i = 0; i < n; i++) {
2815      TyEnt* ent = VG_(indexXA)( ents, i );
2816      vg_assert(ent->tag != Te_EMPTY);
2817      /* We have to substitute everything, even indirections, so as to
2818         ensure that chains of indirections don't build up. */
2819      b = TyEnt__subst_R_fields( ents, ents_cache, ent );
2820      if (b)
2821         nChanged++;
2822   }
2823
2824   return nChanged;
2825}
2826
2827
2828/* Make a pass over 'ents', building a dictionary of TyEnts as we go.
2829   Look up each new tyent in the dictionary in turn.  If it is already
2830   in the dictionary, replace this tyent with an indirection to the
2831   existing one, and delete any malloc'd stuff hanging off this one.
2832   In summary, this routine commons up all tyents that are identical
2833   as defined by TyEnt__cmp_by_all_except_cuOff. */
2834static
2835Word dedup_types_commoning_pass ( /*MOD*/XArray* /* of TyEnt */ ents )
2836{
2837   Word    n, i, nDeleted;
2838   WordFM* dict; /* TyEnt* -> void */
2839   TyEnt*  ent;
2840   UWord   keyW, valW;
2841
2842   dict = VG_(newFM)(
2843             ML_(dinfo_zalloc), "di.readdwarf3.dtcp.1",
2844             ML_(dinfo_free),
2845             (Word(*)(UWord,UWord)) ML_(TyEnt__cmp_by_all_except_cuOff)
2846          );
2847
2848   nDeleted = 0;
2849   n = VG_(sizeXA)( ents );
2850   for (i = 0; i < n; i++) {
2851      ent = VG_(indexXA)( ents, i );
2852      vg_assert(ent->tag != Te_EMPTY);
2853
2854      /* Ignore indirections, although check that they are
2855         not forming a cycle. */
2856      if (ent->tag == Te_INDIR) {
2857         vg_assert(ent->Te.INDIR.indR < ent->cuOff);
2858         continue;
2859      }
2860
2861      keyW = valW = 0;
2862      if (VG_(lookupFM)( dict, &keyW, &valW, (UWord)ent )) {
2863         /* it's already in the dictionary. */
2864         TyEnt* old = (TyEnt*)keyW;
2865         vg_assert(valW == 0);
2866         vg_assert(old != ent);
2867         vg_assert(old->tag != Te_INDIR);
2868         /* since we are traversing the array in increasing order of
2869            cuOff: */
2870         vg_assert(old->cuOff < ent->cuOff);
2871         /* So anyway, dump this entry and replace it with an
2872            indirection to the one in the dictionary.  Note that the
2873            assertion above guarantees that we cannot create cycles of
2874            indirections, since we are always creating an indirection
2875            to a tyent with a cuOff lower than this one. */
2876         ML_(TyEnt__make_EMPTY)( ent );
2877         ent->tag = Te_INDIR;
2878         ent->Te.INDIR.indR = old->cuOff;
2879         nDeleted++;
2880      } else {
2881         /* not in dictionary; add it and keep going. */
2882         VG_(addToFM)( dict, (UWord)ent, 0 );
2883      }
2884   }
2885
2886   VG_(deleteFM)( dict, NULL, NULL );
2887
2888   return nDeleted;
2889}
2890
2891
2892static
2893void dedup_types ( Bool td3,
2894                   /*MOD*/XArray* /* of TyEnt */ ents,
2895                   TyEntIndexCache* ents_cache )
2896{
2897   Word m, n, i, nDel, nSubst, nThresh;
2898   if (0) td3 = True;
2899
2900   n = VG_(sizeXA)( ents );
2901
2902   /* If a commoning pass and a substitution pass both make fewer than
2903      this many changes, just stop.  It's pointless to burn up CPU
2904      time trying to compress the last 1% or so out of the array. */
2905   nThresh = n / 200;
2906
2907   /* First we must sort .ents by its .cuOff fields, so we
2908      can index into it. */
2909   VG_(setCmpFnXA)(
2910      ents,
2911      (Int(*)(void*,void*)) ML_(TyEnt__cmp_by_cuOff_only)
2912   );
2913   VG_(sortXA)( ents );
2914
2915   /* Now repeatedly do commoning and substitution passes over
2916      the array, until there are no more changes. */
2917   do {
2918      nDel   = dedup_types_commoning_pass ( ents );
2919      nSubst = dedup_types_substitution_pass ( ents, ents_cache );
2920      vg_assert(nDel >= 0 && nSubst >= 0);
2921      TRACE_D3("   %ld deletions, %ld substitutions\n", nDel, nSubst);
2922   } while (nDel > nThresh || nSubst > nThresh);
2923
2924   /* Sanity check: all INDIR nodes should point at a non-INDIR thing.
2925      In fact this should be true at the end of every loop iteration
2926      above (a commoning pass followed by a substitution pass), but
2927      checking it on every iteration is excessively expensive.  Note,
2928      this loop also computes 'm' for the stats printing below it. */
2929   m = 0;
2930   n = VG_(sizeXA)( ents );
2931   for (i = 0; i < n; i++) {
2932      TyEnt *ent, *ind;
2933      ent = VG_(indexXA)( ents, i );
2934      if (ent->tag != Te_INDIR) continue;
2935      m++;
2936      ind = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
2937                                         ent->Te.INDIR.indR );
2938      vg_assert(ind);
2939      vg_assert(ind->tag != Te_INDIR);
2940   }
2941
2942   TRACE_D3("Overall: %ld before, %ld after\n", n, n-m);
2943}
2944
2945
2946/*------------------------------------------------------------*/
2947/*---                                                      ---*/
2948/*--- Resolution of references to type DIEs                ---*/
2949/*---                                                      ---*/
2950/*------------------------------------------------------------*/
2951
2952/* Make a pass through the (temporary) variables array.  Examine the
2953   type of each variable, check is it found, and chase any Te_INDIRs.
2954   Postcondition is: each variable has a typeR field that refers to a
2955   valid type in tyents, or a Te_UNKNOWN, and is certainly guaranteed
2956   not to refer to a Te_INDIR.  (This is so that we can throw all the
2957   Te_INDIRs away later). */
2958
2959__attribute__((noinline))
2960static void resolve_variable_types (
2961               void (*barf)( HChar* ) __attribute__((noreturn)),
2962               /*R-O*/XArray* /* of TyEnt */ ents,
2963               /*MOD*/TyEntIndexCache* ents_cache,
2964               /*MOD*/XArray* /* of TempVar* */ vars
2965            )
2966{
2967   Word i, n;
2968   n = VG_(sizeXA)( vars );
2969   for (i = 0; i < n; i++) {
2970      TempVar* var = *(TempVar**)VG_(indexXA)( vars, i );
2971      /* This is the stated type of the variable.  But it might be
2972         an indirection, so be careful. */
2973      TyEnt* ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
2974                                                var->typeR );
2975      if (ent && ent->tag == Te_INDIR) {
2976         ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
2977                                            ent->Te.INDIR.indR );
2978         vg_assert(ent);
2979         vg_assert(ent->tag != Te_INDIR);
2980      }
2981
2982      /* Deal first with "normal" cases */
2983      if (ent && ML_(TyEnt__is_type)(ent)) {
2984         var->typeR = ent->cuOff;
2985         continue;
2986      }
2987
2988      /* If there's no ent, it probably we did not manage to read a
2989         type at the cuOffset which is stated as being this variable's
2990         type.  Maybe a deficiency in parse_type_DIE.  Complain. */
2991      if (ent == NULL) {
2992         VG_(printf)("\n: Invalid cuOff = 0x%05lx\n", var->typeR );
2993         barf("resolve_variable_types: "
2994              "cuOff does not refer to a known type");
2995      }
2996      vg_assert(ent);
2997      /* If ent has any other tag, something bad happened, along the
2998         lines of var->typeR not referring to a type at all. */
2999      vg_assert(ent->tag == Te_UNKNOWN);
3000      /* Just accept it; the type will be useless, but at least keep
3001         going. */
3002      var->typeR = ent->cuOff;
3003   }
3004}
3005
3006
3007/*------------------------------------------------------------*/
3008/*---                                                      ---*/
3009/*--- Parsing of Compilation Units                         ---*/
3010/*---                                                      ---*/
3011/*------------------------------------------------------------*/
3012
3013static Int cmp_TempVar_by_dioff ( void* v1, void* v2 ) {
3014   TempVar* t1 = *(TempVar**)v1;
3015   TempVar* t2 = *(TempVar**)v2;
3016   if (t1->dioff < t2->dioff) return -1;
3017   if (t1->dioff > t2->dioff) return 1;
3018   return 0;
3019}
3020
3021static void read_DIE (
3022   /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
3023   /*MOD*/XArray* /* of TyEnt */ tyents,
3024   /*MOD*/XArray* /* of TempVar* */ tempvars,
3025   /*MOD*/XArray* /* of GExpr* */ gexprs,
3026   /*MOD*/D3TypeParser* typarser,
3027   /*MOD*/D3VarParser* varparser,
3028   Cursor* c, Bool td3, CUConst* cc, Int level
3029)
3030{
3031   Cursor abbv;
3032   ULong  atag, abbv_code;
3033   UWord  posn;
3034   UInt   has_children;
3035   UWord  start_die_c_offset, start_abbv_c_offset;
3036   UWord  after_die_c_offset, after_abbv_c_offset;
3037
3038   /* --- Deal with this DIE --- */
3039   posn      = get_position_of_Cursor( c );
3040   abbv_code = get_ULEB128( c );
3041   set_abbv_Cursor( &abbv, td3, cc, abbv_code );
3042   atag      = get_ULEB128( &abbv );
3043   TRACE_D3("\n");
3044   TRACE_D3(" <%d><%lx>: Abbrev Number: %llu (%s)\n",
3045            level, posn, abbv_code, ML_(pp_DW_TAG)( atag ) );
3046
3047   if (atag == 0)
3048      cc->barf("read_DIE: invalid zero tag on DIE");
3049
3050   has_children = get_UChar( &abbv );
3051   if (has_children != DW_children_no && has_children != DW_children_yes)
3052      cc->barf("read_DIE: invalid has_children value");
3053
3054   /* We're set up to look at the fields of this DIE.  Hand it off to
3055      any parser(s) that want to see it.  Since they will in general
3056      advance both the DIE and abbrev cursors, remember their current
3057      settings so that we can then back up and do one final pass over
3058      the DIE, to print out its contents. */
3059
3060   start_die_c_offset  = get_position_of_Cursor( c );
3061   start_abbv_c_offset = get_position_of_Cursor( &abbv );
3062
3063   while (True) {
3064      ULong cts;
3065      Int   ctsSzB;
3066      UWord ctsMemSzB;
3067      ULong at_name = get_ULEB128( &abbv );
3068      ULong at_form = get_ULEB128( &abbv );
3069      if (at_name == 0 && at_form == 0) break;
3070      TRACE_D3("     %18s: ", ML_(pp_DW_AT)(at_name));
3071      /* Get the form contents, but ignore them; the only purpose is
3072         to print them, if td3 is True */
3073      get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
3074                         cc, c, td3, (DW_FORM)at_form );
3075      TRACE_D3("\t");
3076      TRACE_D3("\n");
3077   }
3078
3079   after_die_c_offset  = get_position_of_Cursor( c );
3080   after_abbv_c_offset = get_position_of_Cursor( &abbv );
3081
3082   set_position_of_Cursor( c,     start_die_c_offset );
3083   set_position_of_Cursor( &abbv, start_abbv_c_offset );
3084
3085   parse_type_DIE( tyents,
3086                   typarser,
3087                   (DW_TAG)atag,
3088                   posn,
3089                   level,
3090                   c,     /* DIE cursor */
3091                   &abbv, /* abbrev cursor */
3092                   cc,
3093                   td3 );
3094
3095   set_position_of_Cursor( c,     start_die_c_offset );
3096   set_position_of_Cursor( &abbv, start_abbv_c_offset );
3097
3098   parse_var_DIE( rangestree,
3099                  tempvars,
3100                  gexprs,
3101                  varparser,
3102                  (DW_TAG)atag,
3103                  posn,
3104                  level,
3105                  c,     /* DIE cursor */
3106                  &abbv, /* abbrev cursor */
3107                  cc,
3108                  td3 );
3109
3110   set_position_of_Cursor( c,     after_die_c_offset );
3111   set_position_of_Cursor( &abbv, after_abbv_c_offset );
3112
3113   /* --- Now recurse into its children, if any --- */
3114   if (has_children == DW_children_yes) {
3115      if (0) TRACE_D3("BEGIN children of level %d\n", level);
3116      while (True) {
3117         atag = peek_ULEB128( c );
3118         if (atag == 0) break;
3119         read_DIE( rangestree, tyents, tempvars, gexprs,
3120                   typarser, varparser,
3121                   c, td3, cc, level+1 );
3122      }
3123      /* Now we need to eat the terminating zero */
3124      atag = get_ULEB128( c );
3125      vg_assert(atag == 0);
3126      if (0) TRACE_D3("END children of level %d\n", level);
3127   }
3128
3129}
3130
3131
3132static
3133void new_dwarf3_reader_wrk (
3134   struct _DebugInfo* di,
3135   __attribute__((noreturn)) void (*barf)( HChar* ),
3136   UChar* debug_info_img,   SizeT debug_info_sz,
3137   UChar* debug_abbv_img,   SizeT debug_abbv_sz,
3138   UChar* debug_line_img,   SizeT debug_line_sz,
3139   UChar* debug_str_img,    SizeT debug_str_sz,
3140   UChar* debug_ranges_img, SizeT debug_ranges_sz,
3141   UChar* debug_loc_img,    SizeT debug_loc_sz
3142)
3143{
3144   XArray* /* of TyEnt */     tyents;
3145   XArray* /* of TyEnt */     tyents_to_keep;
3146   XArray* /* of GExpr* */    gexprs;
3147   XArray* /* of TempVar* */  tempvars;
3148   WordFM* /* of (XArray* of AddrRange, void) */ rangestree;
3149   TyEntIndexCache* tyents_cache = NULL;
3150   TyEntIndexCache* tyents_to_keep_cache = NULL;
3151   TempVar *varp, *varp2;
3152   GExpr* gexpr;
3153   Cursor abbv; /* for showing .debug_abbrev */
3154   Cursor info; /* primary cursor for parsing .debug_info */
3155   Cursor ranges; /* for showing .debug_ranges */
3156   D3TypeParser typarser;
3157   D3VarParser varparser;
3158   Addr  dr_base;
3159   UWord dr_offset;
3160   Word  i, j, n;
3161   Bool td3 = di->trace_symtab;
3162   XArray* /* of TempVar* */ dioff_lookup_tab;
3163   Bool text_biasing_borked;
3164   KludgeyTextBiaser ktb;
3165#if 0
3166   /* This doesn't work properly because it assumes all entries are
3167      packed end to end, with no holes.  But that doesn't always
3168      appear to be the case, so it loses sync.  And the D3 spec
3169      doesn't appear to require a no-hole situation either. */
3170   /* Display .debug_loc */
3171   Addr  dl_base;
3172   UWord dl_offset;
3173   Cursor loc; /* for showing .debug_loc */
3174   TRACE_SYMTAB("\n");
3175   TRACE_SYMTAB("\n------ The contents of .debug_loc ------\n");
3176   TRACE_SYMTAB("    Offset   Begin    End      Expression\n");
3177   init_Cursor( &loc, debug_loc_img,
3178                debug_loc_sz, 0, barf,
3179                "Overrun whilst reading .debug_loc section(1)" );
3180   dl_base = 0;
3181   dl_offset = 0;
3182   while (True) {
3183      UWord  w1, w2;
3184      UWord  len;
3185      if (is_at_end_Cursor( &loc ))
3186         break;
3187
3188      /* Read a (host-)word pair.  This is something of a hack since
3189         the word size to read is really dictated by the ELF file;
3190         however, we assume we're reading a file with the same
3191         word-sizeness as the host.  Reasonably enough. */
3192      w1 = get_UWord( &loc );
3193      w2 = get_UWord( &loc );
3194
3195      if (w1 == 0 && w2 == 0) {
3196         /* end of list.  reset 'base' */
3197         TRACE_D3("    %08lx <End of list>\n", dl_offset);
3198         dl_base = 0;
3199         dl_offset = get_position_of_Cursor( &loc );
3200         continue;
3201      }
3202
3203      if (w1 == -1UL) {
3204         /* new value for 'base' */
3205         TRACE_D3("    %08lx %16lx %08lx (base address)\n",
3206                  dl_offset, w1, w2);
3207         dl_base = w2;
3208         continue;
3209      }
3210
3211      /* else a location expression follows */
3212      TRACE_D3("    %08lx %08lx %08lx ",
3213               dl_offset, w1 + dl_base, w2 + dl_base);
3214      len = (UWord)get_UShort( &loc );
3215      while (len > 0) {
3216         UChar byte = get_UChar( &loc );
3217         TRACE_D3("%02x", (UInt)byte);
3218         len--;
3219      }
3220      TRACE_SYMTAB("\n");
3221   }
3222#endif
3223
3224   /* Display .debug_ranges */
3225   TRACE_SYMTAB("\n");
3226   TRACE_SYMTAB("\n------ The contents of .debug_ranges ------\n");
3227   TRACE_SYMTAB("    Offset   Begin    End\n");
3228   init_Cursor( &ranges, debug_ranges_img,
3229                debug_ranges_sz, 0, barf,
3230                "Overrun whilst reading .debug_ranges section(1)" );
3231   dr_base = 0;
3232   dr_offset = 0;
3233   while (True) {
3234      UWord  w1, w2;
3235
3236      if (is_at_end_Cursor( &ranges ))
3237         break;
3238
3239      /* Read a (host-)word pair.  This is something of a hack since
3240         the word size to read is really dictated by the ELF file;
3241         however, we assume we're reading a file with the same
3242         word-sizeness as the host.  Reasonably enough. */
3243      w1 = get_UWord( &ranges );
3244      w2 = get_UWord( &ranges );
3245
3246      if (w1 == 0 && w2 == 0) {
3247         /* end of list.  reset 'base' */
3248         TRACE_D3("    %08lx <End of list>\n", dr_offset);
3249         dr_base = 0;
3250         dr_offset = get_position_of_Cursor( &ranges );
3251         continue;
3252      }
3253
3254      if (w1 == -1UL) {
3255         /* new value for 'base' */
3256         TRACE_D3("    %08lx %16lx %08lx (base address)\n",
3257                  dr_offset, w1, w2);
3258         dr_base = w2;
3259         continue;
3260      }
3261
3262      /* else a range [w1+base, w2+base) is denoted */
3263      TRACE_D3("    %08lx %08lx %08lx\n",
3264               dr_offset, w1 + dr_base, w2 + dr_base);
3265   }
3266
3267   /* Display .debug_abbrev */
3268   init_Cursor( &abbv, debug_abbv_img, debug_abbv_sz, 0, barf,
3269                "Overrun whilst reading .debug_abbrev section" );
3270   TRACE_SYMTAB("\n");
3271   TRACE_SYMTAB("\n------ The contents of .debug_abbrev ------\n");
3272   while (True) {
3273      if (is_at_end_Cursor( &abbv ))
3274         break;
3275      /* Read one abbreviation table */
3276      TRACE_D3("  Number TAG\n");
3277      while (True) {
3278         ULong atag;
3279         UInt  has_children;
3280         ULong acode = get_ULEB128( &abbv );
3281         if (acode == 0) break; /* end of the table */
3282         atag = get_ULEB128( &abbv );
3283         has_children = get_UChar( &abbv );
3284         TRACE_D3("   %llu      %s    [%s]\n",
3285                  acode, ML_(pp_DW_TAG)(atag),
3286                         ML_(pp_DW_children)(has_children));
3287         while (True) {
3288            ULong at_name = get_ULEB128( &abbv );
3289            ULong at_form = get_ULEB128( &abbv );
3290            if (at_name == 0 && at_form == 0) break;
3291            TRACE_D3("    %18s %s\n",
3292                     ML_(pp_DW_AT)(at_name), ML_(pp_DW_FORM)(at_form));
3293         }
3294      }
3295   }
3296   TRACE_SYMTAB("\n");
3297
3298   /* Now loop over the Compilation Units listed in the .debug_info
3299      section (see D3SPEC sec 7.5) paras 1 and 2.  Each compilation
3300      unit contains a Compilation Unit Header followed by precisely
3301      one DW_TAG_compile_unit or DW_TAG_partial_unit DIE. */
3302   init_Cursor( &info, debug_info_img, debug_info_sz, 0, barf,
3303                "Overrun whilst reading .debug_info section" );
3304
3305   /* We'll park the harvested type information in here.  Also create
3306      a fake "void" entry with offset D3_FAKEVOID_CUOFF, so we always
3307      have at least one type entry to refer to.  D3_FAKEVOID_CUOFF is
3308      huge and presumably will not occur in any valid DWARF3 file --
3309      it would need to have a .debug_info section 4GB long for that to
3310      happen.  These type entries end up in the DebugInfo. */
3311   tyents = VG_(newXA)( ML_(dinfo_zalloc),
3312                        "di.readdwarf3.ndrw.1 (TyEnt temp array)",
3313                        ML_(dinfo_free), sizeof(TyEnt) );
3314   { TyEnt tyent;
3315     VG_(memset)(&tyent, 0, sizeof(tyent));
3316     tyent.tag   = Te_TyVoid;
3317     tyent.cuOff = D3_FAKEVOID_CUOFF;
3318     tyent.Te.TyVoid.isFake = True;
3319     VG_(addToXA)( tyents, &tyent );
3320   }
3321   { TyEnt tyent;
3322     VG_(memset)(&tyent, 0, sizeof(tyent));
3323     tyent.tag   = Te_UNKNOWN;
3324     tyent.cuOff = D3_INVALID_CUOFF;
3325     VG_(addToXA)( tyents, &tyent );
3326   }
3327
3328   /* This is a tree used to unique-ify the range lists that are
3329      manufactured by parse_var_DIE.  References to the keys in the
3330      tree wind up in .rngMany fields in TempVars.  We'll need to
3331      delete this tree, and the XArrays attached to it, at the end of
3332      this function. */
3333   rangestree = VG_(newFM)( ML_(dinfo_zalloc),
3334                            "di.readdwarf3.ndrw.2 (rangestree)",
3335                            ML_(dinfo_free),
3336                            (Word(*)(UWord,UWord))cmp__XArrays_of_AddrRange );
3337
3338   /* List of variables we're accumulating.  These don't end up in the
3339      DebugInfo; instead their contents are handed to ML_(addVar) and
3340      the list elements are then deleted. */
3341   tempvars = VG_(newXA)( ML_(dinfo_zalloc),
3342                          "di.readdwarf3.ndrw.3 (TempVar*s array)",
3343                          ML_(dinfo_free),
3344                          sizeof(TempVar*) );
3345
3346   /* List of GExprs we're accumulating.  These wind up in the
3347      DebugInfo. */
3348   gexprs = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.4",
3349                        ML_(dinfo_free), sizeof(GExpr*) );
3350
3351   /* We need a D3TypeParser to keep track of partially constructed
3352      types.  It'll be discarded as soon as we've completed the CU,
3353      since the resulting information is tipped in to 'tyents' as it
3354      is generated. */
3355   VG_(memset)( &typarser, 0, sizeof(typarser) );
3356   typarser.sp = -1;
3357   typarser.language = '?';
3358   for (i = 0; i < N_D3_TYPE_STACK; i++) {
3359      typarser.qparentE[i].tag   = Te_EMPTY;
3360      typarser.qparentE[i].cuOff = D3_INVALID_CUOFF;
3361   }
3362
3363   VG_(memset)( &varparser, 0, sizeof(varparser) );
3364   varparser.sp = -1;
3365
3366   TRACE_D3("\n------ Parsing .debug_info section ------\n");
3367   while (True) {
3368      UWord   cu_start_offset, cu_offset_now;
3369      CUConst cc;
3370
3371      /* It seems icc9 finishes the DIE info before debug_info_sz
3372         bytes have been used up.  So be flexible, and declare the
3373         sequence complete if there is not enough remaining bytes to
3374         hold even the smallest conceivable CU header.  (11 bytes I
3375         reckon). */
3376      Word avail = get_remaining_length_Cursor( &info );
3377      if (avail < 11) {
3378         if (avail > 0)
3379            TRACE_D3("new_dwarf3_reader_wrk: warning: "
3380                     "%ld unused bytes after end of DIEs\n", avail);
3381         break;
3382      }
3383
3384      /* Check the varparser's stack is in a sane state. */
3385      vg_assert(varparser.sp == -1);
3386      for (i = 0; i < N_D3_VAR_STACK; i++) {
3387         vg_assert(varparser.ranges[i] == NULL);
3388         vg_assert(varparser.level[i] == 0);
3389      }
3390      for (i = 0; i < N_D3_TYPE_STACK; i++) {
3391         vg_assert(typarser.qparentE[i].cuOff == D3_INVALID_CUOFF);
3392         vg_assert(typarser.qparentE[i].tag   == Te_EMPTY);
3393         vg_assert(typarser.qlevel[i] == 0);
3394      }
3395
3396      cu_start_offset = get_position_of_Cursor( &info );
3397      TRACE_D3("\n");
3398      TRACE_D3("  Compilation Unit @ offset 0x%lx:\n", cu_start_offset);
3399      /* parse_CU_header initialises the CU's set_abbv_Cursor cache
3400         (saC_cache) */
3401      parse_CU_Header( &cc, td3, &info,
3402                       (UChar*)debug_abbv_img, debug_abbv_sz );
3403      cc.debug_str_img    = debug_str_img;
3404      cc.debug_str_sz     = debug_str_sz;
3405      cc.debug_ranges_img = debug_ranges_img;
3406      cc.debug_ranges_sz  = debug_ranges_sz;
3407      cc.debug_loc_img    = debug_loc_img;
3408      cc.debug_loc_sz     = debug_loc_sz;
3409      cc.debug_line_img   = debug_line_img;
3410      cc.debug_line_sz    = debug_line_sz;
3411      cc.cu_start_offset  = cu_start_offset;
3412      cc.di = di;
3413      /* The CU's svma can be deduced by looking at the AT_low_pc
3414         value in the top level TAG_compile_unit, which is the topmost
3415         DIE.  We'll leave it for the 'varparser' to acquire that info
3416         and fill it in -- since it is the only party to want to know
3417         it. */
3418      cc.cu_svma_known = False;
3419      cc.cu_svma       = 0;
3420
3421      /* Create a fake outermost-level range covering the entire
3422         address range.  So we always have *something* to catch all
3423         variable declarations. */
3424      varstack_push( &cc, &varparser, td3,
3425                     unitary_range_list(0UL, ~0UL),
3426                     -1, False/*isFunc*/, NULL/*fbGX*/ );
3427
3428      /* And set up the file name table.  When we come across the top
3429         level DIE for this CU (which is what the next call to
3430         read_DIE should process) we will copy all the file names out
3431         of the .debug_line img area and use this table to look up the
3432         copies when we later see filename numbers in DW_TAG_variables
3433         etc. */
3434      vg_assert(!varparser.filenameTable );
3435      varparser.filenameTable
3436         = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.5",
3437                       ML_(dinfo_free),
3438                       sizeof(UChar*) );
3439      vg_assert(varparser.filenameTable);
3440
3441      /* Now read the one-and-only top-level DIE for this CU. */
3442      vg_assert(varparser.sp == 0);
3443      read_DIE( rangestree,
3444                tyents, tempvars, gexprs,
3445                &typarser, &varparser,
3446                &info, td3, &cc, 0 );
3447
3448      cu_offset_now = get_position_of_Cursor( &info );
3449      if (1) TRACE_D3("offset now %ld, d-i-size %ld\n",
3450                      cu_offset_now, debug_info_sz);
3451      if (cu_offset_now > debug_info_sz)
3452         barf("toplevel DIEs beyond end of CU");
3453      if (cu_offset_now == debug_info_sz)
3454         break;
3455
3456      /* Preen to level -2.  DIEs have level >= 0 so -2 cannot occur
3457         anywhere else at all.  Our fake the-entire-address-space
3458         range is at level -1, so preening to -2 should completely
3459         empty the stack out. */
3460      TRACE_D3("\n");
3461      varstack_preen( &varparser, td3, -2 );
3462      /* Similarly, empty the type stack out. */
3463      typestack_preen( &typarser, td3, -2 );
3464      /* else keep going */
3465
3466      TRACE_D3("set_abbv_Cursor cache: %lu queries, %lu misses\n",
3467               cc.saC_cache_queries, cc.saC_cache_misses);
3468
3469      vg_assert(varparser.filenameTable );
3470      VG_(deleteXA)( varparser.filenameTable );
3471      varparser.filenameTable = NULL;
3472   }
3473
3474   /* From here on we're post-processing the stuff we got
3475      out of the .debug_info section. */
3476   if (td3) {
3477      TRACE_D3("\n");
3478      ML_(pp_TyEnts)(tyents, "Initial type entity (TyEnt) array");
3479      TRACE_D3("\n");
3480      TRACE_D3("------ Compressing type entries ------\n");
3481   }
3482
3483   tyents_cache = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.6",
3484                                     sizeof(TyEntIndexCache) );
3485   ML_(TyEntIndexCache__invalidate)( tyents_cache );
3486   dedup_types( td3, tyents, tyents_cache );
3487   if (td3) {
3488      TRACE_D3("\n");
3489      ML_(pp_TyEnts)(tyents, "After type entity (TyEnt) compression");
3490   }
3491
3492   TRACE_D3("\n");
3493   TRACE_D3("------ Resolving the types of variables ------\n" );
3494   resolve_variable_types( barf, tyents, tyents_cache, tempvars );
3495
3496   /* Copy all the non-INDIR tyents into a new table.  For large
3497      .so's, about 90% of the tyents will by now have been resolved to
3498      INDIRs, and we no longer need them, and so don't need to store
3499      them. */
3500   tyents_to_keep
3501      = VG_(newXA)( ML_(dinfo_zalloc),
3502                    "di.readdwarf3.ndrw.7 (TyEnt to-keep array)",
3503                    ML_(dinfo_free), sizeof(TyEnt) );
3504   n = VG_(sizeXA)( tyents );
3505   for (i = 0; i < n; i++) {
3506      TyEnt* ent = VG_(indexXA)( tyents, i );
3507      if (ent->tag != Te_INDIR)
3508         VG_(addToXA)( tyents_to_keep, ent );
3509   }
3510
3511   VG_(deleteXA)( tyents );
3512   tyents = NULL;
3513   ML_(dinfo_free)( tyents_cache );
3514   tyents_cache = NULL;
3515
3516   /* Sort tyents_to_keep so we can lookup in it.  A complete (if
3517      minor) waste of time, since tyents itself is sorted, but
3518      necessary since VG_(lookupXA) refuses to cooperate if we
3519      don't. */
3520   VG_(setCmpFnXA)(
3521      tyents_to_keep,
3522      (Int(*)(void*,void*)) ML_(TyEnt__cmp_by_cuOff_only)
3523   );
3524   VG_(sortXA)( tyents_to_keep );
3525
3526   /* Enable cacheing on tyents_to_keep */
3527   tyents_to_keep_cache
3528      = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.8",
3529                           sizeof(TyEntIndexCache) );
3530   ML_(TyEntIndexCache__invalidate)( tyents_to_keep_cache );
3531
3532   /* And record the tyents in the DebugInfo.  We do this before
3533      starting to hand variables to ML_(addVar), since if ML_(addVar)
3534      wants to do debug printing (of the types of said vars) then it
3535      will need the tyents.*/
3536   vg_assert(!di->admin_tyents);
3537   di->admin_tyents = tyents_to_keep;
3538
3539   /* Bias all the location expressions.  See
3540      "Comment_Regarding_DWARF3_Text_Biasing" above. */
3541   TRACE_D3("\n");
3542   TRACE_D3("------ Biasing the location expressions ------\n" );
3543   VG_(memset)( &ktb, 0, sizeof(ktb ));
3544   ktb.rx_map_avma = di->rx_map_avma;
3545   ktb.rx_map_size = di->rx_map_size;
3546   ktb.text_bias   = di->text_bias;
3547
3548   n = VG_(sizeXA)( gexprs );
3549   for (i = 0; i < n; i++) {
3550      gexpr = *(GExpr**)VG_(indexXA)( gexprs, i );
3551      bias_GX( gexpr, &ktb );
3552   }
3553
3554   TRACE_D3("\n");
3555   TRACE_D3("------ Acquired the following variables: ------\n\n");
3556
3557   /* Park (pointers to) all the vars in an XArray, so we can look up
3558      abstract origins quickly.  The array is sorted (hence, looked-up
3559      by) the .dioff fields.  Since the .dioffs should be in strictly
3560      ascending order, there is no need to sort the array after
3561      construction.  The ascendingness is however asserted for. */
3562   dioff_lookup_tab
3563      = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.9",
3564                    ML_(dinfo_free),
3565                    sizeof(TempVar*) );
3566   vg_assert(dioff_lookup_tab);
3567
3568   n = VG_(sizeXA)( tempvars );
3569   for (i = 0; i < n; i++) {
3570      varp = *(TempVar**)VG_(indexXA)( tempvars, i );
3571      if (i > 0) {
3572         varp2 = *(TempVar**)VG_(indexXA)( tempvars, i-1 );
3573         /* why should this hold?  Only, I think, because we've
3574            constructed the array by reading .debug_info sequentially,
3575            and so the array .dioff fields should reflect that, and be
3576            strictly ascending. */
3577         vg_assert(varp2->dioff < varp->dioff);
3578      }
3579      VG_(addToXA)( dioff_lookup_tab, &varp );
3580   }
3581   VG_(setCmpFnXA)( dioff_lookup_tab, cmp_TempVar_by_dioff );
3582   VG_(sortXA)( dioff_lookup_tab ); /* POINTLESS; FIXME: rm */
3583
3584   /* Now visit each var.  Collect up as much info as possible for
3585      each var and hand it to ML_(addVar). */
3586   n = VG_(sizeXA)( tempvars );
3587   for (j = 0; j < n; j++) {
3588      TyEnt* ent;
3589      varp = *(TempVar**)VG_(indexXA)( tempvars, j );
3590
3591      /* Possibly show .. */
3592      if (td3) {
3593         VG_(printf)("<%lx> addVar: level %d: %s :: ",
3594                     varp->dioff,
3595                     varp->level,
3596                     varp->name ? varp->name : (UChar*)"<anon_var>" );
3597         if (varp->typeR) {
3598            ML_(pp_TyEnt_C_ishly)( tyents_to_keep, varp->typeR );
3599         } else {
3600            VG_(printf)("NULL");
3601         }
3602         VG_(printf)("\n  Loc=");
3603         if (varp->gexpr) {
3604            ML_(pp_GX)(varp->gexpr);
3605         } else {
3606            VG_(printf)("NULL");
3607         }
3608         VG_(printf)("\n");
3609         if (varp->fbGX) {
3610            VG_(printf)("  FrB=");
3611            ML_(pp_GX)( varp->fbGX );
3612            VG_(printf)("\n");
3613         } else {
3614            VG_(printf)("  FrB=none\n");
3615         }
3616         VG_(printf)("  declared at: %s:%d\n",
3617                     varp->fName ? varp->fName : (UChar*)"NULL",
3618                     varp->fLine );
3619         if (varp->absOri != (UWord)D3_INVALID_CUOFF)
3620            VG_(printf)("  abstract origin: <%lx>\n", varp->absOri);
3621      }
3622
3623      /* Skip variables which have no location.  These must be
3624         abstract instances; they are useless as-is since with no
3625         location they have no specified memory location.  They will
3626         presumably be referred to via the absOri fields of other
3627         variables. */
3628      if (!varp->gexpr) {
3629         TRACE_D3("  SKIP (no location)\n\n");
3630         continue;
3631      }
3632
3633      /* So it has a location, at least.  If it refers to some other
3634         entry through its absOri field, pull in further info through
3635         that. */
3636      if (varp->absOri != (UWord)D3_INVALID_CUOFF) {
3637         Bool found;
3638         Word ixFirst, ixLast;
3639         TempVar key;
3640         TempVar* keyp = &key;
3641         TempVar *varAI;
3642         VG_(memset)(&key, 0, sizeof(key)); /* not necessary */
3643         key.dioff = varp->absOri; /* this is what we want to find */
3644         found = VG_(lookupXA)( dioff_lookup_tab, &keyp,
3645                                &ixFirst, &ixLast );
3646         if (!found)
3647            barf("DW_AT_abstract_origin can't be resolved");
3648         /* If the following fails, there is more than one entry with
3649            the same dioff.  Which can't happen. */
3650         vg_assert(ixFirst == ixLast);
3651         varAI = *(TempVar**)VG_(indexXA)( dioff_lookup_tab, ixFirst );
3652         /* stay sane */
3653         vg_assert(varAI);
3654         vg_assert(varAI->dioff == varp->absOri);
3655
3656         /* Copy what useful info we can. */
3657         if (varAI->typeR && !varp->typeR)
3658            varp->typeR = varAI->typeR;
3659         if (varAI->name && !varp->name)
3660            varp->name = varAI->name;
3661         if (varAI->fName && !varp->fName)
3662            varp->fName = varAI->fName;
3663         if (varAI->fLine > 0 && varp->fLine == 0)
3664            varp->fLine = varAI->fLine;
3665      }
3666
3667      /* Give it a name if it doesn't have one. */
3668      if (!varp->name)
3669         varp->name = ML_(addStr)( di, "<anon_var>", -1 );
3670
3671      /* So now does it have enough info to be useful? */
3672      /* NOTE: re typeR: this is a hack.  If typeR is Te_UNKNOWN then
3673         the type didn't get resolved.  Really, in that case
3674         something's broken earlier on, and should be fixed, rather
3675         than just skipping the variable. */
3676      ent = ML_(TyEnts__index_by_cuOff)( tyents_to_keep,
3677                                         tyents_to_keep_cache,
3678                                         varp->typeR );
3679      /* The next two assertions should be guaranteed by
3680         our previous call to resolve_variable_types. */
3681      vg_assert(ent);
3682      vg_assert(ML_(TyEnt__is_type)(ent) || ent->tag == Te_UNKNOWN);
3683
3684      if (ent->tag == Te_UNKNOWN) continue;
3685
3686      vg_assert(varp->gexpr);
3687      vg_assert(varp->name);
3688      vg_assert(varp->typeR);
3689      vg_assert(varp->level >= 0);
3690
3691      /* Ok.  So we're going to keep it.  Call ML_(addVar) once for
3692         each address range in which the variable exists. */
3693      TRACE_D3("  ACQUIRE for range(s) ");
3694      { AddrRange  oneRange;
3695        AddrRange* varPcRanges;
3696        Word       nVarPcRanges;
3697        /* Set up to iterate over address ranges, however
3698           represented. */
3699        if (varp->nRanges == 0 || varp->nRanges == 1) {
3700           vg_assert(!varp->rngMany);
3701           if (varp->nRanges == 0) {
3702              vg_assert(varp->rngOneMin == 0);
3703              vg_assert(varp->rngOneMax == 0);
3704           }
3705           nVarPcRanges = varp->nRanges;
3706           oneRange.aMin = varp->rngOneMin;
3707           oneRange.aMax = varp->rngOneMax;
3708           varPcRanges = &oneRange;
3709        } else {
3710           vg_assert(varp->rngMany);
3711           vg_assert(varp->rngOneMin == 0);
3712           vg_assert(varp->rngOneMax == 0);
3713           nVarPcRanges = VG_(sizeXA)(varp->rngMany);
3714           vg_assert(nVarPcRanges >= 2);
3715           vg_assert(nVarPcRanges == (Word)varp->nRanges);
3716           varPcRanges = VG_(indexXA)(varp->rngMany, 0);
3717        }
3718        if (varp->level == 0)
3719           vg_assert( nVarPcRanges == 1 );
3720        /* and iterate */
3721        for (i = 0; i < nVarPcRanges; i++) {
3722           Addr pcMin = varPcRanges[i].aMin;
3723           Addr pcMax = varPcRanges[i].aMax;
3724           vg_assert(pcMin <= pcMax);
3725           /* Level 0 is the global address range.  So at level 0 we
3726              don't want to bias pcMin/pcMax; but at all other levels
3727              we do since those are derived from svmas in the Dwarf
3728              we're reading.  Be paranoid ... */
3729           if (varp->level == 0) {
3730              vg_assert(pcMin == (Addr)0);
3731              vg_assert(pcMax == ~(Addr)0);
3732           } else {
3733              /* vg_assert(pcMin > (Addr)0);
3734                 No .. we can legitimately expect to see ranges like
3735                 0x0-0x11D (pre-biasing, of course). */
3736              vg_assert(pcMax < ~(Addr)0);
3737           }
3738
3739           /* Apply text biasing, for non-global variables. */
3740           if (varp->level > 0) {
3741              pcMin = apply_kludgey_text_bias( &ktb, pcMin );
3742              pcMax = apply_kludgey_text_bias( &ktb, pcMax );
3743           }
3744
3745           if (i > 0 && (i%2) == 0)
3746              TRACE_D3("\n                       ");
3747           TRACE_D3("[%#lx,%#lx] ", pcMin, pcMax );
3748
3749           ML_(addVar)(
3750              di, varp->level,
3751                  pcMin, pcMax,
3752                  varp->name,  varp->typeR,
3753                  varp->gexpr, varp->fbGX,
3754                  varp->fName, varp->fLine, td3
3755           );
3756        }
3757      }
3758
3759      TRACE_D3("\n\n");
3760      /* and move on to the next var */
3761   }
3762
3763   /* For the text biasing to work out, we expect that:
3764      - there were no failures, and
3765      - either all were done straightforwardly, or all kludgily,
3766        but not with a mixture
3767   */
3768   text_biasing_borked
3769      = ktb.n_failed_biasings > 0
3770        || (ktb.n_straightforward_biasings > 0 && ktb.n_kludgey_biasings > 0);
3771
3772   if (td3 || text_biasing_borked) {
3773      VG_(printf)("TEXT SVMA BIASING STATISTICS:\n");
3774      VG_(printf)("   straightforward biasings: %lu\n",
3775                  ktb.n_straightforward_biasings );
3776      VG_(printf)("           kludgey biasings: %lu\n",
3777                  ktb.n_kludgey_biasings );
3778      VG_(printf)("            failed biasings: %lu\n\n",
3779                  ktb.n_failed_biasings );
3780   }
3781   if (text_biasing_borked)
3782      barf("couldn't make sense of DWARF3 text-svma biasing; details above");
3783
3784   /* Now free all the TempVars */
3785   n = VG_(sizeXA)( tempvars );
3786   for (i = 0; i < n; i++) {
3787      varp = *(TempVar**)VG_(indexXA)( tempvars, i );
3788      ML_(dinfo_free)(varp);
3789   }
3790   VG_(deleteXA)( tempvars );
3791   tempvars = NULL;
3792
3793   /* and the temp lookup table */
3794   VG_(deleteXA)( dioff_lookup_tab );
3795
3796   /* and the ranges tree.  Note that we need to also free the XArrays
3797      which constitute the keys, hence pass VG_(deleteXA) as a
3798      key-finalizer. */
3799   VG_(deleteFM)( rangestree, (void(*)(UWord))VG_(deleteXA), NULL );
3800
3801   /* and the tyents_to_keep cache */
3802   ML_(dinfo_free)( tyents_to_keep_cache );
3803   tyents_to_keep_cache = NULL;
3804
3805   /* and the file name table (just the array, not the entries
3806      themselves).  (Apparently, 2008-Oct-23, varparser.filenameTable
3807      can be NULL here, for icc9 generated Dwarf3.  Not sure what that
3808      signifies (a deeper problem with the reader?)) */
3809   if (varparser.filenameTable) {
3810      VG_(deleteXA)( varparser.filenameTable );
3811      varparser.filenameTable = NULL;
3812   }
3813
3814   /* record the GExprs in di so they can be freed later */
3815   vg_assert(!di->admin_gexprs);
3816   di->admin_gexprs = gexprs;
3817}
3818
3819
3820/*------------------------------------------------------------*/
3821/*---                                                      ---*/
3822/*--- The "new" DWARF3 reader -- top level control logic   ---*/
3823/*---                                                      ---*/
3824/*------------------------------------------------------------*/
3825
3826/* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
3827#include <setjmp.h>   /* For jmp_buf */
3828/* --- !!! --- EXTERNAL HEADERS end --- !!! --- */
3829
3830static Bool    d3rd_jmpbuf_valid  = False;
3831static HChar*  d3rd_jmpbuf_reason = NULL;
3832static jmp_buf d3rd_jmpbuf;
3833
3834static __attribute__((noreturn)) void barf ( HChar* reason ) {
3835   vg_assert(d3rd_jmpbuf_valid);
3836   d3rd_jmpbuf_reason = reason;
3837   __builtin_longjmp(&d3rd_jmpbuf, 1);
3838   /*NOTREACHED*/
3839   vg_assert(0);
3840}
3841
3842
3843void
3844ML_(new_dwarf3_reader) (
3845   struct _DebugInfo* di,
3846   UChar* debug_info_img,   SizeT debug_info_sz,
3847   UChar* debug_abbv_img,   SizeT debug_abbv_sz,
3848   UChar* debug_line_img,   SizeT debug_line_sz,
3849   UChar* debug_str_img,    SizeT debug_str_sz,
3850   UChar* debug_ranges_img, SizeT debug_ranges_sz,
3851   UChar* debug_loc_img,    SizeT debug_loc_sz
3852)
3853{
3854   volatile Int  jumped;
3855   volatile Bool td3 = di->trace_symtab;
3856
3857   /* Run the _wrk function to read the dwarf3.  If it succeeds, it
3858      just returns normally.  If there is any failure, it longjmp's
3859      back here, having first set d3rd_jmpbuf_reason to something
3860      useful. */
3861   vg_assert(d3rd_jmpbuf_valid  == False);
3862   vg_assert(d3rd_jmpbuf_reason == NULL);
3863
3864   d3rd_jmpbuf_valid = True;
3865   jumped = __builtin_setjmp(&d3rd_jmpbuf);
3866   if (jumped == 0) {
3867      /* try this ... */
3868      new_dwarf3_reader_wrk( di, barf,
3869                             debug_info_img,   debug_info_sz,
3870                             debug_abbv_img,   debug_abbv_sz,
3871                             debug_line_img,   debug_line_sz,
3872                             debug_str_img,    debug_str_sz,
3873                             debug_ranges_img, debug_ranges_sz,
3874                             debug_loc_img,    debug_loc_sz );
3875      d3rd_jmpbuf_valid = False;
3876      TRACE_D3("\n------ .debug_info reading was successful ------\n");
3877   } else {
3878      /* It longjmp'd. */
3879      d3rd_jmpbuf_valid = False;
3880      /* Can't longjump without giving some sort of reason. */
3881      vg_assert(d3rd_jmpbuf_reason != NULL);
3882
3883      TRACE_D3("\n------ .debug_info reading failed ------\n");
3884
3885      ML_(symerr)(di, True, d3rd_jmpbuf_reason);
3886   }
3887
3888   d3rd_jmpbuf_valid  = False;
3889   d3rd_jmpbuf_reason = NULL;
3890}
3891
3892
3893
3894/* --- Unused code fragments which might be useful one day. --- */
3895
3896#if 0
3897   /* Read the arange tables */
3898   TRACE_SYMTAB("\n");
3899   TRACE_SYMTAB("\n------ The contents of .debug_arange ------\n");
3900   init_Cursor( &aranges, debug_aranges_img,
3901                debug_aranges_sz, 0, barf,
3902                "Overrun whilst reading .debug_aranges section" );
3903   while (True) {
3904      ULong  len, d_i_offset;
3905      Bool   is64;
3906      UShort version;
3907      UChar  asize, segsize;
3908
3909      if (is_at_end_Cursor( &aranges ))
3910         break;
3911      /* Read one arange thingy */
3912      /* initial_length field */
3913      len = get_Initial_Length( &is64, &aranges,
3914               "in .debug_aranges: invalid initial-length field" );
3915      version    = get_UShort( &aranges );
3916      d_i_offset = get_Dwarfish_UWord( &aranges, is64 );
3917      asize      = get_UChar( &aranges );
3918      segsize    = get_UChar( &aranges );
3919      TRACE_D3("  Length:                   %llu\n", len);
3920      TRACE_D3("  Version:                  %d\n", (Int)version);
3921      TRACE_D3("  Offset into .debug_info:  %llx\n", d_i_offset);
3922      TRACE_D3("  Pointer Size:             %d\n", (Int)asize);
3923      TRACE_D3("  Segment Size:             %d\n", (Int)segsize);
3924      TRACE_D3("\n");
3925      TRACE_D3("    Address            Length\n");
3926
3927      while ((get_position_of_Cursor( &aranges ) % (2 * asize)) > 0) {
3928         (void)get_UChar( & aranges );
3929      }
3930      while (True) {
3931         ULong address = get_Dwarfish_UWord( &aranges, asize==8 );
3932         ULong length = get_Dwarfish_UWord( &aranges, asize==8 );
3933         TRACE_D3("    0x%016llx 0x%llx\n", address, length);
3934         if (address == 0 && length == 0) break;
3935      }
3936   }
3937   TRACE_SYMTAB("\n");
3938#endif
3939
3940/*--------------------------------------------------------------------*/
3941/*--- end                                             readdwarf3.c ---*/
3942/*--------------------------------------------------------------------*/
3943