1
2/*--------------------------------------------------------------------*/
3/*--- Read DWARF3/4 ".debug_info" sections (DIE trees).            ---*/
4/*---                                                 readdwarf3.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8   This file is part of Valgrind, a dynamic binary instrumentation
9   framework.
10
11   Copyright (C) 2008-2012 OpenWorks LLP
12      info@open-works.co.uk
13
14   This program is free software; you can redistribute it and/or
15   modify it under the terms of the GNU General Public License as
16   published by the Free Software Foundation; either version 2 of the
17   License, or (at your option) any later version.
18
19   This program is distributed in the hope that it will be useful, but
20   WITHOUT ANY WARRANTY; without even the implied warranty of
21   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22   General Public License for more details.
23
24   You should have received a copy of the GNU General Public License
25   along with this program; if not, write to the Free Software
26   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27   02111-1307, USA.
28
29   The GNU General Public License is contained in the file COPYING.
30
31   Neither the names of the U.S. Department of Energy nor the
32   University of California nor the names of its contributors may be
33   used to endorse or promote products derived from this software
34   without prior written permission.
35*/
36
37#if defined(VGO_linux) || defined(VGO_darwin)
38
39/* REFERENCE (without which this code will not make much sense):
40
41   DWARF Debugging Information Format, Version 3,
42   dated 20 December 2005 (the "D3 spec").
43
44   Available at http://www.dwarfstd.org/Dwarf3.pdf.  There's also a
45   .doc (MS Word) version, but for some reason the section numbers
46   between the Word and PDF versions differ by 1 in the first digit.
47   All section references in this code are to the PDF version.
48
49   CURRENT HACKS:
50
51   DW_TAG_{const,volatile}_type no DW_AT_type is allowed; it is
52      assumed to mean "const void" or "volatile void" respectively.
53      GDB appears to interpret them like this, anyway.
54
55   In many cases it is important to know the svma of a CU (the "base
56   address of the CU", as the D3 spec calls it).  There are some
57   situations in which the spec implies this value is unknown, but the
   Dwarf3 produced by gcc-4.1 seems to assume it is not unknown but
59   merely zero when not explicitly stated.  So we too have to make
60   that assumption.
61
62   POTENTIAL BUG?  Spotted 6 Sept 08.  Why doesn't
63   unitary_range_list() bias the resulting range list in the same way
64   that its more general cousin, get_range_list(), does?  I don't
65   know.
66
67   TODO, 2008 Feb 17:
68
69   get rid of cu_svma_known and document the assumed-zero svma hack.
70
71   ML_(sizeOfType): differentiate between zero sized types and types
72   for which the size is unknown.  Is this important?  I don't know.
73
74   DW_TAG_array_types: deal with explicit sizes (currently we compute
   the size from the bounds and the element size, although that's
   fragile if the bounds are incompletely specified or completely
   absent)
78
79   Document reason for difference (by 1) of stack preening depth in
80   parse_var_DIE vs parse_type_DIE.
81
82   Don't hand to ML_(addVars), vars whose locations are entirely in
83   registers (DW_OP_reg*).  This is merely a space-saving
84   optimisation, as ML_(evaluate_Dwarf3_Expr) should handle these
85   expressions correctly, by failing to evaluate them and hence
86   effectively ignoring the variable with which they are associated.
87
88   Deal with DW_TAG_array_types which have element size != stride
89
90   In some cases, the info for a variable is split between two
91   different DIEs (generally a declarer and a definer).  We punt on
92   these.  Could do better here.
93
94   The 'data_bias' argument passed to the expression evaluator
95   (ML_(evaluate_Dwarf3_Expr)) should really be changed to a
96   MaybeUWord, to make it clear when we do vs don't know what it is
97   for the evaluation of an expression.  At the moment zero is passed
98   for this parameter in the don't know case.  That's a bit fragile
99   and obscure; using a MaybeUWord would be clearer.
100
101   POTENTIAL PERFORMANCE IMPROVEMENTS:
102
103   Currently, duplicate removal and all other queries for the type
104   entities array is done using cuOffset-based pointing, which
105   involves a binary search (VG_(lookupXA)) for each access.  This is
106   wildly inefficient, although simple.  It would be better to
107   translate all the cuOffset-based references (iow, all the "R" and
108   "Rs" fields in the TyEnts in 'tyents') to direct index numbers in
109   'tyents' right at the start of dedup_types(), and use direct
110   indexing (VG_(indexXA)) wherever possible after that.
111
112   cmp__XArrays_of_AddrRange is also a performance bottleneck.  Move
113   VG_(indexXA) into pub_tool_xarray.h so it can be inlined at all use
114   points, and possibly also make an _UNCHECKED version which skips
115   the range checks in performance-critical situations such as this.
116
117   Handle interaction between read_DIE and parse_{var,type}_DIE
118   better.  Currently read_DIE reads the entire DIE just to find where
119   the end is (and for debug printing), so that it can later reliably
120   move the cursor to the end regardless of what parse_{var,type}_DIE
121   do.  This means many DIEs (most, even?) are read twice.  It would
122   be smarter to make parse_{var,type}_DIE return a Bool indicating
123   whether or not they advanced the DIE cursor, and only if they
124   didn't should read_DIE itself read through the DIE.
125
126   ML_(addVar) and add_var_to_arange: quite a lot of DiAddrRanges have
127   zero variables in their .vars XArray.  Rather than have an XArray
128   with zero elements (which uses 2 malloc'd blocks), allow the .vars
129   pointer to be NULL in this case.
130
131   More generally, reduce the amount of memory allocated and freed
132   while reading Dwarf3 type/variable information.  Even modest (20MB)
133   objects cause this module to allocate and free hundreds of
134   thousands of small blocks, and ML_(arena_malloc) and its various
135   groupies always show up at the top of performance profiles. */
136
137#include "pub_core_basics.h"
138#include "pub_core_debuginfo.h"
139#include "pub_core_libcbase.h"
140#include "pub_core_libcassert.h"
141#include "pub_core_libcprint.h"
142#include "pub_core_libcsetjmp.h"   // setjmp facilities
143#include "pub_core_hashtable.h"
144#include "pub_core_options.h"
145#include "pub_core_tooliface.h"    /* VG_(needs) */
146#include "pub_core_xarray.h"
147#include "pub_core_wordfm.h"
148#include "priv_misc.h"             /* dinfo_zalloc/free */
149#include "priv_tytypes.h"
150#include "priv_d3basics.h"
151#include "priv_storage.h"
152#include "priv_readdwarf3.h"       /* self */
153
154
155/*------------------------------------------------------------*/
156/*---                                                      ---*/
157/*--- Basic machinery for parsing DIEs.                    ---*/
158/*---                                                      ---*/
159/*------------------------------------------------------------*/
160
/* Debug tracing: prints via VG_(printf) only when a variable named
   'td3', which must be in scope at the point of use, is true.

   The expansion is wrapped in do { } while (0) so that an invocation
   followed by ';' forms exactly one statement.  This keeps the macro
   safe inside unbraced if/else constructs, where a bare 'if (td3) {}'
   expansion would capture a following 'else'. */
#define TRACE_D3(format, args...) \
   do { if (td3) { VG_(printf)(format, ## args); } } while (0)

/* Reserved values at the very top of the UWord range.  Judging by the
   names they mark an invalid cuOffset and a fake 'void' type
   respectively -- confirm against their uses later in the file. */
#define D3_INVALID_CUOFF  ((UWord)(-1UL))
#define D3_FAKEVOID_CUOFF ((UWord)(-2UL))
166
167typedef
168   struct {
169      UChar* region_start_img;
170      UWord  region_szB;
171      UWord  region_next;
172      void (*barf)( HChar* ) __attribute__((noreturn));
173      HChar* barfstr;
174   }
175   Cursor;
176
177static inline Bool is_sane_Cursor ( Cursor* c ) {
178   if (!c)                return False;
179   if (!c->barf)          return False;
180   if (!c->barfstr)       return False;
181   return True;
182}
183
184static void init_Cursor ( Cursor* c,
185                          UChar*  region_start_img,
186                          UWord   region_szB,
187                          UWord   region_next,
188                          __attribute__((noreturn)) void (*barf)( HChar* ),
189                          HChar*  barfstr )
190{
191   vg_assert(c);
192   VG_(memset)(c, 0, sizeof(*c));
193   c->region_start_img = region_start_img;
194   c->region_szB       = region_szB;
195   c->region_next      = region_next;
196   c->barf             = barf;
197   c->barfstr          = barfstr;
198   vg_assert(is_sane_Cursor(c));
199}
200
201static Bool is_at_end_Cursor ( Cursor* c ) {
202   vg_assert(is_sane_Cursor(c));
203   return c->region_next >= c->region_szB;
204}
205
206static inline UWord get_position_of_Cursor ( Cursor* c ) {
207   vg_assert(is_sane_Cursor(c));
208   return c->region_next;
209}
210static inline void set_position_of_Cursor ( Cursor* c, UWord pos ) {
211   c->region_next = pos;
212   vg_assert(is_sane_Cursor(c));
213}
214
215static /*signed*/Word get_remaining_length_Cursor ( Cursor* c ) {
216   vg_assert(is_sane_Cursor(c));
217   return c->region_szB - c->region_next;
218}
219
220static UChar* get_address_of_Cursor ( Cursor* c ) {
221   vg_assert(is_sane_Cursor(c));
222   return &c->region_start_img[ c->region_next ];
223}
224
225/* FIXME: document assumptions on endianness for
226   get_UShort/UInt/ULong. */
227static inline UChar get_UChar ( Cursor* c ) {
228   UChar r;
229   /* vg_assert(is_sane_Cursor(c)); */
230   if (c->region_next + sizeof(UChar) > c->region_szB) {
231      c->barf(c->barfstr);
232      /*NOTREACHED*/
233      vg_assert(0);
234   }
235   r = * (UChar*) &c->region_start_img[ c->region_next ];
236   c->region_next += sizeof(UChar);
237   return r;
238}
239static UShort get_UShort ( Cursor* c ) {
240   UShort r;
241   vg_assert(is_sane_Cursor(c));
242   if (c->region_next + sizeof(UShort) > c->region_szB) {
243      c->barf(c->barfstr);
244      /*NOTREACHED*/
245      vg_assert(0);
246   }
247   r = ML_(read_UShort)(&c->region_start_img[ c->region_next ]);
248   c->region_next += sizeof(UShort);
249   return r;
250}
251static UInt get_UInt ( Cursor* c ) {
252   UInt r;
253   vg_assert(is_sane_Cursor(c));
254   if (c->region_next + sizeof(UInt) > c->region_szB) {
255      c->barf(c->barfstr);
256      /*NOTREACHED*/
257      vg_assert(0);
258   }
259   r = ML_(read_UInt)(&c->region_start_img[ c->region_next ]);
260   c->region_next += sizeof(UInt);
261   return r;
262}
263static ULong get_ULong ( Cursor* c ) {
264   ULong r;
265   vg_assert(is_sane_Cursor(c));
266   if (c->region_next + sizeof(ULong) > c->region_szB) {
267      c->barf(c->barfstr);
268      /*NOTREACHED*/
269      vg_assert(0);
270   }
271   r = ML_(read_ULong)(&c->region_start_img[ c->region_next ]);
272   c->region_next += sizeof(ULong);
273   return r;
274}
275static inline ULong get_ULEB128 ( Cursor* c ) {
276   ULong result;
277   Int   shift;
278   UChar byte;
279   /* unroll first iteration */
280   byte = get_UChar( c );
281   result = (ULong)(byte & 0x7f);
282   if (LIKELY(!(byte & 0x80))) return result;
283   shift = 7;
284   /* end unroll first iteration */
285   do {
286      byte = get_UChar( c );
287      result |= ((ULong)(byte & 0x7f)) << shift;
288      shift += 7;
289   } while (byte & 0x80);
290   return result;
291}
292static Long get_SLEB128 ( Cursor* c ) {
293   ULong  result = 0;
294   Int    shift = 0;
295   UChar  byte;
296   do {
297      byte = get_UChar(c);
298      result |= ((ULong)(byte & 0x7f)) << shift;
299      shift += 7;
300   } while (byte & 0x80);
301   if (shift < 64 && (byte & 0x40))
302      result |= -(1ULL << shift);
303   return result;
304}
305
306/* Assume 'c' points to the start of a string.  Return the absolute
307   address of whatever it points at, and advance it past the
308   terminating zero.  This makes it safe for the caller to then copy
309   the string with ML_(addStr), since (w.r.t. image overruns) the
310   process of advancing past the terminating zero will already have
311   "vetted" the string. */
312static UChar* get_AsciiZ ( Cursor* c ) {
313   UChar  uc;
314   UChar* res = get_address_of_Cursor(c);
315   do { uc = get_UChar(c); } while (uc != 0);
316   return res;
317}
318
319static ULong peek_ULEB128 ( Cursor* c ) {
320   Word here = c->region_next;
321   ULong r = get_ULEB128( c );
322   c->region_next = here;
323   return r;
324}
325static UChar peek_UChar ( Cursor* c ) {
326   Word here = c->region_next;
327   UChar r = get_UChar( c );
328   c->region_next = here;
329   return r;
330}
331
332static ULong get_Dwarfish_UWord ( Cursor* c, Bool is_dw64 ) {
333   return is_dw64 ? get_ULong(c) : (ULong) get_UInt(c);
334}
335
336static UWord get_UWord ( Cursor* c ) {
337   vg_assert(sizeof(UWord) == sizeof(void*));
338   if (sizeof(UWord) == 4) return get_UInt(c);
339   if (sizeof(UWord) == 8) return get_ULong(c);
340   vg_assert(0);
341}
342
343/* Read a DWARF3 'Initial Length' field */
344static ULong get_Initial_Length ( /*OUT*/Bool* is64,
345                                  Cursor* c,
346                                  HChar* barfMsg )
347{
348   ULong w64;
349   UInt  w32;
350   *is64 = False;
351   w32 = get_UInt( c );
352   if (w32 >= 0xFFFFFFF0 && w32 < 0xFFFFFFFF) {
353      c->barf( barfMsg );
354   }
355   else if (w32 == 0xFFFFFFFF) {
356      *is64 = True;
357      w64   = get_ULong( c );
358   } else {
359      *is64 = False;
360      w64 = (ULong)w32;
361   }
362   return w64;
363}
364
365
366/*------------------------------------------------------------*/
367/*---                                                      ---*/
368/*--- "CUConst" structure                                  ---*/
369/*---                                                      ---*/
370/*------------------------------------------------------------*/
371
372#define N_ABBV_CACHE 32
373
374/* Holds information that is constant through the parsing of a
375   Compilation Unit.  This is basically plumbed through to
376   everywhere. */
377typedef
378   struct {
379      /* Call here if anything goes wrong */
380      void (*barf)( HChar* ) __attribute__((noreturn));
381      /* Is this 64-bit DWARF ? */
382      Bool   is_dw64;
383      /* Which DWARF version ?  (2, 3 or 4) */
384      UShort version;
385      /* Length of this Compilation Unit, as stated in the
386         .unit_length :: InitialLength field of the CU Header.
387         However, this size (as specified by the D3 spec) does not
388         include the size of the .unit_length field itself, which is
389         either 4 or 12 bytes (32-bit or 64-bit Dwarf3).  That value
390         can be obtained through the expression ".is_dw64 ? 12 : 4". */
391      ULong  unit_length;
392      /* Offset of start of this unit in .debug_info */
393      UWord  cu_start_offset;
394      /* SVMA for this CU.  In the D3 spec, is known as the "base
395         address of the compilation unit (last para sec 3.1.1).
396         Needed for (amongst things) interpretation of location-list
397         values. */
398      Addr   cu_svma;
399      Bool   cu_svma_known;
400      /* The debug_abbreviations table to be used for this Unit */
401      UChar* debug_abbv;
402      /* Upper bound on size thereof (an overestimate, in general) */
403      UWord  debug_abbv_maxszB;
404      /* Where is .debug_str ? */
405      UChar* debug_str_img;
406      UWord  debug_str_sz;
407      /* Where is .debug_ranges ? */
408      UChar* debug_ranges_img;
409      UWord  debug_ranges_sz;
410      /* Where is .debug_loc ? */
411      UChar* debug_loc_img;
412      UWord  debug_loc_sz;
413      /* Where is .debug_line? */
414      UChar* debug_line_img;
415      UWord  debug_line_sz;
416      /* Where is .debug_info? */
417      UChar* debug_info_img;
418      UWord  debug_info_sz;
419      /* Where is .debug_types? */
420      UChar* debug_types_img;
421      UWord  debug_types_sz;
422      /* Where is alternate .debug_info? */
423      UChar* debug_info_alt_img;
424      UWord  debug_info_alt_sz;
425      /* Where is alternate .debug_str ? */
426      UChar* debug_str_alt_img;
427      UWord  debug_str_alt_sz;
428      /* How much to add to .debug_types resp. alternate .debug_info offsets
429         in cook_die*.  */
430      UWord  types_cuOff_bias;
431      UWord  alt_cuOff_bias;
432      /* --- Needed so we can add stuff to the string table. --- */
433      struct _DebugInfo* di;
434      /* --- a cache for set_abbv_Cursor --- */
435      /* abbv_code == (ULong)-1 for an unused entry. */
436      struct { ULong abbv_code; UWord posn; } saC_cache[N_ABBV_CACHE];
437      UWord saC_cache_queries;
438      UWord saC_cache_misses;
439
440      /* True if this came from .debug_types; otherwise it came from
441         .debug_info.  */
442      Bool is_type_unit;
443      /* For a unit coming from .debug_types, these hold the TU's type
444         signature and the uncooked DIE offset of the TU's signatured
445         type.  For a unit coming from .debug_info, these are unused.  */
446      ULong type_signature;
447      ULong type_offset;
448
449      /* Signatured type hash; computed once and then shared by all
450         CUs.  */
451      VgHashTable signature_types;
452
453      /* True if this came from alternate .debug_info; otherwise
454         it came from normal .debug_info or .debug_types.  */
455      Bool is_alt_info;
456   }
457   CUConst;
458
459
460/* Return the cooked value of DIE depending on whether CC represents a
461   .debug_types unit.  To cook a DIE, we pretend that the .debug_info,
462   .debug_types and optional alternate .debug_info sections form
463   a contiguous whole, so that DIEs coming from .debug_types are numbered
464   starting at the end of .debug_info and DIEs coming from alternate
465   .debug_info are numbered starting at the end of .debug_types.  */
466static UWord cook_die( CUConst* cc, UWord die )
467{
468   if (cc->is_type_unit)
469      die += cc->types_cuOff_bias;
470   else if (cc->is_alt_info)
471      die += cc->alt_cuOff_bias;
472   return die;
473}
474
475/* Like cook_die, but understand that DIEs coming from a
476   DW_FORM_ref_sig8 reference are already cooked.  Also, handle
477   DW_FORM_GNU_ref_alt from within primary .debug_info or .debug_types
478   as reference to alternate .debug_info.  */
479static UWord cook_die_using_form( CUConst *cc, UWord die, DW_FORM form)
480{
481   if (form == DW_FORM_ref_sig8)
482      return die;
483   if (form == DW_FORM_GNU_ref_alt)
484      return die + cc->alt_cuOff_bias;
485   return cook_die( cc, die );
486}
487
488/* Return the uncooked offset of DIE and set *TYPE_FLAG to true if the DIE
489   came from the .debug_types section and *ALT_FLAG to true if the DIE
490   came from alternate .debug_info section.  */
491static UWord uncook_die( CUConst *cc, UWord die, /*OUT*/Bool *type_flag,
492                         Bool *alt_flag )
493{
494   *alt_flag = False;
495   *type_flag = False;
496   if (die >= cc->debug_info_sz) {
497      if (die >= cc->debug_info_sz + cc->debug_types_sz) {
498         *alt_flag = True;
499         die -= cc->debug_info_sz + cc->debug_types_sz;
500      } else {
501         *type_flag = True;
502         die -= cc->debug_info_sz;
503      }
504   }
505   return die;
506}
507
508/*------------------------------------------------------------*/
509/*---                                                      ---*/
510/*--- Helper functions for Guarded Expressions             ---*/
511/*---                                                      ---*/
512/*------------------------------------------------------------*/
513
514/* Parse the location list starting at img-offset 'debug_loc_offset'
515   in .debug_loc.  Results are biased with 'svma_of_referencing_CU'
516   and so I believe are correct SVMAs for the object as a whole.  This
517   function allocates the UChar*, and the caller must deallocate it.
518   The resulting block is in so-called Guarded-Expression format.
519
520   Guarded-Expression format is similar but not identical to the DWARF3
521   location-list format.  The format of each returned block is:
522
523      UChar biasMe;
524      UChar isEnd;
525      followed by zero or more of
526
527      (Addr aMin;  Addr aMax;  UShort nbytes;  ..bytes..;  UChar isEnd)
528
   '..bytes..' is a standard DWARF3 location expression which is
   valid when aMin <= pc <= aMax (possibly after suitable biasing).
531
532   The number of bytes in '..bytes..' is nbytes.
533
534   The end of the sequence is marked by an isEnd == 1 value.  All
535   previous isEnd values must be zero.
536
   biasMe is 1 if the aMin/aMax fields need this DebugInfo's
   text_bias added before use, and 0 if this is not necessary
   (the GX is ready to go).
540
541   Hence the block can be quickly parsed and is self-describing.  Note
542   that aMax is 1 less than the corresponding value in a DWARF3
543   location list.  Zero length ranges, with aMax == aMin-1, are not
544   allowed.
545*/
546/* 2008-sept-12: moved ML_(pp_GX) from here to d3basics.c, where
547   it more logically belongs. */
548
549
550/* Apply a text bias to a GX. */
551static void bias_GX ( /*MOD*/GExpr* gx, struct _DebugInfo* di )
552{
553   UShort nbytes;
554   UChar* p = &gx->payload[0];
555   UChar* pA;
556   UChar  uc;
557   uc = *p++; /*biasMe*/
558   if (uc == 0)
559      return;
560   vg_assert(uc == 1);
561   p[-1] = 0; /* mark it as done */
562   while (True) {
563      uc = *p++;
564      if (uc == 1)
565         break; /*isEnd*/
566      vg_assert(uc == 0);
567      /* t-bias aMin */
568      pA = (UChar*)p;
569      ML_(write_Addr)(pA, ML_(read_Addr)(pA) + di->text_debug_bias);
570      p += sizeof(Addr);
571      /* t-bias aMax */
572      pA = (UChar*)p;
573      ML_(write_Addr)(pA, ML_(read_Addr)(pA) + di->text_debug_bias);
574      p += sizeof(Addr);
575      /* nbytes, and actual expression */
576      nbytes = ML_(read_UShort)(p); p += sizeof(UShort);
577      p += nbytes;
578   }
579}
580
581__attribute__((noinline))
582static GExpr* make_singleton_GX ( UChar* block, UWord nbytes )
583{
584   SizeT  bytesReqd;
585   GExpr* gx;
586   UChar *p, *pstart;
587
588   vg_assert(sizeof(UWord) == sizeof(Addr));
589   vg_assert(nbytes <= 0xFFFF); /* else we overflow the nbytes field */
590   bytesReqd
591      =   sizeof(UChar)  /*biasMe*/    + sizeof(UChar) /*!isEnd*/
592        + sizeof(UWord)  /*aMin*/      + sizeof(UWord) /*aMax*/
593        + sizeof(UShort) /*nbytes*/    + nbytes
594        + sizeof(UChar); /*isEnd*/
595
596   gx = ML_(dinfo_zalloc)( "di.readdwarf3.msGX.1",
597                           sizeof(GExpr) + bytesReqd );
598   vg_assert(gx);
599
600   p = pstart = &gx->payload[0];
601
602   p = ML_(write_UChar)(p, 0);        /*biasMe*/
603   p = ML_(write_UChar)(p, 0);        /*!isEnd*/
604   p = ML_(write_Addr)(p, 0);         /*aMin*/
605   p = ML_(write_Addr)(p, ~0);        /*aMax*/
606   p = ML_(write_UShort)(p, nbytes);  /*nbytes*/
607   VG_(memcpy)(p, block, nbytes); p += nbytes;
608   p = ML_(write_UChar)(p, 1);        /*isEnd*/
609
610   vg_assert( (SizeT)(p - pstart) == bytesReqd);
611   vg_assert( &gx->payload[bytesReqd]
612              == ((UChar*)gx) + sizeof(GExpr) + bytesReqd );
613
614   return gx;
615}
616
617__attribute__((noinline))
618static GExpr* make_general_GX ( CUConst* cc,
619                                Bool     td3,
620                                UWord    debug_loc_offset,
621                                Addr     svma_of_referencing_CU )
622{
623   Addr      base;
624   Cursor    loc;
625   XArray*   xa; /* XArray of UChar */
626   GExpr*    gx;
627   Word      nbytes;
628
629   vg_assert(sizeof(UWord) == sizeof(Addr));
630   if (cc->debug_loc_sz == 0)
631      cc->barf("make_general_GX: .debug_loc is empty/missing");
632
633   init_Cursor( &loc, cc->debug_loc_img,
634                cc->debug_loc_sz, 0, cc->barf,
635                "Overrun whilst reading .debug_loc section(2)" );
636   set_position_of_Cursor( &loc, debug_loc_offset );
637
638   TRACE_D3("make_general_GX (.debug_loc_offset = %lu, img = %p) {\n",
639            debug_loc_offset, get_address_of_Cursor( &loc ) );
640
641   /* Who frees this xa?  It is freed before this fn exits. */
642   xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.mgGX.1",
643                    ML_(dinfo_free),
644                    sizeof(UChar) );
645
646   { UChar c = 1; /*biasMe*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
647
648   base = 0;
649   while (True) {
650      Bool  acquire;
651      UWord len;
652      /* Read a (host-)word pair.  This is something of a hack since
653         the word size to read is really dictated by the ELF file;
654         however, we assume we're reading a file with the same
655         word-sizeness as the host.  Reasonably enough. */
656      UWord w1 = get_UWord( &loc );
657      UWord w2 = get_UWord( &loc );
658
659      TRACE_D3("   %08lx %08lx\n", w1, w2);
660      if (w1 == 0 && w2 == 0)
661         break; /* end of list */
662
663      if (w1 == -1UL) {
664         /* new value for 'base' */
665         base = w2;
666         continue;
667      }
668
669      /* else a location expression follows */
670      /* else enumerate [w1+base, w2+base) */
671      /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
672         (sec 2.17.2) */
673      if (w1 > w2) {
674         TRACE_D3("negative range is for .debug_loc expr at "
675                  "file offset %lu\n",
676                  debug_loc_offset);
677         cc->barf( "negative range in .debug_loc section" );
678      }
679
680      /* ignore zero length ranges */
681      acquire = w1 < w2;
682      len     = (UWord)get_UShort( &loc );
683
684      if (acquire) {
685         UWord  w;
686         UShort s;
687         UChar  c;
688         c = 0; /* !isEnd*/
689         VG_(addBytesToXA)( xa, &c, sizeof(c) );
690         w = w1    + base + svma_of_referencing_CU;
691         VG_(addBytesToXA)( xa, &w, sizeof(w) );
692         w = w2 -1 + base + svma_of_referencing_CU;
693         VG_(addBytesToXA)( xa, &w, sizeof(w) );
694         s = (UShort)len;
695         VG_(addBytesToXA)( xa, &s, sizeof(s) );
696      }
697
698      while (len > 0) {
699         UChar byte = get_UChar( &loc );
700         TRACE_D3("%02x", (UInt)byte);
701         if (acquire)
702            VG_(addBytesToXA)( xa, &byte, 1 );
703         len--;
704      }
705      TRACE_D3("\n");
706   }
707
708   { UChar c = 1; /*isEnd*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
709
710   nbytes = VG_(sizeXA)( xa );
711   vg_assert(nbytes >= 1);
712
713   gx = ML_(dinfo_zalloc)( "di.readdwarf3.mgGX.2", sizeof(GExpr) + nbytes );
714   vg_assert(gx);
715   VG_(memcpy)( &gx->payload[0], (UChar*)VG_(indexXA)(xa,0), nbytes );
716   vg_assert( &gx->payload[nbytes]
717              == ((UChar*)gx) + sizeof(GExpr) + nbytes );
718
719   VG_(deleteXA)( xa );
720
721   TRACE_D3("}\n");
722
723   return gx;
724}
725
726
727/*------------------------------------------------------------*/
728/*---                                                      ---*/
729/*--- Helper functions for range lists and CU headers      ---*/
730/*---                                                      ---*/
731/*------------------------------------------------------------*/
732
733/* Denotes an address range.  Both aMin and aMax are included in the
734   range; hence a complete range is (0, ~0) and an empty range is any
735   (X, X-1) for X > 0.*/
736typedef
737   struct { Addr aMin; Addr aMax; }
738   AddrRange;
739
740
741/* Generate an arbitrary structural total ordering on
742   XArray* of AddrRange. */
743static Word cmp__XArrays_of_AddrRange ( XArray* rngs1, XArray* rngs2 )
744{
745   Word n1, n2, i;
746   tl_assert(rngs1 && rngs2);
747   n1 = VG_(sizeXA)( rngs1 );
748   n2 = VG_(sizeXA)( rngs2 );
749   if (n1 < n2) return -1;
750   if (n1 > n2) return 1;
751   for (i = 0; i < n1; i++) {
752      AddrRange* rng1 = (AddrRange*)VG_(indexXA)( rngs1, i );
753      AddrRange* rng2 = (AddrRange*)VG_(indexXA)( rngs2, i );
754      if (rng1->aMin < rng2->aMin) return -1;
755      if (rng1->aMin > rng2->aMin) return 1;
756      if (rng1->aMax < rng2->aMax) return -1;
757      if (rng1->aMax > rng2->aMax) return 1;
758   }
759   return 0;
760}
761
762
763__attribute__((noinline))
764static XArray* /* of AddrRange */ empty_range_list ( void )
765{
766   XArray* xa; /* XArray of AddrRange */
767   /* Who frees this xa?  varstack_preen() does. */
768   xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.erl.1",
769                    ML_(dinfo_free),
770                    sizeof(AddrRange) );
771   return xa;
772}
773
774
775__attribute__((noinline))
776static XArray* unitary_range_list ( Addr aMin, Addr aMax )
777{
778   XArray*   xa;
779   AddrRange pair;
780   vg_assert(aMin <= aMax);
781   /* Who frees this xa?  varstack_preen() does. */
782   xa = VG_(newXA)( ML_(dinfo_zalloc),  "di.readdwarf3.url.1",
783                    ML_(dinfo_free),
784                    sizeof(AddrRange) );
785   pair.aMin = aMin;
786   pair.aMax = aMax;
787   VG_(addToXA)( xa, &pair );
788   return xa;
789}
790
791
792/* Enumerate the address ranges starting at img-offset
793   'debug_ranges_offset' in .debug_ranges.  Results are biased with
794   'svma_of_referencing_CU' and so I believe are correct SVMAs for the
795   object as a whole.  This function allocates the XArray, and the
796   caller must deallocate it. */
797__attribute__((noinline))
798static XArray* /* of AddrRange */
799       get_range_list ( CUConst* cc,
800                        Bool     td3,
801                        UWord    debug_ranges_offset,
802                        Addr     svma_of_referencing_CU )
803{
804   Addr      base;
805   Cursor    ranges;
806   XArray*   xa; /* XArray of AddrRange */
807   AddrRange pair;
808
809   if (cc->debug_ranges_sz == 0)
810      cc->barf("get_range_list: .debug_ranges is empty/missing");
811
812   init_Cursor( &ranges, cc->debug_ranges_img,
813                cc->debug_ranges_sz, 0, cc->barf,
814                "Overrun whilst reading .debug_ranges section(2)" );
815   set_position_of_Cursor( &ranges, debug_ranges_offset );
816
817   /* Who frees this xa?  varstack_preen() does. */
818   xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.grl.1", ML_(dinfo_free),
819                    sizeof(AddrRange) );
820   base = 0;
821   while (True) {
822      /* Read a (host-)word pair.  This is something of a hack since
823         the word size to read is really dictated by the ELF file;
824         however, we assume we're reading a file with the same
825         word-sizeness as the host.  Reasonably enough. */
826      UWord w1 = get_UWord( &ranges );
827      UWord w2 = get_UWord( &ranges );
828
829      if (w1 == 0 && w2 == 0)
830         break; /* end of list. */
831
832      if (w1 == -1UL) {
833         /* new value for 'base' */
834         base = w2;
835         continue;
836      }
837
838      /* else enumerate [w1+base, w2+base) */
839      /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
840         (sec 2.17.2) */
841      if (w1 > w2)
842         cc->barf( "negative range in .debug_ranges section" );
843      if (w1 < w2) {
844         pair.aMin = w1     + base + svma_of_referencing_CU;
845         pair.aMax = w2 - 1 + base + svma_of_referencing_CU;
846         vg_assert(pair.aMin <= pair.aMax);
847         VG_(addToXA)( xa, &pair );
848      }
849   }
850   return xa;
851}
852
853
854/* Parse the Compilation Unit header indicated at 'c' and
855   initialise 'cc' accordingly. */
856static __attribute__((noinline))
857void parse_CU_Header ( /*OUT*/CUConst* cc,
858                       Bool td3,
859                       Cursor* c,
860                       UChar* debug_abbv_img, UWord debug_abbv_sz,
861		       Bool type_unit,
862                       Bool alt_info )
863{
864   UChar  address_size;
865   UWord  debug_abbrev_offset;
866   Int    i;
867
868   VG_(memset)(cc, 0, sizeof(*cc));
869   vg_assert(c && c->barf);
870   cc->barf = c->barf;
871
872   /* initial_length field */
873   cc->unit_length
874      = get_Initial_Length( &cc->is_dw64, c,
875           "parse_CU_Header: invalid initial-length field" );
876
877   TRACE_D3("   Length:        %lld\n", cc->unit_length );
878
879   /* version */
880   cc->version = get_UShort( c );
881   if (cc->version != 2 && cc->version != 3 && cc->version != 4)
882      cc->barf( "parse_CU_Header: is neither DWARF2 nor DWARF3 nor DWARF4" );
883   TRACE_D3("   Version:       %d\n", (Int)cc->version );
884
885   /* debug_abbrev_offset */
886   debug_abbrev_offset = get_Dwarfish_UWord( c, cc->is_dw64 );
887   if (debug_abbrev_offset >= debug_abbv_sz)
888      cc->barf( "parse_CU_Header: invalid debug_abbrev_offset" );
889   TRACE_D3("   Abbrev Offset: %ld\n", debug_abbrev_offset );
890
891   /* address size.  If this isn't equal to the host word size, just
892      give up.  This makes it safe to assume elsewhere that
893      DW_FORM_addr and DW_FORM_ref_addr can be treated as a host
894      word. */
895   address_size = get_UChar( c );
896   if (address_size != sizeof(void*))
897      cc->barf( "parse_CU_Header: invalid address_size" );
898   TRACE_D3("   Pointer Size:  %d\n", (Int)address_size );
899
900   cc->is_type_unit = type_unit;
901   cc->is_alt_info = alt_info;
902
903   if (type_unit) {
904      cc->type_signature = get_ULong( c );
905      cc->type_offset = get_Dwarfish_UWord( c, cc->is_dw64 );
906   }
907
908   /* Set up so that cc->debug_abbv points to the relevant table for
909      this CU.  Set the szB so that at least we can't read off the end
910      of the debug_abbrev section -- potentially (and quite likely)
911      too big, if this isn't the last table in the section, but at
912      least it's safe. */
913   cc->debug_abbv        = debug_abbv_img + debug_abbrev_offset;
914   cc->debug_abbv_maxszB = debug_abbv_sz  - debug_abbrev_offset;
915   /* and empty out the set_abbv_Cursor cache */
916   if (0) VG_(printf)("XXXXXX initialise set_abbv_Cursor cache\n");
917   for (i = 0; i < N_ABBV_CACHE; i++) {
918      cc->saC_cache[i].abbv_code = (ULong)-1; /* unused */
919      cc->saC_cache[i].posn = 0;
920   }
921   cc->saC_cache_queries = 0;
922   cc->saC_cache_misses = 0;
923}
924
925
926/* Set up 'c' so it is ready to parse the abbv table entry code
927   'abbv_code' for this compilation unit.  */
928static __attribute__((noinline))
929void set_abbv_Cursor ( /*OUT*/Cursor* c, Bool td3,
930                       CUConst* cc, ULong abbv_code )
931{
932   Int   i;
933   ULong acode;
934
935   if (abbv_code == 0)
936      cc->barf("set_abbv_Cursor: abbv_code == 0" );
937
938   /* (ULong)-1 is used to represent an empty cache slot.  So we can't
939      allow it.  In any case no valid DWARF3 should make a reference
940      to a negative abbreviation code.  [at least, they always seem to
941      be numbered upwards from zero as far as I have seen] */
942   vg_assert(abbv_code != (ULong)-1);
943
944   /* First search the cache. */
945   if (0) VG_(printf)("XXXXXX search set_abbv_Cursor cache\n");
946   cc->saC_cache_queries++;
947   for (i = 0; i < N_ABBV_CACHE; i++) {
948      /* No need to test the cached abbv_codes for -1 (empty), since
949         we just asserted that abbv_code is not -1. */
950     if (cc->saC_cache[i].abbv_code == abbv_code) {
951        /* Found it.  Cool.  Set up the parser using the cached
952           position, and move this cache entry 1 step closer to the
953           front. */
954        if (0) VG_(printf)("XXXXXX found in set_abbv_Cursor cache\n");
955        init_Cursor( c, cc->debug_abbv,
956                     cc->debug_abbv_maxszB, cc->saC_cache[i].posn,
957                     cc->barf,
958                     "Overrun whilst parsing .debug_abbrev section(1)" );
959        if (i > 0) {
960           ULong t_abbv_code = cc->saC_cache[i].abbv_code;
961           UWord t_posn = cc->saC_cache[i].posn;
962           while (i > 0) {
963              cc->saC_cache[i] = cc->saC_cache[i-1];
964              cc->saC_cache[0].abbv_code = t_abbv_code;
965              cc->saC_cache[0].posn = t_posn;
966              i--;
967           }
968        }
969        return;
970     }
971   }
972
973   /* No.  It's not in the cache.  We have to search through
974      .debug_abbrev, of course taking care to update the cache
975      when done. */
976
977   cc->saC_cache_misses++;
978   init_Cursor( c, cc->debug_abbv, cc->debug_abbv_maxszB, 0, cc->barf,
979               "Overrun whilst parsing .debug_abbrev section(2)" );
980
981   /* Now iterate though the table until we find the requested
982      entry. */
983   while (True) {
984      //ULong atag;
985      //UInt  has_children;
986      acode = get_ULEB128( c );
987      if (acode == 0) break; /* end of the table */
988      if (acode == abbv_code) break; /* found it */
989      /*atag         = */ get_ULEB128( c );
990      /*has_children = */ get_UChar( c );
991      //TRACE_D3("   %llu      %s    [%s]\n",
992      //         acode, pp_DW_TAG(atag), pp_DW_children(has_children));
993      while (True) {
994         ULong at_name = get_ULEB128( c );
995         ULong at_form = get_ULEB128( c );
996         if (at_name == 0 && at_form == 0) break;
997         //TRACE_D3("    %18s %s\n",
998         //         pp_DW_AT(at_name), pp_DW_FORM(at_form));
999      }
1000   }
1001
1002   if (acode == 0) {
1003      /* Not found.  This is fatal. */
1004      cc->barf("set_abbv_Cursor: abbv_code not found");
1005   }
1006
1007   /* Otherwise, 'c' is now set correctly to parse the relevant entry,
1008      starting from the abbreviation entry's tag.  So just cache
1009      the result, and return. */
1010   for (i = N_ABBV_CACHE-1; i > N_ABBV_CACHE/2; i--) {
1011      cc->saC_cache[i] = cc->saC_cache[i-1];
1012   }
1013   if (0) VG_(printf)("XXXXXX update set_abbv_Cursor cache\n");
1014   cc->saC_cache[N_ABBV_CACHE/2].abbv_code = abbv_code;
1015   cc->saC_cache[N_ABBV_CACHE/2].posn = get_position_of_Cursor(c);
1016}
1017
/* This represents a single signatured type.  It maps a type signature
   (a ULong) to a cooked DIE offset.  Objects of this type are stored
   in the type signature hash table.  */
typedef
   struct D3SignatureType {
      /* Link to the next node; lookup_signatured_type follows this to
         step past nodes whose full signature does not match.
         NOTE(review): 'next' and 'data' presumably have to stay first
         and in this order so that the struct matches the hash
         table's generic node layout -- confirm against the
         VgHashTable implementation before reordering. */
      struct D3SignatureType *next;
      /* The signature narrowed to a UWord (set by
         record_signatured_type); the same narrowed value is what
         lookup_signatured_type hands to VG_(HT_lookup). */
      UWord data;
      /* The full 64-bit signature, compared on lookup because 'data'
         may have lost bits in the narrowing. */
      ULong type_signature;
      /* Cooked DIE offset of the type this signature denotes. */
      UWord die;
   }
   D3SignatureType;
1029
1030/* Record a signatured type in the hash table.  */
1031static void record_signatured_type ( VgHashTable tab,
1032                                     ULong type_signature,
1033                                     UWord die )
1034{
1035   D3SignatureType *dstype = ML_(dinfo_zalloc) ( "di.readdwarf3.sigtype",
1036                                                 sizeof(D3SignatureType) );
1037   dstype->data = (UWord) type_signature;
1038   dstype->type_signature = type_signature;
1039   dstype->die = die;
1040   VG_(HT_add_node) ( tab, dstype );
1041}
1042
1043/* Given a type signature hash table and a type signature, return the
1044   cooked DIE offset of the type.  If the type cannot be found, call
1045   BARF.  */
1046static UWord lookup_signatured_type ( VgHashTable tab,
1047                                      ULong type_signature,
1048                                      void (*barf)( HChar* ) __attribute__((noreturn)) )
1049{
1050   D3SignatureType *dstype = VG_(HT_lookup) ( tab, (UWord) type_signature );
1051   /* This may be unwarranted chumminess with the hash table
1052      implementation.  */
1053   while ( dstype != NULL && dstype->type_signature != type_signature)
1054      dstype = dstype->next;
1055   if (dstype == NULL) {
1056      barf("lookup_signatured_type: could not find signatured type");
1057      /*NOTREACHED*/
1058      vg_assert(0);
1059   }
1060   return dstype->die;
1061}
1062
1063/* From 'c', get the Form data into the lowest 1/2/4/8 bytes of *cts.
1064
1065   If *cts itself contains the entire result, then *ctsSzB is set to
1066   1,2,4 or 8 accordingly and *ctsMemSzB is set to zero.
1067
1068   Alternatively, the result can be a block of data (in the
1069   transiently mapped-in object, so-called "image" space).  If so then
1070   the lowest sizeof(void*)/8 bytes of *cts hold a pointer to said
1071   image, *ctsSzB is zero, and *ctsMemSzB is the size of the block.
1072
1073   Unfortunately this means it is impossible to represent a zero-size
1074   image block since that would have *ctsSzB == 0 and *ctsMemSzB == 0
1075   and so is ambiguous (which case it is?)
1076
1077   Invariant on successful return:
1078      (*ctsSzB > 0 && *ctsMemSzB == 0)
1079      || (*ctsSzB == 0 && *ctsMemSzB > 0)
1080*/
1081static
1082void get_Form_contents ( /*OUT*/ULong* cts,
1083                         /*OUT*/Int*   ctsSzB,
1084                         /*OUT*/UWord* ctsMemSzB,
1085                         CUConst* cc, Cursor* c,
1086                         Bool td3, DW_FORM form )
1087{
1088   *cts       = 0;
1089   *ctsSzB    = 0;
1090   *ctsMemSzB = 0;
1091   switch (form) {
1092      case DW_FORM_data1:
1093         *cts = (ULong)(UChar)get_UChar(c);
1094         *ctsSzB = 1;
1095         TRACE_D3("%u", (UInt)*cts);
1096         break;
1097      case DW_FORM_data2:
1098         *cts = (ULong)(UShort)get_UShort(c);
1099         *ctsSzB = 2;
1100         TRACE_D3("%u", (UInt)*cts);
1101         break;
1102      case DW_FORM_data4:
1103         *cts = (ULong)(UInt)get_UInt(c);
1104         *ctsSzB = 4;
1105         TRACE_D3("%u", (UInt)*cts);
1106         break;
1107      case DW_FORM_data8:
1108         *cts = get_ULong(c);
1109         *ctsSzB = 8;
1110         TRACE_D3("%llu", *cts);
1111         break;
1112      case DW_FORM_sec_offset:
1113         *cts = (ULong)get_Dwarfish_UWord( c, cc->is_dw64 );
1114         *ctsSzB = cc->is_dw64 ? 8 : 4;
1115         TRACE_D3("%llu", *cts);
1116         break;
1117      case DW_FORM_sdata:
1118         *cts = (ULong)(Long)get_SLEB128(c);
1119         *ctsSzB = 8;
1120         TRACE_D3("%lld", (Long)*cts);
1121         break;
1122      case DW_FORM_udata:
1123         *cts = (ULong)(Long)get_ULEB128(c);
1124         *ctsSzB = 8;
1125         TRACE_D3("%llu", (Long)*cts);
1126         break;
1127      case DW_FORM_addr:
1128         /* note, this is a hack.  DW_FORM_addr is defined as getting
1129            a word the size of the target machine as defined by the
1130            address_size field in the CU Header.  However,
1131            parse_CU_Header() rejects all inputs except those for
1132            which address_size == sizeof(Word), hence we can just
1133            treat it as a (host) Word.  */
1134         *cts = (ULong)(UWord)get_UWord(c);
1135         *ctsSzB = sizeof(UWord);
1136         TRACE_D3("0x%lx", (UWord)*cts);
1137         break;
1138
1139      case DW_FORM_ref_addr:
1140         /* We make the same word-size assumption as DW_FORM_addr. */
1141         /* What does this really mean?  From D3 Sec 7.5.4,
1142            description of "reference", it would appear to reference
1143            some other DIE, by specifying the offset from the
1144            beginning of a .debug_info section.  The D3 spec mentions
1145            that this might be in some other shared object and
1146            executable.  But I don't see how the name of the other
1147            object/exe is specified.
1148
1149            At least for the DW_FORM_ref_addrs created by icc11, the
1150            references seem to be within the same object/executable.
1151            So for the moment we merely range-check, to see that they
1152            actually do specify a plausible offset within this
1153            object's .debug_info, and return the value unchanged.
1154
1155            In DWARF 2, DW_FORM_ref_addr is address-sized, but in
1156            DWARF 3 and later, it is offset-sized.
1157         */
1158         if (cc->version == 2) {
1159            *cts = (ULong)(UWord)get_UWord(c);
1160            *ctsSzB = sizeof(UWord);
1161         } else {
1162            *cts = get_Dwarfish_UWord(c, cc->is_dw64);
1163            *ctsSzB = cc->is_dw64 ? sizeof(ULong) : sizeof(UInt);
1164         }
1165         TRACE_D3("0x%lx", (UWord)*cts);
1166         if (0) VG_(printf)("DW_FORM_ref_addr 0x%lx\n", (UWord)*cts);
1167         if (/* the following 2 are surely impossible, but ... */
1168             cc->debug_info_img == NULL || cc->debug_info_sz == 0
1169             || *cts >= (ULong)cc->debug_info_sz) {
1170            /* Hmm.  Offset is nonsensical for this object's .debug_info
1171               section.  Be safe and reject it. */
1172            cc->barf("get_Form_contents: DW_FORM_ref_addr points "
1173                     "outside .debug_info");
1174         }
1175         break;
1176
1177      case DW_FORM_strp: {
1178         /* this is an offset into .debug_str */
1179         UChar* str;
1180         UWord uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
1181         if (cc->debug_str_img == NULL || uw >= cc->debug_str_sz)
1182            cc->barf("get_Form_contents: DW_FORM_strp "
1183                     "points outside .debug_str");
1184         /* FIXME: check the entire string lies inside debug_str,
1185            not just the first byte of it. */
1186         str = (UChar*)cc->debug_str_img + uw;
1187         TRACE_D3("(indirect string, offset: 0x%lx): %s", uw, str);
1188         *cts = (ULong)(UWord)str;
1189         *ctsMemSzB = 1 + (ULong)VG_(strlen)(str);
1190         break;
1191      }
1192      case DW_FORM_string: {
1193         UChar* str = get_AsciiZ(c);
1194         TRACE_D3("%s", str);
1195         *cts = (ULong)(UWord)str;
1196         /* strlen is safe because get_AsciiZ already 'vetted' the
1197            entire string */
1198         *ctsMemSzB = 1 + (ULong)VG_(strlen)(str);
1199         break;
1200      }
1201      case DW_FORM_ref1: {
1202         UChar  u8 = get_UChar(c);
1203         UWord res = cc->cu_start_offset + (UWord)u8;
1204         *cts = (ULong)res;
1205         *ctsSzB = sizeof(UWord);
1206         TRACE_D3("<%lx>", res);
1207         break;
1208      }
1209      case DW_FORM_ref2: {
1210         UShort  u16 = get_UShort(c);
1211         UWord res = cc->cu_start_offset + (UWord)u16;
1212         *cts = (ULong)res;
1213         *ctsSzB = sizeof(UWord);
1214         TRACE_D3("<%lx>", res);
1215         break;
1216      }
1217      case DW_FORM_ref4: {
1218         UInt  u32 = get_UInt(c);
1219         UWord res = cc->cu_start_offset + (UWord)u32;
1220         *cts = (ULong)res;
1221         *ctsSzB = sizeof(UWord);
1222         TRACE_D3("<%lx>", res);
1223         break;
1224      }
1225      case DW_FORM_ref8: {
1226         ULong  u64 = get_ULong(c);
1227         UWord res = cc->cu_start_offset + (UWord)u64;
1228         *cts = (ULong)res;
1229         *ctsSzB = sizeof(UWord);
1230         TRACE_D3("<%lx>", res);
1231         break;
1232      }
1233      case DW_FORM_ref_udata: {
1234         ULong  u64 = get_ULEB128(c);
1235         UWord res = cc->cu_start_offset + (UWord)u64;
1236         *cts = (ULong)res;
1237         *ctsSzB = sizeof(UWord);
1238         TRACE_D3("<%lx>", res);
1239         break;
1240      }
1241      case DW_FORM_flag: {
1242         UChar u8 = get_UChar(c);
1243         TRACE_D3("%u", (UInt)u8);
1244         *cts = (ULong)u8;
1245         *ctsSzB = 1;
1246         break;
1247      }
1248      case DW_FORM_flag_present:
1249         TRACE_D3("1");
1250         *cts = 1;
1251         *ctsSzB = 1;
1252         break;
1253      case DW_FORM_block1: {
1254         ULong  u64b;
1255         ULong  u64 = (ULong)get_UChar(c);
1256         UChar* block = get_address_of_Cursor(c);
1257         TRACE_D3("%llu byte block: ", u64);
1258         for (u64b = u64; u64b > 0; u64b--) {
1259            UChar u8 = get_UChar(c);
1260            TRACE_D3("%x ", (UInt)u8);
1261         }
1262         *cts = (ULong)(UWord)block;
1263         *ctsMemSzB = (UWord)u64;
1264         break;
1265      }
1266      case DW_FORM_block2: {
1267         ULong  u64b;
1268         ULong  u64 = (ULong)get_UShort(c);
1269         UChar* block = get_address_of_Cursor(c);
1270         TRACE_D3("%llu byte block: ", u64);
1271         for (u64b = u64; u64b > 0; u64b--) {
1272            UChar u8 = get_UChar(c);
1273            TRACE_D3("%x ", (UInt)u8);
1274         }
1275         *cts = (ULong)(UWord)block;
1276         *ctsMemSzB = (UWord)u64;
1277         break;
1278      }
1279      case DW_FORM_block4: {
1280         ULong  u64b;
1281         ULong  u64 = (ULong)get_UInt(c);
1282         UChar* block = get_address_of_Cursor(c);
1283         TRACE_D3("%llu byte block: ", u64);
1284         for (u64b = u64; u64b > 0; u64b--) {
1285            UChar u8 = get_UChar(c);
1286            TRACE_D3("%x ", (UInt)u8);
1287         }
1288         *cts = (ULong)(UWord)block;
1289         *ctsMemSzB = (UWord)u64;
1290         break;
1291      }
1292      case DW_FORM_exprloc:
1293      case DW_FORM_block: {
1294         ULong  u64b;
1295         ULong  u64 = (ULong)get_ULEB128(c);
1296         UChar* block = get_address_of_Cursor(c);
1297         TRACE_D3("%llu byte block: ", u64);
1298         for (u64b = u64; u64b > 0; u64b--) {
1299            UChar u8 = get_UChar(c);
1300            TRACE_D3("%x ", (UInt)u8);
1301         }
1302         *cts = (ULong)(UWord)block;
1303         *ctsMemSzB = (UWord)u64;
1304         break;
1305      }
1306      case DW_FORM_ref_sig8: {
1307         ULong  u64b;
1308         ULong  signature = get_ULong (c);
1309         ULong  work = signature;
1310         TRACE_D3("8 byte signature: ");
1311         for (u64b = 8; u64b > 0; u64b--) {
1312            UChar u8 = work & 0xff;
1313            TRACE_D3("%x ", (UInt)u8);
1314            work >>= 8;
1315         }
1316         /* Due to the way that the hash table is constructed, the
1317            resulting DIE offset here is already "cooked".  See
1318            cook_die_using_form.  */
1319         *cts = lookup_signatured_type (cc->signature_types, signature,
1320                                        c->barf);
1321         *ctsSzB = sizeof(UWord);
1322         break;
1323      }
1324      case DW_FORM_indirect:
1325         get_Form_contents (cts, ctsSzB, ctsMemSzB, cc, c, td3,
1326                            (DW_FORM)get_ULEB128(c));
1327         return;
1328
1329      case DW_FORM_GNU_ref_alt:
1330         *cts = get_Dwarfish_UWord(c, cc->is_dw64);
1331         *ctsSzB = cc->is_dw64 ? sizeof(ULong) : sizeof(UInt);
1332         TRACE_D3("0x%lx", (UWord)*cts);
1333         if (0) VG_(printf)("DW_FORM_GNU_ref_alt 0x%lx\n", (UWord)*cts);
1334         if (/* the following 2 are surely impossible, but ... */
1335             cc->debug_info_alt_img == NULL || cc->debug_info_alt_sz == 0
1336             || *cts >= (ULong)cc->debug_info_alt_sz) {
1337            /* Hmm.  Offset is nonsensical for this object's .debug_info
1338               section.  Be safe and reject it. */
1339            cc->barf("get_Form_contents: DW_FORM_ref_addr points "
1340                     "outside alternate .debug_info");
1341         }
1342         break;
1343
1344      case DW_FORM_GNU_strp_alt: {
1345         /* this is an offset into alternate .debug_str */
1346         UChar* str;
1347         UWord uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
1348         if (cc->debug_str_alt_img == NULL || uw >= cc->debug_str_alt_sz)
1349            cc->barf("get_Form_contents: DW_FORM_GNU_strp_alt "
1350                     "points outside alternate .debug_str");
1351         /* FIXME: check the entire string lies inside debug_str,
1352            not just the first byte of it. */
1353         str = (UChar*)cc->debug_str_alt_img + uw;
1354         TRACE_D3("(indirect alt string, offset: 0x%lx): %s", uw, str);
1355         *cts = (ULong)(UWord)str;
1356         *ctsMemSzB = 1 + (ULong)VG_(strlen)(str);
1357         break;
1358      }
1359
1360      default:
1361         VG_(printf)(
1362            "get_Form_contents: unhandled %d (%s) at <%lx>\n",
1363            form, ML_(pp_DW_FORM)(form), get_position_of_Cursor(c));
1364         c->barf("get_Form_contents: unhandled DW_FORM");
1365   }
1366}
1367
1368
1369/*------------------------------------------------------------*/
1370/*---                                                      ---*/
1371/*--- Parsing of variable-related DIEs                     ---*/
1372/*---                                                      ---*/
1373/*------------------------------------------------------------*/
1374
/* A variable as collected while walking the DIE tree, before it is
   turned into its final form. */
typedef
   struct _TempVar {
      UChar*  name; /* in DebugInfo's .strchunks */
      /* Represent ranges economically.  nRanges is the number of
         ranges.  Cases:
         0: .rngOneMin .rngOneMax .rngMany are all zero
         1: .rngOneMin .rngOneMax hold the range; .rngMany is NULL
         2: .rngOneMin .rngOneMax are zero; .rngMany holds the ranges.
            NOTE(review): presumably this case means "2 or more" --
            confirm against the code that fills in nRanges.
         This is merely an optimisation to avoid having to allocate
         and free the XArray in the common (98%) of cases where there
         is zero or one address ranges. */
      UWord   nRanges;
      Addr    rngOneMin;
      Addr    rngOneMax;
      XArray* rngMany; /* of AddrRange.  NON-UNIQUE PTR in AR_DINFO. */
      /* Do not free .rngMany, since many TempVars will have the same
         value.  Instead the associated storage is to be freed by
         deleting 'rangestree', which stores a single copy of each
         range. */
      /* --- */
      Int     level; /* DIE nesting level at which the variable was seen */
      UWord   typeR; /* a cuOff */
      GExpr*  gexpr; /* for this variable */
      GExpr*  fbGX;  /* to find the frame base of the enclosing fn, if
                        any */
      UChar*  fName; /* declaring file name, or NULL */
      Int     fLine; /* declaring file line number, or zero */
      /* offset in .debug_info, so that abstract instances can be
         found to satisfy references from concrete instances. */
      UWord   dioff;
      UWord   absOri; /* so the absOri fields refer to dioff fields
                         in some other, related TempVar. */
   }
   TempVar;
1409
/* Capacity of the scope stack in D3VarParser.  varstack_push barfs
   if a push would exceed it. */
#define N_D3_VAR_STACK 48

/* State carried across calls while parsing the variable-related DIEs
   of one compilation unit. */
typedef
   struct {
      /* Contains the range stack: a stack of address ranges, one
         stack entry for each nested scope.

         Some scope entries are created by function definitions
         (DW_TAG_subprogram), and for those, we also note the GExpr
         derived from its DW_AT_frame_base attribute, if any.
         Consequently it should be possible to find, for any
         variable's DIE, the GExpr for the containing function's
         DW_AT_frame_base by scanning back through the stack to find
         the nearest entry associated with a function.  This somewhat
         elaborate scheme is provided so as to make it possible to
         obtain the correct DW_AT_frame_base expression even in the
         presence of nested functions (or to be more precise, in the
         presence of nested DW_TAG_subprogram DIEs).

         The four arrays below are parallel: index k in each describes
         stack entry k.  Unused entries hold NULL / 0 / False / NULL,
         which varstack_push asserts before filling one in.
      */
      Int     sp; /* [sp] is innermost active entry; sp==-1 for empty
                     stack */
      XArray* ranges[N_D3_VAR_STACK]; /* XArray of AddrRange */
      Int     level[N_D3_VAR_STACK];  /* D3 DIE levels */
      Bool    isFunc[N_D3_VAR_STACK]; /* from DW_TAG_subprogram? */
      GExpr*  fbGX[N_D3_VAR_STACK];   /* if isFunc, contains the FB
                                         expr, else NULL */
      /* The file name table.  Is a mapping from integer index to the
         (permanent) copy of the string, iow a non-img area. */
      XArray* /* of UChar* */ filenameTable;
   }
   D3VarParser;
1441
1442static void varstack_show ( D3VarParser* parser, HChar* str ) {
1443   Word i, j;
1444   VG_(printf)("  varstack (%s) {\n", str);
1445   for (i = 0; i <= parser->sp; i++) {
1446      XArray* xa = parser->ranges[i];
1447      vg_assert(xa);
1448      VG_(printf)("    [%ld] (level %d)", i, parser->level[i]);
1449      if (parser->isFunc[i]) {
1450         VG_(printf)(" (fbGX=%p)", parser->fbGX[i]);
1451      } else {
1452         vg_assert(parser->fbGX[i] == NULL);
1453      }
1454      VG_(printf)(": ");
1455      if (VG_(sizeXA)( xa ) == 0) {
1456         VG_(printf)("** empty PC range array **");
1457      } else {
1458         for (j = 0; j < VG_(sizeXA)( xa ); j++) {
1459            AddrRange* range = (AddrRange*) VG_(indexXA)( xa, j );
1460            vg_assert(range);
1461            VG_(printf)("[%#lx,%#lx] ", range->aMin, range->aMax);
1462         }
1463      }
1464      VG_(printf)("\n");
1465   }
1466   VG_(printf)("  }\n");
1467}
1468
1469/* Remove from the stack, all entries with .level > 'level' */
1470static
1471void varstack_preen ( D3VarParser* parser, Bool td3, Int level )
1472{
1473   Bool changed = False;
1474   vg_assert(parser->sp < N_D3_VAR_STACK);
1475   while (True) {
1476      vg_assert(parser->sp >= -1);
1477      if (parser->sp == -1) break;
1478      if (parser->level[parser->sp] <= level) break;
1479      if (0)
1480         TRACE_D3("BBBBAAAA varstack_pop [newsp=%d]\n", parser->sp-1);
1481      vg_assert(parser->ranges[parser->sp]);
1482      /* Who allocated this xa?  get_range_list() or
1483         unitary_range_list(). */
1484      VG_(deleteXA)( parser->ranges[parser->sp] );
1485      parser->ranges[parser->sp] = NULL;
1486      parser->level[parser->sp]  = 0;
1487      parser->isFunc[parser->sp] = False;
1488      parser->fbGX[parser->sp]   = NULL;
1489      parser->sp--;
1490      changed = True;
1491   }
1492   if (changed && td3)
1493      varstack_show( parser, "after preen" );
1494}
1495
1496static void varstack_push ( CUConst* cc,
1497                            D3VarParser* parser,
1498                            Bool td3,
1499                            XArray* ranges, Int level,
1500                            Bool    isFunc, GExpr* fbGX ) {
1501   if (0)
1502   TRACE_D3("BBBBAAAA varstack_push[newsp=%d]: %d  %p\n",
1503            parser->sp+1, level, ranges);
1504
1505   /* First we need to zap everything >= 'level', as we are about to
1506      replace any previous entry at 'level', so .. */
1507   varstack_preen(parser, /*td3*/False, level-1);
1508
1509   vg_assert(parser->sp >= -1);
1510   vg_assert(parser->sp < N_D3_VAR_STACK);
1511   if (parser->sp == N_D3_VAR_STACK-1)
1512      cc->barf("varstack_push: N_D3_VAR_STACK is too low; "
1513               "increase and recompile");
1514   if (parser->sp >= 0)
1515      vg_assert(parser->level[parser->sp] < level);
1516   parser->sp++;
1517   vg_assert(parser->ranges[parser->sp] == NULL);
1518   vg_assert(parser->level[parser->sp]  == 0);
1519   vg_assert(parser->isFunc[parser->sp] == False);
1520   vg_assert(parser->fbGX[parser->sp]   == NULL);
1521   vg_assert(ranges != NULL);
1522   if (!isFunc) vg_assert(fbGX == NULL);
1523   parser->ranges[parser->sp] = ranges;
1524   parser->level[parser->sp]  = level;
1525   parser->isFunc[parser->sp] = isFunc;
1526   parser->fbGX[parser->sp]   = fbGX;
1527   if (td3)
1528      varstack_show( parser, "after push" );
1529}
1530
1531
1532/* cts, ctsSzB, ctsMemSzB are derived from a DW_AT_location and so
1533   refer either to a location expression or to a location list.
1534   Figure out which, and in both cases bundle the expression or
1535   location list into a so-called GExpr (guarded expression). */
1536__attribute__((noinline))
1537static GExpr* get_GX ( CUConst* cc, Bool td3,
1538                       ULong cts, Int ctsSzB, UWord ctsMemSzB )
1539{
1540   GExpr* gexpr = NULL;
1541   if (ctsMemSzB > 0 && ctsSzB == 0) {
1542      /* represents an in-line location expression, and cts points
1543         right at it */
1544      gexpr = make_singleton_GX( (UChar*)(UWord)cts, ctsMemSzB );
1545   }
1546   else
1547   if (ctsMemSzB == 0 && ctsSzB > 0) {
1548      /* represents location list.  cts is the offset of it in
1549         .debug_loc. */
1550      if (!cc->cu_svma_known)
1551         cc->barf("get_GX: location list, but CU svma is unknown");
1552      gexpr = make_general_GX( cc, td3, (UWord)cts, cc->cu_svma );
1553   }
1554   else {
1555      vg_assert(0); /* else caller is bogus */
1556   }
1557   return gexpr;
1558}
1559
1560
1561static
1562void read_filename_table( /*MOD*/D3VarParser* parser,
1563                          CUConst* cc, UWord debug_line_offset,
1564                          Bool td3 )
1565{
1566   Bool   is_dw64;
1567   Cursor c;
1568   Word   i;
1569   UShort version;
1570   UChar  opcode_base;
1571   UChar* str;
1572
1573   vg_assert(parser && cc && cc->barf);
1574   if ((!cc->debug_line_img)
1575       || cc->debug_line_sz <= debug_line_offset)
1576      cc->barf("read_filename_table: .debug_line is missing?");
1577
1578   init_Cursor( &c, cc->debug_line_img,
1579                cc->debug_line_sz, debug_line_offset, cc->barf,
1580                "Overrun whilst reading .debug_line section(1)" );
1581
1582   /* unit_length = */
1583      get_Initial_Length( &is_dw64, &c,
1584           "read_filename_table: invalid initial-length field" );
1585   version = get_UShort( &c );
1586   if (version != 2 && version != 3 && version != 4)
1587     cc->barf("read_filename_table: Only DWARF version 2, 3 and 4 line info "
1588              "is currently supported.");
1589   /*header_length              = (ULong)*/ get_Dwarfish_UWord( &c, is_dw64 );
1590   /*minimum_instruction_length = */ get_UChar( &c );
1591   if (version >= 4)
1592      /*maximum_operations_per_insn = */ get_UChar( &c );
1593   /*default_is_stmt            = */ get_UChar( &c );
1594   /*line_base                  = (Char)*/ get_UChar( &c );
1595   /*line_range                 = */ get_UChar( &c );
1596   opcode_base                = get_UChar( &c );
1597   /* skip over "standard_opcode_lengths" */
1598   for (i = 1; i < (Word)opcode_base; i++)
1599     (void)get_UChar( &c );
1600
1601   /* skip over the directory names table */
1602   while (peek_UChar(&c) != 0) {
1603     (void)get_AsciiZ(&c);
1604   }
1605   (void)get_UChar(&c); /* skip terminating zero */
1606
1607   /* Read and record the file names table */
1608   vg_assert(parser->filenameTable);
1609   vg_assert( VG_(sizeXA)( parser->filenameTable ) == 0 );
1610   /* Add a dummy index-zero entry.  DWARF3 numbers its files
1611      from 1, for some reason. */
1612   str = ML_(addStr)( cc->di, "<unknown_file>", -1 );
1613   VG_(addToXA)( parser->filenameTable, &str );
1614   while (peek_UChar(&c) != 0) {
1615      str = get_AsciiZ(&c);
1616      TRACE_D3("  read_filename_table: %ld %s\n",
1617               VG_(sizeXA)(parser->filenameTable), str);
1618      str = ML_(addStr)( cc->di, str, -1 );
1619      VG_(addToXA)( parser->filenameTable, &str );
1620      (void)get_ULEB128( &c ); /* skip directory index # */
1621      (void)get_ULEB128( &c ); /* skip last mod time */
1622      (void)get_ULEB128( &c ); /* file size */
1623   }
1624   /* We're done!  The rest of it is not interesting. */
1625}
1626
1627
1628__attribute__((noinline))
1629static void parse_var_DIE (
1630   /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
1631   /*MOD*/XArray* /* of TempVar* */ tempvars,
1632   /*MOD*/XArray* /* of GExpr* */ gexprs,
1633   /*MOD*/D3VarParser* parser,
1634   DW_TAG dtag,
1635   UWord posn,
1636   Int level,
1637   Cursor* c_die,
1638   Cursor* c_abbv,
1639   CUConst* cc,
1640   Bool td3
1641)
1642{
1643   ULong       cts;
1644   Int         ctsSzB;
1645   UWord       ctsMemSzB;
1646
1647   UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
1648   UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv );
1649   Bool  debug_types_flag;
1650   Bool  alt_flag;
1651
1652   varstack_preen( parser, td3, level-1 );
1653
1654   if (dtag == DW_TAG_compile_unit
1655       || dtag == DW_TAG_type_unit
1656       || dtag == DW_TAG_partial_unit) {
1657      Bool have_lo    = False;
1658      Bool have_hi1   = False;
1659      Bool hiIsRelative = False;
1660      Bool have_range = False;
1661      Addr ip_lo    = 0;
1662      Addr ip_hi1   = 0;
1663      Addr rangeoff = 0;
1664      while (True) {
1665         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
1666         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
1667         if (attr == 0 && form == 0) break;
1668         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
1669                            cc, c_die, False/*td3*/, form );
1670         if (attr == DW_AT_low_pc && ctsSzB > 0) {
1671            ip_lo   = cts;
1672            have_lo = True;
1673         }
1674         if (attr == DW_AT_high_pc && ctsSzB > 0) {
1675            ip_hi1   = cts;
1676            have_hi1 = True;
1677            if (form != DW_FORM_addr)
1678               hiIsRelative = True;
1679         }
1680         if (attr == DW_AT_ranges && ctsSzB > 0) {
1681            rangeoff = cts;
1682            have_range = True;
1683         }
1684         if (attr == DW_AT_stmt_list && ctsSzB > 0) {
1685            read_filename_table( parser, cc, (UWord)cts, td3 );
1686         }
1687      }
1688      if (have_lo && have_hi1 && hiIsRelative)
1689         ip_hi1 += ip_lo;
1690      /* Now, does this give us an opportunity to find this
1691         CU's svma? */
1692#if 0
1693      if (level == 0 && have_lo) {
1694         vg_assert(!cc->cu_svma_known); /* if this fails, it must be
1695         because we've already seen a DW_TAG_compile_unit DIE at level
1696         0.  But that can't happen, because DWARF3 only allows exactly
1697         one top level DIE per CU. */
1698         cc->cu_svma_known = True;
1699         cc->cu_svma = ip_lo;
1700         if (1)
1701            TRACE_D3("BBBBAAAA acquire CU_SVMA of %p\n", cc->cu_svma);
1702         /* Now, it may be that this DIE doesn't tell us the CU's
1703            SVMA, by way of not having a DW_AT_low_pc.  That's OK --
1704            the CU doesn't *have* to have its SVMA specified.
1705
1706            But as per last para D3 spec sec 3.1.1 ("Normal and
1707            Partial Compilation Unit Entries", "If the base address
1708            (viz, the SVMA) is undefined, then any DWARF entry of
1709            structure defined interms of the base address of that
1710            compilation unit is not valid.".  So that means, if whilst
1711            processing the children of this top level DIE (or their
1712            children, etc) we see a DW_AT_range, and cu_svma_known is
1713            False, then the DIE that contains it is (per the spec)
1714            invalid, and we can legitimately stop and complain. */
1715      }
1716#else
1717      /* .. whereas The Reality is, simply assume the SVMA is zero
1718         if it isn't specified. */
1719      if (level == 0) {
1720         vg_assert(!cc->cu_svma_known);
1721         cc->cu_svma_known = True;
1722         if (have_lo)
1723            cc->cu_svma = ip_lo;
1724         else
1725            cc->cu_svma = 0;
1726      }
1727#endif
1728      /* Do we have something that looks sane? */
1729      if (have_lo && have_hi1 && (!have_range)) {
1730         if (ip_lo < ip_hi1)
1731            varstack_push( cc, parser, td3,
1732                           unitary_range_list(ip_lo, ip_hi1 - 1),
1733                           level,
1734                           False/*isFunc*/, NULL/*fbGX*/ );
1735      } else
1736      if ((!have_lo) && (!have_hi1) && have_range) {
1737         varstack_push( cc, parser, td3,
1738                        get_range_list( cc, td3,
1739                                        rangeoff, cc->cu_svma ),
1740                        level,
1741                        False/*isFunc*/, NULL/*fbGX*/ );
1742      } else
1743      if ((!have_lo) && (!have_hi1) && (!have_range)) {
1744         /* CU has no code, presumably? */
1745         varstack_push( cc, parser, td3,
1746                        empty_range_list(),
1747                        level,
1748                        False/*isFunc*/, NULL/*fbGX*/ );
1749      } else
1750      if (have_lo && (!have_hi1) && have_range && ip_lo == 0) {
1751         /* broken DIE created by gcc-4.3.X ?  Ignore the
1752            apparently-redundant DW_AT_low_pc and use the DW_AT_ranges
1753            instead. */
1754         varstack_push( cc, parser, td3,
1755                        get_range_list( cc, td3,
1756                                        rangeoff, cc->cu_svma ),
1757                        level,
1758                        False/*isFunc*/, NULL/*fbGX*/ );
1759      } else {
1760         if (0) VG_(printf)("I got hlo %d hhi1 %d hrange %d\n",
1761                            (Int)have_lo, (Int)have_hi1, (Int)have_range);
1762         goto bad_DIE;
1763      }
1764   }
1765
1766   if (dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram) {
1767      Bool   have_lo    = False;
1768      Bool   have_hi1   = False;
1769      Bool   have_range = False;
1770      Bool   hiIsRelative = False;
1771      Addr   ip_lo      = 0;
1772      Addr   ip_hi1     = 0;
1773      Addr   rangeoff   = 0;
1774      Bool   isFunc     = dtag == DW_TAG_subprogram;
1775      GExpr* fbGX       = NULL;
1776      while (True) {
1777         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
1778         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
1779         if (attr == 0 && form == 0) break;
1780         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
1781                            cc, c_die, False/*td3*/, form );
1782         if (attr == DW_AT_low_pc && ctsSzB > 0) {
1783            ip_lo   = cts;
1784            have_lo = True;
1785         }
1786         if (attr == DW_AT_high_pc && ctsSzB > 0) {
1787            ip_hi1   = cts;
1788            have_hi1 = True;
1789            if (form != DW_FORM_addr)
1790               hiIsRelative = True;
1791         }
1792         if (attr == DW_AT_ranges && ctsSzB > 0) {
1793            rangeoff = cts;
1794            have_range = True;
1795         }
1796         if (isFunc
1797             && attr == DW_AT_frame_base
1798             && ((ctsMemSzB > 0 && ctsSzB == 0)
1799                 || (ctsMemSzB == 0 && ctsSzB > 0))) {
1800            fbGX = get_GX( cc, False/*td3*/, cts, ctsSzB, ctsMemSzB );
1801            vg_assert(fbGX);
1802            VG_(addToXA)(gexprs, &fbGX);
1803         }
1804      }
1805      if (have_lo && have_hi1 && hiIsRelative)
1806         ip_hi1 += ip_lo;
1807      /* Do we have something that looks sane? */
1808      if (dtag == DW_TAG_subprogram
1809          && (!have_lo) && (!have_hi1) && (!have_range)) {
1810         /* This is legit - ignore it. Sec 3.3.3: "A subroutine entry
1811            representing a subroutine declaration that is not also a
1812            definition does not have code address or range
1813            attributes." */
1814      } else
1815      if (dtag == DW_TAG_lexical_block
1816          && (!have_lo) && (!have_hi1) && (!have_range)) {
1817         /* I believe this is legit, and means the lexical block
1818            contains no insns (whatever that might mean).  Ignore. */
1819      } else
1820      if (have_lo && have_hi1 && (!have_range)) {
1821         /* This scope supplies just a single address range. */
1822         if (ip_lo < ip_hi1)
1823            varstack_push( cc, parser, td3,
1824                           unitary_range_list(ip_lo, ip_hi1 - 1),
1825                           level, isFunc, fbGX );
1826      } else
1827      if ((!have_lo) && (!have_hi1) && have_range) {
1828         /* This scope supplies multiple address ranges via the use of
1829            a range list. */
1830         varstack_push( cc, parser, td3,
1831                        get_range_list( cc, td3,
1832                                        rangeoff, cc->cu_svma ),
1833                        level, isFunc, fbGX );
1834      } else
1835      if (have_lo && (!have_hi1) && (!have_range)) {
1836         /* This scope is bogus.  The D3 spec sec 3.4 (Lexical Block
1837            Entries) says fairly clearly that a scope must have either
1838            _range or (_low_pc and _high_pc). */
1839         /* The spec is a bit ambiguous though.  Perhaps a single byte
1840            range is intended?  See sec 2.17 (Code Addresses And Ranges) */
1841         /* This case is here because icc9 produced this:
1842         <2><13bd>: DW_TAG_lexical_block
1843            DW_AT_decl_line   : 5229
1844            DW_AT_decl_column : 37
1845            DW_AT_decl_file   : 1
1846            DW_AT_low_pc      : 0x401b03
1847         */
1848         /* Ignore (seems safe than pushing a single byte range) */
1849      } else
1850         goto bad_DIE;
1851   }
1852
1853   if (dtag == DW_TAG_variable || dtag == DW_TAG_formal_parameter) {
1854      UChar* name        = NULL;
1855      UWord  typeR       = D3_INVALID_CUOFF;
1856      Bool   external    = False;
1857      GExpr* gexpr       = NULL;
1858      Int    n_attrs     = 0;
1859      UWord  abs_ori     = (UWord)D3_INVALID_CUOFF;
1860      Int    lineNo      = 0;
1861      UChar* fileName    = NULL;
1862      while (True) {
1863         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
1864         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
1865         if (attr == 0 && form == 0) break;
1866         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
1867                            cc, c_die, False/*td3*/, form );
1868         n_attrs++;
1869         if (attr == DW_AT_name && ctsMemSzB > 0) {
1870            name = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 );
1871         }
1872         if (attr == DW_AT_location
1873             && ((ctsMemSzB > 0 && ctsSzB == 0)
1874                 || (ctsMemSzB == 0 && ctsSzB > 0))) {
1875            gexpr = get_GX( cc, False/*td3*/, cts, ctsSzB, ctsMemSzB );
1876            vg_assert(gexpr);
1877            VG_(addToXA)(gexprs, &gexpr);
1878         }
1879         if (attr == DW_AT_type && ctsSzB > 0) {
1880            typeR = cook_die_using_form( cc, (UWord)cts, form );
1881         }
1882         if (attr == DW_AT_external && ctsSzB > 0 && cts > 0) {
1883            external = True;
1884         }
1885         if (attr == DW_AT_abstract_origin && ctsSzB > 0) {
1886            abs_ori = (UWord)cts;
1887         }
1888         if (attr == DW_AT_declaration && ctsSzB > 0 && cts > 0) {
1889            /*declaration = True;*/
1890         }
1891         if (attr == DW_AT_decl_line && ctsSzB > 0) {
1892            lineNo = (Int)cts;
1893         }
1894         if (attr == DW_AT_decl_file && ctsSzB > 0) {
1895            Int ftabIx = (Int)cts;
1896            if (ftabIx >= 1
1897                && ftabIx < VG_(sizeXA)( parser->filenameTable )) {
1898               fileName = *(UChar**)
1899                          VG_(indexXA)( parser->filenameTable, ftabIx );
1900               vg_assert(fileName);
1901            }
1902            if (0) VG_(printf)("XXX filename = %s\n", fileName);
1903         }
1904      }
1905      /* We'll collect it under if one of the following three
1906         conditions holds:
1907         (1) has location and type    -> completed
1908         (2) has type only            -> is an abstract instance
1909         (3) has location and abs_ori -> is a concrete instance
1910         Name, filename and line number are all optional frills.
1911      */
1912      if ( /* 1 */ (gexpr && typeR != D3_INVALID_CUOFF)
1913           /* 2 */ || (typeR != D3_INVALID_CUOFF)
1914           /* 3 */ || (gexpr && abs_ori != (UWord)D3_INVALID_CUOFF) ) {
1915
1916         /* Add this variable to the list of interesting looking
1917            variables.  Crucially, note along with it the address
1918            range(s) associated with the variable, which for locals
1919            will be the address ranges at the top of the varparser's
1920            stack. */
1921         GExpr*   fbGX = NULL;
1922         Word     i, nRanges;
1923         XArray*  /* of AddrRange */ xa;
1924         TempVar* tv;
1925         /* Stack can't be empty; we put a dummy entry on it for the
1926            entire address range before starting with the DIEs for
1927            this CU. */
1928         vg_assert(parser->sp >= 0);
1929
1930         /* If this is a local variable (non-external), try to find
1931            the GExpr for the DW_AT_frame_base of the containing
1932            function.  It should have been pushed on the stack at the
1933            time we encountered its DW_TAG_subprogram DIE, so the way
1934            to find it is to scan back down the stack looking for it.
1935            If there isn't an enclosing stack entry marked 'isFunc'
1936            then we must be seeing variable or formal param DIEs
1937            outside of a function, so we deem the Dwarf to be
1938            malformed if that happens.  Note that the fbGX may be NULL
1939            if the containing DT_TAG_subprogram didn't supply a
1940            DW_AT_frame_base -- that's OK, but there must actually be
1941            a containing DW_TAG_subprogram. */
1942         if (!external) {
1943            Bool found = False;
1944            for (i = parser->sp; i >= 0; i--) {
1945               if (parser->isFunc[i]) {
1946                  fbGX = parser->fbGX[i];
1947                  found = True;
1948                  break;
1949               }
1950            }
1951            if (!found) {
1952               if (0 && VG_(clo_verbosity) >= 0) {
1953                  VG_(message)(Vg_DebugMsg,
1954                     "warning: parse_var_DIE: non-external variable "
1955                     "outside DW_TAG_subprogram\n");
1956               }
1957               /* goto bad_DIE; */
1958               /* This seems to happen a lot.  Just ignore it -- if,
1959                  when we come to evaluation of the location (guarded)
1960                  expression, it requires a frame base value, and
1961                  there's no expression for that, then evaluation as a
1962                  whole will fail.  Harmless - a bit of a waste of
1963                  cycles but nothing more. */
1964            }
1965         }
1966
1967         /* re "external ? 0 : parser->sp" (twice), if the var is
1968            marked 'external' then we must put it at the global scope,
1969            as only the global scope (level 0) covers the entire PC
1970            address space.  It is asserted elsewhere that level 0
1971            always covers the entire address space. */
1972         xa = parser->ranges[external ? 0 : parser->sp];
1973         nRanges = VG_(sizeXA)(xa);
1974         vg_assert(nRanges >= 0);
1975
1976         tv = ML_(dinfo_zalloc)( "di.readdwarf3.pvD.1", sizeof(TempVar) );
1977         tv->name   = name;
1978         tv->level  = external ? 0 : parser->sp;
1979         tv->typeR  = typeR;
1980         tv->gexpr  = gexpr;
1981         tv->fbGX   = fbGX;
1982         tv->fName  = fileName;
1983         tv->fLine  = lineNo;
1984         tv->dioff  = posn;
1985         tv->absOri = abs_ori;
1986
1987         /* See explanation on definition of type TempVar for the
1988            reason for this elaboration. */
1989         tv->nRanges = nRanges;
1990         tv->rngOneMin = 0;
1991         tv->rngOneMax = 0;
1992         tv->rngMany = NULL;
1993         if (nRanges == 1) {
1994            AddrRange* range = VG_(indexXA)(xa, 0);
1995            tv->rngOneMin = range->aMin;
1996            tv->rngOneMax = range->aMax;
1997         }
1998         else if (nRanges > 1) {
1999            /* See if we already have a range list which is
2000               structurally identical.  If so, use that; if not, clone
2001               this one, and add it to our collection. */
2002            UWord keyW, valW;
2003            if (VG_(lookupFM)( rangestree, &keyW, &valW, (UWord)xa )) {
2004               XArray* old = (XArray*)keyW;
2005               tl_assert(valW == 0);
2006               tl_assert(old != xa);
2007               tv->rngMany = old;
2008            } else {
2009               XArray* cloned = VG_(cloneXA)( "di.readdwarf3.pvD.2", xa );
2010               tv->rngMany = cloned;
2011               VG_(addToFM)( rangestree, (UWord)cloned, 0 );
2012            }
2013         }
2014
2015         VG_(addToXA)( tempvars, &tv );
2016
2017         TRACE_D3("  Recording this variable, with %ld PC range(s)\n",
2018                  VG_(sizeXA)(xa) );
2019         /* collect stats on how effective the ->ranges special
2020            casing is */
2021         if (0) {
2022            static Int ntot=0, ngt=0;
2023            ntot++;
2024            if (tv->rngMany) ngt++;
2025            if (0 == (ntot % 100000))
2026               VG_(printf)("XXXX %d tot, %d cloned\n", ntot, ngt);
2027         }
2028
2029      }
2030
2031      /* Here are some other weird cases seen in the wild:
2032
2033            We have a variable with a name and a type, but no
2034            location.  I guess that's a sign that it has been
2035            optimised away.  Ignore it.  Here's an example:
2036
2037            static Int lc_compar(void* n1, void* n2) {
2038               MC_Chunk* mc1 = *(MC_Chunk**)n1;
2039               MC_Chunk* mc2 = *(MC_Chunk**)n2;
2040               return (mc1->data < mc2->data ? -1 : 1);
2041            }
2042
2043            Both mc1 and mc2 are like this
2044            <2><5bc>: Abbrev Number: 21 (DW_TAG_variable)
2045                DW_AT_name        : mc1
2046                DW_AT_decl_file   : 1
2047                DW_AT_decl_line   : 216
2048                DW_AT_type        : <5d3>
2049
2050            whereas n1 and n2 do have locations specified.
2051
2052            ---------------------------------------------
2053
2054            We see a DW_TAG_formal_parameter with a type, but
2055            no name and no location.  It's probably part of a function type
2056            construction, thusly, hence ignore it:
2057         <1><2b4>: Abbrev Number: 12 (DW_TAG_subroutine_type)
2058             DW_AT_sibling     : <2c9>
2059             DW_AT_prototyped  : 1
2060             DW_AT_type        : <114>
2061         <2><2be>: Abbrev Number: 13 (DW_TAG_formal_parameter)
2062             DW_AT_type        : <13e>
2063         <2><2c3>: Abbrev Number: 13 (DW_TAG_formal_parameter)
2064             DW_AT_type        : <133>
2065
2066            ---------------------------------------------
2067
2068            Is very minimal, like this:
2069            <4><81d>: Abbrev Number: 44 (DW_TAG_variable)
2070                DW_AT_abstract_origin: <7ba>
2071            What that signifies I have no idea.  Ignore.
2072
2073            ----------------------------------------------
2074
2075            Is very minimal, like this:
2076            <200f>: DW_TAG_formal_parameter
2077                DW_AT_abstract_ori: <1f4c>
2078                DW_AT_location    : 13440
2079            What that signifies I have no idea.  Ignore.
2080            It might be significant, though: the variable at least
2081            has a location and so might exist somewhere.
2082            Maybe we should handle this.
2083
2084            ---------------------------------------------
2085
2086            <22407>: DW_TAG_variable
2087              DW_AT_name        : (indirect string, offset: 0x6579):
2088                                  vgPlain_trampoline_stuff_start
2089              DW_AT_decl_file   : 29
2090              DW_AT_decl_line   : 56
2091              DW_AT_external    : 1
2092              DW_AT_declaration : 1
2093
2094            Nameless and typeless variable that has a location?  Who
2095            knows.  Not me.
2096            <2><3d178>: Abbrev Number: 22 (DW_TAG_variable)
2097                 DW_AT_location    : 9 byte block: 3 c0 c7 13 38 0 0 0 0
2098                                     (DW_OP_addr: 3813c7c0)
2099
2100            No, really.  Check it out.  gcc is quite simply borked.
2101            <3><168cc>: Abbrev Number: 141 (DW_TAG_variable)
2102            // followed by no attributes, and the next DIE is a sibling,
2103            // not a child
2104            */
2105   }
2106   return;
2107
2108  bad_DIE:
2109   set_position_of_Cursor( c_die,  saved_die_c_offset );
2110   set_position_of_Cursor( c_abbv, saved_abbv_c_offset );
2111   VG_(printf)("\nparse_var_DIE: confused by:\n");
2112   posn = uncook_die( cc, posn, &debug_types_flag, &alt_flag );
2113   VG_(printf)(" <%d><%lx>: %s", level, posn, ML_(pp_DW_TAG)( dtag ) );
2114   if (debug_types_flag) {
2115      VG_(printf)(" (in .debug_types)");
2116   }
2117   else if (alt_flag) {
2118      VG_(printf)(" (in alternate .debug_info)");
2119   }
2120   VG_(printf)("\n");
2121   while (True) {
2122      DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2123      DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2124      if (attr == 0 && form == 0) break;
2125      VG_(printf)("     %18s: ", ML_(pp_DW_AT)(attr));
2126      /* Get the form contents, so as to print them */
2127      get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2128                         cc, c_die, True, form );
2129      VG_(printf)("\t\n");
2130   }
2131   VG_(printf)("\n");
2132   cc->barf("parse_var_DIE: confused by the above DIE");
2133   /*NOTREACHED*/
2134}
2135
2136
2137/*------------------------------------------------------------*/
2138/*---                                                      ---*/
2139/*--- Parsing of type-related DIEs                         ---*/
2140/*---                                                      ---*/
2141/*------------------------------------------------------------*/
2142
2143#define N_D3_TYPE_STACK 16
2144
2145typedef
2146   struct {
2147      /* What source language?  'A'=Ada83/95,
2148                                'C'=C/C++,
2149                                'F'=Fortran,
2150                                '?'=other
2151         Established once per compilation unit. */
2152      UChar language;
2153      /* A stack of types which are currently under construction */
2154      Int   sp; /* [sp] is innermost active entry; sp==-1 for empty
2155                   stack */
2156      /* Note that the TyEnts in qparentE are temporary copies of the
2157         ones accumulating in the main tyent array.  So it is not safe
2158         to free up anything on them when popping them off the stack
2159         (iow, it isn't safe to use TyEnt__make_EMPTY on them).  Just
2160         memset them to zero when done. */
2161      TyEnt qparentE[N_D3_TYPE_STACK]; /* parent TyEnts */
2162      Int   qlevel[N_D3_TYPE_STACK];
2163
2164   }
2165   D3TypeParser;
2166
2167static void typestack_show ( D3TypeParser* parser, HChar* str ) {
2168   Word i;
2169   VG_(printf)("  typestack (%s) {\n", str);
2170   for (i = 0; i <= parser->sp; i++) {
2171      VG_(printf)("    [%ld] (level %d): ", i, parser->qlevel[i]);
2172      ML_(pp_TyEnt)( &parser->qparentE[i] );
2173      VG_(printf)("\n");
2174   }
2175   VG_(printf)("  }\n");
2176}
2177
2178/* Remove from the stack, all entries with .level > 'level' */
2179static
2180void typestack_preen ( D3TypeParser* parser, Bool td3, Int level )
2181{
2182   Bool changed = False;
2183   vg_assert(parser->sp < N_D3_TYPE_STACK);
2184   while (True) {
2185      vg_assert(parser->sp >= -1);
2186      if (parser->sp == -1) break;
2187      if (parser->qlevel[parser->sp] <= level) break;
2188      if (0)
2189         TRACE_D3("BBBBAAAA typestack_pop [newsp=%d]\n", parser->sp-1);
2190      vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
2191      VG_(memset)(&parser->qparentE[parser->sp], 0, sizeof(TyEnt));
2192      parser->qparentE[parser->sp].cuOff = D3_INVALID_CUOFF;
2193      parser->qparentE[parser->sp].tag = Te_EMPTY;
2194      parser->qlevel[parser->sp] = 0;
2195      parser->sp--;
2196      changed = True;
2197   }
2198   if (changed && td3)
2199      typestack_show( parser, "after preen" );
2200}
2201
2202static Bool typestack_is_empty ( D3TypeParser* parser ) {
2203   vg_assert(parser->sp >= -1 && parser->sp < N_D3_TYPE_STACK);
2204   return parser->sp == -1;
2205}
2206
2207static void typestack_push ( CUConst* cc,
2208                             D3TypeParser* parser,
2209                             Bool td3,
2210                             TyEnt* parentE, Int level ) {
2211   if (0)
2212   TRACE_D3("BBBBAAAA typestack_push[newsp=%d]: %d  %05lx\n",
2213            parser->sp+1, level, parentE->cuOff);
2214
2215   /* First we need to zap everything >= 'level', as we are about to
2216      replace any previous entry at 'level', so .. */
2217   typestack_preen(parser, /*td3*/False, level-1);
2218
2219   vg_assert(parser->sp >= -1);
2220   vg_assert(parser->sp < N_D3_TYPE_STACK);
2221   if (parser->sp == N_D3_TYPE_STACK-1)
2222      cc->barf("typestack_push: N_D3_TYPE_STACK is too low; "
2223               "increase and recompile");
2224   if (parser->sp >= 0)
2225      vg_assert(parser->qlevel[parser->sp] < level);
2226   parser->sp++;
2227   vg_assert(parser->qparentE[parser->sp].tag == Te_EMPTY);
2228   vg_assert(parser->qlevel[parser->sp]  == 0);
2229   vg_assert(parentE);
2230   vg_assert(ML_(TyEnt__is_type)(parentE));
2231   vg_assert(parentE->cuOff != D3_INVALID_CUOFF);
2232   parser->qparentE[parser->sp] = *parentE;
2233   parser->qlevel[parser->sp]  = level;
2234   if (td3)
2235      typestack_show( parser, "after push" );
2236}
2237
2238/* True if the subrange type being parsed gives the bounds of an array. */
2239static Bool subrange_type_denotes_array_bounds ( D3TypeParser* parser,
2240                                                 DW_TAG dtag ) {
2241   vg_assert(dtag == DW_TAG_subrange_type);
2242   /* For most languages, a subrange_type dtag always gives the
2243      bounds of an array.
2244      For Ada, there are additional conditions as a subrange_type
2245      is also used for other purposes. */
2246   if (parser->language != 'A')
2247      /* not Ada, so it definitely denotes an array bound. */
2248      return True;
2249   else
2250      /* Extra constraints for Ada: it only denotes an array bound if .. */
2251      return (! typestack_is_empty(parser)
2252              && parser->qparentE[parser->sp].tag == Te_TyArray);
2253}
2254
2255/* Parse a type-related DIE.  'parser' holds the current parser state.
2256   'admin' is where the completed types are dumped.  'dtag' is the tag
2257   for this DIE.  'c_die' points to the start of the data fields (FORM
2258   stuff) for the DIE.  c_abbv points to the start of the (name,form)
2259   pairs which describe the DIE.
2260
2261   We may find the DIE uninteresting, in which case we should ignore
2262   it.
2263
2264   What happens: the DIE is examined.  If uninteresting, it is ignored.
2265   Otherwise, the DIE gives rise to two things:
2266
2267   (1) the offset of this DIE in the CU -- the cuOffset, a UWord
2268   (2) a TyAdmin structure, which holds the type, or related stuff
2269
2270   (2) is added at the end of 'tyadmins', at some index, say 'i'.
2271
2272   A pair (cuOffset, i) is added to 'tydict'.
2273
2274   Hence 'tyadmins' holds the actual type entities, and 'tydict' holds
2275   a mapping from cuOffset to the index of the corresponding entry in
   'tyadmins'.
2277
2278   When resolving a cuOffset to a TyAdmin, first look up the cuOffset
2279   in the tydict (by binary search).  This gives an index into
2280   tyadmins, and the required entity lives in tyadmins at that index.
2281*/
2282__attribute__((noinline))
2283static void parse_type_DIE ( /*MOD*/XArray* /* of TyEnt */ tyents,
2284                             /*MOD*/D3TypeParser* parser,
2285                             DW_TAG dtag,
2286                             UWord posn,
2287                             Int level,
2288                             Cursor* c_die,
2289                             Cursor* c_abbv,
2290                             CUConst* cc,
2291                             Bool td3 )
2292{
2293   ULong cts;
2294   Int   ctsSzB;
2295   UWord ctsMemSzB;
2296   TyEnt typeE;
2297   TyEnt atomE;
2298   TyEnt fieldE;
2299   TyEnt boundE;
2300   Bool  debug_types_flag;
2301   Bool  alt_flag;
2302
2303   UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
2304   UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv );
2305
2306   VG_(memset)( &typeE,  0xAA, sizeof(typeE) );
2307   VG_(memset)( &atomE,  0xAA, sizeof(atomE) );
2308   VG_(memset)( &fieldE, 0xAA, sizeof(fieldE) );
2309   VG_(memset)( &boundE, 0xAA, sizeof(boundE) );
2310
2311   /* If we've returned to a level at or above any previously noted
2312      parent, un-note it, so we don't believe we're still collecting
2313      its children. */
2314   typestack_preen( parser, td3, level-1 );
2315
2316   if (dtag == DW_TAG_compile_unit
2317       || dtag == DW_TAG_type_unit
2318       || dtag == DW_TAG_partial_unit) {
2319      /* See if we can find DW_AT_language, since it is important for
2320         establishing array bounds (see DW_TAG_subrange_type below in
2321         this fn) */
2322      while (True) {
2323         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2324         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2325         if (attr == 0 && form == 0) break;
2326         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2327                            cc, c_die, False/*td3*/, form );
2328         if (attr != DW_AT_language)
2329            continue;
2330         if (ctsSzB == 0)
2331           goto bad_DIE;
2332         switch (cts) {
2333            case DW_LANG_C89: case DW_LANG_C:
2334            case DW_LANG_C_plus_plus: case DW_LANG_ObjC:
2335            case DW_LANG_ObjC_plus_plus: case DW_LANG_UPC:
2336            case DW_LANG_Upc: case DW_LANG_C99:
2337               parser->language = 'C'; break;
2338            case DW_LANG_Fortran77: case DW_LANG_Fortran90:
2339            case DW_LANG_Fortran95:
2340               parser->language = 'F'; break;
2341            case DW_LANG_Ada83: case DW_LANG_Ada95:
2342               parser->language = 'A'; break;
2343            case DW_LANG_Cobol74:
2344            case DW_LANG_Cobol85: case DW_LANG_Pascal83:
2345            case DW_LANG_Modula2: case DW_LANG_Java:
2346            case DW_LANG_PLI:
2347            case DW_LANG_D: case DW_LANG_Python:
2348            case DW_LANG_Mips_Assembler:
2349               parser->language = '?'; break;
2350            default:
2351               goto bad_DIE;
2352         }
2353      }
2354   }
2355
2356   if (dtag == DW_TAG_base_type) {
2357      /* We can pick up a new base type any time. */
2358      VG_(memset)(&typeE, 0, sizeof(typeE));
2359      typeE.cuOff = D3_INVALID_CUOFF;
2360      typeE.tag   = Te_TyBase;
2361      while (True) {
2362         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2363         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2364         if (attr == 0 && form == 0) break;
2365         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2366                            cc, c_die, False/*td3*/, form );
2367         if (attr == DW_AT_name && ctsMemSzB > 0) {
2368            typeE.Te.TyBase.name
2369               = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.1",
2370                                    (UChar*)(UWord)cts );
2371         }
2372         if (attr == DW_AT_byte_size && ctsSzB > 0) {
2373            typeE.Te.TyBase.szB = cts;
2374         }
2375         if (attr == DW_AT_encoding && ctsSzB > 0) {
2376            switch (cts) {
2377               case DW_ATE_unsigned: case DW_ATE_unsigned_char:
2378               case DW_ATE_UTF: /* since DWARF4, e.g. char16_t from C++ */
2379               case DW_ATE_boolean:/* FIXME - is this correct? */
2380               case DW_ATE_unsigned_fixed:
2381                  typeE.Te.TyBase.enc = 'U'; break;
2382               case DW_ATE_signed: case DW_ATE_signed_char:
2383               case DW_ATE_signed_fixed:
2384                  typeE.Te.TyBase.enc = 'S'; break;
2385               case DW_ATE_float:
2386                  typeE.Te.TyBase.enc = 'F'; break;
2387               case DW_ATE_complex_float:
2388                  typeE.Te.TyBase.enc = 'C'; break;
2389               default:
2390                  goto bad_DIE;
2391            }
2392         }
2393      }
2394
2395      /* Invent a name if it doesn't have one.  gcc-4.3
2396         -ftree-vectorize is observed to emit nameless base types. */
2397      if (!typeE.Te.TyBase.name)
2398         typeE.Te.TyBase.name
2399            = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.2",
2400                                 "<anon_base_type>" );
2401
2402      /* Do we have something that looks sane? */
2403      if (/* must have a name */
2404          typeE.Te.TyBase.name == NULL
2405          /* and a plausible size.  Yes, really 32: "complex long
2406             double" apparently has size=32 */
2407          || typeE.Te.TyBase.szB < 0 || typeE.Te.TyBase.szB > 32
2408          /* and a plausible encoding */
2409          || (typeE.Te.TyBase.enc != 'U'
2410              && typeE.Te.TyBase.enc != 'S'
2411              && typeE.Te.TyBase.enc != 'F'
2412              && typeE.Te.TyBase.enc != 'C'))
2413         goto bad_DIE;
2414      /* Last minute hack: if we see this
2415         <1><515>: DW_TAG_base_type
2416             DW_AT_byte_size   : 0
2417             DW_AT_encoding    : 5
2418             DW_AT_name        : void
2419         convert it into a real Void type. */
2420      if (typeE.Te.TyBase.szB == 0
2421          && 0 == VG_(strcmp)("void", typeE.Te.TyBase.name)) {
2422         ML_(TyEnt__make_EMPTY)(&typeE);
2423         typeE.tag = Te_TyVoid;
2424         typeE.Te.TyVoid.isFake = False; /* it's a real one! */
2425      }
2426
2427      goto acquire_Type;
2428   }
2429
2430   /*
2431    * An example of DW_TAG_rvalue_reference_type:
2432    *
2433    * $ readelf --debug-dump /usr/lib/debug/usr/lib/libstdc++.so.6.0.16.debug
2434    *  <1><1014>: Abbrev Number: 55 (DW_TAG_rvalue_reference_type)
2435    *     <1015>   DW_AT_byte_size   : 4
2436    *     <1016>   DW_AT_type        : <0xe52>
2437    */
2438   if (dtag == DW_TAG_pointer_type || dtag == DW_TAG_reference_type
2439       || dtag == DW_TAG_ptr_to_member_type
2440       || dtag == DW_TAG_rvalue_reference_type) {
2441      /* This seems legit for _pointer_type and _reference_type.  I
2442         don't know if rolling _ptr_to_member_type in here really is
2443         legit, but it's better than not handling it at all. */
2444      VG_(memset)(&typeE, 0, sizeof(typeE));
2445      typeE.cuOff = D3_INVALID_CUOFF;
2446      switch (dtag) {
2447      case DW_TAG_pointer_type:
2448         typeE.tag = Te_TyPtr;
2449         break;
2450      case DW_TAG_reference_type:
2451         typeE.tag = Te_TyRef;
2452         break;
2453      case DW_TAG_ptr_to_member_type:
2454         typeE.tag = Te_TyPtrMbr;
2455         break;
2456      case DW_TAG_rvalue_reference_type:
2457         typeE.tag = Te_TyRvalRef;
2458         break;
2459      default:
2460         vg_assert(False);
2461      }
2462      /* target type defaults to void */
2463      typeE.Te.TyPorR.typeR = D3_FAKEVOID_CUOFF;
2464      /* These four type kinds don't *have* to specify their size, in
2465         which case we assume it's a machine word.  But if they do
2466         specify it, it must be a machine word :-)  This probably
2467         assumes that the word size of the Dwarf3 we're reading is the
2468         same size as that on the machine.  gcc appears to give a size
2469         whereas icc9 doesn't. */
2470      typeE.Te.TyPorR.szB = sizeof(UWord);
2471      while (True) {
2472         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2473         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2474         if (attr == 0 && form == 0) break;
2475         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2476                            cc, c_die, False/*td3*/, form );
2477         if (attr == DW_AT_byte_size && ctsSzB > 0) {
2478            typeE.Te.TyPorR.szB = cts;
2479         }
2480         if (attr == DW_AT_type && ctsSzB > 0) {
2481            typeE.Te.TyPorR.typeR = cook_die_using_form( cc, (UWord)cts, form );
2482         }
2483      }
2484      /* Do we have something that looks sane? */
2485      if (typeE.Te.TyPorR.szB != sizeof(UWord))
2486         goto bad_DIE;
2487      else
2488         goto acquire_Type;
2489   }
2490
2491   if (dtag == DW_TAG_enumeration_type) {
2492      /* Create a new Type to hold the results. */
2493      VG_(memset)(&typeE, 0, sizeof(typeE));
2494      typeE.cuOff = posn;
2495      typeE.tag   = Te_TyEnum;
2496      typeE.Te.TyEnum.atomRs
2497         = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.enum_type.1",
2498                       ML_(dinfo_free),
2499                       sizeof(UWord) );
2500      while (True) {
2501         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2502         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2503         if (attr == 0 && form == 0) break;
2504         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2505                            cc, c_die, False/*td3*/, form );
2506         if (attr == DW_AT_name && ctsMemSzB > 0) {
2507            typeE.Te.TyEnum.name
2508              = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.2",
2509                                   (UChar*)(UWord)cts );
2510         }
2511         if (attr == DW_AT_byte_size && ctsSzB > 0) {
2512            typeE.Te.TyEnum.szB = cts;
2513         }
2514      }
2515
2516      if (!typeE.Te.TyEnum.name)
2517         typeE.Te.TyEnum.name
2518            = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.3",
2519                                 "<anon_enum_type>" );
2520
2521      /* Do we have something that looks sane? */
2522      if (typeE.Te.TyEnum.szB == 0
2523          /* we must know the size */
2524          /* but not for Ada, which uses such dummy
2525             enumerations as helper for gdb ada mode. */
2526          && parser->language != 'A') {
2527         /* GCC has been seen to put an odd DIE like this into
2528            .debug_types:
2529
2530            <1><cb72>: DW_TAG_enumeration_type (in .debug_types)
2531            DW_AT_name        : (indirect string, offset: 0x3374a): exec_direction_kind
2532            DW_AT_declaration : 1
2533
2534            It isn't clear what this means, but we accept it and
2535            assume that the enum is int-sized.  */
2536         if (cc->is_type_unit) {
2537            typeE.Te.TyEnum.szB = sizeof(int);
2538         } else {
2539            goto bad_DIE;
2540         }
2541      }
2542
2543      /* On't stack! */
2544      typestack_push( cc, parser, td3, &typeE, level );
2545      goto acquire_Type;
2546   }
2547
2548   /* gcc (GCC) 4.4.0 20081017 (experimental) occasionally produces
2549      DW_TAG_enumerator with only a DW_AT_name but no
2550      DW_AT_const_value.  This is in violation of the Dwarf3 standard,
2551      and appears to be a new "feature" of gcc - versions 4.3.x and
2552      earlier do not appear to do this.  So accept DW_TAG_enumerator
2553      which only have a name but no value.  An example:
2554
2555      <1><180>: Abbrev Number: 6 (DW_TAG_enumeration_type)
2556         <181>   DW_AT_name        : (indirect string, offset: 0xda70):
2557                                     QtMsgType
2558         <185>   DW_AT_byte_size   : 4
2559         <186>   DW_AT_decl_file   : 14
2560         <187>   DW_AT_decl_line   : 1480
2561         <189>   DW_AT_sibling     : <0x1a7>
2562      <2><18d>: Abbrev Number: 7 (DW_TAG_enumerator)
2563         <18e>   DW_AT_name        : (indirect string, offset: 0x9e18):
2564                                     QtDebugMsg
2565      <2><192>: Abbrev Number: 7 (DW_TAG_enumerator)
2566         <193>   DW_AT_name        : (indirect string, offset: 0x1505f):
2567                                     QtWarningMsg
2568      <2><197>: Abbrev Number: 7 (DW_TAG_enumerator)
2569         <198>   DW_AT_name        : (indirect string, offset: 0x16f4a):
2570                                     QtCriticalMsg
2571      <2><19c>: Abbrev Number: 7 (DW_TAG_enumerator)
2572         <19d>   DW_AT_name        : (indirect string, offset: 0x156dd):
2573                                     QtFatalMsg
2574      <2><1a1>: Abbrev Number: 7 (DW_TAG_enumerator)
2575         <1a2>   DW_AT_name        : (indirect string, offset: 0x13660):
2576                                     QtSystemMsg
2577   */
2578   if (dtag == DW_TAG_enumerator) {
2579      VG_(memset)( &atomE, 0, sizeof(atomE) );
2580      atomE.cuOff = posn;
2581      atomE.tag   = Te_Atom;
2582      while (True) {
2583         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2584         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2585         if (attr == 0 && form == 0) break;
2586         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2587                            cc, c_die, False/*td3*/, form );
2588         if (attr == DW_AT_name && ctsMemSzB > 0) {
2589            atomE.Te.Atom.name
2590              = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enumerator.1",
2591                                   (UChar*)(UWord)cts );
2592         }
2593         if (attr == DW_AT_const_value && ctsSzB > 0) {
2594            atomE.Te.Atom.value = cts;
2595            atomE.Te.Atom.valueKnown = True;
2596         }
2597      }
2598      /* Do we have something that looks sane? */
2599      if (atomE.Te.Atom.name == NULL)
2600         goto bad_DIE;
2601      /* Do we have a plausible parent? */
2602      if (typestack_is_empty(parser)) goto bad_DIE;
2603      vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
2604      vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
2605      if (level != parser->qlevel[parser->sp]+1) goto bad_DIE;
2606      if (parser->qparentE[parser->sp].tag != Te_TyEnum) goto bad_DIE;
2607      /* Record this child in the parent */
2608      vg_assert(parser->qparentE[parser->sp].Te.TyEnum.atomRs);
2609      VG_(addToXA)( parser->qparentE[parser->sp].Te.TyEnum.atomRs,
2610                    &atomE );
2611      /* And record the child itself */
2612      goto acquire_Atom;
2613   }
2614
2615   /* Treat DW_TAG_class_type as if it was a DW_TAG_structure_type.  I
2616      don't know if this is correct, but it at least makes this reader
2617      usable for gcc-4.3 produced Dwarf3. */
2618   if (dtag == DW_TAG_structure_type || dtag == DW_TAG_class_type
2619       || dtag == DW_TAG_union_type) {
2620      Bool have_szB = False;
2621      Bool is_decl  = False;
2622      Bool is_spec  = False;
2623      /* Create a new Type to hold the results. */
2624      VG_(memset)(&typeE, 0, sizeof(typeE));
2625      typeE.cuOff = posn;
2626      typeE.tag   = Te_TyStOrUn;
2627      typeE.Te.TyStOrUn.name = NULL;
2628      typeE.Te.TyStOrUn.fieldRs
2629         = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.pTD.struct_type.1",
2630                       ML_(dinfo_free),
2631                       sizeof(UWord) );
2632      typeE.Te.TyStOrUn.complete = True;
2633      typeE.Te.TyStOrUn.isStruct = dtag == DW_TAG_structure_type
2634                                   || dtag == DW_TAG_class_type;
2635      while (True) {
2636         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2637         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2638         if (attr == 0 && form == 0) break;
2639         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2640                            cc, c_die, False/*td3*/, form );
2641         if (attr == DW_AT_name && ctsMemSzB > 0) {
2642            typeE.Te.TyStOrUn.name
2643               = ML_(dinfo_strdup)( "di.readdwarf3.ptD.struct_type.2",
2644                                    (UChar*)(UWord)cts );
2645         }
2646         if (attr == DW_AT_byte_size && ctsSzB >= 0) {
2647            typeE.Te.TyStOrUn.szB = cts;
2648            have_szB = True;
2649         }
2650         if (attr == DW_AT_declaration && ctsSzB > 0 && cts > 0) {
2651            is_decl = True;
2652         }
2653         if (attr == DW_AT_specification && ctsSzB > 0 && cts > 0) {
2654            is_spec = True;
2655         }
2656      }
2657      /* Do we have something that looks sane? */
2658      if (is_decl && (!is_spec)) {
2659         /* It's a DW_AT_declaration.  We require the name but
2660            nothing else. */
         /* JRS 2012-06-28: following discussion w/ tromey, if the
            type doesn't have a name, just make one up, and accept it.
2663            It might be referred to by other DIEs, so ignoring it
2664            doesn't seem like a safe option. */
2665         if (typeE.Te.TyStOrUn.name == NULL)
2666            typeE.Te.TyStOrUn.name
2667               = ML_(dinfo_strdup)( "di.readdwarf3.ptD.struct_type.3",
2668                                    "<anon_struct_type>" );
2669         typeE.Te.TyStOrUn.complete = False;
2670         /* JRS 2009 Aug 10: <possible kludge>? */
2671         /* Push this tyent on the stack, even though it's incomplete.
2672            It appears that gcc-4.4 on Fedora 11 will sometimes create
2673            DW_TAG_member entries for it, and so we need to have a
2674            plausible parent present in order for that to work.  See
2675            #200029 comments 8 and 9. */
2676         typestack_push( cc, parser, td3, &typeE, level );
2677         /* </possible kludge> */
2678         goto acquire_Type;
2679      }
2680      if ((!is_decl) /* && (!is_spec) */) {
2681         /* this is the common, ordinary case */
2682         if ((!have_szB) /* we must know the size */
2683             /* But the name can be present, or not */)
2684            goto bad_DIE;
2685         /* On't stack! */
2686         typestack_push( cc, parser, td3, &typeE, level );
2687         goto acquire_Type;
2688      }
2689      else {
2690         /* don't know how to handle any other variants just now */
2691         goto bad_DIE;
2692      }
2693   }
2694
2695   if (dtag == DW_TAG_member) {
2696      /* Acquire member entries for both DW_TAG_structure_type and
2697         DW_TAG_union_type.  They differ minorly, in that struct
2698         members must have a DW_AT_data_member_location expression
2699         whereas union members must not. */
2700      Bool parent_is_struct;
2701      VG_(memset)( &fieldE, 0, sizeof(fieldE) );
2702      fieldE.cuOff = posn;
2703      fieldE.tag   = Te_Field;
2704      fieldE.Te.Field.typeR = D3_INVALID_CUOFF;
2705      while (True) {
2706         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2707         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2708         if (attr == 0 && form == 0) break;
2709         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2710                            cc, c_die, False/*td3*/, form );
2711         if (attr == DW_AT_name && ctsMemSzB > 0) {
2712            fieldE.Te.Field.name
2713               = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.1",
2714                                    (UChar*)(UWord)cts );
2715         }
2716         if (attr == DW_AT_type && ctsSzB > 0) {
2717            fieldE.Te.Field.typeR = cook_die_using_form( cc, (UWord)cts, form );
2718         }
2719         /* There are 2 different cases for DW_AT_data_member_location.
2720            If it is a constant class attribute, it contains byte offset
2721            from the beginning of the containing entity.
2722            Otherwise it is a location expression.  */
2723         if (attr == DW_AT_data_member_location && ctsSzB > 0) {
2724            fieldE.Te.Field.nLoc = -1;
2725            fieldE.Te.Field.pos.offset = cts;
2726         } else if (attr == DW_AT_data_member_location && ctsMemSzB > 0) {
2727            fieldE.Te.Field.nLoc = (UWord)ctsMemSzB;
2728            fieldE.Te.Field.pos.loc
2729               = ML_(dinfo_memdup)( "di.readdwarf3.ptD.member.2",
2730                                    (UChar*)(UWord)cts,
2731                                    (SizeT)fieldE.Te.Field.nLoc );
2732         }
2733      }
2734      /* Do we have a plausible parent? */
2735      if (typestack_is_empty(parser)) goto bad_DIE;
2736      vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
2737      vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
2738      if (level != parser->qlevel[parser->sp]+1) goto bad_DIE;
2739      if (parser->qparentE[parser->sp].tag != Te_TyStOrUn) goto bad_DIE;
      /* Do we have something that looks sane?  If this is a member of a
2741         struct, we must have a location expression; but if a member
2742         of a union that is irrelevant (D3 spec sec 5.6.6).  We ought
2743         to reject in the latter case, but some compilers have been
2744         observed to emit constant-zero expressions.  So just ignore
2745         them. */
2746      parent_is_struct
2747         = parser->qparentE[parser->sp].Te.TyStOrUn.isStruct;
2748      if (!fieldE.Te.Field.name)
2749         fieldE.Te.Field.name
2750            = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.3",
2751                                 "<anon_field>" );
2752      vg_assert(fieldE.Te.Field.name);
2753      if (fieldE.Te.Field.typeR == D3_INVALID_CUOFF)
2754         goto bad_DIE;
2755      if (fieldE.Te.Field.nLoc) {
2756         if (!parent_is_struct) {
2757            /* If this is a union type, pretend we haven't seen the data
2758               member location expression, as it is by definition
2759               redundant (it must be zero). */
2760            if (fieldE.Te.Field.nLoc > 0)
2761               ML_(dinfo_free)(fieldE.Te.Field.pos.loc);
2762            fieldE.Te.Field.pos.loc = NULL;
2763            fieldE.Te.Field.nLoc = 0;
2764         }
2765         /* Record this child in the parent */
2766         fieldE.Te.Field.isStruct = parent_is_struct;
2767         vg_assert(parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs);
2768         VG_(addToXA)( parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs,
2769                       &posn );
2770         /* And record the child itself */
2771         goto acquire_Field;
2772      } else {
2773         /* Member with no location - this can happen with static
2774            const members in C++ code which are compile time constants
            that do not exist in the class. They're not of any interest
2776            to us so we ignore them. */
2777         ML_(TyEnt__make_EMPTY)(&fieldE);
2778      }
2779   }
2780
2781   if (dtag == DW_TAG_array_type) {
2782      VG_(memset)(&typeE, 0, sizeof(typeE));
2783      typeE.cuOff = posn;
2784      typeE.tag   = Te_TyArray;
2785      typeE.Te.TyArray.typeR = D3_INVALID_CUOFF;
2786      typeE.Te.TyArray.boundRs
2787         = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.array_type.1",
2788                       ML_(dinfo_free),
2789                       sizeof(UWord) );
2790      while (True) {
2791         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2792         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2793         if (attr == 0 && form == 0) break;
2794         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2795                            cc, c_die, False/*td3*/, form );
2796         if (attr == DW_AT_type && ctsSzB > 0) {
2797            typeE.Te.TyArray.typeR = cook_die_using_form( cc, (UWord)cts,
2798                                                          form );
2799         }
2800      }
2801      if (typeE.Te.TyArray.typeR == D3_INVALID_CUOFF)
2802         goto bad_DIE;
2803      /* On't stack! */
2804      typestack_push( cc, parser, td3, &typeE, level );
2805      goto acquire_Type;
2806   }
2807
2808   /* this is a subrange type defining the bounds of an array. */
2809   if (dtag == DW_TAG_subrange_type
2810       && subrange_type_denotes_array_bounds(parser, dtag)) {
2811      Bool have_lower = False;
2812      Bool have_upper = False;
2813      Bool have_count = False;
2814      Long lower = 0;
2815      Long upper = 0;
2816
2817      switch (parser->language) {
2818         case 'C': have_lower = True;  lower = 0; break;
2819         case 'F': have_lower = True;  lower = 1; break;
2820         case '?': have_lower = False; break;
2821         case 'A': have_lower = False; break;
2822         default:  vg_assert(0); /* assured us by handling of
2823                                    DW_TAG_compile_unit in this fn */
2824      }
2825
2826      VG_(memset)( &boundE, 0, sizeof(boundE) );
2827      boundE.cuOff = D3_INVALID_CUOFF;
2828      boundE.tag   = Te_Bound;
2829      while (True) {
2830         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2831         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2832         if (attr == 0 && form == 0) break;
2833         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2834                            cc, c_die, False/*td3*/, form );
2835         if (attr == DW_AT_lower_bound && ctsSzB > 0) {
2836            lower      = (Long)cts;
2837            have_lower = True;
2838         }
2839         if (attr == DW_AT_upper_bound && ctsSzB > 0) {
2840            upper      = (Long)cts;
2841            have_upper = True;
2842         }
2843         if (attr == DW_AT_count && ctsSzB > 0) {
2844            /*count    = (Long)cts;*/
2845            have_count = True;
2846         }
2847      }
2848      /* FIXME: potentially skip the rest if no parent present, since
2849         it could be the case that this subrange type is free-standing
2850         (not being used to describe the bounds of a containing array
2851         type) */
2852      /* Do we have a plausible parent? */
2853      if (typestack_is_empty(parser)) goto bad_DIE;
2854      vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
2855      vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
2856      if (level != parser->qlevel[parser->sp]+1) goto bad_DIE;
2857      if (parser->qparentE[parser->sp].tag != Te_TyArray) goto bad_DIE;
2858
2859      /* Figure out if we have a definite range or not */
2860      if (have_lower && have_upper && (!have_count)) {
2861         boundE.Te.Bound.knownL = True;
2862         boundE.Te.Bound.knownU = True;
2863         boundE.Te.Bound.boundL = lower;
2864         boundE.Te.Bound.boundU = upper;
2865      }
2866      else if (have_lower && (!have_upper) && (!have_count)) {
2867         boundE.Te.Bound.knownL = True;
2868         boundE.Te.Bound.knownU = False;
2869         boundE.Te.Bound.boundL = lower;
2870         boundE.Te.Bound.boundU = 0;
2871      }
2872      else if ((!have_lower) && have_upper && (!have_count)) {
2873         boundE.Te.Bound.knownL = False;
2874         boundE.Te.Bound.knownU = True;
2875         boundE.Te.Bound.boundL = 0;
2876         boundE.Te.Bound.boundU = upper;
2877      }
2878      else if ((!have_lower) && (!have_upper) && (!have_count)) {
2879         boundE.Te.Bound.knownL = False;
2880         boundE.Te.Bound.knownU = False;
2881         boundE.Te.Bound.boundL = 0;
2882         boundE.Te.Bound.boundU = 0;
2883      } else {
2884         /* FIXME: handle more cases */
2885         goto bad_DIE;
2886      }
2887
2888      /* Record this bound in the parent */
2889      boundE.cuOff = posn;
2890      vg_assert(parser->qparentE[parser->sp].Te.TyArray.boundRs);
2891      VG_(addToXA)( parser->qparentE[parser->sp].Te.TyArray.boundRs,
2892                    &boundE.cuOff );
2893      /* And record the child itself */
2894      goto acquire_Bound;
2895   }
2896
2897   /* typedef or subrange_type other than array bounds. */
2898   if (dtag == DW_TAG_typedef
2899       || (dtag == DW_TAG_subrange_type
2900           && !subrange_type_denotes_array_bounds(parser, dtag))) {
2901      /* subrange_type other than array bound is only for Ada. */
2902      vg_assert (dtag == DW_TAG_typedef || parser->language == 'A');
2903      /* We can pick up a new typedef/subrange_type any time. */
2904      VG_(memset)(&typeE, 0, sizeof(typeE));
2905      typeE.cuOff = D3_INVALID_CUOFF;
2906      typeE.tag   = Te_TyTyDef;
2907      typeE.Te.TyTyDef.name = NULL;
2908      typeE.Te.TyTyDef.typeR = D3_INVALID_CUOFF;
2909      while (True) {
2910         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2911         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2912         if (attr == 0 && form == 0) break;
2913         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2914                            cc, c_die, False/*td3*/, form );
2915         if (attr == DW_AT_name && ctsMemSzB > 0) {
2916            typeE.Te.TyTyDef.name
2917               = ML_(dinfo_strdup)( "di.readdwarf3.ptD.typedef.1",
2918                                    (UChar*)(UWord)cts );
2919         }
2920         if (attr == DW_AT_type && ctsSzB > 0) {
2921            typeE.Te.TyTyDef.typeR = cook_die_using_form( cc, (UWord)cts,
2922                                                          form );
2923         }
2924      }
2925      /* Do we have something that looks sane? */
2926      if (/* must have a name */
2927          typeE.Te.TyTyDef.name == NULL
2928          /* However gcc gnat Ada generates minimal typedef
2929             such as the below => accept no name for Ada.
2930             <6><91cc>: DW_TAG_typedef
2931                DW_AT_abstract_ori: <9066>
2932          */
2933          && parser->language != 'A'
2934          /* but the referred-to type can be absent */)
2935         goto bad_DIE;
2936      else
2937         goto acquire_Type;
2938   }
2939
2940   if (dtag == DW_TAG_subroutine_type) {
2941      /* function type? just record that one fact and ask no
2942         further questions. */
2943      VG_(memset)(&typeE, 0, sizeof(typeE));
2944      typeE.cuOff = D3_INVALID_CUOFF;
2945      typeE.tag   = Te_TyFn;
2946      goto acquire_Type;
2947   }
2948
2949   if (dtag == DW_TAG_volatile_type || dtag == DW_TAG_const_type) {
2950      Int have_ty = 0;
2951      VG_(memset)(&typeE, 0, sizeof(typeE));
2952      typeE.cuOff = D3_INVALID_CUOFF;
2953      typeE.tag   = Te_TyQual;
2954      typeE.Te.TyQual.qual
2955         = dtag == DW_TAG_volatile_type ? 'V' : 'C';
2956      /* target type defaults to 'void' */
2957      typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF;
2958      while (True) {
2959         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2960         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2961         if (attr == 0 && form == 0) break;
2962         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2963                            cc, c_die, False/*td3*/, form );
2964         if (attr == DW_AT_type && ctsSzB > 0) {
2965            typeE.Te.TyQual.typeR = cook_die_using_form( cc, (UWord)cts, form );
2966            have_ty++;
2967         }
2968      }
2969      /* gcc sometimes generates DW_TAG_const/volatile_type without
2970         DW_AT_type and GDB appears to interpret the type as 'const
2971         void' (resp. 'volatile void').  So just allow it .. */
2972      if (have_ty == 1 || have_ty == 0)
2973         goto acquire_Type;
2974      else
2975         goto bad_DIE;
2976   }
2977
2978   /*
2979    * Treat DW_TAG_unspecified_type as type void. An example of DW_TAG_unspecified_type:
2980    *
2981    * $ readelf --debug-dump /usr/lib/debug/usr/lib/libstdc++.so.6.0.16.debug
2982    *  <1><10d4>: Abbrev Number: 53 (DW_TAG_unspecified_type)
2983    *     <10d5>   DW_AT_name        : (indirect string, offset: 0xdb7): decltype(nullptr)
2984    */
2985   if (dtag == DW_TAG_unspecified_type) {
2986      VG_(memset)(&typeE, 0, sizeof(typeE));
2987      typeE.cuOff           = D3_INVALID_CUOFF;
2988      typeE.tag             = Te_TyQual;
2989      typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF;
2990      goto acquire_Type;
2991   }
2992
2993   /* else ignore this DIE */
2994   return;
2995   /*NOTREACHED*/
2996
2997  acquire_Type:
2998   if (0) VG_(printf)("YYYY Acquire Type\n");
2999   vg_assert(ML_(TyEnt__is_type)( &typeE ));
3000   vg_assert(typeE.cuOff == D3_INVALID_CUOFF || typeE.cuOff == posn);
3001   typeE.cuOff = posn;
3002   VG_(addToXA)( tyents, &typeE );
3003   return;
3004   /*NOTREACHED*/
3005
3006  acquire_Atom:
3007   if (0) VG_(printf)("YYYY Acquire Atom\n");
3008   vg_assert(atomE.tag == Te_Atom);
3009   vg_assert(atomE.cuOff == D3_INVALID_CUOFF || atomE.cuOff == posn);
3010   atomE.cuOff = posn;
3011   VG_(addToXA)( tyents, &atomE );
3012   return;
3013   /*NOTREACHED*/
3014
3015  acquire_Field:
3016   /* For union members, Expr should be absent */
3017   if (0) VG_(printf)("YYYY Acquire Field\n");
3018   vg_assert(fieldE.tag == Te_Field);
3019   vg_assert(fieldE.Te.Field.nLoc <= 0 || fieldE.Te.Field.pos.loc != NULL);
3020   vg_assert(fieldE.Te.Field.nLoc != 0 || fieldE.Te.Field.pos.loc == NULL);
3021   if (fieldE.Te.Field.isStruct) {
3022      vg_assert(fieldE.Te.Field.nLoc != 0);
3023   } else {
3024      vg_assert(fieldE.Te.Field.nLoc == 0);
3025   }
3026   vg_assert(fieldE.cuOff == D3_INVALID_CUOFF || fieldE.cuOff == posn);
3027   fieldE.cuOff = posn;
3028   VG_(addToXA)( tyents, &fieldE );
3029   return;
3030   /*NOTREACHED*/
3031
3032  acquire_Bound:
3033   if (0) VG_(printf)("YYYY Acquire Bound\n");
3034   vg_assert(boundE.tag == Te_Bound);
3035   vg_assert(boundE.cuOff == D3_INVALID_CUOFF || boundE.cuOff == posn);
3036   boundE.cuOff = posn;
3037   VG_(addToXA)( tyents, &boundE );
3038   return;
3039   /*NOTREACHED*/
3040
3041  bad_DIE:
3042   set_position_of_Cursor( c_die,  saved_die_c_offset );
3043   set_position_of_Cursor( c_abbv, saved_abbv_c_offset );
3044   VG_(printf)("\nparse_type_DIE: confused by:\n");
3045   posn = uncook_die( cc, posn, &debug_types_flag, &alt_flag );
3046   VG_(printf)(" <%d><%lx>: %s", level, posn, ML_(pp_DW_TAG)( dtag ) );
3047   if (debug_types_flag) {
3048      VG_(printf)(" (in .debug_types)");
3049   } else if (alt_flag) {
3050      VG_(printf)(" (in alternate .debug_info)");
3051   }
3052   VG_(printf)("\n");
3053   while (True) {
3054      DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
3055      DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
3056      if (attr == 0 && form == 0) break;
3057      VG_(printf)("     %18s: ", ML_(pp_DW_AT)(attr));
3058      /* Get the form contents, so as to print them */
3059      get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
3060                         cc, c_die, True, form );
3061      VG_(printf)("\t\n");
3062   }
3063   VG_(printf)("\n");
3064   cc->barf("parse_type_DIE: confused by the above DIE");
3065   /*NOTREACHED*/
3066}
3067
3068
3069/*------------------------------------------------------------*/
3070/*---                                                      ---*/
3071/*--- Compression of type DIE information                  ---*/
3072/*---                                                      ---*/
3073/*------------------------------------------------------------*/
3074
3075static UWord chase_cuOff ( Bool* changed,
3076                           XArray* /* of TyEnt */ ents,
3077                           TyEntIndexCache* ents_cache,
3078                           UWord cuOff )
3079{
3080   TyEnt* ent;
3081   ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, cuOff );
3082
3083   if (!ent) {
3084      VG_(printf)("chase_cuOff: no entry for 0x%05lx\n", cuOff);
3085      *changed = False;
3086      return cuOff;
3087   }
3088
3089   vg_assert(ent->tag != Te_EMPTY);
3090   if (ent->tag != Te_INDIR) {
3091      *changed = False;
3092      return cuOff;
3093   } else {
3094      vg_assert(ent->Te.INDIR.indR < cuOff);
3095      *changed = True;
3096      return ent->Te.INDIR.indR;
3097   }
3098}
3099
3100static
3101void chase_cuOffs_in_XArray ( Bool* changed,
3102                              XArray* /* of TyEnt */ ents,
3103                              TyEntIndexCache* ents_cache,
3104                              /*MOD*/XArray* /* of UWord */ cuOffs )
3105{
3106   Bool b2 = False;
3107   Word i, n = VG_(sizeXA)( cuOffs );
3108   for (i = 0; i < n; i++) {
3109      Bool   b = False;
3110      UWord* p = VG_(indexXA)( cuOffs, i );
3111      *p = chase_cuOff( &b, ents, ents_cache, *p );
3112      if (b)
3113         b2 = True;
3114   }
3115   *changed = b2;
3116}
3117
3118static Bool TyEnt__subst_R_fields ( XArray* /* of TyEnt */ ents,
3119                                    TyEntIndexCache* ents_cache,
3120                                    /*MOD*/TyEnt* te )
3121{
3122   Bool b, changed = False;
3123   switch (te->tag) {
3124      case Te_EMPTY:
3125         break;
3126      case Te_INDIR:
3127         te->Te.INDIR.indR
3128            = chase_cuOff( &b, ents, ents_cache, te->Te.INDIR.indR );
3129         if (b) changed = True;
3130         break;
3131      case Te_UNKNOWN:
3132         break;
3133      case Te_Atom:
3134         break;
3135      case Te_Field:
3136         te->Te.Field.typeR
3137            = chase_cuOff( &b, ents, ents_cache, te->Te.Field.typeR );
3138         if (b) changed = True;
3139         break;
3140      case Te_Bound:
3141         break;
3142      case Te_TyBase:
3143         break;
3144      case Te_TyPtr:
3145      case Te_TyRef:
3146      case Te_TyPtrMbr:
3147      case Te_TyRvalRef:
3148         te->Te.TyPorR.typeR
3149            = chase_cuOff( &b, ents, ents_cache, te->Te.TyPorR.typeR );
3150         if (b) changed = True;
3151         break;
3152      case Te_TyTyDef:
3153         te->Te.TyTyDef.typeR
3154            = chase_cuOff( &b, ents, ents_cache, te->Te.TyTyDef.typeR );
3155         if (b) changed = True;
3156         break;
3157      case Te_TyStOrUn:
3158         chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyStOrUn.fieldRs );
3159         if (b) changed = True;
3160         break;
3161      case Te_TyEnum:
3162         chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyEnum.atomRs );
3163         if (b) changed = True;
3164         break;
3165      case Te_TyArray:
3166         te->Te.TyArray.typeR
3167            = chase_cuOff( &b, ents, ents_cache, te->Te.TyArray.typeR );
3168         if (b) changed = True;
3169         chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyArray.boundRs );
3170         if (b) changed = True;
3171         break;
3172      case Te_TyFn:
3173         break;
3174      case Te_TyQual:
3175         te->Te.TyQual.typeR
3176            = chase_cuOff( &b, ents, ents_cache, te->Te.TyQual.typeR );
3177         if (b) changed = True;
3178         break;
3179      case Te_TyVoid:
3180         break;
3181      default:
3182         ML_(pp_TyEnt)(te);
3183         vg_assert(0);
3184   }
3185   return changed;
3186}
3187
3188/* Make a pass over 'ents'.  For each tyent, inspect the target of any
3189   'R' or 'Rs' fields (those which refer to other tyents), and replace
3190   any which point to INDIR nodes with the target of the indirection
3191   (which should not itself be an indirection).  In summary, this
3192   routine shorts out all references to indirection nodes. */
3193static
3194Word dedup_types_substitution_pass ( /*MOD*/XArray* /* of TyEnt */ ents,
3195                                     TyEntIndexCache* ents_cache )
3196{
3197   Word i, n, nChanged = 0;
3198   Bool b;
3199   n = VG_(sizeXA)( ents );
3200   for (i = 0; i < n; i++) {
3201      TyEnt* ent = VG_(indexXA)( ents, i );
3202      vg_assert(ent->tag != Te_EMPTY);
3203      /* We have to substitute everything, even indirections, so as to
3204         ensure that chains of indirections don't build up. */
3205      b = TyEnt__subst_R_fields( ents, ents_cache, ent );
3206      if (b)
3207         nChanged++;
3208   }
3209
3210   return nChanged;
3211}
3212
3213
3214/* Make a pass over 'ents', building a dictionary of TyEnts as we go.
3215   Look up each new tyent in the dictionary in turn.  If it is already
3216   in the dictionary, replace this tyent with an indirection to the
3217   existing one, and delete any malloc'd stuff hanging off this one.
3218   In summary, this routine commons up all tyents that are identical
3219   as defined by TyEnt__cmp_by_all_except_cuOff. */
3220static
3221Word dedup_types_commoning_pass ( /*MOD*/XArray* /* of TyEnt */ ents )
3222{
3223   Word    n, i, nDeleted;
3224   WordFM* dict; /* TyEnt* -> void */
3225   TyEnt*  ent;
3226   UWord   keyW, valW;
3227
3228   dict = VG_(newFM)(
3229             ML_(dinfo_zalloc), "di.readdwarf3.dtcp.1",
3230             ML_(dinfo_free),
3231             (Word(*)(UWord,UWord)) ML_(TyEnt__cmp_by_all_except_cuOff)
3232          );
3233
3234   nDeleted = 0;
3235   n = VG_(sizeXA)( ents );
3236   for (i = 0; i < n; i++) {
3237      ent = VG_(indexXA)( ents, i );
3238      vg_assert(ent->tag != Te_EMPTY);
3239
3240      /* Ignore indirections, although check that they are
3241         not forming a cycle. */
3242      if (ent->tag == Te_INDIR) {
3243         vg_assert(ent->Te.INDIR.indR < ent->cuOff);
3244         continue;
3245      }
3246
3247      keyW = valW = 0;
3248      if (VG_(lookupFM)( dict, &keyW, &valW, (UWord)ent )) {
3249         /* it's already in the dictionary. */
3250         TyEnt* old = (TyEnt*)keyW;
3251         vg_assert(valW == 0);
3252         vg_assert(old != ent);
3253         vg_assert(old->tag != Te_INDIR);
3254         /* since we are traversing the array in increasing order of
3255            cuOff: */
3256         vg_assert(old->cuOff < ent->cuOff);
3257         /* So anyway, dump this entry and replace it with an
3258            indirection to the one in the dictionary.  Note that the
3259            assertion above guarantees that we cannot create cycles of
3260            indirections, since we are always creating an indirection
3261            to a tyent with a cuOff lower than this one. */
3262         ML_(TyEnt__make_EMPTY)( ent );
3263         ent->tag = Te_INDIR;
3264         ent->Te.INDIR.indR = old->cuOff;
3265         nDeleted++;
3266      } else {
3267         /* not in dictionary; add it and keep going. */
3268         VG_(addToFM)( dict, (UWord)ent, 0 );
3269      }
3270   }
3271
3272   VG_(deleteFM)( dict, NULL, NULL );
3273
3274   return nDeleted;
3275}
3276
3277
3278static
3279void dedup_types ( Bool td3,
3280                   /*MOD*/XArray* /* of TyEnt */ ents,
3281                   TyEntIndexCache* ents_cache )
3282{
3283   Word m, n, i, nDel, nSubst, nThresh;
3284   if (0) td3 = True;
3285
3286   n = VG_(sizeXA)( ents );
3287
3288   /* If a commoning pass and a substitution pass both make fewer than
3289      this many changes, just stop.  It's pointless to burn up CPU
3290      time trying to compress the last 1% or so out of the array. */
3291   nThresh = n / 200;
3292
3293   /* First we must sort .ents by its .cuOff fields, so we
3294      can index into it. */
3295   VG_(setCmpFnXA)(
3296      ents,
3297      (Int(*)(void*,void*)) ML_(TyEnt__cmp_by_cuOff_only)
3298   );
3299   VG_(sortXA)( ents );
3300
3301   /* Now repeatedly do commoning and substitution passes over
3302      the array, until there are no more changes. */
3303   do {
3304      nDel   = dedup_types_commoning_pass ( ents );
3305      nSubst = dedup_types_substitution_pass ( ents, ents_cache );
3306      vg_assert(nDel >= 0 && nSubst >= 0);
3307      TRACE_D3("   %ld deletions, %ld substitutions\n", nDel, nSubst);
3308   } while (nDel > nThresh || nSubst > nThresh);
3309
3310   /* Sanity check: all INDIR nodes should point at a non-INDIR thing.
3311      In fact this should be true at the end of every loop iteration
3312      above (a commoning pass followed by a substitution pass), but
3313      checking it on every iteration is excessively expensive.  Note,
3314      this loop also computes 'm' for the stats printing below it. */
3315   m = 0;
3316   n = VG_(sizeXA)( ents );
3317   for (i = 0; i < n; i++) {
3318      TyEnt *ent, *ind;
3319      ent = VG_(indexXA)( ents, i );
3320      if (ent->tag != Te_INDIR) continue;
3321      m++;
3322      ind = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
3323                                         ent->Te.INDIR.indR );
3324      vg_assert(ind);
3325      vg_assert(ind->tag != Te_INDIR);
3326   }
3327
3328   TRACE_D3("Overall: %ld before, %ld after\n", n, n-m);
3329}
3330
3331
3332/*------------------------------------------------------------*/
3333/*---                                                      ---*/
3334/*--- Resolution of references to type DIEs                ---*/
3335/*---                                                      ---*/
3336/*------------------------------------------------------------*/
3337
3338/* Make a pass through the (temporary) variables array.  Examine the
3339   type of each variable, check is it found, and chase any Te_INDIRs.
3340   Postcondition is: each variable has a typeR field that refers to a
3341   valid type in tyents, or a Te_UNKNOWN, and is certainly guaranteed
3342   not to refer to a Te_INDIR.  (This is so that we can throw all the
3343   Te_INDIRs away later). */
3344
3345__attribute__((noinline))
3346static void resolve_variable_types (
3347               void (*barf)( HChar* ) __attribute__((noreturn)),
3348               /*R-O*/XArray* /* of TyEnt */ ents,
3349               /*MOD*/TyEntIndexCache* ents_cache,
3350               /*MOD*/XArray* /* of TempVar* */ vars
3351            )
3352{
3353   Word i, n;
3354   n = VG_(sizeXA)( vars );
3355   for (i = 0; i < n; i++) {
3356      TempVar* var = *(TempVar**)VG_(indexXA)( vars, i );
3357      /* This is the stated type of the variable.  But it might be
3358         an indirection, so be careful. */
3359      TyEnt* ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
3360                                                var->typeR );
3361      if (ent && ent->tag == Te_INDIR) {
3362         ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
3363                                            ent->Te.INDIR.indR );
3364         vg_assert(ent);
3365         vg_assert(ent->tag != Te_INDIR);
3366      }
3367
3368      /* Deal first with "normal" cases */
3369      if (ent && ML_(TyEnt__is_type)(ent)) {
3370         var->typeR = ent->cuOff;
3371         continue;
3372      }
3373
3374      /* If there's no ent, it probably we did not manage to read a
3375         type at the cuOffset which is stated as being this variable's
3376         type.  Maybe a deficiency in parse_type_DIE.  Complain. */
3377      if (ent == NULL) {
3378         VG_(printf)("\n: Invalid cuOff = 0x%05lx\n", var->typeR );
3379         barf("resolve_variable_types: "
3380              "cuOff does not refer to a known type");
3381      }
3382      vg_assert(ent);
3383      /* If ent has any other tag, something bad happened, along the
3384         lines of var->typeR not referring to a type at all. */
3385      vg_assert(ent->tag == Te_UNKNOWN);
3386      /* Just accept it; the type will be useless, but at least keep
3387         going. */
3388      var->typeR = ent->cuOff;
3389   }
3390}
3391
3392
3393/*------------------------------------------------------------*/
3394/*---                                                      ---*/
3395/*--- Parsing of Compilation Units                         ---*/
3396/*---                                                      ---*/
3397/*------------------------------------------------------------*/
3398
3399static Int cmp_TempVar_by_dioff ( void* v1, void* v2 ) {
3400   TempVar* t1 = *(TempVar**)v1;
3401   TempVar* t2 = *(TempVar**)v2;
3402   if (t1->dioff < t2->dioff) return -1;
3403   if (t1->dioff > t2->dioff) return 1;
3404   return 0;
3405}
3406
3407static void read_DIE (
3408   /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
3409   /*MOD*/XArray* /* of TyEnt */ tyents,
3410   /*MOD*/XArray* /* of TempVar* */ tempvars,
3411   /*MOD*/XArray* /* of GExpr* */ gexprs,
3412   /*MOD*/D3TypeParser* typarser,
3413   /*MOD*/D3VarParser* varparser,
3414   Cursor* c, Bool td3, CUConst* cc, Int level
3415)
3416{
3417   Cursor abbv;
3418   ULong  atag, abbv_code;
3419   UWord  posn;
3420   UInt   has_children;
3421   UWord  start_die_c_offset, start_abbv_c_offset;
3422   UWord  after_die_c_offset, after_abbv_c_offset;
3423
3424   /* --- Deal with this DIE --- */
3425   posn      = cook_die( cc, get_position_of_Cursor( c ) );
3426   abbv_code = get_ULEB128( c );
3427   set_abbv_Cursor( &abbv, td3, cc, abbv_code );
3428   atag      = get_ULEB128( &abbv );
3429   TRACE_D3("\n");
3430   TRACE_D3(" <%d><%lx>: Abbrev Number: %llu (%s)\n",
3431            level, posn, abbv_code, ML_(pp_DW_TAG)( atag ) );
3432
3433   if (atag == 0)
3434      cc->barf("read_DIE: invalid zero tag on DIE");
3435
3436   has_children = get_UChar( &abbv );
3437   if (has_children != DW_children_no && has_children != DW_children_yes)
3438      cc->barf("read_DIE: invalid has_children value");
3439
3440   /* We're set up to look at the fields of this DIE.  Hand it off to
3441      any parser(s) that want to see it.  Since they will in general
3442      advance both the DIE and abbrev cursors, remember their current
3443      settings so that we can then back up and do one final pass over
3444      the DIE, to print out its contents. */
3445
3446   start_die_c_offset  = get_position_of_Cursor( c );
3447   start_abbv_c_offset = get_position_of_Cursor( &abbv );
3448
3449   while (True) {
3450      ULong cts;
3451      Int   ctsSzB;
3452      UWord ctsMemSzB;
3453      ULong at_name = get_ULEB128( &abbv );
3454      ULong at_form = get_ULEB128( &abbv );
3455      if (at_name == 0 && at_form == 0) break;
3456      TRACE_D3("     %18s: ", ML_(pp_DW_AT)(at_name));
3457      /* Get the form contents, but ignore them; the only purpose is
3458         to print them, if td3 is True */
3459      get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
3460                         cc, c, td3, (DW_FORM)at_form );
3461      TRACE_D3("\t");
3462      TRACE_D3("\n");
3463   }
3464
3465   after_die_c_offset  = get_position_of_Cursor( c );
3466   after_abbv_c_offset = get_position_of_Cursor( &abbv );
3467
3468   set_position_of_Cursor( c,     start_die_c_offset );
3469   set_position_of_Cursor( &abbv, start_abbv_c_offset );
3470
3471   parse_type_DIE( tyents,
3472                   typarser,
3473                   (DW_TAG)atag,
3474                   posn,
3475                   level,
3476                   c,     /* DIE cursor */
3477                   &abbv, /* abbrev cursor */
3478                   cc,
3479                   td3 );
3480
3481   set_position_of_Cursor( c,     start_die_c_offset );
3482   set_position_of_Cursor( &abbv, start_abbv_c_offset );
3483
3484   parse_var_DIE( rangestree,
3485                  tempvars,
3486                  gexprs,
3487                  varparser,
3488                  (DW_TAG)atag,
3489                  posn,
3490                  level,
3491                  c,     /* DIE cursor */
3492                  &abbv, /* abbrev cursor */
3493                  cc,
3494                  td3 );
3495
3496   set_position_of_Cursor( c,     after_die_c_offset );
3497   set_position_of_Cursor( &abbv, after_abbv_c_offset );
3498
3499   /* --- Now recurse into its children, if any --- */
3500   if (has_children == DW_children_yes) {
3501      if (0) TRACE_D3("BEGIN children of level %d\n", level);
3502      while (True) {
3503         atag = peek_ULEB128( c );
3504         if (atag == 0) break;
3505         read_DIE( rangestree, tyents, tempvars, gexprs,
3506                   typarser, varparser,
3507                   c, td3, cc, level+1 );
3508      }
3509      /* Now we need to eat the terminating zero */
3510      atag = get_ULEB128( c );
3511      vg_assert(atag == 0);
3512      if (0) TRACE_D3("END children of level %d\n", level);
3513   }
3514
3515}
3516
3517
3518static
3519void new_dwarf3_reader_wrk (
3520   struct _DebugInfo* di,
3521   __attribute__((noreturn)) void (*barf)( HChar* ),
3522   UChar* debug_info_img,   SizeT debug_info_sz,
3523   UChar* debug_types_img,  SizeT debug_types_sz,
3524   UChar* debug_abbv_img,   SizeT debug_abbv_sz,
3525   UChar* debug_line_img,   SizeT debug_line_sz,
3526   UChar* debug_str_img,    SizeT debug_str_sz,
3527   UChar* debug_ranges_img, SizeT debug_ranges_sz,
3528   UChar* debug_loc_img,    SizeT debug_loc_sz,
3529   UChar* debug_info_alt_img, SizeT debug_info_alt_sz,
3530   UChar* debug_abbv_alt_img, SizeT debug_abbv_alt_sz,
3531   UChar* debug_line_alt_img, SizeT debug_line_alt_sz,
3532   UChar* debug_str_alt_img,  SizeT debug_str_alt_sz
3533)
3534{
3535   XArray* /* of TyEnt */     tyents;
3536   XArray* /* of TyEnt */     tyents_to_keep;
3537   XArray* /* of GExpr* */    gexprs;
3538   XArray* /* of TempVar* */  tempvars;
3539   WordFM* /* of (XArray* of AddrRange, void) */ rangestree;
3540   TyEntIndexCache* tyents_cache = NULL;
3541   TyEntIndexCache* tyents_to_keep_cache = NULL;
3542   TempVar *varp, *varp2;
3543   GExpr* gexpr;
3544   Cursor abbv; /* for showing .debug_abbrev */
3545   Cursor info; /* primary cursor for parsing .debug_info */
3546   Cursor ranges; /* for showing .debug_ranges */
3547   D3TypeParser typarser;
3548   D3VarParser varparser;
3549   Addr  dr_base;
3550   UWord dr_offset;
3551   Word  i, j, n;
3552   Bool td3 = di->trace_symtab;
3553   XArray* /* of TempVar* */ dioff_lookup_tab;
3554   Int pass;
3555   VgHashTable signature_types;
3556#if 0
3557   /* This doesn't work properly because it assumes all entries are
3558      packed end to end, with no holes.  But that doesn't always
3559      appear to be the case, so it loses sync.  And the D3 spec
3560      doesn't appear to require a no-hole situation either. */
3561   /* Display .debug_loc */
3562   Addr  dl_base;
3563   UWord dl_offset;
3564   Cursor loc; /* for showing .debug_loc */
3565   TRACE_SYMTAB("\n");
3566   TRACE_SYMTAB("\n------ The contents of .debug_loc ------\n");
3567   TRACE_SYMTAB("    Offset   Begin    End      Expression\n");
3568   init_Cursor( &loc, debug_loc_img,
3569                debug_loc_sz, 0, barf,
3570                "Overrun whilst reading .debug_loc section(1)" );
3571   dl_base = 0;
3572   dl_offset = 0;
3573   while (True) {
3574      UWord  w1, w2;
3575      UWord  len;
3576      if (is_at_end_Cursor( &loc ))
3577         break;
3578
3579      /* Read a (host-)word pair.  This is something of a hack since
3580         the word size to read is really dictated by the ELF file;
3581         however, we assume we're reading a file with the same
3582         word-sizeness as the host.  Reasonably enough. */
3583      w1 = get_UWord( &loc );
3584      w2 = get_UWord( &loc );
3585
3586      if (w1 == 0 && w2 == 0) {
3587         /* end of list.  reset 'base' */
3588         TRACE_D3("    %08lx <End of list>\n", dl_offset);
3589         dl_base = 0;
3590         dl_offset = get_position_of_Cursor( &loc );
3591         continue;
3592      }
3593
3594      if (w1 == -1UL) {
3595         /* new value for 'base' */
3596         TRACE_D3("    %08lx %16lx %08lx (base address)\n",
3597                  dl_offset, w1, w2);
3598         dl_base = w2;
3599         continue;
3600      }
3601
3602      /* else a location expression follows */
3603      TRACE_D3("    %08lx %08lx %08lx ",
3604               dl_offset, w1 + dl_base, w2 + dl_base);
3605      len = (UWord)get_UShort( &loc );
3606      while (len > 0) {
3607         UChar byte = get_UChar( &loc );
3608         TRACE_D3("%02x", (UInt)byte);
3609         len--;
3610      }
3611      TRACE_SYMTAB("\n");
3612   }
3613#endif
3614
3615   /* Display .debug_ranges */
3616   TRACE_SYMTAB("\n");
3617   TRACE_SYMTAB("\n------ The contents of .debug_ranges ------\n");
3618   TRACE_SYMTAB("    Offset   Begin    End\n");
3619   init_Cursor( &ranges, debug_ranges_img,
3620                debug_ranges_sz, 0, barf,
3621                "Overrun whilst reading .debug_ranges section(1)" );
3622   dr_base = 0;
3623   dr_offset = 0;
3624   while (True) {
3625      UWord  w1, w2;
3626
3627      if (is_at_end_Cursor( &ranges ))
3628         break;
3629
3630      /* Read a (host-)word pair.  This is something of a hack since
3631         the word size to read is really dictated by the ELF file;
3632         however, we assume we're reading a file with the same
3633         word-sizeness as the host.  Reasonably enough. */
3634      w1 = get_UWord( &ranges );
3635      w2 = get_UWord( &ranges );
3636
3637      if (w1 == 0 && w2 == 0) {
3638         /* end of list.  reset 'base' */
3639         TRACE_D3("    %08lx <End of list>\n", dr_offset);
3640         dr_base = 0;
3641         dr_offset = get_position_of_Cursor( &ranges );
3642         continue;
3643      }
3644
3645      if (w1 == -1UL) {
3646         /* new value for 'base' */
3647         TRACE_D3("    %08lx %16lx %08lx (base address)\n",
3648                  dr_offset, w1, w2);
3649         dr_base = w2;
3650         continue;
3651      }
3652
3653      /* else a range [w1+base, w2+base) is denoted */
3654      TRACE_D3("    %08lx %08lx %08lx\n",
3655               dr_offset, w1 + dr_base, w2 + dr_base);
3656   }
3657
3658   /* Display .debug_abbrev */
3659   init_Cursor( &abbv, debug_abbv_img, debug_abbv_sz, 0, barf,
3660                "Overrun whilst reading .debug_abbrev section" );
3661   TRACE_SYMTAB("\n");
3662   TRACE_SYMTAB("\n------ The contents of .debug_abbrev ------\n");
3663   while (True) {
3664      if (is_at_end_Cursor( &abbv ))
3665         break;
3666      /* Read one abbreviation table */
3667      TRACE_D3("  Number TAG\n");
3668      while (True) {
3669         ULong atag;
3670         UInt  has_children;
3671         ULong acode = get_ULEB128( &abbv );
3672         if (acode == 0) break; /* end of the table */
3673         atag = get_ULEB128( &abbv );
3674         has_children = get_UChar( &abbv );
3675         TRACE_D3("   %llu      %s    [%s]\n",
3676                  acode, ML_(pp_DW_TAG)(atag),
3677                         ML_(pp_DW_children)(has_children));
3678         while (True) {
3679            ULong at_name = get_ULEB128( &abbv );
3680            ULong at_form = get_ULEB128( &abbv );
3681            if (at_name == 0 && at_form == 0) break;
3682            TRACE_D3("    %18s %s\n",
3683                     ML_(pp_DW_AT)(at_name), ML_(pp_DW_FORM)(at_form));
3684         }
3685      }
3686   }
3687   TRACE_SYMTAB("\n");
3688
3689   /* We'll park the harvested type information in here.  Also create
3690      a fake "void" entry with offset D3_FAKEVOID_CUOFF, so we always
3691      have at least one type entry to refer to.  D3_FAKEVOID_CUOFF is
3692      huge and presumably will not occur in any valid DWARF3 file --
3693      it would need to have a .debug_info section 4GB long for that to
3694      happen.  These type entries end up in the DebugInfo. */
3695   tyents = VG_(newXA)( ML_(dinfo_zalloc),
3696                        "di.readdwarf3.ndrw.1 (TyEnt temp array)",
3697                        ML_(dinfo_free), sizeof(TyEnt) );
3698   { TyEnt tyent;
3699     VG_(memset)(&tyent, 0, sizeof(tyent));
3700     tyent.tag   = Te_TyVoid;
3701     tyent.cuOff = D3_FAKEVOID_CUOFF;
3702     tyent.Te.TyVoid.isFake = True;
3703     VG_(addToXA)( tyents, &tyent );
3704   }
3705   { TyEnt tyent;
3706     VG_(memset)(&tyent, 0, sizeof(tyent));
3707     tyent.tag   = Te_UNKNOWN;
3708     tyent.cuOff = D3_INVALID_CUOFF;
3709     VG_(addToXA)( tyents, &tyent );
3710   }
3711
3712   /* This is a tree used to unique-ify the range lists that are
3713      manufactured by parse_var_DIE.  References to the keys in the
3714      tree wind up in .rngMany fields in TempVars.  We'll need to
3715      delete this tree, and the XArrays attached to it, at the end of
3716      this function. */
3717   rangestree = VG_(newFM)( ML_(dinfo_zalloc),
3718                            "di.readdwarf3.ndrw.2 (rangestree)",
3719                            ML_(dinfo_free),
3720                            (Word(*)(UWord,UWord))cmp__XArrays_of_AddrRange );
3721
3722   /* List of variables we're accumulating.  These don't end up in the
3723      DebugInfo; instead their contents are handed to ML_(addVar) and
3724      the list elements are then deleted. */
3725   tempvars = VG_(newXA)( ML_(dinfo_zalloc),
3726                          "di.readdwarf3.ndrw.3 (TempVar*s array)",
3727                          ML_(dinfo_free),
3728                          sizeof(TempVar*) );
3729
3730   /* List of GExprs we're accumulating.  These wind up in the
3731      DebugInfo. */
3732   gexprs = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.4",
3733                        ML_(dinfo_free), sizeof(GExpr*) );
3734
3735   /* We need a D3TypeParser to keep track of partially constructed
3736      types.  It'll be discarded as soon as we've completed the CU,
3737      since the resulting information is tipped in to 'tyents' as it
3738      is generated. */
3739   VG_(memset)( &typarser, 0, sizeof(typarser) );
3740   typarser.sp = -1;
3741   typarser.language = '?';
3742   for (i = 0; i < N_D3_TYPE_STACK; i++) {
3743      typarser.qparentE[i].tag   = Te_EMPTY;
3744      typarser.qparentE[i].cuOff = D3_INVALID_CUOFF;
3745   }
3746
3747   VG_(memset)( &varparser, 0, sizeof(varparser) );
3748   varparser.sp = -1;
3749
3750   signature_types = VG_(HT_construct) ("signature_types");
3751
3752   /* Do an initial pass to scan the .debug_types section, if any, and
3753      fill in the signatured types hash table.  This lets us handle
3754      mapping from a type signature to a (cooked) DIE offset directly
3755      in get_Form_contents.  */
3756   if (debug_types_img != NULL) {
3757      init_Cursor( &info, debug_types_img, debug_types_sz, 0, barf,
3758                   "Overrun whilst reading .debug_types section" );
3759      TRACE_D3("\n------ Collecting signatures from .debug_types section ------\n");
3760
3761      while (True) {
3762         UWord   cu_start_offset, cu_offset_now;
3763         CUConst cc;
3764
3765         cu_start_offset = get_position_of_Cursor( &info );
3766         TRACE_D3("\n");
3767         TRACE_D3("  Compilation Unit @ offset 0x%lx:\n", cu_start_offset);
3768         /* parse_CU_header initialises the CU's set_abbv_Cursor cache
3769            (saC_cache) */
3770         parse_CU_Header( &cc, td3, &info,
3771                          (UChar*)debug_abbv_img, debug_abbv_sz,
3772                          True, False );
3773
3774         /* Needed by cook_die.  */
3775         cc.types_cuOff_bias = debug_info_sz;
3776
3777         record_signatured_type( signature_types, cc.type_signature,
3778                                 cook_die( &cc, cc.type_offset ));
3779
3780         /* Until proven otherwise we assume we don't need the icc9
3781            workaround in this case; see the DIE-reading loop below
3782            for details.  */
3783         cu_offset_now = (cu_start_offset + cc.unit_length
3784                          + (cc.is_dw64 ? 12 : 4));
3785
3786         if (cu_offset_now == debug_types_sz)
3787            break;
3788
3789         set_position_of_Cursor ( &info, cu_offset_now );
3790      }
3791   }
3792
3793   /* Perform three DIE-reading passes.  The first pass reads DIEs from
3794      alternate .debug_info (if any), the second pass reads DIEs from
3795      .debug_info, and the third pass reads DIEs from .debug_types.
3796      Moving the body of this loop into a separate function would
3797      require a large number of arguments to be passed in, so it is
3798      kept inline instead.  */
3799   for (pass = 0; pass < 3; ++pass) {
3800      UWord section_size;
3801
3802      if (pass == 0) {
3803         if (debug_info_alt_img == NULL)
3804	    continue;
3805         /* Now loop over the Compilation Units listed in the alternate
3806            .debug_info section (see D3SPEC sec 7.5) paras 1 and 2.
3807            Each compilation unit contains a Compilation Unit Header
3808            followed by precisely one DW_TAG_compile_unit or
3809            DW_TAG_partial_unit DIE. */
3810         init_Cursor( &info, debug_info_alt_img, debug_info_alt_sz, 0, barf,
3811                      "Overrun whilst reading alternate .debug_info section" );
3812         section_size = debug_info_alt_sz;
3813
3814         TRACE_D3("\n------ Parsing alternate .debug_info section ------\n");
3815      } else if (pass == 1) {
3816         /* Now loop over the Compilation Units listed in the .debug_info
3817            section (see D3SPEC sec 7.5) paras 1 and 2.  Each compilation
3818            unit contains a Compilation Unit Header followed by precisely
3819            one DW_TAG_compile_unit or DW_TAG_partial_unit DIE. */
3820         init_Cursor( &info, debug_info_img, debug_info_sz, 0, barf,
3821                      "Overrun whilst reading .debug_info section" );
3822         section_size = debug_info_sz;
3823
3824         TRACE_D3("\n------ Parsing .debug_info section ------\n");
3825      } else {
3826         if (debug_types_img == NULL)
3827            continue;
3828         init_Cursor( &info, debug_types_img, debug_types_sz, 0, barf,
3829                      "Overrun whilst reading .debug_types section" );
3830         section_size = debug_types_sz;
3831
3832         TRACE_D3("\n------ Parsing .debug_types section ------\n");
3833      }
3834
3835      while (True) {
3836         UWord   cu_start_offset, cu_offset_now;
3837         CUConst cc;
3838         /* It may be that the stated size of this CU is larger than the
3839            amount of stuff actually in it.  icc9 seems to generate CUs
3840            thusly.  We use these variables to figure out if this is
3841            indeed the case, and if so how many bytes we need to skip to
3842            get to the start of the next CU.  Not skipping those bytes
3843            causes us to misidentify the start of the next CU, and it all
3844            goes badly wrong after that (not surprisingly). */
3845         UWord cu_size_including_IniLen, cu_amount_used;
3846
3847         /* It seems icc9 finishes the DIE info before debug_info_sz
3848            bytes have been used up.  So be flexible, and declare the
3849            sequence complete if there is not enough remaining bytes to
3850            hold even the smallest conceivable CU header.  (11 bytes I
3851            reckon). */
3852         /* JRS 23Jan09: I suspect this is no longer necessary now that
3853            the code below contains a 'while (cu_amount_used <
3854            cu_size_including_IniLen ...'  style loop, which skips over
3855            any leftover bytes at the end of a CU in the case where the
3856            CU's stated size is larger than its actual size (as
3857            determined by reading all its DIEs).  However, for prudence,
3858            I'll leave the following test in place.  I can't see that a
3859            CU header can be smaller than 11 bytes, so I don't think
3860            there's any harm possible through the test -- it just adds
3861            robustness. */
3862         Word avail = get_remaining_length_Cursor( &info );
3863         if (avail < 11) {
3864            if (avail > 0)
3865               TRACE_D3("new_dwarf3_reader_wrk: warning: "
3866                        "%ld unused bytes after end of DIEs\n", avail);
3867            break;
3868         }
3869
3870         /* Check the varparser's stack is in a sane state. */
3871         vg_assert(varparser.sp == -1);
3872         for (i = 0; i < N_D3_VAR_STACK; i++) {
3873            vg_assert(varparser.ranges[i] == NULL);
3874            vg_assert(varparser.level[i] == 0);
3875         }
3876         for (i = 0; i < N_D3_TYPE_STACK; i++) {
3877            vg_assert(typarser.qparentE[i].cuOff == D3_INVALID_CUOFF);
3878            vg_assert(typarser.qparentE[i].tag   == Te_EMPTY);
3879            vg_assert(typarser.qlevel[i] == 0);
3880         }
3881
3882         cu_start_offset = get_position_of_Cursor( &info );
3883         TRACE_D3("\n");
3884         TRACE_D3("  Compilation Unit @ offset 0x%lx:\n", cu_start_offset);
3885         /* parse_CU_header initialises the CU's set_abbv_Cursor cache
3886            (saC_cache) */
3887         if (pass == 0)
3888            parse_CU_Header( &cc, td3, &info,
3889                             (UChar*)debug_abbv_alt_img, debug_abbv_alt_sz,
3890                             False, True );
3891         else
3892            parse_CU_Header( &cc, td3, &info,
3893                             (UChar*)debug_abbv_img, debug_abbv_sz,
3894                             pass == 2, False );
3895         cc.debug_str_img    = pass == 0 ? debug_str_alt_img : debug_str_img;
3896         cc.debug_str_sz     = pass == 0 ? debug_str_alt_sz : debug_str_sz;
3897         cc.debug_ranges_img = debug_ranges_img;
3898         cc.debug_ranges_sz  = debug_ranges_sz;
3899         cc.debug_loc_img    = debug_loc_img;
3900         cc.debug_loc_sz     = debug_loc_sz;
3901         cc.debug_line_img   = pass == 0 ? debug_line_alt_img : debug_line_img;
3902         cc.debug_line_sz    = pass == 0 ? debug_line_alt_sz : debug_line_sz;
3903         cc.debug_info_img   = pass == 0 ? debug_info_alt_img : debug_info_img;
3904         cc.debug_info_sz    = pass == 0 ? debug_info_alt_sz : debug_info_sz;
3905         cc.debug_types_img  = debug_types_img;
3906         cc.debug_types_sz   = debug_types_sz;
3907         cc.debug_info_alt_img = debug_info_alt_img;
3908         cc.debug_info_alt_sz = debug_info_alt_sz;
3909         cc.debug_str_alt_img = debug_str_alt_img;
3910         cc.debug_str_alt_sz = debug_str_alt_sz;
3911         cc.types_cuOff_bias = debug_info_sz;
3912         cc.alt_cuOff_bias   = debug_info_sz + debug_types_sz;
3913         cc.cu_start_offset  = cu_start_offset;
3914         cc.di = di;
3915         /* The CU's svma can be deduced by looking at the AT_low_pc
3916            value in the top level TAG_compile_unit, which is the topmost
3917            DIE.  We'll leave it for the 'varparser' to acquire that info
3918            and fill it in -- since it is the only party to want to know
3919            it. */
3920         cc.cu_svma_known = False;
3921         cc.cu_svma       = 0;
3922
3923         cc.signature_types = signature_types;
3924
3925         /* Create a fake outermost-level range covering the entire
3926            address range.  So we always have *something* to catch all
3927            variable declarations. */
3928         varstack_push( &cc, &varparser, td3,
3929                        unitary_range_list(0UL, ~0UL),
3930                        -1, False/*isFunc*/, NULL/*fbGX*/ );
3931
3932         /* And set up the file name table.  When we come across the top
3933            level DIE for this CU (which is what the next call to
3934            read_DIE should process) we will copy all the file names out
3935            of the .debug_line img area and use this table to look up the
3936            copies when we later see filename numbers in DW_TAG_variables
3937            etc. */
3938         vg_assert(!varparser.filenameTable );
3939         varparser.filenameTable
3940            = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.5",
3941                          ML_(dinfo_free),
3942                          sizeof(UChar*) );
3943         vg_assert(varparser.filenameTable);
3944
3945         /* Now read the one-and-only top-level DIE for this CU. */
3946         vg_assert(varparser.sp == 0);
3947         read_DIE( rangestree,
3948                   tyents, tempvars, gexprs,
3949                   &typarser, &varparser,
3950                   &info, td3, &cc, 0 );
3951
3952         cu_offset_now = get_position_of_Cursor( &info );
3953
3954         if (0) VG_(printf)("Travelled: %lu  size %llu\n",
3955                            cu_offset_now - cc.cu_start_offset,
3956                            cc.unit_length + (cc.is_dw64 ? 12 : 4));
3957
3958         /* How big the CU claims it is .. */
3959         cu_size_including_IniLen = cc.unit_length + (cc.is_dw64 ? 12 : 4);
3960         /* .. vs how big we have found it to be */
3961         cu_amount_used = cu_offset_now - cc.cu_start_offset;
3962
3963         if (1) TRACE_D3("offset now %ld, d-i-size %ld\n",
3964                         cu_offset_now, section_size);
3965         if (cu_offset_now > section_size)
3966            barf("toplevel DIEs beyond end of CU");
3967
3968         /* If the CU is bigger than it claims to be, we've got a serious
3969            problem. */
3970         if (cu_amount_used > cu_size_including_IniLen)
3971            barf("CU's actual size appears to be larger than it claims it is");
3972
         /* If the CU is smaller than it claims to be, we need to skip some
            bytes.  Loop updates cu_offset_now and cu_amount_used. */
3975         while (cu_amount_used < cu_size_including_IniLen
3976                && get_remaining_length_Cursor( &info ) > 0) {
3977            if (0) VG_(printf)("SKIP\n");
3978            (void)get_UChar( &info );
3979            cu_offset_now = get_position_of_Cursor( &info );
3980            cu_amount_used = cu_offset_now - cc.cu_start_offset;
3981         }
3982
3983         /* Preen to level -2.  DIEs have level >= 0 so -2 cannot occur
3984            anywhere else at all.  Our fake the-entire-address-space
3985            range is at level -1, so preening to -2 should completely
3986            empty the stack out. */
3987         TRACE_D3("\n");
3988         varstack_preen( &varparser, td3, -2 );
3989         /* Similarly, empty the type stack out. */
3990         typestack_preen( &typarser, td3, -2 );
3991
3992         TRACE_D3("set_abbv_Cursor cache: %lu queries, %lu misses\n",
3993                  cc.saC_cache_queries, cc.saC_cache_misses);
3994
3995         vg_assert(varparser.filenameTable );
3996         VG_(deleteXA)( varparser.filenameTable );
3997         varparser.filenameTable = NULL;
3998
3999         if (cu_offset_now == section_size)
4000            break;
4001         /* else keep going */
4002      }
4003   }
4004
4005   /* From here on we're post-processing the stuff we got
4006      out of the .debug_info section. */
4007   if (td3) {
4008      TRACE_D3("\n");
4009      ML_(pp_TyEnts)(tyents, "Initial type entity (TyEnt) array");
4010      TRACE_D3("\n");
4011      TRACE_D3("------ Compressing type entries ------\n");
4012   }
4013
4014   tyents_cache = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.6",
4015                                     sizeof(TyEntIndexCache) );
4016   ML_(TyEntIndexCache__invalidate)( tyents_cache );
4017   dedup_types( td3, tyents, tyents_cache );
4018   if (td3) {
4019      TRACE_D3("\n");
4020      ML_(pp_TyEnts)(tyents, "After type entity (TyEnt) compression");
4021   }
4022
4023   TRACE_D3("\n");
4024   TRACE_D3("------ Resolving the types of variables ------\n" );
4025   resolve_variable_types( barf, tyents, tyents_cache, tempvars );
4026
4027   /* Copy all the non-INDIR tyents into a new table.  For large
4028      .so's, about 90% of the tyents will by now have been resolved to
4029      INDIRs, and we no longer need them, and so don't need to store
4030      them. */
4031   tyents_to_keep
4032      = VG_(newXA)( ML_(dinfo_zalloc),
4033                    "di.readdwarf3.ndrw.7 (TyEnt to-keep array)",
4034                    ML_(dinfo_free), sizeof(TyEnt) );
4035   n = VG_(sizeXA)( tyents );
4036   for (i = 0; i < n; i++) {
4037      TyEnt* ent = VG_(indexXA)( tyents, i );
4038      if (ent->tag != Te_INDIR)
4039         VG_(addToXA)( tyents_to_keep, ent );
4040   }
4041
4042   VG_(deleteXA)( tyents );
4043   tyents = NULL;
4044   ML_(dinfo_free)( tyents_cache );
4045   tyents_cache = NULL;
4046
4047   /* Sort tyents_to_keep so we can lookup in it.  A complete (if
4048      minor) waste of time, since tyents itself is sorted, but
4049      necessary since VG_(lookupXA) refuses to cooperate if we
4050      don't. */
4051   VG_(setCmpFnXA)(
4052      tyents_to_keep,
4053      (Int(*)(void*,void*)) ML_(TyEnt__cmp_by_cuOff_only)
4054   );
4055   VG_(sortXA)( tyents_to_keep );
4056
4057   /* Enable cacheing on tyents_to_keep */
4058   tyents_to_keep_cache
4059      = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.8",
4060                           sizeof(TyEntIndexCache) );
4061   ML_(TyEntIndexCache__invalidate)( tyents_to_keep_cache );
4062
4063   /* And record the tyents in the DebugInfo.  We do this before
4064      starting to hand variables to ML_(addVar), since if ML_(addVar)
4065      wants to do debug printing (of the types of said vars) then it
4066      will need the tyents.*/
4067   vg_assert(!di->admin_tyents);
4068   di->admin_tyents = tyents_to_keep;
4069
4070   /* Bias all the location expressions. */
4071   TRACE_D3("\n");
4072   TRACE_D3("------ Biasing the location expressions ------\n" );
4073
4074   n = VG_(sizeXA)( gexprs );
4075   for (i = 0; i < n; i++) {
4076      gexpr = *(GExpr**)VG_(indexXA)( gexprs, i );
4077      bias_GX( gexpr, di );
4078   }
4079
4080   TRACE_D3("\n");
4081   TRACE_D3("------ Acquired the following variables: ------\n\n");
4082
4083   /* Park (pointers to) all the vars in an XArray, so we can look up
4084      abstract origins quickly.  The array is sorted (hence, looked-up
4085      by) the .dioff fields.  Since the .dioffs should be in strictly
4086      ascending order, there is no need to sort the array after
4087      construction.  The ascendingness is however asserted for. */
4088   dioff_lookup_tab
4089      = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.9",
4090                    ML_(dinfo_free),
4091                    sizeof(TempVar*) );
4092   vg_assert(dioff_lookup_tab);
4093
4094   n = VG_(sizeXA)( tempvars );
4095   Word first_primary_var;
4096   for (first_primary_var = 0;
4097        debug_info_alt_sz && first_primary_var < n;
4098        first_primary_var++) {
4099      varp = *(TempVar**)VG_(indexXA)( tempvars, first_primary_var );
4100      if (varp->dioff < debug_info_sz + debug_types_sz)
4101         break;
4102   }
4103   for (i = 0; i < n; i++) {
4104      varp = *(TempVar**)VG_(indexXA)( tempvars, (i + first_primary_var) % n );
4105      if (i > first_primary_var) {
4106         varp2 = *(TempVar**)VG_(indexXA)( tempvars,
4107                                           (i + first_primary_var - 1) % n );
4108         /* why should this hold?  Only, I think, because we've
4109            constructed the array by reading .debug_info sequentially,
4110            and so the array .dioff fields should reflect that, and be
4111            strictly ascending. */
4112         vg_assert(varp2->dioff < varp->dioff);
4113      }
4114      VG_(addToXA)( dioff_lookup_tab, &varp );
4115   }
4116   VG_(setCmpFnXA)( dioff_lookup_tab, cmp_TempVar_by_dioff );
4117   VG_(sortXA)( dioff_lookup_tab ); /* POINTLESS; FIXME: rm */
4118
4119   /* Now visit each var.  Collect up as much info as possible for
4120      each var and hand it to ML_(addVar). */
4121   n = VG_(sizeXA)( tempvars );
4122   for (j = 0; j < n; j++) {
4123      TyEnt* ent;
4124      varp = *(TempVar**)VG_(indexXA)( tempvars, j );
4125
4126      /* Possibly show .. */
4127      if (td3) {
4128         VG_(printf)("<%lx> addVar: level %d: %s :: ",
4129                     varp->dioff,
4130                     varp->level,
4131                     varp->name ? varp->name : (UChar*)"<anon_var>" );
4132         if (varp->typeR) {
4133            ML_(pp_TyEnt_C_ishly)( tyents_to_keep, varp->typeR );
4134         } else {
4135            VG_(printf)("NULL");
4136         }
4137         VG_(printf)("\n  Loc=");
4138         if (varp->gexpr) {
4139            ML_(pp_GX)(varp->gexpr);
4140         } else {
4141            VG_(printf)("NULL");
4142         }
4143         VG_(printf)("\n");
4144         if (varp->fbGX) {
4145            VG_(printf)("  FrB=");
4146            ML_(pp_GX)( varp->fbGX );
4147            VG_(printf)("\n");
4148         } else {
4149            VG_(printf)("  FrB=none\n");
4150         }
4151         VG_(printf)("  declared at: %s:%d\n",
4152                     varp->fName ? varp->fName : (UChar*)"NULL",
4153                     varp->fLine );
4154         if (varp->absOri != (UWord)D3_INVALID_CUOFF)
4155            VG_(printf)("  abstract origin: <%lx>\n", varp->absOri);
4156      }
4157
4158      /* Skip variables which have no location.  These must be
4159         abstract instances; they are useless as-is since with no
4160         location they have no specified memory location.  They will
4161         presumably be referred to via the absOri fields of other
4162         variables. */
4163      if (!varp->gexpr) {
4164         TRACE_D3("  SKIP (no location)\n\n");
4165         continue;
4166      }
4167
4168      /* So it has a location, at least.  If it refers to some other
4169         entry through its absOri field, pull in further info through
4170         that. */
4171      if (varp->absOri != (UWord)D3_INVALID_CUOFF) {
4172         Bool found;
4173         Word ixFirst, ixLast;
4174         TempVar key;
4175         TempVar* keyp = &key;
4176         TempVar *varAI;
4177         VG_(memset)(&key, 0, sizeof(key)); /* not necessary */
4178         key.dioff = varp->absOri; /* this is what we want to find */
4179         found = VG_(lookupXA)( dioff_lookup_tab, &keyp,
4180                                &ixFirst, &ixLast );
4181         if (!found) {
4182            /* barf("DW_AT_abstract_origin can't be resolved"); */
4183            TRACE_D3("  SKIP (DW_AT_abstract_origin can't be resolved)\n\n");
4184            continue;
4185         }
4186         /* If the following fails, there is more than one entry with
4187            the same dioff.  Which can't happen. */
4188         vg_assert(ixFirst == ixLast);
4189         varAI = *(TempVar**)VG_(indexXA)( dioff_lookup_tab, ixFirst );
4190         /* stay sane */
4191         vg_assert(varAI);
4192         vg_assert(varAI->dioff == varp->absOri);
4193
4194         /* Copy what useful info we can. */
4195         if (varAI->typeR && !varp->typeR)
4196            varp->typeR = varAI->typeR;
4197         if (varAI->name && !varp->name)
4198            varp->name = varAI->name;
4199         if (varAI->fName && !varp->fName)
4200            varp->fName = varAI->fName;
4201         if (varAI->fLine > 0 && varp->fLine == 0)
4202            varp->fLine = varAI->fLine;
4203      }
4204
4205      /* Give it a name if it doesn't have one. */
4206      if (!varp->name)
4207         varp->name = ML_(addStr)( di, "<anon_var>", -1 );
4208
4209      /* So now does it have enough info to be useful? */
4210      /* NOTE: re typeR: this is a hack.  If typeR is Te_UNKNOWN then
4211         the type didn't get resolved.  Really, in that case
4212         something's broken earlier on, and should be fixed, rather
4213         than just skipping the variable. */
4214      ent = ML_(TyEnts__index_by_cuOff)( tyents_to_keep,
4215                                         tyents_to_keep_cache,
4216                                         varp->typeR );
4217      /* The next two assertions should be guaranteed by
4218         our previous call to resolve_variable_types. */
4219      vg_assert(ent);
4220      vg_assert(ML_(TyEnt__is_type)(ent) || ent->tag == Te_UNKNOWN);
4221
4222      if (ent->tag == Te_UNKNOWN) continue;
4223
4224      vg_assert(varp->gexpr);
4225      vg_assert(varp->name);
4226      vg_assert(varp->typeR);
4227      vg_assert(varp->level >= 0);
4228
4229      /* Ok.  So we're going to keep it.  Call ML_(addVar) once for
4230         each address range in which the variable exists. */
4231      TRACE_D3("  ACQUIRE for range(s) ");
4232      { AddrRange  oneRange;
4233        AddrRange* varPcRanges;
4234        Word       nVarPcRanges;
4235        /* Set up to iterate over address ranges, however
4236           represented. */
4237        if (varp->nRanges == 0 || varp->nRanges == 1) {
4238           vg_assert(!varp->rngMany);
4239           if (varp->nRanges == 0) {
4240              vg_assert(varp->rngOneMin == 0);
4241              vg_assert(varp->rngOneMax == 0);
4242           }
4243           nVarPcRanges = varp->nRanges;
4244           oneRange.aMin = varp->rngOneMin;
4245           oneRange.aMax = varp->rngOneMax;
4246           varPcRanges = &oneRange;
4247        } else {
4248           vg_assert(varp->rngMany);
4249           vg_assert(varp->rngOneMin == 0);
4250           vg_assert(varp->rngOneMax == 0);
4251           nVarPcRanges = VG_(sizeXA)(varp->rngMany);
4252           vg_assert(nVarPcRanges >= 2);
4253           vg_assert(nVarPcRanges == (Word)varp->nRanges);
4254           varPcRanges = VG_(indexXA)(varp->rngMany, 0);
4255        }
4256        if (varp->level == 0)
4257           vg_assert( nVarPcRanges == 1 );
4258        /* and iterate */
4259        for (i = 0; i < nVarPcRanges; i++) {
4260           Addr pcMin = varPcRanges[i].aMin;
4261           Addr pcMax = varPcRanges[i].aMax;
4262           vg_assert(pcMin <= pcMax);
4263           /* Level 0 is the global address range.  So at level 0 we
4264              don't want to bias pcMin/pcMax; but at all other levels
4265              we do since those are derived from svmas in the Dwarf
4266              we're reading.  Be paranoid ... */
4267           if (varp->level == 0) {
4268              vg_assert(pcMin == (Addr)0);
4269              vg_assert(pcMax == ~(Addr)0);
4270           } else {
4271              /* vg_assert(pcMin > (Addr)0);
4272                 No .. we can legitimately expect to see ranges like
4273                 0x0-0x11D (pre-biasing, of course). */
4274              vg_assert(pcMax < ~(Addr)0);
4275           }
4276
4277           /* Apply text biasing, for non-global variables. */
4278           if (varp->level > 0) {
4279              pcMin += di->text_debug_bias;
4280              pcMax += di->text_debug_bias;
4281           }
4282
4283           if (i > 0 && (i%2) == 0)
4284              TRACE_D3("\n                       ");
4285           TRACE_D3("[%#lx,%#lx] ", pcMin, pcMax );
4286
4287           ML_(addVar)(
4288              di, varp->level,
4289                  pcMin, pcMax,
4290                  varp->name,  varp->typeR,
4291                  varp->gexpr, varp->fbGX,
4292                  varp->fName, varp->fLine, td3
4293           );
4294        }
4295      }
4296
4297      TRACE_D3("\n\n");
4298      /* and move on to the next var */
4299   }
4300
4301   /* Now free all the TempVars */
4302   n = VG_(sizeXA)( tempvars );
4303   for (i = 0; i < n; i++) {
4304      varp = *(TempVar**)VG_(indexXA)( tempvars, i );
4305      ML_(dinfo_free)(varp);
4306   }
4307   VG_(deleteXA)( tempvars );
4308   tempvars = NULL;
4309
4310   /* and the temp lookup table */
4311   VG_(deleteXA)( dioff_lookup_tab );
4312
4313   /* and the ranges tree.  Note that we need to also free the XArrays
4314      which constitute the keys, hence pass VG_(deleteXA) as a
4315      key-finalizer. */
4316   VG_(deleteFM)( rangestree, (void(*)(UWord))VG_(deleteXA), NULL );
4317
4318   /* and the tyents_to_keep cache */
4319   ML_(dinfo_free)( tyents_to_keep_cache );
4320   tyents_to_keep_cache = NULL;
4321
4322   vg_assert( varparser.filenameTable == NULL );
4323
4324   /* And the signatured type hash.  */
4325   VG_(HT_destruct) ( signature_types, ML_(dinfo_free) );
4326
4327   /* record the GExprs in di so they can be freed later */
4328   vg_assert(!di->admin_gexprs);
4329   di->admin_gexprs = gexprs;
4330}
4331
4332
4333/*------------------------------------------------------------*/
4334/*---                                                      ---*/
4335/*--- The "new" DWARF3 reader -- top level control logic   ---*/
4336/*---                                                      ---*/
4337/*------------------------------------------------------------*/
4338
4339static Bool               d3rd_jmpbuf_valid  = False;
4340static HChar*             d3rd_jmpbuf_reason = NULL;
4341static VG_MINIMAL_JMP_BUF(d3rd_jmpbuf);
4342
4343static __attribute__((noreturn)) void barf ( HChar* reason ) {
4344   vg_assert(d3rd_jmpbuf_valid);
4345   d3rd_jmpbuf_reason = reason;
4346   VG_MINIMAL_LONGJMP(d3rd_jmpbuf);
4347   /*NOTREACHED*/
4348   vg_assert(0);
4349}
4350
4351
4352void
4353ML_(new_dwarf3_reader) (
4354   struct _DebugInfo* di,
4355   UChar* debug_info_img,   SizeT debug_info_sz,
4356   UChar* debug_types_img,  SizeT debug_types_sz,
4357   UChar* debug_abbv_img,   SizeT debug_abbv_sz,
4358   UChar* debug_line_img,   SizeT debug_line_sz,
4359   UChar* debug_str_img,    SizeT debug_str_sz,
4360   UChar* debug_ranges_img, SizeT debug_ranges_sz,
4361   UChar* debug_loc_img,    SizeT debug_loc_sz,
4362   UChar* debug_info_alt_img, SizeT debug_info_alt_sz,
4363   UChar* debug_abbv_alt_img, SizeT debug_abbv_alt_sz,
4364   UChar* debug_line_alt_img, SizeT debug_line_alt_sz,
4365   UChar* debug_str_alt_img,  SizeT debug_str_alt_sz
4366)
4367{
4368   volatile Int  jumped;
4369   volatile Bool td3 = di->trace_symtab;
4370
4371   /* Run the _wrk function to read the dwarf3.  If it succeeds, it
4372      just returns normally.  If there is any failure, it longjmp's
4373      back here, having first set d3rd_jmpbuf_reason to something
4374      useful. */
4375   vg_assert(d3rd_jmpbuf_valid  == False);
4376   vg_assert(d3rd_jmpbuf_reason == NULL);
4377
4378   d3rd_jmpbuf_valid = True;
4379   jumped = VG_MINIMAL_SETJMP(d3rd_jmpbuf);
4380   if (jumped == 0) {
4381      /* try this ... */
4382      new_dwarf3_reader_wrk( di, barf,
4383                             debug_info_img,   debug_info_sz,
4384                             debug_types_img,  debug_types_sz,
4385                             debug_abbv_img,   debug_abbv_sz,
4386                             debug_line_img,   debug_line_sz,
4387                             debug_str_img,    debug_str_sz,
4388                             debug_ranges_img, debug_ranges_sz,
4389                             debug_loc_img,    debug_loc_sz,
4390                             debug_info_alt_img, debug_info_alt_sz,
4391                             debug_abbv_alt_img, debug_abbv_alt_sz,
4392                             debug_line_alt_img, debug_line_alt_sz,
4393                             debug_str_alt_img,  debug_str_alt_sz);
4394      d3rd_jmpbuf_valid = False;
4395      TRACE_D3("\n------ .debug_info reading was successful ------\n");
4396   } else {
4397      /* It longjmp'd. */
4398      d3rd_jmpbuf_valid = False;
4399      /* Can't longjump without giving some sort of reason. */
4400      vg_assert(d3rd_jmpbuf_reason != NULL);
4401
4402      TRACE_D3("\n------ .debug_info reading failed ------\n");
4403
4404      ML_(symerr)(di, True, d3rd_jmpbuf_reason);
4405   }
4406
4407   d3rd_jmpbuf_valid  = False;
4408   d3rd_jmpbuf_reason = NULL;
4409}
4410
4411
4412
4413/* --- Unused code fragments which might be useful one day. --- */
4414
#if 0
   /* Disabled fragment: trace-dump the contents of .debug_aranges.
      It is not a complete function; it expects a Cursor 'aranges',
      the section image/size 'debug_aranges_img'/'debug_aranges_sz'
      and 'barf' to be in scope at the point where it is pasted in.

      The section is read as a sequence of sets.  Each set is a
      header followed by (address, length) pairs, ended by a pair in
      which both values are zero. */
   TRACE_SYMTAB("\n");
   TRACE_SYMTAB("\n------ The contents of .debug_arange ------\n");
   init_Cursor( &aranges, debug_aranges_img,
                debug_aranges_sz, 0, barf,
                "Overrun whilst reading .debug_aranges section" );

   /* One iteration per set, until the section is used up. */
   while (!is_at_end_Cursor( &aranges )) {
      ULong  len, d_i_offset;
      ULong  address, length;
      Bool   is64;
      UShort version;
      UChar  asize, segsize;

      /* Set header.  The initial_length field also tells us whether
         the offset that follows is 32 or 64 bits wide. */
      len = get_Initial_Length( &is64, &aranges,
               "in .debug_aranges: invalid initial-length field" );
      version    = get_UShort( &aranges );
      d_i_offset = get_Dwarfish_UWord( &aranges, is64 );
      asize      = get_UChar( &aranges );
      segsize    = get_UChar( &aranges );
      TRACE_D3("  Length:                   %llu\n", len);
      TRACE_D3("  Version:                  %d\n", (Int)version);
      TRACE_D3("  Offset into .debug_info:  %llx\n", d_i_offset);
      TRACE_D3("  Pointer Size:             %d\n", (Int)asize);
      TRACE_D3("  Segment Size:             %d\n", (Int)segsize);
      TRACE_D3("\n");
      TRACE_D3("    Address            Length\n");

      /* asize comes straight from the file.  It is used below as a
         modulus, so zero would be a division by zero, and the pair
         reader only distinguishes 8-byte values from everything
         else, so any size other than 4 or 8 would be mis-parsed.
         Give up on such input rather than carry on. */
      if (asize != 4 && asize != 8)
         barf("in .debug_aranges: unsupported address size");

      /* Skip padding so the first pair starts at a section offset
         which is a multiple of the size of one pair. */
      while ((get_position_of_Cursor( &aranges ) % (2 * asize)) > 0) {
         (void)get_UChar( &aranges );
      }

      /* Print pairs up to and including the all-zero terminator. */
      do {
         address = get_Dwarfish_UWord( &aranges, asize==8 );
         length  = get_Dwarfish_UWord( &aranges, asize==8 );
         TRACE_D3("    0x%016llx 0x%llx\n", address, length);
      } while (!(address == 0 && length == 0));
   }
   TRACE_SYMTAB("\n");
#endif
4458
4459#endif // defined(VGO_linux) || defined(VGO_darwin)
4460
4461/*--------------------------------------------------------------------*/
4462/*--- end                                                          ---*/
4463/*--------------------------------------------------------------------*/
4464