1/* -*- mode: C; c-basic-offset: 3; -*- */
2
3/*--------------------------------------------------------------------*/
4/*--- Read DWARF3/4 ".debug_info" sections (DIE trees).            ---*/
5/*---                                                 readdwarf3.c ---*/
6/*--------------------------------------------------------------------*/
7
8/*
9   This file is part of Valgrind, a dynamic binary instrumentation
10   framework.
11
12   Copyright (C) 2008-2013 OpenWorks LLP
13      info@open-works.co.uk
14
15   This program is free software; you can redistribute it and/or
16   modify it under the terms of the GNU General Public License as
17   published by the Free Software Foundation; either version 2 of the
18   License, or (at your option) any later version.
19
20   This program is distributed in the hope that it will be useful, but
21   WITHOUT ANY WARRANTY; without even the implied warranty of
22   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23   General Public License for more details.
24
25   You should have received a copy of the GNU General Public License
26   along with this program; if not, write to the Free Software
27   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28   02111-1307, USA.
29
30   The GNU General Public License is contained in the file COPYING.
31
32   Neither the names of the U.S. Department of Energy nor the
33   University of California nor the names of its contributors may be
34   used to endorse or promote products derived from this software
35   without prior written permission.
36*/
37
38#if defined(VGO_linux) || defined(VGO_darwin)
39
40/* REFERENCE (without which this code will not make much sense):
41
42   DWARF Debugging Information Format, Version 3,
43   dated 20 December 2005 (the "D3 spec").
44
45   Available at http://www.dwarfstd.org/Dwarf3.pdf.  There's also a
46   .doc (MS Word) version, but for some reason the section numbers
47   between the Word and PDF versions differ by 1 in the first digit.
48   All section references in this code are to the PDF version.
49
50   CURRENT HACKS:
51
   DW_TAG_{const,volatile}_type with no DW_AT_type is allowed; it is
53      assumed to mean "const void" or "volatile void" respectively.
54      GDB appears to interpret them like this, anyway.
55
56   In many cases it is important to know the svma of a CU (the "base
57   address of the CU", as the D3 spec calls it).  There are some
58   situations in which the spec implies this value is unknown, but the
   Dwarf3 produced by gcc-4.1 seems to assume it is not unknown but
60   merely zero when not explicitly stated.  So we too have to make
61   that assumption.
62
63   POTENTIAL BUG?  Spotted 6 Sept 08.  Why doesn't
64   unitary_range_list() bias the resulting range list in the same way
65   that its more general cousin, get_range_list(), does?  I don't
66   know.
67
68   TODO, 2008 Feb 17:
69
70   get rid of cu_svma_known and document the assumed-zero svma hack.
71
72   ML_(sizeOfType): differentiate between zero sized types and types
73   for which the size is unknown.  Is this important?  I don't know.
74
   DW_TAG_array_types: deal with explicit sizes (currently we compute
   the size from the bounds and the element size, although that's
   fragile if the bounds are incompletely specified, or completely
   absent)
79
80   Document reason for difference (by 1) of stack preening depth in
81   parse_var_DIE vs parse_type_DIE.
82
83   Don't hand to ML_(addVars), vars whose locations are entirely in
84   registers (DW_OP_reg*).  This is merely a space-saving
85   optimisation, as ML_(evaluate_Dwarf3_Expr) should handle these
86   expressions correctly, by failing to evaluate them and hence
87   effectively ignoring the variable with which they are associated.
88
89   Deal with DW_TAG_array_types which have element size != stride
90
91   In some cases, the info for a variable is split between two
92   different DIEs (generally a declarer and a definer).  We punt on
93   these.  Could do better here.
94
95   The 'data_bias' argument passed to the expression evaluator
96   (ML_(evaluate_Dwarf3_Expr)) should really be changed to a
97   MaybeUWord, to make it clear when we do vs don't know what it is
98   for the evaluation of an expression.  At the moment zero is passed
99   for this parameter in the don't know case.  That's a bit fragile
100   and obscure; using a MaybeUWord would be clearer.
101
102   POTENTIAL PERFORMANCE IMPROVEMENTS:
103
   Currently, duplicate removal and all other queries for the type
   entities array are done using cuOffset-based pointing, which
   involves a binary search (VG_(lookupXA)) for each access.  This is
   wildly inefficient, although simple.  It would be better to
108   translate all the cuOffset-based references (iow, all the "R" and
109   "Rs" fields in the TyEnts in 'tyents') to direct index numbers in
110   'tyents' right at the start of dedup_types(), and use direct
111   indexing (VG_(indexXA)) wherever possible after that.
112
113   cmp__XArrays_of_AddrRange is also a performance bottleneck.  Move
114   VG_(indexXA) into pub_tool_xarray.h so it can be inlined at all use
115   points, and possibly also make an _UNCHECKED version which skips
116   the range checks in performance-critical situations such as this.
117
118   Handle interaction between read_DIE and parse_{var,type}_DIE
119   better.  Currently read_DIE reads the entire DIE just to find where
120   the end is (and for debug printing), so that it can later reliably
121   move the cursor to the end regardless of what parse_{var,type}_DIE
122   do.  This means many DIEs (most, even?) are read twice.  It would
123   be smarter to make parse_{var,type}_DIE return a Bool indicating
124   whether or not they advanced the DIE cursor, and only if they
125   didn't should read_DIE itself read through the DIE.
126
127   ML_(addVar) and add_var_to_arange: quite a lot of DiAddrRanges have
128   zero variables in their .vars XArray.  Rather than have an XArray
129   with zero elements (which uses 2 malloc'd blocks), allow the .vars
130   pointer to be NULL in this case.
131
132   More generally, reduce the amount of memory allocated and freed
133   while reading Dwarf3 type/variable information.  Even modest (20MB)
134   objects cause this module to allocate and free hundreds of
135   thousands of small blocks, and ML_(arena_malloc) and its various
136   groupies always show up at the top of performance profiles. */
137
138#include "pub_core_basics.h"
139#include "pub_core_debuginfo.h"
140#include "pub_core_libcbase.h"
141#include "pub_core_libcassert.h"
142#include "pub_core_libcprint.h"
143#include "pub_core_libcsetjmp.h"   // setjmp facilities
144#include "pub_core_hashtable.h"
145#include "pub_core_options.h"
146#include "pub_core_tooliface.h"    /* VG_(needs) */
147#include "pub_core_xarray.h"
148#include "pub_core_wordfm.h"
149#include "priv_misc.h"             /* dinfo_zalloc/free */
150#include "priv_image.h"
151#include "priv_tytypes.h"
152#include "priv_d3basics.h"
153#include "priv_storage.h"
154#include "priv_readdwarf3.h"       /* self */
155
156
157/*------------------------------------------------------------*/
158/*---                                                      ---*/
159/*--- Basic machinery for parsing DIEs.                    ---*/
160/*---                                                      ---*/
161/*------------------------------------------------------------*/
162
/* Debug tracing helpers.  Both macros read a variable named 'td3',
   which must be in scope wherever they are used.
   TRACE_D3 prints via VG_(printf) only when td3 is set.  Note that it
   expands to a bare 'if' statement rather than a do { } while (0)
   wrapper, so it must not be used as the unbraced body of an if that
   has an else.
   TD3 is the same test as an expression, for guarding larger chunks
   of trace code. */
#define TRACE_D3(format, args...) \
   if (UNLIKELY(td3)) { VG_(printf)(format, ## args); }
#define TD3 (UNLIKELY(td3))

/* Reserved UWord values -1 and -2.  Judging by the names they
   presumably mark "no valid cuOffset" and the fake 'void' type
   respectively -- confirm against their users. */
#define D3_INVALID_CUOFF  ((UWord)(-1UL))
#define D3_FAKEVOID_CUOFF ((UWord)(-2UL))
169
/* A read position within a DiSlice.  The get_* functions in this file
   check each read against the slice bounds and call |barf| with
   |barfstr| if the read would overrun the slice. */
typedef
   struct {
      DiSlice sli;      // to which this cursor applies
      DiOffT  sli_next; // offset in underlying DiImage; must be >= sli.ioff
      /* Error handler.  Declared noreturn, so control never comes
         back to the code that called it. */
      void (*barf)( const HChar* ) __attribute__((noreturn));
      /* Message passed to |barf| when a read overruns |sli|. */
      const HChar* barfstr;
   }
   Cursor;
178
179static inline Bool is_sane_Cursor ( const Cursor* c ) {
180   if (!c)                return False;
181   if (!c->barf)          return False;
182   if (!c->barfstr)       return False;
183   if (!ML_(sli_is_valid)(c->sli))    return False;
184   if (c->sli.ioff == DiOffT_INVALID) return False;
185   if (c->sli_next < c->sli.ioff)     return False;
186   return True;
187}
188
189// Initialise a cursor from a DiSlice (ELF section, really) so as to
190// start reading at offset |sli_initial_offset| from the start of the
191// slice.
192static void init_Cursor ( /*OUT*/Cursor* c,
193                          DiSlice sli,
194                          ULong   sli_initial_offset,
195                          __attribute__((noreturn)) void (*barf)(const HChar*),
196                          const HChar* barfstr )
197{
198   vg_assert(c);
199   VG_(bzero_inline)(c, sizeof(*c));
200   c->sli              = sli;
201   c->sli_next         = c->sli.ioff + sli_initial_offset;
202   c->barf             = barf;
203   c->barfstr          = barfstr;
204   vg_assert(is_sane_Cursor(c));
205}
206
207static Bool is_at_end_Cursor ( const Cursor* c ) {
208   vg_assert(is_sane_Cursor(c));
209   return c->sli_next >= c->sli.ioff + c->sli.szB;
210}
211
212static inline ULong get_position_of_Cursor ( const Cursor* c ) {
213   vg_assert(is_sane_Cursor(c));
214   return c->sli_next - c->sli.ioff;
215}
216static inline void set_position_of_Cursor ( Cursor* c, ULong pos ) {
217   c->sli_next = c->sli.ioff + pos;
218   vg_assert(is_sane_Cursor(c));
219}
220static inline void advance_position_of_Cursor ( Cursor* c, ULong delta ) {
221   c->sli_next += delta;
222   vg_assert(is_sane_Cursor(c));
223}
224
225static /*signed*/Long get_remaining_length_Cursor ( const Cursor* c ) {
226   vg_assert(is_sane_Cursor(c));
227   return c->sli.ioff + c->sli.szB - c->sli_next;
228}
229
230//static void* get_address_of_Cursor ( Cursor* c ) {
231//   vg_assert(is_sane_Cursor(c));
232//   return &c->region_start_img[ c->region_next ];
233//}
234
235static DiCursor get_DiCursor_from_Cursor ( const Cursor* c ) {
236   return mk_DiCursor(c->sli.img, c->sli_next);
237}
238
239/* FIXME: document assumptions on endianness for
240   get_UShort/UInt/ULong. */
241static inline UChar get_UChar ( Cursor* c ) {
242   UChar r;
243   vg_assert(is_sane_Cursor(c));
244   if (c->sli_next + sizeof(UChar) > c->sli.ioff + c->sli.szB) {
245      c->barf(c->barfstr);
246      /*NOTREACHED*/
247      vg_assert(0);
248   }
249   r = ML_(img_get_UChar)(c->sli.img, c->sli_next);
250   c->sli_next += sizeof(UChar);
251   return r;
252}
253static UShort get_UShort ( Cursor* c ) {
254   UShort r;
255   vg_assert(is_sane_Cursor(c));
256   if (c->sli_next + sizeof(UShort) > c->sli.ioff + c->sli.szB) {
257      c->barf(c->barfstr);
258      /*NOTREACHED*/
259      vg_assert(0);
260   }
261   r = ML_(img_get_UShort)(c->sli.img, c->sli_next);
262   c->sli_next += sizeof(UShort);
263   return r;
264}
265static UInt get_UInt ( Cursor* c ) {
266   UInt r;
267   vg_assert(is_sane_Cursor(c));
268   if (c->sli_next + sizeof(UInt) > c->sli.ioff + c->sli.szB) {
269      c->barf(c->barfstr);
270      /*NOTREACHED*/
271      vg_assert(0);
272   }
273   r = ML_(img_get_UInt)(c->sli.img, c->sli_next);
274   c->sli_next += sizeof(UInt);
275   return r;
276}
277static ULong get_ULong ( Cursor* c ) {
278   ULong r;
279   vg_assert(is_sane_Cursor(c));
280   if (c->sli_next + sizeof(ULong) > c->sli.ioff + c->sli.szB) {
281      c->barf(c->barfstr);
282      /*NOTREACHED*/
283      vg_assert(0);
284   }
285   r = ML_(img_get_ULong)(c->sli.img, c->sli_next);
286   c->sli_next += sizeof(ULong);
287   return r;
288}
289static ULong get_ULEB128 ( Cursor* c ) {
290   ULong result;
291   Int   shift;
292   UChar byte;
293   /* unroll first iteration */
294   byte = get_UChar( c );
295   result = (ULong)(byte & 0x7f);
296   if (LIKELY(!(byte & 0x80))) return result;
297   shift = 7;
298   /* end unroll first iteration */
299   do {
300      byte = get_UChar( c );
301      result |= ((ULong)(byte & 0x7f)) << shift;
302      shift += 7;
303   } while (byte & 0x80);
304   return result;
305}
306static Long get_SLEB128 ( Cursor* c ) {
307   ULong  result = 0;
308   Int    shift = 0;
309   UChar  byte;
310   do {
311      byte = get_UChar(c);
312      result |= ((ULong)(byte & 0x7f)) << shift;
313      shift += 7;
314   } while (byte & 0x80);
315   if (shift < 64 && (byte & 0x40))
316      result |= -(1ULL << shift);
317   return result;
318}
319
320/* Assume 'c' points to the start of a string.  Return a DiCursor of
321   whatever it points at, and advance it past the terminating zero.
322   This makes it safe for the caller to then copy the string with
323   ML_(addStr), since (w.r.t. image overruns) the process of advancing
324   past the terminating zero will already have "vetted" the string. */
325static DiCursor get_AsciiZ ( Cursor* c ) {
326   UChar uc;
327   DiCursor res = get_DiCursor_from_Cursor(c);
328   do { uc = get_UChar(c); } while (uc != 0);
329   return res;
330}
331
332static ULong peek_ULEB128 ( Cursor* c ) {
333   DiOffT here = c->sli_next;
334   ULong  r    = get_ULEB128( c );
335   c->sli_next = here;
336   return r;
337}
338static UChar peek_UChar ( Cursor* c ) {
339   DiOffT here = c->sli_next;
340   UChar  r    = get_UChar( c );
341   c->sli_next = here;
342   return r;
343}
344
345static ULong get_Dwarfish_UWord ( Cursor* c, Bool is_dw64 ) {
346   return is_dw64 ? get_ULong(c) : (ULong) get_UInt(c);
347}
348
349static UWord get_UWord ( Cursor* c ) {
350   vg_assert(sizeof(UWord) == sizeof(void*));
351   if (sizeof(UWord) == 4) return get_UInt(c);
352   if (sizeof(UWord) == 8) return get_ULong(c);
353   vg_assert(0);
354}
355
356/* Read a DWARF3 'Initial Length' field */
357static ULong get_Initial_Length ( /*OUT*/Bool* is64,
358                                  Cursor* c,
359                                  const HChar* barfMsg )
360{
361   ULong w64;
362   UInt  w32;
363   *is64 = False;
364   w32 = get_UInt( c );
365   if (w32 >= 0xFFFFFFF0 && w32 < 0xFFFFFFFF) {
366      c->barf( barfMsg );
367   }
368   else if (w32 == 0xFFFFFFFF) {
369      *is64 = True;
370      w64   = get_ULong( c );
371   } else {
372      *is64 = False;
373      w64 = (ULong)w32;
374   }
375   return w64;
376}
377
378
379/*------------------------------------------------------------*/
380/*---                                                      ---*/
381/*--- "CUConst" structure                                  ---*/
382/*---                                                      ---*/
383/*------------------------------------------------------------*/
384
/* One attribute name/form pair of an abbreviation, plus the
   bookkeeping used to skip over DIEs quickly. */
typedef
   struct _name_form {
      ULong at_name;  // Dwarf Attribute name
      ULong at_form;  // Dwarf Attribute form
      UInt  skip_szB; // Nr of bytes skippable from here ...
      UInt  next_nf;  // ... to reach this attr/form index in the g_abbv.nf
   } name_form;
/* skip_szB and next_nf are used to optimise the skipping of uninteresting DIEs.
   Each name_form records how many bytes (a fixed number) can be skipped from
   the beginning of this form till the next attr/form to look at.
   The next form to look at can be:
396       an 'interesting' attr/form to read while skipping a DIE
397          (currently, this is only DW_AT_sibling)
398   or
399       a variable length form which must be read to be skipped.
400   For a variable length form, the skip_szB will be equal to VARSZ_FORM.
401
402   Note: this technique could also be used to speed up the parsing
403   of DIEs : for each parser kind, we could have the nr of bytes
404   to skip to directly reach the interesting form(s) for the parser. */
405
/* A parsed abbreviation, stored in a hash table keyed by abbv_code.
   The first two fields are accessed by the hash table itself (see the
   per-field notes), so their position matters.  The struct is
   followed in memory by a variable number of name_form entries. */
typedef
   struct _g_abbv {
      struct _g_abbv *next; // read/write by hash table.
      UWord  abbv_code;     // key, read by hash table
      ULong  atag;
      ULong  has_children;
      name_form nf[0];
      /* Variable-length array of name/form pairs, terminated
         by a 0/0 pair.
         The skip_szB/next_nf fields make it possible to skip
         efficiently over a DIE described by this g_abbv. */
    } g_abbv;
418
/* Holds information that is constant through the parsing of a
   Compilation Unit.  This is basically plumbed through to
   everywhere. */
typedef
   struct {
      /* Call here if anything goes wrong.  Declared noreturn. */
      void (*barf)( const HChar* ) __attribute__((noreturn));
      /* Is this 64-bit DWARF ? */
      Bool   is_dw64;
      /* Which DWARF version ?  (2, 3 or 4) */
      UShort version;
      /* Length of this Compilation Unit, as stated in the
         .unit_length :: InitialLength field of the CU Header.
         However, this size (as specified by the D3 spec) does not
         include the size of the .unit_length field itself, which is
         either 4 or 12 bytes (32-bit or 64-bit Dwarf3).  That value
         can be obtained through the expression ".is_dw64 ? 12 : 4". */
      ULong  unit_length;
      /* Offset of start of this unit in .debug_info */
      UWord  cu_start_offset;
      /* SVMA for this CU.  In the D3 spec, this is known as the "base
         address of the compilation unit" (last para sec 3.1.1).
         Needed for (amongst things) interpretation of location-list
         values. */
      Addr   cu_svma;
      Bool   cu_svma_known;

      /* The debug_abbreviations table to be used for this Unit */
      //UChar* debug_abbv;
      /* Upper bound on size thereof (an overestimate, in general) */
      //UWord  debug_abbv_maxszB;
      /* A bounded area of the image, to be used as the
         debug_abbreviations table to be used for this Unit. */
      DiSlice debug_abbv;

      /* Image information for various sections. */
      DiSlice escn_debug_str;
      DiSlice escn_debug_ranges;
      DiSlice escn_debug_loc;
      DiSlice escn_debug_line;
      DiSlice escn_debug_info;
      DiSlice escn_debug_types;
      DiSlice escn_debug_info_alt;
      DiSlice escn_debug_str_alt;
      /* How much to add to .debug_types resp. alternate .debug_info offsets
         in cook_die*.  */
      UWord  types_cuOff_bias;
      UWord  alt_cuOff_bias;
      /* --- Needed so we can add stuff to the string table. --- */
      struct _DebugInfo* di;
      /* --- a hash table of g_abbv (i.e. parsed abbreviations) --- */
      VgHashTable *ht_abbvs;

      /* True if this came from .debug_types; otherwise it came from
         .debug_info.  */
      Bool is_type_unit;
      /* For a unit coming from .debug_types, these hold the TU's type
         signature and the uncooked DIE offset of the TU's signatured
         type.  For a unit coming from .debug_info, these are unused.  */
      ULong type_signature;
      ULong type_offset;

      /* Signatured type hash; computed once and then shared by all
         CUs.  */
      VgHashTable *signature_types;

      /* True if this came from alternate .debug_info; otherwise
         it came from normal .debug_info or .debug_types.  */
      Bool is_alt_info;
   }
   CUConst;
490
491
492/* Return the cooked value of DIE depending on whether CC represents a
493   .debug_types unit.  To cook a DIE, we pretend that the .debug_info,
494   .debug_types and optional alternate .debug_info sections form
495   a contiguous whole, so that DIEs coming from .debug_types are numbered
496   starting at the end of .debug_info and DIEs coming from alternate
497   .debug_info are numbered starting at the end of .debug_types.  */
498static UWord cook_die( const CUConst* cc, UWord die )
499{
500   if (cc->is_type_unit)
501      die += cc->types_cuOff_bias;
502   else if (cc->is_alt_info)
503      die += cc->alt_cuOff_bias;
504   return die;
505}
506
507/* Like cook_die, but understand that DIEs coming from a
508   DW_FORM_ref_sig8 reference are already cooked.  Also, handle
509   DW_FORM_GNU_ref_alt from within primary .debug_info or .debug_types
510   as reference to alternate .debug_info.  */
511static UWord cook_die_using_form( const CUConst *cc, UWord die, DW_FORM form)
512{
513   if (form == DW_FORM_ref_sig8)
514      return die;
515   if (form == DW_FORM_GNU_ref_alt)
516      return die + cc->alt_cuOff_bias;
517   return cook_die( cc, die );
518}
519
520/* Return the uncooked offset of DIE and set *TYPE_FLAG to true if the DIE
521   came from the .debug_types section and *ALT_FLAG to true if the DIE
522   came from alternate .debug_info section.  */
523static UWord uncook_die( const CUConst *cc, UWord die, /*OUT*/Bool *type_flag,
524                         Bool *alt_flag )
525{
526   *alt_flag = False;
527   *type_flag = False;
528   /* The use of escn_debug_{info,types}.szB seems safe to me even if
529      escn_debug_{info,types} are DiSlice_INVALID (meaning the
530      sections were not found), because DiSlice_INVALID.szB is always
531      zero.  That said, it seems unlikely we'd ever get here if
532      .debug_info or .debug_types were missing. */
533   if (die >= cc->escn_debug_info.szB) {
534      if (die >= cc->escn_debug_info.szB + cc->escn_debug_types.szB) {
535         *alt_flag = True;
536         die -= cc->escn_debug_info.szB + cc->escn_debug_types.szB;
537      } else {
538         *type_flag = True;
539         die -= cc->escn_debug_info.szB;
540      }
541   }
542   return die;
543}
544
545/*------------------------------------------------------------*/
546/*---                                                      ---*/
547/*--- Helper functions for Guarded Expressions             ---*/
548/*---                                                      ---*/
549/*------------------------------------------------------------*/
550
551/* Parse the location list starting at img-offset 'debug_loc_offset'
552   in .debug_loc.  Results are biased with 'svma_of_referencing_CU'
553   and so I believe are correct SVMAs for the object as a whole.  This
554   function allocates the UChar*, and the caller must deallocate it.
555   The resulting block is in so-called Guarded-Expression format.
556
557   Guarded-Expression format is similar but not identical to the DWARF3
558   location-list format.  The format of each returned block is:
559
560      UChar biasMe;
561      UChar isEnd;
562      followed by zero or more of
563
564      (Addr aMin;  Addr aMax;  UShort nbytes;  ..bytes..;  UChar isEnd)
565
   '..bytes..' is a standard DWARF3 location expression which is
567   valid when aMin <= pc <= aMax (possibly after suitable biasing).
568
569   The number of bytes in '..bytes..' is nbytes.
570
571   The end of the sequence is marked by an isEnd == 1 value.  All
572   previous isEnd values must be zero.
573
   biasMe is 1 if the aMin/aMax fields need this DebugInfo's
   text_bias added before use, and 0 if this is not necessary
   (the GX is ready to go).
577
578   Hence the block can be quickly parsed and is self-describing.  Note
579   that aMax is 1 less than the corresponding value in a DWARF3
580   location list.  Zero length ranges, with aMax == aMin-1, are not
581   allowed.
582*/
583/* 2008-sept-12: moved ML_(pp_GX) from here to d3basics.c, where
584   it more logically belongs. */
585
586
587/* Apply a text bias to a GX. */
588static void bias_GX ( /*MOD*/GExpr* gx, const DebugInfo* di )
589{
590   UShort nbytes;
591   UChar* p = &gx->payload[0];
592   UChar* pA;
593   UChar  uc;
594   uc = *p++; /*biasMe*/
595   if (uc == 0)
596      return;
597   vg_assert(uc == 1);
598   p[-1] = 0; /* mark it as done */
599   while (True) {
600      uc = *p++;
601      if (uc == 1)
602         break; /*isEnd*/
603      vg_assert(uc == 0);
604      /* t-bias aMin */
605      pA = (UChar*)p;
606      ML_(write_Addr)(pA, ML_(read_Addr)(pA) + di->text_debug_bias);
607      p += sizeof(Addr);
608      /* t-bias aMax */
609      pA = (UChar*)p;
610      ML_(write_Addr)(pA, ML_(read_Addr)(pA) + di->text_debug_bias);
611      p += sizeof(Addr);
612      /* nbytes, and actual expression */
613      nbytes = ML_(read_UShort)(p); p += sizeof(UShort);
614      p += nbytes;
615   }
616}
617
618__attribute__((noinline))
619static GExpr* make_singleton_GX ( DiCursor block, ULong nbytes )
620{
621   SizeT  bytesReqd;
622   GExpr* gx;
623   UChar *p, *pstart;
624
625   vg_assert(sizeof(UWord) == sizeof(Addr));
626   vg_assert(nbytes <= 0xFFFF); /* else we overflow the nbytes field */
627   bytesReqd
628      =   sizeof(UChar)  /*biasMe*/    + sizeof(UChar) /*!isEnd*/
629        + sizeof(UWord)  /*aMin*/      + sizeof(UWord) /*aMax*/
630        + sizeof(UShort) /*nbytes*/    + (SizeT)nbytes
631        + sizeof(UChar); /*isEnd*/
632
633   gx = ML_(dinfo_zalloc)( "di.readdwarf3.msGX.1",
634                           sizeof(GExpr) + bytesReqd );
635
636   p = pstart = &gx->payload[0];
637
638   p = ML_(write_UChar)(p, 0);        /*biasMe*/
639   p = ML_(write_UChar)(p, 0);        /*!isEnd*/
640   p = ML_(write_Addr)(p, 0);         /*aMin*/
641   p = ML_(write_Addr)(p, ~0);        /*aMax*/
642   p = ML_(write_UShort)(p, nbytes);  /*nbytes*/
643   ML_(cur_read_get)(p, block, nbytes); p += nbytes;
644   p = ML_(write_UChar)(p, 1);        /*isEnd*/
645
646   vg_assert( (SizeT)(p - pstart) == bytesReqd);
647   vg_assert( &gx->payload[bytesReqd]
648              == ((UChar*)gx) + sizeof(GExpr) + bytesReqd );
649
650   return gx;
651}
652
/* Build a GX (Guarded-Expression block, format described earlier in
   this file) from the location list that starts |debug_loc_offset|
   bytes into .debug_loc.

   The list is read as pairs of host-sized words (w1, w2):
   - (0, 0) ends the list;
   - w1 == -1UL selects w2 as the new base address;
   - otherwise the pair is a range [w1, w2) followed by a UShort
     length and that many expression bytes.  Non-empty ranges are
     emitted with aMin = w1 + base + svma_of_referencing_CU and
     aMax = w2 - 1 + base + svma_of_referencing_CU; zero-length ranges
     are dropped, but their expression bytes are still consumed.

   The resulting block has biasMe set to 1 and is allocated with
   ML_(dinfo_zalloc).  cc->barf is called if .debug_loc is missing or
   empty, if a range has w1 > w2, or (via the cursor) if reading runs
   off the end of the section.  |td3| enables trace output. */
__attribute__((noinline))
static GExpr* make_general_GX ( const CUConst* cc,
                                Bool     td3,
                                ULong    debug_loc_offset,
                                Addr     svma_of_referencing_CU )
{
   Addr      base;
   Cursor    loc;
   XArray*   xa; /* XArray of UChar */
   GExpr*    gx;
   Word      nbytes;

   vg_assert(sizeof(UWord) == sizeof(Addr));
   if (!ML_(sli_is_valid)(cc->escn_debug_loc) || cc->escn_debug_loc.szB == 0)
      cc->barf("make_general_GX: .debug_loc is empty/missing");

   init_Cursor( &loc, cc->escn_debug_loc, 0, cc->barf,
                "Overrun whilst reading .debug_loc section(2)" );
   set_position_of_Cursor( &loc, debug_loc_offset );

   TRACE_D3("make_general_GX (.debug_loc_offset = %llu, ioff = %llu) {\n",
            debug_loc_offset, (ULong)get_DiCursor_from_Cursor(&loc).ioff );

   /* Who frees this xa?  It is freed before this fn exits. */
   xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.mgGX.1",
                    ML_(dinfo_free),
                    sizeof(UChar) );

   /* The payload is assembled byte-wise in xa, starting with the
      biasMe flag. */
   { UChar c = 1; /*biasMe*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }

   base = 0;
   while (True) {
      Bool  acquire;
      UWord len;
      /* Read a (host-)word pair.  This is something of a hack since
         the word size to read is really dictated by the ELF file;
         however, we assume we're reading a file with the same
         word-sizeness as the host.  Reasonably enough. */
      UWord w1 = get_UWord( &loc );
      UWord w2 = get_UWord( &loc );

      TRACE_D3("   %08lx %08lx\n", w1, w2);
      if (w1 == 0 && w2 == 0)
         break; /* end of list */

      if (w1 == -1UL) {
         /* new value for 'base' */
         base = w2;
         continue;
      }

      /* else a location expression follows */
      /* else enumerate [w1+base, w2+base) */
      /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
         (sec 2.17.2) */
      if (w1 > w2) {
         TRACE_D3("negative range is for .debug_loc expr at "
                  "file offset %llu\n",
                  debug_loc_offset);
         cc->barf( "negative range in .debug_loc section" );
      }

      /* ignore zero length ranges */
      acquire = w1 < w2;
      len     = (UWord)get_UShort( &loc );

      if (acquire) {
         /* Emit the entry header: !isEnd, aMin, aMax (inclusive,
            hence the -1) and nbytes. */
         UWord  w;
         UShort s;
         UChar  c;
         c = 0; /* !isEnd*/
         VG_(addBytesToXA)( xa, &c, sizeof(c) );
         w = w1    + base + svma_of_referencing_CU;
         VG_(addBytesToXA)( xa, &w, sizeof(w) );
         w = w2 -1 + base + svma_of_referencing_CU;
         VG_(addBytesToXA)( xa, &w, sizeof(w) );
         s = (UShort)len;
         VG_(addBytesToXA)( xa, &s, sizeof(s) );
      }

      /* The expression bytes are always read, so that the cursor ends
         up at the next list entry; they are only copied into xa when
         the range is being kept. */
      while (len > 0) {
         UChar byte = get_UChar( &loc );
         TRACE_D3("%02x", (UInt)byte);
         if (acquire)
            VG_(addBytesToXA)( xa, &byte, 1 );
         len--;
      }
      TRACE_D3("\n");
   }

   { UChar c = 1; /*isEnd*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }

   nbytes = VG_(sizeXA)( xa );
   vg_assert(nbytes >= 1);

   /* Copy the assembled bytes into a right-sized GExpr, then discard
      the temporary array. */
   gx = ML_(dinfo_zalloc)( "di.readdwarf3.mgGX.2", sizeof(GExpr) + nbytes );
   VG_(memcpy)( &gx->payload[0], (UChar*)VG_(indexXA)(xa,0), nbytes );
   vg_assert( &gx->payload[nbytes]
              == ((UChar*)gx) + sizeof(GExpr) + nbytes );

   VG_(deleteXA)( xa );

   TRACE_D3("}\n");

   return gx;
}
759
760
761/*------------------------------------------------------------*/
762/*---                                                      ---*/
763/*--- Helper functions for range lists and CU headers      ---*/
764/*---                                                      ---*/
765/*------------------------------------------------------------*/
766
/* Denotes an address range.  Both aMin and aMax are included in the
   range; hence a complete range is (0, ~0) and an empty range is any
   (X, X-1) for X > 0. */
typedef
   struct { Addr aMin; Addr aMax; }
   AddrRange;
773
774
775/* Generate an arbitrary structural total ordering on
776   XArray* of AddrRange. */
777static Word cmp__XArrays_of_AddrRange ( const XArray* rngs1,
778                                        const XArray* rngs2 )
779{
780   Word n1, n2, i;
781   vg_assert(rngs1 && rngs2);
782   n1 = VG_(sizeXA)( rngs1 );
783   n2 = VG_(sizeXA)( rngs2 );
784   if (n1 < n2) return -1;
785   if (n1 > n2) return 1;
786   for (i = 0; i < n1; i++) {
787      AddrRange* rng1 = (AddrRange*)VG_(indexXA)( rngs1, i );
788      AddrRange* rng2 = (AddrRange*)VG_(indexXA)( rngs2, i );
789      if (rng1->aMin < rng2->aMin) return -1;
790      if (rng1->aMin > rng2->aMin) return 1;
791      if (rng1->aMax < rng2->aMax) return -1;
792      if (rng1->aMax > rng2->aMax) return 1;
793   }
794   return 0;
795}
796
797
798__attribute__((noinline))
799static XArray* /* of AddrRange */ empty_range_list ( void )
800{
801   XArray* xa; /* XArray of AddrRange */
802   /* Who frees this xa?  varstack_preen() does. */
803   xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.erl.1",
804                    ML_(dinfo_free),
805                    sizeof(AddrRange) );
806   return xa;
807}
808
809
810__attribute__((noinline))
811static XArray* unitary_range_list ( Addr aMin, Addr aMax )
812{
813   XArray*   xa;
814   AddrRange pair;
815   vg_assert(aMin <= aMax);
816   /* Who frees this xa?  varstack_preen() does. */
817   xa = VG_(newXA)( ML_(dinfo_zalloc),  "di.readdwarf3.url.1",
818                    ML_(dinfo_free),
819                    sizeof(AddrRange) );
820   pair.aMin = aMin;
821   pair.aMax = aMax;
822   VG_(addToXA)( xa, &pair );
823   return xa;
824}
825
826
827/* Enumerate the address ranges starting at img-offset
828   'debug_ranges_offset' in .debug_ranges.  Results are biased with
829   'svma_of_referencing_CU' and so I believe are correct SVMAs for the
830   object as a whole.  This function allocates the XArray, and the
831   caller must deallocate it. */
832__attribute__((noinline))
833static XArray* /* of AddrRange */
834get_range_list ( const CUConst* cc,
835                 Bool     td3,
836                 UWord    debug_ranges_offset,
837                 Addr     svma_of_referencing_CU )
838{
839   Addr      base;
840   Cursor    ranges;
841   XArray*   xa; /* XArray of AddrRange */
842   AddrRange pair;
843
844   if (!ML_(sli_is_valid)(cc->escn_debug_ranges)
845       || cc->escn_debug_ranges.szB == 0)
846      cc->barf("get_range_list: .debug_ranges is empty/missing");
847
848   init_Cursor( &ranges, cc->escn_debug_ranges, 0, cc->barf,
849                "Overrun whilst reading .debug_ranges section(2)" );
850   set_position_of_Cursor( &ranges, debug_ranges_offset );
851
852   /* Who frees this xa?  varstack_preen() does. */
853   xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.grl.1", ML_(dinfo_free),
854                    sizeof(AddrRange) );
855   base = 0;
856   while (True) {
857      /* Read a (host-)word pair.  This is something of a hack since
858         the word size to read is really dictated by the ELF file;
859         however, we assume we're reading a file with the same
860         word-sizeness as the host.  Reasonably enough. */
861      UWord w1 = get_UWord( &ranges );
862      UWord w2 = get_UWord( &ranges );
863
864      if (w1 == 0 && w2 == 0)
865         break; /* end of list. */
866
867      if (w1 == -1UL) {
868         /* new value for 'base' */
869         base = w2;
870         continue;
871      }
872
873      /* else enumerate [w1+base, w2+base) */
874      /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
875         (sec 2.17.2) */
876      if (w1 > w2)
877         cc->barf( "negative range in .debug_ranges section" );
878      if (w1 < w2) {
879         pair.aMin = w1     + base + svma_of_referencing_CU;
880         pair.aMax = w2 - 1 + base + svma_of_referencing_CU;
881         vg_assert(pair.aMin <= pair.aMax);
882         VG_(addToXA)( xa, &pair );
883      }
884   }
885   return xa;
886}
887
/* Marker value returned by get_Form_szB for forms that do not have a
   fixed encoded size.  The identical definition appears again just
   before the definition of get_Form_szB. */
#define VARSZ_FORM 0xffffffff
/* Forward declaration: init_ht_abbvs (below) needs get_Form_szB, which
   is defined further down in this file. */
static UInt get_Form_szB (const CUConst* cc, DW_FORM form );
890
891/* Initialises the hash table of abbreviations.
892   We do a single scan of the abbv slice to parse and
893   build all abbreviations, for the following reasons:
894     * all or most abbreviations will be needed in any case
895       (at least for var-info reading).
896     * re-reading each time an abbreviation causes a lot of calls
897       to get_ULEB128.
898     * a CU should not have many abbreviations. */
899static void init_ht_abbvs (CUConst* cc,
900                           Bool td3)
901{
902   Cursor c;
903   g_abbv *ta; // temporary abbreviation, reallocated if needed.
904   UInt ta_nf_maxE; // max nr of pairs in ta.nf[], doubled when reallocated.
905   UInt ta_nf_n;    // nr of pairs in ta->nf that are initialised.
906   g_abbv *ht_ta; // abbv to insert in hash table.
907   Int i;
908
909   #define SZ_G_ABBV(_nf_szE) (sizeof(g_abbv) + _nf_szE * sizeof(name_form))
910
911   ta_nf_maxE = 10; // starting with enough for 9 pairs+terminating pair.
912   ta = ML_(dinfo_zalloc) ("di.readdwarf3.ht_ta_nf", SZ_G_ABBV(ta_nf_maxE));
913   cc->ht_abbvs = VG_(HT_construct) ("di.readdwarf3.ht_abbvs");
914
915   init_Cursor( &c, cc->debug_abbv, 0, cc->barf,
916               "Overrun whilst parsing .debug_abbrev section(2)" );
917   while (True) {
918      ta->abbv_code = get_ULEB128( &c );
919      if (ta->abbv_code == 0) break; /* end of the table */
920
921      ta->atag = get_ULEB128( &c );
922      ta->has_children = get_UChar( &c );
923      ta_nf_n = 0;
924      while (True) {
925         if (ta_nf_n >= ta_nf_maxE) {
926            g_abbv *old_ta = ta;
927            ta = ML_(dinfo_zalloc) ("di.readdwarf3.ht_ta_nf",
928                                    SZ_G_ABBV(2 * ta_nf_maxE));
929            ta_nf_maxE = 2 * ta_nf_maxE;
930            VG_(memcpy) (ta, old_ta, SZ_G_ABBV(ta_nf_n));
931            ML_(dinfo_free) (old_ta);
932         }
933         ta->nf[ta_nf_n].at_name = get_ULEB128( &c );
934         ta->nf[ta_nf_n].at_form = get_ULEB128( &c );
935         if (ta->nf[ta_nf_n].at_name == 0 && ta->nf[ta_nf_n].at_form == 0) {
936            ta_nf_n++;
937            break;
938         }
939        ta_nf_n++;
940      }
941
942      // Initialises the skip_szB/next_nf elements : an element at position
943      // i must contain the sum of its own size + the sizes of all elements
944      // following i till either the next variable size element, the next
945      // sibling element or the end of the DIE.
946      ta->nf[ta_nf_n - 1].skip_szB = 0;
947      ta->nf[ta_nf_n - 1].next_nf = 0;
948      for (i = ta_nf_n - 2; i >= 0; i--) {
949         const UInt form_szB = get_Form_szB (cc, (DW_FORM)ta->nf[i].at_form);
950
951         if (ta->nf[i+1].at_name == DW_AT_sibling
952             || ta->nf[i+1].skip_szB == VARSZ_FORM) {
953            ta->nf[i].skip_szB = form_szB;
954            ta->nf[i].next_nf  = i+1;
955         } else if (form_szB == VARSZ_FORM) {
956            ta->nf[i].skip_szB = form_szB;
957            ta->nf[i].next_nf  = i+1;
958         } else {
959            ta->nf[i].skip_szB = ta->nf[i+1].skip_szB + form_szB;
960            ta->nf[i].next_nf  = ta->nf[i+1].next_nf;
961         }
962      }
963
964      ht_ta = ML_(dinfo_zalloc) ("di.readdwarf3.ht_ta", SZ_G_ABBV(ta_nf_n));
965      VG_(memcpy) (ht_ta, ta, SZ_G_ABBV(ta_nf_n));
966      VG_(HT_add_node) ( cc->ht_abbvs, ht_ta );
967      if (TD3) {
968         TRACE_D3("  Adding abbv_code %llu TAG  %s [%s] nf %d ",
969                  (ULong) ht_ta->abbv_code, ML_(pp_DW_TAG)(ht_ta->atag),
970                  ML_(pp_DW_children)(ht_ta->has_children),
971                  ta_nf_n);
972         TRACE_D3("  ");
973         for (i = 0; i < ta_nf_n; i++)
974            TRACE_D3("[%u,%u] ", ta->nf[i].skip_szB, ta->nf[i].next_nf);
975         TRACE_D3("\n");
976      }
977   }
978
979   ML_(dinfo_free) (ta);
980   #undef SZ_G_ABBV
981}
982
983static g_abbv* get_abbv (const CUConst* cc, ULong abbv_code)
984{
985   g_abbv *abbv;
986
987   abbv = VG_(HT_lookup) (cc->ht_abbvs, abbv_code);
988   if (!abbv)
989      cc->barf ("abbv_code not found in ht_abbvs table");
990   return abbv;
991}
992
/* Free the memory allocated in CUConst.  The only thing released here
   is the abbreviation hash table cc->ht_abbvs: the table is destroyed
   with ML_(dinfo_free) passed as the node-freeing function, and the
   field is then set to NULL so no stale pointer is left behind. */
static void clear_CUConst (CUConst* cc)
{
   VG_(HT_destruct) ( cc->ht_abbvs, ML_(dinfo_free));
   cc->ht_abbvs = NULL;
}
999
1000/* Parse the Compilation Unit header indicated at 'c' and
1001   initialise 'cc' accordingly. */
1002static __attribute__((noinline))
1003void parse_CU_Header ( /*OUT*/CUConst* cc,
1004                       Bool td3,
1005                       Cursor* c,
1006                       DiSlice escn_debug_abbv,
1007		       Bool type_unit,
1008                       Bool alt_info )
1009{
1010   UChar  address_size;
1011   ULong  debug_abbrev_offset;
1012
1013   VG_(memset)(cc, 0, sizeof(*cc));
1014   vg_assert(c && c->barf);
1015   cc->barf = c->barf;
1016
1017   /* initial_length field */
1018   cc->unit_length
1019      = get_Initial_Length( &cc->is_dw64, c,
1020           "parse_CU_Header: invalid initial-length field" );
1021
1022   TRACE_D3("   Length:        %lld\n", cc->unit_length );
1023
1024   /* version */
1025   cc->version = get_UShort( c );
1026   if (cc->version != 2 && cc->version != 3 && cc->version != 4)
1027      cc->barf( "parse_CU_Header: is neither DWARF2 nor DWARF3 nor DWARF4" );
1028   TRACE_D3("   Version:       %d\n", (Int)cc->version );
1029
1030   /* debug_abbrev_offset */
1031   debug_abbrev_offset = get_Dwarfish_UWord( c, cc->is_dw64 );
1032   if (debug_abbrev_offset >= escn_debug_abbv.szB)
1033      cc->barf( "parse_CU_Header: invalid debug_abbrev_offset" );
1034   TRACE_D3("   Abbrev Offset: %lld\n", debug_abbrev_offset );
1035
1036   /* address size.  If this isn't equal to the host word size, just
1037      give up.  This makes it safe to assume elsewhere that
1038      DW_FORM_addr and DW_FORM_ref_addr can be treated as a host
1039      word. */
1040   address_size = get_UChar( c );
1041   if (address_size != sizeof(void*))
1042      cc->barf( "parse_CU_Header: invalid address_size" );
1043   TRACE_D3("   Pointer Size:  %d\n", (Int)address_size );
1044
1045   cc->is_type_unit = type_unit;
1046   cc->is_alt_info = alt_info;
1047
1048   if (type_unit) {
1049      cc->type_signature = get_ULong( c );
1050      cc->type_offset = get_Dwarfish_UWord( c, cc->is_dw64 );
1051   }
1052
1053   /* Set up cc->debug_abbv to point to the relevant table for this
1054      CU.  Set its .szB so that at least we can't read off the end of
1055      the debug_abbrev section -- potentially (and quite likely) too
1056      big, if this isn't the last table in the section, but at least
1057      it's safe.
1058
1059      This amounts to taking debug_abbv_escn and moving the start
1060      position along by debug_abbrev_offset bytes, hence forming a
1061      smaller DiSlice which has the same end point.  Since we checked
1062      just above that debug_abbrev_offset is less than the size of
1063      debug_abbv_escn, this should leave us with a nonempty slice. */
1064   vg_assert(debug_abbrev_offset < escn_debug_abbv.szB);
1065   cc->debug_abbv      = escn_debug_abbv;
1066   cc->debug_abbv.ioff += debug_abbrev_offset;
1067   cc->debug_abbv.szB  -= debug_abbrev_offset;
1068
1069   init_ht_abbvs(cc, td3);
1070}
1071
/* This represents a single signatured type.  It maps a type signature
   (a ULong) to a cooked DIE offset.  Objects of this type are stored
   in the type signature hash table.
   NOTE(review): the leading 'next' and 'data' fields presumably have
   to match the node layout expected by the VG_(HT_*) hash table
   functions -- confirm before reordering fields. */
typedef
   struct D3SignatureType {
      /* Chain link; followed by lookup_signatured_type. */
      struct D3SignatureType *next;
      /* Hash key: the type signature converted to a UWord. */
      UWord data;
      /* The full 64-bit signature, compared on lookup. */
      ULong type_signature;
      /* The cooked DIE offset the signature maps to. */
      UWord die;
   }
   D3SignatureType;
1083
1084/* Record a signatured type in the hash table.  */
1085static void record_signatured_type ( VgHashTable *tab,
1086                                     ULong type_signature,
1087                                     UWord die )
1088{
1089   D3SignatureType *dstype = ML_(dinfo_zalloc) ( "di.readdwarf3.sigtype",
1090                                                 sizeof(D3SignatureType) );
1091   dstype->data = (UWord) type_signature;
1092   dstype->type_signature = type_signature;
1093   dstype->die = die;
1094   VG_(HT_add_node) ( tab, dstype );
1095}
1096
1097/* Given a type signature hash table and a type signature, return the
1098   cooked DIE offset of the type.  If the type cannot be found, call
1099   BARF.  */
1100static UWord lookup_signatured_type ( const VgHashTable *tab,
1101                                      ULong type_signature,
1102                                      void (*barf)( const HChar* ) __attribute__((noreturn)) )
1103{
1104   D3SignatureType *dstype = VG_(HT_lookup) ( tab, (UWord) type_signature );
1105   /* This may be unwarranted chumminess with the hash table
1106      implementation.  */
1107   while ( dstype != NULL && dstype->type_signature != type_signature)
1108      dstype = dstype->next;
1109   if (dstype == NULL) {
1110      barf("lookup_signatured_type: could not find signatured type");
1111      /*NOTREACHED*/
1112      vg_assert(0);
1113   }
1114   return dstype->die;
1115}
1116
1117
/* Represents Form data.  If szB is 1/2/4/8 then the result is in the
   lowest 1/2/4/8 bytes of u.val.  If szB is zero or negative then the
   result is an image section beginning at u.cur and with size -szB.
   No other szB values are allowed. */
typedef
   struct {
      /* Discriminates between the two members of 'u' (see above). */
      Long szB; // 1, 2, 4, 8 or non-positive values only.
      /* val: scalar result, used when szB > 0.
         cur: start of the referenced bytes, used when szB <= 0. */
      union { ULong val; DiCursor cur; } u;
   }
   FormContents;
1128
1129/* From 'c', get the Form data into 'cts'.  Either it gets a 1/2/4/8
1130   byte scalar value, or (a reference to) zero or more bytes starting
1131   at a DiCursor.*/
1132static
1133void get_Form_contents ( /*OUT*/FormContents* cts,
1134                         const CUConst* cc, Cursor* c,
1135                         Bool td3, DW_FORM form )
1136{
1137   VG_(bzero_inline)(cts, sizeof(*cts));
1138   // !!! keep switch in sync with get_Form_szB. The nr of characters read below
1139   // must be computed similarly in get_Form_szB.
1140   // The consistency is verified in trace_DIE.
1141   switch (form) {
1142      case DW_FORM_data1:
1143         cts->u.val = (ULong)(UChar)get_UChar(c);
1144         cts->szB   = 1;
1145         TRACE_D3("%u", (UInt)cts->u.val);
1146         break;
1147      case DW_FORM_data2:
1148         cts->u.val = (ULong)(UShort)get_UShort(c);
1149         cts->szB   = 2;
1150         TRACE_D3("%u", (UInt)cts->u.val);
1151         break;
1152      case DW_FORM_data4:
1153         cts->u.val = (ULong)(UInt)get_UInt(c);
1154         cts->szB   = 4;
1155         TRACE_D3("%u", (UInt)cts->u.val);
1156         break;
1157      case DW_FORM_data8:
1158         cts->u.val = get_ULong(c);
1159         cts->szB   = 8;
1160         TRACE_D3("%llu", cts->u.val);
1161         break;
1162      case DW_FORM_sec_offset:
1163         cts->u.val = (ULong)get_Dwarfish_UWord( c, cc->is_dw64 );
1164         cts->szB   = cc->is_dw64 ? 8 : 4;
1165         TRACE_D3("%llu", cts->u.val);
1166         break;
1167      case DW_FORM_sdata:
1168         cts->u.val = (ULong)(Long)get_SLEB128(c);
1169         cts->szB   = 8;
1170         TRACE_D3("%lld", (Long)cts->u.val);
1171         break;
1172      case DW_FORM_udata:
1173         cts->u.val = (ULong)(Long)get_ULEB128(c);
1174         cts->szB   = 8;
1175         TRACE_D3("%llu", (Long)cts->u.val);
1176         break;
1177      case DW_FORM_addr:
1178         /* note, this is a hack.  DW_FORM_addr is defined as getting
1179            a word the size of the target machine as defined by the
1180            address_size field in the CU Header.  However,
1181            parse_CU_Header() rejects all inputs except those for
1182            which address_size == sizeof(Word), hence we can just
1183            treat it as a (host) Word.  */
1184         cts->u.val = (ULong)(UWord)get_UWord(c);
1185         cts->szB   = sizeof(UWord);
1186         TRACE_D3("0x%lx", (UWord)cts->u.val);
1187         break;
1188
1189      case DW_FORM_ref_addr:
1190         /* We make the same word-size assumption as DW_FORM_addr. */
1191         /* What does this really mean?  From D3 Sec 7.5.4,
1192            description of "reference", it would appear to reference
1193            some other DIE, by specifying the offset from the
1194            beginning of a .debug_info section.  The D3 spec mentions
1195            that this might be in some other shared object and
1196            executable.  But I don't see how the name of the other
1197            object/exe is specified.
1198
1199            At least for the DW_FORM_ref_addrs created by icc11, the
1200            references seem to be within the same object/executable.
1201            So for the moment we merely range-check, to see that they
1202            actually do specify a plausible offset within this
1203            object's .debug_info, and return the value unchanged.
1204
1205            In DWARF 2, DW_FORM_ref_addr is address-sized, but in
1206            DWARF 3 and later, it is offset-sized.
1207         */
1208         if (cc->version == 2) {
1209            cts->u.val = (ULong)(UWord)get_UWord(c);
1210            cts->szB   = sizeof(UWord);
1211         } else {
1212            cts->u.val = get_Dwarfish_UWord(c, cc->is_dw64);
1213            cts->szB   = cc->is_dw64 ? sizeof(ULong) : sizeof(UInt);
1214         }
1215         TRACE_D3("0x%lx", (UWord)cts->u.val);
1216         if (0) VG_(printf)("DW_FORM_ref_addr 0x%lx\n", (UWord)cts->u.val);
1217         if (/* the following is surely impossible, but ... */
1218             !ML_(sli_is_valid)(cc->escn_debug_info)
1219             || cts->u.val >= (ULong)cc->escn_debug_info.szB) {
1220            /* Hmm.  Offset is nonsensical for this object's .debug_info
1221               section.  Be safe and reject it. */
1222            cc->barf("get_Form_contents: DW_FORM_ref_addr points "
1223                     "outside .debug_info");
1224         }
1225         break;
1226
1227      case DW_FORM_strp: {
1228         /* this is an offset into .debug_str */
1229         UWord uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
1230         if (!ML_(sli_is_valid)(cc->escn_debug_str)
1231             || uw >= cc->escn_debug_str.szB)
1232            cc->barf("get_Form_contents: DW_FORM_strp "
1233                     "points outside .debug_str");
1234         /* FIXME: check the entire string lies inside debug_str,
1235            not just the first byte of it. */
1236         DiCursor str
1237            = ML_(cur_plus)( ML_(cur_from_sli)(cc->escn_debug_str), uw );
1238         if (TD3) {
1239            HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.1");
1240            TRACE_D3("(indirect string, offset: 0x%lx): %s", uw, tmp);
1241            ML_(dinfo_free)(tmp);
1242         }
1243         cts->u.cur = str;
1244         cts->szB   = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
1245         break;
1246      }
1247      case DW_FORM_string: {
1248         DiCursor str = get_AsciiZ(c);
1249         if (TD3) {
1250            HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.2");
1251            TRACE_D3("%s", tmp);
1252            ML_(dinfo_free)(tmp);
1253         }
1254         cts->u.cur = str;
1255         /* strlen is safe because get_AsciiZ already 'vetted' the
1256            entire string */
1257         cts->szB   = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
1258         break;
1259      }
1260      case DW_FORM_ref1: {
1261         UChar u8   = get_UChar(c);
1262         UWord res  = cc->cu_start_offset + (UWord)u8;
1263         cts->u.val = (ULong)res;
1264         cts->szB   = sizeof(UWord);
1265         TRACE_D3("<%lx>", res);
1266         break;
1267      }
1268      case DW_FORM_ref2: {
1269         UShort u16 = get_UShort(c);
1270         UWord  res = cc->cu_start_offset + (UWord)u16;
1271         cts->u.val = (ULong)res;
1272         cts->szB   = sizeof(UWord);
1273         TRACE_D3("<%lx>", res);
1274         break;
1275      }
1276      case DW_FORM_ref4: {
1277         UInt  u32  = get_UInt(c);
1278         UWord res  = cc->cu_start_offset + (UWord)u32;
1279         cts->u.val = (ULong)res;
1280         cts->szB   = sizeof(UWord);
1281         TRACE_D3("<%lx>", res);
1282         break;
1283      }
1284      case DW_FORM_ref8: {
1285         ULong u64  = get_ULong(c);
1286         UWord res  = cc->cu_start_offset + (UWord)u64;
1287         cts->u.val = (ULong)res;
1288         cts->szB   = sizeof(UWord);
1289         TRACE_D3("<%lx>", res);
1290         break;
1291      }
1292      case DW_FORM_ref_udata: {
1293         ULong u64  = get_ULEB128(c);
1294         UWord res  = cc->cu_start_offset + (UWord)u64;
1295         cts->u.val = (ULong)res;
1296         cts->szB   = sizeof(UWord);
1297         TRACE_D3("<%lx>", res);
1298         break;
1299      }
1300      case DW_FORM_flag: {
1301         UChar u8 = get_UChar(c);
1302         TRACE_D3("%u", (UInt)u8);
1303         cts->u.val = (ULong)u8;
1304         cts->szB   = 1;
1305         break;
1306      }
1307      case DW_FORM_flag_present:
1308         TRACE_D3("1");
1309         cts->u.val = 1;
1310         cts->szB   = 1;
1311         break;
1312      case DW_FORM_block1: {
1313         ULong    u64b;
1314         ULong    u64   = (ULong)get_UChar(c);
1315         DiCursor block = get_DiCursor_from_Cursor(c);
1316         TRACE_D3("%llu byte block: ", u64);
1317         for (u64b = u64; u64b > 0; u64b--) {
1318            UChar u8 = get_UChar(c);
1319            TRACE_D3("%x ", (UInt)u8);
1320         }
1321         cts->u.cur = block;
1322         cts->szB   = - (Long)u64;
1323         break;
1324      }
1325      case DW_FORM_block2: {
1326         ULong    u64b;
1327         ULong    u64   = (ULong)get_UShort(c);
1328         DiCursor block = get_DiCursor_from_Cursor(c);
1329         TRACE_D3("%llu byte block: ", u64);
1330         for (u64b = u64; u64b > 0; u64b--) {
1331            UChar u8 = get_UChar(c);
1332            TRACE_D3("%x ", (UInt)u8);
1333         }
1334         cts->u.cur = block;
1335         cts->szB   = - (Long)u64;
1336         break;
1337      }
1338      case DW_FORM_block4: {
1339         ULong    u64b;
1340         ULong    u64   = (ULong)get_UInt(c);
1341         DiCursor block = get_DiCursor_from_Cursor(c);
1342         TRACE_D3("%llu byte block: ", u64);
1343         for (u64b = u64; u64b > 0; u64b--) {
1344            UChar u8 = get_UChar(c);
1345            TRACE_D3("%x ", (UInt)u8);
1346         }
1347         cts->u.cur = block;
1348         cts->szB   = - (Long)u64;
1349         break;
1350      }
1351      case DW_FORM_exprloc:
1352      case DW_FORM_block: {
1353         ULong    u64b;
1354         ULong    u64   = (ULong)get_ULEB128(c);
1355         DiCursor block = get_DiCursor_from_Cursor(c);
1356         TRACE_D3("%llu byte block: ", u64);
1357         for (u64b = u64; u64b > 0; u64b--) {
1358            UChar u8 = get_UChar(c);
1359            TRACE_D3("%x ", (UInt)u8);
1360         }
1361         cts->u.cur = block;
1362         cts->szB   = - (Long)u64;
1363         break;
1364      }
1365      case DW_FORM_ref_sig8: {
1366         ULong  u64b;
1367         ULong  signature = get_ULong (c);
1368         ULong  work = signature;
1369         TRACE_D3("8 byte signature: ");
1370         for (u64b = 8; u64b > 0; u64b--) {
1371            UChar u8 = work & 0xff;
1372            TRACE_D3("%x ", (UInt)u8);
1373            work >>= 8;
1374         }
1375
1376         /* cc->signature_types is only built/initialised when
1377            VG_(clo_read_var_info) is set. In this case,
1378            the DW_FORM_ref_sig8 can be looked up.
1379            But we can also arrive here when only reading inline info
1380            and VG_(clo_trace_symtab) is set. In such a case,
1381            we cannot lookup the DW_FORM_ref_sig8, we rather assign
1382            a dummy value. This is a kludge, but otherwise,
1383            the 'dwarf inline info reader' tracing would have to
1384            do type processing/reading. It is better to avoid
1385            adding significant 'real' processing only due to tracing. */
1386         if (VG_(clo_read_var_info)) {
1387            /* Due to the way that the hash table is constructed, the
1388               resulting DIE offset here is already "cooked".  See
1389               cook_die_using_form.  */
1390            cts->u.val = lookup_signatured_type (cc->signature_types, signature,
1391                                                 c->barf);
1392         } else {
1393            vg_assert (td3);
1394            vg_assert (VG_(clo_read_inline_info));
1395            TRACE_D3("<not dereferencing signature type>");
1396            cts->u.val = 0; /* Assign a dummy/rubbish value */
1397         }
1398         cts->szB   = sizeof(UWord);
1399         break;
1400      }
1401      case DW_FORM_indirect:
1402         get_Form_contents (cts, cc, c, td3, (DW_FORM)get_ULEB128(c));
1403         return;
1404
1405      case DW_FORM_GNU_ref_alt:
1406         cts->u.val = get_Dwarfish_UWord(c, cc->is_dw64);
1407         cts->szB   = cc->is_dw64 ? sizeof(ULong) : sizeof(UInt);
1408         TRACE_D3("0x%lx", (UWord)cts->u.val);
1409         if (0) VG_(printf)("DW_FORM_GNU_ref_alt 0x%lx\n", (UWord)cts->u.val);
1410         if (/* the following is surely impossible, but ... */
1411             !ML_(sli_is_valid)(cc->escn_debug_info_alt))
1412            cc->barf("get_Form_contents: DW_FORM_GNU_ref_addr used, "
1413                     "but no alternate .debug_info");
1414         else if (cts->u.val >= (ULong)cc->escn_debug_info_alt.szB) {
1415            /* Hmm.  Offset is nonsensical for this object's .debug_info
1416               section.  Be safe and reject it. */
1417            cc->barf("get_Form_contents: DW_FORM_GNU_ref_addr points "
1418                     "outside alternate .debug_info");
1419         }
1420         break;
1421
1422      case DW_FORM_GNU_strp_alt: {
1423         /* this is an offset into alternate .debug_str */
1424         SizeT uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
1425         if (!ML_(sli_is_valid)(cc->escn_debug_str_alt))
1426            cc->barf("get_Form_contents: DW_FORM_GNU_strp_alt used, "
1427                     "but no alternate .debug_str");
1428         else if (uw >= cc->escn_debug_str_alt.szB)
1429            cc->barf("get_Form_contents: DW_FORM_GNU_strp_alt "
1430                     "points outside alternate .debug_str");
1431         /* FIXME: check the entire string lies inside debug_str,
1432            not just the first byte of it. */
1433         DiCursor str
1434            = ML_(cur_plus)( ML_(cur_from_sli)(cc->escn_debug_str_alt), uw);
1435         if (TD3) {
1436            HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.3");
1437            TRACE_D3("(indirect alt string, offset: 0x%lx): %s", uw, tmp);
1438            ML_(dinfo_free)(tmp);
1439         }
1440         cts->u.cur = str;
1441         cts->szB   = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
1442         break;
1443      }
1444
1445      default:
1446         VG_(printf)(
1447            "get_Form_contents: unhandled %d (%s) at <%llx>\n",
1448            form, ML_(pp_DW_FORM)(form), get_position_of_Cursor(c));
1449         c->barf("get_Form_contents: unhandled DW_FORM");
1450   }
1451}
1452
1453static inline UInt sizeof_Dwarfish_UWord (Bool is_dw64)
1454{
1455   if (is_dw64)
1456      return sizeof(ULong);
1457   else
1458      return sizeof(UInt);
1459}
1460
1461#define VARSZ_FORM 0xffffffff
1462/* If the form is a fixed length form, return the nr of bytes for this form.
1463   If the form is a variable length form, return VARSZ_FORM. */
1464static
1465UInt get_Form_szB (const CUConst* cc, DW_FORM form )
1466{
1467   // !!! keep switch in sync with get_Form_contents : the nr of bytes
1468   // read from a cursor by get_Form_contents must be returned by
1469   // the below switch.
1470   // The consistency is verified in trace_DIE.
1471   switch (form) {
1472      case DW_FORM_data1: return 1;
1473      case DW_FORM_data2: return 2;
1474      case DW_FORM_data4: return 4;
1475      case DW_FORM_data8: return 8;
1476      case DW_FORM_sec_offset:
1477         if (cc->is_dw64)
1478            return 8;
1479         else
1480            return 4;
1481      case DW_FORM_sdata:
1482         return VARSZ_FORM;
1483      case DW_FORM_udata:
1484         return VARSZ_FORM;
1485      case DW_FORM_addr: // See hack in get_Form_contents
1486         return sizeof(UWord);
1487      case DW_FORM_ref_addr: // See hack in get_Form_contents
1488         if (cc->version == 2)
1489            return sizeof(UWord);
1490         else
1491            return sizeof_Dwarfish_UWord (cc->is_dw64);
1492      case DW_FORM_strp:
1493         return sizeof_Dwarfish_UWord (cc->is_dw64);
1494      case DW_FORM_string:
1495         return VARSZ_FORM;
1496      case DW_FORM_ref1:
1497         return 1;
1498      case DW_FORM_ref2:
1499         return 2;
1500      case DW_FORM_ref4:
1501         return 4;
1502      case DW_FORM_ref8:
1503         return 8;
1504      case DW_FORM_ref_udata:
1505         return VARSZ_FORM;
1506      case DW_FORM_flag:
1507         return 1;
1508      case DW_FORM_flag_present:
1509         return 0; // !!! special case, no data.
1510      case DW_FORM_block1:
1511         return VARSZ_FORM;
1512      case DW_FORM_block2:
1513         return VARSZ_FORM;
1514      case DW_FORM_block4:
1515         return VARSZ_FORM;
1516      case DW_FORM_exprloc:
1517      case DW_FORM_block:
1518         return VARSZ_FORM;
1519      case DW_FORM_ref_sig8:
1520         return 8;
1521      case DW_FORM_indirect:
1522         return VARSZ_FORM;
1523      case DW_FORM_GNU_ref_alt:
1524         return sizeof_Dwarfish_UWord(cc->is_dw64);
1525      case DW_FORM_GNU_strp_alt:
1526         return sizeof_Dwarfish_UWord(cc->is_dw64);
1527      default:
1528         VG_(printf)(
1529            "get_Form_szB: unhandled %d (%s)\n",
1530            form, ML_(pp_DW_FORM)(form));
1531         cc->barf("get_Form_contents: unhandled DW_FORM");
1532   }
1533}
1534
1535/* Skip a DIE as described by abbv.
1536   If the DIE has a sibling, *sibling is set to the skipped DIE sibling value. */
1537static
1538void skip_DIE (UWord  *sibling,
1539               Cursor* c_die,
1540               const g_abbv *abbv,
1541               const CUConst* cc)
1542{
1543   UInt nf_i;
1544   FormContents cts;
1545   nf_i = 0;
1546   while (True) {
1547      if (abbv->nf[nf_i].at_name == DW_AT_sibling) {
1548         get_Form_contents( &cts, cc, c_die, False /*td3*/,
1549                            (DW_FORM)abbv->nf[nf_i].at_form );
1550         if ( cts.szB > 0 )
1551            *sibling = cts.u.val;
1552         nf_i++;
1553      } else if (abbv->nf[nf_i].skip_szB == VARSZ_FORM) {
1554         get_Form_contents( &cts, cc, c_die, False /*td3*/,
1555                            (DW_FORM)abbv->nf[nf_i].at_form );
1556         nf_i++;
1557      } else {
1558         advance_position_of_Cursor (c_die, (ULong)abbv->nf[nf_i].skip_szB);
1559         nf_i = abbv->nf[nf_i].next_nf;
1560      }
1561      if (nf_i == 0)
1562         break;
1563   }
1564}
1565
1566
1567/*------------------------------------------------------------*/
1568/*---                                                      ---*/
1569/*--- Parsing of variable-related DIEs                     ---*/
1570/*---                                                      ---*/
1571/*------------------------------------------------------------*/
1572
/* Temporary description of one variable, as collected while parsing
   variable-related DIEs. */
typedef
   struct _TempVar {
      const HChar*  name; /* in DebugInfo's .strpool */
      /* Represent ranges economically.  nRanges is the number of
         ranges.  Cases:
         0: .rngOneMin .rngOneMax .rngMany are all zero
         1: .rngOneMin .rngOneMax hold the range; .rngMany is NULL
         2: .rngOneMin .rngOneMax are zero; .rngMany holds the ranges.
         This is merely an optimisation to avoid having to allocate
         and free the XArray in the common case (98%) where there
         are zero or one address ranges. */
      UWord   nRanges;
      Addr    rngOneMin;
      Addr    rngOneMax;
      XArray* rngMany; /* of AddrRange.  NON-UNIQUE PTR in AR_DINFO. */
      /* Do not free .rngMany, since many TempVars will have the same
         value.  Instead the associated storage is to be freed by
         deleting 'rangetree', which stores a single copy of each
         range. */
      /* --- */
      Int     level;
      UWord   typeR; /* a cuOff */
      GExpr*  gexpr; /* for this variable */
      GExpr*  fbGX;  /* to find the frame base of the enclosing fn, if
                        any */
      UInt    fndn_ix; /* declaring file/dirname index in fndnpool, or 0 */
      Int     fLine; /* declaring file line number, or zero */
      /* offset in .debug_info, so that abstract instances can be
         found to satisfy references from concrete instances. */
      UWord   dioff;
      UWord   absOri; /* so the absOri fields refer to dioff fields
                         in some other, related TempVar. */
   }
   TempVar;
1607
/* State of the variable parser.  The stack is held as four parallel
   arrays (ranges, level, isFunc, fbGX) indexed by stack position. */
typedef
   struct {
      /* Contains the range stack: a stack of address ranges, one
         stack entry for each nested scope.

         Some scope entries are created by function definitions
         (DW_AT_subprogram), and for those, we also note the GExpr
         derived from its DW_AT_frame_base attribute, if any.
         Consequently it should be possible to find, for any
         variable's DIE, the GExpr for the containing function's
         DW_AT_frame_base by scanning back through the stack to find
         the nearest entry associated with a function.  This somewhat
         elaborate scheme is provided so as to make it possible to
         obtain the correct DW_AT_frame_base expression even in the
         presence of nested functions (or to be more precise, in the
         presence of nested DW_AT_subprogram DIEs).
      */
      Int     sp; /* [sp] is innermost active entry; sp==-1 for empty
                     stack */
      Int     stack_size; /* NOTE(review): presumably the allocated
                             capacity of the arrays below -- confirm */
      XArray **ranges; /* XArray of AddrRange */
      Int     *level;  /* D3 DIE levels */
      Bool    *isFunc; /* from DW_AT_subprogram? */
      GExpr  **fbGX;   /* if isFunc, contains the FB expr, else NULL */
      /* The fndn_ix file name/dirname table.  Is a mapping from dwarf
         integer index to the index in di->fndnpool. */
      XArray* /* of UInt* */ fndn_ix_Table;
   }
   D3VarParser;
1637
1638/* Completely initialise a variable parser object */
1639static void
1640var_parser_init ( D3VarParser *parser )
1641{
1642   parser->sp = -1;
1643   parser->stack_size = 0;
1644   parser->ranges = NULL;
1645   parser->level  = NULL;
1646   parser->isFunc = NULL;
1647   parser->fbGX = NULL;
1648   parser->fndn_ix_Table = NULL;
1649}
1650
1651/* Release any memory hanging off a variable parser object */
1652static void
1653var_parser_release ( D3VarParser *parser )
1654{
1655   ML_(dinfo_free)( parser->ranges );
1656   ML_(dinfo_free)( parser->level );
1657   ML_(dinfo_free)( parser->isFunc );
1658   ML_(dinfo_free)( parser->fbGX );
1659}
1660
1661static void varstack_show ( const D3VarParser* parser, const HChar* str )
1662{
1663   Word i, j;
1664   VG_(printf)("  varstack (%s) {\n", str);
1665   for (i = 0; i <= parser->sp; i++) {
1666      XArray* xa = parser->ranges[i];
1667      vg_assert(xa);
1668      VG_(printf)("    [%ld] (level %d)", i, parser->level[i]);
1669      if (parser->isFunc[i]) {
1670         VG_(printf)(" (fbGX=%p)", parser->fbGX[i]);
1671      } else {
1672         vg_assert(parser->fbGX[i] == NULL);
1673      }
1674      VG_(printf)(": ");
1675      if (VG_(sizeXA)( xa ) == 0) {
1676         VG_(printf)("** empty PC range array **");
1677      } else {
1678         for (j = 0; j < VG_(sizeXA)( xa ); j++) {
1679            AddrRange* range = (AddrRange*) VG_(indexXA)( xa, j );
1680            vg_assert(range);
1681            VG_(printf)("[%#lx,%#lx] ", range->aMin, range->aMax);
1682         }
1683      }
1684      VG_(printf)("\n");
1685   }
1686   VG_(printf)("  }\n");
1687}
1688
1689/* Remove from the stack, all entries with .level > 'level' */
1690static
1691void varstack_preen ( D3VarParser* parser, Bool td3, Int level )
1692{
1693   Bool changed = False;
1694   vg_assert(parser->sp < parser->stack_size);
1695   while (True) {
1696      vg_assert(parser->sp >= -1);
1697      if (parser->sp == -1) break;
1698      if (parser->level[parser->sp] <= level) break;
1699      if (0)
1700         TRACE_D3("BBBBAAAA varstack_pop [newsp=%d]\n", parser->sp-1);
1701      vg_assert(parser->ranges[parser->sp]);
1702      /* Who allocated this xa?  get_range_list() or
1703         unitary_range_list(). */
1704      VG_(deleteXA)( parser->ranges[parser->sp] );
1705      parser->sp--;
1706      changed = True;
1707   }
1708   if (changed && td3)
1709      varstack_show( parser, "after preen" );
1710}
1711
1712static void varstack_push ( const CUConst* cc,
1713                            D3VarParser* parser,
1714                            Bool td3,
1715                            XArray* ranges, Int level,
1716                            Bool    isFunc, GExpr* fbGX ) {
1717   if (0)
1718   TRACE_D3("BBBBAAAA varstack_push[newsp=%d]: %d  %p\n",
1719            parser->sp+1, level, ranges);
1720
1721   /* First we need to zap everything >= 'level', as we are about to
1722      replace any previous entry at 'level', so .. */
1723   varstack_preen(parser, /*td3*/False, level-1);
1724
1725   vg_assert(parser->sp >= -1);
1726   vg_assert(parser->sp < parser->stack_size);
1727   if (parser->sp == parser->stack_size - 1) {
1728      parser->stack_size += 48;
1729      parser->ranges =
1730         ML_(dinfo_realloc)("di.readdwarf3.varpush.1", parser->ranges,
1731                            parser->stack_size * sizeof parser->ranges[0]);
1732      parser->level =
1733         ML_(dinfo_realloc)("di.readdwarf3.varpush.2", parser->level,
1734                            parser->stack_size * sizeof parser->level[0]);
1735      parser->isFunc =
1736         ML_(dinfo_realloc)("di.readdwarf3.varpush.3", parser->isFunc,
1737                            parser->stack_size * sizeof parser->isFunc[0]);
1738      parser->fbGX =
1739         ML_(dinfo_realloc)("di.readdwarf3.varpush.4", parser->fbGX,
1740                            parser->stack_size * sizeof parser->fbGX[0]);
1741   }
1742   if (parser->sp >= 0)
1743      vg_assert(parser->level[parser->sp] < level);
1744   parser->sp++;
1745   vg_assert(ranges != NULL);
1746   if (!isFunc) vg_assert(fbGX == NULL);
1747   parser->ranges[parser->sp] = ranges;
1748   parser->level[parser->sp]  = level;
1749   parser->isFunc[parser->sp] = isFunc;
1750   parser->fbGX[parser->sp]   = fbGX;
1751   if (TD3)
1752      varstack_show( parser, "after push" );
1753}
1754
1755
1756/* cts is derived from a DW_AT_location and so refers either to a
1757   location expression or to a location list.  Figure out which, and
1758   in both cases bundle the expression or location list into a
1759   so-called GExpr (guarded expression). */
1760__attribute__((noinline))
1761static GExpr* get_GX ( const CUConst* cc, Bool td3, const FormContents* cts )
1762{
1763   GExpr* gexpr = NULL;
1764   if (cts->szB < 0) {
1765      /* represents a non-empty in-line location expression, and
1766         cts->u.cur points at the image bytes */
1767      gexpr = make_singleton_GX( cts->u.cur, (ULong)(- cts->szB) );
1768   }
1769   else
1770   if (cts->szB > 0) {
1771      /* represents a location list.  cts->u.val is the offset of it
1772         in .debug_loc. */
1773      if (!cc->cu_svma_known)
1774         cc->barf("get_GX: location list, but CU svma is unknown");
1775      gexpr = make_general_GX( cc, td3, cts->u.val, cc->cu_svma );
1776   }
1777   else {
1778      vg_assert(0); /* else caller is bogus */
1779   }
1780   return gexpr;
1781}
1782
1783/* Returns an xarray* of directory names (indexed by the dwarf dirname
1784   integer).
1785   If 'compdir' is NULL, entry [0] will be set to "."
1786   otherwise entry [0] is set to compdir.
1787   Entry [0] basically means "the current directory of the compilation",
1788   whatever that means, according to the DWARF3 spec.
1789   FIXME??? readdwarf3.c/readdwarf.c have a lot of duplicated code */
1790static
1791XArray* read_dirname_xa (DebugInfo* di, const HChar *compdir,
1792                         Cursor *c,
1793                         Bool td3 )
1794{
1795   XArray*        dirname_xa;   /* xarray of HChar* dirname */
1796   const HChar*   dirname;
1797   UInt           compdir_len;
1798
1799   dirname_xa = VG_(newXA) (ML_(dinfo_zalloc), "di.rdxa.1", ML_(dinfo_free),
1800                            sizeof(HChar*) );
1801
1802   if (compdir == NULL) {
1803      dirname = ".";
1804      compdir_len = 1;
1805   } else {
1806      dirname = compdir;
1807      compdir_len = VG_(strlen)(compdir);
1808   }
1809   VG_(addToXA) (dirname_xa, &dirname);
1810
1811   TRACE_D3(" The Directory Table%s\n",
1812            peek_UChar(c) == 0 ? " is empty." : ":" );
1813
1814   while (peek_UChar(c) != 0) {
1815
1816      DiCursor cur = get_AsciiZ(c);
1817      HChar* data_str = ML_(cur_read_strdup)( cur, "dirname_xa.1" );
1818      TRACE_D3("  %s\n", data_str);
1819
1820      /* If data_str[0] is '/', then 'data' is an absolute path and we
1821         don't mess with it.  Otherwise, construct the
1822         path 'compdir' ++ "/" ++ 'data'. */
1823
1824      if (data_str[0] != '/'
1825          /* not an absolute path */
1826          && compdir
1827          /* actually got something sensible for compdir */
1828          && compdir_len)
1829      {
1830         SizeT  len = compdir_len + 1 + VG_(strlen)(data_str);
1831         HChar *buf = ML_(dinfo_zalloc)("dirname_xa.2", len + 1);
1832
1833         VG_(strcpy)(buf, compdir);
1834         VG_(strcat)(buf, "/");
1835         VG_(strcat)(buf, data_str);
1836
1837         dirname = ML_(addStr)(di, buf, len);
1838         VG_(addToXA) (dirname_xa, &dirname);
1839         if (0) VG_(printf)("rel path  %s\n", buf);
1840         ML_(dinfo_free)(buf);
1841      } else {
1842         /* just use 'data'. */
1843         dirname = ML_(addStr)(di,data_str,-1);
1844         VG_(addToXA) (dirname_xa, &dirname);
1845         if (0) VG_(printf)("abs path  %s\n", data_str);
1846      }
1847
1848      ML_(dinfo_free)(data_str);
1849   }
1850
1851   TRACE_D3 ("\n");
1852
1853   if (get_UChar (c) != 0) {
1854      ML_(symerr)(NULL, True,
1855                  "could not get NUL at end of DWARF directory table");
1856      VG_(deleteXA)(dirname_xa);
1857      return NULL;
1858   }
1859
1860   return dirname_xa;
1861}
1862
1863static
1864void read_filename_table( /*MOD*/XArray* /* of UInt* */ fndn_ix_Table,
1865                          const HChar* compdir,
1866                          const CUConst* cc, ULong debug_line_offset,
1867                          Bool td3 )
1868{
1869   Bool   is_dw64;
1870   Cursor c;
1871   Word   i;
1872   UShort version;
1873   UChar  opcode_base;
1874   const HChar* str;
1875   XArray* dirname_xa;   /* xarray of HChar* dirname */
1876   ULong  dir_xa_ix;     /* Index in dirname_xa, as read from dwarf info. */
1877   const HChar* dirname;
1878   UInt   fndn_ix;
1879
1880   vg_assert(fndn_ix_Table && cc && cc->barf);
1881   if (!ML_(sli_is_valid)(cc->escn_debug_line)
1882       || cc->escn_debug_line.szB <= debug_line_offset) {
1883      cc->barf("read_filename_table: .debug_line is missing?");
1884   }
1885
1886   init_Cursor( &c, cc->escn_debug_line, debug_line_offset, cc->barf,
1887                "Overrun whilst reading .debug_line section(1)" );
1888
1889   /* unit_length = */
1890   get_Initial_Length( &is_dw64, &c,
1891                       "read_filename_table: invalid initial-length field" );
1892   version = get_UShort( &c );
1893   if (version != 2 && version != 3 && version != 4)
1894     cc->barf("read_filename_table: Only DWARF version 2, 3 and 4 line info "
1895              "is currently supported.");
1896   /*header_length              = (ULong)*/ get_Dwarfish_UWord( &c, is_dw64 );
1897   /*minimum_instruction_length = */ get_UChar( &c );
1898   if (version >= 4)
1899      /*maximum_operations_per_insn = */ get_UChar( &c );
1900   /*default_is_stmt            = */ get_UChar( &c );
1901   /*line_base                  = (Char)*/ get_UChar( &c );
1902   /*line_range                 = */ get_UChar( &c );
1903   opcode_base                = get_UChar( &c );
1904   /* skip over "standard_opcode_lengths" */
1905   for (i = 1; i < (Word)opcode_base; i++)
1906     (void)get_UChar( &c );
1907
1908   dirname_xa = read_dirname_xa(cc->di, compdir, &c, td3);
1909
1910   /* Read and record the file names table */
1911   vg_assert( VG_(sizeXA)( fndn_ix_Table ) == 0 );
1912   /* Add a dummy index-zero entry.  DWARF3 numbers its files
1913      from 1, for some reason. */
1914   fndn_ix = ML_(addFnDn) ( cc->di, "<unknown_file>", NULL );
1915   VG_(addToXA)( fndn_ix_Table, &fndn_ix );
1916   while (peek_UChar(&c) != 0) {
1917      DiCursor cur = get_AsciiZ(&c);
1918      str = ML_(addStrFromCursor)( cc->di, cur );
1919      dir_xa_ix = get_ULEB128( &c );
1920      if (dirname_xa != NULL
1921          && dir_xa_ix >= 0 && dir_xa_ix < VG_(sizeXA) (dirname_xa))
1922         dirname = *(HChar**)VG_(indexXA) ( dirname_xa, dir_xa_ix );
1923      else
1924         dirname = NULL;
1925      fndn_ix = ML_(addFnDn)( cc->di, str, dirname);
1926      TRACE_D3("  read_filename_table: %ld fndn_ix %d %s %s\n",
1927               VG_(sizeXA)(fndn_ix_Table), fndn_ix,
1928               dirname, str);
1929      VG_(addToXA)( fndn_ix_Table, &fndn_ix );
1930      (void)get_ULEB128( &c ); /* skip last mod time */
1931      (void)get_ULEB128( &c ); /* file size */
1932   }
1933   /* We're done!  The rest of it is not interesting. */
1934   if (dirname_xa != NULL)
1935      VG_(deleteXA)(dirname_xa);
1936}
1937
1938/* setup_cu_svma to be called when a cu is found at level 0,
1939   to establish the cu_svma. */
1940static void setup_cu_svma(CUConst* cc, Bool have_lo, Addr ip_lo, Bool td3)
1941{
1942   Addr cu_svma;
1943   /* We have potentially more than one type of parser parsing the
1944      dwarf information. At least currently, each parser establishes
1945      the cu_svma. So, in case cu_svma_known, we check that the same
1946      result is obtained by the 2nd parsing of the cu.
1947
1948      Alternatively, we could reset cu_svma_known after each parsing
1949      and then check that we only see a single DW_TAG_compile_unit DIE
1950      at level 0, DWARF3 only allows exactly one top level DIE per
1951      CU. */
1952
1953   if (have_lo)
1954      cu_svma = ip_lo;
1955   else {
1956      /* Now, it may be that this DIE doesn't tell us the CU's
1957         SVMA, by way of not having a DW_AT_low_pc.  That's OK --
1958         the CU doesn't *have* to have its SVMA specified.
1959
1960         But as per last para D3 spec sec 3.1.1 ("Normal and
1961         Partial Compilation Unit Entries", "If the base address
1962         (viz, the SVMA) is undefined, then any DWARF entry of
1963         structure defined interms of the base address of that
1964         compilation unit is not valid.".  So that means, if whilst
1965         processing the children of this top level DIE (or their
1966         children, etc) we see a DW_AT_range, and cu_svma_known is
1967         False, then the DIE that contains it is (per the spec)
1968         invalid, and we can legitimately stop and complain. */
1969      /* .. whereas The Reality is, simply assume the SVMA is zero
1970         if it isn't specified. */
1971      cu_svma = 0;
1972   }
1973
1974   if (cc->cu_svma_known) {
1975      vg_assert (cu_svma == cc->cu_svma);
1976   } else {
1977      cc->cu_svma_known = True;
1978      cc->cu_svma = cu_svma;
1979      if (0)
1980         TRACE_D3("setup_cu_svma: acquire CU_SVMA of %p\n", (void*) cc->cu_svma);
1981   }
1982}
1983
1984static void trace_DIE(
1985   DW_TAG dtag,
1986   UWord posn,
1987   Int level,
1988   UWord saved_die_c_offset,
1989   const g_abbv *abbv,
1990   const CUConst* cc)
1991{
1992   Cursor c;
1993   FormContents cts;
1994   UWord sibling = 0;
1995   UInt nf_i;
1996   Bool  debug_types_flag;
1997   Bool  alt_flag;
1998   Cursor check_skip;
1999   UWord check_sibling = 0;
2000
2001   posn = uncook_die( cc, posn, &debug_types_flag, &alt_flag );
2002   init_Cursor (&c,
2003                debug_types_flag ? cc->escn_debug_types :
2004                alt_flag ? cc->escn_debug_info_alt : cc->escn_debug_info,
2005                saved_die_c_offset, cc->barf,
2006                "Overrun trace_DIE");
2007   check_skip = c;
2008   VG_(printf)(" <%d><%lx>: Abbrev Number: %llu (%s)%s%s\n",
2009               level, posn, (ULong) abbv->abbv_code, ML_(pp_DW_TAG)( dtag ),
2010               debug_types_flag ? " (in .debug_types)" : "",
2011               alt_flag ? " (in alternate .debug_info)" : "");
2012   nf_i = 0;
2013   while (True) {
2014      DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
2015      DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2016      nf_i++;
2017      if (attr == 0 && form == 0) break;
2018      VG_(printf)("     %-18s: ", ML_(pp_DW_AT)(attr));
2019      /* Get the form contents, so as to print them */
2020      get_Form_contents( &cts, cc, &c, True, form );
2021      if (attr == DW_AT_sibling && cts.szB > 0) {
2022         sibling = cts.u.val;
2023      }
2024      VG_(printf)("\t\n");
2025   }
2026
2027   /* Verify that skipping a DIE gives the same displacement as
2028      tracing (i.e. reading) a DIE. If there is an inconsistency in
2029      the nr of bytes read by get_Form_contents and get_Form_szB, this
2030      should be detected by the below. Using --trace-symtab=yes
2031      --read-var-info=yes will ensure all DIEs are systematically
2032      verified. */
2033   skip_DIE (&check_sibling, &check_skip, abbv, cc);
2034   vg_assert (check_sibling == sibling);
2035   vg_assert (get_position_of_Cursor (&check_skip)
2036              == get_position_of_Cursor (&c));
2037}
2038
2039__attribute__((noreturn))
2040static void dump_bad_die_and_barf(
2041   const HChar *whichparser,
2042   DW_TAG dtag,
2043   UWord posn,
2044   Int level,
2045   Cursor* c_die,
2046   UWord saved_die_c_offset,
2047   const g_abbv *abbv,
2048   const CUConst* cc)
2049{
2050   trace_DIE (dtag, posn, level, saved_die_c_offset, abbv, cc);
2051   VG_(printf)("%s:\n", whichparser);
2052   cc->barf("confused by the above DIE");
2053}
2054
2055__attribute__((noinline))
2056static void bad_DIE_confusion(int linenr)
2057{
2058   VG_(printf)("\nparse DIE(readdwarf3.c:%d): confused by:\n", linenr);
2059}
2060#define goto_bad_DIE do {bad_DIE_confusion(__LINE__); goto bad_DIE;} while (0)
2061
2062__attribute__((noinline))
2063static void parse_var_DIE (
2064   /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
2065   /*MOD*/XArray* /* of TempVar* */ tempvars,
2066   /*MOD*/XArray* /* of GExpr* */ gexprs,
2067   /*MOD*/D3VarParser* parser,
2068   DW_TAG dtag,
2069   UWord posn,
2070   Int level,
2071   Cursor* c_die,
2072   const g_abbv *abbv,
2073   CUConst* cc,
2074   Bool td3
2075)
2076{
2077   FormContents cts;
2078   UInt nf_i;
2079
2080   UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
2081
2082   varstack_preen( parser, td3, level-1 );
2083
2084   if (dtag == DW_TAG_compile_unit
2085       || dtag == DW_TAG_type_unit
2086       || dtag == DW_TAG_partial_unit) {
2087      Bool have_lo    = False;
2088      Bool have_hi1   = False;
2089      Bool hiIsRelative = False;
2090      Bool have_range = False;
2091      Addr ip_lo    = 0;
2092      Addr ip_hi1   = 0;
2093      Addr rangeoff = 0;
2094      const HChar *compdir = NULL;
2095      nf_i = 0;
2096      while (True) {
2097         DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
2098         DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2099         nf_i++;
2100         if (attr == 0 && form == 0) break;
2101         get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2102         if (attr == DW_AT_low_pc && cts.szB > 0) {
2103            ip_lo   = cts.u.val;
2104            have_lo = True;
2105         }
2106         if (attr == DW_AT_high_pc && cts.szB > 0) {
2107            ip_hi1   = cts.u.val;
2108            have_hi1 = True;
2109            if (form != DW_FORM_addr)
2110               hiIsRelative = True;
2111         }
2112         if (attr == DW_AT_ranges && cts.szB > 0) {
2113            rangeoff   = cts.u.val;
2114            have_range = True;
2115         }
2116         if (attr == DW_AT_comp_dir) {
2117            if (cts.szB >= 0)
2118               cc->barf("parse_var_DIE compdir: expecting indirect string");
2119            HChar *str = ML_(cur_read_strdup)( cts.u.cur,
2120                                               "parse_var_DIE.compdir" );
2121            compdir = ML_(addStr)(cc->di, str, -1);
2122            ML_(dinfo_free) (str);
2123         }
2124         if (attr == DW_AT_stmt_list && cts.szB > 0) {
2125            read_filename_table( parser->fndn_ix_Table, compdir,
2126                                 cc, cts.u.val, td3 );
2127         }
2128      }
2129      if (have_lo && have_hi1 && hiIsRelative)
2130         ip_hi1 += ip_lo;
2131
2132      /* Now, does this give us an opportunity to find this
2133         CU's svma? */
2134      if (level == 0)
2135         setup_cu_svma(cc, have_lo, ip_lo, td3);
2136
2137      /* Do we have something that looks sane? */
2138      if (have_lo && have_hi1 && (!have_range)) {
2139         if (ip_lo < ip_hi1)
2140            varstack_push( cc, parser, td3,
2141                           unitary_range_list(ip_lo, ip_hi1 - 1),
2142                           level,
2143                           False/*isFunc*/, NULL/*fbGX*/ );
2144         else if (ip_lo == 0 && ip_hi1 == 0)
2145            /* CU has no code, presumably?
2146               Such situations have been encountered for code
2147               compiled with -ffunction-sections -fdata-sections
2148               and linked with --gc-sections. Completely
2149               eliminated CU gives such 0 lo/hi pc. Similarly
2150               to a CU which has no lo/hi/range pc, we push
2151               an empty range list. */
2152            varstack_push( cc, parser, td3,
2153                           empty_range_list(),
2154                           level,
2155                           False/*isFunc*/, NULL/*fbGX*/ );
2156      } else
2157      if ((!have_lo) && (!have_hi1) && have_range) {
2158         varstack_push( cc, parser, td3,
2159                        get_range_list( cc, td3,
2160                                        rangeoff, cc->cu_svma ),
2161                        level,
2162                        False/*isFunc*/, NULL/*fbGX*/ );
2163      } else
2164      if ((!have_lo) && (!have_hi1) && (!have_range)) {
2165         /* CU has no code, presumably? */
2166         varstack_push( cc, parser, td3,
2167                        empty_range_list(),
2168                        level,
2169                        False/*isFunc*/, NULL/*fbGX*/ );
2170      } else
2171      if (have_lo && (!have_hi1) && have_range && ip_lo == 0) {
2172         /* broken DIE created by gcc-4.3.X ?  Ignore the
2173            apparently-redundant DW_AT_low_pc and use the DW_AT_ranges
2174            instead. */
2175         varstack_push( cc, parser, td3,
2176                        get_range_list( cc, td3,
2177                                        rangeoff, cc->cu_svma ),
2178                        level,
2179                        False/*isFunc*/, NULL/*fbGX*/ );
2180      } else {
2181         if (0) VG_(printf)("I got hlo %d hhi1 %d hrange %d\n",
2182                            (Int)have_lo, (Int)have_hi1, (Int)have_range);
2183         goto_bad_DIE;
2184      }
2185   }
2186
2187   if (dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram) {
2188      Bool   have_lo    = False;
2189      Bool   have_hi1   = False;
2190      Bool   have_range = False;
2191      Bool   hiIsRelative = False;
2192      Addr   ip_lo      = 0;
2193      Addr   ip_hi1     = 0;
2194      Addr   rangeoff   = 0;
2195      Bool   isFunc     = dtag == DW_TAG_subprogram;
2196      GExpr* fbGX       = NULL;
2197      nf_i = 0;
2198      while (True) {
2199         DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
2200         DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2201         nf_i++;
2202         if (attr == 0 && form == 0) break;
2203         get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2204         if (attr == DW_AT_low_pc && cts.szB > 0) {
2205            ip_lo   = cts.u.val;
2206            have_lo = True;
2207         }
2208         if (attr == DW_AT_high_pc && cts.szB > 0) {
2209            ip_hi1   = cts.u.val;
2210            have_hi1 = True;
2211            if (form != DW_FORM_addr)
2212               hiIsRelative = True;
2213         }
2214         if (attr == DW_AT_ranges && cts.szB > 0) {
2215            rangeoff   = cts.u.val;
2216            have_range = True;
2217         }
2218         if (isFunc
2219             && attr == DW_AT_frame_base
2220             && cts.szB != 0 /* either scalar or nonempty block */) {
2221            fbGX = get_GX( cc, False/*td3*/, &cts );
2222            vg_assert(fbGX);
2223            VG_(addToXA)(gexprs, &fbGX);
2224         }
2225      }
2226      if (have_lo && have_hi1 && hiIsRelative)
2227         ip_hi1 += ip_lo;
2228      /* Do we have something that looks sane? */
2229      if (dtag == DW_TAG_subprogram
2230          && (!have_lo) && (!have_hi1) && (!have_range)) {
2231         /* This is legit - ignore it. Sec 3.3.3: "A subroutine entry
2232            representing a subroutine declaration that is not also a
2233            definition does not have code address or range
2234            attributes." */
2235      } else
2236      if (dtag == DW_TAG_lexical_block
2237          && (!have_lo) && (!have_hi1) && (!have_range)) {
2238         /* I believe this is legit, and means the lexical block
2239            contains no insns (whatever that might mean).  Ignore. */
2240      } else
2241      if (have_lo && have_hi1 && (!have_range)) {
2242         /* This scope supplies just a single address range. */
2243         if (ip_lo < ip_hi1)
2244            varstack_push( cc, parser, td3,
2245                           unitary_range_list(ip_lo, ip_hi1 - 1),
2246                           level, isFunc, fbGX );
2247      } else
2248      if ((!have_lo) && (!have_hi1) && have_range) {
2249         /* This scope supplies multiple address ranges via the use of
2250            a range list. */
2251         varstack_push( cc, parser, td3,
2252                        get_range_list( cc, td3,
2253                                        rangeoff, cc->cu_svma ),
2254                        level, isFunc, fbGX );
2255      } else
2256      if (have_lo && (!have_hi1) && (!have_range)) {
2257         /* This scope is bogus.  The D3 spec sec 3.4 (Lexical Block
2258            Entries) says fairly clearly that a scope must have either
2259            _range or (_low_pc and _high_pc). */
2260         /* The spec is a bit ambiguous though.  Perhaps a single byte
2261            range is intended?  See sec 2.17 (Code Addresses And Ranges) */
2262         /* This case is here because icc9 produced this:
2263         <2><13bd>: DW_TAG_lexical_block
2264            DW_AT_decl_line   : 5229
2265            DW_AT_decl_column : 37
2266            DW_AT_decl_file   : 1
2267            DW_AT_low_pc      : 0x401b03
2268         */
2269         /* Ignore (seems safe than pushing a single byte range) */
2270      } else
2271         goto_bad_DIE;
2272   }
2273
2274   if (dtag == DW_TAG_variable || dtag == DW_TAG_formal_parameter) {
2275      const  HChar* name = NULL;
2276      UWord  typeR       = D3_INVALID_CUOFF;
2277      Bool   global      = False;
2278      GExpr* gexpr       = NULL;
2279      Int    n_attrs     = 0;
2280      UWord  abs_ori     = (UWord)D3_INVALID_CUOFF;
2281      Int    lineNo      = 0;
2282      UInt   fndn_ix     = 0;
2283      nf_i = 0;
2284      while (True) {
2285         DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
2286         DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2287         nf_i++;
2288         if (attr == 0 && form == 0) break;
2289         get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2290         n_attrs++;
2291         if (attr == DW_AT_name && cts.szB < 0) {
2292            name = ML_(addStrFromCursor)( cc->di, cts.u.cur );
2293         }
2294         if (attr == DW_AT_location
2295             && cts.szB != 0 /* either scalar or nonempty block */) {
2296            gexpr = get_GX( cc, False/*td3*/, &cts );
2297            vg_assert(gexpr);
2298            VG_(addToXA)(gexprs, &gexpr);
2299         }
2300         if (attr == DW_AT_type && cts.szB > 0) {
2301            typeR = cook_die_using_form( cc, cts.u.val, form );
2302         }
2303         if (attr == DW_AT_external && cts.szB > 0 && cts.u.val > 0) {
2304            global = True;
2305         }
2306         if (attr == DW_AT_abstract_origin && cts.szB > 0) {
2307            abs_ori = (UWord)cts.u.val;
2308         }
2309         if (attr == DW_AT_declaration && cts.szB > 0 && cts.u.val > 0) {
2310            /*declaration = True;*/
2311         }
2312         if (attr == DW_AT_decl_line && cts.szB > 0) {
2313            lineNo = (Int)cts.u.val;
2314         }
2315         if (attr == DW_AT_decl_file && cts.szB > 0) {
2316            Int ftabIx = (Int)cts.u.val;
2317            if (ftabIx >= 1
2318                && ftabIx < VG_(sizeXA)( parser->fndn_ix_Table )) {
2319               fndn_ix = *(UInt*)VG_(indexXA)( parser->fndn_ix_Table, ftabIx );
2320            }
2321            if (0) VG_(printf)("XXX filename fndn_ix = %d %s\n", fndn_ix,
2322                               ML_(fndn_ix2filename) (cc->di, fndn_ix));
2323         }
2324      }
2325      if (!global && dtag == DW_TAG_variable && level == 1) {
2326         /* Case of a static variable. It is better to declare
2327            it global as the variable is not really related to
2328            a PC range, as its address can be used by program
2329            counters outside of the ranges where it is visible . */
2330         global = True;
2331      }
2332
2333      /* We'll collect it under if one of the following three
2334         conditions holds:
2335         (1) has location and type    -> completed
2336         (2) has type only            -> is an abstract instance
2337         (3) has location and abs_ori -> is a concrete instance
2338         Name, fndn_ix and line number are all optional frills.
2339      */
2340      if ( /* 1 */ (gexpr && typeR != D3_INVALID_CUOFF)
2341           /* 2 */ || (typeR != D3_INVALID_CUOFF)
2342           /* 3 */ || (gexpr && abs_ori != (UWord)D3_INVALID_CUOFF) ) {
2343
2344         /* Add this variable to the list of interesting looking
2345            variables.  Crucially, note along with it the address
2346            range(s) associated with the variable, which for locals
2347            will be the address ranges at the top of the varparser's
2348            stack. */
2349         GExpr*   fbGX = NULL;
2350         Word     i, nRanges;
2351         const XArray*  /* of AddrRange */ xa;
2352         TempVar* tv;
2353         /* Stack can't be empty; we put a dummy entry on it for the
2354            entire address range before starting with the DIEs for
2355            this CU. */
2356         vg_assert(parser->sp >= 0);
2357
2358         /* If this is a local variable (non-global), try to find
2359            the GExpr for the DW_AT_frame_base of the containing
2360            function.  It should have been pushed on the stack at the
2361            time we encountered its DW_TAG_subprogram DIE, so the way
2362            to find it is to scan back down the stack looking for it.
2363            If there isn't an enclosing stack entry marked 'isFunc'
2364            then we must be seeing variable or formal param DIEs
2365            outside of a function, so we deem the Dwarf to be
2366            malformed if that happens.  Note that the fbGX may be NULL
2367            if the containing DT_TAG_subprogram didn't supply a
2368            DW_AT_frame_base -- that's OK, but there must actually be
2369            a containing DW_TAG_subprogram. */
2370         if (!global) {
2371            Bool found = False;
2372            for (i = parser->sp; i >= 0; i--) {
2373               if (parser->isFunc[i]) {
2374                  fbGX = parser->fbGX[i];
2375                  found = True;
2376                  break;
2377               }
2378            }
2379            if (!found) {
2380               if (0 && VG_(clo_verbosity) >= 0) {
2381                  VG_(message)(Vg_DebugMsg,
2382                     "warning: parse_var_DIE: non-global variable "
2383                     "outside DW_TAG_subprogram\n");
2384               }
2385               /* goto_bad_DIE; */
2386               /* This seems to happen a lot.  Just ignore it -- if,
2387                  when we come to evaluation of the location (guarded)
2388                  expression, it requires a frame base value, and
2389                  there's no expression for that, then evaluation as a
2390                  whole will fail.  Harmless - a bit of a waste of
2391                  cycles but nothing more. */
2392            }
2393         }
2394
2395         /* re "global ? 0 : parser->sp" (twice), if the var is
2396            marked 'global' then we must put it at the global scope,
2397            as only the global scope (level 0) covers the entire PC
2398            address space.  It is asserted elsewhere that level 0
2399            always covers the entire address space. */
2400         xa = parser->ranges[global ? 0 : parser->sp];
2401         nRanges = VG_(sizeXA)(xa);
2402         vg_assert(nRanges >= 0);
2403
2404         tv = ML_(dinfo_zalloc)( "di.readdwarf3.pvD.1", sizeof(TempVar) );
2405         tv->name   = name;
2406         tv->level  = global ? 0 : parser->sp;
2407         tv->typeR  = typeR;
2408         tv->gexpr  = gexpr;
2409         tv->fbGX   = fbGX;
2410         tv->fndn_ix= fndn_ix;
2411         tv->fLine  = lineNo;
2412         tv->dioff  = posn;
2413         tv->absOri = abs_ori;
2414
2415         /* See explanation on definition of type TempVar for the
2416            reason for this elaboration. */
2417         tv->nRanges = nRanges;
2418         tv->rngOneMin = 0;
2419         tv->rngOneMax = 0;
2420         tv->rngMany = NULL;
2421         if (nRanges == 1) {
2422            AddrRange* range = VG_(indexXA)(xa, 0);
2423            tv->rngOneMin = range->aMin;
2424            tv->rngOneMax = range->aMax;
2425         }
2426         else if (nRanges > 1) {
2427            /* See if we already have a range list which is
2428               structurally identical.  If so, use that; if not, clone
2429               this one, and add it to our collection. */
2430            UWord keyW, valW;
2431            if (VG_(lookupFM)( rangestree, &keyW, &valW, (UWord)xa )) {
2432               XArray* old = (XArray*)keyW;
2433               vg_assert(valW == 0);
2434               vg_assert(old != xa);
2435               tv->rngMany = old;
2436            } else {
2437               XArray* cloned = VG_(cloneXA)( "di.readdwarf3.pvD.2", xa );
2438               tv->rngMany = cloned;
2439               VG_(addToFM)( rangestree, (UWord)cloned, 0 );
2440            }
2441         }
2442
2443         VG_(addToXA)( tempvars, &tv );
2444
2445         TRACE_D3("  Recording this variable, with %ld PC range(s)\n",
2446                  VG_(sizeXA)(xa) );
2447         /* collect stats on how effective the ->ranges special
2448            casing is */
2449         if (0) {
2450            static Int ntot=0, ngt=0;
2451            ntot++;
2452            if (tv->rngMany) ngt++;
2453            if (0 == (ntot % 100000))
2454               VG_(printf)("XXXX %d tot, %d cloned\n", ntot, ngt);
2455         }
2456
2457      }
2458
2459      /* Here are some other weird cases seen in the wild:
2460
2461            We have a variable with a name and a type, but no
2462            location.  I guess that's a sign that it has been
2463            optimised away.  Ignore it.  Here's an example:
2464
2465            static Int lc_compar(void* n1, void* n2) {
2466               MC_Chunk* mc1 = *(MC_Chunk**)n1;
2467               MC_Chunk* mc2 = *(MC_Chunk**)n2;
2468               return (mc1->data < mc2->data ? -1 : 1);
2469            }
2470
2471            Both mc1 and mc2 are like this
2472            <2><5bc>: Abbrev Number: 21 (DW_TAG_variable)
2473                DW_AT_name        : mc1
2474                DW_AT_decl_file   : 1
2475                DW_AT_decl_line   : 216
2476                DW_AT_type        : <5d3>
2477
2478            whereas n1 and n2 do have locations specified.
2479
2480            ---------------------------------------------
2481
2482            We see a DW_TAG_formal_parameter with a type, but
2483            no name and no location.  It's probably part of a function type
2484            construction, thusly, hence ignore it:
2485         <1><2b4>: Abbrev Number: 12 (DW_TAG_subroutine_type)
2486             DW_AT_sibling     : <2c9>
2487             DW_AT_prototyped  : 1
2488             DW_AT_type        : <114>
2489         <2><2be>: Abbrev Number: 13 (DW_TAG_formal_parameter)
2490             DW_AT_type        : <13e>
2491         <2><2c3>: Abbrev Number: 13 (DW_TAG_formal_parameter)
2492             DW_AT_type        : <133>
2493
2494            ---------------------------------------------
2495
2496            Is very minimal, like this:
2497            <4><81d>: Abbrev Number: 44 (DW_TAG_variable)
2498                DW_AT_abstract_origin: <7ba>
2499            What that signifies I have no idea.  Ignore.
2500
2501            ----------------------------------------------
2502
2503            Is very minimal, like this:
2504            <200f>: DW_TAG_formal_parameter
2505                DW_AT_abstract_ori: <1f4c>
2506                DW_AT_location    : 13440
2507            What that signifies I have no idea.  Ignore.
2508            It might be significant, though: the variable at least
2509            has a location and so might exist somewhere.
2510            Maybe we should handle this.
2511
2512            ---------------------------------------------
2513
2514            <22407>: DW_TAG_variable
2515              DW_AT_name        : (indirect string, offset: 0x6579):
2516                                  vgPlain_trampoline_stuff_start
2517              DW_AT_decl_file   : 29
2518              DW_AT_decl_line   : 56
2519              DW_AT_external    : 1
2520              DW_AT_declaration : 1
2521
2522            Nameless and typeless variable that has a location?  Who
2523            knows.  Not me.
2524            <2><3d178>: Abbrev Number: 22 (DW_TAG_variable)
2525                 DW_AT_location    : 9 byte block: 3 c0 c7 13 38 0 0 0 0
2526                                     (DW_OP_addr: 3813c7c0)
2527
2528            No, really.  Check it out.  gcc is quite simply borked.
2529            <3><168cc>: Abbrev Number: 141 (DW_TAG_variable)
2530            // followed by no attributes, and the next DIE is a sibling,
2531            // not a child
2532            */
2533   }
2534   return;
2535
2536  bad_DIE:
2537   dump_bad_die_and_barf("parse_var_DIE", dtag, posn, level,
2538                         c_die, saved_die_c_offset,
2539                         abbv,
2540                         cc);
2541   /*NOTREACHED*/
2542}
2543
2544typedef
2545   struct {
2546      /* The fndn_ix file name/dirname table.  Is a mapping from dwarf
2547         integer index to the index in di->fndnpool. */
2548      XArray* /* of UInt* */ fndn_ix_Table;
2549      UWord sibling; // sibling of the last read DIE (if it has a sibling).
2550   }
2551   D3InlParser;
2552
2553/* Return the function name corresponding to absori.
2554
2555   absori is a 'cooked' reference to a DIE, i.e. absori can be either
2556   in cc->escn_debug_info or in cc->escn_debug_info_alt.
2557   get_inlFnName will uncook absori.
2558
2559   The returned value is a (permanent) string in DebugInfo's .strchunks.
2560
2561   LIMITATION: absori must point in the CU of cc. If absori points
2562   in another CU, returns "UnknownInlinedFun".
2563
2564   Here are the problems to retrieve the fun name if absori is in
2565   another CU:  the DIE reading code cannot properly extract data from
2566   another CU, as the abbv code retrieved in the other CU cannot be
2567   translated in an abbreviation. Reading data from the alternate debug
2568   info also gives problems as the string reference is also in the alternate
2569   file, but when reading the alt DIE, the string form is a 'local' string,
2570   but cannot be read in the current CU, but must be read in the alt CU.
2571   See bug 338803 comment#3 and attachment for a failed attempt to handle
2572   these problems (failed because with the patch, only one alt abbrev hash
2573   table is kept, while we must handle all abbreviations in all CUs
2574   referenced by an absori (being a reference to an alt CU, or a previous
2575   or following CU). */
2576static const HChar* get_inlFnName (Int absori, const CUConst* cc, Bool td3)
2577{
2578   Cursor c;
2579   const g_abbv *abbv;
2580   ULong  atag, abbv_code;
2581   UInt   has_children;
2582   UWord  posn;
2583   Bool type_flag, alt_flag;
2584   const HChar *ret = NULL;
2585   FormContents cts;
2586   UInt nf_i;
2587
2588   posn = uncook_die( cc, absori, &type_flag, &alt_flag);
2589   if (type_flag)
2590      cc->barf("get_inlFnName: uncooked absori in type debug info");
2591
2592   /* LIMITATION: check we are in the same CU.
2593      If not, return unknown inlined function name. */
2594   /* if crossing between alt debug info<>normal info
2595          or posn not in the cu range,
2596      then it is in another CU. */
2597   if (alt_flag != cc->is_alt_info
2598       || posn < cc->cu_start_offset
2599       || posn >= cc->cu_start_offset + cc->unit_length) {
2600      static Bool reported = False;
2601      if (!reported && VG_(clo_verbosity) > 1) {
2602         VG_(message)(Vg_DebugMsg,
2603                      "Warning: cross-CU LIMITATION: some inlined fn names\n"
2604                      "might be shown as UnknownInlinedFun\n");
2605         reported = True;
2606      }
2607      TRACE_D3(" <get_inlFnName><%lx>: cross-CU LIMITATION", posn);
2608      return ML_(addStr)(cc->di, "UnknownInlinedFun", -1);
2609   }
2610
2611   init_Cursor (&c, cc->escn_debug_info, posn, cc->barf,
2612                "Overrun get_inlFnName absori");
2613
2614   abbv_code = get_ULEB128( &c );
2615   abbv      = get_abbv ( cc, abbv_code);
2616   atag      = abbv->atag;
2617   TRACE_D3(" <get_inlFnName><%lx>: Abbrev Number: %llu (%s)\n",
2618            posn, abbv_code, ML_(pp_DW_TAG)( atag ) );
2619
2620   if (atag == 0)
2621      cc->barf("get_inlFnName: invalid zero tag on DIE");
2622
2623   has_children = abbv->has_children;
2624   if (has_children != DW_children_no && has_children != DW_children_yes)
2625      cc->barf("get_inlFnName: invalid has_children value");
2626
2627   if (atag != DW_TAG_subprogram)
2628      cc->barf("get_inlFnName: absori not a subprogram");
2629
2630   nf_i = 0;
2631   while (True) {
2632      DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
2633      DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2634      nf_i++;
2635      if (attr == 0 && form == 0) break;
2636      get_Form_contents( &cts, cc, &c, False/*td3*/, form );
2637      if (attr == DW_AT_name) {
2638         HChar *fnname;
2639         if (cts.szB >= 0)
2640            cc->barf("get_inlFnName: expecting indirect string");
2641         fnname = ML_(cur_read_strdup)( cts.u.cur,
2642                                        "get_inlFnName.1" );
2643         ret = ML_(addStr)(cc->di, fnname, -1);
2644         ML_(dinfo_free) (fnname);
2645         break; /* Name found, get out of the loop, as this has priority over
2646                 DW_AT_specification. */
2647      }
2648      if (attr == DW_AT_specification) {
2649         UWord cdie;
2650
2651         if (cts.szB == 0)
2652            cc->barf("get_inlFnName: AT specification missing");
2653
2654         /* The recursive call to get_inlFnName will uncook its arg.
2655            So, we need to cook it here, so as to reference the
2656            correct section (e.g. the alt info). */
2657         cdie = cook_die_using_form(cc, (UWord)cts.u.val, form);
2658
2659         /* hoping that there is no loop */
2660         ret = get_inlFnName (cdie, cc, td3);
2661         /* Unclear if having both DW_AT_specification and DW_AT_name is
2662            possible but in any case, we do not break here.
2663            If we find later on a DW_AT_name, it will override the name found
2664            in the DW_AT_specification.*/
2665      }
2666   }
2667
2668   if (ret)
2669      return ret;
2670   else {
2671      TRACE_D3("AbsOriFnNameNotFound");
2672      return ML_(addStr)(cc->di, "AbsOriFnNameNotFound", -1);
2673   }
2674}
2675
2676/* Returns True if the (possibly) childrens of the current DIE are interesting
2677   to parse. Returns False otherwise.
2678   If the current DIE has a sibling, the non interesting children can
2679   maybe be skipped (if the DIE has a DW_AT_sibling).  */
2680__attribute__((noinline))
2681static Bool parse_inl_DIE (
2682   /*MOD*/D3InlParser* parser,
2683   DW_TAG dtag,
2684   UWord posn,
2685   Int level,
2686   Cursor* c_die,
2687   const g_abbv *abbv,
2688   CUConst* cc,
2689   Bool td3
2690)
2691{
2692   FormContents cts;
2693   UInt nf_i;
2694
2695   UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
2696
2697   /* Get info about DW_TAG_compile_unit and DW_TAG_partial_unit 'which
2698      in theory could also contain inlined fn calls).  */
2699   if (dtag == DW_TAG_compile_unit || dtag == DW_TAG_partial_unit) {
2700      Bool have_lo    = False;
2701      Addr ip_lo    = 0;
2702      const HChar *compdir = NULL;
2703
2704      nf_i = 0;
2705      while (True) {
2706         DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
2707         DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2708         nf_i++;
2709         if (attr == 0 && form == 0) break;
2710         get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2711         if (attr == DW_AT_low_pc && cts.szB > 0) {
2712            ip_lo   = cts.u.val;
2713            have_lo = True;
2714         }
2715         if (attr == DW_AT_comp_dir) {
2716            if (cts.szB >= 0)
2717               cc->barf("parse_inl_DIE compdir: expecting indirect string");
2718            HChar *str = ML_(cur_read_strdup)( cts.u.cur,
2719                                               "parse_inl_DIE.compdir" );
2720            compdir = ML_(addStr)(cc->di, str, -1);
2721            ML_(dinfo_free) (str);
2722         }
2723         if (attr == DW_AT_stmt_list && cts.szB > 0) {
2724            read_filename_table( parser->fndn_ix_Table, compdir,
2725                                 cc, cts.u.val, td3 );
2726         }
2727         if (attr == DW_AT_sibling && cts.szB > 0) {
2728            parser->sibling = cts.u.val;
2729         }
2730      }
2731      if (level == 0)
2732         setup_cu_svma (cc, have_lo, ip_lo, td3);
2733   }
2734
2735   if (dtag == DW_TAG_inlined_subroutine) {
2736      Bool   have_lo    = False;
2737      Bool   have_hi1   = False;
2738      Bool   have_range = False;
2739      Bool   hiIsRelative = False;
2740      Addr   ip_lo      = 0;
2741      Addr   ip_hi1     = 0;
2742      Addr   rangeoff   = 0;
2743      UInt   caller_fndn_ix = 0;
2744      Int caller_lineno = 0;
2745      Int inlinedfn_abstract_origin = 0;
2746
2747      nf_i = 0;
2748      while (True) {
2749         DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
2750         DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2751         nf_i++;
2752         if (attr == 0 && form == 0) break;
2753         get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2754         if (attr == DW_AT_call_file && cts.szB > 0) {
2755            Int ftabIx = (Int)cts.u.val;
2756            if (ftabIx >= 1
2757                && ftabIx < VG_(sizeXA)( parser->fndn_ix_Table )) {
2758               caller_fndn_ix = *(UInt*)
2759                          VG_(indexXA)( parser->fndn_ix_Table, ftabIx );
2760            }
2761            if (0) VG_(printf)("XXX caller_fndn_ix = %d %s\n", caller_fndn_ix,
2762                               ML_(fndn_ix2filename) (cc->di, caller_fndn_ix));
2763         }
2764         if (attr == DW_AT_call_line && cts.szB > 0) {
2765            caller_lineno = cts.u.val;
2766         }
2767
2768         if (attr == DW_AT_abstract_origin  && cts.szB > 0) {
2769            inlinedfn_abstract_origin
2770               = cook_die_using_form (cc, (UWord)cts.u.val, form);
2771         }
2772
2773         if (attr == DW_AT_low_pc && cts.szB > 0) {
2774            ip_lo   = cts.u.val;
2775            have_lo = True;
2776         }
2777         if (attr == DW_AT_high_pc && cts.szB > 0) {
2778            ip_hi1   = cts.u.val;
2779            have_hi1 = True;
2780            if (form != DW_FORM_addr)
2781               hiIsRelative = True;
2782         }
2783         if (attr == DW_AT_ranges && cts.szB > 0) {
2784            rangeoff   = cts.u.val;
2785            have_range = True;
2786         }
2787         if (attr == DW_AT_sibling && cts.szB > 0) {
2788            parser->sibling = cts.u.val;
2789         }
2790      }
2791      if (have_lo && have_hi1 && hiIsRelative)
2792         ip_hi1 += ip_lo;
2793      /* Do we have something that looks sane? */
2794      if (dtag == DW_TAG_inlined_subroutine
2795          && (!have_lo) && (!have_hi1) && (!have_range)) {
2796         /* Seems strange. How can an inlined subroutine have
2797            no code ? */
2798         goto_bad_DIE;
2799      } else
2800      if (have_lo && have_hi1 && (!have_range)) {
2801         /* This inlined call is just a single address range. */
2802         if (ip_lo < ip_hi1) {
2803            /* Apply text debug biasing */
2804            ip_lo += cc->di->text_debug_bias;
2805            ip_hi1 += cc->di->text_debug_bias;
2806            ML_(addInlInfo) (cc->di,
2807                             ip_lo, ip_hi1,
2808                             get_inlFnName (inlinedfn_abstract_origin, cc, td3),
2809                             caller_fndn_ix,
2810                             caller_lineno, level);
2811         }
2812      } else if (have_range) {
2813         /* This inlined call is several address ranges. */
2814         XArray *ranges;
2815         Word j;
2816         const HChar *inlfnname =
2817            get_inlFnName (inlinedfn_abstract_origin, cc, td3);
2818
2819         /* Ranges are biased for the inline info using the same logic
2820            as what is used for biasing ranges for the var info, for which
2821            ranges are read using cc->cu_svma (see parse_var_DIE).
2822            Then text_debug_bias is added when a (non global) var
2823            is recorded (see just before the call to ML_(addVar)) */
2824         ranges = get_range_list( cc, td3,
2825                                  rangeoff, cc->cu_svma );
2826         for (j = 0; j < VG_(sizeXA)( ranges ); j++) {
2827            AddrRange* range = (AddrRange*) VG_(indexXA)( ranges, j );
2828            ML_(addInlInfo) (cc->di,
2829                             range->aMin   + cc->di->text_debug_bias,
2830                             range->aMax+1 + cc->di->text_debug_bias,
2831                             // aMax+1 as range has its last bound included
2832                             // while ML_(addInlInfo) expects last bound not
2833                             // included.
2834                             inlfnname,
2835                             caller_fndn_ix,
2836                             caller_lineno, level);
2837         }
2838         VG_(deleteXA)( ranges );
2839      } else
2840         goto_bad_DIE;
2841   }
2842
2843   // Only recursively parse the (possible) children for the DIE which
2844   // might maybe contain a DW_TAG_inlined_subroutine:
2845   return dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram
2846      || dtag == DW_TAG_inlined_subroutine
2847      || dtag == DW_TAG_compile_unit || dtag == DW_TAG_partial_unit;
2848
2849  bad_DIE:
2850   dump_bad_die_and_barf("parse_inl_DIE", dtag, posn, level,
2851                         c_die, saved_die_c_offset,
2852                         abbv,
2853                         cc);
2854   /*NOTREACHED*/
2855}
2856
2857
2858/*------------------------------------------------------------*/
2859/*---                                                      ---*/
2860/*--- Parsing of type-related DIEs                         ---*/
2861/*---                                                      ---*/
2862/*------------------------------------------------------------*/
2863
2864typedef
2865   struct {
2866      /* What source language?  'A'=Ada83/95,
2867                                'C'=C/C++,
2868                                'F'=Fortran,
2869                                '?'=other
2870         Established once per compilation unit. */
2871      UChar language;
2872      /* A stack of types which are currently under construction */
2873      Int   sp; /* [sp] is innermost active entry; sp==-1 for empty
2874                   stack */
2875      Int   stack_size;
2876      /* Note that the TyEnts in qparentE are temporary copies of the
2877         ones accumulating in the main tyent array.  So it is not safe
2878         to free up anything on them when popping them off the stack
2879         (iow, it isn't safe to use TyEnt__make_EMPTY on them).  Just
2880         memset them to zero when done. */
2881      TyEnt *qparentE; /* parent TyEnts */
2882      Int   *qlevel;
2883   }
2884   D3TypeParser;
2885
2886/* Completely initialise a type parser object */
2887static void
2888type_parser_init ( D3TypeParser *parser )
2889{
2890   parser->sp = -1;
2891   parser->language = '?';
2892   parser->stack_size = 0;
2893   parser->qparentE = NULL;
2894   parser->qlevel   = NULL;
2895}
2896
2897/* Release any memory hanging off a type parser object */
2898static void
2899type_parser_release ( D3TypeParser *parser )
2900{
2901   ML_(dinfo_free)( parser->qparentE );
2902   ML_(dinfo_free)( parser->qlevel );
2903}
2904
2905static void typestack_show ( const D3TypeParser* parser, const HChar* str )
2906{
2907   Word i;
2908   VG_(printf)("  typestack (%s) {\n", str);
2909   for (i = 0; i <= parser->sp; i++) {
2910      VG_(printf)("    [%ld] (level %d): ", i, parser->qlevel[i]);
2911      ML_(pp_TyEnt)( &parser->qparentE[i] );
2912      VG_(printf)("\n");
2913   }
2914   VG_(printf)("  }\n");
2915}
2916
2917/* Remove from the stack, all entries with .level > 'level' */
2918static
2919void typestack_preen ( D3TypeParser* parser, Bool td3, Int level )
2920{
2921   Bool changed = False;
2922   vg_assert(parser->sp < parser->stack_size);
2923   while (True) {
2924      vg_assert(parser->sp >= -1);
2925      if (parser->sp == -1) break;
2926      if (parser->qlevel[parser->sp] <= level) break;
2927      if (0)
2928         TRACE_D3("BBBBAAAA typestack_pop [newsp=%d]\n", parser->sp-1);
2929      vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
2930      parser->sp--;
2931      changed = True;
2932   }
2933   if (changed && td3)
2934      typestack_show( parser, "after preen" );
2935}
2936
2937static Bool typestack_is_empty ( const D3TypeParser* parser )
2938{
2939   vg_assert(parser->sp >= -1 && parser->sp < parser->stack_size);
2940   return parser->sp == -1;
2941}
2942
2943static void typestack_push ( const CUConst* cc,
2944                             D3TypeParser* parser,
2945                             Bool td3,
2946                             const TyEnt* parentE, Int level )
2947{
2948   if (0)
2949   TRACE_D3("BBBBAAAA typestack_push[newsp=%d]: %d  %05lx\n",
2950            parser->sp+1, level, parentE->cuOff);
2951
2952   /* First we need to zap everything >= 'level', as we are about to
2953      replace any previous entry at 'level', so .. */
2954   typestack_preen(parser, /*td3*/False, level-1);
2955
2956   vg_assert(parser->sp >= -1);
2957   vg_assert(parser->sp < parser->stack_size);
2958   if (parser->sp == parser->stack_size - 1) {
2959      parser->stack_size += 16;
2960      parser->qparentE =
2961         ML_(dinfo_realloc)("di.readdwarf3.typush.1", parser->qparentE,
2962                            parser->stack_size * sizeof parser->qparentE[0]);
2963      parser->qlevel =
2964         ML_(dinfo_realloc)("di.readdwarf3.typush.2", parser->qlevel,
2965                            parser->stack_size * sizeof parser->qlevel[0]);
2966   }
2967   if (parser->sp >= 0)
2968      vg_assert(parser->qlevel[parser->sp] < level);
2969   parser->sp++;
2970   vg_assert(parentE);
2971   vg_assert(ML_(TyEnt__is_type)(parentE));
2972   vg_assert(parentE->cuOff != D3_INVALID_CUOFF);
2973   parser->qparentE[parser->sp] = *parentE;
2974   parser->qlevel[parser->sp]  = level;
2975   if (TD3)
2976      typestack_show( parser, "after push" );
2977}
2978
2979/* True if the subrange type being parsed gives the bounds of an array. */
2980static Bool subrange_type_denotes_array_bounds ( const D3TypeParser* parser,
2981                                                 DW_TAG dtag ) {
2982   vg_assert(dtag == DW_TAG_subrange_type);
2983   /* For most languages, a subrange_type dtag always gives the
2984      bounds of an array.
2985      For Ada, there are additional conditions as a subrange_type
2986      is also used for other purposes. */
2987   if (parser->language != 'A')
2988      /* not Ada, so it definitely denotes an array bound. */
2989      return True;
2990   else
2991      /* Extra constraints for Ada: it only denotes an array bound if .. */
2992      return (! typestack_is_empty(parser)
2993              && parser->qparentE[parser->sp].tag == Te_TyArray);
2994}
2995
2996/* Parse a type-related DIE.  'parser' holds the current parser state.
2997   'admin' is where the completed types are dumped.  'dtag' is the tag
2998   for this DIE.  'c_die' points to the start of the data fields (FORM
   stuff) for the DIE.  abbv is the parsed abbreviation which describes
3000   the DIE.
3001
3002   We may find the DIE uninteresting, in which case we should ignore
3003   it.
3004
3005   What happens: the DIE is examined.  If uninteresting, it is ignored.
3006   Otherwise, the DIE gives rise to two things:
3007
3008   (1) the offset of this DIE in the CU -- the cuOffset, a UWord
3009   (2) a TyAdmin structure, which holds the type, or related stuff
3010
3011   (2) is added at the end of 'tyadmins', at some index, say 'i'.
3012
3013   A pair (cuOffset, i) is added to 'tydict'.
3014
3015   Hence 'tyadmins' holds the actual type entities, and 'tydict' holds
3016   a mapping from cuOffset to the index of the corresponding entry in
   'tyadmins'.
3018
3019   When resolving a cuOffset to a TyAdmin, first look up the cuOffset
3020   in the tydict (by binary search).  This gives an index into
3021   tyadmins, and the required entity lives in tyadmins at that index.
3022*/
3023__attribute__((noinline))
3024static void parse_type_DIE ( /*MOD*/XArray* /* of TyEnt */ tyents,
3025                             /*MOD*/D3TypeParser* parser,
3026                             DW_TAG dtag,
3027                             UWord posn,
3028                             Int level,
3029                             Cursor* c_die,
3030                             const g_abbv *abbv,
3031                             const CUConst* cc,
3032                             Bool td3 )
3033{
3034   FormContents cts;
3035   UInt nf_i;
3036   TyEnt typeE;
3037   TyEnt atomE;
3038   TyEnt fieldE;
3039   TyEnt boundE;
3040
3041   UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
3042
3043   VG_(memset)( &typeE,  0xAA, sizeof(typeE) );
3044   VG_(memset)( &atomE,  0xAA, sizeof(atomE) );
3045   VG_(memset)( &fieldE, 0xAA, sizeof(fieldE) );
3046   VG_(memset)( &boundE, 0xAA, sizeof(boundE) );
3047
3048   /* If we've returned to a level at or above any previously noted
3049      parent, un-note it, so we don't believe we're still collecting
3050      its children. */
3051   typestack_preen( parser, td3, level-1 );
3052
3053   if (dtag == DW_TAG_compile_unit
3054       || dtag == DW_TAG_type_unit
3055       || dtag == DW_TAG_partial_unit) {
3056      /* See if we can find DW_AT_language, since it is important for
3057         establishing array bounds (see DW_TAG_subrange_type below in
3058         this fn) */
3059      nf_i = 0;
3060      while (True) {
3061         DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3062         DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3063         nf_i++;
3064         if (attr == 0 && form == 0) break;
3065         get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3066         if (attr != DW_AT_language)
3067            continue;
3068         if (cts.szB <= 0)
3069           goto_bad_DIE;
3070         switch (cts.u.val) {
3071            case DW_LANG_C89: case DW_LANG_C:
3072            case DW_LANG_C_plus_plus: case DW_LANG_ObjC:
3073            case DW_LANG_ObjC_plus_plus: case DW_LANG_UPC:
3074            case DW_LANG_Upc: case DW_LANG_C99: case DW_LANG_C11:
3075            case DW_LANG_C_plus_plus_11: case DW_LANG_C_plus_plus_14:
3076               parser->language = 'C'; break;
3077            case DW_LANG_Fortran77: case DW_LANG_Fortran90:
3078            case DW_LANG_Fortran95: case DW_LANG_Fortran03:
3079            case DW_LANG_Fortran08:
3080               parser->language = 'F'; break;
3081            case DW_LANG_Ada83: case DW_LANG_Ada95:
3082               parser->language = 'A'; break;
3083            case DW_LANG_Cobol74:
3084            case DW_LANG_Cobol85: case DW_LANG_Pascal83:
3085            case DW_LANG_Modula2: case DW_LANG_Java:
3086            case DW_LANG_PLI:
3087            case DW_LANG_D: case DW_LANG_Python: case DW_LANG_Go:
3088            case DW_LANG_Mips_Assembler:
3089               parser->language = '?'; break;
3090            default:
3091               goto_bad_DIE;
3092         }
3093      }
3094   }
3095
3096   if (dtag == DW_TAG_base_type) {
3097      /* We can pick up a new base type any time. */
3098      VG_(memset)(&typeE, 0, sizeof(typeE));
3099      typeE.cuOff = D3_INVALID_CUOFF;
3100      typeE.tag   = Te_TyBase;
3101      nf_i = 0;
3102      while (True) {
3103         DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3104         DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3105         nf_i++;
3106         if (attr == 0 && form == 0) break;
3107         get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3108         if (attr == DW_AT_name && cts.szB < 0) {
3109            typeE.Te.TyBase.name
3110               = ML_(cur_read_strdup)( cts.u.cur,
3111                                       "di.readdwarf3.ptD.base_type.1" );
3112         }
3113         if (attr == DW_AT_byte_size && cts.szB > 0) {
3114            typeE.Te.TyBase.szB = cts.u.val;
3115         }
3116         if (attr == DW_AT_encoding && cts.szB > 0) {
3117            switch (cts.u.val) {
3118               case DW_ATE_unsigned: case DW_ATE_unsigned_char:
3119               case DW_ATE_UTF: /* since DWARF4, e.g. char16_t from C++ */
3120               case DW_ATE_boolean:/* FIXME - is this correct? */
3121               case DW_ATE_unsigned_fixed:
3122                  typeE.Te.TyBase.enc = 'U'; break;
3123               case DW_ATE_signed: case DW_ATE_signed_char:
3124               case DW_ATE_signed_fixed:
3125                  typeE.Te.TyBase.enc = 'S'; break;
3126               case DW_ATE_float:
3127                  typeE.Te.TyBase.enc = 'F'; break;
3128               case DW_ATE_complex_float:
3129                  typeE.Te.TyBase.enc = 'C'; break;
3130               default:
3131                  goto_bad_DIE;
3132            }
3133         }
3134      }
3135
3136      /* Invent a name if it doesn't have one.  gcc-4.3
3137         -ftree-vectorize is observed to emit nameless base types. */
3138      if (!typeE.Te.TyBase.name)
3139         typeE.Te.TyBase.name
3140            = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.2",
3141                                 "<anon_base_type>" );
3142
3143      /* Do we have something that looks sane? */
3144      if (/* must have a name */
3145          typeE.Te.TyBase.name == NULL
3146          /* and a plausible size.  Yes, really 32: "complex long
3147             double" apparently has size=32 */
3148          || typeE.Te.TyBase.szB < 0 || typeE.Te.TyBase.szB > 32
3149          /* and a plausible encoding */
3150          || (typeE.Te.TyBase.enc != 'U'
3151              && typeE.Te.TyBase.enc != 'S'
3152              && typeE.Te.TyBase.enc != 'F'
3153              && typeE.Te.TyBase.enc != 'C'))
3154         goto_bad_DIE;
3155      /* Last minute hack: if we see this
3156         <1><515>: DW_TAG_base_type
3157             DW_AT_byte_size   : 0
3158             DW_AT_encoding    : 5
3159             DW_AT_name        : void
3160         convert it into a real Void type. */
3161      if (typeE.Te.TyBase.szB == 0
3162          && 0 == VG_(strcmp)("void", typeE.Te.TyBase.name)) {
3163         ML_(TyEnt__make_EMPTY)(&typeE);
3164         typeE.tag = Te_TyVoid;
3165         typeE.Te.TyVoid.isFake = False; /* it's a real one! */
3166      }
3167
3168      goto acquire_Type;
3169   }
3170
3171   /*
3172    * An example of DW_TAG_rvalue_reference_type:
3173    *
3174    * $ readelf --debug-dump /usr/lib/debug/usr/lib/libstdc++.so.6.0.16.debug
3175    *  <1><1014>: Abbrev Number: 55 (DW_TAG_rvalue_reference_type)
3176    *     <1015>   DW_AT_byte_size   : 4
3177    *     <1016>   DW_AT_type        : <0xe52>
3178    */
3179   if (dtag == DW_TAG_pointer_type || dtag == DW_TAG_reference_type
3180       || dtag == DW_TAG_ptr_to_member_type
3181       || dtag == DW_TAG_rvalue_reference_type) {
3182      /* This seems legit for _pointer_type and _reference_type.  I
3183         don't know if rolling _ptr_to_member_type in here really is
3184         legit, but it's better than not handling it at all. */
3185      VG_(memset)(&typeE, 0, sizeof(typeE));
3186      typeE.cuOff = D3_INVALID_CUOFF;
3187      switch (dtag) {
3188      case DW_TAG_pointer_type:
3189         typeE.tag = Te_TyPtr;
3190         break;
3191      case DW_TAG_reference_type:
3192         typeE.tag = Te_TyRef;
3193         break;
3194      case DW_TAG_ptr_to_member_type:
3195         typeE.tag = Te_TyPtrMbr;
3196         break;
3197      case DW_TAG_rvalue_reference_type:
3198         typeE.tag = Te_TyRvalRef;
3199         break;
3200      default:
3201         vg_assert(False);
3202      }
3203      /* target type defaults to void */
3204      typeE.Te.TyPorR.typeR = D3_FAKEVOID_CUOFF;
3205      /* These four type kinds don't *have* to specify their size, in
3206         which case we assume it's a machine word.  But if they do
3207         specify it, it must be a machine word :-)  This probably
3208         assumes that the word size of the Dwarf3 we're reading is the
3209         same size as that on the machine.  gcc appears to give a size
3210         whereas icc9 doesn't. */
3211      typeE.Te.TyPorR.szB = sizeof(UWord);
3212      nf_i = 0;
3213      while (True) {
3214         DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3215         DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3216         nf_i++;
3217         if (attr == 0 && form == 0) break;
3218         get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3219         if (attr == DW_AT_byte_size && cts.szB > 0) {
3220            typeE.Te.TyPorR.szB = cts.u.val;
3221         }
3222         if (attr == DW_AT_type && cts.szB > 0) {
3223            typeE.Te.TyPorR.typeR
3224               = cook_die_using_form( cc, (UWord)cts.u.val, form );
3225         }
3226      }
3227      /* Do we have something that looks sane? */
3228      if (typeE.Te.TyPorR.szB != sizeof(UWord))
3229         goto_bad_DIE;
3230      else
3231         goto acquire_Type;
3232   }
3233
3234   if (dtag == DW_TAG_enumeration_type) {
3235      /* Create a new Type to hold the results. */
3236      VG_(memset)(&typeE, 0, sizeof(typeE));
3237      typeE.cuOff = posn;
3238      typeE.tag   = Te_TyEnum;
3239      Bool is_decl = False;
3240      typeE.Te.TyEnum.atomRs
3241         = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.enum_type.1",
3242                       ML_(dinfo_free),
3243                       sizeof(UWord) );
3244      nf_i=0;
3245      while (True) {
3246         DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3247         DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3248         nf_i++;
3249         if (attr == 0 && form == 0) break;
3250         get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3251         if (attr == DW_AT_name && cts.szB < 0) {
3252            typeE.Te.TyEnum.name
3253               = ML_(cur_read_strdup)( cts.u.cur,
3254                                       "di.readdwarf3.pTD.enum_type.2" );
3255         }
3256         if (attr == DW_AT_byte_size && cts.szB > 0) {
3257            typeE.Te.TyEnum.szB = cts.u.val;
3258         }
3259         if (attr == DW_AT_declaration) {
3260            is_decl = True;
3261         }
3262      }
3263
3264      if (!typeE.Te.TyEnum.name)
3265         typeE.Te.TyEnum.name
3266            = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.3",
3267                                 "<anon_enum_type>" );
3268
3269      /* Do we have something that looks sane? */
3270      if (typeE.Te.TyEnum.szB == 0
3271          /* we must know the size */
3272          /* but not for Ada, which uses such dummy
3273             enumerations as helper for gdb ada mode.
3274             Also GCC allows incomplete enums as GNU extension.
3275             http://gcc.gnu.org/onlinedocs/gcc/Incomplete-Enums.html
3276             These are marked as DW_AT_declaration and won't have
3277             a size. They can only be used in declaration or as
3278             pointer types.  You can't allocate variables or storage
3279             using such an enum type. (Also GCC seems to have a bug
3280             that will put such an enumeration_type into a .debug_types
3281             unit which should only contain complete types.) */
3282          && (parser->language != 'A' && !is_decl)) {
3283         goto_bad_DIE;
3284      }
3285
3286      /* On't stack! */
3287      typestack_push( cc, parser, td3, &typeE, level );
3288      goto acquire_Type;
3289   }
3290
3291   /* gcc (GCC) 4.4.0 20081017 (experimental) occasionally produces
3292      DW_TAG_enumerator with only a DW_AT_name but no
3293      DW_AT_const_value.  This is in violation of the Dwarf3 standard,
3294      and appears to be a new "feature" of gcc - versions 4.3.x and
3295      earlier do not appear to do this.  So accept DW_TAG_enumerator
3296      which only have a name but no value.  An example:
3297
3298      <1><180>: Abbrev Number: 6 (DW_TAG_enumeration_type)
3299         <181>   DW_AT_name        : (indirect string, offset: 0xda70):
3300                                     QtMsgType
3301         <185>   DW_AT_byte_size   : 4
3302         <186>   DW_AT_decl_file   : 14
3303         <187>   DW_AT_decl_line   : 1480
3304         <189>   DW_AT_sibling     : <0x1a7>
3305      <2><18d>: Abbrev Number: 7 (DW_TAG_enumerator)
3306         <18e>   DW_AT_name        : (indirect string, offset: 0x9e18):
3307                                     QtDebugMsg
3308      <2><192>: Abbrev Number: 7 (DW_TAG_enumerator)
3309         <193>   DW_AT_name        : (indirect string, offset: 0x1505f):
3310                                     QtWarningMsg
3311      <2><197>: Abbrev Number: 7 (DW_TAG_enumerator)
3312         <198>   DW_AT_name        : (indirect string, offset: 0x16f4a):
3313                                     QtCriticalMsg
3314      <2><19c>: Abbrev Number: 7 (DW_TAG_enumerator)
3315         <19d>   DW_AT_name        : (indirect string, offset: 0x156dd):
3316                                     QtFatalMsg
3317      <2><1a1>: Abbrev Number: 7 (DW_TAG_enumerator)
3318         <1a2>   DW_AT_name        : (indirect string, offset: 0x13660):
3319                                     QtSystemMsg
3320   */
3321   if (dtag == DW_TAG_enumerator) {
3322      VG_(memset)( &atomE, 0, sizeof(atomE) );
3323      atomE.cuOff = posn;
3324      atomE.tag   = Te_Atom;
3325      nf_i = 0;
3326      while (True) {
3327         DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3328         DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3329         nf_i++;
3330         if (attr == 0 && form == 0) break;
3331         get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3332         if (attr == DW_AT_name && cts.szB < 0) {
3333            atomE.Te.Atom.name
3334              = ML_(cur_read_strdup)( cts.u.cur,
3335                                      "di.readdwarf3.pTD.enumerator.1" );
3336         }
3337         if (attr == DW_AT_const_value && cts.szB > 0) {
3338            atomE.Te.Atom.value      = cts.u.val;
3339            atomE.Te.Atom.valueKnown = True;
3340         }
3341      }
3342      /* Do we have something that looks sane? */
3343      if (atomE.Te.Atom.name == NULL)
3344         goto_bad_DIE;
3345      /* Do we have a plausible parent? */
3346      if (typestack_is_empty(parser)) goto_bad_DIE;
3347      vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
3348      vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
3349      if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE;
3350      if (parser->qparentE[parser->sp].tag != Te_TyEnum) goto_bad_DIE;
3351      /* Record this child in the parent */
3352      vg_assert(parser->qparentE[parser->sp].Te.TyEnum.atomRs);
3353      VG_(addToXA)( parser->qparentE[parser->sp].Te.TyEnum.atomRs,
3354                    &atomE );
3355      /* And record the child itself */
3356      goto acquire_Atom;
3357   }
3358
3359   /* Treat DW_TAG_class_type as if it was a DW_TAG_structure_type.  I
3360      don't know if this is correct, but it at least makes this reader
3361      usable for gcc-4.3 produced Dwarf3. */
3362   if (dtag == DW_TAG_structure_type || dtag == DW_TAG_class_type
3363       || dtag == DW_TAG_union_type) {
3364      Bool have_szB = False;
3365      Bool is_decl  = False;
3366      Bool is_spec  = False;
3367      /* Create a new Type to hold the results. */
3368      VG_(memset)(&typeE, 0, sizeof(typeE));
3369      typeE.cuOff = posn;
3370      typeE.tag   = Te_TyStOrUn;
3371      typeE.Te.TyStOrUn.name = NULL;
3372      typeE.Te.TyStOrUn.typeR = D3_INVALID_CUOFF;
3373      typeE.Te.TyStOrUn.fieldRs
3374         = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.pTD.struct_type.1",
3375                       ML_(dinfo_free),
3376                       sizeof(UWord) );
3377      typeE.Te.TyStOrUn.complete = True;
3378      typeE.Te.TyStOrUn.isStruct = dtag == DW_TAG_structure_type
3379                                   || dtag == DW_TAG_class_type;
3380      nf_i = 0;
3381      while (True) {
3382         DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3383         DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3384         nf_i++;
3385         if (attr == 0 && form == 0) break;
3386         get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3387         if (attr == DW_AT_name && cts.szB < 0) {
3388            typeE.Te.TyStOrUn.name
3389               = ML_(cur_read_strdup)( cts.u.cur,
3390                                       "di.readdwarf3.ptD.struct_type.2" );
3391         }
3392         if (attr == DW_AT_byte_size && cts.szB >= 0) {
3393            typeE.Te.TyStOrUn.szB = cts.u.val;
3394            have_szB = True;
3395         }
3396         if (attr == DW_AT_declaration && cts.szB > 0 && cts.u.val > 0) {
3397            is_decl = True;
3398         }
3399         if (attr == DW_AT_specification && cts.szB > 0 && cts.u.val > 0) {
3400            is_spec = True;
3401         }
3402         if (attr == DW_AT_signature && form == DW_FORM_ref_sig8
3403             && cts.szB > 0) {
3404            have_szB = True;
3405            typeE.Te.TyStOrUn.szB = 8;
3406            typeE.Te.TyStOrUn.typeR
3407               = cook_die_using_form( cc, (UWord)cts.u.val, form );
3408         }
3409      }
3410      /* Do we have something that looks sane? */
3411      if (is_decl && (!is_spec)) {
3412         /* It's a DW_AT_declaration.  We require the name but
3413            nothing else. */
         /* JRS 2012-06-28: following discussion w/ tromey, if the
            type doesn't have a name, just make one up, and accept it.
            It might be referred to by other DIEs, so ignoring it
            doesn't seem like a safe option. */
3418         if (typeE.Te.TyStOrUn.name == NULL)
3419            typeE.Te.TyStOrUn.name
3420               = ML_(dinfo_strdup)( "di.readdwarf3.ptD.struct_type.3",
3421                                    "<anon_struct_type>" );
3422         typeE.Te.TyStOrUn.complete = False;
3423         /* JRS 2009 Aug 10: <possible kludge>? */
3424         /* Push this tyent on the stack, even though it's incomplete.
3425            It appears that gcc-4.4 on Fedora 11 will sometimes create
3426            DW_TAG_member entries for it, and so we need to have a
3427            plausible parent present in order for that to work.  See
3428            #200029 comments 8 and 9. */
3429         typestack_push( cc, parser, td3, &typeE, level );
3430         /* </possible kludge> */
3431         goto acquire_Type;
3432      }
3433      if ((!is_decl) /* && (!is_spec) */) {
3434         /* this is the common, ordinary case */
3435         /* The name can be present, or not */
3436         if (!have_szB) {
3437            /* We must know the size.
3438               But in Ada, record with discriminants might have no size.
3439               But in C, VLA in the middle of a struct (gcc extension)
3440               might have no size.
3441               Instead, some GNAT dwarf extensions and/or dwarf entries
3442               allow to calculate the struct size at runtime.
3443               We cannot do that (yet?) so, the temporary kludge is to use
3444               a small size. */
3445            typeE.Te.TyStOrUn.szB = 1;
3446         }
3447         /* On't stack! */
3448         typestack_push( cc, parser, td3, &typeE, level );
3449         goto acquire_Type;
3450      }
3451      else {
3452         /* don't know how to handle any other variants just now */
3453         goto_bad_DIE;
3454      }
3455   }
3456
3457   if (dtag == DW_TAG_member) {
3458      /* Acquire member entries for both DW_TAG_structure_type and
3459         DW_TAG_union_type.  They differ minorly, in that struct
3460         members must have a DW_AT_data_member_location expression
3461         whereas union members must not. */
3462      Bool parent_is_struct;
3463      VG_(memset)( &fieldE, 0, sizeof(fieldE) );
3464      fieldE.cuOff = posn;
3465      fieldE.tag   = Te_Field;
3466      fieldE.Te.Field.typeR = D3_INVALID_CUOFF;
3467      nf_i = 0;
3468      while (True) {
3469         DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3470         DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3471         nf_i++;
3472         if (attr == 0 && form == 0) break;
3473         get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3474         if (attr == DW_AT_name && cts.szB < 0) {
3475            fieldE.Te.Field.name
3476               = ML_(cur_read_strdup)( cts.u.cur,
3477                                       "di.readdwarf3.ptD.member.1" );
3478         }
3479         if (attr == DW_AT_type && cts.szB > 0) {
3480            fieldE.Te.Field.typeR
3481               = cook_die_using_form( cc, (UWord)cts.u.val, form );
3482         }
3483         /* There are 2 different cases for DW_AT_data_member_location.
3484            If it is a constant class attribute, it contains byte offset
3485            from the beginning of the containing entity.
3486            Otherwise it is a location expression.  */
3487         if (attr == DW_AT_data_member_location && cts.szB > 0) {
3488            fieldE.Te.Field.nLoc = -1;
3489            fieldE.Te.Field.pos.offset = cts.u.val;
3490         }
3491         if (attr == DW_AT_data_member_location && cts.szB <= 0) {
3492            fieldE.Te.Field.nLoc = (UWord)(-cts.szB);
3493            fieldE.Te.Field.pos.loc
3494               = ML_(cur_read_memdup)( cts.u.cur,
3495                                       (SizeT)fieldE.Te.Field.nLoc,
3496                                       "di.readdwarf3.ptD.member.2" );
3497         }
3498      }
3499      /* Do we have a plausible parent? */
3500      if (typestack_is_empty(parser)) goto_bad_DIE;
3501      vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
3502      vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
3503      if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE;
3504      if (parser->qparentE[parser->sp].tag != Te_TyStOrUn) goto_bad_DIE;
3505      /* Do we have something that looks sane?  If this a member of a
3506         struct, we must have a location expression; but if a member
3507         of a union that is irrelevant (D3 spec sec 5.6.6).  We ought
3508         to reject in the latter case, but some compilers have been
3509         observed to emit constant-zero expressions.  So just ignore
3510         them. */
3511      parent_is_struct
3512         = parser->qparentE[parser->sp].Te.TyStOrUn.isStruct;
3513      if (!fieldE.Te.Field.name)
3514         fieldE.Te.Field.name
3515            = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.3",
3516                                 "<anon_field>" );
3517      if (fieldE.Te.Field.typeR == D3_INVALID_CUOFF)
3518         goto_bad_DIE;
3519      if (fieldE.Te.Field.nLoc) {
3520         if (!parent_is_struct) {
3521            /* If this is a union type, pretend we haven't seen the data
3522               member location expression, as it is by definition
3523               redundant (it must be zero). */
3524            if (fieldE.Te.Field.nLoc > 0)
3525               ML_(dinfo_free)(fieldE.Te.Field.pos.loc);
3526            fieldE.Te.Field.pos.loc = NULL;
3527            fieldE.Te.Field.nLoc = 0;
3528         }
3529         /* Record this child in the parent */
3530         fieldE.Te.Field.isStruct = parent_is_struct;
3531         vg_assert(parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs);
3532         VG_(addToXA)( parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs,
3533                       &posn );
3534         /* And record the child itself */
3535         goto acquire_Field;
3536      } else {
         /* Member with no location - this can happen with static
            const members in C++ code which are compile time constants
            that do not exist in the class. They're not of any interest
            to us so we ignore them. */
3541         ML_(TyEnt__make_EMPTY)(&fieldE);
3542      }
3543   }
3544
3545   if (dtag == DW_TAG_array_type) {
3546      VG_(memset)(&typeE, 0, sizeof(typeE));
3547      typeE.cuOff = posn;
3548      typeE.tag   = Te_TyArray;
3549      typeE.Te.TyArray.typeR = D3_INVALID_CUOFF;
3550      typeE.Te.TyArray.boundRs
3551         = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.array_type.1",
3552                       ML_(dinfo_free),
3553                       sizeof(UWord) );
3554      nf_i = 0;
3555      while (True) {
3556         DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3557         DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3558         nf_i++;
3559         if (attr == 0 && form == 0) break;
3560         get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3561         if (attr == DW_AT_type && cts.szB > 0) {
3562            typeE.Te.TyArray.typeR
3563               = cook_die_using_form( cc, (UWord)cts.u.val, form );
3564         }
3565      }
3566      if (typeE.Te.TyArray.typeR == D3_INVALID_CUOFF)
3567         goto_bad_DIE;
3568      /* On't stack! */
3569      typestack_push( cc, parser, td3, &typeE, level );
3570      goto acquire_Type;
3571   }
3572
3573   /* this is a subrange type defining the bounds of an array. */
3574   if (dtag == DW_TAG_subrange_type
3575       && subrange_type_denotes_array_bounds(parser, dtag)) {
3576      Bool have_lower = False;
3577      Bool have_upper = False;
3578      Bool have_count = False;
3579      Long lower = 0;
3580      Long upper = 0;
3581
3582      switch (parser->language) {
3583         case 'C': have_lower = True;  lower = 0; break;
3584         case 'F': have_lower = True;  lower = 1; break;
3585         case '?': have_lower = False; break;
3586         case 'A': have_lower = False; break;
3587         default:  vg_assert(0); /* assured us by handling of
3588                                    DW_TAG_compile_unit in this fn */
3589      }
3590
3591      VG_(memset)( &boundE, 0, sizeof(boundE) );
3592      boundE.cuOff = D3_INVALID_CUOFF;
3593      boundE.tag   = Te_Bound;
3594      nf_i = 0;
3595      while (True) {
3596         DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3597         DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3598         nf_i++;
3599         if (attr == 0 && form == 0) break;
3600         get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3601         if (attr == DW_AT_lower_bound && cts.szB > 0) {
3602            lower      = (Long)cts.u.val;
3603            have_lower = True;
3604         }
3605         if (attr == DW_AT_upper_bound && cts.szB > 0) {
3606            upper      = (Long)cts.u.val;
3607            have_upper = True;
3608         }
3609         if (attr == DW_AT_count && cts.szB > 0) {
3610            /*count    = (Long)cts.u.val;*/
3611            have_count = True;
3612         }
3613      }
3614      /* FIXME: potentially skip the rest if no parent present, since
3615         it could be the case that this subrange type is free-standing
3616         (not being used to describe the bounds of a containing array
3617         type) */
3618      /* Do we have a plausible parent? */
3619      if (typestack_is_empty(parser)) goto_bad_DIE;
3620      vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
3621      vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
3622      if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE;
3623      if (parser->qparentE[parser->sp].tag != Te_TyArray) goto_bad_DIE;
3624
3625      /* Figure out if we have a definite range or not */
3626      if (have_lower && have_upper && (!have_count)) {
3627         boundE.Te.Bound.knownL = True;
3628         boundE.Te.Bound.knownU = True;
3629         boundE.Te.Bound.boundL = lower;
3630         boundE.Te.Bound.boundU = upper;
3631      }
3632      else if (have_lower && (!have_upper) && (!have_count)) {
3633         boundE.Te.Bound.knownL = True;
3634         boundE.Te.Bound.knownU = False;
3635         boundE.Te.Bound.boundL = lower;
3636         boundE.Te.Bound.boundU = 0;
3637      }
3638      else if ((!have_lower) && have_upper && (!have_count)) {
3639         boundE.Te.Bound.knownL = False;
3640         boundE.Te.Bound.knownU = True;
3641         boundE.Te.Bound.boundL = 0;
3642         boundE.Te.Bound.boundU = upper;
3643      }
3644      else if ((!have_lower) && (!have_upper) && (!have_count)) {
3645         boundE.Te.Bound.knownL = False;
3646         boundE.Te.Bound.knownU = False;
3647         boundE.Te.Bound.boundL = 0;
3648         boundE.Te.Bound.boundU = 0;
3649      } else {
3650         /* FIXME: handle more cases */
3651         goto_bad_DIE;
3652      }
3653
3654      /* Record this bound in the parent */
3655      boundE.cuOff = posn;
3656      vg_assert(parser->qparentE[parser->sp].Te.TyArray.boundRs);
3657      VG_(addToXA)( parser->qparentE[parser->sp].Te.TyArray.boundRs,
3658                    &boundE.cuOff );
3659      /* And record the child itself */
3660      goto acquire_Bound;
3661   }
3662
3663   /* typedef or subrange_type other than array bounds. */
3664   if (dtag == DW_TAG_typedef
3665       || (dtag == DW_TAG_subrange_type
3666           && !subrange_type_denotes_array_bounds(parser, dtag))) {
3667      /* subrange_type other than array bound is only for Ada. */
3668      vg_assert (dtag == DW_TAG_typedef || parser->language == 'A');
3669      /* We can pick up a new typedef/subrange_type any time. */
3670      VG_(memset)(&typeE, 0, sizeof(typeE));
3671      typeE.cuOff = D3_INVALID_CUOFF;
3672      typeE.tag   = Te_TyTyDef;
3673      typeE.Te.TyTyDef.name = NULL;
3674      typeE.Te.TyTyDef.typeR = D3_INVALID_CUOFF;
3675      nf_i = 0;
3676      while (True) {
3677         DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3678         DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3679         nf_i++;
3680         if (attr == 0 && form == 0) break;
3681         get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3682         if (attr == DW_AT_name && cts.szB < 0) {
3683            typeE.Te.TyTyDef.name
3684               = ML_(cur_read_strdup)( cts.u.cur,
3685                                       "di.readdwarf3.ptD.typedef.1" );
3686         }
3687         if (attr == DW_AT_type && cts.szB > 0) {
3688            typeE.Te.TyTyDef.typeR
3689               = cook_die_using_form( cc, (UWord)cts.u.val, form );
3690         }
3691      }
3692      /* Do we have something that looks sane?
3693         gcc gnat Ada generates minimal typedef
3694         such as the below
3695         <6><91cc>: DW_TAG_typedef
3696            DW_AT_abstract_ori: <9066>
3697         g++ for OMP can generate artificial functions that have
3698         parameters that refer to pointers to unnamed typedefs.
3699         See https://bugs.kde.org/show_bug.cgi?id=273475
3700         So we cannot require a name for a DW_TAG_typedef.
3701      */
3702      goto acquire_Type;
3703   }
3704
3705   if (dtag == DW_TAG_subroutine_type) {
3706      /* function type? just record that one fact and ask no
3707         further questions. */
3708      VG_(memset)(&typeE, 0, sizeof(typeE));
3709      typeE.cuOff = D3_INVALID_CUOFF;
3710      typeE.tag   = Te_TyFn;
3711      goto acquire_Type;
3712   }
3713
3714   if (dtag == DW_TAG_volatile_type || dtag == DW_TAG_const_type
3715       || dtag == DW_TAG_restrict_type) {
3716      Int have_ty = 0;
3717      VG_(memset)(&typeE, 0, sizeof(typeE));
3718      typeE.cuOff = D3_INVALID_CUOFF;
3719      typeE.tag   = Te_TyQual;
3720      typeE.Te.TyQual.qual
3721         = (dtag == DW_TAG_volatile_type ? 'V'
3722            : (dtag == DW_TAG_const_type ? 'C' : 'R'));
3723      /* target type defaults to 'void' */
3724      typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF;
3725      nf_i = 0;
3726      while (True) {
3727         DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3728         DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3729         nf_i++;
3730         if (attr == 0 && form == 0) break;
3731         get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3732         if (attr == DW_AT_type && cts.szB > 0) {
3733            typeE.Te.TyQual.typeR
3734               = cook_die_using_form( cc, (UWord)cts.u.val, form );
3735            have_ty++;
3736         }
3737      }
3738      /* gcc sometimes generates DW_TAG_const/volatile_type without
3739         DW_AT_type and GDB appears to interpret the type as 'const
3740         void' (resp. 'volatile void').  So just allow it .. */
3741      if (have_ty == 1 || have_ty == 0)
3742         goto acquire_Type;
3743      else
3744         goto_bad_DIE;
3745   }
3746
3747   /*
3748    * Treat DW_TAG_unspecified_type as type void. An example of DW_TAG_unspecified_type:
3749    *
3750    * $ readelf --debug-dump /usr/lib/debug/usr/lib/libstdc++.so.6.0.16.debug
3751    *  <1><10d4>: Abbrev Number: 53 (DW_TAG_unspecified_type)
3752    *     <10d5>   DW_AT_name        : (indirect string, offset: 0xdb7): decltype(nullptr)
3753    */
3754   if (dtag == DW_TAG_unspecified_type) {
3755      VG_(memset)(&typeE, 0, sizeof(typeE));
3756      typeE.cuOff           = D3_INVALID_CUOFF;
3757      typeE.tag             = Te_TyQual;
3758      typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF;
3759      goto acquire_Type;
3760   }
3761
3762   /* else ignore this DIE */
3763   return;
3764   /*NOTREACHED*/
3765
3766  acquire_Type:
3767   if (0) VG_(printf)("YYYY Acquire Type\n");
3768   vg_assert(ML_(TyEnt__is_type)( &typeE ));
3769   vg_assert(typeE.cuOff == D3_INVALID_CUOFF || typeE.cuOff == posn);
3770   typeE.cuOff = posn;
3771   VG_(addToXA)( tyents, &typeE );
3772   return;
3773   /*NOTREACHED*/
3774
3775  acquire_Atom:
3776   if (0) VG_(printf)("YYYY Acquire Atom\n");
3777   vg_assert(atomE.tag == Te_Atom);
3778   vg_assert(atomE.cuOff == D3_INVALID_CUOFF || atomE.cuOff == posn);
3779   atomE.cuOff = posn;
3780   VG_(addToXA)( tyents, &atomE );
3781   return;
3782   /*NOTREACHED*/
3783
3784  acquire_Field:
3785   /* For union members, Expr should be absent */
3786   if (0) VG_(printf)("YYYY Acquire Field\n");
3787   vg_assert(fieldE.tag == Te_Field);
3788   vg_assert(fieldE.Te.Field.nLoc <= 0 || fieldE.Te.Field.pos.loc != NULL);
3789   vg_assert(fieldE.Te.Field.nLoc != 0 || fieldE.Te.Field.pos.loc == NULL);
3790   if (fieldE.Te.Field.isStruct) {
3791      vg_assert(fieldE.Te.Field.nLoc != 0);
3792   } else {
3793      vg_assert(fieldE.Te.Field.nLoc == 0);
3794   }
3795   vg_assert(fieldE.cuOff == D3_INVALID_CUOFF || fieldE.cuOff == posn);
3796   fieldE.cuOff = posn;
3797   VG_(addToXA)( tyents, &fieldE );
3798   return;
3799   /*NOTREACHED*/
3800
3801  acquire_Bound:
3802   if (0) VG_(printf)("YYYY Acquire Bound\n");
3803   vg_assert(boundE.tag == Te_Bound);
3804   vg_assert(boundE.cuOff == D3_INVALID_CUOFF || boundE.cuOff == posn);
3805   boundE.cuOff = posn;
3806   VG_(addToXA)( tyents, &boundE );
3807   return;
3808   /*NOTREACHED*/
3809
3810  bad_DIE:
3811   dump_bad_die_and_barf("parse_type_DIE", dtag, posn, level,
3812                         c_die, saved_die_c_offset,
3813                         abbv,
3814                         cc);
3815   /*NOTREACHED*/
3816}
3817
3818
3819/*------------------------------------------------------------*/
3820/*---                                                      ---*/
3821/*--- Compression of type DIE information                  ---*/
3822/*---                                                      ---*/
3823/*------------------------------------------------------------*/
3824
3825static UWord chase_cuOff ( Bool* changed,
3826                           const XArray* /* of TyEnt */ ents,
3827                           TyEntIndexCache* ents_cache,
3828                           UWord cuOff )
3829{
3830   TyEnt* ent;
3831   ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, cuOff );
3832
3833   if (!ent) {
3834      VG_(printf)("chase_cuOff: no entry for 0x%05lx\n", cuOff);
3835      *changed = False;
3836      return cuOff;
3837   }
3838
3839   vg_assert(ent->tag != Te_EMPTY);
3840   if (ent->tag != Te_INDIR) {
3841      *changed = False;
3842      return cuOff;
3843   } else {
3844      vg_assert(ent->Te.INDIR.indR < cuOff);
3845      *changed = True;
3846      return ent->Te.INDIR.indR;
3847   }
3848}
3849
3850static
3851void chase_cuOffs_in_XArray ( Bool* changed,
3852                              const XArray* /* of TyEnt */ ents,
3853                              TyEntIndexCache* ents_cache,
3854                              /*MOD*/XArray* /* of UWord */ cuOffs )
3855{
3856   Bool b2 = False;
3857   Word i, n = VG_(sizeXA)( cuOffs );
3858   for (i = 0; i < n; i++) {
3859      Bool   b = False;
3860      UWord* p = VG_(indexXA)( cuOffs, i );
3861      *p = chase_cuOff( &b, ents, ents_cache, *p );
3862      if (b)
3863         b2 = True;
3864   }
3865   *changed = b2;
3866}
3867
3868static Bool TyEnt__subst_R_fields ( const XArray* /* of TyEnt */ ents,
3869                                    TyEntIndexCache* ents_cache,
3870                                    /*MOD*/TyEnt* te )
3871{
3872   Bool b, changed = False;
3873   switch (te->tag) {
3874      case Te_EMPTY:
3875         break;
3876      case Te_INDIR:
3877         te->Te.INDIR.indR
3878            = chase_cuOff( &b, ents, ents_cache, te->Te.INDIR.indR );
3879         if (b) changed = True;
3880         break;
3881      case Te_UNKNOWN:
3882         break;
3883      case Te_Atom:
3884         break;
3885      case Te_Field:
3886         te->Te.Field.typeR
3887            = chase_cuOff( &b, ents, ents_cache, te->Te.Field.typeR );
3888         if (b) changed = True;
3889         break;
3890      case Te_Bound:
3891         break;
3892      case Te_TyBase:
3893         break;
3894      case Te_TyPtr:
3895      case Te_TyRef:
3896      case Te_TyPtrMbr:
3897      case Te_TyRvalRef:
3898         te->Te.TyPorR.typeR
3899            = chase_cuOff( &b, ents, ents_cache, te->Te.TyPorR.typeR );
3900         if (b) changed = True;
3901         break;
3902      case Te_TyTyDef:
3903         te->Te.TyTyDef.typeR
3904            = chase_cuOff( &b, ents, ents_cache, te->Te.TyTyDef.typeR );
3905         if (b) changed = True;
3906         break;
3907      case Te_TyStOrUn:
3908         chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyStOrUn.fieldRs );
3909         if (b) changed = True;
3910         break;
3911      case Te_TyEnum:
3912         chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyEnum.atomRs );
3913         if (b) changed = True;
3914         break;
3915      case Te_TyArray:
3916         te->Te.TyArray.typeR
3917            = chase_cuOff( &b, ents, ents_cache, te->Te.TyArray.typeR );
3918         if (b) changed = True;
3919         chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyArray.boundRs );
3920         if (b) changed = True;
3921         break;
3922      case Te_TyFn:
3923         break;
3924      case Te_TyQual:
3925         te->Te.TyQual.typeR
3926            = chase_cuOff( &b, ents, ents_cache, te->Te.TyQual.typeR );
3927         if (b) changed = True;
3928         break;
3929      case Te_TyVoid:
3930         break;
3931      default:
3932         ML_(pp_TyEnt)(te);
3933         vg_assert(0);
3934   }
3935   return changed;
3936}
3937
3938/* Make a pass over 'ents'.  For each tyent, inspect the target of any
3939   'R' or 'Rs' fields (those which refer to other tyents), and replace
3940   any which point to INDIR nodes with the target of the indirection
3941   (which should not itself be an indirection).  In summary, this
3942   routine shorts out all references to indirection nodes. */
3943static
3944Word dedup_types_substitution_pass ( /*MOD*/XArray* /* of TyEnt */ ents,
3945                                     TyEntIndexCache* ents_cache )
3946{
3947   Word i, n, nChanged = 0;
3948   Bool b;
3949   n = VG_(sizeXA)( ents );
3950   for (i = 0; i < n; i++) {
3951      TyEnt* ent = VG_(indexXA)( ents, i );
3952      vg_assert(ent->tag != Te_EMPTY);
3953      /* We have to substitute everything, even indirections, so as to
3954         ensure that chains of indirections don't build up. */
3955      b = TyEnt__subst_R_fields( ents, ents_cache, ent );
3956      if (b)
3957         nChanged++;
3958   }
3959
3960   return nChanged;
3961}
3962
3963
3964/* Make a pass over 'ents', building a dictionary of TyEnts as we go.
3965   Look up each new tyent in the dictionary in turn.  If it is already
3966   in the dictionary, replace this tyent with an indirection to the
3967   existing one, and delete any malloc'd stuff hanging off this one.
3968   In summary, this routine commons up all tyents that are identical
3969   as defined by TyEnt__cmp_by_all_except_cuOff. */
3970static
3971Word dedup_types_commoning_pass ( /*MOD*/XArray* /* of TyEnt */ ents )
3972{
3973   Word    n, i, nDeleted;
3974   WordFM* dict; /* TyEnt* -> void */
3975   TyEnt*  ent;
3976   UWord   keyW, valW;
3977
3978   dict = VG_(newFM)(
3979             ML_(dinfo_zalloc), "di.readdwarf3.dtcp.1",
3980             ML_(dinfo_free),
3981             (Word(*)(UWord,UWord)) ML_(TyEnt__cmp_by_all_except_cuOff)
3982          );
3983
3984   nDeleted = 0;
3985   n = VG_(sizeXA)( ents );
3986   for (i = 0; i < n; i++) {
3987      ent = VG_(indexXA)( ents, i );
3988      vg_assert(ent->tag != Te_EMPTY);
3989
3990      /* Ignore indirections, although check that they are
3991         not forming a cycle. */
3992      if (ent->tag == Te_INDIR) {
3993         vg_assert(ent->Te.INDIR.indR < ent->cuOff);
3994         continue;
3995      }
3996
3997      keyW = valW = 0;
3998      if (VG_(lookupFM)( dict, &keyW, &valW, (UWord)ent )) {
3999         /* it's already in the dictionary. */
4000         TyEnt* old = (TyEnt*)keyW;
4001         vg_assert(valW == 0);
4002         vg_assert(old != ent);
4003         vg_assert(old->tag != Te_INDIR);
4004         /* since we are traversing the array in increasing order of
4005            cuOff: */
4006         vg_assert(old->cuOff < ent->cuOff);
4007         /* So anyway, dump this entry and replace it with an
4008            indirection to the one in the dictionary.  Note that the
4009            assertion above guarantees that we cannot create cycles of
4010            indirections, since we are always creating an indirection
4011            to a tyent with a cuOff lower than this one. */
4012         ML_(TyEnt__make_EMPTY)( ent );
4013         ent->tag = Te_INDIR;
4014         ent->Te.INDIR.indR = old->cuOff;
4015         nDeleted++;
4016      } else {
4017         /* not in dictionary; add it and keep going. */
4018         VG_(addToFM)( dict, (UWord)ent, 0 );
4019      }
4020   }
4021
4022   VG_(deleteFM)( dict, NULL, NULL );
4023
4024   return nDeleted;
4025}
4026
4027
4028static
4029void dedup_types ( Bool td3,
4030                   /*MOD*/XArray* /* of TyEnt */ ents,
4031                   TyEntIndexCache* ents_cache )
4032{
4033   Word m, n, i, nDel, nSubst, nThresh;
4034   if (0) td3 = True;
4035
4036   n = VG_(sizeXA)( ents );
4037
4038   /* If a commoning pass and a substitution pass both make fewer than
4039      this many changes, just stop.  It's pointless to burn up CPU
4040      time trying to compress the last 1% or so out of the array. */
4041   nThresh = n / 200;
4042
4043   /* First we must sort .ents by its .cuOff fields, so we
4044      can index into it. */
4045   VG_(setCmpFnXA)( ents, (XACmpFn_t) ML_(TyEnt__cmp_by_cuOff_only) );
4046   VG_(sortXA)( ents );
4047
4048   /* Now repeatedly do commoning and substitution passes over
4049      the array, until there are no more changes. */
4050   do {
4051      nDel   = dedup_types_commoning_pass ( ents );
4052      nSubst = dedup_types_substitution_pass ( ents, ents_cache );
4053      vg_assert(nDel >= 0 && nSubst >= 0);
4054      TRACE_D3("   %ld deletions, %ld substitutions\n", nDel, nSubst);
4055   } while (nDel > nThresh || nSubst > nThresh);
4056
4057   /* Sanity check: all INDIR nodes should point at a non-INDIR thing.
4058      In fact this should be true at the end of every loop iteration
4059      above (a commoning pass followed by a substitution pass), but
4060      checking it on every iteration is excessively expensive.  Note,
4061      this loop also computes 'm' for the stats printing below it. */
4062   m = 0;
4063   n = VG_(sizeXA)( ents );
4064   for (i = 0; i < n; i++) {
4065      TyEnt *ent, *ind;
4066      ent = VG_(indexXA)( ents, i );
4067      if (ent->tag != Te_INDIR) continue;
4068      m++;
4069      ind = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
4070                                         ent->Te.INDIR.indR );
4071      vg_assert(ind);
4072      vg_assert(ind->tag != Te_INDIR);
4073   }
4074
4075   TRACE_D3("Overall: %ld before, %ld after\n", n, n-m);
4076}
4077
4078
4079/*------------------------------------------------------------*/
4080/*---                                                      ---*/
4081/*--- Resolution of references to type DIEs                ---*/
4082/*---                                                      ---*/
4083/*------------------------------------------------------------*/
4084
4085/* Make a pass through the (temporary) variables array.  Examine the
4086   type of each variable, check is it found, and chase any Te_INDIRs.
4087   Postcondition is: each variable has a typeR field that refers to a
4088   valid type in tyents, or a Te_UNKNOWN, and is certainly guaranteed
4089   not to refer to a Te_INDIR.  (This is so that we can throw all the
4090   Te_INDIRs away later). */
4091
4092__attribute__((noinline))
4093static void resolve_variable_types (
4094               void (*barf)( const HChar* ) __attribute__((noreturn)),
4095               /*R-O*/XArray* /* of TyEnt */ ents,
4096               /*MOD*/TyEntIndexCache* ents_cache,
4097               /*MOD*/XArray* /* of TempVar* */ vars
4098            )
4099{
4100   Word i, n;
4101   n = VG_(sizeXA)( vars );
4102   for (i = 0; i < n; i++) {
4103      TempVar* var = *(TempVar**)VG_(indexXA)( vars, i );
4104      /* This is the stated type of the variable.  But it might be
4105         an indirection, so be careful. */
4106      TyEnt* ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
4107                                                var->typeR );
4108      if (ent && ent->tag == Te_INDIR) {
4109         ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
4110                                            ent->Te.INDIR.indR );
4111         vg_assert(ent);
4112         vg_assert(ent->tag != Te_INDIR);
4113      }
4114
4115      /* Deal first with "normal" cases */
4116      if (ent && ML_(TyEnt__is_type)(ent)) {
4117         var->typeR = ent->cuOff;
4118         continue;
4119      }
4120
4121      /* If there's no ent, it probably we did not manage to read a
4122         type at the cuOffset which is stated as being this variable's
4123         type.  Maybe a deficiency in parse_type_DIE.  Complain. */
4124      if (ent == NULL) {
4125         VG_(printf)("\n: Invalid cuOff = 0x%05lx\n", var->typeR );
4126         barf("resolve_variable_types: "
4127              "cuOff does not refer to a known type");
4128      }
4129      vg_assert(ent);
4130      /* If ent has any other tag, something bad happened, along the
4131         lines of var->typeR not referring to a type at all. */
4132      vg_assert(ent->tag == Te_UNKNOWN);
4133      /* Just accept it; the type will be useless, but at least keep
4134         going. */
4135      var->typeR = ent->cuOff;
4136   }
4137}
4138
4139
4140/*------------------------------------------------------------*/
4141/*---                                                      ---*/
4142/*--- Parsing of Compilation Units                         ---*/
4143/*---                                                      ---*/
4144/*------------------------------------------------------------*/
4145
4146static Int cmp_TempVar_by_dioff ( const void* v1, const void* v2 ) {
4147   const TempVar* t1 = *(const TempVar *const *)v1;
4148   const TempVar* t2 = *(const TempVar *const *)v2;
4149   if (t1->dioff < t2->dioff) return -1;
4150   if (t1->dioff > t2->dioff) return 1;
4151   return 0;
4152}
4153
/* Read one DIE from cursor 'c' and, recursively, its children.

   The DIE's abbreviation is looked up via 'cc'.  The DIE is then
   offered to the type and variable parsers (when
   VG_(clo_read_var_info) is set) and to the inline parser (when
   VG_(clo_read_inline_info) is set).  Every parser is handed the
   cursor positioned just after the abbreviation code; the cursor is
   rewound between parsers.  If no parser moved the cursor, the DIE
   is skipped with skip_DIE.

   Children are then read by recursive calls at 'level'+1, unless no
   parser asked for them and a sibling offset is known, in which case
   the cursor jumps straight to the sibling.

   Calls cc->barf if the abbreviation has a zero tag or an invalid
   has_children value. */
static void read_DIE (
   /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
   /*MOD*/XArray* /* of TyEnt */ tyents,
   /*MOD*/XArray* /* of TempVar* */ tempvars,
   /*MOD*/XArray* /* of GExpr* */ gexprs,
   /*MOD*/D3TypeParser* typarser,
   /*MOD*/D3VarParser* varparser,
   /*MOD*/D3InlParser* inlparser,
   Cursor* c, Bool td3, CUConst* cc, Int level
)
{
   const g_abbv *abbv;
   ULong  atag, abbv_code;
   UWord  posn;
   UInt   has_children;
   UWord  start_die_c_offset;
   UWord  after_die_c_offset;
   // If the DIE we will parse has a sibling and the parser(s) are
   // all indicating that parse_children is not necessary, then
   // we will skip the children by jumping to the sibling of this DIE
   // (if it has a sibling).
   UWord  sibling = 0;
   Bool   parse_children = False;

   /* --- Deal with this DIE --- */
   /* 'posn' is the (cooked) position of the DIE itself, taken before
      the abbreviation code is consumed. */
   posn      = cook_die( cc, get_position_of_Cursor( c ) );
   abbv_code = get_ULEB128( c );
   abbv = get_abbv(cc, abbv_code);
   atag      = abbv->atag;

   if (TD3) {
      TRACE_D3("\n");
      trace_DIE ((DW_TAG)atag, posn, level,
                 get_position_of_Cursor( c ), abbv, cc);
   }

   if (atag == 0)
      cc->barf("read_DIE: invalid zero tag on DIE");

   has_children = abbv->has_children;
   if (has_children != DW_children_no && has_children != DW_children_yes)
      cc->barf("read_DIE: invalid has_children value");

   /* We're set up to look at the fields of this DIE.  Hand it off to
      any parser(s) that want to see it.  Since they will in general
      advance the DIE cursor, remember the current settings so that we
      can then back up. */
   start_die_c_offset  = get_position_of_Cursor( c );
   after_die_c_offset  = 0; // set to c position if a parser has read the DIE.

   if (VG_(clo_read_var_info)) {
      parse_type_DIE( tyents,
                      typarser,
                      (DW_TAG)atag,
                      posn,
                      level,
                      c,     /* DIE cursor */
                      abbv,  /* abbrev */
                      cc,
                      td3 );
      /* If the parser moved the cursor, record where the DIE ends
         and rewind so that the next parser starts at the same place. */
      if (get_position_of_Cursor( c ) != start_die_c_offset) {
         after_die_c_offset = get_position_of_Cursor( c );
         set_position_of_Cursor( c, start_die_c_offset );
      }

      parse_var_DIE( rangestree,
                     tempvars,
                     gexprs,
                     varparser,
                     (DW_TAG)atag,
                     posn,
                     level,
                     c,     /* DIE cursor */
                     abbv,  /* abbrev */
                     cc,
                     td3 );
      /* Same bookkeeping as after parse_type_DIE above. */
      if (get_position_of_Cursor( c ) != start_die_c_offset) {
         after_die_c_offset = get_position_of_Cursor( c );
         set_position_of_Cursor( c, start_die_c_offset );
      }

      parse_children = True;
      // type and var parsers do not have logic to skip children and
      // establish the value of sibling.
   }

   if (VG_(clo_read_inline_info)) {
      inlparser->sibling = 0;
      /* The inline parser's result is OR-ed in, so it can request
         children but cannot cancel a request made above. */
      parse_children =
         parse_inl_DIE( inlparser,
                        (DW_TAG)atag,
                        posn,
                        level,
                        c,     /* DIE cursor */
                        abbv, /* abbrev */
                        cc,
                        td3 )
         || parse_children;
      if (get_position_of_Cursor( c ) != start_die_c_offset) {
         after_die_c_offset = get_position_of_Cursor( c );
         // Last parser, no need to reset the cursor to start_die_c_offset.
      }
      /* Take over the sibling offset that was left in
         inlparser->sibling, if any (it was zeroed before the call). */
      if (sibling == 0)
         sibling = inlparser->sibling;
      vg_assert (inlparser->sibling == 0 || inlparser->sibling == sibling);
   }

   /* after_die_c_offset is still 0 only if no parser above moved the
      cursor. */
   if (after_die_c_offset > 0) {
      // DIE was read by a parser above, so we know where the DIE ends.
      set_position_of_Cursor( c, after_die_c_offset );
   } else {
      /* No parser has parsed this DIE. So, we need to skip the DIE,
         in order to read the next DIE.
         At the same time, establish sibling value if the DIE has one. */
      TRACE_D3("    uninteresting DIE -> skipping ...\n");
      skip_DIE (&sibling, c, abbv, cc);
   }

   /* --- Now recurse into its children, if any
      and the parsing of the children is requested by a parser --- */
   if (has_children == DW_children_yes) {
      /* Without a known sibling offset the children have to be read
         anyway, since that is the only way to find where they end. */
      if (parse_children || sibling == 0) {
         if (0) TRACE_D3("BEGIN children of level %d\n", level);
         while (True) {
            /* A zero abbreviation code terminates the list of
               children; peek so that a child DIE is left intact for
               the recursive call. */
            atag = peek_ULEB128( c );
            if (atag == 0) break;
            read_DIE( rangestree, tyents, tempvars, gexprs,
                      typarser, varparser, inlparser,
                      c, td3, cc, level+1 );
         }
         /* Now we need to eat the terminating zero */
         atag = get_ULEB128( c );
         vg_assert(atag == 0);
         if (0) TRACE_D3("END children of level %d\n", level);
      } else {
         // We can skip the children, by jumping to the sibling
         TRACE_D3("    SKIPPING DIE's children,"
                  "jumping to sibling <%d><%lx>\n",
                  level, sibling);
         set_position_of_Cursor( c, sibling );
      }
   }

}
4298
/* Intended to display the contents of .debug_loc.  The entire body
   is currently compiled out with '#if 0' (see the explanation inside
   it), so calling this function does nothing and all three
   parameters are unused. */
static void trace_debug_loc (const DebugInfo* di,
                             __attribute__((noreturn)) void (*barf)( const HChar* ),
                             DiSlice escn_debug_loc)
{
#if 0
   /* This doesn't work properly because it assumes all entries are
      packed end to end, with no holes.  But that doesn't always
      appear to be the case, so it loses sync.  And the D3 spec
      doesn't appear to require a no-hole situation either. */
   /* Display .debug_loc */
   Addr  dl_base;
   UWord dl_offset;
   Cursor loc; /* for showing .debug_loc */
   Bool td3 = di->trace_symtab;

   TRACE_SYMTAB("\n");
   TRACE_SYMTAB("\n------ The contents of .debug_loc ------\n");
   TRACE_SYMTAB("    Offset   Begin    End      Expression\n");
   if (ML_(sli_is_valid)(escn_debug_loc)) {
      init_Cursor( &loc, escn_debug_loc, 0, barf,
                   "Overrun whilst reading .debug_loc section(1)" );
      dl_base = 0;
      dl_offset = 0;
      while (True) {
         UWord  w1, w2;
         UWord  len;
         if (is_at_end_Cursor( &loc ))
            break;

         /* Read a (host-)word pair.  This is something of a hack since
            the word size to read is really dictated by the ELF file;
            however, we assume we're reading a file with the same
            word-sizeness as the host.  Reasonably enough. */
         w1 = get_UWord( &loc );
         w2 = get_UWord( &loc );

         if (w1 == 0 && w2 == 0) {
            /* end of list.  reset 'base' */
            TRACE_D3("    %08lx <End of list>\n", dl_offset);
            dl_base = 0;
            dl_offset = get_position_of_Cursor( &loc );
            continue;
         }

         if (w1 == -1UL) {
            /* new value for 'base' */
            TRACE_D3("    %08lx %16lx %08lx (base address)\n",
                     dl_offset, w1, w2);
            dl_base = w2;
            continue;
         }

         /* else a location expression follows */
         TRACE_D3("    %08lx %08lx %08lx ",
                  dl_offset, w1 + dl_base, w2 + dl_base);
         len = (UWord)get_UShort( &loc );
         while (len > 0) {
            UChar byte = get_UChar( &loc );
            TRACE_D3("%02x", (UInt)byte);
            len--;
         }
         TRACE_SYMTAB("\n");
      }
   }
#endif
}
4365
4366static void trace_debug_ranges (const DebugInfo* di,
4367                                __attribute__((noreturn)) void (*barf)( const HChar* ),
4368                                DiSlice escn_debug_ranges)
4369{
4370   Cursor ranges; /* for showing .debug_ranges */
4371   Addr  dr_base;
4372   UWord dr_offset;
4373   Bool td3 = di->trace_symtab;
4374
4375   /* Display .debug_ranges */
4376   TRACE_SYMTAB("\n");
4377   TRACE_SYMTAB("\n------ The contents of .debug_ranges ------\n");
4378   TRACE_SYMTAB("    Offset   Begin    End\n");
4379   if (ML_(sli_is_valid)(escn_debug_ranges)) {
4380      init_Cursor( &ranges, escn_debug_ranges, 0, barf,
4381                   "Overrun whilst reading .debug_ranges section(1)" );
4382      dr_base = 0;
4383      dr_offset = 0;
4384      while (True) {
4385         UWord  w1, w2;
4386
4387         if (is_at_end_Cursor( &ranges ))
4388            break;
4389
4390         /* Read a (host-)word pair.  This is something of a hack since
4391            the word size to read is really dictated by the ELF file;
4392            however, we assume we're reading a file with the same
4393            word-sizeness as the host.  Reasonably enough. */
4394         w1 = get_UWord( &ranges );
4395         w2 = get_UWord( &ranges );
4396
4397         if (w1 == 0 && w2 == 0) {
4398            /* end of list.  reset 'base' */
4399            TRACE_D3("    %08lx <End of list>\n", dr_offset);
4400            dr_base = 0;
4401            dr_offset = get_position_of_Cursor( &ranges );
4402            continue;
4403         }
4404
4405         if (w1 == -1UL) {
4406            /* new value for 'base' */
4407            TRACE_D3("    %08lx %16lx %08lx (base address)\n",
4408                     dr_offset, w1, w2);
4409            dr_base = w2;
4410            continue;
4411         }
4412
4413         /* else a range [w1+base, w2+base) is denoted */
4414         TRACE_D3("    %08lx %08lx %08lx\n",
4415                  dr_offset, w1 + dr_base, w2 + dr_base);
4416      }
4417   }
4418}
4419
4420static void trace_debug_abbrev (const DebugInfo* di,
4421                                __attribute__((noreturn)) void (*barf)( const HChar* ),
4422                                DiSlice escn_debug_abbv)
4423{
4424   Cursor abbv; /* for showing .debug_abbrev */
4425   Bool td3 = di->trace_symtab;
4426
4427   /* Display .debug_abbrev */
4428   TRACE_SYMTAB("\n");
4429   TRACE_SYMTAB("\n------ The contents of .debug_abbrev ------\n");
4430   if (ML_(sli_is_valid)(escn_debug_abbv)) {
4431      init_Cursor( &abbv, escn_debug_abbv, 0, barf,
4432                   "Overrun whilst reading .debug_abbrev section" );
4433      while (True) {
4434         if (is_at_end_Cursor( &abbv ))
4435            break;
4436         /* Read one abbreviation table */
4437         TRACE_D3("  Number TAG\n");
4438         while (True) {
4439            ULong atag;
4440            UInt  has_children;
4441            ULong acode = get_ULEB128( &abbv );
4442            if (acode == 0) break; /* end of the table */
4443            atag = get_ULEB128( &abbv );
4444            has_children = get_UChar( &abbv );
4445            TRACE_D3("   %llu      %s    [%s]\n",
4446                     acode, ML_(pp_DW_TAG)(atag),
4447                            ML_(pp_DW_children)(has_children));
4448            while (True) {
4449               ULong at_name = get_ULEB128( &abbv );
4450               ULong at_form = get_ULEB128( &abbv );
4451               if (at_name == 0 && at_form == 0) break;
4452               TRACE_D3("    %-18s %s\n",
4453                        ML_(pp_DW_AT)(at_name), ML_(pp_DW_FORM)(at_form));
4454            }
4455         }
4456      }
4457   }
4458}
4459
4460static
4461void new_dwarf3_reader_wrk (
4462   DebugInfo* di,
4463   __attribute__((noreturn)) void (*barf)( const HChar* ),
4464   DiSlice escn_debug_info,      DiSlice escn_debug_types,
4465   DiSlice escn_debug_abbv,      DiSlice escn_debug_line,
4466   DiSlice escn_debug_str,       DiSlice escn_debug_ranges,
4467   DiSlice escn_debug_loc,       DiSlice escn_debug_info_alt,
4468   DiSlice escn_debug_abbv_alt,  DiSlice escn_debug_line_alt,
4469   DiSlice escn_debug_str_alt
4470)
4471{
4472   XArray* /* of TyEnt */     tyents = NULL;
4473   XArray* /* of TyEnt */     tyents_to_keep = NULL;
4474   XArray* /* of GExpr* */    gexprs = NULL;
4475   XArray* /* of TempVar* */  tempvars = NULL;
4476   WordFM* /* of (XArray* of AddrRange, void) */ rangestree = NULL;
4477   TyEntIndexCache* tyents_cache = NULL;
4478   TyEntIndexCache* tyents_to_keep_cache = NULL;
4479   TempVar *varp, *varp2;
4480   GExpr* gexpr;
4481   Cursor info; /* primary cursor for parsing .debug_info */
4482   D3TypeParser typarser;
4483   D3VarParser varparser;
4484   D3InlParser inlparser;
4485   Word  i, j, n;
4486   Bool td3 = di->trace_symtab;
4487   XArray* /* of TempVar* */ dioff_lookup_tab;
4488   Int pass;
4489   VgHashTable *signature_types = NULL;
4490
4491   /* Display/trace various information, if requested. */
4492   if (TD3) {
4493      trace_debug_loc    (di, barf, escn_debug_loc);
4494      trace_debug_ranges (di, barf, escn_debug_ranges);
4495      trace_debug_abbrev (di, barf, escn_debug_abbv);
4496      TRACE_SYMTAB("\n");
4497   }
4498
4499   /* Zero out all parsers. Parsers will really be initialised
4500      according to VG_(clo_read_*_info). */
4501   VG_(memset)( &inlparser, 0, sizeof(inlparser) );
4502
4503   if (VG_(clo_read_var_info)) {
4504      /* We'll park the harvested type information in here.  Also create
4505         a fake "void" entry with offset D3_FAKEVOID_CUOFF, so we always
4506         have at least one type entry to refer to.  D3_FAKEVOID_CUOFF is
4507         huge and presumably will not occur in any valid DWARF3 file --
4508         it would need to have a .debug_info section 4GB long for that to
4509         happen.  These type entries end up in the DebugInfo. */
4510      tyents = VG_(newXA)( ML_(dinfo_zalloc),
4511                           "di.readdwarf3.ndrw.1 (TyEnt temp array)",
4512                           ML_(dinfo_free), sizeof(TyEnt) );
4513      { TyEnt tyent;
4514        VG_(memset)(&tyent, 0, sizeof(tyent));
4515        tyent.tag   = Te_TyVoid;
4516        tyent.cuOff = D3_FAKEVOID_CUOFF;
4517        tyent.Te.TyVoid.isFake = True;
4518        VG_(addToXA)( tyents, &tyent );
4519      }
4520      { TyEnt tyent;
4521        VG_(memset)(&tyent, 0, sizeof(tyent));
4522        tyent.tag   = Te_UNKNOWN;
4523        tyent.cuOff = D3_INVALID_CUOFF;
4524        VG_(addToXA)( tyents, &tyent );
4525      }
4526
4527      /* This is a tree used to unique-ify the range lists that are
4528         manufactured by parse_var_DIE.  References to the keys in the
4529         tree wind up in .rngMany fields in TempVars.  We'll need to
4530         delete this tree, and the XArrays attached to it, at the end of
4531         this function. */
4532      rangestree = VG_(newFM)( ML_(dinfo_zalloc),
4533                               "di.readdwarf3.ndrw.2 (rangestree)",
4534                               ML_(dinfo_free),
4535                               (Word(*)(UWord,UWord))cmp__XArrays_of_AddrRange );
4536
4537      /* List of variables we're accumulating.  These don't end up in the
4538         DebugInfo; instead their contents are handed to ML_(addVar) and
4539         the list elements are then deleted. */
4540      tempvars = VG_(newXA)( ML_(dinfo_zalloc),
4541                             "di.readdwarf3.ndrw.3 (TempVar*s array)",
4542                             ML_(dinfo_free),
4543                             sizeof(TempVar*) );
4544
4545      /* List of GExprs we're accumulating.  These wind up in the
4546         DebugInfo. */
4547      gexprs = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.4",
4548                           ML_(dinfo_free), sizeof(GExpr*) );
4549
4550      /* We need a D3TypeParser to keep track of partially constructed
4551         types.  It'll be discarded as soon as we've completed the CU,
4552         since the resulting information is tipped in to 'tyents' as it
4553         is generated. */
4554      type_parser_init(&typarser);
4555
4556      var_parser_init(&varparser);
4557
4558      signature_types = VG_(HT_construct) ("signature_types");
4559   }
4560
4561   /* Do an initial pass to scan the .debug_types section, if any, and
4562      fill in the signatured types hash table.  This lets us handle
4563      mapping from a type signature to a (cooked) DIE offset directly
4564      in get_Form_contents.  */
4565   if (VG_(clo_read_var_info) && ML_(sli_is_valid)(escn_debug_types)) {
4566      init_Cursor( &info, escn_debug_types, 0, barf,
4567                   "Overrun whilst reading .debug_types section" );
4568      TRACE_D3("\n------ Collecting signatures from "
4569               ".debug_types section ------\n");
4570
4571      while (True) {
4572         UWord   cu_start_offset, cu_offset_now;
4573         CUConst cc;
4574
4575         cu_start_offset = get_position_of_Cursor( &info );
4576         TRACE_D3("\n");
4577         TRACE_D3("  Compilation Unit @ offset 0x%lx:\n", cu_start_offset);
4578         /* parse_CU_header initialises the CU's abbv hash table.  */
4579         parse_CU_Header( &cc, td3, &info, escn_debug_abbv, True, False );
4580
4581         /* Needed by cook_die.  */
4582         cc.types_cuOff_bias = escn_debug_info.szB;
4583
4584         record_signatured_type( signature_types, cc.type_signature,
4585                                 cook_die( &cc, cc.type_offset ));
4586
4587         /* Until proven otherwise we assume we don't need the icc9
4588            workaround in this case; see the DIE-reading loop below
4589            for details.  */
4590         cu_offset_now = (cu_start_offset + cc.unit_length
4591                          + (cc.is_dw64 ? 12 : 4));
4592
4593         if (cu_offset_now >= escn_debug_types.szB) {
4594            clear_CUConst ( &cc);
4595            break;
4596         }
4597
4598         set_position_of_Cursor ( &info, cu_offset_now );
4599      }
4600   }
4601
4602   /* Perform three DIE-reading passes.  The first pass reads DIEs from
4603      alternate .debug_info (if any), the second pass reads DIEs from
4604      .debug_info, and the third pass reads DIEs from .debug_types.
4605      Moving the body of this loop into a separate function would
4606      require a large number of arguments to be passed in, so it is
4607      kept inline instead.  */
4608   for (pass = 0; pass < 3; ++pass) {
4609      ULong section_size;
4610
4611      if (pass == 0) {
4612         if (!ML_(sli_is_valid)(escn_debug_info_alt))
4613	    continue;
4614         /* Now loop over the Compilation Units listed in the alternate
4615            .debug_info section (see D3SPEC sec 7.5) paras 1 and 2.
4616            Each compilation unit contains a Compilation Unit Header
4617            followed by precisely one DW_TAG_compile_unit or
4618            DW_TAG_partial_unit DIE. */
4619         init_Cursor( &info, escn_debug_info_alt, 0, barf,
4620                      "Overrun whilst reading alternate .debug_info section" );
4621         section_size = escn_debug_info_alt.szB;
4622
4623         TRACE_D3("\n------ Parsing alternate .debug_info section ------\n");
4624      } else if (pass == 1) {
4625         /* Now loop over the Compilation Units listed in the .debug_info
4626            section (see D3SPEC sec 7.5) paras 1 and 2.  Each compilation
4627            unit contains a Compilation Unit Header followed by precisely
4628            one DW_TAG_compile_unit or DW_TAG_partial_unit DIE. */
4629         init_Cursor( &info, escn_debug_info, 0, barf,
4630                      "Overrun whilst reading .debug_info section" );
4631         section_size = escn_debug_info.szB;
4632
4633         TRACE_D3("\n------ Parsing .debug_info section ------\n");
4634      } else {
4635         if (!ML_(sli_is_valid)(escn_debug_types))
4636            continue;
4637         if (!VG_(clo_read_var_info))
4638            continue; // Types not needed when only reading inline info.
4639         init_Cursor( &info, escn_debug_types, 0, barf,
4640                      "Overrun whilst reading .debug_types section" );
4641         section_size = escn_debug_types.szB;
4642
4643         TRACE_D3("\n------ Parsing .debug_types section ------\n");
4644      }
4645
4646      while (True) {
4647         ULong   cu_start_offset, cu_offset_now;
4648         CUConst cc;
4649         /* It may be that the stated size of this CU is larger than the
4650            amount of stuff actually in it.  icc9 seems to generate CUs
4651            thusly.  We use these variables to figure out if this is
4652            indeed the case, and if so how many bytes we need to skip to
4653            get to the start of the next CU.  Not skipping those bytes
4654            causes us to misidentify the start of the next CU, and it all
4655            goes badly wrong after that (not surprisingly). */
4656         UWord cu_size_including_IniLen, cu_amount_used;
4657
4658         /* It seems icc9 finishes the DIE info before debug_info_sz
4659            bytes have been used up.  So be flexible, and declare the
4660            sequence complete if there is not enough remaining bytes to
4661            hold even the smallest conceivable CU header.  (11 bytes I
4662            reckon). */
4663         /* JRS 23Jan09: I suspect this is no longer necessary now that
4664            the code below contains a 'while (cu_amount_used <
4665            cu_size_including_IniLen ...'  style loop, which skips over
4666            any leftover bytes at the end of a CU in the case where the
4667            CU's stated size is larger than its actual size (as
4668            determined by reading all its DIEs).  However, for prudence,
4669            I'll leave the following test in place.  I can't see that a
4670            CU header can be smaller than 11 bytes, so I don't think
4671            there's any harm possible through the test -- it just adds
4672            robustness. */
4673         Word avail = get_remaining_length_Cursor( &info );
4674         if (avail < 11) {
4675            if (avail > 0)
4676               TRACE_D3("new_dwarf3_reader_wrk: warning: "
4677                        "%ld unused bytes after end of DIEs\n", avail);
4678            break;
4679         }
4680
4681         if (VG_(clo_read_var_info)) {
4682            /* Check the varparser's stack is in a sane state. */
4683            vg_assert(varparser.sp == -1);
4684            /* Check the typarser's stack is in a sane state. */
4685            vg_assert(typarser.sp == -1);
4686         }
4687
4688         cu_start_offset = get_position_of_Cursor( &info );
4689         TRACE_D3("\n");
4690         TRACE_D3("  Compilation Unit @ offset 0x%llx:\n", cu_start_offset);
4691         /* parse_CU_header initialises the CU's hashtable of abbvs ht_abbvs */
4692         if (pass == 0) {
4693            parse_CU_Header( &cc, td3, &info, escn_debug_abbv_alt,
4694                             False, True );
4695         } else {
4696            parse_CU_Header( &cc, td3, &info, escn_debug_abbv,
4697                             pass == 2, False );
4698         }
4699         cc.escn_debug_str      = pass == 0 ? escn_debug_str_alt
4700                                            : escn_debug_str;
4701         cc.escn_debug_ranges   = escn_debug_ranges;
4702         cc.escn_debug_loc      = escn_debug_loc;
4703         cc.escn_debug_line     = pass == 0 ? escn_debug_line_alt
4704                                            : escn_debug_line;
4705         cc.escn_debug_info     = pass == 0 ? escn_debug_info_alt
4706                                            : escn_debug_info;
4707         cc.escn_debug_types    = escn_debug_types;
4708         cc.escn_debug_info_alt = escn_debug_info_alt;
4709         cc.escn_debug_str_alt  = escn_debug_str_alt;
4710         cc.types_cuOff_bias    = escn_debug_info.szB;
4711         cc.alt_cuOff_bias      = escn_debug_info.szB + escn_debug_types.szB;
4712         cc.cu_start_offset     = cu_start_offset;
4713         cc.di = di;
4714         /* The CU's svma can be deduced by looking at the AT_low_pc
4715            value in the top level TAG_compile_unit, which is the topmost
4716            DIE.  We'll leave it for the 'varparser' to acquire that info
4717            and fill it in -- since it is the only party to want to know
4718            it. */
4719         cc.cu_svma_known = False;
4720         cc.cu_svma       = 0;
4721
4722         if (VG_(clo_read_var_info)) {
4723            cc.signature_types = signature_types;
4724
4725            /* Create a fake outermost-level range covering the entire
4726               address range.  So we always have *something* to catch all
4727               variable declarations. */
4728            varstack_push( &cc, &varparser, td3,
4729                           unitary_range_list(0UL, ~0UL),
4730                           -1, False/*isFunc*/, NULL/*fbGX*/ );
4731
4732            /* And set up the fndn_ix_Table.  When we come across the top
4733               level DIE for this CU (which is what the next call to
4734               read_DIE should process) we will copy all the file names out
4735               of the .debug_line img area and use this table to look up the
4736               copies when we later see filename numbers in DW_TAG_variables
4737               etc. */
4738            vg_assert(!varparser.fndn_ix_Table );
4739            varparser.fndn_ix_Table
4740               = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.5var",
4741                             ML_(dinfo_free),
4742                             sizeof(UInt) );
4743         }
4744
4745         if (VG_(clo_read_inline_info)) {
4746            /* fndn_ix_Table for the inlined call parser */
4747            vg_assert(!inlparser.fndn_ix_Table );
4748            inlparser.fndn_ix_Table
4749               = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.5inl",
4750                             ML_(dinfo_free),
4751                             sizeof(UInt) );
4752         }
4753
4754         /* Now read the one-and-only top-level DIE for this CU. */
4755         vg_assert(!VG_(clo_read_var_info) || varparser.sp == 0);
4756         read_DIE( rangestree,
4757                   tyents, tempvars, gexprs,
4758                   &typarser, &varparser, &inlparser,
4759                   &info, td3, &cc, 0 );
4760
4761         cu_offset_now = get_position_of_Cursor( &info );
4762
4763         if (0) VG_(printf)("Travelled: %llu  size %llu\n",
4764                            cu_offset_now - cc.cu_start_offset,
4765                            cc.unit_length + (cc.is_dw64 ? 12 : 4));
4766
4767         /* How big the CU claims it is .. */
4768         cu_size_including_IniLen = cc.unit_length + (cc.is_dw64 ? 12 : 4);
4769         /* .. vs how big we have found it to be */
4770         cu_amount_used = cu_offset_now - cc.cu_start_offset;
4771
4772         if (1) TRACE_D3("offset now %lld, d-i-size %lld\n",
4773                         cu_offset_now, section_size);
4774         if (cu_offset_now > section_size)
4775            barf("toplevel DIEs beyond end of CU");
4776
4777         /* If the CU is bigger than it claims to be, we've got a serious
4778            problem. */
4779         if (cu_amount_used > cu_size_including_IniLen)
4780            barf("CU's actual size appears to be larger than it claims it is");
4781
         /* If the CU is smaller than it claims to be, we need to skip some
            bytes.  Loop updates cu_offset_now and cu_amount_used. */
4784         while (cu_amount_used < cu_size_including_IniLen
4785                && get_remaining_length_Cursor( &info ) > 0) {
4786            if (0) VG_(printf)("SKIP\n");
4787            (void)get_UChar( &info );
4788            cu_offset_now = get_position_of_Cursor( &info );
4789            cu_amount_used = cu_offset_now - cc.cu_start_offset;
4790         }
4791
4792         if (VG_(clo_read_var_info)) {
4793            /* Preen to level -2.  DIEs have level >= 0 so -2 cannot occur
4794               anywhere else at all.  Our fake the-entire-address-space
4795               range is at level -1, so preening to -2 should completely
4796               empty the stack out. */
4797            TRACE_D3("\n");
4798            varstack_preen( &varparser, td3, -2 );
4799            /* Similarly, empty the type stack out. */
4800            typestack_preen( &typarser, td3, -2 );
4801         }
4802
4803         if (VG_(clo_read_var_info)) {
4804            vg_assert(varparser.fndn_ix_Table );
4805            VG_(deleteXA)( varparser.fndn_ix_Table );
4806            varparser.fndn_ix_Table = NULL;
4807         }
4808         if (VG_(clo_read_inline_info)) {
4809            vg_assert(inlparser.fndn_ix_Table );
4810            VG_(deleteXA)( inlparser.fndn_ix_Table );
4811            inlparser.fndn_ix_Table = NULL;
4812         }
4813         clear_CUConst(&cc);
4814
4815         if (cu_offset_now == section_size)
4816            break;
4817         /* else keep going */
4818      }
4819   }
4820
4821
4822   if (VG_(clo_read_var_info)) {
4823      /* From here on we're post-processing the stuff we got
4824         out of the .debug_info section. */
4825      if (TD3) {
4826         TRACE_D3("\n");
4827         ML_(pp_TyEnts)(tyents, "Initial type entity (TyEnt) array");
4828         TRACE_D3("\n");
4829         TRACE_D3("------ Compressing type entries ------\n");
4830      }
4831
4832      tyents_cache = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.6",
4833                                        sizeof(TyEntIndexCache) );
4834      ML_(TyEntIndexCache__invalidate)( tyents_cache );
4835      dedup_types( td3, tyents, tyents_cache );
4836      if (TD3) {
4837         TRACE_D3("\n");
4838         ML_(pp_TyEnts)(tyents, "After type entity (TyEnt) compression");
4839      }
4840
4841      TRACE_D3("\n");
4842      TRACE_D3("------ Resolving the types of variables ------\n" );
4843      resolve_variable_types( barf, tyents, tyents_cache, tempvars );
4844
4845      /* Copy all the non-INDIR tyents into a new table.  For large
4846         .so's, about 90% of the tyents will by now have been resolved to
4847         INDIRs, and we no longer need them, and so don't need to store
4848         them. */
4849      tyents_to_keep
4850         = VG_(newXA)( ML_(dinfo_zalloc),
4851                       "di.readdwarf3.ndrw.7 (TyEnt to-keep array)",
4852                       ML_(dinfo_free), sizeof(TyEnt) );
4853      n = VG_(sizeXA)( tyents );
4854      for (i = 0; i < n; i++) {
4855         TyEnt* ent = VG_(indexXA)( tyents, i );
4856         if (ent->tag != Te_INDIR)
4857            VG_(addToXA)( tyents_to_keep, ent );
4858      }
4859
4860      VG_(deleteXA)( tyents );
4861      tyents = NULL;
4862      ML_(dinfo_free)( tyents_cache );
4863      tyents_cache = NULL;
4864
4865      /* Sort tyents_to_keep so we can lookup in it.  A complete (if
4866         minor) waste of time, since tyents itself is sorted, but
4867         necessary since VG_(lookupXA) refuses to cooperate if we
4868         don't. */
4869      VG_(setCmpFnXA)( tyents_to_keep, (XACmpFn_t) ML_(TyEnt__cmp_by_cuOff_only) );
4870      VG_(sortXA)( tyents_to_keep );
4871
4872      /* Enable cacheing on tyents_to_keep */
4873      tyents_to_keep_cache
4874         = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.8",
4875                              sizeof(TyEntIndexCache) );
4876      ML_(TyEntIndexCache__invalidate)( tyents_to_keep_cache );
4877
4878      /* And record the tyents in the DebugInfo.  We do this before
4879         starting to hand variables to ML_(addVar), since if ML_(addVar)
4880         wants to do debug printing (of the types of said vars) then it
4881         will need the tyents.*/
4882      vg_assert(!di->admin_tyents);
4883      di->admin_tyents = tyents_to_keep;
4884
4885      /* Bias all the location expressions. */
4886      TRACE_D3("\n");
4887      TRACE_D3("------ Biasing the location expressions ------\n" );
4888
4889      n = VG_(sizeXA)( gexprs );
4890      for (i = 0; i < n; i++) {
4891         gexpr = *(GExpr**)VG_(indexXA)( gexprs, i );
4892         bias_GX( gexpr, di );
4893      }
4894
4895      TRACE_D3("\n");
4896      TRACE_D3("------ Acquired the following variables: ------\n\n");
4897
4898      /* Park (pointers to) all the vars in an XArray, so we can look up
4899         abstract origins quickly.  The array is sorted (hence, looked-up
4900         by) the .dioff fields.  Since the .dioffs should be in strictly
4901         ascending order, there is no need to sort the array after
4902         construction.  The ascendingness is however asserted for. */
4903      dioff_lookup_tab
4904         = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.9",
4905                       ML_(dinfo_free),
4906                       sizeof(TempVar*) );
4907
4908      n = VG_(sizeXA)( tempvars );
4909      Word first_primary_var = 0;
4910      for (first_primary_var = 0;
4911           escn_debug_info_alt.szB/*really?*/ && first_primary_var < n;
4912           first_primary_var++) {
4913         varp = *(TempVar**)VG_(indexXA)( tempvars, first_primary_var );
4914         if (varp->dioff < escn_debug_info.szB + escn_debug_types.szB)
4915            break;
4916      }
4917      for (i = 0; i < n; i++) {
4918         varp = *(TempVar**)VG_(indexXA)( tempvars, (i + first_primary_var) % n );
4919         if (i > first_primary_var) {
4920            varp2 = *(TempVar**)VG_(indexXA)( tempvars,
4921                                              (i + first_primary_var - 1) % n );
4922            /* why should this hold?  Only, I think, because we've
4923               constructed the array by reading .debug_info sequentially,
4924               and so the array .dioff fields should reflect that, and be
4925               strictly ascending. */
4926            vg_assert(varp2->dioff < varp->dioff);
4927         }
4928         VG_(addToXA)( dioff_lookup_tab, &varp );
4929      }
4930      VG_(setCmpFnXA)( dioff_lookup_tab, cmp_TempVar_by_dioff );
4931      VG_(sortXA)( dioff_lookup_tab ); /* POINTLESS; FIXME: rm */
4932
4933      /* Now visit each var.  Collect up as much info as possible for
4934         each var and hand it to ML_(addVar). */
4935      n = VG_(sizeXA)( tempvars );
4936      for (j = 0; j < n; j++) {
4937         TyEnt* ent;
4938         varp = *(TempVar**)VG_(indexXA)( tempvars, j );
4939
4940         /* Possibly show .. */
4941         if (TD3) {
4942            VG_(printf)("<%lx> addVar: level %d: %s :: ",
4943                        varp->dioff,
4944                        varp->level,
4945                        varp->name ? varp->name : "<anon_var>" );
4946            if (varp->typeR) {
4947               ML_(pp_TyEnt_C_ishly)( tyents_to_keep, varp->typeR );
4948            } else {
4949               VG_(printf)("NULL");
4950            }
4951            VG_(printf)("\n  Loc=");
4952            if (varp->gexpr) {
4953               ML_(pp_GX)(varp->gexpr);
4954            } else {
4955               VG_(printf)("NULL");
4956            }
4957            VG_(printf)("\n");
4958            if (varp->fbGX) {
4959               VG_(printf)("  FrB=");
4960               ML_(pp_GX)( varp->fbGX );
4961               VG_(printf)("\n");
4962            } else {
4963               VG_(printf)("  FrB=none\n");
4964            }
4965            VG_(printf)("  declared at: %d %s:%d\n",
4966                        varp->fndn_ix,
4967                        ML_(fndn_ix2filename) (di, varp->fndn_ix),
4968                        varp->fLine );
4969            if (varp->absOri != (UWord)D3_INVALID_CUOFF)
4970               VG_(printf)("  abstract origin: <%lx>\n", varp->absOri);
4971         }
4972
4973         /* Skip variables which have no location.  These must be
4974            abstract instances; they are useless as-is since with no
4975            location they have no specified memory location.  They will
4976            presumably be referred to via the absOri fields of other
4977            variables. */
4978         if (!varp->gexpr) {
4979            TRACE_D3("  SKIP (no location)\n\n");
4980            continue;
4981         }
4982
4983         /* So it has a location, at least.  If it refers to some other
4984            entry through its absOri field, pull in further info through
4985            that. */
4986         if (varp->absOri != (UWord)D3_INVALID_CUOFF) {
4987            Bool found;
4988            Word ixFirst, ixLast;
4989            TempVar key;
4990            TempVar* keyp = &key;
4991            TempVar *varAI;
4992            VG_(memset)(&key, 0, sizeof(key)); /* not necessary */
4993            key.dioff = varp->absOri; /* this is what we want to find */
4994            found = VG_(lookupXA)( dioff_lookup_tab, &keyp,
4995                                   &ixFirst, &ixLast );
4996            if (!found) {
4997               /* barf("DW_AT_abstract_origin can't be resolved"); */
4998               TRACE_D3("  SKIP (DW_AT_abstract_origin can't be resolved)\n\n");
4999               continue;
5000            }
5001            /* If the following fails, there is more than one entry with
5002               the same dioff.  Which can't happen. */
5003            vg_assert(ixFirst == ixLast);
5004            varAI = *(TempVar**)VG_(indexXA)( dioff_lookup_tab, ixFirst );
5005            /* stay sane */
5006            vg_assert(varAI);
5007            vg_assert(varAI->dioff == varp->absOri);
5008
5009            /* Copy what useful info we can. */
5010            if (varAI->typeR && !varp->typeR)
5011               varp->typeR = varAI->typeR;
5012            if (varAI->name && !varp->name)
5013               varp->name = varAI->name;
5014            if (varAI->fndn_ix && !varp->fndn_ix)
5015               varp->fndn_ix = varAI->fndn_ix;
5016            if (varAI->fLine > 0 && varp->fLine == 0)
5017               varp->fLine = varAI->fLine;
5018         }
5019
5020         /* Give it a name if it doesn't have one. */
5021         if (!varp->name)
5022            varp->name = ML_(addStr)( di, "<anon_var>", -1 );
5023
5024         /* So now does it have enough info to be useful? */
5025         /* NOTE: re typeR: this is a hack.  If typeR is Te_UNKNOWN then
5026            the type didn't get resolved.  Really, in that case
5027            something's broken earlier on, and should be fixed, rather
5028            than just skipping the variable. */
5029         ent = ML_(TyEnts__index_by_cuOff)( tyents_to_keep,
5030                                            tyents_to_keep_cache,
5031                                            varp->typeR );
5032         /* The next two assertions should be guaranteed by
5033            our previous call to resolve_variable_types. */
5034         vg_assert(ent);
5035         vg_assert(ML_(TyEnt__is_type)(ent) || ent->tag == Te_UNKNOWN);
5036
5037         if (ent->tag == Te_UNKNOWN) continue;
5038
5039         vg_assert(varp->gexpr);
5040         vg_assert(varp->name);
5041         vg_assert(varp->typeR);
5042         vg_assert(varp->level >= 0);
5043
5044         /* Ok.  So we're going to keep it.  Call ML_(addVar) once for
5045            each address range in which the variable exists. */
5046         TRACE_D3("  ACQUIRE for range(s) ");
5047         { AddrRange  oneRange;
5048           AddrRange* varPcRanges;
5049           Word       nVarPcRanges;
5050           /* Set up to iterate over address ranges, however
5051              represented. */
5052           if (varp->nRanges == 0 || varp->nRanges == 1) {
5053              vg_assert(!varp->rngMany);
5054              if (varp->nRanges == 0) {
5055                 vg_assert(varp->rngOneMin == 0);
5056                 vg_assert(varp->rngOneMax == 0);
5057              }
5058              nVarPcRanges = varp->nRanges;
5059              oneRange.aMin = varp->rngOneMin;
5060              oneRange.aMax = varp->rngOneMax;
5061              varPcRanges = &oneRange;
5062           } else {
5063              vg_assert(varp->rngMany);
5064              vg_assert(varp->rngOneMin == 0);
5065              vg_assert(varp->rngOneMax == 0);
5066              nVarPcRanges = VG_(sizeXA)(varp->rngMany);
5067              vg_assert(nVarPcRanges >= 2);
5068              vg_assert(nVarPcRanges == (Word)varp->nRanges);
5069              varPcRanges = VG_(indexXA)(varp->rngMany, 0);
5070           }
5071           if (varp->level == 0)
5072              vg_assert( nVarPcRanges == 1 );
5073           /* and iterate */
5074           for (i = 0; i < nVarPcRanges; i++) {
5075              Addr pcMin = varPcRanges[i].aMin;
5076              Addr pcMax = varPcRanges[i].aMax;
5077              vg_assert(pcMin <= pcMax);
5078              /* Level 0 is the global address range.  So at level 0 we
5079                 don't want to bias pcMin/pcMax; but at all other levels
5080                 we do since those are derived from svmas in the Dwarf
5081                 we're reading.  Be paranoid ... */
5082              if (varp->level == 0) {
5083                 vg_assert(pcMin == (Addr)0);
5084                 vg_assert(pcMax == ~(Addr)0);
5085              } else {
5086                 /* vg_assert(pcMin > (Addr)0);
5087                    No .. we can legitimately expect to see ranges like
5088                    0x0-0x11D (pre-biasing, of course). */
5089                 vg_assert(pcMax < ~(Addr)0);
5090              }
5091
5092              /* Apply text biasing, for non-global variables. */
5093              if (varp->level > 0) {
5094                 pcMin += di->text_debug_bias;
5095                 pcMax += di->text_debug_bias;
5096              }
5097
5098              if (i > 0 && (i%2) == 0)
5099                 TRACE_D3("\n                       ");
5100              TRACE_D3("[%#lx,%#lx] ", pcMin, pcMax );
5101
5102              ML_(addVar)(
5103                 di, varp->level,
5104                     pcMin, pcMax,
5105                     varp->name,  varp->typeR,
5106                     varp->gexpr, varp->fbGX,
5107                     varp->fndn_ix, varp->fLine, td3
5108              );
5109           }
5110         }
5111
5112         TRACE_D3("\n\n");
5113         /* and move on to the next var */
5114      }
5115
5116      /* Now free all the TempVars */
5117      n = VG_(sizeXA)( tempvars );
5118      for (i = 0; i < n; i++) {
5119         varp = *(TempVar**)VG_(indexXA)( tempvars, i );
5120         ML_(dinfo_free)(varp);
5121      }
5122      VG_(deleteXA)( tempvars );
5123      tempvars = NULL;
5124
5125      /* and the temp lookup table */
5126      VG_(deleteXA)( dioff_lookup_tab );
5127
5128      /* and the ranges tree.  Note that we need to also free the XArrays
5129         which constitute the keys, hence pass VG_(deleteXA) as a
5130         key-finalizer. */
5131      VG_(deleteFM)( rangestree, (void(*)(UWord))VG_(deleteXA), NULL );
5132
5133      /* and the tyents_to_keep cache */
5134      ML_(dinfo_free)( tyents_to_keep_cache );
5135      tyents_to_keep_cache = NULL;
5136
5137      vg_assert( varparser.fndn_ix_Table == NULL );
5138
5139      /* And the signatured type hash.  */
5140      VG_(HT_destruct) ( signature_types, ML_(dinfo_free) );
5141
5142      /* record the GExprs in di so they can be freed later */
5143      vg_assert(!di->admin_gexprs);
5144      di->admin_gexprs = gexprs;
5145   }
5146
5147   // Free up dynamically allocated memory
5148   if (VG_(clo_read_var_info)) {
5149      type_parser_release(&typarser);
5150      var_parser_release(&varparser);
5151   }
5152}
5153
5154
5155/*------------------------------------------------------------*/
5156/*---                                                      ---*/
5157/*--- The "new" DWARF3 reader -- top level control logic   ---*/
5158/*---                                                      ---*/
5159/*------------------------------------------------------------*/
5160
5161static Bool               d3rd_jmpbuf_valid  = False;
5162static const HChar*       d3rd_jmpbuf_reason = NULL;
5163static VG_MINIMAL_JMP_BUF(d3rd_jmpbuf);
5164
5165static __attribute__((noreturn)) void barf ( const HChar* reason ) {
5166   vg_assert(d3rd_jmpbuf_valid);
5167   d3rd_jmpbuf_reason = reason;
5168   VG_MINIMAL_LONGJMP(d3rd_jmpbuf);
5169   /*NOTREACHED*/
5170   vg_assert(0);
5171}
5172
5173
5174void
5175ML_(new_dwarf3_reader) (
5176   DebugInfo* di,
5177   DiSlice escn_debug_info,      DiSlice escn_debug_types,
5178   DiSlice escn_debug_abbv,      DiSlice escn_debug_line,
5179   DiSlice escn_debug_str,       DiSlice escn_debug_ranges,
5180   DiSlice escn_debug_loc,       DiSlice escn_debug_info_alt,
5181   DiSlice escn_debug_abbv_alt,  DiSlice escn_debug_line_alt,
5182   DiSlice escn_debug_str_alt
5183)
5184{
5185   volatile Int  jumped;
5186   volatile Bool td3 = di->trace_symtab;
5187
5188   /* Run the _wrk function to read the dwarf3.  If it succeeds, it
5189      just returns normally.  If there is any failure, it longjmp's
5190      back here, having first set d3rd_jmpbuf_reason to something
5191      useful. */
5192   vg_assert(d3rd_jmpbuf_valid  == False);
5193   vg_assert(d3rd_jmpbuf_reason == NULL);
5194
5195   d3rd_jmpbuf_valid = True;
5196   jumped = VG_MINIMAL_SETJMP(d3rd_jmpbuf);
5197   if (jumped == 0) {
5198      /* try this ... */
5199      new_dwarf3_reader_wrk( di, barf,
5200                             escn_debug_info,     escn_debug_types,
5201                             escn_debug_abbv,     escn_debug_line,
5202                             escn_debug_str,      escn_debug_ranges,
5203                             escn_debug_loc,      escn_debug_info_alt,
5204                             escn_debug_abbv_alt, escn_debug_line_alt,
5205                             escn_debug_str_alt );
5206      d3rd_jmpbuf_valid = False;
5207      TRACE_D3("\n------ .debug_info reading was successful ------\n");
5208   } else {
5209      /* It longjmp'd. */
5210      d3rd_jmpbuf_valid = False;
5211      /* Can't longjump without giving some sort of reason. */
5212      vg_assert(d3rd_jmpbuf_reason != NULL);
5213
5214      TRACE_D3("\n------ .debug_info reading failed ------\n");
5215
5216      ML_(symerr)(di, True, d3rd_jmpbuf_reason);
5217   }
5218
5219   d3rd_jmpbuf_valid  = False;
5220   d3rd_jmpbuf_reason = NULL;
5221}
5222
5223
5224
5225/* --- Unused code fragments which might be useful one day. --- */
5226
#if 0
   /* Walk the .debug_aranges section and trace its contents.  Not
      compiled.
      NOTE(review): the init_Cursor call below passes an image pointer
      and a size separately, whereas the live call in this file passes
      a DiSlice -- presumably this fragment predates that interface;
      confirm before reviving it. */
   TRACE_SYMTAB("\n");
   TRACE_SYMTAB("\n------ The contents of .debug_arange ------\n");
   init_Cursor( &aranges, debug_aranges_img,
                debug_aranges_sz, 0, barf,
                "Overrun whilst reading .debug_aranges section" );

   /* One iteration per arange set, until the cursor is at its end. */
   while (!is_at_end_Cursor( &aranges )) {
      Bool   is64;
      ULong  len, d_i_offset;
      UShort version;
      UChar  asize, segsize;
      ULong  address, length;

      /* Set header: initial_length, version, offset into .debug_info,
         pointer size, segment size. */
      len = get_Initial_Length( &is64, &aranges,
               "in .debug_aranges: invalid initial-length field" );
      version    = get_UShort( &aranges );
      d_i_offset = get_Dwarfish_UWord( &aranges, is64 );
      asize      = get_UChar( &aranges );
      segsize    = get_UChar( &aranges );

      TRACE_D3("  Length:                   %llu\n", len);
      TRACE_D3("  Version:                  %d\n", (Int)version);
      TRACE_D3("  Offset into .debug_info:  %llx\n", d_i_offset);
      TRACE_D3("  Pointer Size:             %d\n", (Int)asize);
      TRACE_D3("  Segment Size:             %d\n", (Int)segsize);
      TRACE_D3("\n");
      TRACE_D3("    Address            Length\n");

      /* Discard bytes until the cursor position is a multiple of
         twice the pointer size. */
      while ((get_position_of_Cursor( &aranges ) % (2 * asize)) > 0) {
         (void)get_UChar( & aranges );
      }

      /* (address, length) pairs; the all-zero pair is printed and then
         ends the set. */
      do {
         address = get_Dwarfish_UWord( &aranges, asize==8 );
         length  = get_Dwarfish_UWord( &aranges, asize==8 );
         TRACE_D3("    0x%016llx 0x%llx\n", address, length);
      } while (!(address == 0 && length == 0));
   }
   TRACE_SYMTAB("\n");
#endif
5270
5271#endif // defined(VGO_linux) || defined(VGO_darwin)
5272
5273/*--------------------------------------------------------------------*/
5274/*--- end                                                          ---*/
5275/*--------------------------------------------------------------------*/
5276