1/* -*- mode: C; c-basic-offset: 3; -*- */
2
3/*--------------------------------------------------------------------*/
4/*--- Read DWARF3/4 ".debug_info" sections (DIE trees).            ---*/
5/*---                                                 readdwarf3.c ---*/
6/*--------------------------------------------------------------------*/
7
8/*
9   This file is part of Valgrind, a dynamic binary instrumentation
10   framework.
11
12   Copyright (C) 2008-2013 OpenWorks LLP
13      info@open-works.co.uk
14
15   This program is free software; you can redistribute it and/or
16   modify it under the terms of the GNU General Public License as
17   published by the Free Software Foundation; either version 2 of the
18   License, or (at your option) any later version.
19
20   This program is distributed in the hope that it will be useful, but
21   WITHOUT ANY WARRANTY; without even the implied warranty of
22   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23   General Public License for more details.
24
25   You should have received a copy of the GNU General Public License
26   along with this program; if not, write to the Free Software
27   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28   02111-1307, USA.
29
30   The GNU General Public License is contained in the file COPYING.
31
32   Neither the names of the U.S. Department of Energy nor the
33   University of California nor the names of its contributors may be
34   used to endorse or promote products derived from this software
35   without prior written permission.
36*/
37
38#if defined(VGO_linux) || defined(VGO_darwin)
39
40/* REFERENCE (without which this code will not make much sense):
41
42   DWARF Debugging Information Format, Version 3,
43   dated 20 December 2005 (the "D3 spec").
44
45   Available at http://www.dwarfstd.org/Dwarf3.pdf.  There's also a
46   .doc (MS Word) version, but for some reason the section numbers
47   between the Word and PDF versions differ by 1 in the first digit.
48   All section references in this code are to the PDF version.
49
50   CURRENT HACKS:
51
   DW_TAG_{const,volatile}_type with no DW_AT_type is allowed; it is
      assumed to mean "const void" or "volatile void" respectively.
      GDB appears to interpret them like this, anyway.
55
   In many cases it is important to know the svma of a CU (the "base
   address of the CU", as the D3 spec calls it).  There are some
   situations in which the spec implies this value is unknown, but the
   Dwarf3 produced by gcc-4.1 seems to assume it is not unknown but
   merely zero when not explicitly stated.  So we too have to make
   that assumption.
62
63   POTENTIAL BUG?  Spotted 6 Sept 08.  Why doesn't
64   unitary_range_list() bias the resulting range list in the same way
65   that its more general cousin, get_range_list(), does?  I don't
66   know.
67
68   TODO, 2008 Feb 17:
69
70   get rid of cu_svma_known and document the assumed-zero svma hack.
71
72   ML_(sizeOfType): differentiate between zero sized types and types
73   for which the size is unknown.  Is this important?  I don't know.
74
   DW_TAG_array_types: deal with explicit sizes (currently we compute
   the size from the bounds and the element size, although that's
   fragile if the bounds are incompletely specified, or completely
   absent)
79
80   Document reason for difference (by 1) of stack preening depth in
81   parse_var_DIE vs parse_type_DIE.
82
83   Don't hand to ML_(addVars), vars whose locations are entirely in
84   registers (DW_OP_reg*).  This is merely a space-saving
85   optimisation, as ML_(evaluate_Dwarf3_Expr) should handle these
86   expressions correctly, by failing to evaluate them and hence
87   effectively ignoring the variable with which they are associated.
88
89   Deal with DW_TAG_array_types which have element size != stride
90
91   In some cases, the info for a variable is split between two
92   different DIEs (generally a declarer and a definer).  We punt on
93   these.  Could do better here.
94
95   The 'data_bias' argument passed to the expression evaluator
96   (ML_(evaluate_Dwarf3_Expr)) should really be changed to a
97   MaybeUWord, to make it clear when we do vs don't know what it is
98   for the evaluation of an expression.  At the moment zero is passed
99   for this parameter in the don't know case.  That's a bit fragile
100   and obscure; using a MaybeUWord would be clearer.
101
102   POTENTIAL PERFORMANCE IMPROVEMENTS:
103
104   Currently, duplicate removal and all other queries for the type
105   entities array is done using cuOffset-based pointing, which
106   involves a binary search (VG_(lookupXA)) for each access.  This is
107   wildly inefficient, although simple.  It would be better to
108   translate all the cuOffset-based references (iow, all the "R" and
109   "Rs" fields in the TyEnts in 'tyents') to direct index numbers in
110   'tyents' right at the start of dedup_types(), and use direct
111   indexing (VG_(indexXA)) wherever possible after that.
112
113   cmp__XArrays_of_AddrRange is also a performance bottleneck.  Move
114   VG_(indexXA) into pub_tool_xarray.h so it can be inlined at all use
115   points, and possibly also make an _UNCHECKED version which skips
116   the range checks in performance-critical situations such as this.
117
118   Handle interaction between read_DIE and parse_{var,type}_DIE
119   better.  Currently read_DIE reads the entire DIE just to find where
120   the end is (and for debug printing), so that it can later reliably
121   move the cursor to the end regardless of what parse_{var,type}_DIE
122   do.  This means many DIEs (most, even?) are read twice.  It would
123   be smarter to make parse_{var,type}_DIE return a Bool indicating
124   whether or not they advanced the DIE cursor, and only if they
125   didn't should read_DIE itself read through the DIE.
126
127   ML_(addVar) and add_var_to_arange: quite a lot of DiAddrRanges have
128   zero variables in their .vars XArray.  Rather than have an XArray
129   with zero elements (which uses 2 malloc'd blocks), allow the .vars
130   pointer to be NULL in this case.
131
132   More generally, reduce the amount of memory allocated and freed
133   while reading Dwarf3 type/variable information.  Even modest (20MB)
134   objects cause this module to allocate and free hundreds of
135   thousands of small blocks, and ML_(arena_malloc) and its various
136   groupies always show up at the top of performance profiles. */
137
138#include "pub_core_basics.h"
139#include "pub_core_debuginfo.h"
140#include "pub_core_libcbase.h"
141#include "pub_core_libcassert.h"
142#include "pub_core_libcprint.h"
143#include "pub_core_libcsetjmp.h"   // setjmp facilities
144#include "pub_core_hashtable.h"
145#include "pub_core_options.h"
146#include "pub_core_tooliface.h"    /* VG_(needs) */
147#include "pub_core_xarray.h"
148#include "pub_core_wordfm.h"
149#include "priv_misc.h"             /* dinfo_zalloc/free */
150#include "priv_image.h"
151#include "priv_tytypes.h"
152#include "priv_d3basics.h"
153#include "priv_storage.h"
154#include "priv_readdwarf3.h"       /* self */
155
156
157/*------------------------------------------------------------*/
158/*---                                                      ---*/
159/*--- Basic machinery for parsing DIEs.                    ---*/
160/*---                                                      ---*/
161/*------------------------------------------------------------*/
162
/* Debug tracing.  Forwards its arguments to VG_(printf), but only when
   a variable named 'td3' in the enclosing scope is true.  NB: this
   expands to a bare 'if' statement, so it must not be used as the
   unbraced body of an if/else. */
#define TRACE_D3(...) \
   if (td3) { VG_(printf)(__VA_ARGS__); }

/* Two values at the very top of the UWord range.  Judging by their
   names they serve as sentinel cuOffsets ("invalid" and "fake void");
   their uses lie outside this part of the file. */
#define D3_INVALID_CUOFF  ((UWord)(-1UL))
#define D3_FAKEVOID_CUOFF ((UWord)(-2UL))
168
169typedef
170   struct {
171      DiSlice sli;      // to which this cursor applies
172      DiOffT  sli_next; // offset in underlying DiImage; must be >= sli.ioff
173      void (*barf)( const HChar* ) __attribute__((noreturn));
174      const HChar* barfstr;
175   }
176   Cursor;
177
178static inline Bool is_sane_Cursor ( Cursor* c ) {
179   if (!c)                return False;
180   if (!c->barf)          return False;
181   if (!c->barfstr)       return False;
182   if (!ML_(sli_is_valid)(c->sli))    return False;
183   if (c->sli.ioff == DiOffT_INVALID) return False;
184   if (c->sli_next < c->sli.ioff)     return False;
185   return True;
186}
187
188// Initialise a cursor from a DiSlice (ELF section, really) so as to
189// start reading at offset |sli_initial_offset| from the start of the
190// slice.
191static void init_Cursor ( /*OUT*/Cursor* c,
192                          DiSlice sli,
193                          ULong   sli_initial_offset,
194                          __attribute__((noreturn)) void (*barf)(const HChar*),
195                          const HChar* barfstr )
196{
197   vg_assert(c);
198   VG_(bzero_inline)(c, sizeof(*c));
199   c->sli              = sli;
200   c->sli_next         = c->sli.ioff + sli_initial_offset;
201   c->barf             = barf;
202   c->barfstr          = barfstr;
203   vg_assert(is_sane_Cursor(c));
204}
205
206static Bool is_at_end_Cursor ( Cursor* c ) {
207   vg_assert(is_sane_Cursor(c));
208   return c->sli_next >= c->sli.ioff + c->sli.szB;
209}
210
211static inline ULong get_position_of_Cursor ( Cursor* c ) {
212   vg_assert(is_sane_Cursor(c));
213   return c->sli_next - c->sli.ioff;
214}
215static inline void set_position_of_Cursor ( Cursor* c, ULong pos ) {
216   c->sli_next = c->sli.ioff + pos;
217   vg_assert(is_sane_Cursor(c));
218}
219
220static /*signed*/Long get_remaining_length_Cursor ( Cursor* c ) {
221   vg_assert(is_sane_Cursor(c));
222   return c->sli.ioff + c->sli.szB - c->sli_next;
223}
224
225//static void* get_address_of_Cursor ( Cursor* c ) {
226//   vg_assert(is_sane_Cursor(c));
227//   return &c->region_start_img[ c->region_next ];
228//}
229
230static DiCursor get_DiCursor_from_Cursor ( Cursor* c ) {
231   return mk_DiCursor(c->sli.img, c->sli_next);
232}
233
234/* FIXME: document assumptions on endianness for
235   get_UShort/UInt/ULong. */
236static inline UChar get_UChar ( Cursor* c ) {
237   UChar r;
238   vg_assert(is_sane_Cursor(c));
239   if (c->sli_next + sizeof(UChar) > c->sli.ioff + c->sli.szB) {
240      c->barf(c->barfstr);
241      /*NOTREACHED*/
242      vg_assert(0);
243   }
244   r = ML_(img_get_UChar)(c->sli.img, c->sli_next);
245   c->sli_next += sizeof(UChar);
246   return r;
247}
248static UShort get_UShort ( Cursor* c ) {
249   UShort r;
250   vg_assert(is_sane_Cursor(c));
251   if (c->sli_next + sizeof(UShort) > c->sli.ioff + c->sli.szB) {
252      c->barf(c->barfstr);
253      /*NOTREACHED*/
254      vg_assert(0);
255   }
256   r = ML_(img_get_UShort)(c->sli.img, c->sli_next);
257   c->sli_next += sizeof(UShort);
258   return r;
259}
260static UInt get_UInt ( Cursor* c ) {
261   UInt r;
262   vg_assert(is_sane_Cursor(c));
263   if (c->sli_next + sizeof(UInt) > c->sli.ioff + c->sli.szB) {
264      c->barf(c->barfstr);
265      /*NOTREACHED*/
266      vg_assert(0);
267   }
268   r = ML_(img_get_UInt)(c->sli.img, c->sli_next);
269   c->sli_next += sizeof(UInt);
270   return r;
271}
272static ULong get_ULong ( Cursor* c ) {
273   ULong r;
274   vg_assert(is_sane_Cursor(c));
275   if (c->sli_next + sizeof(ULong) > c->sli.ioff + c->sli.szB) {
276      c->barf(c->barfstr);
277      /*NOTREACHED*/
278      vg_assert(0);
279   }
280   r = ML_(img_get_ULong)(c->sli.img, c->sli_next);
281   c->sli_next += sizeof(ULong);
282   return r;
283}
284static ULong get_ULEB128 ( Cursor* c ) {
285   ULong result;
286   Int   shift;
287   UChar byte;
288   /* unroll first iteration */
289   byte = get_UChar( c );
290   result = (ULong)(byte & 0x7f);
291   if (LIKELY(!(byte & 0x80))) return result;
292   shift = 7;
293   /* end unroll first iteration */
294   do {
295      byte = get_UChar( c );
296      result |= ((ULong)(byte & 0x7f)) << shift;
297      shift += 7;
298   } while (byte & 0x80);
299   return result;
300}
301static Long get_SLEB128 ( Cursor* c ) {
302   ULong  result = 0;
303   Int    shift = 0;
304   UChar  byte;
305   do {
306      byte = get_UChar(c);
307      result |= ((ULong)(byte & 0x7f)) << shift;
308      shift += 7;
309   } while (byte & 0x80);
310   if (shift < 64 && (byte & 0x40))
311      result |= -(1ULL << shift);
312   return result;
313}
314
315/* Assume 'c' points to the start of a string.  Return a DiCursor of
316   whatever it points at, and advance it past the terminating zero.
317   This makes it safe for the caller to then copy the string with
318   ML_(addStr), since (w.r.t. image overruns) the process of advancing
319   past the terminating zero will already have "vetted" the string. */
320static DiCursor get_AsciiZ ( Cursor* c ) {
321   UChar uc;
322   DiCursor res = get_DiCursor_from_Cursor(c);
323   do { uc = get_UChar(c); } while (uc != 0);
324   return res;
325}
326
327static ULong peek_ULEB128 ( Cursor* c ) {
328   DiOffT here = c->sli_next;
329   ULong  r    = get_ULEB128( c );
330   c->sli_next = here;
331   return r;
332}
333static UChar peek_UChar ( Cursor* c ) {
334   DiOffT here = c->sli_next;
335   UChar  r    = get_UChar( c );
336   c->sli_next = here;
337   return r;
338}
339
340static ULong get_Dwarfish_UWord ( Cursor* c, Bool is_dw64 ) {
341   return is_dw64 ? get_ULong(c) : (ULong) get_UInt(c);
342}
343
344static UWord get_UWord ( Cursor* c ) {
345   vg_assert(sizeof(UWord) == sizeof(void*));
346   if (sizeof(UWord) == 4) return get_UInt(c);
347   if (sizeof(UWord) == 8) return get_ULong(c);
348   vg_assert(0);
349}
350
351/* Read a DWARF3 'Initial Length' field */
352static ULong get_Initial_Length ( /*OUT*/Bool* is64,
353                                  Cursor* c,
354                                  const HChar* barfMsg )
355{
356   ULong w64;
357   UInt  w32;
358   *is64 = False;
359   w32 = get_UInt( c );
360   if (w32 >= 0xFFFFFFF0 && w32 < 0xFFFFFFFF) {
361      c->barf( barfMsg );
362   }
363   else if (w32 == 0xFFFFFFFF) {
364      *is64 = True;
365      w64   = get_ULong( c );
366   } else {
367      *is64 = False;
368      w64 = (ULong)w32;
369   }
370   return w64;
371}
372
373
374/*------------------------------------------------------------*/
375/*---                                                      ---*/
376/*--- "CUConst" structure                                  ---*/
377/*---                                                      ---*/
378/*------------------------------------------------------------*/
379
380#define N_ABBV_CACHE 32
381
382/* Holds information that is constant through the parsing of a
383   Compilation Unit.  This is basically plumbed through to
384   everywhere. */
385typedef
386   struct {
387      /* Call here if anything goes wrong */
388      void (*barf)( const HChar* ) __attribute__((noreturn));
389      /* Is this 64-bit DWARF ? */
390      Bool   is_dw64;
391      /* Which DWARF version ?  (2, 3 or 4) */
392      UShort version;
393      /* Length of this Compilation Unit, as stated in the
394         .unit_length :: InitialLength field of the CU Header.
395         However, this size (as specified by the D3 spec) does not
396         include the size of the .unit_length field itself, which is
397         either 4 or 12 bytes (32-bit or 64-bit Dwarf3).  That value
398         can be obtained through the expression ".is_dw64 ? 12 : 4". */
399      ULong  unit_length;
400      /* Offset of start of this unit in .debug_info */
401      UWord  cu_start_offset;
402      /* SVMA for this CU.  In the D3 spec, is known as the "base
403         address of the compilation unit (last para sec 3.1.1).
404         Needed for (amongst things) interpretation of location-list
405         values. */
406      Addr   cu_svma;
407      Bool   cu_svma_known;
408
409      /* The debug_abbreviations table to be used for this Unit */
410      //UChar* debug_abbv;
411      /* Upper bound on size thereof (an overestimate, in general) */
412      //UWord  debug_abbv_maxszB;
413      /* A bounded area of the image, to be used as the
414         debug_abbreviations table tobe used for this Unit. */
415      DiSlice debug_abbv;
416
417      /* Image information for various sections. */
418      DiSlice escn_debug_str;
419      DiSlice escn_debug_ranges;
420      DiSlice escn_debug_loc;
421      DiSlice escn_debug_line;
422      DiSlice escn_debug_info;
423      DiSlice escn_debug_types;
424      DiSlice escn_debug_info_alt;
425      DiSlice escn_debug_str_alt;
426      /* How much to add to .debug_types resp. alternate .debug_info offsets
427         in cook_die*.  */
428      UWord  types_cuOff_bias;
429      UWord  alt_cuOff_bias;
430      /* --- Needed so we can add stuff to the string table. --- */
431      struct _DebugInfo* di;
432      /* --- a cache for set_abbv_Cursor --- */
433      /* abbv_code == (ULong)-1 for an unused entry. */
434      struct { ULong abbv_code; UWord posn; } saC_cache[N_ABBV_CACHE];
435      UWord saC_cache_queries;
436      UWord saC_cache_misses;
437
438      /* True if this came from .debug_types; otherwise it came from
439         .debug_info.  */
440      Bool is_type_unit;
441      /* For a unit coming from .debug_types, these hold the TU's type
442         signature and the uncooked DIE offset of the TU's signatured
443         type.  For a unit coming from .debug_info, these are unused.  */
444      ULong type_signature;
445      ULong type_offset;
446
447      /* Signatured type hash; computed once and then shared by all
448         CUs.  */
449      VgHashTable signature_types;
450
451      /* True if this came from alternate .debug_info; otherwise
452         it came from normal .debug_info or .debug_types.  */
453      Bool is_alt_info;
454   }
455   CUConst;
456
457
458/* Return the cooked value of DIE depending on whether CC represents a
459   .debug_types unit.  To cook a DIE, we pretend that the .debug_info,
460   .debug_types and optional alternate .debug_info sections form
461   a contiguous whole, so that DIEs coming from .debug_types are numbered
462   starting at the end of .debug_info and DIEs coming from alternate
463   .debug_info are numbered starting at the end of .debug_types.  */
464static UWord cook_die( CUConst* cc, UWord die )
465{
466   if (cc->is_type_unit)
467      die += cc->types_cuOff_bias;
468   else if (cc->is_alt_info)
469      die += cc->alt_cuOff_bias;
470   return die;
471}
472
473/* Like cook_die, but understand that DIEs coming from a
474   DW_FORM_ref_sig8 reference are already cooked.  Also, handle
475   DW_FORM_GNU_ref_alt from within primary .debug_info or .debug_types
476   as reference to alternate .debug_info.  */
477static UWord cook_die_using_form( CUConst *cc, UWord die, DW_FORM form)
478{
479   if (form == DW_FORM_ref_sig8)
480      return die;
481   if (form == DW_FORM_GNU_ref_alt)
482      return die + cc->alt_cuOff_bias;
483   return cook_die( cc, die );
484}
485
486/* Return the uncooked offset of DIE and set *TYPE_FLAG to true if the DIE
487   came from the .debug_types section and *ALT_FLAG to true if the DIE
488   came from alternate .debug_info section.  */
489static UWord uncook_die( CUConst *cc, UWord die, /*OUT*/Bool *type_flag,
490                         Bool *alt_flag )
491{
492   *alt_flag = False;
493   *type_flag = False;
494   /* The use of escn_debug_{info,types}.szB seems safe to me even if
495      escn_debug_{info,types} are DiSlice_INVALID (meaning the
496      sections were not found), because DiSlice_INVALID.szB is always
497      zero.  That said, it seems unlikely we'd ever get here if
498      .debug_info or .debug_types were missing. */
499   if (die >= cc->escn_debug_info.szB) {
500      if (die >= cc->escn_debug_info.szB + cc->escn_debug_types.szB) {
501         *alt_flag = True;
502         die -= cc->escn_debug_info.szB + cc->escn_debug_types.szB;
503      } else {
504         *type_flag = True;
505         die -= cc->escn_debug_info.szB;
506      }
507   }
508   return die;
509}
510
511/*------------------------------------------------------------*/
512/*---                                                      ---*/
513/*--- Helper functions for Guarded Expressions             ---*/
514/*---                                                      ---*/
515/*------------------------------------------------------------*/
516
517/* Parse the location list starting at img-offset 'debug_loc_offset'
518   in .debug_loc.  Results are biased with 'svma_of_referencing_CU'
519   and so I believe are correct SVMAs for the object as a whole.  This
520   function allocates the UChar*, and the caller must deallocate it.
521   The resulting block is in so-called Guarded-Expression format.
522
523   Guarded-Expression format is similar but not identical to the DWARF3
524   location-list format.  The format of each returned block is:
525
526      UChar biasMe;
527      UChar isEnd;
528      followed by zero or more of
529
530      (Addr aMin;  Addr aMax;  UShort nbytes;  ..bytes..;  UChar isEnd)
531
   '..bytes..' is a standard DWARF3 location expression which is
   valid when aMin <= pc <= aMax (possibly after suitable biasing).
534
535   The number of bytes in '..bytes..' is nbytes.
536
537   The end of the sequence is marked by an isEnd == 1 value.  All
538   previous isEnd values must be zero.
539
   biasMe is 1 if the aMin/aMax fields need this DebugInfo's
   text_bias added before use, and 0 if this is not necessary (the GX
   is ready to go).
543
544   Hence the block can be quickly parsed and is self-describing.  Note
545   that aMax is 1 less than the corresponding value in a DWARF3
546   location list.  Zero length ranges, with aMax == aMin-1, are not
547   allowed.
548*/
549/* 2008-sept-12: moved ML_(pp_GX) from here to d3basics.c, where
550   it more logically belongs. */
551
552
553/* Apply a text bias to a GX. */
554static void bias_GX ( /*MOD*/GExpr* gx, struct _DebugInfo* di )
555{
556   UShort nbytes;
557   UChar* p = &gx->payload[0];
558   UChar* pA;
559   UChar  uc;
560   uc = *p++; /*biasMe*/
561   if (uc == 0)
562      return;
563   vg_assert(uc == 1);
564   p[-1] = 0; /* mark it as done */
565   while (True) {
566      uc = *p++;
567      if (uc == 1)
568         break; /*isEnd*/
569      vg_assert(uc == 0);
570      /* t-bias aMin */
571      pA = (UChar*)p;
572      ML_(write_Addr)(pA, ML_(read_Addr)(pA) + di->text_debug_bias);
573      p += sizeof(Addr);
574      /* t-bias aMax */
575      pA = (UChar*)p;
576      ML_(write_Addr)(pA, ML_(read_Addr)(pA) + di->text_debug_bias);
577      p += sizeof(Addr);
578      /* nbytes, and actual expression */
579      nbytes = ML_(read_UShort)(p); p += sizeof(UShort);
580      p += nbytes;
581   }
582}
583
584__attribute__((noinline))
585static GExpr* make_singleton_GX ( DiCursor block, ULong nbytes )
586{
587   SizeT  bytesReqd;
588   GExpr* gx;
589   UChar *p, *pstart;
590
591   vg_assert(sizeof(UWord) == sizeof(Addr));
592   vg_assert(nbytes <= 0xFFFF); /* else we overflow the nbytes field */
593   bytesReqd
594      =   sizeof(UChar)  /*biasMe*/    + sizeof(UChar) /*!isEnd*/
595        + sizeof(UWord)  /*aMin*/      + sizeof(UWord) /*aMax*/
596        + sizeof(UShort) /*nbytes*/    + (SizeT)nbytes
597        + sizeof(UChar); /*isEnd*/
598
599   gx = ML_(dinfo_zalloc)( "di.readdwarf3.msGX.1",
600                           sizeof(GExpr) + bytesReqd );
601   vg_assert(gx);
602
603   p = pstart = &gx->payload[0];
604
605   p = ML_(write_UChar)(p, 0);        /*biasMe*/
606   p = ML_(write_UChar)(p, 0);        /*!isEnd*/
607   p = ML_(write_Addr)(p, 0);         /*aMin*/
608   p = ML_(write_Addr)(p, ~0);        /*aMax*/
609   p = ML_(write_UShort)(p, nbytes);  /*nbytes*/
610   ML_(cur_read_get)(p, block, nbytes); p += nbytes;
611   p = ML_(write_UChar)(p, 1);        /*isEnd*/
612
613   vg_assert( (SizeT)(p - pstart) == bytesReqd);
614   vg_assert( &gx->payload[bytesReqd]
615              == ((UChar*)gx) + sizeof(GExpr) + bytesReqd );
616
617   return gx;
618}
619
620__attribute__((noinline))
621static GExpr* make_general_GX ( CUConst* cc,
622                                Bool     td3,
623                                ULong    debug_loc_offset,
624                                Addr     svma_of_referencing_CU )
625{
626   Addr      base;
627   Cursor    loc;
628   XArray*   xa; /* XArray of UChar */
629   GExpr*    gx;
630   Word      nbytes;
631
632   vg_assert(sizeof(UWord) == sizeof(Addr));
633   if (!ML_(sli_is_valid)(cc->escn_debug_loc) || cc->escn_debug_loc.szB == 0)
634      cc->barf("make_general_GX: .debug_loc is empty/missing");
635
636   init_Cursor( &loc, cc->escn_debug_loc, 0, cc->barf,
637                "Overrun whilst reading .debug_loc section(2)" );
638   set_position_of_Cursor( &loc, debug_loc_offset );
639
640   TRACE_D3("make_general_GX (.debug_loc_offset = %llu, ioff = %llu) {\n",
641            debug_loc_offset, (ULong)get_DiCursor_from_Cursor(&loc).ioff );
642
643   /* Who frees this xa?  It is freed before this fn exits. */
644   xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.mgGX.1",
645                    ML_(dinfo_free),
646                    sizeof(UChar) );
647
648   { UChar c = 1; /*biasMe*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
649
650   base = 0;
651   while (True) {
652      Bool  acquire;
653      UWord len;
654      /* Read a (host-)word pair.  This is something of a hack since
655         the word size to read is really dictated by the ELF file;
656         however, we assume we're reading a file with the same
657         word-sizeness as the host.  Reasonably enough. */
658      UWord w1 = get_UWord( &loc );
659      UWord w2 = get_UWord( &loc );
660
661      TRACE_D3("   %08lx %08lx\n", w1, w2);
662      if (w1 == 0 && w2 == 0)
663         break; /* end of list */
664
665      if (w1 == -1UL) {
666         /* new value for 'base' */
667         base = w2;
668         continue;
669      }
670
671      /* else a location expression follows */
672      /* else enumerate [w1+base, w2+base) */
673      /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
674         (sec 2.17.2) */
675      if (w1 > w2) {
676         TRACE_D3("negative range is for .debug_loc expr at "
677                  "file offset %llu\n",
678                  debug_loc_offset);
679         cc->barf( "negative range in .debug_loc section" );
680      }
681
682      /* ignore zero length ranges */
683      acquire = w1 < w2;
684      len     = (UWord)get_UShort( &loc );
685
686      if (acquire) {
687         UWord  w;
688         UShort s;
689         UChar  c;
690         c = 0; /* !isEnd*/
691         VG_(addBytesToXA)( xa, &c, sizeof(c) );
692         w = w1    + base + svma_of_referencing_CU;
693         VG_(addBytesToXA)( xa, &w, sizeof(w) );
694         w = w2 -1 + base + svma_of_referencing_CU;
695         VG_(addBytesToXA)( xa, &w, sizeof(w) );
696         s = (UShort)len;
697         VG_(addBytesToXA)( xa, &s, sizeof(s) );
698      }
699
700      while (len > 0) {
701         UChar byte = get_UChar( &loc );
702         TRACE_D3("%02x", (UInt)byte);
703         if (acquire)
704            VG_(addBytesToXA)( xa, &byte, 1 );
705         len--;
706      }
707      TRACE_D3("\n");
708   }
709
710   { UChar c = 1; /*isEnd*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
711
712   nbytes = VG_(sizeXA)( xa );
713   vg_assert(nbytes >= 1);
714
715   gx = ML_(dinfo_zalloc)( "di.readdwarf3.mgGX.2", sizeof(GExpr) + nbytes );
716   vg_assert(gx);
717   VG_(memcpy)( &gx->payload[0], (UChar*)VG_(indexXA)(xa,0), nbytes );
718   vg_assert( &gx->payload[nbytes]
719              == ((UChar*)gx) + sizeof(GExpr) + nbytes );
720
721   VG_(deleteXA)( xa );
722
723   TRACE_D3("}\n");
724
725   return gx;
726}
727
728
729/*------------------------------------------------------------*/
730/*---                                                      ---*/
731/*--- Helper functions for range lists and CU headers      ---*/
732/*---                                                      ---*/
733/*------------------------------------------------------------*/
734
735/* Denotes an address range.  Both aMin and aMax are included in the
736   range; hence a complete range is (0, ~0) and an empty range is any
737   (X, X-1) for X > 0.*/
738typedef
739   struct { Addr aMin; Addr aMax; }
740   AddrRange;
741
742
743/* Generate an arbitrary structural total ordering on
744   XArray* of AddrRange. */
745static Word cmp__XArrays_of_AddrRange ( XArray* rngs1, XArray* rngs2 )
746{
747   Word n1, n2, i;
748   tl_assert(rngs1 && rngs2);
749   n1 = VG_(sizeXA)( rngs1 );
750   n2 = VG_(sizeXA)( rngs2 );
751   if (n1 < n2) return -1;
752   if (n1 > n2) return 1;
753   for (i = 0; i < n1; i++) {
754      AddrRange* rng1 = (AddrRange*)VG_(indexXA)( rngs1, i );
755      AddrRange* rng2 = (AddrRange*)VG_(indexXA)( rngs2, i );
756      if (rng1->aMin < rng2->aMin) return -1;
757      if (rng1->aMin > rng2->aMin) return 1;
758      if (rng1->aMax < rng2->aMax) return -1;
759      if (rng1->aMax > rng2->aMax) return 1;
760   }
761   return 0;
762}
763
764
765__attribute__((noinline))
766static XArray* /* of AddrRange */ empty_range_list ( void )
767{
768   XArray* xa; /* XArray of AddrRange */
769   /* Who frees this xa?  varstack_preen() does. */
770   xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.erl.1",
771                    ML_(dinfo_free),
772                    sizeof(AddrRange) );
773   return xa;
774}
775
776
777__attribute__((noinline))
778static XArray* unitary_range_list ( Addr aMin, Addr aMax )
779{
780   XArray*   xa;
781   AddrRange pair;
782   vg_assert(aMin <= aMax);
783   /* Who frees this xa?  varstack_preen() does. */
784   xa = VG_(newXA)( ML_(dinfo_zalloc),  "di.readdwarf3.url.1",
785                    ML_(dinfo_free),
786                    sizeof(AddrRange) );
787   pair.aMin = aMin;
788   pair.aMax = aMax;
789   VG_(addToXA)( xa, &pair );
790   return xa;
791}
792
793
794/* Enumerate the address ranges starting at img-offset
795   'debug_ranges_offset' in .debug_ranges.  Results are biased with
796   'svma_of_referencing_CU' and so I believe are correct SVMAs for the
797   object as a whole.  This function allocates the XArray, and the
798   caller must deallocate it. */
799__attribute__((noinline))
800static XArray* /* of AddrRange */
801       get_range_list ( CUConst* cc,
802                        Bool     td3,
803                        UWord    debug_ranges_offset,
804                        Addr     svma_of_referencing_CU )
805{
806   Addr      base;
807   Cursor    ranges;
808   XArray*   xa; /* XArray of AddrRange */
809   AddrRange pair;
810
811   if (!ML_(sli_is_valid)(cc->escn_debug_ranges)
812       || cc->escn_debug_ranges.szB == 0)
813      cc->barf("get_range_list: .debug_ranges is empty/missing");
814
815   init_Cursor( &ranges, cc->escn_debug_ranges, 0, cc->barf,
816                "Overrun whilst reading .debug_ranges section(2)" );
817   set_position_of_Cursor( &ranges, debug_ranges_offset );
818
819   /* Who frees this xa?  varstack_preen() does. */
820   xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.grl.1", ML_(dinfo_free),
821                    sizeof(AddrRange) );
822   base = 0;
823   while (True) {
824      /* Read a (host-)word pair.  This is something of a hack since
825         the word size to read is really dictated by the ELF file;
826         however, we assume we're reading a file with the same
827         word-sizeness as the host.  Reasonably enough. */
828      UWord w1 = get_UWord( &ranges );
829      UWord w2 = get_UWord( &ranges );
830
831      if (w1 == 0 && w2 == 0)
832         break; /* end of list. */
833
834      if (w1 == -1UL) {
835         /* new value for 'base' */
836         base = w2;
837         continue;
838      }
839
840      /* else enumerate [w1+base, w2+base) */
841      /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
842         (sec 2.17.2) */
843      if (w1 > w2)
844         cc->barf( "negative range in .debug_ranges section" );
845      if (w1 < w2) {
846         pair.aMin = w1     + base + svma_of_referencing_CU;
847         pair.aMax = w2 - 1 + base + svma_of_referencing_CU;
848         vg_assert(pair.aMin <= pair.aMax);
849         VG_(addToXA)( xa, &pair );
850      }
851   }
852   return xa;
853}
854
855
856/* Parse the Compilation Unit header indicated at 'c' and
857   initialise 'cc' accordingly. */
858static __attribute__((noinline))
859void parse_CU_Header ( /*OUT*/CUConst* cc,
860                       Bool td3,
861                       Cursor* c,
862                       DiSlice escn_debug_abbv,
863		       Bool type_unit,
864                       Bool alt_info )
865{
866   UChar  address_size;
867   ULong  debug_abbrev_offset;
868   Int    i;
869
870   VG_(memset)(cc, 0, sizeof(*cc));
871   vg_assert(c && c->barf);
872   cc->barf = c->barf;
873
874   /* initial_length field */
875   cc->unit_length
876      = get_Initial_Length( &cc->is_dw64, c,
877           "parse_CU_Header: invalid initial-length field" );
878
879   TRACE_D3("   Length:        %lld\n", cc->unit_length );
880
881   /* version */
882   cc->version = get_UShort( c );
883   if (cc->version != 2 && cc->version != 3 && cc->version != 4)
884      cc->barf( "parse_CU_Header: is neither DWARF2 nor DWARF3 nor DWARF4" );
885   TRACE_D3("   Version:       %d\n", (Int)cc->version );
886
887   /* debug_abbrev_offset */
888   debug_abbrev_offset = get_Dwarfish_UWord( c, cc->is_dw64 );
889   if (debug_abbrev_offset >= escn_debug_abbv.szB)
890      cc->barf( "parse_CU_Header: invalid debug_abbrev_offset" );
891   TRACE_D3("   Abbrev Offset: %lld\n", debug_abbrev_offset );
892
893   /* address size.  If this isn't equal to the host word size, just
894      give up.  This makes it safe to assume elsewhere that
895      DW_FORM_addr and DW_FORM_ref_addr can be treated as a host
896      word. */
897   address_size = get_UChar( c );
898   if (address_size != sizeof(void*))
899      cc->barf( "parse_CU_Header: invalid address_size" );
900   TRACE_D3("   Pointer Size:  %d\n", (Int)address_size );
901
902   cc->is_type_unit = type_unit;
903   cc->is_alt_info = alt_info;
904
905   if (type_unit) {
906      cc->type_signature = get_ULong( c );
907      cc->type_offset = get_Dwarfish_UWord( c, cc->is_dw64 );
908   }
909
910   /* Set up cc->debug_abbv to point to the relevant table for this
911      CU.  Set its .szB so that at least we can't read off the end of
912      the debug_abbrev section -- potentially (and quite likely) too
913      big, if this isn't the last table in the section, but at least
914      it's safe.
915
916      This amounts to taking debug_abbv_escn and moving the start
917      position along by debug_abbrev_offset bytes, hence forming a
918      smaller DiSlice which has the same end point.  Since we checked
919      just above that debug_abbrev_offset is less than the size of
920      debug_abbv_escn, this should leave us with a nonempty slice. */
921   vg_assert(debug_abbrev_offset < escn_debug_abbv.szB);
922   cc->debug_abbv      = escn_debug_abbv;
923   cc->debug_abbv.ioff += debug_abbrev_offset;
924   cc->debug_abbv.szB  -= debug_abbrev_offset;
925
926   /* and empty out the set_abbv_Cursor cache */
927   if (0) VG_(printf)("XXXXXX initialise set_abbv_Cursor cache\n");
928   for (i = 0; i < N_ABBV_CACHE; i++) {
929      cc->saC_cache[i].abbv_code = (ULong)-1; /* unused */
930      cc->saC_cache[i].posn = 0;
931   }
932   cc->saC_cache_queries = 0;
933   cc->saC_cache_misses = 0;
934}
935
936
937/* Set up 'c' so it is ready to parse the abbv table entry code
938   'abbv_code' for this compilation unit.  */
939static __attribute__((noinline))
940void set_abbv_Cursor ( /*OUT*/Cursor* c, Bool td3,
941                       CUConst* cc, ULong abbv_code )
942{
943   Int   i;
944   ULong acode;
945
946   if (abbv_code == 0)
947      cc->barf("set_abbv_Cursor: abbv_code == 0" );
948
949   /* (ULong)-1 is used to represent an empty cache slot.  So we can't
950      allow it.  In any case no valid DWARF3 should make a reference
951      to a negative abbreviation code.  [at least, they always seem to
952      be numbered upwards from zero as far as I have seen] */
953   vg_assert(abbv_code != (ULong)-1);
954
955   /* First search the cache. */
956   if (0) VG_(printf)("XXXXXX search set_abbv_Cursor cache\n");
957   cc->saC_cache_queries++;
958   for (i = 0; i < N_ABBV_CACHE; i++) {
959      /* No need to test the cached abbv_codes for -1 (empty), since
960         we just asserted that abbv_code is not -1. */
961      if (LIKELY(cc->saC_cache[i].abbv_code == abbv_code)) {
962         /* Found it.  Set up the parser using the cached position,
963            and move this cache entry to the front. */
964         if (0) VG_(printf)("XXXXXX found in set_abbv_Cursor cache\n");
965         init_Cursor( c, cc->debug_abbv, cc->saC_cache[i].posn,
966                      cc->barf,
967                      "Overrun whilst parsing .debug_abbrev section(1)" );
968         if (i > 0) {
969            ULong t_abbv_code = cc->saC_cache[i].abbv_code;
970            UWord t_posn      = cc->saC_cache[i].posn;
971            while (i > 0) {
972               cc->saC_cache[i] = cc->saC_cache[i-1];
973               i--;
974            }
975            cc->saC_cache[0].abbv_code = t_abbv_code;
976            cc->saC_cache[0].posn      = t_posn;
977         }
978         return;
979      }
980   }
981
982   /* No.  It's not in the cache.  We have to search through
983      .debug_abbrev, of course taking care to update the cache
984      when done. */
985
986   cc->saC_cache_misses++;
987   init_Cursor( c, cc->debug_abbv, 0, cc->barf,
988               "Overrun whilst parsing .debug_abbrev section(2)" );
989
990   /* Now iterate though the table until we find the requested
991      entry. */
992   while (True) {
993      //ULong atag;
994      //UInt  has_children;
995      acode = get_ULEB128( c );
996      if (acode == 0) break; /* end of the table */
997      if (acode == abbv_code) break; /* found it */
998      /*atag         = */ get_ULEB128( c );
999      /*has_children = */ get_UChar( c );
1000      //TRACE_D3("   %llu      %s    [%s]\n",
1001      //         acode, pp_DW_TAG(atag), pp_DW_children(has_children));
1002      while (True) {
1003         ULong at_name = get_ULEB128( c );
1004         ULong at_form = get_ULEB128( c );
1005         if (at_name == 0 && at_form == 0) break;
1006         //TRACE_D3("    %18s %s\n",
1007         //         pp_DW_AT(at_name), pp_DW_FORM(at_form));
1008      }
1009   }
1010
1011   if (acode == 0) {
1012      /* Not found.  This is fatal. */
1013      cc->barf("set_abbv_Cursor: abbv_code not found");
1014   }
1015
1016   /* Otherwise, 'c' is now set correctly to parse the relevant entry,
1017      starting from the abbreviation entry's tag.  So just cache
1018      the result, and return. */
1019   for (i = N_ABBV_CACHE-1; i > N_ABBV_CACHE/2; i--) {
1020      cc->saC_cache[i] = cc->saC_cache[i-1];
1021   }
1022   if (0) VG_(printf)("XXXXXX update set_abbv_Cursor cache\n");
1023   cc->saC_cache[N_ABBV_CACHE/2].abbv_code = abbv_code;
1024   cc->saC_cache[N_ABBV_CACHE/2].posn = get_position_of_Cursor(c);
1025}
1026
1027/* This represents a single signatured type.  It maps a type signature
1028   (a ULong) to a cooked DIE offset.  Objects of this type are stored
1029   in the type signature hash table.  */
1030typedef
1031   struct D3SignatureType {
1032      struct D3SignatureType *next;
1033      UWord data;
1034      ULong type_signature;
1035      UWord die;
1036   }
1037   D3SignatureType;
1038
1039/* Record a signatured type in the hash table.  */
1040static void record_signatured_type ( VgHashTable tab,
1041                                     ULong type_signature,
1042                                     UWord die )
1043{
1044   D3SignatureType *dstype = ML_(dinfo_zalloc) ( "di.readdwarf3.sigtype",
1045                                                 sizeof(D3SignatureType) );
1046   dstype->data = (UWord) type_signature;
1047   dstype->type_signature = type_signature;
1048   dstype->die = die;
1049   VG_(HT_add_node) ( tab, dstype );
1050}
1051
1052/* Given a type signature hash table and a type signature, return the
1053   cooked DIE offset of the type.  If the type cannot be found, call
1054   BARF.  */
1055static UWord lookup_signatured_type ( VgHashTable tab,
1056                                      ULong type_signature,
1057                                      void (*barf)( const HChar* ) __attribute__((noreturn)) )
1058{
1059   D3SignatureType *dstype = VG_(HT_lookup) ( tab, (UWord) type_signature );
1060   /* This may be unwarranted chumminess with the hash table
1061      implementation.  */
1062   while ( dstype != NULL && dstype->type_signature != type_signature)
1063      dstype = dstype->next;
1064   if (dstype == NULL) {
1065      barf("lookup_signatured_type: could not find signatured type");
1066      /*NOTREACHED*/
1067      vg_assert(0);
1068   }
1069   return dstype->die;
1070}
1071
1072
1073/* Represents Form data.  If szB is 1/2/4/8 then the result is in the
1074   lowest 1/2/4/8 bytes of u.val.  If szB is zero or negative then the
1075   result is an image section beginning at u.cur and with size -szB.
1076   No other szB values are allowed. */
1077typedef
1078   struct {
1079      Long szB; // 1, 2, 4, 8 or non-positive values only.
1080      union { ULong val; DiCursor cur; } u;
1081   }
1082   FormContents;
1083
1084/* From 'c', get the Form data into 'cts'.  Either it gets a 1/2/4/8
1085   byte scalar value, or (a reference to) zero or more bytes starting
1086   at a DiCursor.*/
1087static
1088void get_Form_contents ( /*OUT*/FormContents* cts,
1089                         CUConst* cc, Cursor* c,
1090                         Bool td3, DW_FORM form )
1091{
1092   VG_(bzero_inline)(cts, sizeof(*cts));
1093   switch (form) {
1094      case DW_FORM_data1:
1095         cts->u.val = (ULong)(UChar)get_UChar(c);
1096         cts->szB   = 1;
1097         TRACE_D3("%u", (UInt)cts->u.val);
1098         break;
1099      case DW_FORM_data2:
1100         cts->u.val = (ULong)(UShort)get_UShort(c);
1101         cts->szB   = 2;
1102         TRACE_D3("%u", (UInt)cts->u.val);
1103         break;
1104      case DW_FORM_data4:
1105         cts->u.val = (ULong)(UInt)get_UInt(c);
1106         cts->szB   = 4;
1107         TRACE_D3("%u", (UInt)cts->u.val);
1108         break;
1109      case DW_FORM_data8:
1110         cts->u.val = get_ULong(c);
1111         cts->szB   = 8;
1112         TRACE_D3("%llu", cts->u.val);
1113         break;
1114      case DW_FORM_sec_offset:
1115         cts->u.val = (ULong)get_Dwarfish_UWord( c, cc->is_dw64 );
1116         cts->szB   = cc->is_dw64 ? 8 : 4;
1117         TRACE_D3("%llu", cts->u.val);
1118         break;
1119      case DW_FORM_sdata:
1120         cts->u.val = (ULong)(Long)get_SLEB128(c);
1121         cts->szB   = 8;
1122         TRACE_D3("%lld", (Long)cts->u.val);
1123         break;
1124      case DW_FORM_udata:
1125         cts->u.val = (ULong)(Long)get_ULEB128(c);
1126         cts->szB   = 8;
1127         TRACE_D3("%llu", (Long)cts->u.val);
1128         break;
1129      case DW_FORM_addr:
1130         /* note, this is a hack.  DW_FORM_addr is defined as getting
1131            a word the size of the target machine as defined by the
1132            address_size field in the CU Header.  However,
1133            parse_CU_Header() rejects all inputs except those for
1134            which address_size == sizeof(Word), hence we can just
1135            treat it as a (host) Word.  */
1136         cts->u.val = (ULong)(UWord)get_UWord(c);
1137         cts->szB   = sizeof(UWord);
1138         TRACE_D3("0x%lx", (UWord)cts->u.val);
1139         break;
1140
1141      case DW_FORM_ref_addr:
1142         /* We make the same word-size assumption as DW_FORM_addr. */
1143         /* What does this really mean?  From D3 Sec 7.5.4,
1144            description of "reference", it would appear to reference
1145            some other DIE, by specifying the offset from the
1146            beginning of a .debug_info section.  The D3 spec mentions
1147            that this might be in some other shared object and
1148            executable.  But I don't see how the name of the other
1149            object/exe is specified.
1150
1151            At least for the DW_FORM_ref_addrs created by icc11, the
1152            references seem to be within the same object/executable.
1153            So for the moment we merely range-check, to see that they
1154            actually do specify a plausible offset within this
1155            object's .debug_info, and return the value unchanged.
1156
1157            In DWARF 2, DW_FORM_ref_addr is address-sized, but in
1158            DWARF 3 and later, it is offset-sized.
1159         */
1160         if (cc->version == 2) {
1161            cts->u.val = (ULong)(UWord)get_UWord(c);
1162            cts->szB   = sizeof(UWord);
1163         } else {
1164            cts->u.val = get_Dwarfish_UWord(c, cc->is_dw64);
1165            cts->szB   = cc->is_dw64 ? sizeof(ULong) : sizeof(UInt);
1166         }
1167         TRACE_D3("0x%lx", (UWord)cts->u.val);
1168         if (0) VG_(printf)("DW_FORM_ref_addr 0x%lx\n", (UWord)cts->u.val);
1169         if (/* the following is surely impossible, but ... */
1170             !ML_(sli_is_valid)(cc->escn_debug_info)
1171             || cts->u.val >= (ULong)cc->escn_debug_info.szB) {
1172            /* Hmm.  Offset is nonsensical for this object's .debug_info
1173               section.  Be safe and reject it. */
1174            cc->barf("get_Form_contents: DW_FORM_ref_addr points "
1175                     "outside .debug_info");
1176         }
1177         break;
1178
1179      case DW_FORM_strp: {
1180         /* this is an offset into .debug_str */
1181         UWord uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
1182         if (!ML_(sli_is_valid)(cc->escn_debug_str)
1183             || uw >= cc->escn_debug_str.szB)
1184            cc->barf("get_Form_contents: DW_FORM_strp "
1185                     "points outside .debug_str");
1186         /* FIXME: check the entire string lies inside debug_str,
1187            not just the first byte of it. */
1188         DiCursor str
1189            = ML_(cur_plus)( ML_(cur_from_sli)(cc->escn_debug_str), uw );
1190         if (td3) {
1191            HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.1");
1192            TRACE_D3("(indirect string, offset: 0x%lx): %s", uw, tmp);
1193            ML_(dinfo_free)(tmp);
1194         }
1195         cts->u.cur = str;
1196         cts->szB   = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
1197         break;
1198      }
1199      case DW_FORM_string: {
1200         DiCursor str = get_AsciiZ(c);
1201         if (td3) {
1202            HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.2");
1203            TRACE_D3("%s", tmp);
1204            ML_(dinfo_free)(tmp);
1205         }
1206         cts->u.cur = str;
1207         /* strlen is safe because get_AsciiZ already 'vetted' the
1208            entire string */
1209         cts->szB   = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
1210         break;
1211      }
1212      case DW_FORM_ref1: {
1213         UChar u8   = get_UChar(c);
1214         UWord res  = cc->cu_start_offset + (UWord)u8;
1215         cts->u.val = (ULong)res;
1216         cts->szB   = sizeof(UWord);
1217         TRACE_D3("<%lx>", res);
1218         break;
1219      }
1220      case DW_FORM_ref2: {
1221         UShort u16 = get_UShort(c);
1222         UWord  res = cc->cu_start_offset + (UWord)u16;
1223         cts->u.val = (ULong)res;
1224         cts->szB   = sizeof(UWord);
1225         TRACE_D3("<%lx>", res);
1226         break;
1227      }
1228      case DW_FORM_ref4: {
1229         UInt  u32  = get_UInt(c);
1230         UWord res  = cc->cu_start_offset + (UWord)u32;
1231         cts->u.val = (ULong)res;
1232         cts->szB   = sizeof(UWord);
1233         TRACE_D3("<%lx>", res);
1234         break;
1235      }
1236      case DW_FORM_ref8: {
1237         ULong u64  = get_ULong(c);
1238         UWord res  = cc->cu_start_offset + (UWord)u64;
1239         cts->u.val = (ULong)res;
1240         cts->szB   = sizeof(UWord);
1241         TRACE_D3("<%lx>", res);
1242         break;
1243      }
1244      case DW_FORM_ref_udata: {
1245         ULong u64  = get_ULEB128(c);
1246         UWord res  = cc->cu_start_offset + (UWord)u64;
1247         cts->u.val = (ULong)res;
1248         cts->szB   = sizeof(UWord);
1249         TRACE_D3("<%lx>", res);
1250         break;
1251      }
1252      case DW_FORM_flag: {
1253         UChar u8 = get_UChar(c);
1254         TRACE_D3("%u", (UInt)u8);
1255         cts->u.val = (ULong)u8;
1256         cts->szB   = 1;
1257         break;
1258      }
1259      case DW_FORM_flag_present:
1260         TRACE_D3("1");
1261         cts->u.val = 1;
1262         cts->szB   = 1;
1263         break;
1264      case DW_FORM_block1: {
1265         ULong    u64b;
1266         ULong    u64   = (ULong)get_UChar(c);
1267         DiCursor block = get_DiCursor_from_Cursor(c);
1268         TRACE_D3("%llu byte block: ", u64);
1269         for (u64b = u64; u64b > 0; u64b--) {
1270            UChar u8 = get_UChar(c);
1271            TRACE_D3("%x ", (UInt)u8);
1272         }
1273         cts->u.cur = block;
1274         cts->szB   = - (Long)u64;
1275         break;
1276      }
1277      case DW_FORM_block2: {
1278         ULong    u64b;
1279         ULong    u64   = (ULong)get_UShort(c);
1280         DiCursor block = get_DiCursor_from_Cursor(c);
1281         TRACE_D3("%llu byte block: ", u64);
1282         for (u64b = u64; u64b > 0; u64b--) {
1283            UChar u8 = get_UChar(c);
1284            TRACE_D3("%x ", (UInt)u8);
1285         }
1286         cts->u.cur = block;
1287         cts->szB   = - (Long)u64;
1288         break;
1289      }
1290      case DW_FORM_block4: {
1291         ULong    u64b;
1292         ULong    u64   = (ULong)get_UInt(c);
1293         DiCursor block = get_DiCursor_from_Cursor(c);
1294         TRACE_D3("%llu byte block: ", u64);
1295         for (u64b = u64; u64b > 0; u64b--) {
1296            UChar u8 = get_UChar(c);
1297            TRACE_D3("%x ", (UInt)u8);
1298         }
1299         cts->u.cur = block;
1300         cts->szB   = - (Long)u64;
1301         break;
1302      }
1303      case DW_FORM_exprloc:
1304      case DW_FORM_block: {
1305         ULong    u64b;
1306         ULong    u64   = (ULong)get_ULEB128(c);
1307         DiCursor block = get_DiCursor_from_Cursor(c);
1308         TRACE_D3("%llu byte block: ", u64);
1309         for (u64b = u64; u64b > 0; u64b--) {
1310            UChar u8 = get_UChar(c);
1311            TRACE_D3("%x ", (UInt)u8);
1312         }
1313         cts->u.cur = block;
1314         cts->szB   = - (Long)u64;
1315         break;
1316      }
1317      case DW_FORM_ref_sig8: {
1318         ULong  u64b;
1319         ULong  signature = get_ULong (c);
1320         ULong  work = signature;
1321         TRACE_D3("8 byte signature: ");
1322         for (u64b = 8; u64b > 0; u64b--) {
1323            UChar u8 = work & 0xff;
1324            TRACE_D3("%x ", (UInt)u8);
1325            work >>= 8;
1326         }
1327         /* Due to the way that the hash table is constructed, the
1328            resulting DIE offset here is already "cooked".  See
1329            cook_die_using_form.  */
1330         cts->u.val = lookup_signatured_type (cc->signature_types, signature,
1331                                              c->barf);
1332         cts->szB   = sizeof(UWord);
1333         break;
1334      }
1335      case DW_FORM_indirect:
1336         get_Form_contents (cts, cc, c, td3, (DW_FORM)get_ULEB128(c));
1337         return;
1338
1339      case DW_FORM_GNU_ref_alt:
1340         cts->u.val = get_Dwarfish_UWord(c, cc->is_dw64);
1341         cts->szB   = cc->is_dw64 ? sizeof(ULong) : sizeof(UInt);
1342         TRACE_D3("0x%lx", (UWord)cts->u.val);
1343         if (0) VG_(printf)("DW_FORM_GNU_ref_alt 0x%lx\n", (UWord)cts->u.val);
1344         if (/* the following is surely impossible, but ... */
1345             !ML_(sli_is_valid)(cc->escn_debug_info_alt)
1346             || cts->u.val >= (ULong)cc->escn_debug_info_alt.szB) {
1347            /* Hmm.  Offset is nonsensical for this object's .debug_info
1348               section.  Be safe and reject it. */
1349            cc->barf("get_Form_contents: DW_FORM_ref_addr points "
1350                     "outside alternate .debug_info");
1351         }
1352         break;
1353
1354      case DW_FORM_GNU_strp_alt: {
1355         /* this is an offset into alternate .debug_str */
1356         SizeT uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
1357         if (!ML_(sli_is_valid)(cc->escn_debug_str_alt)
1358             || uw >= cc->escn_debug_str_alt.szB)
1359            cc->barf("get_Form_contents: DW_FORM_GNU_strp_alt "
1360                     "points outside alternate .debug_str");
1361         /* FIXME: check the entire string lies inside debug_str,
1362            not just the first byte of it. */
1363         DiCursor str
1364            = ML_(cur_plus)( ML_(cur_from_sli)(cc->escn_debug_str_alt), uw);
1365         if (td3) {
1366            HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.3");
1367            TRACE_D3("(indirect alt string, offset: 0x%lx): %s", uw, tmp);
1368            ML_(dinfo_free)(tmp);
1369         }
1370         cts->u.cur = str;
1371         cts->szB   = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
1372         break;
1373      }
1374
1375      default:
1376         VG_(printf)(
1377            "get_Form_contents: unhandled %d (%s) at <%llx>\n",
1378            form, ML_(pp_DW_FORM)(form), get_position_of_Cursor(c));
1379         c->barf("get_Form_contents: unhandled DW_FORM");
1380   }
1381}
1382
1383
1384/*------------------------------------------------------------*/
1385/*---                                                      ---*/
1386/*--- Parsing of variable-related DIEs                     ---*/
1387/*---                                                      ---*/
1388/*------------------------------------------------------------*/
1389
1390typedef
1391   struct _TempVar {
1392      HChar*  name; /* in DebugInfo's .strchunks */
1393      /* Represent ranges economically.  nRanges is the number of
1394         ranges.  Cases:
1395         0: .rngOneMin .rngOneMax .manyRanges are all zero
1396         1: .rngOneMin .rngOneMax hold the range; .rngMany is NULL
1397         2: .rngOneMin .rngOneMax are zero; .rngMany holds the ranges.
1398         This is merely an optimisation to avoid having to allocate
1399         and free the XArray in the common (98%) of cases where there
1400         is zero or one address ranges. */
1401      UWord   nRanges;
1402      Addr    rngOneMin;
1403      Addr    rngOneMax;
1404      XArray* rngMany; /* of AddrRange.  NON-UNIQUE PTR in AR_DINFO. */
1405      /* Do not free .rngMany, since many TempVars will have the same
1406         value.  Instead the associated storage is to be freed by
1407         deleting 'rangetree', which stores a single copy of each
1408         range. */
1409      /* --- */
1410      Int     level;
1411      UWord   typeR; /* a cuOff */
1412      GExpr*  gexpr; /* for this variable */
1413      GExpr*  fbGX;  /* to find the frame base of the enclosing fn, if
1414                        any */
1415      HChar*  fName; /* declaring file name, or NULL */
1416      Int     fLine; /* declaring file line number, or zero */
1417      /* offset in .debug_info, so that abstract instances can be
1418         found to satisfy references from concrete instances. */
1419      UWord   dioff;
1420      UWord   absOri; /* so the absOri fields refer to dioff fields
1421                         in some other, related TempVar. */
1422   }
1423   TempVar;
1424
1425#define N_D3_VAR_STACK 48
1426
1427typedef
1428   struct {
1429      /* Contains the range stack: a stack of address ranges, one
1430         stack entry for each nested scope.
1431
1432         Some scope entries are created by function definitions
1433         (DW_AT_subprogram), and for those, we also note the GExpr
1434         derived from its DW_AT_frame_base attribute, if any.
1435         Consequently it should be possible to find, for any
1436         variable's DIE, the GExpr for the the containing function's
1437         DW_AT_frame_base by scanning back through the stack to find
1438         the nearest entry associated with a function.  This somewhat
1439         elaborate scheme is provided so as to make it possible to
1440         obtain the correct DW_AT_frame_base expression even in the
1441         presence of nested functions (or to be more precise, in the
1442         presence of nested DW_AT_subprogram DIEs).
1443      */
1444      Int     sp; /* [sp] is innermost active entry; sp==-1 for empty
1445                     stack */
1446      XArray* ranges[N_D3_VAR_STACK]; /* XArray of AddrRange */
1447      Int     level[N_D3_VAR_STACK];  /* D3 DIE levels */
1448      Bool    isFunc[N_D3_VAR_STACK]; /* from DW_AT_subprogram? */
1449      GExpr*  fbGX[N_D3_VAR_STACK];   /* if isFunc, contains the FB
1450                                         expr, else NULL */
1451      /* The file name table.  Is a mapping from integer index to the
1452         (permanent) copy of the string in in DebugInfo's .strchunks. */
1453      XArray* /* of UChar* */ filenameTable;
1454   }
1455   D3VarParser;
1456
1457static void varstack_show ( D3VarParser* parser, const HChar* str ) {
1458   Word i, j;
1459   VG_(printf)("  varstack (%s) {\n", str);
1460   for (i = 0; i <= parser->sp; i++) {
1461      XArray* xa = parser->ranges[i];
1462      vg_assert(xa);
1463      VG_(printf)("    [%ld] (level %d)", i, parser->level[i]);
1464      if (parser->isFunc[i]) {
1465         VG_(printf)(" (fbGX=%p)", parser->fbGX[i]);
1466      } else {
1467         vg_assert(parser->fbGX[i] == NULL);
1468      }
1469      VG_(printf)(": ");
1470      if (VG_(sizeXA)( xa ) == 0) {
1471         VG_(printf)("** empty PC range array **");
1472      } else {
1473         for (j = 0; j < VG_(sizeXA)( xa ); j++) {
1474            AddrRange* range = (AddrRange*) VG_(indexXA)( xa, j );
1475            vg_assert(range);
1476            VG_(printf)("[%#lx,%#lx] ", range->aMin, range->aMax);
1477         }
1478      }
1479      VG_(printf)("\n");
1480   }
1481   VG_(printf)("  }\n");
1482}
1483
1484/* Remove from the stack, all entries with .level > 'level' */
1485static
1486void varstack_preen ( D3VarParser* parser, Bool td3, Int level )
1487{
1488   Bool changed = False;
1489   vg_assert(parser->sp < N_D3_VAR_STACK);
1490   while (True) {
1491      vg_assert(parser->sp >= -1);
1492      if (parser->sp == -1) break;
1493      if (parser->level[parser->sp] <= level) break;
1494      if (0)
1495         TRACE_D3("BBBBAAAA varstack_pop [newsp=%d]\n", parser->sp-1);
1496      vg_assert(parser->ranges[parser->sp]);
1497      /* Who allocated this xa?  get_range_list() or
1498         unitary_range_list(). */
1499      VG_(deleteXA)( parser->ranges[parser->sp] );
1500      parser->ranges[parser->sp] = NULL;
1501      parser->level[parser->sp]  = 0;
1502      parser->isFunc[parser->sp] = False;
1503      parser->fbGX[parser->sp]   = NULL;
1504      parser->sp--;
1505      changed = True;
1506   }
1507   if (changed && td3)
1508      varstack_show( parser, "after preen" );
1509}
1510
1511static void varstack_push ( CUConst* cc,
1512                            D3VarParser* parser,
1513                            Bool td3,
1514                            XArray* ranges, Int level,
1515                            Bool    isFunc, GExpr* fbGX ) {
1516   if (0)
1517   TRACE_D3("BBBBAAAA varstack_push[newsp=%d]: %d  %p\n",
1518            parser->sp+1, level, ranges);
1519
1520   /* First we need to zap everything >= 'level', as we are about to
1521      replace any previous entry at 'level', so .. */
1522   varstack_preen(parser, /*td3*/False, level-1);
1523
1524   vg_assert(parser->sp >= -1);
1525   vg_assert(parser->sp < N_D3_VAR_STACK);
1526   if (parser->sp == N_D3_VAR_STACK-1)
1527      cc->barf("varstack_push: N_D3_VAR_STACK is too low; "
1528               "increase and recompile");
1529   if (parser->sp >= 0)
1530      vg_assert(parser->level[parser->sp] < level);
1531   parser->sp++;
1532   vg_assert(parser->ranges[parser->sp] == NULL);
1533   vg_assert(parser->level[parser->sp]  == 0);
1534   vg_assert(parser->isFunc[parser->sp] == False);
1535   vg_assert(parser->fbGX[parser->sp]   == NULL);
1536   vg_assert(ranges != NULL);
1537   if (!isFunc) vg_assert(fbGX == NULL);
1538   parser->ranges[parser->sp] = ranges;
1539   parser->level[parser->sp]  = level;
1540   parser->isFunc[parser->sp] = isFunc;
1541   parser->fbGX[parser->sp]   = fbGX;
1542   if (td3)
1543      varstack_show( parser, "after push" );
1544}
1545
1546
1547/* cts is derived from a DW_AT_location and so refers either to a
1548   location expression or to a location list.  Figure out which, and
1549   in both cases bundle the expression or location list into a
1550   so-called GExpr (guarded expression). */
1551__attribute__((noinline))
1552static GExpr* get_GX ( CUConst* cc, Bool td3, const FormContents* cts )
1553{
1554   GExpr* gexpr = NULL;
1555   if (cts->szB < 0) {
1556      /* represents a non-empty in-line location expression, and
1557         cts->u.cur points at the image bytes */
1558      gexpr = make_singleton_GX( cts->u.cur, (ULong)(- cts->szB) );
1559   }
1560   else
1561   if (cts->szB > 0) {
1562      /* represents a location list.  cts->u.val is the offset of it
1563         in .debug_loc. */
1564      if (!cc->cu_svma_known)
1565         cc->barf("get_GX: location list, but CU svma is unknown");
1566      gexpr = make_general_GX( cc, td3, cts->u.val, cc->cu_svma );
1567   }
1568   else {
1569      vg_assert(0); /* else caller is bogus */
1570   }
1571   return gexpr;
1572}
1573
1574
1575static
1576void read_filename_table( /*MOD*/D3VarParser* parser,
1577                          CUConst* cc, ULong debug_line_offset,
1578                          Bool td3 )
1579{
1580   Bool   is_dw64;
1581   Cursor c;
1582   Word   i;
1583   UShort version;
1584   UChar  opcode_base;
1585   HChar* str;
1586
1587   vg_assert(parser && cc && cc->barf);
1588   if (!ML_(sli_is_valid)(cc->escn_debug_line)
1589       || cc->escn_debug_line.szB <= debug_line_offset) {
1590      cc->barf("read_filename_table: .debug_line is missing?");
1591   }
1592
1593   init_Cursor( &c, cc->escn_debug_line, debug_line_offset, cc->barf,
1594                "Overrun whilst reading .debug_line section(1)" );
1595
1596   /* unit_length = */
1597      get_Initial_Length( &is_dw64, &c,
1598           "read_filename_table: invalid initial-length field" );
1599   version = get_UShort( &c );
1600   if (version != 2 && version != 3 && version != 4)
1601     cc->barf("read_filename_table: Only DWARF version 2, 3 and 4 line info "
1602              "is currently supported.");
1603   /*header_length              = (ULong)*/ get_Dwarfish_UWord( &c, is_dw64 );
1604   /*minimum_instruction_length = */ get_UChar( &c );
1605   if (version >= 4)
1606      /*maximum_operations_per_insn = */ get_UChar( &c );
1607   /*default_is_stmt            = */ get_UChar( &c );
1608   /*line_base                  = (Char)*/ get_UChar( &c );
1609   /*line_range                 = */ get_UChar( &c );
1610   opcode_base                = get_UChar( &c );
1611   /* skip over "standard_opcode_lengths" */
1612   for (i = 1; i < (Word)opcode_base; i++)
1613     (void)get_UChar( &c );
1614
1615   /* skip over the directory names table */
1616   while (peek_UChar(&c) != 0) {
1617     (void)get_AsciiZ(&c);
1618   }
1619   (void)get_UChar(&c); /* skip terminating zero */
1620
1621   /* Read and record the file names table */
1622   vg_assert(parser->filenameTable);
1623   vg_assert( VG_(sizeXA)( parser->filenameTable ) == 0 );
1624   /* Add a dummy index-zero entry.  DWARF3 numbers its files
1625      from 1, for some reason. */
1626   str = ML_(addStr)( cc->di, "<unknown_file>", -1 );
1627   VG_(addToXA)( parser->filenameTable, &str );
1628   while (peek_UChar(&c) != 0) {
1629      DiCursor cur = get_AsciiZ(&c);
1630      str = ML_(addStrFromCursor)( cc->di, cur );
1631      TRACE_D3("  read_filename_table: %ld %s\n",
1632               VG_(sizeXA)(parser->filenameTable), str);
1633      VG_(addToXA)( parser->filenameTable, &str );
1634      (void)get_ULEB128( &c ); /* skip directory index # */
1635      (void)get_ULEB128( &c ); /* skip last mod time */
1636      (void)get_ULEB128( &c ); /* file size */
1637   }
1638   /* We're done!  The rest of it is not interesting. */
1639}
1640
1641__attribute__((noinline))
1642static void bad_DIE_confusion(int linenr)
1643{
1644   VG_(printf)("\nparse_var_DIE(%d): confused by:\n", linenr);
1645}
1646#define goto_bad_DIE do {bad_DIE_confusion(__LINE__); goto bad_DIE;} while (0)
1647
1648__attribute__((noinline))
1649static void parse_var_DIE (
1650   /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
1651   /*MOD*/XArray* /* of TempVar* */ tempvars,
1652   /*MOD*/XArray* /* of GExpr* */ gexprs,
1653   /*MOD*/D3VarParser* parser,
1654   DW_TAG dtag,
1655   UWord posn,
1656   Int level,
1657   Cursor* c_die,
1658   Cursor* c_abbv,
1659   CUConst* cc,
1660   Bool td3
1661)
1662{
1663   FormContents cts;
1664
1665   UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
1666   UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv );
1667   Bool  debug_types_flag;
1668   Bool  alt_flag;
1669
1670   varstack_preen( parser, td3, level-1 );
1671
1672   if (dtag == DW_TAG_compile_unit
1673       || dtag == DW_TAG_type_unit
1674       || dtag == DW_TAG_partial_unit) {
1675      Bool have_lo    = False;
1676      Bool have_hi1   = False;
1677      Bool hiIsRelative = False;
1678      Bool have_range = False;
1679      Addr ip_lo    = 0;
1680      Addr ip_hi1   = 0;
1681      Addr rangeoff = 0;
1682      while (True) {
1683         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
1684         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
1685         if (attr == 0 && form == 0) break;
1686         get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
1687         if (attr == DW_AT_low_pc && cts.szB > 0) {
1688            ip_lo   = cts.u.val;
1689            have_lo = True;
1690         }
1691         if (attr == DW_AT_high_pc && cts.szB > 0) {
1692            ip_hi1   = cts.u.val;
1693            have_hi1 = True;
1694            if (form != DW_FORM_addr)
1695               hiIsRelative = True;
1696         }
1697         if (attr == DW_AT_ranges && cts.szB > 0) {
1698            rangeoff   = cts.u.val;
1699            have_range = True;
1700         }
1701         if (attr == DW_AT_stmt_list && cts.szB > 0) {
1702            read_filename_table( parser, cc, cts.u.val, td3 );
1703         }
1704      }
1705      if (have_lo && have_hi1 && hiIsRelative)
1706         ip_hi1 += ip_lo;
1707      /* Now, does this give us an opportunity to find this
1708         CU's svma? */
1709#if 0
1710      if (level == 0 && have_lo) {
1711         vg_assert(!cc->cu_svma_known); /* if this fails, it must be
1712         because we've already seen a DW_TAG_compile_unit DIE at level
1713         0.  But that can't happen, because DWARF3 only allows exactly
1714         one top level DIE per CU. */
1715         cc->cu_svma_known = True;
1716         cc->cu_svma = ip_lo;
1717         if (1)
1718            TRACE_D3("BBBBAAAA acquire CU_SVMA of %p\n", cc->cu_svma);
1719         /* Now, it may be that this DIE doesn't tell us the CU's
1720            SVMA, by way of not having a DW_AT_low_pc.  That's OK --
1721            the CU doesn't *have* to have its SVMA specified.
1722
1723            But as per last para D3 spec sec 3.1.1 ("Normal and
1724            Partial Compilation Unit Entries", "If the base address
1725            (viz, the SVMA) is undefined, then any DWARF entry of
1726            structure defined interms of the base address of that
1727            compilation unit is not valid.".  So that means, if whilst
1728            processing the children of this top level DIE (or their
1729            children, etc) we see a DW_AT_range, and cu_svma_known is
1730            False, then the DIE that contains it is (per the spec)
1731            invalid, and we can legitimately stop and complain. */
1732      }
1733#else
1734      /* .. whereas The Reality is, simply assume the SVMA is zero
1735         if it isn't specified. */
1736      if (level == 0) {
1737         vg_assert(!cc->cu_svma_known);
1738         cc->cu_svma_known = True;
1739         if (have_lo)
1740            cc->cu_svma = ip_lo;
1741         else
1742            cc->cu_svma = 0;
1743      }
1744#endif
1745      /* Do we have something that looks sane? */
1746      if (have_lo && have_hi1 && (!have_range)) {
1747         if (ip_lo < ip_hi1)
1748            varstack_push( cc, parser, td3,
1749                           unitary_range_list(ip_lo, ip_hi1 - 1),
1750                           level,
1751                           False/*isFunc*/, NULL/*fbGX*/ );
1752         else if (ip_lo == 0 && ip_hi1 == 0)
1753            /* CU has no code, presumably?
1754               Such situations have been encountered for code
1755               compiled with -ffunction-sections -fdata-sections
1756               and linked with --gc-sections. Completely
1757               eliminated CU gives such 0 lo/hi pc. Similarly
1758               to a CU which has no lo/hi/range pc, we push
1759               an empty range list. */
1760            varstack_push( cc, parser, td3,
1761                           empty_range_list(),
1762                           level,
1763                           False/*isFunc*/, NULL/*fbGX*/ );
1764      } else
1765      if ((!have_lo) && (!have_hi1) && have_range) {
1766         varstack_push( cc, parser, td3,
1767                        get_range_list( cc, td3,
1768                                        rangeoff, cc->cu_svma ),
1769                        level,
1770                        False/*isFunc*/, NULL/*fbGX*/ );
1771      } else
1772      if ((!have_lo) && (!have_hi1) && (!have_range)) {
1773         /* CU has no code, presumably? */
1774         varstack_push( cc, parser, td3,
1775                        empty_range_list(),
1776                        level,
1777                        False/*isFunc*/, NULL/*fbGX*/ );
1778      } else
1779      if (have_lo && (!have_hi1) && have_range && ip_lo == 0) {
1780         /* broken DIE created by gcc-4.3.X ?  Ignore the
1781            apparently-redundant DW_AT_low_pc and use the DW_AT_ranges
1782            instead. */
1783         varstack_push( cc, parser, td3,
1784                        get_range_list( cc, td3,
1785                                        rangeoff, cc->cu_svma ),
1786                        level,
1787                        False/*isFunc*/, NULL/*fbGX*/ );
1788      } else {
1789         if (0) VG_(printf)("I got hlo %d hhi1 %d hrange %d\n",
1790                            (Int)have_lo, (Int)have_hi1, (Int)have_range);
1791         goto_bad_DIE;
1792      }
1793   }
1794
1795   if (dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram) {
1796      Bool   have_lo    = False;
1797      Bool   have_hi1   = False;
1798      Bool   have_range = False;
1799      Bool   hiIsRelative = False;
1800      Addr   ip_lo      = 0;
1801      Addr   ip_hi1     = 0;
1802      Addr   rangeoff   = 0;
1803      Bool   isFunc     = dtag == DW_TAG_subprogram;
1804      GExpr* fbGX       = NULL;
1805      while (True) {
1806         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
1807         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
1808         if (attr == 0 && form == 0) break;
1809         get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
1810         if (attr == DW_AT_low_pc && cts.szB > 0) {
1811            ip_lo   = cts.u.val;
1812            have_lo = True;
1813         }
1814         if (attr == DW_AT_high_pc && cts.szB > 0) {
1815            ip_hi1   = cts.u.val;
1816            have_hi1 = True;
1817            if (form != DW_FORM_addr)
1818               hiIsRelative = True;
1819         }
1820         if (attr == DW_AT_ranges && cts.szB > 0) {
1821            rangeoff   = cts.u.val;
1822            have_range = True;
1823         }
1824         if (isFunc
1825             && attr == DW_AT_frame_base
1826             && cts.szB != 0 /* either scalar or nonempty block */) {
1827            fbGX = get_GX( cc, False/*td3*/, &cts );
1828            vg_assert(fbGX);
1829            VG_(addToXA)(gexprs, &fbGX);
1830         }
1831      }
1832      if (have_lo && have_hi1 && hiIsRelative)
1833         ip_hi1 += ip_lo;
1834      /* Do we have something that looks sane? */
1835      if (dtag == DW_TAG_subprogram
1836          && (!have_lo) && (!have_hi1) && (!have_range)) {
1837         /* This is legit - ignore it. Sec 3.3.3: "A subroutine entry
1838            representing a subroutine declaration that is not also a
1839            definition does not have code address or range
1840            attributes." */
1841      } else
1842      if (dtag == DW_TAG_lexical_block
1843          && (!have_lo) && (!have_hi1) && (!have_range)) {
1844         /* I believe this is legit, and means the lexical block
1845            contains no insns (whatever that might mean).  Ignore. */
1846      } else
1847      if (have_lo && have_hi1 && (!have_range)) {
1848         /* This scope supplies just a single address range. */
1849         if (ip_lo < ip_hi1)
1850            varstack_push( cc, parser, td3,
1851                           unitary_range_list(ip_lo, ip_hi1 - 1),
1852                           level, isFunc, fbGX );
1853      } else
1854      if ((!have_lo) && (!have_hi1) && have_range) {
1855         /* This scope supplies multiple address ranges via the use of
1856            a range list. */
1857         varstack_push( cc, parser, td3,
1858                        get_range_list( cc, td3,
1859                                        rangeoff, cc->cu_svma ),
1860                        level, isFunc, fbGX );
1861      } else
1862      if (have_lo && (!have_hi1) && (!have_range)) {
1863         /* This scope is bogus.  The D3 spec sec 3.4 (Lexical Block
1864            Entries) says fairly clearly that a scope must have either
1865            _range or (_low_pc and _high_pc). */
1866         /* The spec is a bit ambiguous though.  Perhaps a single byte
1867            range is intended?  See sec 2.17 (Code Addresses And Ranges) */
1868         /* This case is here because icc9 produced this:
1869         <2><13bd>: DW_TAG_lexical_block
1870            DW_AT_decl_line   : 5229
1871            DW_AT_decl_column : 37
1872            DW_AT_decl_file   : 1
1873            DW_AT_low_pc      : 0x401b03
1874         */
1875         /* Ignore (seems safe than pushing a single byte range) */
1876      } else
1877         goto_bad_DIE;
1878   }
1879
1880   if (dtag == DW_TAG_variable || dtag == DW_TAG_formal_parameter) {
1881      HChar* name        = NULL;
1882      UWord  typeR       = D3_INVALID_CUOFF;
1883      Bool   global      = False;
1884      GExpr* gexpr       = NULL;
1885      Int    n_attrs     = 0;
1886      UWord  abs_ori     = (UWord)D3_INVALID_CUOFF;
1887      Int    lineNo      = 0;
1888      HChar* fileName    = NULL;
1889      while (True) {
1890         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
1891         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
1892         if (attr == 0 && form == 0) break;
1893         get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
1894         n_attrs++;
1895         if (attr == DW_AT_name && cts.szB < 0) {
1896            name = ML_(addStrFromCursor)( cc->di, cts.u.cur );
1897         }
1898         if (attr == DW_AT_location
1899             && cts.szB != 0 /* either scalar or nonempty block */) {
1900            gexpr = get_GX( cc, False/*td3*/, &cts );
1901            vg_assert(gexpr);
1902            VG_(addToXA)(gexprs, &gexpr);
1903         }
1904         if (attr == DW_AT_type && cts.szB > 0) {
1905            typeR = cook_die_using_form( cc, cts.u.val, form );
1906         }
1907         if (attr == DW_AT_external && cts.szB > 0 && cts.u.val > 0) {
1908            global = True;
1909         }
1910         if (attr == DW_AT_abstract_origin && cts.szB > 0) {
1911            abs_ori = (UWord)cts.u.val;
1912         }
1913         if (attr == DW_AT_declaration && cts.szB > 0 && cts.u.val > 0) {
1914            /*declaration = True;*/
1915         }
1916         if (attr == DW_AT_decl_line && cts.szB > 0) {
1917            lineNo = (Int)cts.u.val;
1918         }
1919         if (attr == DW_AT_decl_file && cts.szB > 0) {
1920            Int ftabIx = (Int)cts.u.val;
1921            if (ftabIx >= 1
1922                && ftabIx < VG_(sizeXA)( parser->filenameTable )) {
1923               fileName = *(HChar**)
1924                          VG_(indexXA)( parser->filenameTable, ftabIx );
1925               vg_assert(fileName);
1926            }
1927            if (0) VG_(printf)("XXX filename = %s\n", fileName);
1928         }
1929      }
1930      if (!global && dtag == DW_TAG_variable && level == 1) {
1931         /* Case of a static variable. It is better to declare
1932            it global as the variable is not really related to
1933            a PC range, as its address can be used by program
1934            counters outside of the ranges where it is visible . */
1935         global = True;
1936      }
1937
1938      /* We'll collect it under if one of the following three
1939         conditions holds:
1940         (1) has location and type    -> completed
1941         (2) has type only            -> is an abstract instance
1942         (3) has location and abs_ori -> is a concrete instance
1943         Name, filename and line number are all optional frills.
1944      */
1945      if ( /* 1 */ (gexpr && typeR != D3_INVALID_CUOFF)
1946           /* 2 */ || (typeR != D3_INVALID_CUOFF)
1947           /* 3 */ || (gexpr && abs_ori != (UWord)D3_INVALID_CUOFF) ) {
1948
1949         /* Add this variable to the list of interesting looking
1950            variables.  Crucially, note along with it the address
1951            range(s) associated with the variable, which for locals
1952            will be the address ranges at the top of the varparser's
1953            stack. */
1954         GExpr*   fbGX = NULL;
1955         Word     i, nRanges;
1956         XArray*  /* of AddrRange */ xa;
1957         TempVar* tv;
1958         /* Stack can't be empty; we put a dummy entry on it for the
1959            entire address range before starting with the DIEs for
1960            this CU. */
1961         vg_assert(parser->sp >= 0);
1962
1963         /* If this is a local variable (non-global), try to find
1964            the GExpr for the DW_AT_frame_base of the containing
1965            function.  It should have been pushed on the stack at the
1966            time we encountered its DW_TAG_subprogram DIE, so the way
1967            to find it is to scan back down the stack looking for it.
1968            If there isn't an enclosing stack entry marked 'isFunc'
1969            then we must be seeing variable or formal param DIEs
1970            outside of a function, so we deem the Dwarf to be
1971            malformed if that happens.  Note that the fbGX may be NULL
1972            if the containing DT_TAG_subprogram didn't supply a
1973            DW_AT_frame_base -- that's OK, but there must actually be
1974            a containing DW_TAG_subprogram. */
1975         if (!global) {
1976            Bool found = False;
1977            for (i = parser->sp; i >= 0; i--) {
1978               if (parser->isFunc[i]) {
1979                  fbGX = parser->fbGX[i];
1980                  found = True;
1981                  break;
1982               }
1983            }
1984            if (!found) {
1985               if (0 && VG_(clo_verbosity) >= 0) {
1986                  VG_(message)(Vg_DebugMsg,
1987                     "warning: parse_var_DIE: non-global variable "
1988                     "outside DW_TAG_subprogram\n");
1989               }
1990               /* goto_bad_DIE; */
1991               /* This seems to happen a lot.  Just ignore it -- if,
1992                  when we come to evaluation of the location (guarded)
1993                  expression, it requires a frame base value, and
1994                  there's no expression for that, then evaluation as a
1995                  whole will fail.  Harmless - a bit of a waste of
1996                  cycles but nothing more. */
1997            }
1998         }
1999
2000         /* re "global ? 0 : parser->sp" (twice), if the var is
2001            marked 'global' then we must put it at the global scope,
2002            as only the global scope (level 0) covers the entire PC
2003            address space.  It is asserted elsewhere that level 0
2004            always covers the entire address space. */
2005         xa = parser->ranges[global ? 0 : parser->sp];
2006         nRanges = VG_(sizeXA)(xa);
2007         vg_assert(nRanges >= 0);
2008
2009         tv = ML_(dinfo_zalloc)( "di.readdwarf3.pvD.1", sizeof(TempVar) );
2010         tv->name   = name;
2011         tv->level  = global ? 0 : parser->sp;
2012         tv->typeR  = typeR;
2013         tv->gexpr  = gexpr;
2014         tv->fbGX   = fbGX;
2015         tv->fName  = fileName;
2016         tv->fLine  = lineNo;
2017         tv->dioff  = posn;
2018         tv->absOri = abs_ori;
2019
2020         /* See explanation on definition of type TempVar for the
2021            reason for this elaboration. */
2022         tv->nRanges = nRanges;
2023         tv->rngOneMin = 0;
2024         tv->rngOneMax = 0;
2025         tv->rngMany = NULL;
2026         if (nRanges == 1) {
2027            AddrRange* range = VG_(indexXA)(xa, 0);
2028            tv->rngOneMin = range->aMin;
2029            tv->rngOneMax = range->aMax;
2030         }
2031         else if (nRanges > 1) {
2032            /* See if we already have a range list which is
2033               structurally identical.  If so, use that; if not, clone
2034               this one, and add it to our collection. */
2035            UWord keyW, valW;
2036            if (VG_(lookupFM)( rangestree, &keyW, &valW, (UWord)xa )) {
2037               XArray* old = (XArray*)keyW;
2038               tl_assert(valW == 0);
2039               tl_assert(old != xa);
2040               tv->rngMany = old;
2041            } else {
2042               XArray* cloned = VG_(cloneXA)( "di.readdwarf3.pvD.2", xa );
2043               tv->rngMany = cloned;
2044               VG_(addToFM)( rangestree, (UWord)cloned, 0 );
2045            }
2046         }
2047
2048         VG_(addToXA)( tempvars, &tv );
2049
2050         TRACE_D3("  Recording this variable, with %ld PC range(s)\n",
2051                  VG_(sizeXA)(xa) );
2052         /* collect stats on how effective the ->ranges special
2053            casing is */
2054         if (0) {
2055            static Int ntot=0, ngt=0;
2056            ntot++;
2057            if (tv->rngMany) ngt++;
2058            if (0 == (ntot % 100000))
2059               VG_(printf)("XXXX %d tot, %d cloned\n", ntot, ngt);
2060         }
2061
2062      }
2063
2064      /* Here are some other weird cases seen in the wild:
2065
2066            We have a variable with a name and a type, but no
2067            location.  I guess that's a sign that it has been
2068            optimised away.  Ignore it.  Here's an example:
2069
2070            static Int lc_compar(void* n1, void* n2) {
2071               MC_Chunk* mc1 = *(MC_Chunk**)n1;
2072               MC_Chunk* mc2 = *(MC_Chunk**)n2;
2073               return (mc1->data < mc2->data ? -1 : 1);
2074            }
2075
2076            Both mc1 and mc2 are like this
2077            <2><5bc>: Abbrev Number: 21 (DW_TAG_variable)
2078                DW_AT_name        : mc1
2079                DW_AT_decl_file   : 1
2080                DW_AT_decl_line   : 216
2081                DW_AT_type        : <5d3>
2082
2083            whereas n1 and n2 do have locations specified.
2084
2085            ---------------------------------------------
2086
2087            We see a DW_TAG_formal_parameter with a type, but
2088            no name and no location.  It's probably part of a function type
2089            construction, thusly, hence ignore it:
2090         <1><2b4>: Abbrev Number: 12 (DW_TAG_subroutine_type)
2091             DW_AT_sibling     : <2c9>
2092             DW_AT_prototyped  : 1
2093             DW_AT_type        : <114>
2094         <2><2be>: Abbrev Number: 13 (DW_TAG_formal_parameter)
2095             DW_AT_type        : <13e>
2096         <2><2c3>: Abbrev Number: 13 (DW_TAG_formal_parameter)
2097             DW_AT_type        : <133>
2098
2099            ---------------------------------------------
2100
2101            Is very minimal, like this:
2102            <4><81d>: Abbrev Number: 44 (DW_TAG_variable)
2103                DW_AT_abstract_origin: <7ba>
2104            What that signifies I have no idea.  Ignore.
2105
2106            ----------------------------------------------
2107
2108            Is very minimal, like this:
2109            <200f>: DW_TAG_formal_parameter
2110                DW_AT_abstract_ori: <1f4c>
2111                DW_AT_location    : 13440
2112            What that signifies I have no idea.  Ignore.
2113            It might be significant, though: the variable at least
2114            has a location and so might exist somewhere.
2115            Maybe we should handle this.
2116
2117            ---------------------------------------------
2118
2119            <22407>: DW_TAG_variable
2120              DW_AT_name        : (indirect string, offset: 0x6579):
2121                                  vgPlain_trampoline_stuff_start
2122              DW_AT_decl_file   : 29
2123              DW_AT_decl_line   : 56
2124              DW_AT_external    : 1
2125              DW_AT_declaration : 1
2126
2127            Nameless and typeless variable that has a location?  Who
2128            knows.  Not me.
2129            <2><3d178>: Abbrev Number: 22 (DW_TAG_variable)
2130                 DW_AT_location    : 9 byte block: 3 c0 c7 13 38 0 0 0 0
2131                                     (DW_OP_addr: 3813c7c0)
2132
2133            No, really.  Check it out.  gcc is quite simply borked.
2134            <3><168cc>: Abbrev Number: 141 (DW_TAG_variable)
2135            // followed by no attributes, and the next DIE is a sibling,
2136            // not a child
2137            */
2138   }
2139   return;
2140
2141  bad_DIE:
2142   set_position_of_Cursor( c_die,  saved_die_c_offset );
2143   set_position_of_Cursor( c_abbv, saved_abbv_c_offset );
2144   posn = uncook_die( cc, posn, &debug_types_flag, &alt_flag );
2145   VG_(printf)(" <%d><%lx>: %s", level, posn, ML_(pp_DW_TAG)( dtag ) );
2146   if (debug_types_flag) {
2147      VG_(printf)(" (in .debug_types)");
2148   }
2149   else if (alt_flag) {
2150      VG_(printf)(" (in alternate .debug_info)");
2151   }
2152   VG_(printf)("\n");
2153   while (True) {
2154      DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2155      DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2156      if (attr == 0 && form == 0) break;
2157      VG_(printf)("     %18s: ", ML_(pp_DW_AT)(attr));
2158      /* Get the form contents, so as to print them */
2159      get_Form_contents( &cts, cc, c_die, True, form );
2160      VG_(printf)("\t\n");
2161   }
2162   VG_(printf)("\n");
2163   cc->barf("parse_var_DIE: confused by the above DIE");
2164   /*NOTREACHED*/
2165}
2166
2167
2168/*------------------------------------------------------------*/
2169/*---                                                      ---*/
2170/*--- Parsing of type-related DIEs                         ---*/
2171/*---                                                      ---*/
2172/*------------------------------------------------------------*/
2173
2174#define N_D3_TYPE_STACK 16
2175
2176typedef
2177   struct {
2178      /* What source language?  'A'=Ada83/95,
2179                                'C'=C/C++,
2180                                'F'=Fortran,
2181                                '?'=other
2182         Established once per compilation unit. */
2183      UChar language;
2184      /* A stack of types which are currently under construction */
2185      Int   sp; /* [sp] is innermost active entry; sp==-1 for empty
2186                   stack */
2187      /* Note that the TyEnts in qparentE are temporary copies of the
2188         ones accumulating in the main tyent array.  So it is not safe
2189         to free up anything on them when popping them off the stack
2190         (iow, it isn't safe to use TyEnt__make_EMPTY on them).  Just
2191         memset them to zero when done. */
2192      TyEnt qparentE[N_D3_TYPE_STACK]; /* parent TyEnts */
2193      Int   qlevel[N_D3_TYPE_STACK];
2194
2195   }
2196   D3TypeParser;
2197
2198static void typestack_show ( D3TypeParser* parser, const HChar* str ) {
2199   Word i;
2200   VG_(printf)("  typestack (%s) {\n", str);
2201   for (i = 0; i <= parser->sp; i++) {
2202      VG_(printf)("    [%ld] (level %d): ", i, parser->qlevel[i]);
2203      ML_(pp_TyEnt)( &parser->qparentE[i] );
2204      VG_(printf)("\n");
2205   }
2206   VG_(printf)("  }\n");
2207}
2208
2209/* Remove from the stack, all entries with .level > 'level' */
2210static
2211void typestack_preen ( D3TypeParser* parser, Bool td3, Int level )
2212{
2213   Bool changed = False;
2214   vg_assert(parser->sp < N_D3_TYPE_STACK);
2215   while (True) {
2216      vg_assert(parser->sp >= -1);
2217      if (parser->sp == -1) break;
2218      if (parser->qlevel[parser->sp] <= level) break;
2219      if (0)
2220         TRACE_D3("BBBBAAAA typestack_pop [newsp=%d]\n", parser->sp-1);
2221      vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
2222      VG_(memset)(&parser->qparentE[parser->sp], 0, sizeof(TyEnt));
2223      parser->qparentE[parser->sp].cuOff = D3_INVALID_CUOFF;
2224      parser->qparentE[parser->sp].tag = Te_EMPTY;
2225      parser->qlevel[parser->sp] = 0;
2226      parser->sp--;
2227      changed = True;
2228   }
2229   if (changed && td3)
2230      typestack_show( parser, "after preen" );
2231}
2232
2233static Bool typestack_is_empty ( D3TypeParser* parser ) {
2234   vg_assert(parser->sp >= -1 && parser->sp < N_D3_TYPE_STACK);
2235   return parser->sp == -1;
2236}
2237
2238static void typestack_push ( CUConst* cc,
2239                             D3TypeParser* parser,
2240                             Bool td3,
2241                             TyEnt* parentE, Int level ) {
2242   if (0)
2243   TRACE_D3("BBBBAAAA typestack_push[newsp=%d]: %d  %05lx\n",
2244            parser->sp+1, level, parentE->cuOff);
2245
2246   /* First we need to zap everything >= 'level', as we are about to
2247      replace any previous entry at 'level', so .. */
2248   typestack_preen(parser, /*td3*/False, level-1);
2249
2250   vg_assert(parser->sp >= -1);
2251   vg_assert(parser->sp < N_D3_TYPE_STACK);
2252   if (parser->sp == N_D3_TYPE_STACK-1)
2253      cc->barf("typestack_push: N_D3_TYPE_STACK is too low; "
2254               "increase and recompile");
2255   if (parser->sp >= 0)
2256      vg_assert(parser->qlevel[parser->sp] < level);
2257   parser->sp++;
2258   vg_assert(parser->qparentE[parser->sp].tag == Te_EMPTY);
2259   vg_assert(parser->qlevel[parser->sp]  == 0);
2260   vg_assert(parentE);
2261   vg_assert(ML_(TyEnt__is_type)(parentE));
2262   vg_assert(parentE->cuOff != D3_INVALID_CUOFF);
2263   parser->qparentE[parser->sp] = *parentE;
2264   parser->qlevel[parser->sp]  = level;
2265   if (td3)
2266      typestack_show( parser, "after push" );
2267}
2268
2269/* True if the subrange type being parsed gives the bounds of an array. */
2270static Bool subrange_type_denotes_array_bounds ( D3TypeParser* parser,
2271                                                 DW_TAG dtag ) {
2272   vg_assert(dtag == DW_TAG_subrange_type);
2273   /* For most languages, a subrange_type dtag always gives the
2274      bounds of an array.
2275      For Ada, there are additional conditions as a subrange_type
2276      is also used for other purposes. */
2277   if (parser->language != 'A')
2278      /* not Ada, so it definitely denotes an array bound. */
2279      return True;
2280   else
2281      /* Extra constraints for Ada: it only denotes an array bound if .. */
2282      return (! typestack_is_empty(parser)
2283              && parser->qparentE[parser->sp].tag == Te_TyArray);
2284}
2285
2286/* Parse a type-related DIE.  'parser' holds the current parser state.
2287   'admin' is where the completed types are dumped.  'dtag' is the tag
2288   for this DIE.  'c_die' points to the start of the data fields (FORM
2289   stuff) for the DIE.  c_abbv points to the start of the (name,form)
2290   pairs which describe the DIE.
2291
2292   We may find the DIE uninteresting, in which case we should ignore
2293   it.
2294
2295   What happens: the DIE is examined.  If uninteresting, it is ignored.
2296   Otherwise, the DIE gives rise to two things:
2297
2298   (1) the offset of this DIE in the CU -- the cuOffset, a UWord
2299   (2) a TyAdmin structure, which holds the type, or related stuff
2300
2301   (2) is added at the end of 'tyadmins', at some index, say 'i'.
2302
2303   A pair (cuOffset, i) is added to 'tydict'.
2304
2305   Hence 'tyadmins' holds the actual type entities, and 'tydict' holds
2306   a mapping from cuOffset to the index of the corresponding entry in
2307   'tyadmin'.
2308
2309   When resolving a cuOffset to a TyAdmin, first look up the cuOffset
2310   in the tydict (by binary search).  This gives an index into
2311   tyadmins, and the required entity lives in tyadmins at that index.
2312*/
2313__attribute__((noinline))
2314static void parse_type_DIE ( /*MOD*/XArray* /* of TyEnt */ tyents,
2315                             /*MOD*/D3TypeParser* parser,
2316                             DW_TAG dtag,
2317                             UWord posn,
2318                             Int level,
2319                             Cursor* c_die,
2320                             Cursor* c_abbv,
2321                             CUConst* cc,
2322                             Bool td3 )
2323{
2324   FormContents cts;
2325   TyEnt typeE;
2326   TyEnt atomE;
2327   TyEnt fieldE;
2328   TyEnt boundE;
2329   Bool  debug_types_flag;
2330   Bool  alt_flag;
2331
2332   UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
2333   UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv );
2334
2335   VG_(memset)( &typeE,  0xAA, sizeof(typeE) );
2336   VG_(memset)( &atomE,  0xAA, sizeof(atomE) );
2337   VG_(memset)( &fieldE, 0xAA, sizeof(fieldE) );
2338   VG_(memset)( &boundE, 0xAA, sizeof(boundE) );
2339
2340   /* If we've returned to a level at or above any previously noted
2341      parent, un-note it, so we don't believe we're still collecting
2342      its children. */
2343   typestack_preen( parser, td3, level-1 );
2344
2345   if (dtag == DW_TAG_compile_unit
2346       || dtag == DW_TAG_type_unit
2347       || dtag == DW_TAG_partial_unit) {
2348      /* See if we can find DW_AT_language, since it is important for
2349         establishing array bounds (see DW_TAG_subrange_type below in
2350         this fn) */
2351      while (True) {
2352         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2353         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2354         if (attr == 0 && form == 0) break;
2355         get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2356         if (attr != DW_AT_language)
2357            continue;
2358         if (cts.szB <= 0)
2359           goto_bad_DIE;
2360         switch (cts.u.val) {
2361            case DW_LANG_C89: case DW_LANG_C:
2362            case DW_LANG_C_plus_plus: case DW_LANG_ObjC:
2363            case DW_LANG_ObjC_plus_plus: case DW_LANG_UPC:
2364            case DW_LANG_Upc: case DW_LANG_C99:
2365               parser->language = 'C'; break;
2366            case DW_LANG_Fortran77: case DW_LANG_Fortran90:
2367            case DW_LANG_Fortran95:
2368               parser->language = 'F'; break;
2369            case DW_LANG_Ada83: case DW_LANG_Ada95:
2370               parser->language = 'A'; break;
2371            case DW_LANG_Cobol74:
2372            case DW_LANG_Cobol85: case DW_LANG_Pascal83:
2373            case DW_LANG_Modula2: case DW_LANG_Java:
2374            case DW_LANG_PLI:
2375            case DW_LANG_D: case DW_LANG_Python:
2376            case DW_LANG_Mips_Assembler:
2377               parser->language = '?'; break;
2378            default:
2379               goto_bad_DIE;
2380         }
2381      }
2382   }
2383
2384   if (dtag == DW_TAG_base_type) {
2385      /* We can pick up a new base type any time. */
2386      VG_(memset)(&typeE, 0, sizeof(typeE));
2387      typeE.cuOff = D3_INVALID_CUOFF;
2388      typeE.tag   = Te_TyBase;
2389      while (True) {
2390         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2391         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2392         if (attr == 0 && form == 0) break;
2393         get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2394         if (attr == DW_AT_name && cts.szB < 0) {
2395            typeE.Te.TyBase.name
2396               = ML_(cur_read_strdup)( cts.u.cur,
2397                                       "di.readdwarf3.ptD.base_type.1" );
2398         }
2399         if (attr == DW_AT_byte_size && cts.szB > 0) {
2400            typeE.Te.TyBase.szB = cts.u.val;
2401         }
2402         if (attr == DW_AT_encoding && cts.szB > 0) {
2403            switch (cts.u.val) {
2404               case DW_ATE_unsigned: case DW_ATE_unsigned_char:
2405               case DW_ATE_UTF: /* since DWARF4, e.g. char16_t from C++ */
2406               case DW_ATE_boolean:/* FIXME - is this correct? */
2407               case DW_ATE_unsigned_fixed:
2408                  typeE.Te.TyBase.enc = 'U'; break;
2409               case DW_ATE_signed: case DW_ATE_signed_char:
2410               case DW_ATE_signed_fixed:
2411                  typeE.Te.TyBase.enc = 'S'; break;
2412               case DW_ATE_float:
2413                  typeE.Te.TyBase.enc = 'F'; break;
2414               case DW_ATE_complex_float:
2415                  typeE.Te.TyBase.enc = 'C'; break;
2416               default:
2417                  goto_bad_DIE;
2418            }
2419         }
2420      }
2421
2422      /* Invent a name if it doesn't have one.  gcc-4.3
2423         -ftree-vectorize is observed to emit nameless base types. */
2424      if (!typeE.Te.TyBase.name)
2425         typeE.Te.TyBase.name
2426            = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.2",
2427                                 "<anon_base_type>" );
2428
2429      /* Do we have something that looks sane? */
2430      if (/* must have a name */
2431          typeE.Te.TyBase.name == NULL
2432          /* and a plausible size.  Yes, really 32: "complex long
2433             double" apparently has size=32 */
2434          || typeE.Te.TyBase.szB < 0 || typeE.Te.TyBase.szB > 32
2435          /* and a plausible encoding */
2436          || (typeE.Te.TyBase.enc != 'U'
2437              && typeE.Te.TyBase.enc != 'S'
2438              && typeE.Te.TyBase.enc != 'F'
2439              && typeE.Te.TyBase.enc != 'C'))
2440         goto_bad_DIE;
2441      /* Last minute hack: if we see this
2442         <1><515>: DW_TAG_base_type
2443             DW_AT_byte_size   : 0
2444             DW_AT_encoding    : 5
2445             DW_AT_name        : void
2446         convert it into a real Void type. */
2447      if (typeE.Te.TyBase.szB == 0
2448          && 0 == VG_(strcmp)("void", typeE.Te.TyBase.name)) {
2449         ML_(TyEnt__make_EMPTY)(&typeE);
2450         typeE.tag = Te_TyVoid;
2451         typeE.Te.TyVoid.isFake = False; /* it's a real one! */
2452      }
2453
2454      goto acquire_Type;
2455   }
2456
2457   /*
2458    * An example of DW_TAG_rvalue_reference_type:
2459    *
2460    * $ readelf --debug-dump /usr/lib/debug/usr/lib/libstdc++.so.6.0.16.debug
2461    *  <1><1014>: Abbrev Number: 55 (DW_TAG_rvalue_reference_type)
2462    *     <1015>   DW_AT_byte_size   : 4
2463    *     <1016>   DW_AT_type        : <0xe52>
2464    */
2465   if (dtag == DW_TAG_pointer_type || dtag == DW_TAG_reference_type
2466       || dtag == DW_TAG_ptr_to_member_type
2467       || dtag == DW_TAG_rvalue_reference_type) {
2468      /* This seems legit for _pointer_type and _reference_type.  I
2469         don't know if rolling _ptr_to_member_type in here really is
2470         legit, but it's better than not handling it at all. */
2471      VG_(memset)(&typeE, 0, sizeof(typeE));
2472      typeE.cuOff = D3_INVALID_CUOFF;
2473      switch (dtag) {
2474      case DW_TAG_pointer_type:
2475         typeE.tag = Te_TyPtr;
2476         break;
2477      case DW_TAG_reference_type:
2478         typeE.tag = Te_TyRef;
2479         break;
2480      case DW_TAG_ptr_to_member_type:
2481         typeE.tag = Te_TyPtrMbr;
2482         break;
2483      case DW_TAG_rvalue_reference_type:
2484         typeE.tag = Te_TyRvalRef;
2485         break;
2486      default:
2487         vg_assert(False);
2488      }
2489      /* target type defaults to void */
2490      typeE.Te.TyPorR.typeR = D3_FAKEVOID_CUOFF;
2491      /* These four type kinds don't *have* to specify their size, in
2492         which case we assume it's a machine word.  But if they do
2493         specify it, it must be a machine word :-)  This probably
2494         assumes that the word size of the Dwarf3 we're reading is the
2495         same size as that on the machine.  gcc appears to give a size
2496         whereas icc9 doesn't. */
2497      typeE.Te.TyPorR.szB = sizeof(UWord);
2498      while (True) {
2499         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2500         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2501         if (attr == 0 && form == 0) break;
2502         get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2503         if (attr == DW_AT_byte_size && cts.szB > 0) {
2504            typeE.Te.TyPorR.szB = cts.u.val;
2505         }
2506         if (attr == DW_AT_type && cts.szB > 0) {
2507            typeE.Te.TyPorR.typeR
2508               = cook_die_using_form( cc, (UWord)cts.u.val, form );
2509         }
2510      }
2511      /* Do we have something that looks sane? */
2512      if (typeE.Te.TyPorR.szB != sizeof(UWord))
2513         goto_bad_DIE;
2514      else
2515         goto acquire_Type;
2516   }
2517
2518   if (dtag == DW_TAG_enumeration_type) {
2519      /* Create a new Type to hold the results. */
2520      VG_(memset)(&typeE, 0, sizeof(typeE));
2521      typeE.cuOff = posn;
2522      typeE.tag   = Te_TyEnum;
2523      Bool is_decl = False;
2524      typeE.Te.TyEnum.atomRs
2525         = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.enum_type.1",
2526                       ML_(dinfo_free),
2527                       sizeof(UWord) );
2528      while (True) {
2529         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2530         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2531         if (attr == 0 && form == 0) break;
2532         get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2533         if (attr == DW_AT_name && cts.szB < 0) {
2534            typeE.Te.TyEnum.name
2535               = ML_(cur_read_strdup)( cts.u.cur,
2536                                       "di.readdwarf3.pTD.enum_type.2" );
2537         }
2538         if (attr == DW_AT_byte_size && cts.szB > 0) {
2539            typeE.Te.TyEnum.szB = cts.u.val;
2540         }
2541         if (attr == DW_AT_declaration) {
2542            is_decl = True;
2543         }
2544      }
2545
2546      if (!typeE.Te.TyEnum.name)
2547         typeE.Te.TyEnum.name
2548            = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.3",
2549                                 "<anon_enum_type>" );
2550
2551      /* Do we have something that looks sane? */
2552      if (typeE.Te.TyEnum.szB == 0
2553          /* we must know the size */
2554          /* but not for Ada, which uses such dummy
2555             enumerations as helper for gdb ada mode.
2556             Also GCC allows incomplete enums as GNU extension.
2557             http://gcc.gnu.org/onlinedocs/gcc/Incomplete-Enums.html
2558             These are marked as DW_AT_declaration and won't have
2559             a size. They can only be used in declaration or as
2560             pointer types.  You can't allocate variables or storage
2561             using such an enum type. (Also GCC seems to have a bug
2562             that will put such an enumeration_type into a .debug_types
2563             unit which should only contain complete types.) */
2564          && (parser->language != 'A' && !is_decl)) {
2565         goto_bad_DIE;
2566      }
2567
2568      /* On't stack! */
2569      typestack_push( cc, parser, td3, &typeE, level );
2570      goto acquire_Type;
2571   }
2572
2573   /* gcc (GCC) 4.4.0 20081017 (experimental) occasionally produces
2574      DW_TAG_enumerator with only a DW_AT_name but no
2575      DW_AT_const_value.  This is in violation of the Dwarf3 standard,
2576      and appears to be a new "feature" of gcc - versions 4.3.x and
2577      earlier do not appear to do this.  So accept DW_TAG_enumerator
2578      which only have a name but no value.  An example:
2579
2580      <1><180>: Abbrev Number: 6 (DW_TAG_enumeration_type)
2581         <181>   DW_AT_name        : (indirect string, offset: 0xda70):
2582                                     QtMsgType
2583         <185>   DW_AT_byte_size   : 4
2584         <186>   DW_AT_decl_file   : 14
2585         <187>   DW_AT_decl_line   : 1480
2586         <189>   DW_AT_sibling     : <0x1a7>
2587      <2><18d>: Abbrev Number: 7 (DW_TAG_enumerator)
2588         <18e>   DW_AT_name        : (indirect string, offset: 0x9e18):
2589                                     QtDebugMsg
2590      <2><192>: Abbrev Number: 7 (DW_TAG_enumerator)
2591         <193>   DW_AT_name        : (indirect string, offset: 0x1505f):
2592                                     QtWarningMsg
2593      <2><197>: Abbrev Number: 7 (DW_TAG_enumerator)
2594         <198>   DW_AT_name        : (indirect string, offset: 0x16f4a):
2595                                     QtCriticalMsg
2596      <2><19c>: Abbrev Number: 7 (DW_TAG_enumerator)
2597         <19d>   DW_AT_name        : (indirect string, offset: 0x156dd):
2598                                     QtFatalMsg
2599      <2><1a1>: Abbrev Number: 7 (DW_TAG_enumerator)
2600         <1a2>   DW_AT_name        : (indirect string, offset: 0x13660):
2601                                     QtSystemMsg
2602   */
2603   if (dtag == DW_TAG_enumerator) {
2604      VG_(memset)( &atomE, 0, sizeof(atomE) );
2605      atomE.cuOff = posn;
2606      atomE.tag   = Te_Atom;
2607      while (True) {
2608         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2609         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2610         if (attr == 0 && form == 0) break;
2611         get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2612         if (attr == DW_AT_name && cts.szB < 0) {
2613            atomE.Te.Atom.name
2614              = ML_(cur_read_strdup)( cts.u.cur,
2615                                      "di.readdwarf3.pTD.enumerator.1" );
2616         }
2617         if (attr == DW_AT_const_value && cts.szB > 0) {
2618            atomE.Te.Atom.value      = cts.u.val;
2619            atomE.Te.Atom.valueKnown = True;
2620         }
2621      }
2622      /* Do we have something that looks sane? */
2623      if (atomE.Te.Atom.name == NULL)
2624         goto_bad_DIE;
2625      /* Do we have a plausible parent? */
2626      if (typestack_is_empty(parser)) goto_bad_DIE;
2627      vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
2628      vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
2629      if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE;
2630      if (parser->qparentE[parser->sp].tag != Te_TyEnum) goto_bad_DIE;
2631      /* Record this child in the parent */
2632      vg_assert(parser->qparentE[parser->sp].Te.TyEnum.atomRs);
2633      VG_(addToXA)( parser->qparentE[parser->sp].Te.TyEnum.atomRs,
2634                    &atomE );
2635      /* And record the child itself */
2636      goto acquire_Atom;
2637   }
2638
2639   /* Treat DW_TAG_class_type as if it was a DW_TAG_structure_type.  I
2640      don't know if this is correct, but it at least makes this reader
2641      usable for gcc-4.3 produced Dwarf3. */
2642   if (dtag == DW_TAG_structure_type || dtag == DW_TAG_class_type
2643       || dtag == DW_TAG_union_type) {
2644      Bool have_szB = False;
2645      Bool is_decl  = False;
2646      Bool is_spec  = False;
2647      /* Create a new Type to hold the results. */
2648      VG_(memset)(&typeE, 0, sizeof(typeE));
2649      typeE.cuOff = posn;
2650      typeE.tag   = Te_TyStOrUn;
2651      typeE.Te.TyStOrUn.name = NULL;
2652      typeE.Te.TyStOrUn.typeR = D3_INVALID_CUOFF;
2653      typeE.Te.TyStOrUn.fieldRs
2654         = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.pTD.struct_type.1",
2655                       ML_(dinfo_free),
2656                       sizeof(UWord) );
2657      typeE.Te.TyStOrUn.complete = True;
2658      typeE.Te.TyStOrUn.isStruct = dtag == DW_TAG_structure_type
2659                                   || dtag == DW_TAG_class_type;
2660      while (True) {
2661         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2662         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2663         if (attr == 0 && form == 0) break;
2664         get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2665         if (attr == DW_AT_name && cts.szB < 0) {
2666            typeE.Te.TyStOrUn.name
2667               = ML_(cur_read_strdup)( cts.u.cur,
2668                                       "di.readdwarf3.ptD.struct_type.2" );
2669         }
2670         if (attr == DW_AT_byte_size && cts.szB >= 0) {
2671            typeE.Te.TyStOrUn.szB = cts.u.val;
2672            have_szB = True;
2673         }
2674         if (attr == DW_AT_declaration && cts.szB > 0 && cts.u.val > 0) {
2675            is_decl = True;
2676         }
2677         if (attr == DW_AT_specification && cts.szB > 0 && cts.u.val > 0) {
2678            is_spec = True;
2679         }
2680         if (attr == DW_AT_signature && form == DW_FORM_ref_sig8
2681             && cts.szB > 0) {
2682            have_szB = True;
2683            typeE.Te.TyStOrUn.szB = 8;
2684            typeE.Te.TyStOrUn.typeR
2685               = cook_die_using_form( cc, (UWord)cts.u.val, form );
2686         }
2687      }
2688      /* Do we have something that looks sane? */
2689      if (is_decl && (!is_spec)) {
2690         /* It's a DW_AT_declaration.  We require the name but
2691            nothing else. */
2692         /* JRS 2012-06-28: following discussion w/ tromey, if the the
2693            type doesn't have name, just make one up, and accept it.
2694            It might be referred to by other DIEs, so ignoring it
2695            doesn't seem like a safe option. */
2696         if (typeE.Te.TyStOrUn.name == NULL)
2697            typeE.Te.TyStOrUn.name
2698               = ML_(dinfo_strdup)( "di.readdwarf3.ptD.struct_type.3",
2699                                    "<anon_struct_type>" );
2700         typeE.Te.TyStOrUn.complete = False;
2701         /* JRS 2009 Aug 10: <possible kludge>? */
2702         /* Push this tyent on the stack, even though it's incomplete.
2703            It appears that gcc-4.4 on Fedora 11 will sometimes create
2704            DW_TAG_member entries for it, and so we need to have a
2705            plausible parent present in order for that to work.  See
2706            #200029 comments 8 and 9. */
2707         typestack_push( cc, parser, td3, &typeE, level );
2708         /* </possible kludge> */
2709         goto acquire_Type;
2710      }
2711      if ((!is_decl) /* && (!is_spec) */) {
2712         /* this is the common, ordinary case */
2713         /* The name can be present, or not */
2714         if (!have_szB) {
2715            /* We must know the size.
2716               But in Ada, record with discriminants might have no size.
2717               But in C, VLA in the middle of a struct (gcc extension)
2718               might have no size.
2719               Instead, some GNAT dwarf extensions and/or dwarf entries
2720               allow to calculate the struct size at runtime.
2721               We cannot do that (yet?) so, the temporary kludge is to use
2722               a small size. */
2723            typeE.Te.TyStOrUn.szB = 1;
2724         }
2725         /* On't stack! */
2726         typestack_push( cc, parser, td3, &typeE, level );
2727         goto acquire_Type;
2728      }
2729      else {
2730         /* don't know how to handle any other variants just now */
2731         goto_bad_DIE;
2732      }
2733   }
2734
2735   if (dtag == DW_TAG_member) {
2736      /* Acquire member entries for both DW_TAG_structure_type and
2737         DW_TAG_union_type.  They differ minorly, in that struct
2738         members must have a DW_AT_data_member_location expression
2739         whereas union members must not. */
2740      Bool parent_is_struct;
2741      VG_(memset)( &fieldE, 0, sizeof(fieldE) );
2742      fieldE.cuOff = posn;
2743      fieldE.tag   = Te_Field;
2744      fieldE.Te.Field.typeR = D3_INVALID_CUOFF;
2745      while (True) {
2746         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2747         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2748         if (attr == 0 && form == 0) break;
2749         get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2750         if (attr == DW_AT_name && cts.szB < 0) {
2751            fieldE.Te.Field.name
2752               = ML_(cur_read_strdup)( cts.u.cur,
2753                                       "di.readdwarf3.ptD.member.1" );
2754         }
2755         if (attr == DW_AT_type && cts.szB > 0) {
2756            fieldE.Te.Field.typeR
2757               = cook_die_using_form( cc, (UWord)cts.u.val, form );
2758         }
2759         /* There are 2 different cases for DW_AT_data_member_location.
2760            If it is a constant class attribute, it contains byte offset
2761            from the beginning of the containing entity.
2762            Otherwise it is a location expression.  */
2763         if (attr == DW_AT_data_member_location && cts.szB > 0) {
2764            fieldE.Te.Field.nLoc = -1;
2765            fieldE.Te.Field.pos.offset = cts.u.val;
2766         }
2767         if (attr == DW_AT_data_member_location && cts.szB <= 0) {
2768            fieldE.Te.Field.nLoc = (UWord)(-cts.szB);
2769            fieldE.Te.Field.pos.loc
2770               = ML_(cur_read_memdup)( cts.u.cur,
2771                                       (SizeT)fieldE.Te.Field.nLoc,
2772                                       "di.readdwarf3.ptD.member.2" );
2773         }
2774      }
2775      /* Do we have a plausible parent? */
2776      if (typestack_is_empty(parser)) goto_bad_DIE;
2777      vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
2778      vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
2779      if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE;
2780      if (parser->qparentE[parser->sp].tag != Te_TyStOrUn) goto_bad_DIE;
2781      /* Do we have something that looks sane?  If this a member of a
2782         struct, we must have a location expression; but if a member
2783         of a union that is irrelevant (D3 spec sec 5.6.6).  We ought
2784         to reject in the latter case, but some compilers have been
2785         observed to emit constant-zero expressions.  So just ignore
2786         them. */
2787      parent_is_struct
2788         = parser->qparentE[parser->sp].Te.TyStOrUn.isStruct;
2789      if (!fieldE.Te.Field.name)
2790         fieldE.Te.Field.name
2791            = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.3",
2792                                 "<anon_field>" );
2793      vg_assert(fieldE.Te.Field.name);
2794      if (fieldE.Te.Field.typeR == D3_INVALID_CUOFF)
2795         goto_bad_DIE;
2796      if (fieldE.Te.Field.nLoc) {
2797         if (!parent_is_struct) {
2798            /* If this is a union type, pretend we haven't seen the data
2799               member location expression, as it is by definition
2800               redundant (it must be zero). */
2801            if (fieldE.Te.Field.nLoc > 0)
2802               ML_(dinfo_free)(fieldE.Te.Field.pos.loc);
2803            fieldE.Te.Field.pos.loc = NULL;
2804            fieldE.Te.Field.nLoc = 0;
2805         }
2806         /* Record this child in the parent */
2807         fieldE.Te.Field.isStruct = parent_is_struct;
2808         vg_assert(parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs);
2809         VG_(addToXA)( parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs,
2810                       &posn );
2811         /* And record the child itself */
2812         goto acquire_Field;
2813      } else {
2814         /* Member with no location - this can happen with static
2815            const members in C++ code which are compile time constants
2816            that do no exist in the class. They're not of any interest
2817            to us so we ignore them. */
2818         ML_(TyEnt__make_EMPTY)(&fieldE);
2819      }
2820   }
2821
2822   if (dtag == DW_TAG_array_type) {
2823      VG_(memset)(&typeE, 0, sizeof(typeE));
2824      typeE.cuOff = posn;
2825      typeE.tag   = Te_TyArray;
2826      typeE.Te.TyArray.typeR = D3_INVALID_CUOFF;
2827      typeE.Te.TyArray.boundRs
2828         = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.array_type.1",
2829                       ML_(dinfo_free),
2830                       sizeof(UWord) );
2831      while (True) {
2832         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2833         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2834         if (attr == 0 && form == 0) break;
2835         get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2836         if (attr == DW_AT_type && cts.szB > 0) {
2837            typeE.Te.TyArray.typeR
2838               = cook_die_using_form( cc, (UWord)cts.u.val, form );
2839         }
2840      }
2841      if (typeE.Te.TyArray.typeR == D3_INVALID_CUOFF)
2842         goto_bad_DIE;
2843      /* On't stack! */
2844      typestack_push( cc, parser, td3, &typeE, level );
2845      goto acquire_Type;
2846   }
2847
2848   /* this is a subrange type defining the bounds of an array. */
2849   if (dtag == DW_TAG_subrange_type
2850       && subrange_type_denotes_array_bounds(parser, dtag)) {
2851      Bool have_lower = False;
2852      Bool have_upper = False;
2853      Bool have_count = False;
2854      Long lower = 0;
2855      Long upper = 0;
2856
2857      switch (parser->language) {
2858         case 'C': have_lower = True;  lower = 0; break;
2859         case 'F': have_lower = True;  lower = 1; break;
2860         case '?': have_lower = False; break;
2861         case 'A': have_lower = False; break;
2862         default:  vg_assert(0); /* assured us by handling of
2863                                    DW_TAG_compile_unit in this fn */
2864      }
2865
2866      VG_(memset)( &boundE, 0, sizeof(boundE) );
2867      boundE.cuOff = D3_INVALID_CUOFF;
2868      boundE.tag   = Te_Bound;
2869      while (True) {
2870         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2871         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2872         if (attr == 0 && form == 0) break;
2873         get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2874         if (attr == DW_AT_lower_bound && cts.szB > 0) {
2875            lower      = (Long)cts.u.val;
2876            have_lower = True;
2877         }
2878         if (attr == DW_AT_upper_bound && cts.szB > 0) {
2879            upper      = (Long)cts.u.val;
2880            have_upper = True;
2881         }
2882         if (attr == DW_AT_count && cts.szB > 0) {
2883            /*count    = (Long)cts.u.val;*/
2884            have_count = True;
2885         }
2886      }
2887      /* FIXME: potentially skip the rest if no parent present, since
2888         it could be the case that this subrange type is free-standing
2889         (not being used to describe the bounds of a containing array
2890         type) */
2891      /* Do we have a plausible parent? */
2892      if (typestack_is_empty(parser)) goto_bad_DIE;
2893      vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
2894      vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
2895      if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE;
2896      if (parser->qparentE[parser->sp].tag != Te_TyArray) goto_bad_DIE;
2897
2898      /* Figure out if we have a definite range or not */
2899      if (have_lower && have_upper && (!have_count)) {
2900         boundE.Te.Bound.knownL = True;
2901         boundE.Te.Bound.knownU = True;
2902         boundE.Te.Bound.boundL = lower;
2903         boundE.Te.Bound.boundU = upper;
2904      }
2905      else if (have_lower && (!have_upper) && (!have_count)) {
2906         boundE.Te.Bound.knownL = True;
2907         boundE.Te.Bound.knownU = False;
2908         boundE.Te.Bound.boundL = lower;
2909         boundE.Te.Bound.boundU = 0;
2910      }
2911      else if ((!have_lower) && have_upper && (!have_count)) {
2912         boundE.Te.Bound.knownL = False;
2913         boundE.Te.Bound.knownU = True;
2914         boundE.Te.Bound.boundL = 0;
2915         boundE.Te.Bound.boundU = upper;
2916      }
2917      else if ((!have_lower) && (!have_upper) && (!have_count)) {
2918         boundE.Te.Bound.knownL = False;
2919         boundE.Te.Bound.knownU = False;
2920         boundE.Te.Bound.boundL = 0;
2921         boundE.Te.Bound.boundU = 0;
2922      } else {
2923         /* FIXME: handle more cases */
2924         goto_bad_DIE;
2925      }
2926
2927      /* Record this bound in the parent */
2928      boundE.cuOff = posn;
2929      vg_assert(parser->qparentE[parser->sp].Te.TyArray.boundRs);
2930      VG_(addToXA)( parser->qparentE[parser->sp].Te.TyArray.boundRs,
2931                    &boundE.cuOff );
2932      /* And record the child itself */
2933      goto acquire_Bound;
2934   }
2935
2936   /* typedef or subrange_type other than array bounds. */
2937   if (dtag == DW_TAG_typedef
2938       || (dtag == DW_TAG_subrange_type
2939           && !subrange_type_denotes_array_bounds(parser, dtag))) {
2940      /* subrange_type other than array bound is only for Ada. */
2941      vg_assert (dtag == DW_TAG_typedef || parser->language == 'A');
2942      /* We can pick up a new typedef/subrange_type any time. */
2943      VG_(memset)(&typeE, 0, sizeof(typeE));
2944      typeE.cuOff = D3_INVALID_CUOFF;
2945      typeE.tag   = Te_TyTyDef;
2946      typeE.Te.TyTyDef.name = NULL;
2947      typeE.Te.TyTyDef.typeR = D3_INVALID_CUOFF;
2948      while (True) {
2949         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2950         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2951         if (attr == 0 && form == 0) break;
2952         get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2953         if (attr == DW_AT_name && cts.szB < 0) {
2954            typeE.Te.TyTyDef.name
2955               = ML_(cur_read_strdup)( cts.u.cur,
2956                                       "di.readdwarf3.ptD.typedef.1" );
2957         }
2958         if (attr == DW_AT_type && cts.szB > 0) {
2959            typeE.Te.TyTyDef.typeR
2960               = cook_die_using_form( cc, (UWord)cts.u.val, form );
2961         }
2962      }
2963      /* Do we have something that looks sane?
2964         gcc gnat Ada generates minimal typedef
2965         such as the below
2966         <6><91cc>: DW_TAG_typedef
2967            DW_AT_abstract_ori: <9066>
2968         g++ for OMP can generate artificial functions that have
2969         parameters that refer to pointers to unnamed typedefs.
2970         See https://bugs.kde.org/show_bug.cgi?id=273475
2971         So we cannot require a name for a DW_TAG_typedef.
2972      */
2973      goto acquire_Type;
2974   }
2975
2976   if (dtag == DW_TAG_subroutine_type) {
2977      /* function type? just record that one fact and ask no
2978         further questions. */
2979      VG_(memset)(&typeE, 0, sizeof(typeE));
2980      typeE.cuOff = D3_INVALID_CUOFF;
2981      typeE.tag   = Te_TyFn;
2982      goto acquire_Type;
2983   }
2984
2985   if (dtag == DW_TAG_volatile_type || dtag == DW_TAG_const_type) {
2986      Int have_ty = 0;
2987      VG_(memset)(&typeE, 0, sizeof(typeE));
2988      typeE.cuOff = D3_INVALID_CUOFF;
2989      typeE.tag   = Te_TyQual;
2990      typeE.Te.TyQual.qual
2991         = dtag == DW_TAG_volatile_type ? 'V' : 'C';
2992      /* target type defaults to 'void' */
2993      typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF;
2994      while (True) {
2995         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2996         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2997         if (attr == 0 && form == 0) break;
2998         get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2999         if (attr == DW_AT_type && cts.szB > 0) {
3000            typeE.Te.TyQual.typeR
3001               = cook_die_using_form( cc, (UWord)cts.u.val, form );
3002            have_ty++;
3003         }
3004      }
3005      /* gcc sometimes generates DW_TAG_const/volatile_type without
3006         DW_AT_type and GDB appears to interpret the type as 'const
3007         void' (resp. 'volatile void').  So just allow it .. */
3008      if (have_ty == 1 || have_ty == 0)
3009         goto acquire_Type;
3010      else
3011         goto_bad_DIE;
3012   }
3013
3014   /*
3015    * Treat DW_TAG_unspecified_type as type void. An example of DW_TAG_unspecified_type:
3016    *
3017    * $ readelf --debug-dump /usr/lib/debug/usr/lib/libstdc++.so.6.0.16.debug
3018    *  <1><10d4>: Abbrev Number: 53 (DW_TAG_unspecified_type)
3019    *     <10d5>   DW_AT_name        : (indirect string, offset: 0xdb7): decltype(nullptr)
3020    */
3021   if (dtag == DW_TAG_unspecified_type) {
3022      VG_(memset)(&typeE, 0, sizeof(typeE));
3023      typeE.cuOff           = D3_INVALID_CUOFF;
3024      typeE.tag             = Te_TyQual;
3025      typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF;
3026      goto acquire_Type;
3027   }
3028
3029   /* else ignore this DIE */
3030   return;
3031   /*NOTREACHED*/
3032
3033  acquire_Type:
3034   if (0) VG_(printf)("YYYY Acquire Type\n");
3035   vg_assert(ML_(TyEnt__is_type)( &typeE ));
3036   vg_assert(typeE.cuOff == D3_INVALID_CUOFF || typeE.cuOff == posn);
3037   typeE.cuOff = posn;
3038   VG_(addToXA)( tyents, &typeE );
3039   return;
3040   /*NOTREACHED*/
3041
3042  acquire_Atom:
3043   if (0) VG_(printf)("YYYY Acquire Atom\n");
3044   vg_assert(atomE.tag == Te_Atom);
3045   vg_assert(atomE.cuOff == D3_INVALID_CUOFF || atomE.cuOff == posn);
3046   atomE.cuOff = posn;
3047   VG_(addToXA)( tyents, &atomE );
3048   return;
3049   /*NOTREACHED*/
3050
3051  acquire_Field:
3052   /* For union members, Expr should be absent */
3053   if (0) VG_(printf)("YYYY Acquire Field\n");
3054   vg_assert(fieldE.tag == Te_Field);
3055   vg_assert(fieldE.Te.Field.nLoc <= 0 || fieldE.Te.Field.pos.loc != NULL);
3056   vg_assert(fieldE.Te.Field.nLoc != 0 || fieldE.Te.Field.pos.loc == NULL);
3057   if (fieldE.Te.Field.isStruct) {
3058      vg_assert(fieldE.Te.Field.nLoc != 0);
3059   } else {
3060      vg_assert(fieldE.Te.Field.nLoc == 0);
3061   }
3062   vg_assert(fieldE.cuOff == D3_INVALID_CUOFF || fieldE.cuOff == posn);
3063   fieldE.cuOff = posn;
3064   VG_(addToXA)( tyents, &fieldE );
3065   return;
3066   /*NOTREACHED*/
3067
3068  acquire_Bound:
3069   if (0) VG_(printf)("YYYY Acquire Bound\n");
3070   vg_assert(boundE.tag == Te_Bound);
3071   vg_assert(boundE.cuOff == D3_INVALID_CUOFF || boundE.cuOff == posn);
3072   boundE.cuOff = posn;
3073   VG_(addToXA)( tyents, &boundE );
3074   return;
3075   /*NOTREACHED*/
3076
3077  bad_DIE:
3078   set_position_of_Cursor( c_die,  saved_die_c_offset );
3079   set_position_of_Cursor( c_abbv, saved_abbv_c_offset );
3080   posn = uncook_die( cc, posn, &debug_types_flag, &alt_flag );
3081   VG_(printf)(" <%d><%lx>: %s", level, posn, ML_(pp_DW_TAG)( dtag ) );
3082   if (debug_types_flag) {
3083      VG_(printf)(" (in .debug_types)");
3084   } else if (alt_flag) {
3085      VG_(printf)(" (in alternate .debug_info)");
3086   }
3087   VG_(printf)("\n");
3088   while (True) {
3089      DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
3090      DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
3091      if (attr == 0 && form == 0) break;
3092      VG_(printf)("     %18s: ", ML_(pp_DW_AT)(attr));
3093      /* Get the form contents, so as to print them */
3094      get_Form_contents( &cts, cc, c_die, True, form );
3095      VG_(printf)("\t\n");
3096   }
3097   VG_(printf)("\n");
3098   cc->barf("parse_type_DIE: confused by the above DIE");
3099   /*NOTREACHED*/
3100}
3101
3102
3103/*------------------------------------------------------------*/
3104/*---                                                      ---*/
3105/*--- Compression of type DIE information                  ---*/
3106/*---                                                      ---*/
3107/*------------------------------------------------------------*/
3108
3109static UWord chase_cuOff ( Bool* changed,
3110                           XArray* /* of TyEnt */ ents,
3111                           TyEntIndexCache* ents_cache,
3112                           UWord cuOff )
3113{
3114   TyEnt* ent;
3115   ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, cuOff );
3116
3117   if (!ent) {
3118      VG_(printf)("chase_cuOff: no entry for 0x%05lx\n", cuOff);
3119      *changed = False;
3120      return cuOff;
3121   }
3122
3123   vg_assert(ent->tag != Te_EMPTY);
3124   if (ent->tag != Te_INDIR) {
3125      *changed = False;
3126      return cuOff;
3127   } else {
3128      vg_assert(ent->Te.INDIR.indR < cuOff);
3129      *changed = True;
3130      return ent->Te.INDIR.indR;
3131   }
3132}
3133
3134static
3135void chase_cuOffs_in_XArray ( Bool* changed,
3136                              XArray* /* of TyEnt */ ents,
3137                              TyEntIndexCache* ents_cache,
3138                              /*MOD*/XArray* /* of UWord */ cuOffs )
3139{
3140   Bool b2 = False;
3141   Word i, n = VG_(sizeXA)( cuOffs );
3142   for (i = 0; i < n; i++) {
3143      Bool   b = False;
3144      UWord* p = VG_(indexXA)( cuOffs, i );
3145      *p = chase_cuOff( &b, ents, ents_cache, *p );
3146      if (b)
3147         b2 = True;
3148   }
3149   *changed = b2;
3150}
3151
3152static Bool TyEnt__subst_R_fields ( XArray* /* of TyEnt */ ents,
3153                                    TyEntIndexCache* ents_cache,
3154                                    /*MOD*/TyEnt* te )
3155{
3156   Bool b, changed = False;
3157   switch (te->tag) {
3158      case Te_EMPTY:
3159         break;
3160      case Te_INDIR:
3161         te->Te.INDIR.indR
3162            = chase_cuOff( &b, ents, ents_cache, te->Te.INDIR.indR );
3163         if (b) changed = True;
3164         break;
3165      case Te_UNKNOWN:
3166         break;
3167      case Te_Atom:
3168         break;
3169      case Te_Field:
3170         te->Te.Field.typeR
3171            = chase_cuOff( &b, ents, ents_cache, te->Te.Field.typeR );
3172         if (b) changed = True;
3173         break;
3174      case Te_Bound:
3175         break;
3176      case Te_TyBase:
3177         break;
3178      case Te_TyPtr:
3179      case Te_TyRef:
3180      case Te_TyPtrMbr:
3181      case Te_TyRvalRef:
3182         te->Te.TyPorR.typeR
3183            = chase_cuOff( &b, ents, ents_cache, te->Te.TyPorR.typeR );
3184         if (b) changed = True;
3185         break;
3186      case Te_TyTyDef:
3187         te->Te.TyTyDef.typeR
3188            = chase_cuOff( &b, ents, ents_cache, te->Te.TyTyDef.typeR );
3189         if (b) changed = True;
3190         break;
3191      case Te_TyStOrUn:
3192         chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyStOrUn.fieldRs );
3193         if (b) changed = True;
3194         break;
3195      case Te_TyEnum:
3196         chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyEnum.atomRs );
3197         if (b) changed = True;
3198         break;
3199      case Te_TyArray:
3200         te->Te.TyArray.typeR
3201            = chase_cuOff( &b, ents, ents_cache, te->Te.TyArray.typeR );
3202         if (b) changed = True;
3203         chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyArray.boundRs );
3204         if (b) changed = True;
3205         break;
3206      case Te_TyFn:
3207         break;
3208      case Te_TyQual:
3209         te->Te.TyQual.typeR
3210            = chase_cuOff( &b, ents, ents_cache, te->Te.TyQual.typeR );
3211         if (b) changed = True;
3212         break;
3213      case Te_TyVoid:
3214         break;
3215      default:
3216         ML_(pp_TyEnt)(te);
3217         vg_assert(0);
3218   }
3219   return changed;
3220}
3221
3222/* Make a pass over 'ents'.  For each tyent, inspect the target of any
3223   'R' or 'Rs' fields (those which refer to other tyents), and replace
3224   any which point to INDIR nodes with the target of the indirection
3225   (which should not itself be an indirection).  In summary, this
3226   routine shorts out all references to indirection nodes. */
3227static
3228Word dedup_types_substitution_pass ( /*MOD*/XArray* /* of TyEnt */ ents,
3229                                     TyEntIndexCache* ents_cache )
3230{
3231   Word i, n, nChanged = 0;
3232   Bool b;
3233   n = VG_(sizeXA)( ents );
3234   for (i = 0; i < n; i++) {
3235      TyEnt* ent = VG_(indexXA)( ents, i );
3236      vg_assert(ent->tag != Te_EMPTY);
3237      /* We have to substitute everything, even indirections, so as to
3238         ensure that chains of indirections don't build up. */
3239      b = TyEnt__subst_R_fields( ents, ents_cache, ent );
3240      if (b)
3241         nChanged++;
3242   }
3243
3244   return nChanged;
3245}
3246
3247
3248/* Make a pass over 'ents', building a dictionary of TyEnts as we go.
3249   Look up each new tyent in the dictionary in turn.  If it is already
3250   in the dictionary, replace this tyent with an indirection to the
3251   existing one, and delete any malloc'd stuff hanging off this one.
3252   In summary, this routine commons up all tyents that are identical
3253   as defined by TyEnt__cmp_by_all_except_cuOff. */
3254static
3255Word dedup_types_commoning_pass ( /*MOD*/XArray* /* of TyEnt */ ents )
3256{
3257   Word    n, i, nDeleted;
3258   WordFM* dict; /* TyEnt* -> void */
3259   TyEnt*  ent;
3260   UWord   keyW, valW;
3261
3262   dict = VG_(newFM)(
3263             ML_(dinfo_zalloc), "di.readdwarf3.dtcp.1",
3264             ML_(dinfo_free),
3265             (Word(*)(UWord,UWord)) ML_(TyEnt__cmp_by_all_except_cuOff)
3266          );
3267
3268   nDeleted = 0;
3269   n = VG_(sizeXA)( ents );
3270   for (i = 0; i < n; i++) {
3271      ent = VG_(indexXA)( ents, i );
3272      vg_assert(ent->tag != Te_EMPTY);
3273
3274      /* Ignore indirections, although check that they are
3275         not forming a cycle. */
3276      if (ent->tag == Te_INDIR) {
3277         vg_assert(ent->Te.INDIR.indR < ent->cuOff);
3278         continue;
3279      }
3280
3281      keyW = valW = 0;
3282      if (VG_(lookupFM)( dict, &keyW, &valW, (UWord)ent )) {
3283         /* it's already in the dictionary. */
3284         TyEnt* old = (TyEnt*)keyW;
3285         vg_assert(valW == 0);
3286         vg_assert(old != ent);
3287         vg_assert(old->tag != Te_INDIR);
3288         /* since we are traversing the array in increasing order of
3289            cuOff: */
3290         vg_assert(old->cuOff < ent->cuOff);
3291         /* So anyway, dump this entry and replace it with an
3292            indirection to the one in the dictionary.  Note that the
3293            assertion above guarantees that we cannot create cycles of
3294            indirections, since we are always creating an indirection
3295            to a tyent with a cuOff lower than this one. */
3296         ML_(TyEnt__make_EMPTY)( ent );
3297         ent->tag = Te_INDIR;
3298         ent->Te.INDIR.indR = old->cuOff;
3299         nDeleted++;
3300      } else {
3301         /* not in dictionary; add it and keep going. */
3302         VG_(addToFM)( dict, (UWord)ent, 0 );
3303      }
3304   }
3305
3306   VG_(deleteFM)( dict, NULL, NULL );
3307
3308   return nDeleted;
3309}
3310
3311
3312static
3313void dedup_types ( Bool td3,
3314                   /*MOD*/XArray* /* of TyEnt */ ents,
3315                   TyEntIndexCache* ents_cache )
3316{
3317   Word m, n, i, nDel, nSubst, nThresh;
3318   if (0) td3 = True;
3319
3320   n = VG_(sizeXA)( ents );
3321
3322   /* If a commoning pass and a substitution pass both make fewer than
3323      this many changes, just stop.  It's pointless to burn up CPU
3324      time trying to compress the last 1% or so out of the array. */
3325   nThresh = n / 200;
3326
3327   /* First we must sort .ents by its .cuOff fields, so we
3328      can index into it. */
3329   VG_(setCmpFnXA)( ents, (XACmpFn_t) ML_(TyEnt__cmp_by_cuOff_only) );
3330   VG_(sortXA)( ents );
3331
3332   /* Now repeatedly do commoning and substitution passes over
3333      the array, until there are no more changes. */
3334   do {
3335      nDel   = dedup_types_commoning_pass ( ents );
3336      nSubst = dedup_types_substitution_pass ( ents, ents_cache );
3337      vg_assert(nDel >= 0 && nSubst >= 0);
3338      TRACE_D3("   %ld deletions, %ld substitutions\n", nDel, nSubst);
3339   } while (nDel > nThresh || nSubst > nThresh);
3340
3341   /* Sanity check: all INDIR nodes should point at a non-INDIR thing.
3342      In fact this should be true at the end of every loop iteration
3343      above (a commoning pass followed by a substitution pass), but
3344      checking it on every iteration is excessively expensive.  Note,
3345      this loop also computes 'm' for the stats printing below it. */
3346   m = 0;
3347   n = VG_(sizeXA)( ents );
3348   for (i = 0; i < n; i++) {
3349      TyEnt *ent, *ind;
3350      ent = VG_(indexXA)( ents, i );
3351      if (ent->tag != Te_INDIR) continue;
3352      m++;
3353      ind = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
3354                                         ent->Te.INDIR.indR );
3355      vg_assert(ind);
3356      vg_assert(ind->tag != Te_INDIR);
3357   }
3358
3359   TRACE_D3("Overall: %ld before, %ld after\n", n, n-m);
3360}
3361
3362
3363/*------------------------------------------------------------*/
3364/*---                                                      ---*/
3365/*--- Resolution of references to type DIEs                ---*/
3366/*---                                                      ---*/
3367/*------------------------------------------------------------*/
3368
3369/* Make a pass through the (temporary) variables array.  Examine the
3370   type of each variable, check is it found, and chase any Te_INDIRs.
3371   Postcondition is: each variable has a typeR field that refers to a
3372   valid type in tyents, or a Te_UNKNOWN, and is certainly guaranteed
3373   not to refer to a Te_INDIR.  (This is so that we can throw all the
3374   Te_INDIRs away later). */
3375
3376__attribute__((noinline))
3377static void resolve_variable_types (
3378               void (*barf)( const HChar* ) __attribute__((noreturn)),
3379               /*R-O*/XArray* /* of TyEnt */ ents,
3380               /*MOD*/TyEntIndexCache* ents_cache,
3381               /*MOD*/XArray* /* of TempVar* */ vars
3382            )
3383{
3384   Word i, n;
3385   n = VG_(sizeXA)( vars );
3386   for (i = 0; i < n; i++) {
3387      TempVar* var = *(TempVar**)VG_(indexXA)( vars, i );
3388      /* This is the stated type of the variable.  But it might be
3389         an indirection, so be careful. */
3390      TyEnt* ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
3391                                                var->typeR );
3392      if (ent && ent->tag == Te_INDIR) {
3393         ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
3394                                            ent->Te.INDIR.indR );
3395         vg_assert(ent);
3396         vg_assert(ent->tag != Te_INDIR);
3397      }
3398
3399      /* Deal first with "normal" cases */
3400      if (ent && ML_(TyEnt__is_type)(ent)) {
3401         var->typeR = ent->cuOff;
3402         continue;
3403      }
3404
3405      /* If there's no ent, it probably we did not manage to read a
3406         type at the cuOffset which is stated as being this variable's
3407         type.  Maybe a deficiency in parse_type_DIE.  Complain. */
3408      if (ent == NULL) {
3409         VG_(printf)("\n: Invalid cuOff = 0x%05lx\n", var->typeR );
3410         barf("resolve_variable_types: "
3411              "cuOff does not refer to a known type");
3412      }
3413      vg_assert(ent);
3414      /* If ent has any other tag, something bad happened, along the
3415         lines of var->typeR not referring to a type at all. */
3416      vg_assert(ent->tag == Te_UNKNOWN);
3417      /* Just accept it; the type will be useless, but at least keep
3418         going. */
3419      var->typeR = ent->cuOff;
3420   }
3421}
3422
3423
3424/*------------------------------------------------------------*/
3425/*---                                                      ---*/
3426/*--- Parsing of Compilation Units                         ---*/
3427/*---                                                      ---*/
3428/*------------------------------------------------------------*/
3429
3430static Int cmp_TempVar_by_dioff ( const void* v1, const void* v2 ) {
3431   const TempVar* t1 = *(const TempVar *const *)v1;
3432   const TempVar* t2 = *(const TempVar *const *)v2;
3433   if (t1->dioff < t2->dioff) return -1;
3434   if (t1->dioff > t2->dioff) return 1;
3435   return 0;
3436}
3437
3438static void read_DIE (
3439   /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
3440   /*MOD*/XArray* /* of TyEnt */ tyents,
3441   /*MOD*/XArray* /* of TempVar* */ tempvars,
3442   /*MOD*/XArray* /* of GExpr* */ gexprs,
3443   /*MOD*/D3TypeParser* typarser,
3444   /*MOD*/D3VarParser* varparser,
3445   Cursor* c, Bool td3, CUConst* cc, Int level
3446)
3447{
3448   Cursor abbv;
3449   ULong  atag, abbv_code;
3450   UWord  posn;
3451   UInt   has_children;
3452   UWord  start_die_c_offset, start_abbv_c_offset;
3453   UWord  after_die_c_offset, after_abbv_c_offset;
3454
3455   /* --- Deal with this DIE --- */
3456   posn      = cook_die( cc, get_position_of_Cursor( c ) );
3457   abbv_code = get_ULEB128( c );
3458   set_abbv_Cursor( &abbv, td3, cc, abbv_code );
3459   atag      = get_ULEB128( &abbv );
3460   TRACE_D3("\n");
3461   TRACE_D3(" <%d><%lx>: Abbrev Number: %llu (%s)\n",
3462            level, posn, abbv_code, ML_(pp_DW_TAG)( atag ) );
3463
3464   if (atag == 0)
3465      cc->barf("read_DIE: invalid zero tag on DIE");
3466
3467   has_children = get_UChar( &abbv );
3468   if (has_children != DW_children_no && has_children != DW_children_yes)
3469      cc->barf("read_DIE: invalid has_children value");
3470
3471   /* We're set up to look at the fields of this DIE.  Hand it off to
3472      any parser(s) that want to see it.  Since they will in general
3473      advance both the DIE and abbrev cursors, remember their current
3474      settings so that we can then back up and do one final pass over
3475      the DIE, to print out its contents. */
3476
3477   start_die_c_offset  = get_position_of_Cursor( c );
3478   start_abbv_c_offset = get_position_of_Cursor( &abbv );
3479
3480   while (True) {
3481      FormContents cts;
3482      ULong at_name = get_ULEB128( &abbv );
3483      ULong at_form = get_ULEB128( &abbv );
3484      if (at_name == 0 && at_form == 0) break;
3485      TRACE_D3("     %18s: ", ML_(pp_DW_AT)(at_name));
3486      /* Get the form contents, but ignore them; the only purpose is
3487         to print them, if td3 is True */
3488      get_Form_contents( &cts, cc, c, td3, (DW_FORM)at_form );
3489      TRACE_D3("\t");
3490      TRACE_D3("\n");
3491   }
3492
3493   after_die_c_offset  = get_position_of_Cursor( c );
3494   after_abbv_c_offset = get_position_of_Cursor( &abbv );
3495
3496   set_position_of_Cursor( c,     start_die_c_offset );
3497   set_position_of_Cursor( &abbv, start_abbv_c_offset );
3498
3499   parse_type_DIE( tyents,
3500                   typarser,
3501                   (DW_TAG)atag,
3502                   posn,
3503                   level,
3504                   c,     /* DIE cursor */
3505                   &abbv, /* abbrev cursor */
3506                   cc,
3507                   td3 );
3508
3509   set_position_of_Cursor( c,     start_die_c_offset );
3510   set_position_of_Cursor( &abbv, start_abbv_c_offset );
3511
3512   parse_var_DIE( rangestree,
3513                  tempvars,
3514                  gexprs,
3515                  varparser,
3516                  (DW_TAG)atag,
3517                  posn,
3518                  level,
3519                  c,     /* DIE cursor */
3520                  &abbv, /* abbrev cursor */
3521                  cc,
3522                  td3 );
3523
3524   set_position_of_Cursor( c,     after_die_c_offset );
3525   set_position_of_Cursor( &abbv, after_abbv_c_offset );
3526
3527   /* --- Now recurse into its children, if any --- */
3528   if (has_children == DW_children_yes) {
3529      if (0) TRACE_D3("BEGIN children of level %d\n", level);
3530      while (True) {
3531         atag = peek_ULEB128( c );
3532         if (atag == 0) break;
3533         read_DIE( rangestree, tyents, tempvars, gexprs,
3534                   typarser, varparser,
3535                   c, td3, cc, level+1 );
3536      }
3537      /* Now we need to eat the terminating zero */
3538      atag = get_ULEB128( c );
3539      vg_assert(atag == 0);
3540      if (0) TRACE_D3("END children of level %d\n", level);
3541   }
3542
3543}
3544
3545
3546static
3547void new_dwarf3_reader_wrk (
3548   struct _DebugInfo* di,
3549   __attribute__((noreturn)) void (*barf)( const HChar* ),
3550   DiSlice escn_debug_info,      DiSlice escn_debug_types,
3551   DiSlice escn_debug_abbv,      DiSlice escn_debug_line,
3552   DiSlice escn_debug_str,       DiSlice escn_debug_ranges,
3553   DiSlice escn_debug_loc,       DiSlice escn_debug_info_alt,
3554   DiSlice escn_debug_abbv_alt,  DiSlice escn_debug_line_alt,
3555   DiSlice escn_debug_str_alt
3556)
3557{
3558   XArray* /* of TyEnt */     tyents;
3559   XArray* /* of TyEnt */     tyents_to_keep;
3560   XArray* /* of GExpr* */    gexprs;
3561   XArray* /* of TempVar* */  tempvars;
3562   WordFM* /* of (XArray* of AddrRange, void) */ rangestree;
3563   TyEntIndexCache* tyents_cache = NULL;
3564   TyEntIndexCache* tyents_to_keep_cache = NULL;
3565   TempVar *varp, *varp2;
3566   GExpr* gexpr;
3567   Cursor abbv; /* for showing .debug_abbrev */
3568   Cursor info; /* primary cursor for parsing .debug_info */
3569   Cursor ranges; /* for showing .debug_ranges */
3570   D3TypeParser typarser;
3571   D3VarParser varparser;
3572   Addr  dr_base;
3573   UWord dr_offset;
3574   Word  i, j, n;
3575   Bool td3 = di->trace_symtab;
3576   XArray* /* of TempVar* */ dioff_lookup_tab;
3577   Int pass;
3578   VgHashTable signature_types;
3579#if 0
3580   /* This doesn't work properly because it assumes all entries are
3581      packed end to end, with no holes.  But that doesn't always
3582      appear to be the case, so it loses sync.  And the D3 spec
3583      doesn't appear to require a no-hole situation either. */
3584   /* Display .debug_loc */
3585   Addr  dl_base;
3586   UWord dl_offset;
3587   Cursor loc; /* for showing .debug_loc */
3588   TRACE_SYMTAB("\n");
3589   TRACE_SYMTAB("\n------ The contents of .debug_loc ------\n");
3590   TRACE_SYMTAB("    Offset   Begin    End      Expression\n");
3591   init_Cursor( &loc, debug_loc_img,
3592                debug_loc_sz, 0, barf,
3593                "Overrun whilst reading .debug_loc section(1)" );
3594   dl_base = 0;
3595   dl_offset = 0;
3596   while (True) {
3597      UWord  w1, w2;
3598      UWord  len;
3599      if (is_at_end_Cursor( &loc ))
3600         break;
3601
3602      /* Read a (host-)word pair.  This is something of a hack since
3603         the word size to read is really dictated by the ELF file;
3604         however, we assume we're reading a file with the same
3605         word-sizeness as the host.  Reasonably enough. */
3606      w1 = get_UWord( &loc );
3607      w2 = get_UWord( &loc );
3608
3609      if (w1 == 0 && w2 == 0) {
3610         /* end of list.  reset 'base' */
3611         TRACE_D3("    %08lx <End of list>\n", dl_offset);
3612         dl_base = 0;
3613         dl_offset = get_position_of_Cursor( &loc );
3614         continue;
3615      }
3616
3617      if (w1 == -1UL) {
3618         /* new value for 'base' */
3619         TRACE_D3("    %08lx %16lx %08lx (base address)\n",
3620                  dl_offset, w1, w2);
3621         dl_base = w2;
3622         continue;
3623      }
3624
3625      /* else a location expression follows */
3626      TRACE_D3("    %08lx %08lx %08lx ",
3627               dl_offset, w1 + dl_base, w2 + dl_base);
3628      len = (UWord)get_UShort( &loc );
3629      while (len > 0) {
3630         UChar byte = get_UChar( &loc );
3631         TRACE_D3("%02x", (UInt)byte);
3632         len--;
3633      }
3634      TRACE_SYMTAB("\n");
3635   }
3636#endif
3637
3638   /* Display .debug_ranges */
3639   TRACE_SYMTAB("\n");
3640   TRACE_SYMTAB("\n------ The contents of .debug_ranges ------\n");
3641   TRACE_SYMTAB("    Offset   Begin    End\n");
3642   if (ML_(sli_is_valid)(escn_debug_ranges)) {
3643      init_Cursor( &ranges, escn_debug_ranges, 0, barf,
3644                   "Overrun whilst reading .debug_ranges section(1)" );
3645      dr_base = 0;
3646      dr_offset = 0;
3647      while (True) {
3648         UWord  w1, w2;
3649
3650         if (is_at_end_Cursor( &ranges ))
3651            break;
3652
3653         /* Read a (host-)word pair.  This is something of a hack since
3654            the word size to read is really dictated by the ELF file;
3655            however, we assume we're reading a file with the same
3656            word-sizeness as the host.  Reasonably enough. */
3657         w1 = get_UWord( &ranges );
3658         w2 = get_UWord( &ranges );
3659
3660         if (w1 == 0 && w2 == 0) {
3661            /* end of list.  reset 'base' */
3662            TRACE_D3("    %08lx <End of list>\n", dr_offset);
3663            dr_base = 0;
3664            dr_offset = get_position_of_Cursor( &ranges );
3665            continue;
3666         }
3667
3668         if (w1 == -1UL) {
3669            /* new value for 'base' */
3670            TRACE_D3("    %08lx %16lx %08lx (base address)\n",
3671                     dr_offset, w1, w2);
3672            dr_base = w2;
3673            continue;
3674         }
3675
3676         /* else a range [w1+base, w2+base) is denoted */
3677         TRACE_D3("    %08lx %08lx %08lx\n",
3678                  dr_offset, w1 + dr_base, w2 + dr_base);
3679      }
3680   }
3681
3682   /* Display .debug_abbrev */
3683   TRACE_SYMTAB("\n");
3684   TRACE_SYMTAB("\n------ The contents of .debug_abbrev ------\n");
3685   if (ML_(sli_is_valid)(escn_debug_abbv)) {
3686      init_Cursor( &abbv, escn_debug_abbv, 0, barf,
3687                   "Overrun whilst reading .debug_abbrev section" );
3688      while (True) {
3689         if (is_at_end_Cursor( &abbv ))
3690            break;
3691         /* Read one abbreviation table */
3692         TRACE_D3("  Number TAG\n");
3693         while (True) {
3694            ULong atag;
3695            UInt  has_children;
3696            ULong acode = get_ULEB128( &abbv );
3697            if (acode == 0) break; /* end of the table */
3698            atag = get_ULEB128( &abbv );
3699            has_children = get_UChar( &abbv );
3700            TRACE_D3("   %llu      %s    [%s]\n",
3701                     acode, ML_(pp_DW_TAG)(atag),
3702                            ML_(pp_DW_children)(has_children));
3703            while (True) {
3704               ULong at_name = get_ULEB128( &abbv );
3705               ULong at_form = get_ULEB128( &abbv );
3706               if (at_name == 0 && at_form == 0) break;
3707               TRACE_D3("    %18s %s\n",
3708                        ML_(pp_DW_AT)(at_name), ML_(pp_DW_FORM)(at_form));
3709            }
3710         }
3711      }
3712   }
3713   TRACE_SYMTAB("\n");
3714
3715   /* We'll park the harvested type information in here.  Also create
3716      a fake "void" entry with offset D3_FAKEVOID_CUOFF, so we always
3717      have at least one type entry to refer to.  D3_FAKEVOID_CUOFF is
3718      huge and presumably will not occur in any valid DWARF3 file --
3719      it would need to have a .debug_info section 4GB long for that to
3720      happen.  These type entries end up in the DebugInfo. */
3721   tyents = VG_(newXA)( ML_(dinfo_zalloc),
3722                        "di.readdwarf3.ndrw.1 (TyEnt temp array)",
3723                        ML_(dinfo_free), sizeof(TyEnt) );
3724   { TyEnt tyent;
3725     VG_(memset)(&tyent, 0, sizeof(tyent));
3726     tyent.tag   = Te_TyVoid;
3727     tyent.cuOff = D3_FAKEVOID_CUOFF;
3728     tyent.Te.TyVoid.isFake = True;
3729     VG_(addToXA)( tyents, &tyent );
3730   }
3731   { TyEnt tyent;
3732     VG_(memset)(&tyent, 0, sizeof(tyent));
3733     tyent.tag   = Te_UNKNOWN;
3734     tyent.cuOff = D3_INVALID_CUOFF;
3735     VG_(addToXA)( tyents, &tyent );
3736   }
3737
3738   /* This is a tree used to unique-ify the range lists that are
3739      manufactured by parse_var_DIE.  References to the keys in the
3740      tree wind up in .rngMany fields in TempVars.  We'll need to
3741      delete this tree, and the XArrays attached to it, at the end of
3742      this function. */
3743   rangestree = VG_(newFM)( ML_(dinfo_zalloc),
3744                            "di.readdwarf3.ndrw.2 (rangestree)",
3745                            ML_(dinfo_free),
3746                            (Word(*)(UWord,UWord))cmp__XArrays_of_AddrRange );
3747
3748   /* List of variables we're accumulating.  These don't end up in the
3749      DebugInfo; instead their contents are handed to ML_(addVar) and
3750      the list elements are then deleted. */
3751   tempvars = VG_(newXA)( ML_(dinfo_zalloc),
3752                          "di.readdwarf3.ndrw.3 (TempVar*s array)",
3753                          ML_(dinfo_free),
3754                          sizeof(TempVar*) );
3755
3756   /* List of GExprs we're accumulating.  These wind up in the
3757      DebugInfo. */
3758   gexprs = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.4",
3759                        ML_(dinfo_free), sizeof(GExpr*) );
3760
3761   /* We need a D3TypeParser to keep track of partially constructed
3762      types.  It'll be discarded as soon as we've completed the CU,
3763      since the resulting information is tipped in to 'tyents' as it
3764      is generated. */
3765   VG_(memset)( &typarser, 0, sizeof(typarser) );
3766   typarser.sp = -1;
3767   typarser.language = '?';
3768   for (i = 0; i < N_D3_TYPE_STACK; i++) {
3769      typarser.qparentE[i].tag   = Te_EMPTY;
3770      typarser.qparentE[i].cuOff = D3_INVALID_CUOFF;
3771   }
3772
3773   VG_(memset)( &varparser, 0, sizeof(varparser) );
3774   varparser.sp = -1;
3775
3776   signature_types = VG_(HT_construct) ("signature_types");
3777
3778   /* Do an initial pass to scan the .debug_types section, if any, and
3779      fill in the signatured types hash table.  This lets us handle
3780      mapping from a type signature to a (cooked) DIE offset directly
3781      in get_Form_contents.  */
3782   if (ML_(sli_is_valid)(escn_debug_types)) {
3783      init_Cursor( &info, escn_debug_types, 0, barf,
3784                   "Overrun whilst reading .debug_types section" );
3785      TRACE_D3("\n------ Collecting signatures from "
3786               ".debug_types section ------\n");
3787
3788      while (True) {
3789         UWord   cu_start_offset, cu_offset_now;
3790         CUConst cc;
3791
3792         cu_start_offset = get_position_of_Cursor( &info );
3793         TRACE_D3("\n");
3794         TRACE_D3("  Compilation Unit @ offset 0x%lx:\n", cu_start_offset);
3795         /* parse_CU_header initialises the CU's set_abbv_Cursor cache
3796            (saC_cache) */
3797         parse_CU_Header( &cc, td3, &info, escn_debug_abbv, True, False );
3798
3799         /* Needed by cook_die.  */
3800         cc.types_cuOff_bias = escn_debug_info.szB;
3801
3802         record_signatured_type( signature_types, cc.type_signature,
3803                                 cook_die( &cc, cc.type_offset ));
3804
3805         /* Until proven otherwise we assume we don't need the icc9
3806            workaround in this case; see the DIE-reading loop below
3807            for details.  */
3808         cu_offset_now = (cu_start_offset + cc.unit_length
3809                          + (cc.is_dw64 ? 12 : 4));
3810
3811         if (cu_offset_now >= escn_debug_types.szB)
3812            break;
3813
3814         set_position_of_Cursor ( &info, cu_offset_now );
3815      }
3816   }
3817
3818   /* Perform three DIE-reading passes.  The first pass reads DIEs from
3819      alternate .debug_info (if any), the second pass reads DIEs from
3820      .debug_info, and the third pass reads DIEs from .debug_types.
3821      Moving the body of this loop into a separate function would
3822      require a large number of arguments to be passed in, so it is
3823      kept inline instead.  */
3824   for (pass = 0; pass < 3; ++pass) {
3825      ULong section_size;
3826
3827      if (pass == 0) {
3828         if (!ML_(sli_is_valid)(escn_debug_info_alt))
3829	    continue;
3830         /* Now loop over the Compilation Units listed in the alternate
3831            .debug_info section (see D3SPEC sec 7.5) paras 1 and 2.
3832            Each compilation unit contains a Compilation Unit Header
3833            followed by precisely one DW_TAG_compile_unit or
3834            DW_TAG_partial_unit DIE. */
3835         init_Cursor( &info, escn_debug_info_alt, 0, barf,
3836                      "Overrun whilst reading alternate .debug_info section" );
3837         section_size = escn_debug_info_alt.szB;
3838
3839         TRACE_D3("\n------ Parsing alternate .debug_info section ------\n");
3840      } else if (pass == 1) {
3841         /* Now loop over the Compilation Units listed in the .debug_info
3842            section (see D3SPEC sec 7.5) paras 1 and 2.  Each compilation
3843            unit contains a Compilation Unit Header followed by precisely
3844            one DW_TAG_compile_unit or DW_TAG_partial_unit DIE. */
3845         init_Cursor( &info, escn_debug_info, 0, barf,
3846                      "Overrun whilst reading .debug_info section" );
3847         section_size = escn_debug_info.szB;
3848
3849         TRACE_D3("\n------ Parsing .debug_info section ------\n");
3850      } else {
3851         if (!ML_(sli_is_valid)(escn_debug_types))
3852            continue;
3853         init_Cursor( &info, escn_debug_types, 0, barf,
3854                      "Overrun whilst reading .debug_types section" );
3855         section_size = escn_debug_types.szB;
3856
3857         TRACE_D3("\n------ Parsing .debug_types section ------\n");
3858      }
3859
3860      while (True) {
3861         ULong   cu_start_offset, cu_offset_now;
3862         CUConst cc;
3863         /* It may be that the stated size of this CU is larger than the
3864            amount of stuff actually in it.  icc9 seems to generate CUs
3865            thusly.  We use these variables to figure out if this is
3866            indeed the case, and if so how many bytes we need to skip to
3867            get to the start of the next CU.  Not skipping those bytes
3868            causes us to misidentify the start of the next CU, and it all
3869            goes badly wrong after that (not surprisingly). */
3870         UWord cu_size_including_IniLen, cu_amount_used;
3871
3872         /* It seems icc9 finishes the DIE info before debug_info_sz
3873            bytes have been used up.  So be flexible, and declare the
3874            sequence complete if there is not enough remaining bytes to
3875            hold even the smallest conceivable CU header.  (11 bytes I
3876            reckon). */
3877         /* JRS 23Jan09: I suspect this is no longer necessary now that
3878            the code below contains a 'while (cu_amount_used <
3879            cu_size_including_IniLen ...'  style loop, which skips over
3880            any leftover bytes at the end of a CU in the case where the
3881            CU's stated size is larger than its actual size (as
3882            determined by reading all its DIEs).  However, for prudence,
3883            I'll leave the following test in place.  I can't see that a
3884            CU header can be smaller than 11 bytes, so I don't think
3885            there's any harm possible through the test -- it just adds
3886            robustness. */
3887         Word avail = get_remaining_length_Cursor( &info );
3888         if (avail < 11) {
3889            if (avail > 0)
3890               TRACE_D3("new_dwarf3_reader_wrk: warning: "
3891                        "%ld unused bytes after end of DIEs\n", avail);
3892            break;
3893         }
3894
3895         /* Check the varparser's stack is in a sane state. */
3896         vg_assert(varparser.sp == -1);
3897         for (i = 0; i < N_D3_VAR_STACK; i++) {
3898            vg_assert(varparser.ranges[i] == NULL);
3899            vg_assert(varparser.level[i] == 0);
3900         }
3901         for (i = 0; i < N_D3_TYPE_STACK; i++) {
3902            vg_assert(typarser.qparentE[i].cuOff == D3_INVALID_CUOFF);
3903            vg_assert(typarser.qparentE[i].tag   == Te_EMPTY);
3904            vg_assert(typarser.qlevel[i] == 0);
3905         }
3906
3907         cu_start_offset = get_position_of_Cursor( &info );
3908         TRACE_D3("\n");
3909         TRACE_D3("  Compilation Unit @ offset 0x%llx:\n", cu_start_offset);
3910         /* parse_CU_header initialises the CU's set_abbv_Cursor cache
3911            (saC_cache) */
3912         if (pass == 0) {
3913            parse_CU_Header( &cc, td3, &info, escn_debug_abbv_alt,
3914                             False, True );
3915         } else {
3916            parse_CU_Header( &cc, td3, &info, escn_debug_abbv,
3917                             pass == 2, False );
3918         }
3919         cc.escn_debug_str      = pass == 0 ? escn_debug_str_alt
3920                                            : escn_debug_str;
3921         cc.escn_debug_ranges   = escn_debug_ranges;
3922         cc.escn_debug_loc      = escn_debug_loc;
3923         cc.escn_debug_line     = pass == 0 ? escn_debug_line_alt
3924                                            : escn_debug_line;
3925         cc.escn_debug_info     = pass == 0 ? escn_debug_info_alt
3926                                            : escn_debug_info;
3927         cc.escn_debug_types    = escn_debug_types;
3928         cc.escn_debug_info_alt = escn_debug_info_alt;
3929         cc.escn_debug_str_alt  = escn_debug_str_alt;
3930         cc.types_cuOff_bias    = escn_debug_info.szB;
3931         cc.alt_cuOff_bias      = escn_debug_info.szB + escn_debug_types.szB;
3932         cc.cu_start_offset     = cu_start_offset;
3933         cc.di = di;
3934         /* The CU's svma can be deduced by looking at the AT_low_pc
3935            value in the top level TAG_compile_unit, which is the topmost
3936            DIE.  We'll leave it for the 'varparser' to acquire that info
3937            and fill it in -- since it is the only party to want to know
3938            it. */
3939         cc.cu_svma_known = False;
3940         cc.cu_svma       = 0;
3941
3942         cc.signature_types = signature_types;
3943
3944         /* Create a fake outermost-level range covering the entire
3945            address range.  So we always have *something* to catch all
3946            variable declarations. */
3947         varstack_push( &cc, &varparser, td3,
3948                        unitary_range_list(0UL, ~0UL),
3949                        -1, False/*isFunc*/, NULL/*fbGX*/ );
3950
3951         /* And set up the file name table.  When we come across the top
3952            level DIE for this CU (which is what the next call to
3953            read_DIE should process) we will copy all the file names out
3954            of the .debug_line img area and use this table to look up the
3955            copies when we later see filename numbers in DW_TAG_variables
3956            etc. */
3957         vg_assert(!varparser.filenameTable );
3958         varparser.filenameTable
3959            = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.5",
3960                          ML_(dinfo_free),
3961                          sizeof(UChar*) );
3962         vg_assert(varparser.filenameTable);
3963
3964         /* Now read the one-and-only top-level DIE for this CU. */
3965         vg_assert(varparser.sp == 0);
3966         read_DIE( rangestree,
3967                   tyents, tempvars, gexprs,
3968                   &typarser, &varparser,
3969                   &info, td3, &cc, 0 );
3970
3971         cu_offset_now = get_position_of_Cursor( &info );
3972
3973         if (0) VG_(printf)("Travelled: %llu  size %llu\n",
3974                            cu_offset_now - cc.cu_start_offset,
3975                            cc.unit_length + (cc.is_dw64 ? 12 : 4));
3976
3977         /* How big the CU claims it is .. */
3978         cu_size_including_IniLen = cc.unit_length + (cc.is_dw64 ? 12 : 4);
3979         /* .. vs how big we have found it to be */
3980         cu_amount_used = cu_offset_now - cc.cu_start_offset;
3981
3982         if (1) TRACE_D3("offset now %lld, d-i-size %lld\n",
3983                         cu_offset_now, section_size);
3984         if (cu_offset_now > section_size)
3985            barf("toplevel DIEs beyond end of CU");
3986
3987         /* If the CU is bigger than it claims to be, we've got a serious
3988            problem. */
3989         if (cu_amount_used > cu_size_including_IniLen)
3990            barf("CU's actual size appears to be larger than it claims it is");
3991
3992         /* If the CU is smaller than it claims to be, we need to skip some
3993            bytes.  Loop updates cu_offset_now and cu_amount_used. */
3994         while (cu_amount_used < cu_size_including_IniLen
3995                && get_remaining_length_Cursor( &info ) > 0) {
3996            if (0) VG_(printf)("SKIP\n");
3997            (void)get_UChar( &info );
3998            cu_offset_now = get_position_of_Cursor( &info );
3999            cu_amount_used = cu_offset_now - cc.cu_start_offset;
4000         }
4001
4002         /* Preen to level -2.  DIEs have level >= 0 so -2 cannot occur
4003            anywhere else at all.  Our fake the-entire-address-space
4004            range is at level -1, so preening to -2 should completely
4005            empty the stack out. */
4006         TRACE_D3("\n");
4007         varstack_preen( &varparser, td3, -2 );
4008         /* Similarly, empty the type stack out. */
4009         typestack_preen( &typarser, td3, -2 );
4010
4011         TRACE_D3("set_abbv_Cursor cache: %lu queries, %lu misses\n",
4012                  cc.saC_cache_queries, cc.saC_cache_misses);
4013
4014         vg_assert(varparser.filenameTable );
4015         VG_(deleteXA)( varparser.filenameTable );
4016         varparser.filenameTable = NULL;
4017
4018         if (cu_offset_now == section_size)
4019            break;
4020         /* else keep going */
4021      }
4022   }
4023
4024   /* From here on we're post-processing the stuff we got
4025      out of the .debug_info section. */
4026   if (td3) {
4027      TRACE_D3("\n");
4028      ML_(pp_TyEnts)(tyents, "Initial type entity (TyEnt) array");
4029      TRACE_D3("\n");
4030      TRACE_D3("------ Compressing type entries ------\n");
4031   }
4032
4033   tyents_cache = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.6",
4034                                     sizeof(TyEntIndexCache) );
4035   ML_(TyEntIndexCache__invalidate)( tyents_cache );
4036   dedup_types( td3, tyents, tyents_cache );
4037   if (td3) {
4038      TRACE_D3("\n");
4039      ML_(pp_TyEnts)(tyents, "After type entity (TyEnt) compression");
4040   }
4041
4042   TRACE_D3("\n");
4043   TRACE_D3("------ Resolving the types of variables ------\n" );
4044   resolve_variable_types( barf, tyents, tyents_cache, tempvars );
4045
4046   /* Copy all the non-INDIR tyents into a new table.  For large
4047      .so's, about 90% of the tyents will by now have been resolved to
4048      INDIRs, and we no longer need them, and so don't need to store
4049      them. */
4050   tyents_to_keep
4051      = VG_(newXA)( ML_(dinfo_zalloc),
4052                    "di.readdwarf3.ndrw.7 (TyEnt to-keep array)",
4053                    ML_(dinfo_free), sizeof(TyEnt) );
4054   n = VG_(sizeXA)( tyents );
4055   for (i = 0; i < n; i++) {
4056      TyEnt* ent = VG_(indexXA)( tyents, i );
4057      if (ent->tag != Te_INDIR)
4058         VG_(addToXA)( tyents_to_keep, ent );
4059   }
4060
4061   VG_(deleteXA)( tyents );
4062   tyents = NULL;
4063   ML_(dinfo_free)( tyents_cache );
4064   tyents_cache = NULL;
4065
4066   /* Sort tyents_to_keep so we can lookup in it.  A complete (if
4067      minor) waste of time, since tyents itself is sorted, but
4068      necessary since VG_(lookupXA) refuses to cooperate if we
4069      don't. */
4070   VG_(setCmpFnXA)( tyents_to_keep, (XACmpFn_t) ML_(TyEnt__cmp_by_cuOff_only) );
4071   VG_(sortXA)( tyents_to_keep );
4072
4073   /* Enable cacheing on tyents_to_keep */
4074   tyents_to_keep_cache
4075      = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.8",
4076                           sizeof(TyEntIndexCache) );
4077   ML_(TyEntIndexCache__invalidate)( tyents_to_keep_cache );
4078
4079   /* And record the tyents in the DebugInfo.  We do this before
4080      starting to hand variables to ML_(addVar), since if ML_(addVar)
4081      wants to do debug printing (of the types of said vars) then it
4082      will need the tyents.*/
4083   vg_assert(!di->admin_tyents);
4084   di->admin_tyents = tyents_to_keep;
4085
4086   /* Bias all the location expressions. */
4087   TRACE_D3("\n");
4088   TRACE_D3("------ Biasing the location expressions ------\n" );
4089
4090   n = VG_(sizeXA)( gexprs );
4091   for (i = 0; i < n; i++) {
4092      gexpr = *(GExpr**)VG_(indexXA)( gexprs, i );
4093      bias_GX( gexpr, di );
4094   }
4095
4096   TRACE_D3("\n");
4097   TRACE_D3("------ Acquired the following variables: ------\n\n");
4098
4099   /* Park (pointers to) all the vars in an XArray, so we can look up
4100      abstract origins quickly.  The array is sorted (hence, looked-up
4101      by) the .dioff fields.  Since the .dioffs should be in strictly
4102      ascending order, there is no need to sort the array after
4103      construction.  The ascendingness is however asserted for. */
4104   dioff_lookup_tab
4105      = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.9",
4106                    ML_(dinfo_free),
4107                    sizeof(TempVar*) );
4108   vg_assert(dioff_lookup_tab);
4109
4110   n = VG_(sizeXA)( tempvars );
4111   Word first_primary_var = 0;
4112   for (first_primary_var = 0;
4113        escn_debug_info_alt.szB/*really?*/ && first_primary_var < n;
4114        first_primary_var++) {
4115      varp = *(TempVar**)VG_(indexXA)( tempvars, first_primary_var );
4116      if (varp->dioff < escn_debug_info.szB + escn_debug_types.szB)
4117         break;
4118   }
4119   for (i = 0; i < n; i++) {
4120      varp = *(TempVar**)VG_(indexXA)( tempvars, (i + first_primary_var) % n );
4121      if (i > first_primary_var) {
4122         varp2 = *(TempVar**)VG_(indexXA)( tempvars,
4123                                           (i + first_primary_var - 1) % n );
4124         /* why should this hold?  Only, I think, because we've
4125            constructed the array by reading .debug_info sequentially,
4126            and so the array .dioff fields should reflect that, and be
4127            strictly ascending. */
4128         vg_assert(varp2->dioff < varp->dioff);
4129      }
4130      VG_(addToXA)( dioff_lookup_tab, &varp );
4131   }
4132   VG_(setCmpFnXA)( dioff_lookup_tab, cmp_TempVar_by_dioff );
4133   VG_(sortXA)( dioff_lookup_tab ); /* POINTLESS; FIXME: rm */
4134
4135   /* Now visit each var.  Collect up as much info as possible for
4136      each var and hand it to ML_(addVar). */
4137   n = VG_(sizeXA)( tempvars );
4138   for (j = 0; j < n; j++) {
4139      TyEnt* ent;
4140      varp = *(TempVar**)VG_(indexXA)( tempvars, j );
4141
4142      /* Possibly show .. */
4143      if (td3) {
4144         VG_(printf)("<%lx> addVar: level %d: %s :: ",
4145                     varp->dioff,
4146                     varp->level,
4147                     varp->name ? varp->name : "<anon_var>" );
4148         if (varp->typeR) {
4149            ML_(pp_TyEnt_C_ishly)( tyents_to_keep, varp->typeR );
4150         } else {
4151            VG_(printf)("NULL");
4152         }
4153         VG_(printf)("\n  Loc=");
4154         if (varp->gexpr) {
4155            ML_(pp_GX)(varp->gexpr);
4156         } else {
4157            VG_(printf)("NULL");
4158         }
4159         VG_(printf)("\n");
4160         if (varp->fbGX) {
4161            VG_(printf)("  FrB=");
4162            ML_(pp_GX)( varp->fbGX );
4163            VG_(printf)("\n");
4164         } else {
4165            VG_(printf)("  FrB=none\n");
4166         }
4167         VG_(printf)("  declared at: %s:%d\n",
4168                     varp->fName ? varp->fName : "NULL",
4169                     varp->fLine );
4170         if (varp->absOri != (UWord)D3_INVALID_CUOFF)
4171            VG_(printf)("  abstract origin: <%lx>\n", varp->absOri);
4172      }
4173
4174      /* Skip variables which have no location.  These must be
4175         abstract instances; they are useless as-is since with no
4176         location they have no specified memory location.  They will
4177         presumably be referred to via the absOri fields of other
4178         variables. */
4179      if (!varp->gexpr) {
4180         TRACE_D3("  SKIP (no location)\n\n");
4181         continue;
4182      }
4183
4184      /* So it has a location, at least.  If it refers to some other
4185         entry through its absOri field, pull in further info through
4186         that. */
4187      if (varp->absOri != (UWord)D3_INVALID_CUOFF) {
4188         Bool found;
4189         Word ixFirst, ixLast;
4190         TempVar key;
4191         TempVar* keyp = &key;
4192         TempVar *varAI;
4193         VG_(memset)(&key, 0, sizeof(key)); /* not necessary */
4194         key.dioff = varp->absOri; /* this is what we want to find */
4195         found = VG_(lookupXA)( dioff_lookup_tab, &keyp,
4196                                &ixFirst, &ixLast );
4197         if (!found) {
4198            /* barf("DW_AT_abstract_origin can't be resolved"); */
4199            TRACE_D3("  SKIP (DW_AT_abstract_origin can't be resolved)\n\n");
4200            continue;
4201         }
4202         /* If the following fails, there is more than one entry with
4203            the same dioff.  Which can't happen. */
4204         vg_assert(ixFirst == ixLast);
4205         varAI = *(TempVar**)VG_(indexXA)( dioff_lookup_tab, ixFirst );
4206         /* stay sane */
4207         vg_assert(varAI);
4208         vg_assert(varAI->dioff == varp->absOri);
4209
4210         /* Copy what useful info we can. */
4211         if (varAI->typeR && !varp->typeR)
4212            varp->typeR = varAI->typeR;
4213         if (varAI->name && !varp->name)
4214            varp->name = varAI->name;
4215         if (varAI->fName && !varp->fName)
4216            varp->fName = varAI->fName;
4217         if (varAI->fLine > 0 && varp->fLine == 0)
4218            varp->fLine = varAI->fLine;
4219      }
4220
4221      /* Give it a name if it doesn't have one. */
4222      if (!varp->name)
4223         varp->name = ML_(addStr)( di, "<anon_var>", -1 );
4224
4225      /* So now does it have enough info to be useful? */
4226      /* NOTE: re typeR: this is a hack.  If typeR is Te_UNKNOWN then
4227         the type didn't get resolved.  Really, in that case
4228         something's broken earlier on, and should be fixed, rather
4229         than just skipping the variable. */
4230      ent = ML_(TyEnts__index_by_cuOff)( tyents_to_keep,
4231                                         tyents_to_keep_cache,
4232                                         varp->typeR );
4233      /* The next two assertions should be guaranteed by
4234         our previous call to resolve_variable_types. */
4235      vg_assert(ent);
4236      vg_assert(ML_(TyEnt__is_type)(ent) || ent->tag == Te_UNKNOWN);
4237
4238      if (ent->tag == Te_UNKNOWN) continue;
4239
4240      vg_assert(varp->gexpr);
4241      vg_assert(varp->name);
4242      vg_assert(varp->typeR);
4243      vg_assert(varp->level >= 0);
4244
4245      /* Ok.  So we're going to keep it.  Call ML_(addVar) once for
4246         each address range in which the variable exists. */
4247      TRACE_D3("  ACQUIRE for range(s) ");
4248      { AddrRange  oneRange;
4249        AddrRange* varPcRanges;
4250        Word       nVarPcRanges;
4251        /* Set up to iterate over address ranges, however
4252           represented. */
4253        if (varp->nRanges == 0 || varp->nRanges == 1) {
4254           vg_assert(!varp->rngMany);
4255           if (varp->nRanges == 0) {
4256              vg_assert(varp->rngOneMin == 0);
4257              vg_assert(varp->rngOneMax == 0);
4258           }
4259           nVarPcRanges = varp->nRanges;
4260           oneRange.aMin = varp->rngOneMin;
4261           oneRange.aMax = varp->rngOneMax;
4262           varPcRanges = &oneRange;
4263        } else {
4264           vg_assert(varp->rngMany);
4265           vg_assert(varp->rngOneMin == 0);
4266           vg_assert(varp->rngOneMax == 0);
4267           nVarPcRanges = VG_(sizeXA)(varp->rngMany);
4268           vg_assert(nVarPcRanges >= 2);
4269           vg_assert(nVarPcRanges == (Word)varp->nRanges);
4270           varPcRanges = VG_(indexXA)(varp->rngMany, 0);
4271        }
4272        if (varp->level == 0)
4273           vg_assert( nVarPcRanges == 1 );
4274        /* and iterate */
4275        for (i = 0; i < nVarPcRanges; i++) {
4276           Addr pcMin = varPcRanges[i].aMin;
4277           Addr pcMax = varPcRanges[i].aMax;
4278           vg_assert(pcMin <= pcMax);
4279           /* Level 0 is the global address range.  So at level 0 we
4280              don't want to bias pcMin/pcMax; but at all other levels
4281              we do since those are derived from svmas in the Dwarf
4282              we're reading.  Be paranoid ... */
4283           if (varp->level == 0) {
4284              vg_assert(pcMin == (Addr)0);
4285              vg_assert(pcMax == ~(Addr)0);
4286           } else {
4287              /* vg_assert(pcMin > (Addr)0);
4288                 No .. we can legitimately expect to see ranges like
4289                 0x0-0x11D (pre-biasing, of course). */
4290              vg_assert(pcMax < ~(Addr)0);
4291           }
4292
4293           /* Apply text biasing, for non-global variables. */
4294           if (varp->level > 0) {
4295              pcMin += di->text_debug_bias;
4296              pcMax += di->text_debug_bias;
4297           }
4298
4299           if (i > 0 && (i%2) == 0)
4300              TRACE_D3("\n                       ");
4301           TRACE_D3("[%#lx,%#lx] ", pcMin, pcMax );
4302
4303           ML_(addVar)(
4304              di, varp->level,
4305                  pcMin, pcMax,
4306                  varp->name,  varp->typeR,
4307                  varp->gexpr, varp->fbGX,
4308                  varp->fName, varp->fLine, td3
4309           );
4310        }
4311      }
4312
4313      TRACE_D3("\n\n");
4314      /* and move on to the next var */
4315   }
4316
4317   /* Now free all the TempVars */
4318   n = VG_(sizeXA)( tempvars );
4319   for (i = 0; i < n; i++) {
4320      varp = *(TempVar**)VG_(indexXA)( tempvars, i );
4321      ML_(dinfo_free)(varp);
4322   }
4323   VG_(deleteXA)( tempvars );
4324   tempvars = NULL;
4325
4326   /* and the temp lookup table */
4327   VG_(deleteXA)( dioff_lookup_tab );
4328
4329   /* and the ranges tree.  Note that we need to also free the XArrays
4330      which constitute the keys, hence pass VG_(deleteXA) as a
4331      key-finalizer. */
4332   VG_(deleteFM)( rangestree, (void(*)(UWord))VG_(deleteXA), NULL );
4333
4334   /* and the tyents_to_keep cache */
4335   ML_(dinfo_free)( tyents_to_keep_cache );
4336   tyents_to_keep_cache = NULL;
4337
4338   vg_assert( varparser.filenameTable == NULL );
4339
4340   /* And the signatured type hash.  */
4341   VG_(HT_destruct) ( signature_types, ML_(dinfo_free) );
4342
4343   /* record the GExprs in di so they can be freed later */
4344   vg_assert(!di->admin_gexprs);
4345   di->admin_gexprs = gexprs;
4346}
4347
4348
4349/*------------------------------------------------------------*/
4350/*---                                                      ---*/
4351/*--- The "new" DWARF3 reader -- top level control logic   ---*/
4352/*---                                                      ---*/
4353/*------------------------------------------------------------*/
4354
4355static Bool               d3rd_jmpbuf_valid  = False;
4356static const HChar*       d3rd_jmpbuf_reason = NULL;
4357static VG_MINIMAL_JMP_BUF(d3rd_jmpbuf);
4358
4359static __attribute__((noreturn)) void barf ( const HChar* reason ) {
4360   vg_assert(d3rd_jmpbuf_valid);
4361   d3rd_jmpbuf_reason = reason;
4362   VG_MINIMAL_LONGJMP(d3rd_jmpbuf);
4363   /*NOTREACHED*/
4364   vg_assert(0);
4365}
4366
4367
4368void
4369ML_(new_dwarf3_reader) (
4370   struct _DebugInfo* di,
4371   DiSlice escn_debug_info,      DiSlice escn_debug_types,
4372   DiSlice escn_debug_abbv,      DiSlice escn_debug_line,
4373   DiSlice escn_debug_str,       DiSlice escn_debug_ranges,
4374   DiSlice escn_debug_loc,       DiSlice escn_debug_info_alt,
4375   DiSlice escn_debug_abbv_alt,  DiSlice escn_debug_line_alt,
4376   DiSlice escn_debug_str_alt
4377)
4378{
4379   volatile Int  jumped;
4380   volatile Bool td3 = di->trace_symtab;
4381
4382   /* Run the _wrk function to read the dwarf3.  If it succeeds, it
4383      just returns normally.  If there is any failure, it longjmp's
4384      back here, having first set d3rd_jmpbuf_reason to something
4385      useful. */
4386   vg_assert(d3rd_jmpbuf_valid  == False);
4387   vg_assert(d3rd_jmpbuf_reason == NULL);
4388
4389   d3rd_jmpbuf_valid = True;
4390   jumped = VG_MINIMAL_SETJMP(d3rd_jmpbuf);
4391   if (jumped == 0) {
4392      /* try this ... */
4393      new_dwarf3_reader_wrk( di, barf,
4394                             escn_debug_info,     escn_debug_types,
4395                             escn_debug_abbv,     escn_debug_line,
4396                             escn_debug_str,      escn_debug_ranges,
4397                             escn_debug_loc,      escn_debug_info_alt,
4398                             escn_debug_abbv_alt, escn_debug_line_alt,
4399                             escn_debug_str_alt );
4400      d3rd_jmpbuf_valid = False;
4401      TRACE_D3("\n------ .debug_info reading was successful ------\n");
4402   } else {
4403      /* It longjmp'd. */
4404      d3rd_jmpbuf_valid = False;
4405      /* Can't longjump without giving some sort of reason. */
4406      vg_assert(d3rd_jmpbuf_reason != NULL);
4407
4408      TRACE_D3("\n------ .debug_info reading failed ------\n");
4409
4410      ML_(symerr)(di, True, d3rd_jmpbuf_reason);
4411   }
4412
4413   d3rd_jmpbuf_valid  = False;
4414   d3rd_jmpbuf_reason = NULL;
4415}
4416
4417
4418
4419/* --- Unused code fragments which might be useful one day. --- */
4420
4421#if 0
4422   /* Read the arange tables */
4423   TRACE_SYMTAB("\n");
4424   TRACE_SYMTAB("\n------ The contents of .debug_arange ------\n");
4425   init_Cursor( &aranges, debug_aranges_img,
4426                debug_aranges_sz, 0, barf,
4427                "Overrun whilst reading .debug_aranges section" );
4428   while (True) {
4429      ULong  len, d_i_offset;
4430      Bool   is64;
4431      UShort version;
4432      UChar  asize, segsize;
4433
4434      if (is_at_end_Cursor( &aranges ))
4435         break;
4436      /* Read one arange thingy */
4437      /* initial_length field */
4438      len = get_Initial_Length( &is64, &aranges,
4439               "in .debug_aranges: invalid initial-length field" );
4440      version    = get_UShort( &aranges );
4441      d_i_offset = get_Dwarfish_UWord( &aranges, is64 );
4442      asize      = get_UChar( &aranges );
4443      segsize    = get_UChar( &aranges );
4444      TRACE_D3("  Length:                   %llu\n", len);
4445      TRACE_D3("  Version:                  %d\n", (Int)version);
4446      TRACE_D3("  Offset into .debug_info:  %llx\n", d_i_offset);
4447      TRACE_D3("  Pointer Size:             %d\n", (Int)asize);
4448      TRACE_D3("  Segment Size:             %d\n", (Int)segsize);
4449      TRACE_D3("\n");
4450      TRACE_D3("    Address            Length\n");
4451
4452      while ((get_position_of_Cursor( &aranges ) % (2 * asize)) > 0) {
4453         (void)get_UChar( & aranges );
4454      }
4455      while (True) {
4456         ULong address = get_Dwarfish_UWord( &aranges, asize==8 );
4457         ULong length = get_Dwarfish_UWord( &aranges, asize==8 );
4458         TRACE_D3("    0x%016llx 0x%llx\n", address, length);
4459         if (address == 0 && length == 0) break;
4460      }
4461   }
4462   TRACE_SYMTAB("\n");
4463#endif
4464
4465#endif // defined(VGO_linux) || defined(VGO_darwin)
4466
4467/*--------------------------------------------------------------------*/
4468/*--- end                                                          ---*/
4469/*--------------------------------------------------------------------*/
4470