readdwarf3.c revision 50c5093772c2b23fd0897d3590dcfaec1c92ac83
1 2/*--------------------------------------------------------------------*/ 3/*--- Read DWARF3/4 ".debug_info" sections (DIE trees). ---*/ 4/*--- readdwarf3.c ---*/ 5/*--------------------------------------------------------------------*/ 6 7/* 8 This file is part of Valgrind, a dynamic binary instrumentation 9 framework. 10 11 Copyright (C) 2008-2010 OpenWorks LLP 12 info@open-works.co.uk 13 14 This program is free software; you can redistribute it and/or 15 modify it under the terms of the GNU General Public License as 16 published by the Free Software Foundation; either version 2 of the 17 License, or (at your option) any later version. 18 19 This program is distributed in the hope that it will be useful, but 20 WITHOUT ANY WARRANTY; without even the implied warranty of 21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 General Public License for more details. 23 24 You should have received a copy of the GNU General Public License 25 along with this program; if not, write to the Free Software 26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 27 02111-1307, USA. 28 29 The GNU General Public License is contained in the file COPYING. 30 31 Neither the names of the U.S. Department of Energy nor the 32 University of California nor the names of its contributors may be 33 used to endorse or promote products derived from this software 34 without prior written permission. 35*/ 36 37#if defined(VGO_linux) || defined(VGO_darwin) 38 39/* REFERENCE (without which this code will not make much sense): 40 41 DWARF Debugging Information Format, Version 3, 42 dated 20 December 2005 (the "D3 spec"). 43 44 Available at http://www.dwarfstd.org/Dwarf3.pdf. There's also a 45 .doc (MS Word) version, but for some reason the section numbers 46 between the Word and PDF versions differ by 1 in the first digit. 47 All section references in this code are to the PDF version. 
   CURRENT HACKS:

   For DW_TAG_{const,volatile}_type, a missing DW_AT_type is allowed;
   it is assumed to mean "const void" or "volatile void" respectively.
   GDB appears to interpret them like this, anyway.

   In many cases it is important to know the svma of a CU (the "base
   address of the CU", as the D3 spec calls it).  There are some
   situations in which the spec implies this value is unknown, but the
   Dwarf3 produced by gcc-4.1 seems to assume it is not unknown but
   merely zero when not explicitly stated.  So we too have to make
   that assumption.

   POTENTIAL BUG?  Spotted 6 Sept 08.  Why doesn't
   unitary_range_list() bias the resulting range list in the same way
   that its more general cousin, get_range_list(), does?  I don't
   know.

   TODO, 2008 Feb 17:

   get rid of cu_svma_known and document the assumed-zero svma hack.

   ML_(sizeOfType): differentiate between zero sized types and types
   for which the size is unknown.  Is this important?  I don't know.

   DW_AT_array_types: deal with explicit sizes (currently we compute
   the size from the bounds and the element size, although that's
   fragile if the bounds are incompletely specified, or completely
   absent)

   Document reason for difference (by 1) of stack preening depth in
   parse_var_DIE vs parse_type_DIE.

   Don't hand to ML_(addVars), vars whose locations are entirely in
   registers (DW_OP_reg*).  This is merely a space-saving
   optimisation, as ML_(evaluate_Dwarf3_Expr) should handle these
   expressions correctly, by failing to evaluate them and hence
   effectively ignoring the variable with which they are associated.

   Deal with DW_AT_array_types which have element size != stride

   In some cases, the info for a variable is split between two
   different DIEs (generally a declarer and a definer).  We punt on
   these.  Could do better here.
93 94 The 'data_bias' argument passed to the expression evaluator 95 (ML_(evaluate_Dwarf3_Expr)) should really be changed to a 96 MaybeUWord, to make it clear when we do vs don't know what it is 97 for the evaluation of an expression. At the moment zero is passed 98 for this parameter in the don't know case. That's a bit fragile 99 and obscure; using a MaybeUWord would be clearer. 100 101 POTENTIAL PERFORMANCE IMPROVEMENTS: 102 103 Currently, duplicate removal and all other queries for the type 104 entities array is done using cuOffset-based pointing, which 105 involves a binary search (VG_(lookupXA)) for each access. This is 106 wildly inefficient, although simple. It would be better to 107 translate all the cuOffset-based references (iow, all the "R" and 108 "Rs" fields in the TyEnts in 'tyents') to direct index numbers in 109 'tyents' right at the start of dedup_types(), and use direct 110 indexing (VG_(indexXA)) wherever possible after that. 111 112 cmp__XArrays_of_AddrRange is also a performance bottleneck. Move 113 VG_(indexXA) into pub_tool_xarray.h so it can be inlined at all use 114 points, and possibly also make an _UNCHECKED version which skips 115 the range checks in performance-critical situations such as this. 116 117 Handle interaction between read_DIE and parse_{var,type}_DIE 118 better. Currently read_DIE reads the entire DIE just to find where 119 the end is (and for debug printing), so that it can later reliably 120 move the cursor to the end regardless of what parse_{var,type}_DIE 121 do. This means many DIEs (most, even?) are read twice. It would 122 be smarter to make parse_{var,type}_DIE return a Bool indicating 123 whether or not they advanced the DIE cursor, and only if they 124 didn't should read_DIE itself read through the DIE. 125 126 ML_(addVar) and add_var_to_arange: quite a lot of DiAddrRanges have 127 zero variables in their .vars XArray. 
Rather than have an XArray 128 with zero elements (which uses 2 malloc'd blocks), allow the .vars 129 pointer to be NULL in this case. 130 131 More generally, reduce the amount of memory allocated and freed 132 while reading Dwarf3 type/variable information. Even modest (20MB) 133 objects cause this module to allocate and free hundreds of 134 thousands of small blocks, and ML_(arena_malloc) and its various 135 groupies always show up at the top of performance profiles. */ 136 137#include "pub_core_basics.h" 138#include "pub_core_debuginfo.h" 139#include "pub_core_libcbase.h" 140#include "pub_core_libcassert.h" 141#include "pub_core_libcprint.h" 142#include "pub_core_options.h" 143#include "pub_core_tooliface.h" /* VG_(needs) */ 144#include "pub_core_xarray.h" 145#include "pub_core_wordfm.h" 146#include "priv_misc.h" /* dinfo_zalloc/free */ 147#include "priv_tytypes.h" 148#include "priv_d3basics.h" 149#include "priv_storage.h" 150#include "priv_readdwarf3.h" /* self */ 151 152 153/*------------------------------------------------------------*/ 154/*--- ---*/ 155/*--- Basic machinery for parsing DIEs. ---*/ 156/*--- ---*/ 157/*------------------------------------------------------------*/ 158 159#define TRACE_D3(format, args...) 
\ 160 if (td3) { VG_(printf)(format, ## args); } 161 162#define D3_INVALID_CUOFF ((UWord)(-1UL)) 163#define D3_FAKEVOID_CUOFF ((UWord)(-2UL)) 164 165typedef 166 struct { 167 UChar* region_start_img; 168 UWord region_szB; 169 UWord region_next; 170 void (*barf)( HChar* ) __attribute__((noreturn)); 171 HChar* barfstr; 172 } 173 Cursor; 174 175static inline Bool is_sane_Cursor ( Cursor* c ) { 176 if (!c) return False; 177 if (!c->barf) return False; 178 if (!c->barfstr) return False; 179 return True; 180} 181 182static void init_Cursor ( Cursor* c, 183 UChar* region_start_img, 184 UWord region_szB, 185 UWord region_next, 186 __attribute__((noreturn)) void (*barf)( HChar* ), 187 HChar* barfstr ) 188{ 189 vg_assert(c); 190 VG_(memset)(c, 0, sizeof(*c)); 191 c->region_start_img = region_start_img; 192 c->region_szB = region_szB; 193 c->region_next = region_next; 194 c->barf = barf; 195 c->barfstr = barfstr; 196 vg_assert(is_sane_Cursor(c)); 197} 198 199static Bool is_at_end_Cursor ( Cursor* c ) { 200 vg_assert(is_sane_Cursor(c)); 201 return c->region_next >= c->region_szB; 202} 203 204static inline UWord get_position_of_Cursor ( Cursor* c ) { 205 vg_assert(is_sane_Cursor(c)); 206 return c->region_next; 207} 208static inline void set_position_of_Cursor ( Cursor* c, UWord pos ) { 209 c->region_next = pos; 210 vg_assert(is_sane_Cursor(c)); 211} 212 213static /*signed*/Word get_remaining_length_Cursor ( Cursor* c ) { 214 vg_assert(is_sane_Cursor(c)); 215 return c->region_szB - c->region_next; 216} 217 218static UChar* get_address_of_Cursor ( Cursor* c ) { 219 vg_assert(is_sane_Cursor(c)); 220 return &c->region_start_img[ c->region_next ]; 221} 222 223/* FIXME: document assumptions on endianness for 224 get_UShort/UInt/ULong. 
*/ 225static inline UChar get_UChar ( Cursor* c ) { 226 UChar r; 227 /* vg_assert(is_sane_Cursor(c)); */ 228 if (c->region_next + sizeof(UChar) > c->region_szB) { 229 c->barf(c->barfstr); 230 /*NOTREACHED*/ 231 vg_assert(0); 232 } 233 r = * (UChar*) &c->region_start_img[ c->region_next ]; 234 c->region_next += sizeof(UChar); 235 return r; 236} 237static UShort get_UShort ( Cursor* c ) { 238 UShort r; 239 vg_assert(is_sane_Cursor(c)); 240 if (c->region_next + sizeof(UShort) > c->region_szB) { 241 c->barf(c->barfstr); 242 /*NOTREACHED*/ 243 vg_assert(0); 244 } 245 r = * (UShort*) &c->region_start_img[ c->region_next ]; 246 c->region_next += sizeof(UShort); 247 return r; 248} 249static UInt get_UInt ( Cursor* c ) { 250 UInt r; 251 vg_assert(is_sane_Cursor(c)); 252 if (c->region_next + sizeof(UInt) > c->region_szB) { 253 c->barf(c->barfstr); 254 /*NOTREACHED*/ 255 vg_assert(0); 256 } 257 r = * (UInt*) &c->region_start_img[ c->region_next ]; 258 c->region_next += sizeof(UInt); 259 return r; 260} 261static ULong get_ULong ( Cursor* c ) { 262 ULong r; 263 vg_assert(is_sane_Cursor(c)); 264 if (c->region_next + sizeof(ULong) > c->region_szB) { 265 c->barf(c->barfstr); 266 /*NOTREACHED*/ 267 vg_assert(0); 268 } 269 r = * (ULong*) &c->region_start_img[ c->region_next ]; 270 c->region_next += sizeof(ULong); 271 return r; 272} 273static inline ULong get_ULEB128 ( Cursor* c ) { 274 ULong result; 275 Int shift; 276 UChar byte; 277 /* unroll first iteration */ 278 byte = get_UChar( c ); 279 result = (ULong)(byte & 0x7f); 280 if (LIKELY(!(byte & 0x80))) return result; 281 shift = 7; 282 /* end unroll first iteration */ 283 do { 284 byte = get_UChar( c ); 285 result |= ((ULong)(byte & 0x7f)) << shift; 286 shift += 7; 287 } while (byte & 0x80); 288 return result; 289} 290static Long get_SLEB128 ( Cursor* c ) { 291 ULong result = 0; 292 Int shift = 0; 293 UChar byte; 294 do { 295 byte = get_UChar(c); 296 result |= ((ULong)(byte & 0x7f)) << shift; 297 shift += 7; 298 } while (byte & 
0x80); 299 if (shift < 64 && (byte & 0x40)) 300 result |= -(1ULL << shift); 301 return result; 302} 303 304/* Assume 'c' points to the start of a string. Return the absolute 305 address of whatever it points at, and advance it past the 306 terminating zero. This makes it safe for the caller to then copy 307 the string with ML_(addStr), since (w.r.t. image overruns) the 308 process of advancing past the terminating zero will already have 309 "vetted" the string. */ 310static UChar* get_AsciiZ ( Cursor* c ) { 311 UChar uc; 312 UChar* res = get_address_of_Cursor(c); 313 do { uc = get_UChar(c); } while (uc != 0); 314 return res; 315} 316 317static ULong peek_ULEB128 ( Cursor* c ) { 318 Word here = c->region_next; 319 ULong r = get_ULEB128( c ); 320 c->region_next = here; 321 return r; 322} 323static UChar peek_UChar ( Cursor* c ) { 324 Word here = c->region_next; 325 UChar r = get_UChar( c ); 326 c->region_next = here; 327 return r; 328} 329 330static ULong get_Dwarfish_UWord ( Cursor* c, Bool is_dw64 ) { 331 return is_dw64 ? 
get_ULong(c) : (ULong) get_UInt(c); 332} 333 334static UWord get_UWord ( Cursor* c ) { 335 vg_assert(sizeof(UWord) == sizeof(void*)); 336 if (sizeof(UWord) == 4) return get_UInt(c); 337 if (sizeof(UWord) == 8) return get_ULong(c); 338 vg_assert(0); 339} 340 341/* Read a DWARF3 'Initial Length' field */ 342static ULong get_Initial_Length ( /*OUT*/Bool* is64, 343 Cursor* c, 344 HChar* barfMsg ) 345{ 346 ULong w64; 347 UInt w32; 348 *is64 = False; 349 w32 = get_UInt( c ); 350 if (w32 >= 0xFFFFFFF0 && w32 < 0xFFFFFFFF) { 351 c->barf( barfMsg ); 352 } 353 else if (w32 == 0xFFFFFFFF) { 354 *is64 = True; 355 w64 = get_ULong( c ); 356 } else { 357 *is64 = False; 358 w64 = (ULong)w32; 359 } 360 return w64; 361} 362 363 364/*------------------------------------------------------------*/ 365/*--- ---*/ 366/*--- "CUConst" structure ---*/ 367/*--- ---*/ 368/*------------------------------------------------------------*/ 369 370#define N_ABBV_CACHE 32 371 372/* Holds information that is constant through the parsing of a 373 Compilation Unit. This is basically plumbed through to 374 everywhere. */ 375typedef 376 struct { 377 /* Call here if anything goes wrong */ 378 void (*barf)( HChar* ) __attribute__((noreturn)); 379 /* Is this 64-bit DWARF ? */ 380 Bool is_dw64; 381 /* Which DWARF version ? (2, 3 or 4) */ 382 UShort version; 383 /* Length of this Compilation Unit, as stated in the 384 .unit_length :: InitialLength field of the CU Header. 385 However, this size (as specified by the D3 spec) does not 386 include the size of the .unit_length field itself, which is 387 either 4 or 12 bytes (32-bit or 64-bit Dwarf3). That value 388 can be obtained through the expression ".is_dw64 ? 12 : 4". */ 389 ULong unit_length; 390 /* Offset of start of this unit in .debug_info */ 391 UWord cu_start_offset; 392 /* SVMA for this CU. In the D3 spec, is known as the "base 393 address of the compilation unit (last para sec 3.1.1). 
394 Needed for (amongst things) interpretation of location-list 395 values. */ 396 Addr cu_svma; 397 Bool cu_svma_known; 398 /* The debug_abbreviations table to be used for this Unit */ 399 UChar* debug_abbv; 400 /* Upper bound on size thereof (an overestimate, in general) */ 401 UWord debug_abbv_maxszB; 402 /* Where is .debug_str ? */ 403 UChar* debug_str_img; 404 UWord debug_str_sz; 405 /* Where is .debug_ranges ? */ 406 UChar* debug_ranges_img; 407 UWord debug_ranges_sz; 408 /* Where is .debug_loc ? */ 409 UChar* debug_loc_img; 410 UWord debug_loc_sz; 411 /* Where is .debug_line? */ 412 UChar* debug_line_img; 413 UWord debug_line_sz; 414 /* Where is .debug_info? */ 415 UChar* debug_info_img; 416 UWord debug_info_sz; 417 /* --- Needed so we can add stuff to the string table. --- */ 418 struct _DebugInfo* di; 419 /* --- a cache for set_abbv_Cursor --- */ 420 /* abbv_code == (ULong)-1 for an unused entry. */ 421 struct { ULong abbv_code; UWord posn; } saC_cache[N_ABBV_CACHE]; 422 UWord saC_cache_queries; 423 UWord saC_cache_misses; 424 } 425 CUConst; 426 427 428/*------------------------------------------------------------*/ 429/*--- ---*/ 430/*--- Helper functions for Guarded Expressions ---*/ 431/*--- ---*/ 432/*------------------------------------------------------------*/ 433 434/* Parse the location list starting at img-offset 'debug_loc_offset' 435 in .debug_loc. Results are biased with 'svma_of_referencing_CU' 436 and so I believe are correct SVMAs for the object as a whole. This 437 function allocates the UChar*, and the caller must deallocate it. 438 The resulting block is in so-called Guarded-Expression format. 439 440 Guarded-Expression format is similar but not identical to the DWARF3 441 location-list format. The format of each returned block is: 442 443 UChar biasMe; 444 UChar isEnd; 445 followed by zero or more of 446 447 (Addr aMin; Addr aMax; UShort nbytes; ..bytes..; UChar isEnd) 448 449 '..bytes..' 
is an standard DWARF3 location expression which is 450 valid when aMin <= pc <= aMax (possibly after suitable biasing). 451 452 The number of bytes in '..bytes..' is nbytes. 453 454 The end of the sequence is marked by an isEnd == 1 value. All 455 previous isEnd values must be zero. 456 457 biasMe is 1 if the aMin/aMax fields need this DebugInfo's 458 text_bias added before use, and 0 if the GX is this is not 459 necessary (is ready to go). 460 461 Hence the block can be quickly parsed and is self-describing. Note 462 that aMax is 1 less than the corresponding value in a DWARF3 463 location list. Zero length ranges, with aMax == aMin-1, are not 464 allowed. 465*/ 466/* 2008-sept-12: moved ML_(pp_GX) from here to d3basics.c, where 467 it more logically belongs. */ 468 469 470/* Apply a text bias to a GX. */ 471static void bias_GX ( /*MOD*/GExpr* gx, struct _DebugInfo* di ) 472{ 473 UShort nbytes; 474 Addr* pA; 475 UChar* p = &gx->payload[0]; 476 UChar uc; 477 uc = *p++; /*biasMe*/ 478 if (uc == 0) 479 return; 480 vg_assert(uc == 1); 481 p[-1] = 0; /* mark it as done */ 482 while (True) { 483 uc = *p++; 484 if (uc == 1) 485 break; /*isEnd*/ 486 vg_assert(uc == 0); 487 /* t-bias aMin */ 488 pA = (Addr*)p; 489 *pA += di->text_debug_bias; 490 p += sizeof(Addr); 491 /* t-bias aMax */ 492 pA = (Addr*)p; 493 *pA += di->text_debug_bias; 494 p += sizeof(Addr); 495 /* nbytes, and actual expression */ 496 nbytes = * (UShort*)p; p += sizeof(UShort); 497 p += nbytes; 498 } 499} 500 501__attribute__((noinline)) 502static GExpr* make_singleton_GX ( UChar* block, UWord nbytes ) 503{ 504 SizeT bytesReqd; 505 GExpr* gx; 506 UChar *p, *pstart; 507 508 vg_assert(sizeof(UWord) == sizeof(Addr)); 509 vg_assert(nbytes <= 0xFFFF); /* else we overflow the nbytes field */ 510 bytesReqd 511 = sizeof(UChar) /*biasMe*/ + sizeof(UChar) /*!isEnd*/ 512 + sizeof(UWord) /*aMin*/ + sizeof(UWord) /*aMax*/ 513 + sizeof(UShort) /*nbytes*/ + nbytes 514 + sizeof(UChar); /*isEnd*/ 515 516 gx = 
ML_(dinfo_zalloc)( "di.readdwarf3.msGX.1", 517 sizeof(GExpr) + bytesReqd ); 518 vg_assert(gx); 519 520 p = pstart = &gx->payload[0]; 521 522 * ((UChar*)p) = 0; /*biasMe*/ p += sizeof(UChar); 523 * ((UChar*)p) = 0; /*!isEnd*/ p += sizeof(UChar); 524 * ((Addr*)p) = 0; /*aMin*/ p += sizeof(Addr); 525 * ((Addr*)p) = ~((Addr)0); /*aMax */ p += sizeof(Addr); 526 * ((UShort*)p) = (UShort)nbytes; /*nbytes*/ p += sizeof(UShort); 527 VG_(memcpy)(p, block, nbytes); p += nbytes; 528 * ((UChar*)p) = 1; /*isEnd*/ p += sizeof(UChar); 529 530 vg_assert( (SizeT)(p - pstart) == bytesReqd); 531 vg_assert( &gx->payload[bytesReqd] 532 == ((UChar*)gx) + sizeof(GExpr) + bytesReqd ); 533 534 return gx; 535} 536 537__attribute__((noinline)) 538static GExpr* make_general_GX ( CUConst* cc, 539 Bool td3, 540 UWord debug_loc_offset, 541 Addr svma_of_referencing_CU ) 542{ 543 Addr base; 544 Cursor loc; 545 XArray* xa; /* XArray of UChar */ 546 GExpr* gx; 547 Word nbytes; 548 549 vg_assert(sizeof(UWord) == sizeof(Addr)); 550 if (cc->debug_loc_sz == 0) 551 cc->barf("make_general_GX: .debug_loc is empty/missing"); 552 553 init_Cursor( &loc, cc->debug_loc_img, 554 cc->debug_loc_sz, 0, cc->barf, 555 "Overrun whilst reading .debug_loc section(2)" ); 556 set_position_of_Cursor( &loc, debug_loc_offset ); 557 558 TRACE_D3("make_general_GX (.debug_loc_offset = %lu, img = %p) {\n", 559 debug_loc_offset, get_address_of_Cursor( &loc ) ); 560 561 /* Who frees this xa? It is freed before this fn exits. */ 562 xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.mgGX.1", 563 ML_(dinfo_free), 564 sizeof(UChar) ); 565 566 { UChar c = 1; /*biasMe*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); } 567 568 base = 0; 569 while (True) { 570 Bool acquire; 571 UWord len; 572 /* Read a (host-)word pair. This is something of a hack since 573 the word size to read is really dictated by the ELF file; 574 however, we assume we're reading a file with the same 575 word-sizeness as the host. Reasonably enough. 
*/ 576 UWord w1 = get_UWord( &loc ); 577 UWord w2 = get_UWord( &loc ); 578 579 TRACE_D3(" %08lx %08lx\n", w1, w2); 580 if (w1 == 0 && w2 == 0) 581 break; /* end of list */ 582 583 if (w1 == -1UL) { 584 /* new value for 'base' */ 585 base = w2; 586 continue; 587 } 588 589 /* else a location expression follows */ 590 /* else enumerate [w1+base, w2+base) */ 591 /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc" 592 (sec 2.17.2) */ 593 if (w1 > w2) { 594 TRACE_D3("negative range is for .debug_loc expr at " 595 "file offset %lu\n", 596 debug_loc_offset); 597 cc->barf( "negative range in .debug_loc section" ); 598 } 599 600 /* ignore zero length ranges */ 601 acquire = w1 < w2; 602 len = (UWord)get_UShort( &loc ); 603 604 if (acquire) { 605 UWord w; 606 UShort s; 607 UChar c; 608 c = 0; /* !isEnd*/ 609 VG_(addBytesToXA)( xa, &c, sizeof(c) ); 610 w = w1 + base + svma_of_referencing_CU; 611 VG_(addBytesToXA)( xa, &w, sizeof(w) ); 612 w = w2 -1 + base + svma_of_referencing_CU; 613 VG_(addBytesToXA)( xa, &w, sizeof(w) ); 614 s = (UShort)len; 615 VG_(addBytesToXA)( xa, &s, sizeof(s) ); 616 } 617 618 while (len > 0) { 619 UChar byte = get_UChar( &loc ); 620 TRACE_D3("%02x", (UInt)byte); 621 if (acquire) 622 VG_(addBytesToXA)( xa, &byte, 1 ); 623 len--; 624 } 625 TRACE_D3("\n"); 626 } 627 628 { UChar c = 1; /*isEnd*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); } 629 630 nbytes = VG_(sizeXA)( xa ); 631 vg_assert(nbytes >= 1); 632 633 gx = ML_(dinfo_zalloc)( "di.readdwarf3.mgGX.2", sizeof(GExpr) + nbytes ); 634 vg_assert(gx); 635 VG_(memcpy)( &gx->payload[0], (UChar*)VG_(indexXA)(xa,0), nbytes ); 636 vg_assert( &gx->payload[nbytes] 637 == ((UChar*)gx) + sizeof(GExpr) + nbytes ); 638 639 VG_(deleteXA)( xa ); 640 641 TRACE_D3("}\n"); 642 643 return gx; 644} 645 646 647/*------------------------------------------------------------*/ 648/*--- ---*/ 649/*--- Helper functions for range lists and CU headers ---*/ 650/*--- ---*/ 
651/*------------------------------------------------------------*/ 652 653/* Denotes an address range. Both aMin and aMax are included in the 654 range; hence a complete range is (0, ~0) and an empty range is any 655 (X, X-1) for X > 0.*/ 656typedef 657 struct { Addr aMin; Addr aMax; } 658 AddrRange; 659 660 661/* Generate an arbitrary structural total ordering on 662 XArray* of AddrRange. */ 663static Word cmp__XArrays_of_AddrRange ( XArray* rngs1, XArray* rngs2 ) 664{ 665 Word n1, n2, i; 666 tl_assert(rngs1 && rngs2); 667 n1 = VG_(sizeXA)( rngs1 ); 668 n2 = VG_(sizeXA)( rngs2 ); 669 if (n1 < n2) return -1; 670 if (n1 > n2) return 1; 671 for (i = 0; i < n1; i++) { 672 AddrRange* rng1 = (AddrRange*)VG_(indexXA)( rngs1, i ); 673 AddrRange* rng2 = (AddrRange*)VG_(indexXA)( rngs2, i ); 674 if (rng1->aMin < rng2->aMin) return -1; 675 if (rng1->aMin > rng2->aMin) return 1; 676 if (rng1->aMax < rng2->aMax) return -1; 677 if (rng1->aMax > rng2->aMax) return 1; 678 } 679 return 0; 680} 681 682 683__attribute__((noinline)) 684static XArray* /* of AddrRange */ empty_range_list ( void ) 685{ 686 XArray* xa; /* XArray of AddrRange */ 687 /* Who frees this xa? varstack_preen() does. */ 688 xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.erl.1", 689 ML_(dinfo_free), 690 sizeof(AddrRange) ); 691 return xa; 692} 693 694 695__attribute__((noinline)) 696static XArray* unitary_range_list ( Addr aMin, Addr aMax ) 697{ 698 XArray* xa; 699 AddrRange pair; 700 vg_assert(aMin <= aMax); 701 /* Who frees this xa? varstack_preen() does. */ 702 xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.url.1", 703 ML_(dinfo_free), 704 sizeof(AddrRange) ); 705 pair.aMin = aMin; 706 pair.aMax = aMax; 707 VG_(addToXA)( xa, &pair ); 708 return xa; 709} 710 711 712/* Enumerate the address ranges starting at img-offset 713 'debug_ranges_offset' in .debug_ranges. Results are biased with 714 'svma_of_referencing_CU' and so I believe are correct SVMAs for the 715 object as a whole. 
This function allocates the XArray, and the 716 caller must deallocate it. */ 717__attribute__((noinline)) 718static XArray* /* of AddrRange */ 719 get_range_list ( CUConst* cc, 720 Bool td3, 721 UWord debug_ranges_offset, 722 Addr svma_of_referencing_CU ) 723{ 724 Addr base; 725 Cursor ranges; 726 XArray* xa; /* XArray of AddrRange */ 727 AddrRange pair; 728 729 if (cc->debug_ranges_sz == 0) 730 cc->barf("get_range_list: .debug_ranges is empty/missing"); 731 732 init_Cursor( &ranges, cc->debug_ranges_img, 733 cc->debug_ranges_sz, 0, cc->barf, 734 "Overrun whilst reading .debug_ranges section(2)" ); 735 set_position_of_Cursor( &ranges, debug_ranges_offset ); 736 737 /* Who frees this xa? varstack_preen() does. */ 738 xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.grl.1", ML_(dinfo_free), 739 sizeof(AddrRange) ); 740 base = 0; 741 while (True) { 742 /* Read a (host-)word pair. This is something of a hack since 743 the word size to read is really dictated by the ELF file; 744 however, we assume we're reading a file with the same 745 word-sizeness as the host. Reasonably enough. */ 746 UWord w1 = get_UWord( &ranges ); 747 UWord w2 = get_UWord( &ranges ); 748 749 if (w1 == 0 && w2 == 0) 750 break; /* end of list. */ 751 752 if (w1 == -1UL) { 753 /* new value for 'base' */ 754 base = w2; 755 continue; 756 } 757 758 /* else enumerate [w1+base, w2+base) */ 759 /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc" 760 (sec 2.17.2) */ 761 if (w1 > w2) 762 cc->barf( "negative range in .debug_ranges section" ); 763 if (w1 < w2) { 764 pair.aMin = w1 + base + svma_of_referencing_CU; 765 pair.aMax = w2 - 1 + base + svma_of_referencing_CU; 766 vg_assert(pair.aMin <= pair.aMax); 767 VG_(addToXA)( xa, &pair ); 768 } 769 } 770 return xa; 771} 772 773 774/* Parse the Compilation Unit header indicated at 'c' and 775 initialise 'cc' accordingly. 
*/ 776static __attribute__((noinline)) 777void parse_CU_Header ( /*OUT*/CUConst* cc, 778 Bool td3, 779 Cursor* c, 780 UChar* debug_abbv_img, UWord debug_abbv_sz ) 781{ 782 UChar address_size; 783 UWord debug_abbrev_offset; 784 Int i; 785 786 VG_(memset)(cc, 0, sizeof(*cc)); 787 vg_assert(c && c->barf); 788 cc->barf = c->barf; 789 790 /* initial_length field */ 791 cc->unit_length 792 = get_Initial_Length( &cc->is_dw64, c, 793 "parse_CU_Header: invalid initial-length field" ); 794 795 TRACE_D3(" Length: %lld\n", cc->unit_length ); 796 797 /* version */ 798 cc->version = get_UShort( c ); 799 if (cc->version != 2 && cc->version != 3 && cc->version != 4) 800 cc->barf( "parse_CU_Header: is neither DWARF2 nor DWARF3 nor DWARF4" ); 801 TRACE_D3(" Version: %d\n", (Int)cc->version ); 802 803 /* debug_abbrev_offset */ 804 debug_abbrev_offset = get_Dwarfish_UWord( c, cc->is_dw64 ); 805 if (debug_abbrev_offset >= debug_abbv_sz) 806 cc->barf( "parse_CU_Header: invalid debug_abbrev_offset" ); 807 TRACE_D3(" Abbrev Offset: %ld\n", debug_abbrev_offset ); 808 809 /* address size. If this isn't equal to the host word size, just 810 give up. This makes it safe to assume elsewhere that 811 DW_FORM_addr and DW_FORM_ref_addr can be treated as a host 812 word. */ 813 address_size = get_UChar( c ); 814 if (address_size != sizeof(void*)) 815 cc->barf( "parse_CU_Header: invalid address_size" ); 816 TRACE_D3(" Pointer Size: %d\n", (Int)address_size ); 817 818 /* Set up so that cc->debug_abbv points to the relevant table for 819 this CU. Set the szB so that at least we can't read off the end 820 of the debug_abbrev section -- potentially (and quite likely) 821 too big, if this isn't the last table in the section, but at 822 least it's safe. 
*/ 823 cc->debug_abbv = debug_abbv_img + debug_abbrev_offset; 824 cc->debug_abbv_maxszB = debug_abbv_sz - debug_abbrev_offset; 825 /* and empty out the set_abbv_Cursor cache */ 826 if (0) VG_(printf)("XXXXXX initialise set_abbv_Cursor cache\n"); 827 for (i = 0; i < N_ABBV_CACHE; i++) { 828 cc->saC_cache[i].abbv_code = (ULong)-1; /* unused */ 829 cc->saC_cache[i].posn = 0; 830 } 831 cc->saC_cache_queries = 0; 832 cc->saC_cache_misses = 0; 833} 834 835 836/* Set up 'c' so it is ready to parse the abbv table entry code 837 'abbv_code' for this compilation unit. */ 838static __attribute__((noinline)) 839void set_abbv_Cursor ( /*OUT*/Cursor* c, Bool td3, 840 CUConst* cc, ULong abbv_code ) 841{ 842 Int i; 843 ULong acode; 844 845 if (abbv_code == 0) 846 cc->barf("set_abbv_Cursor: abbv_code == 0" ); 847 848 /* (ULong)-1 is used to represent an empty cache slot. So we can't 849 allow it. In any case no valid DWARF3 should make a reference 850 to a negative abbreviation code. [at least, they always seem to 851 be numbered upwards from zero as far as I have seen] */ 852 vg_assert(abbv_code != (ULong)-1); 853 854 /* First search the cache. */ 855 if (0) VG_(printf)("XXXXXX search set_abbv_Cursor cache\n"); 856 cc->saC_cache_queries++; 857 for (i = 0; i < N_ABBV_CACHE; i++) { 858 /* No need to test the cached abbv_codes for -1 (empty), since 859 we just asserted that abbv_code is not -1. */ 860 if (cc->saC_cache[i].abbv_code == abbv_code) { 861 /* Found it. Cool. Set up the parser using the cached 862 position, and move this cache entry 1 step closer to the 863 front. 
*/ 864 if (0) VG_(printf)("XXXXXX found in set_abbv_Cursor cache\n"); 865 init_Cursor( c, cc->debug_abbv, 866 cc->debug_abbv_maxszB, cc->saC_cache[i].posn, 867 cc->barf, 868 "Overrun whilst parsing .debug_abbrev section(1)" ); 869 if (i > 0) { 870 ULong t_abbv_code = cc->saC_cache[i].abbv_code; 871 UWord t_posn = cc->saC_cache[i].posn; 872 while (i > 0) { 873 cc->saC_cache[i] = cc->saC_cache[i-1]; 874 cc->saC_cache[0].abbv_code = t_abbv_code; 875 cc->saC_cache[0].posn = t_posn; 876 i--; 877 } 878 } 879 return; 880 } 881 } 882 883 /* No. It's not in the cache. We have to search through 884 .debug_abbrev, of course taking care to update the cache 885 when done. */ 886 887 cc->saC_cache_misses++; 888 init_Cursor( c, cc->debug_abbv, cc->debug_abbv_maxszB, 0, cc->barf, 889 "Overrun whilst parsing .debug_abbrev section(2)" ); 890 891 /* Now iterate though the table until we find the requested 892 entry. */ 893 while (True) { 894 //ULong atag; 895 //UInt has_children; 896 acode = get_ULEB128( c ); 897 if (acode == 0) break; /* end of the table */ 898 if (acode == abbv_code) break; /* found it */ 899 /*atag = */ get_ULEB128( c ); 900 /*has_children = */ get_UChar( c ); 901 //TRACE_D3(" %llu %s [%s]\n", 902 // acode, pp_DW_TAG(atag), pp_DW_children(has_children)); 903 while (True) { 904 ULong at_name = get_ULEB128( c ); 905 ULong at_form = get_ULEB128( c ); 906 if (at_name == 0 && at_form == 0) break; 907 //TRACE_D3(" %18s %s\n", 908 // pp_DW_AT(at_name), pp_DW_FORM(at_form)); 909 } 910 } 911 912 if (acode == 0) { 913 /* Not found. This is fatal. */ 914 cc->barf("set_abbv_Cursor: abbv_code not found"); 915 } 916 917 /* Otherwise, 'c' is now set correctly to parse the relevant entry, 918 starting from the abbreviation entry's tag. So just cache 919 the result, and return. 
*/ 920 for (i = N_ABBV_CACHE-1; i > N_ABBV_CACHE/2; i--) { 921 cc->saC_cache[i] = cc->saC_cache[i-1]; 922 } 923 if (0) VG_(printf)("XXXXXX update set_abbv_Cursor cache\n"); 924 cc->saC_cache[N_ABBV_CACHE/2].abbv_code = abbv_code; 925 cc->saC_cache[N_ABBV_CACHE/2].posn = get_position_of_Cursor(c); 926} 927 928 929/* From 'c', get the Form data into the lowest 1/2/4/8 bytes of *cts. 930 931 If *cts itself contains the entire result, then *ctsSzB is set to 932 1,2,4 or 8 accordingly and *ctsMemSzB is set to zero. 933 934 Alternatively, the result can be a block of data (in the 935 transiently mapped-in object, so-called "image" space). If so then 936 the lowest sizeof(void*)/8 bytes of *cts hold a pointer to said 937 image, *ctsSzB is zero, and *ctsMemSzB is the size of the block. 938 939 Unfortunately this means it is impossible to represent a zero-size 940 image block since that would have *ctsSzB == 0 and *ctsMemSzB == 0 941 and so is ambiguous (which case it is?) 942 943 Invariant on successful return: 944 (*ctsSzB > 0 && *ctsMemSzB == 0) 945 || (*ctsSzB == 0 && *ctsMemSzB > 0) 946*/ 947static 948void get_Form_contents ( /*OUT*/ULong* cts, 949 /*OUT*/Int* ctsSzB, 950 /*OUT*/UWord* ctsMemSzB, 951 CUConst* cc, Cursor* c, 952 Bool td3, DW_FORM form ) 953{ 954 *cts = 0; 955 *ctsSzB = 0; 956 *ctsMemSzB = 0; 957 switch (form) { 958 case DW_FORM_data1: 959 *cts = (ULong)(UChar)get_UChar(c); 960 *ctsSzB = 1; 961 TRACE_D3("%u", (UInt)*cts); 962 break; 963 case DW_FORM_data2: 964 *cts = (ULong)(UShort)get_UShort(c); 965 *ctsSzB = 2; 966 TRACE_D3("%u", (UInt)*cts); 967 break; 968 case DW_FORM_data4: 969 *cts = (ULong)(UInt)get_UInt(c); 970 *ctsSzB = 4; 971 TRACE_D3("%u", (UInt)*cts); 972 break; 973 case DW_FORM_data8: 974 *cts = get_ULong(c); 975 *ctsSzB = 8; 976 TRACE_D3("%llu", *cts); 977 break; 978 case DW_FORM_sec_offset: 979 *cts = (ULong)get_Dwarfish_UWord( c, cc->is_dw64 ); 980 *ctsSzB = cc->is_dw64 ? 
8 : 4; 981 TRACE_D3("%llu", *cts); 982 break; 983 case DW_FORM_sdata: 984 *cts = (ULong)(Long)get_SLEB128(c); 985 *ctsSzB = 8; 986 TRACE_D3("%lld", (Long)*cts); 987 break; 988 case DW_FORM_udata: 989 *cts = (ULong)(Long)get_ULEB128(c); 990 *ctsSzB = 8; 991 TRACE_D3("%llu", (Long)*cts); 992 break; 993 case DW_FORM_addr: 994 /* note, this is a hack. DW_FORM_addr is defined as getting 995 a word the size of the target machine as defined by the 996 address_size field in the CU Header. However, 997 parse_CU_Header() rejects all inputs except those for 998 which address_size == sizeof(Word), hence we can just 999 treat it as a (host) Word. */ 1000 *cts = (ULong)(UWord)get_UWord(c); 1001 *ctsSzB = sizeof(UWord); 1002 TRACE_D3("0x%lx", (UWord)*cts); 1003 break; 1004 1005 case DW_FORM_ref_addr: 1006 /* We make the same word-size assumption as DW_FORM_addr. */ 1007 /* What does this really mean? From D3 Sec 7.5.4, 1008 description of "reference", it would appear to reference 1009 some other DIE, by specifying the offset from the 1010 beginning of a .debug_info section. The D3 spec mentions 1011 that this might be in some other shared object and 1012 executable. But I don't see how the name of the other 1013 object/exe is specified. 1014 1015 At least for the DW_FORM_ref_addrs created by icc11, the 1016 references seem to be within the same object/executable. 1017 So for the moment we merely range-check, to see that they 1018 actually do specify a plausible offset within this 1019 object's .debug_info, and return the value unchanged. 1020 */ 1021 *cts = (ULong)(UWord)get_UWord(c); 1022 *ctsSzB = sizeof(UWord); 1023 TRACE_D3("0x%lx", (UWord)*cts); 1024 if (0) VG_(printf)("DW_FORM_ref_addr 0x%lx\n", (UWord)*cts); 1025 if (/* the following 2 are surely impossible, but ... */ 1026 cc->debug_info_img == NULL || cc->debug_info_sz == 0 1027 || *cts >= (ULong)cc->debug_info_sz) { 1028 /* Hmm. Offset is nonsensical for this object's .debug_info 1029 section. Be safe and reject it. 
*/ 1030 cc->barf("get_Form_contents: DW_FORM_ref_addr points " 1031 "outside .debug_info"); 1032 } 1033 break; 1034 1035 case DW_FORM_strp: { 1036 /* this is an offset into .debug_str */ 1037 UChar* str; 1038 UWord uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 ); 1039 if (cc->debug_str_img == NULL || uw >= cc->debug_str_sz) 1040 cc->barf("get_Form_contents: DW_FORM_strp " 1041 "points outside .debug_str"); 1042 /* FIXME: check the entire string lies inside debug_str, 1043 not just the first byte of it. */ 1044 str = (UChar*)cc->debug_str_img + uw; 1045 TRACE_D3("(indirect string, offset: 0x%lx): %s", uw, str); 1046 *cts = (ULong)(UWord)str; 1047 *ctsMemSzB = 1 + (ULong)VG_(strlen)(str); 1048 break; 1049 } 1050 case DW_FORM_string: { 1051 UChar* str = get_AsciiZ(c); 1052 TRACE_D3("%s", str); 1053 *cts = (ULong)(UWord)str; 1054 /* strlen is safe because get_AsciiZ already 'vetted' the 1055 entire string */ 1056 *ctsMemSzB = 1 + (ULong)VG_(strlen)(str); 1057 break; 1058 } 1059 case DW_FORM_ref1: { 1060 UChar u8 = get_UChar(c); 1061 UWord res = cc->cu_start_offset + (UWord)u8; 1062 *cts = (ULong)res; 1063 *ctsSzB = sizeof(UWord); 1064 TRACE_D3("<%lx>", res); 1065 break; 1066 } 1067 case DW_FORM_ref2: { 1068 UShort u16 = get_UShort(c); 1069 UWord res = cc->cu_start_offset + (UWord)u16; 1070 *cts = (ULong)res; 1071 *ctsSzB = sizeof(UWord); 1072 TRACE_D3("<%lx>", res); 1073 break; 1074 } 1075 case DW_FORM_ref4: { 1076 UInt u32 = get_UInt(c); 1077 UWord res = cc->cu_start_offset + (UWord)u32; 1078 *cts = (ULong)res; 1079 *ctsSzB = sizeof(UWord); 1080 TRACE_D3("<%lx>", res); 1081 break; 1082 } 1083 case DW_FORM_ref8: { 1084 ULong u64 = get_ULong(c); 1085 UWord res = cc->cu_start_offset + (UWord)u64; 1086 *cts = (ULong)res; 1087 *ctsSzB = sizeof(UWord); 1088 TRACE_D3("<%lx>", res); 1089 break; 1090 } 1091 case DW_FORM_ref_udata: { 1092 ULong u64 = get_ULEB128(c); 1093 UWord res = cc->cu_start_offset + (UWord)u64; 1094 *cts = (ULong)res; 1095 *ctsSzB = sizeof(UWord); 1096 
TRACE_D3("<%lx>", res); 1097 break; 1098 } 1099 case DW_FORM_flag: { 1100 UChar u8 = get_UChar(c); 1101 TRACE_D3("%u", (UInt)u8); 1102 *cts = (ULong)u8; 1103 *ctsSzB = 1; 1104 break; 1105 } 1106 case DW_FORM_flag_present: 1107 TRACE_D3("1"); 1108 *cts = 1; 1109 *ctsSzB = 1; 1110 break; 1111 case DW_FORM_block1: { 1112 ULong u64b; 1113 ULong u64 = (ULong)get_UChar(c); 1114 UChar* block = get_address_of_Cursor(c); 1115 TRACE_D3("%llu byte block: ", u64); 1116 for (u64b = u64; u64b > 0; u64b--) { 1117 UChar u8 = get_UChar(c); 1118 TRACE_D3("%x ", (UInt)u8); 1119 } 1120 *cts = (ULong)(UWord)block; 1121 *ctsMemSzB = (UWord)u64; 1122 break; 1123 } 1124 case DW_FORM_block2: { 1125 ULong u64b; 1126 ULong u64 = (ULong)get_UShort(c); 1127 UChar* block = get_address_of_Cursor(c); 1128 TRACE_D3("%llu byte block: ", u64); 1129 for (u64b = u64; u64b > 0; u64b--) { 1130 UChar u8 = get_UChar(c); 1131 TRACE_D3("%x ", (UInt)u8); 1132 } 1133 *cts = (ULong)(UWord)block; 1134 *ctsMemSzB = (UWord)u64; 1135 break; 1136 } 1137 case DW_FORM_block4: { 1138 ULong u64b; 1139 ULong u64 = (ULong)get_UInt(c); 1140 UChar* block = get_address_of_Cursor(c); 1141 TRACE_D3("%llu byte block: ", u64); 1142 for (u64b = u64; u64b > 0; u64b--) { 1143 UChar u8 = get_UChar(c); 1144 TRACE_D3("%x ", (UInt)u8); 1145 } 1146 *cts = (ULong)(UWord)block; 1147 *ctsMemSzB = (UWord)u64; 1148 break; 1149 } 1150 case DW_FORM_exprloc: 1151 case DW_FORM_block: { 1152 ULong u64b; 1153 ULong u64 = (ULong)get_ULEB128(c); 1154 UChar* block = get_address_of_Cursor(c); 1155 TRACE_D3("%llu byte block: ", u64); 1156 for (u64b = u64; u64b > 0; u64b--) { 1157 UChar u8 = get_UChar(c); 1158 TRACE_D3("%x ", (UInt)u8); 1159 } 1160 *cts = (ULong)(UWord)block; 1161 *ctsMemSzB = (UWord)u64; 1162 break; 1163 } 1164 case DW_FORM_ref_sig8: { 1165 ULong u64b; 1166 UChar* block = get_address_of_Cursor(c); 1167 TRACE_D3("8 byte signature: "); 1168 for (u64b = 8; u64b > 0; u64b--) { 1169 UChar u8 = get_UChar(c); 1170 TRACE_D3("%x ", (UInt)u8); 
1171 } 1172 *cts = (ULong)(UWord)block; 1173 *ctsMemSzB = 8; 1174 break; 1175 } 1176 case DW_FORM_indirect: 1177 get_Form_contents (cts, ctsSzB, ctsMemSzB, cc, c, td3, 1178 (DW_FORM)get_ULEB128(c)); 1179 return; 1180 1181 default: 1182 VG_(printf)( 1183 "get_Form_contents: unhandled %d (%s) at <%lx>\n", 1184 form, ML_(pp_DW_FORM)(form), get_position_of_Cursor(c)); 1185 c->barf("get_Form_contents: unhandled DW_FORM"); 1186 } 1187} 1188 1189 1190/*------------------------------------------------------------*/ 1191/*--- ---*/ 1192/*--- Parsing of variable-related DIEs ---*/ 1193/*--- ---*/ 1194/*------------------------------------------------------------*/ 1195 1196typedef 1197 struct _TempVar { 1198 UChar* name; /* in DebugInfo's .strchunks */ 1199 /* Represent ranges economically. nRanges is the number of 1200 ranges. Cases: 1201 0: .rngOneMin .rngOneMax .manyRanges are all zero 1202 1: .rngOneMin .rngOneMax hold the range; .rngMany is NULL 1203 2: .rngOneMin .rngOneMax are zero; .rngMany holds the ranges. 1204 This is merely an optimisation to avoid having to allocate 1205 and free the XArray in the common (98%) of cases where there 1206 is zero or one address ranges. */ 1207 UWord nRanges; 1208 Addr rngOneMin; 1209 Addr rngOneMax; 1210 XArray* rngMany; /* of AddrRange. NON-UNIQUE PTR in AR_DINFO. */ 1211 /* Do not free .rngMany, since many TempVars will have the same 1212 value. Instead the associated storage is to be freed by 1213 deleting 'rangetree', which stores a single copy of each 1214 range. */ 1215 /* --- */ 1216 Int level; 1217 UWord typeR; /* a cuOff */ 1218 GExpr* gexpr; /* for this variable */ 1219 GExpr* fbGX; /* to find the frame base of the enclosing fn, if 1220 any */ 1221 UChar* fName; /* declaring file name, or NULL */ 1222 Int fLine; /* declaring file line number, or zero */ 1223 /* offset in .debug_info, so that abstract instances can be 1224 found to satisfy references from concrete instances. 
*/ 1225 UWord dioff; 1226 UWord absOri; /* so the absOri fields refer to dioff fields 1227 in some other, related TempVar. */ 1228 } 1229 TempVar; 1230 1231#define N_D3_VAR_STACK 48 1232 1233typedef 1234 struct { 1235 /* Contains the range stack: a stack of address ranges, one 1236 stack entry for each nested scope. 1237 1238 Some scope entries are created by function definitions 1239 (DW_AT_subprogram), and for those, we also note the GExpr 1240 derived from its DW_AT_frame_base attribute, if any. 1241 Consequently it should be possible to find, for any 1242 variable's DIE, the GExpr for the the containing function's 1243 DW_AT_frame_base by scanning back through the stack to find 1244 the nearest entry associated with a function. This somewhat 1245 elaborate scheme is provided so as to make it possible to 1246 obtain the correct DW_AT_frame_base expression even in the 1247 presence of nested functions (or to be more precise, in the 1248 presence of nested DW_AT_subprogram DIEs). 1249 */ 1250 Int sp; /* [sp] is innermost active entry; sp==-1 for empty 1251 stack */ 1252 XArray* ranges[N_D3_VAR_STACK]; /* XArray of AddrRange */ 1253 Int level[N_D3_VAR_STACK]; /* D3 DIE levels */ 1254 Bool isFunc[N_D3_VAR_STACK]; /* from DW_AT_subprogram? */ 1255 GExpr* fbGX[N_D3_VAR_STACK]; /* if isFunc, contains the FB 1256 expr, else NULL */ 1257 /* The file name table. Is a mapping from integer index to the 1258 (permanent) copy of the string, iow a non-img area. 
*/ 1259 XArray* /* of UChar* */ filenameTable; 1260 } 1261 D3VarParser; 1262 1263static void varstack_show ( D3VarParser* parser, HChar* str ) { 1264 Word i, j; 1265 VG_(printf)(" varstack (%s) {\n", str); 1266 for (i = 0; i <= parser->sp; i++) { 1267 XArray* xa = parser->ranges[i]; 1268 vg_assert(xa); 1269 VG_(printf)(" [%ld] (level %d)", i, parser->level[i]); 1270 if (parser->isFunc[i]) { 1271 VG_(printf)(" (fbGX=%p)", parser->fbGX[i]); 1272 } else { 1273 vg_assert(parser->fbGX[i] == NULL); 1274 } 1275 VG_(printf)(": "); 1276 if (VG_(sizeXA)( xa ) == 0) { 1277 VG_(printf)("** empty PC range array **"); 1278 } else { 1279 for (j = 0; j < VG_(sizeXA)( xa ); j++) { 1280 AddrRange* range = (AddrRange*) VG_(indexXA)( xa, j ); 1281 vg_assert(range); 1282 VG_(printf)("[%#lx,%#lx] ", range->aMin, range->aMax); 1283 } 1284 } 1285 VG_(printf)("\n"); 1286 } 1287 VG_(printf)(" }\n"); 1288} 1289 1290/* Remove from the stack, all entries with .level > 'level' */ 1291static 1292void varstack_preen ( D3VarParser* parser, Bool td3, Int level ) 1293{ 1294 Bool changed = False; 1295 vg_assert(parser->sp < N_D3_VAR_STACK); 1296 while (True) { 1297 vg_assert(parser->sp >= -1); 1298 if (parser->sp == -1) break; 1299 if (parser->level[parser->sp] <= level) break; 1300 if (0) 1301 TRACE_D3("BBBBAAAA varstack_pop [newsp=%d]\n", parser->sp-1); 1302 vg_assert(parser->ranges[parser->sp]); 1303 /* Who allocated this xa? get_range_list() or 1304 unitary_range_list(). 
*/ 1305 VG_(deleteXA)( parser->ranges[parser->sp] ); 1306 parser->ranges[parser->sp] = NULL; 1307 parser->level[parser->sp] = 0; 1308 parser->isFunc[parser->sp] = False; 1309 parser->fbGX[parser->sp] = NULL; 1310 parser->sp--; 1311 changed = True; 1312 } 1313 if (changed && td3) 1314 varstack_show( parser, "after preen" ); 1315} 1316 1317static void varstack_push ( CUConst* cc, 1318 D3VarParser* parser, 1319 Bool td3, 1320 XArray* ranges, Int level, 1321 Bool isFunc, GExpr* fbGX ) { 1322 if (0) 1323 TRACE_D3("BBBBAAAA varstack_push[newsp=%d]: %d %p\n", 1324 parser->sp+1, level, ranges); 1325 1326 /* First we need to zap everything >= 'level', as we are about to 1327 replace any previous entry at 'level', so .. */ 1328 varstack_preen(parser, /*td3*/False, level-1); 1329 1330 vg_assert(parser->sp >= -1); 1331 vg_assert(parser->sp < N_D3_VAR_STACK); 1332 if (parser->sp == N_D3_VAR_STACK-1) 1333 cc->barf("varstack_push: N_D3_VAR_STACK is too low; " 1334 "increase and recompile"); 1335 if (parser->sp >= 0) 1336 vg_assert(parser->level[parser->sp] < level); 1337 parser->sp++; 1338 vg_assert(parser->ranges[parser->sp] == NULL); 1339 vg_assert(parser->level[parser->sp] == 0); 1340 vg_assert(parser->isFunc[parser->sp] == False); 1341 vg_assert(parser->fbGX[parser->sp] == NULL); 1342 vg_assert(ranges != NULL); 1343 if (!isFunc) vg_assert(fbGX == NULL); 1344 parser->ranges[parser->sp] = ranges; 1345 parser->level[parser->sp] = level; 1346 parser->isFunc[parser->sp] = isFunc; 1347 parser->fbGX[parser->sp] = fbGX; 1348 if (td3) 1349 varstack_show( parser, "after push" ); 1350} 1351 1352 1353/* cts, ctsSzB, ctsMemSzB are derived from a DW_AT_location and so 1354 refer either to a location expression or to a location list. 1355 Figure out which, and in both cases bundle the expression or 1356 location list into a so-called GExpr (guarded expression). 
*/ 1357__attribute__((noinline)) 1358static GExpr* get_GX ( CUConst* cc, Bool td3, 1359 ULong cts, Int ctsSzB, UWord ctsMemSzB ) 1360{ 1361 GExpr* gexpr = NULL; 1362 if (ctsMemSzB > 0 && ctsSzB == 0) { 1363 /* represents an in-line location expression, and cts points 1364 right at it */ 1365 gexpr = make_singleton_GX( (UChar*)(UWord)cts, ctsMemSzB ); 1366 } 1367 else 1368 if (ctsMemSzB == 0 && ctsSzB > 0) { 1369 /* represents location list. cts is the offset of it in 1370 .debug_loc. */ 1371 if (!cc->cu_svma_known) 1372 cc->barf("get_GX: location list, but CU svma is unknown"); 1373 gexpr = make_general_GX( cc, td3, (UWord)cts, cc->cu_svma ); 1374 } 1375 else { 1376 vg_assert(0); /* else caller is bogus */ 1377 } 1378 return gexpr; 1379} 1380 1381 1382static 1383void read_filename_table( /*MOD*/D3VarParser* parser, 1384 CUConst* cc, UWord debug_line_offset, 1385 Bool td3 ) 1386{ 1387 Bool is_dw64; 1388 Cursor c; 1389 Word i; 1390 UShort version; 1391 UChar opcode_base; 1392 UChar* str; 1393 1394 vg_assert(parser && cc && cc->barf); 1395 if ((!cc->debug_line_img) 1396 || cc->debug_line_sz <= debug_line_offset) 1397 cc->barf("read_filename_table: .debug_line is missing?"); 1398 1399 init_Cursor( &c, cc->debug_line_img, 1400 cc->debug_line_sz, debug_line_offset, cc->barf, 1401 "Overrun whilst reading .debug_line section(1)" ); 1402 1403 /* unit_length = */ 1404 get_Initial_Length( &is_dw64, &c, 1405 "read_filename_table: invalid initial-length field" ); 1406 version = get_UShort( &c ); 1407 if (version != 2 && version != 3 && version != 4) 1408 cc->barf("read_filename_table: Only DWARF version 2, 3 and 4 line info " 1409 "is currently supported."); 1410 /*header_length = (ULong)*/ get_Dwarfish_UWord( &c, is_dw64 ); 1411 /*minimum_instruction_length = */ get_UChar( &c ); 1412 if (version >= 4) 1413 /*maximum_operations_per_insn = */ get_UChar( &c ); 1414 /*default_is_stmt = */ get_UChar( &c ); 1415 /*line_base = (Char)*/ get_UChar( &c ); 1416 /*line_range = */ 
get_UChar( &c ); 1417 opcode_base = get_UChar( &c ); 1418 /* skip over "standard_opcode_lengths" */ 1419 for (i = 1; i < (Word)opcode_base; i++) 1420 (void)get_UChar( &c ); 1421 1422 /* skip over the directory names table */ 1423 while (peek_UChar(&c) != 0) { 1424 (void)get_AsciiZ(&c); 1425 } 1426 (void)get_UChar(&c); /* skip terminating zero */ 1427 1428 /* Read and record the file names table */ 1429 vg_assert(parser->filenameTable); 1430 vg_assert( VG_(sizeXA)( parser->filenameTable ) == 0 ); 1431 /* Add a dummy index-zero entry. DWARF3 numbers its files 1432 from 1, for some reason. */ 1433 str = ML_(addStr)( cc->di, "<unknown_file>", -1 ); 1434 VG_(addToXA)( parser->filenameTable, &str ); 1435 while (peek_UChar(&c) != 0) { 1436 str = get_AsciiZ(&c); 1437 TRACE_D3(" read_filename_table: %ld %s\n", 1438 VG_(sizeXA)(parser->filenameTable), str); 1439 str = ML_(addStr)( cc->di, str, -1 ); 1440 VG_(addToXA)( parser->filenameTable, &str ); 1441 (void)get_ULEB128( &c ); /* skip directory index # */ 1442 (void)get_ULEB128( &c ); /* skip last mod time */ 1443 (void)get_ULEB128( &c ); /* file size */ 1444 } 1445 /* We're done! The rest of it is not interesting. 
*/ 1446} 1447 1448 1449__attribute__((noinline)) 1450static void parse_var_DIE ( 1451 /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree, 1452 /*MOD*/XArray* /* of TempVar* */ tempvars, 1453 /*MOD*/XArray* /* of GExpr* */ gexprs, 1454 /*MOD*/D3VarParser* parser, 1455 DW_TAG dtag, 1456 UWord posn, 1457 Int level, 1458 Cursor* c_die, 1459 Cursor* c_abbv, 1460 CUConst* cc, 1461 Bool td3 1462) 1463{ 1464 ULong cts; 1465 Int ctsSzB; 1466 UWord ctsMemSzB; 1467 1468 UWord saved_die_c_offset = get_position_of_Cursor( c_die ); 1469 UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv ); 1470 1471 varstack_preen( parser, td3, level-1 ); 1472 1473 if (dtag == DW_TAG_compile_unit) { 1474 Bool have_lo = False; 1475 Bool have_hi1 = False; 1476 Bool have_range = False; 1477 Addr ip_lo = 0; 1478 Addr ip_hi1 = 0; 1479 Addr rangeoff = 0; 1480 while (True) { 1481 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 1482 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 1483 if (attr == 0 && form == 0) break; 1484 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 1485 cc, c_die, False/*td3*/, form ); 1486 if (attr == DW_AT_low_pc && ctsSzB > 0) { 1487 ip_lo = cts; 1488 have_lo = True; 1489 } 1490 if (attr == DW_AT_high_pc && ctsSzB > 0) { 1491 ip_hi1 = cts; 1492 have_hi1 = True; 1493 } 1494 if (attr == DW_AT_ranges && ctsSzB > 0) { 1495 rangeoff = cts; 1496 have_range = True; 1497 } 1498 if (attr == DW_AT_stmt_list && ctsSzB > 0) { 1499 read_filename_table( parser, cc, (UWord)cts, td3 ); 1500 } 1501 } 1502 /* Now, does this give us an opportunity to find this 1503 CU's svma? */ 1504#if 0 1505 if (level == 0 && have_lo) { 1506 vg_assert(!cc->cu_svma_known); /* if this fails, it must be 1507 because we've already seen a DW_TAG_compile_unit DIE at level 1508 0. But that can't happen, because DWARF3 only allows exactly 1509 one top level DIE per CU. 
*/ 1510 cc->cu_svma_known = True; 1511 cc->cu_svma = ip_lo; 1512 if (1) 1513 TRACE_D3("BBBBAAAA acquire CU_SVMA of %p\n", cc->cu_svma); 1514 /* Now, it may be that this DIE doesn't tell us the CU's 1515 SVMA, by way of not having a DW_AT_low_pc. That's OK -- 1516 the CU doesn't *have* to have its SVMA specified. 1517 1518 But as per last para D3 spec sec 3.1.1 ("Normal and 1519 Partial Compilation Unit Entries", "If the base address 1520 (viz, the SVMA) is undefined, then any DWARF entry of 1521 structure defined interms of the base address of that 1522 compilation unit is not valid.". So that means, if whilst 1523 processing the children of this top level DIE (or their 1524 children, etc) we see a DW_AT_range, and cu_svma_known is 1525 False, then the DIE that contains it is (per the spec) 1526 invalid, and we can legitimately stop and complain. */ 1527 } 1528#else 1529 /* .. whereas The Reality is, simply assume the SVMA is zero 1530 if it isn't specified. */ 1531 if (level == 0) { 1532 vg_assert(!cc->cu_svma_known); 1533 cc->cu_svma_known = True; 1534 if (have_lo) 1535 cc->cu_svma = ip_lo; 1536 else 1537 cc->cu_svma = 0; 1538 } 1539#endif 1540 /* Do we have something that looks sane? */ 1541 if (have_lo && have_hi1 && (!have_range)) { 1542 if (ip_lo < ip_hi1) 1543 varstack_push( cc, parser, td3, 1544 unitary_range_list(ip_lo, ip_hi1 - 1), 1545 level, 1546 False/*isFunc*/, NULL/*fbGX*/ ); 1547 } else 1548 if ((!have_lo) && (!have_hi1) && have_range) { 1549 varstack_push( cc, parser, td3, 1550 get_range_list( cc, td3, 1551 rangeoff, cc->cu_svma ), 1552 level, 1553 False/*isFunc*/, NULL/*fbGX*/ ); 1554 } else 1555 if ((!have_lo) && (!have_hi1) && (!have_range)) { 1556 /* CU has no code, presumably? */ 1557 varstack_push( cc, parser, td3, 1558 empty_range_list(), 1559 level, 1560 False/*isFunc*/, NULL/*fbGX*/ ); 1561 } else 1562 if (have_lo && (!have_hi1) && have_range && ip_lo == 0) { 1563 /* broken DIE created by gcc-4.3.X ? 
Ignore the 1564 apparently-redundant DW_AT_low_pc and use the DW_AT_ranges 1565 instead. */ 1566 varstack_push( cc, parser, td3, 1567 get_range_list( cc, td3, 1568 rangeoff, cc->cu_svma ), 1569 level, 1570 False/*isFunc*/, NULL/*fbGX*/ ); 1571 } else { 1572 if (0) VG_(printf)("I got hlo %d hhi1 %d hrange %d\n", 1573 (Int)have_lo, (Int)have_hi1, (Int)have_range); 1574 goto bad_DIE; 1575 } 1576 } 1577 1578 if (dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram) { 1579 Bool have_lo = False; 1580 Bool have_hi1 = False; 1581 Bool have_range = False; 1582 Addr ip_lo = 0; 1583 Addr ip_hi1 = 0; 1584 Addr rangeoff = 0; 1585 Bool isFunc = dtag == DW_TAG_subprogram; 1586 GExpr* fbGX = NULL; 1587 while (True) { 1588 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 1589 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 1590 if (attr == 0 && form == 0) break; 1591 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 1592 cc, c_die, False/*td3*/, form ); 1593 if (attr == DW_AT_low_pc && ctsSzB > 0) { 1594 ip_lo = cts; 1595 have_lo = True; 1596 } 1597 if (attr == DW_AT_high_pc && ctsSzB > 0) { 1598 ip_hi1 = cts; 1599 have_hi1 = True; 1600 } 1601 if (attr == DW_AT_ranges && ctsSzB > 0) { 1602 rangeoff = cts; 1603 have_range = True; 1604 } 1605 if (isFunc 1606 && attr == DW_AT_frame_base 1607 && ((ctsMemSzB > 0 && ctsSzB == 0) 1608 || (ctsMemSzB == 0 && ctsSzB > 0))) { 1609 fbGX = get_GX( cc, False/*td3*/, cts, ctsSzB, ctsMemSzB ); 1610 vg_assert(fbGX); 1611 VG_(addToXA)(gexprs, &fbGX); 1612 } 1613 } 1614 /* Do we have something that looks sane? */ 1615 if (dtag == DW_TAG_subprogram 1616 && (!have_lo) && (!have_hi1) && (!have_range)) { 1617 /* This is legit - ignore it. Sec 3.3.3: "A subroutine entry 1618 representing a subroutine declaration that is not also a 1619 definition does not have code address or range 1620 attributes." 
*/ 1621 } else 1622 if (dtag == DW_TAG_lexical_block 1623 && (!have_lo) && (!have_hi1) && (!have_range)) { 1624 /* I believe this is legit, and means the lexical block 1625 contains no insns (whatever that might mean). Ignore. */ 1626 } else 1627 if (have_lo && have_hi1 && (!have_range)) { 1628 /* This scope supplies just a single address range. */ 1629 if (ip_lo < ip_hi1) 1630 varstack_push( cc, parser, td3, 1631 unitary_range_list(ip_lo, ip_hi1 - 1), 1632 level, isFunc, fbGX ); 1633 } else 1634 if ((!have_lo) && (!have_hi1) && have_range) { 1635 /* This scope supplies multiple address ranges via the use of 1636 a range list. */ 1637 varstack_push( cc, parser, td3, 1638 get_range_list( cc, td3, 1639 rangeoff, cc->cu_svma ), 1640 level, isFunc, fbGX ); 1641 } else 1642 if (have_lo && (!have_hi1) && (!have_range)) { 1643 /* This scope is bogus. The D3 spec sec 3.4 (Lexical Block 1644 Entries) says fairly clearly that a scope must have either 1645 _range or (_low_pc and _high_pc). */ 1646 /* The spec is a bit ambiguous though. Perhaps a single byte 1647 range is intended? 
See sec 2.17 (Code Addresses And Ranges) */ 1648 /* This case is here because icc9 produced this: 1649 <2><13bd>: DW_TAG_lexical_block 1650 DW_AT_decl_line : 5229 1651 DW_AT_decl_column : 37 1652 DW_AT_decl_file : 1 1653 DW_AT_low_pc : 0x401b03 1654 */ 1655 /* Ignore (seems safe than pushing a single byte range) */ 1656 } else 1657 goto bad_DIE; 1658 } 1659 1660 if (dtag == DW_TAG_variable || dtag == DW_TAG_formal_parameter) { 1661 UChar* name = NULL; 1662 UWord typeR = D3_INVALID_CUOFF; 1663 Bool external = False; 1664 GExpr* gexpr = NULL; 1665 Int n_attrs = 0; 1666 UWord abs_ori = (UWord)D3_INVALID_CUOFF; 1667 Int lineNo = 0; 1668 UChar* fileName = NULL; 1669 while (True) { 1670 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 1671 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 1672 if (attr == 0 && form == 0) break; 1673 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 1674 cc, c_die, False/*td3*/, form ); 1675 n_attrs++; 1676 if (attr == DW_AT_name && ctsMemSzB > 0) { 1677 name = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 ); 1678 } 1679 if (attr == DW_AT_location 1680 && ((ctsMemSzB > 0 && ctsSzB == 0) 1681 || (ctsMemSzB == 0 && ctsSzB > 0))) { 1682 gexpr = get_GX( cc, False/*td3*/, cts, ctsSzB, ctsMemSzB ); 1683 vg_assert(gexpr); 1684 VG_(addToXA)(gexprs, &gexpr); 1685 } 1686 if (attr == DW_AT_type && ctsSzB > 0) { 1687 typeR = (UWord)cts; 1688 } 1689 if (attr == DW_AT_external && ctsSzB > 0 && cts > 0) { 1690 external = True; 1691 } 1692 if (attr == DW_AT_abstract_origin && ctsSzB > 0) { 1693 abs_ori = (UWord)cts; 1694 } 1695 if (attr == DW_AT_declaration && ctsSzB > 0 && cts > 0) { 1696 /*declaration = True;*/ 1697 } 1698 if (attr == DW_AT_decl_line && ctsSzB > 0) { 1699 lineNo = (Int)cts; 1700 } 1701 if (attr == DW_AT_decl_file && ctsSzB > 0) { 1702 Int ftabIx = (Int)cts; 1703 if (ftabIx >= 1 1704 && ftabIx < VG_(sizeXA)( parser->filenameTable )) { 1705 fileName = *(UChar**) 1706 VG_(indexXA)( parser->filenameTable, ftabIx ); 1707 vg_assert(fileName); 1708 } 1709 
if (0) VG_(printf)("XXX filename = %s\n", fileName); 1710 } 1711 } 1712 /* We'll collect it under if one of the following three 1713 conditions holds: 1714 (1) has location and type -> completed 1715 (2) has type only -> is an abstract instance 1716 (3) has location and abs_ori -> is a concrete instance 1717 Name, filename and line number are all optional frills. 1718 */ 1719 if ( /* 1 */ (gexpr && typeR != D3_INVALID_CUOFF) 1720 /* 2 */ || (typeR != D3_INVALID_CUOFF) 1721 /* 3 */ || (gexpr && abs_ori != (UWord)D3_INVALID_CUOFF) ) { 1722 1723 /* Add this variable to the list of interesting looking 1724 variables. Crucially, note along with it the address 1725 range(s) associated with the variable, which for locals 1726 will be the address ranges at the top of the varparser's 1727 stack. */ 1728 GExpr* fbGX = NULL; 1729 Word i, nRanges; 1730 XArray* /* of AddrRange */ xa; 1731 TempVar* tv; 1732 /* Stack can't be empty; we put a dummy entry on it for the 1733 entire address range before starting with the DIEs for 1734 this CU. */ 1735 vg_assert(parser->sp >= 0); 1736 1737 /* If this is a local variable (non-external), try to find 1738 the GExpr for the DW_AT_frame_base of the containing 1739 function. It should have been pushed on the stack at the 1740 time we encountered its DW_TAG_subprogram DIE, so the way 1741 to find it is to scan back down the stack looking for it. 1742 If there isn't an enclosing stack entry marked 'isFunc' 1743 then we must be seeing variable or formal param DIEs 1744 outside of a function, so we deem the Dwarf to be 1745 malformed if that happens. Note that the fbGX may be NULL 1746 if the containing DT_TAG_subprogram didn't supply a 1747 DW_AT_frame_base -- that's OK, but there must actually be 1748 a containing DW_TAG_subprogram. 
*/ 1749 if (!external) { 1750 Bool found = False; 1751 for (i = parser->sp; i >= 0; i--) { 1752 if (parser->isFunc[i]) { 1753 fbGX = parser->fbGX[i]; 1754 found = True; 1755 break; 1756 } 1757 } 1758 if (!found) { 1759 if (0 && VG_(clo_verbosity) >= 0) { 1760 VG_(message)(Vg_DebugMsg, 1761 "warning: parse_var_DIE: non-external variable " 1762 "outside DW_TAG_subprogram\n"); 1763 } 1764 /* goto bad_DIE; */ 1765 /* This seems to happen a lot. Just ignore it -- if, 1766 when we come to evaluation of the location (guarded) 1767 expression, it requires a frame base value, and 1768 there's no expression for that, then evaluation as a 1769 whole will fail. Harmless - a bit of a waste of 1770 cycles but nothing more. */ 1771 } 1772 } 1773 1774 /* re "external ? 0 : parser->sp" (twice), if the var is 1775 marked 'external' then we must put it at the global scope, 1776 as only the global scope (level 0) covers the entire PC 1777 address space. It is asserted elsewhere that level 0 1778 always covers the entire address space. */ 1779 xa = parser->ranges[external ? 0 : parser->sp]; 1780 nRanges = VG_(sizeXA)(xa); 1781 vg_assert(nRanges >= 0); 1782 1783 tv = ML_(dinfo_zalloc)( "di.readdwarf3.pvD.1", sizeof(TempVar) ); 1784 tv->name = name; 1785 tv->level = external ? 0 : parser->sp; 1786 tv->typeR = typeR; 1787 tv->gexpr = gexpr; 1788 tv->fbGX = fbGX; 1789 tv->fName = fileName; 1790 tv->fLine = lineNo; 1791 tv->dioff = posn; 1792 tv->absOri = abs_ori; 1793 1794 /* See explanation on definition of type TempVar for the 1795 reason for this elaboration. */ 1796 tv->nRanges = nRanges; 1797 tv->rngOneMin = 0; 1798 tv->rngOneMax = 0; 1799 tv->rngMany = NULL; 1800 if (nRanges == 1) { 1801 AddrRange* range = VG_(indexXA)(xa, 0); 1802 tv->rngOneMin = range->aMin; 1803 tv->rngOneMax = range->aMax; 1804 } 1805 else if (nRanges > 1) { 1806 /* See if we already have a range list which is 1807 structurally identical. If so, use that; if not, clone 1808 this one, and add it to our collection. 
*/ 1809 UWord keyW, valW; 1810 if (VG_(lookupFM)( rangestree, &keyW, &valW, (UWord)xa )) { 1811 XArray* old = (XArray*)keyW; 1812 tl_assert(valW == 0); 1813 tl_assert(old != xa); 1814 tv->rngMany = old; 1815 } else { 1816 XArray* cloned = VG_(cloneXA)( "di.readdwarf3.pvD.2", xa ); 1817 tv->rngMany = cloned; 1818 VG_(addToFM)( rangestree, (UWord)cloned, 0 ); 1819 } 1820 } 1821 1822 VG_(addToXA)( tempvars, &tv ); 1823 1824 TRACE_D3(" Recording this variable, with %ld PC range(s)\n", 1825 VG_(sizeXA)(xa) ); 1826 /* collect stats on how effective the ->ranges special 1827 casing is */ 1828 if (0) { 1829 static Int ntot=0, ngt=0; 1830 ntot++; 1831 if (tv->rngMany) ngt++; 1832 if (0 == (ntot % 100000)) 1833 VG_(printf)("XXXX %d tot, %d cloned\n", ntot, ngt); 1834 } 1835 1836 } 1837 1838 /* Here are some other weird cases seen in the wild: 1839 1840 We have a variable with a name and a type, but no 1841 location. I guess that's a sign that it has been 1842 optimised away. Ignore it. Here's an example: 1843 1844 static Int lc_compar(void* n1, void* n2) { 1845 MC_Chunk* mc1 = *(MC_Chunk**)n1; 1846 MC_Chunk* mc2 = *(MC_Chunk**)n2; 1847 return (mc1->data < mc2->data ? -1 : 1); 1848 } 1849 1850 Both mc1 and mc2 are like this 1851 <2><5bc>: Abbrev Number: 21 (DW_TAG_variable) 1852 DW_AT_name : mc1 1853 DW_AT_decl_file : 1 1854 DW_AT_decl_line : 216 1855 DW_AT_type : <5d3> 1856 1857 whereas n1 and n2 do have locations specified. 1858 1859 --------------------------------------------- 1860 1861 We see a DW_TAG_formal_parameter with a type, but 1862 no name and no location. 
It's probably part of a function type 1863 construction, thusly, hence ignore it: 1864 <1><2b4>: Abbrev Number: 12 (DW_TAG_subroutine_type) 1865 DW_AT_sibling : <2c9> 1866 DW_AT_prototyped : 1 1867 DW_AT_type : <114> 1868 <2><2be>: Abbrev Number: 13 (DW_TAG_formal_parameter) 1869 DW_AT_type : <13e> 1870 <2><2c3>: Abbrev Number: 13 (DW_TAG_formal_parameter) 1871 DW_AT_type : <133> 1872 1873 --------------------------------------------- 1874 1875 Is very minimal, like this: 1876 <4><81d>: Abbrev Number: 44 (DW_TAG_variable) 1877 DW_AT_abstract_origin: <7ba> 1878 What that signifies I have no idea. Ignore. 1879 1880 ---------------------------------------------- 1881 1882 Is very minimal, like this: 1883 <200f>: DW_TAG_formal_parameter 1884 DW_AT_abstract_ori: <1f4c> 1885 DW_AT_location : 13440 1886 What that signifies I have no idea. Ignore. 1887 It might be significant, though: the variable at least 1888 has a location and so might exist somewhere. 1889 Maybe we should handle this. 1890 1891 --------------------------------------------- 1892 1893 <22407>: DW_TAG_variable 1894 DW_AT_name : (indirect string, offset: 0x6579): 1895 vgPlain_trampoline_stuff_start 1896 DW_AT_decl_file : 29 1897 DW_AT_decl_line : 56 1898 DW_AT_external : 1 1899 DW_AT_declaration : 1 1900 1901 Nameless and typeless variable that has a location? Who 1902 knows. Not me. 1903 <2><3d178>: Abbrev Number: 22 (DW_TAG_variable) 1904 DW_AT_location : 9 byte block: 3 c0 c7 13 38 0 0 0 0 1905 (DW_OP_addr: 3813c7c0) 1906 1907 No, really. Check it out. gcc is quite simply borked. 
1908 <3><168cc>: Abbrev Number: 141 (DW_TAG_variable) 1909 // followed by no attributes, and the next DIE is a sibling, 1910 // not a child 1911 */ 1912 } 1913 return; 1914 1915 bad_DIE: 1916 set_position_of_Cursor( c_die, saved_die_c_offset ); 1917 set_position_of_Cursor( c_abbv, saved_abbv_c_offset ); 1918 VG_(printf)("\nparse_var_DIE: confused by:\n"); 1919 VG_(printf)(" <%d><%lx>: %s\n", level, posn, ML_(pp_DW_TAG)( dtag ) ); 1920 while (True) { 1921 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 1922 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 1923 if (attr == 0 && form == 0) break; 1924 VG_(printf)(" %18s: ", ML_(pp_DW_AT)(attr)); 1925 /* Get the form contents, so as to print them */ 1926 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 1927 cc, c_die, True, form ); 1928 VG_(printf)("\t\n"); 1929 } 1930 VG_(printf)("\n"); 1931 cc->barf("parse_var_DIE: confused by the above DIE"); 1932 /*NOTREACHED*/ 1933} 1934 1935 1936/*------------------------------------------------------------*/ 1937/*--- ---*/ 1938/*--- Parsing of type-related DIEs ---*/ 1939/*--- ---*/ 1940/*------------------------------------------------------------*/ 1941 1942#define N_D3_TYPE_STACK 16 1943 1944typedef 1945 struct { 1946 /* What source language? 'C'=C/C++, 'F'=Fortran, '?'=other 1947 Established once per compilation unit. */ 1948 UChar language; 1949 /* A stack of types which are currently under construction */ 1950 Int sp; /* [sp] is innermost active entry; sp==-1 for empty 1951 stack */ 1952 /* Note that the TyEnts in qparentE are temporary copies of the 1953 ones accumulating in the main tyent array. So it is not safe 1954 to free up anything on them when popping them off the stack 1955 (iow, it isn't safe to use TyEnt__make_EMPTY on them). Just 1956 memset them to zero when done. 
*/ 1957 TyEnt qparentE[N_D3_TYPE_STACK]; /* parent TyEnts */ 1958 Int qlevel[N_D3_TYPE_STACK]; 1959 1960 } 1961 D3TypeParser; 1962 1963static void typestack_show ( D3TypeParser* parser, HChar* str ) { 1964 Word i; 1965 VG_(printf)(" typestack (%s) {\n", str); 1966 for (i = 0; i <= parser->sp; i++) { 1967 VG_(printf)(" [%ld] (level %d): ", i, parser->qlevel[i]); 1968 ML_(pp_TyEnt)( &parser->qparentE[i] ); 1969 VG_(printf)("\n"); 1970 } 1971 VG_(printf)(" }\n"); 1972} 1973 1974/* Remove from the stack, all entries with .level > 'level' */ 1975static 1976void typestack_preen ( D3TypeParser* parser, Bool td3, Int level ) 1977{ 1978 Bool changed = False; 1979 vg_assert(parser->sp < N_D3_TYPE_STACK); 1980 while (True) { 1981 vg_assert(parser->sp >= -1); 1982 if (parser->sp == -1) break; 1983 if (parser->qlevel[parser->sp] <= level) break; 1984 if (0) 1985 TRACE_D3("BBBBAAAA typestack_pop [newsp=%d]\n", parser->sp-1); 1986 vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp])); 1987 VG_(memset)(&parser->qparentE[parser->sp], 0, sizeof(TyEnt)); 1988 parser->qparentE[parser->sp].cuOff = D3_INVALID_CUOFF; 1989 parser->qparentE[parser->sp].tag = Te_EMPTY; 1990 parser->qlevel[parser->sp] = 0; 1991 parser->sp--; 1992 changed = True; 1993 } 1994 if (changed && td3) 1995 typestack_show( parser, "after preen" ); 1996} 1997 1998static Bool typestack_is_empty ( D3TypeParser* parser ) { 1999 vg_assert(parser->sp >= -1 && parser->sp < N_D3_TYPE_STACK); 2000 return parser->sp == -1; 2001} 2002 2003static void typestack_push ( CUConst* cc, 2004 D3TypeParser* parser, 2005 Bool td3, 2006 TyEnt* parentE, Int level ) { 2007 if (0) 2008 TRACE_D3("BBBBAAAA typestack_push[newsp=%d]: %d %05lx\n", 2009 parser->sp+1, level, parentE->cuOff); 2010 2011 /* First we need to zap everything >= 'level', as we are about to 2012 replace any previous entry at 'level', so .. 
*/ 2013 typestack_preen(parser, /*td3*/False, level-1); 2014 2015 vg_assert(parser->sp >= -1); 2016 vg_assert(parser->sp < N_D3_TYPE_STACK); 2017 if (parser->sp == N_D3_TYPE_STACK-1) 2018 cc->barf("typestack_push: N_D3_TYPE_STACK is too low; " 2019 "increase and recompile"); 2020 if (parser->sp >= 0) 2021 vg_assert(parser->qlevel[parser->sp] < level); 2022 parser->sp++; 2023 vg_assert(parser->qparentE[parser->sp].tag == Te_EMPTY); 2024 vg_assert(parser->qlevel[parser->sp] == 0); 2025 vg_assert(parentE); 2026 vg_assert(ML_(TyEnt__is_type)(parentE)); 2027 vg_assert(parentE->cuOff != D3_INVALID_CUOFF); 2028 parser->qparentE[parser->sp] = *parentE; 2029 parser->qlevel[parser->sp] = level; 2030 if (td3) 2031 typestack_show( parser, "after push" ); 2032} 2033 2034 2035/* Parse a type-related DIE. 'parser' holds the current parser state. 2036 'admin' is where the completed types are dumped. 'dtag' is the tag 2037 for this DIE. 'c_die' points to the start of the data fields (FORM 2038 stuff) for the DIE. c_abbv points to the start of the (name,form) 2039 pairs which describe the DIE. 2040 2041 We may find the DIE uninteresting, in which case we should ignore 2042 it. 2043 2044 What happens: the DIE is examined. If uninteresting, it is ignored. 2045 Otherwise, the DIE gives rise to two things: 2046 2047 (1) the offset of this DIE in the CU -- the cuOffset, a UWord 2048 (2) a TyAdmin structure, which holds the type, or related stuff 2049 2050 (2) is added at the end of 'tyadmins', at some index, say 'i'. 2051 2052 A pair (cuOffset, i) is added to 'tydict'. 2053 2054 Hence 'tyadmins' holds the actual type entities, and 'tydict' holds 2055 a mapping from cuOffset to the index of the corresponding entry in 2056 'tyadmin'. 2057 2058 When resolving a cuOffset to a TyAdmin, first look up the cuOffset 2059 in the tydict (by binary search). This gives an index into 2060 tyadmins, and the required entity lives in tyadmins at that index. 
2061*/ 2062__attribute__((noinline)) 2063static void parse_type_DIE ( /*MOD*/XArray* /* of TyEnt */ tyents, 2064 /*MOD*/D3TypeParser* parser, 2065 DW_TAG dtag, 2066 UWord posn, 2067 Int level, 2068 Cursor* c_die, 2069 Cursor* c_abbv, 2070 CUConst* cc, 2071 Bool td3 ) 2072{ 2073 ULong cts; 2074 Int ctsSzB; 2075 UWord ctsMemSzB; 2076 TyEnt typeE; 2077 TyEnt atomE; 2078 TyEnt fieldE; 2079 TyEnt boundE; 2080 2081 UWord saved_die_c_offset = get_position_of_Cursor( c_die ); 2082 UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv ); 2083 2084 VG_(memset)( &typeE, 0xAA, sizeof(typeE) ); 2085 VG_(memset)( &atomE, 0xAA, sizeof(atomE) ); 2086 VG_(memset)( &fieldE, 0xAA, sizeof(fieldE) ); 2087 VG_(memset)( &boundE, 0xAA, sizeof(boundE) ); 2088 2089 /* If we've returned to a level at or above any previously noted 2090 parent, un-note it, so we don't believe we're still collecting 2091 its children. */ 2092 typestack_preen( parser, td3, level-1 ); 2093 2094 if (dtag == DW_TAG_compile_unit) { 2095 /* See if we can find DW_AT_language, since it is important for 2096 establishing array bounds (see DW_TAG_subrange_type below in 2097 this fn) */ 2098 while (True) { 2099 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2100 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2101 if (attr == 0 && form == 0) break; 2102 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2103 cc, c_die, False/*td3*/, form ); 2104 if (attr != DW_AT_language) 2105 continue; 2106 if (ctsSzB == 0) 2107 goto bad_DIE; 2108 switch (cts) { 2109 case DW_LANG_C89: case DW_LANG_C: 2110 case DW_LANG_C_plus_plus: case DW_LANG_ObjC: 2111 case DW_LANG_ObjC_plus_plus: case DW_LANG_UPC: 2112 case DW_LANG_Upc: case DW_LANG_C99: 2113 parser->language = 'C'; break; 2114 case DW_LANG_Fortran77: case DW_LANG_Fortran90: 2115 case DW_LANG_Fortran95: 2116 parser->language = 'F'; break; 2117 case DW_LANG_Ada83: case DW_LANG_Cobol74: 2118 case DW_LANG_Cobol85: case DW_LANG_Pascal83: 2119 case DW_LANG_Modula2: case DW_LANG_Java: 2120 
case DW_LANG_Ada95: case DW_LANG_PLI: 2121 case DW_LANG_D: case DW_LANG_Python: 2122 case DW_LANG_Mips_Assembler: 2123 parser->language = '?'; break; 2124 default: 2125 goto bad_DIE; 2126 } 2127 } 2128 } 2129 2130 if (dtag == DW_TAG_base_type) { 2131 /* We can pick up a new base type any time. */ 2132 VG_(memset)(&typeE, 0, sizeof(typeE)); 2133 typeE.cuOff = D3_INVALID_CUOFF; 2134 typeE.tag = Te_TyBase; 2135 while (True) { 2136 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2137 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2138 if (attr == 0 && form == 0) break; 2139 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2140 cc, c_die, False/*td3*/, form ); 2141 if (attr == DW_AT_name && ctsMemSzB > 0) { 2142 typeE.Te.TyBase.name 2143 = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.1", 2144 (UChar*)(UWord)cts ); 2145 } 2146 if (attr == DW_AT_byte_size && ctsSzB > 0) { 2147 typeE.Te.TyBase.szB = cts; 2148 } 2149 if (attr == DW_AT_encoding && ctsSzB > 0) { 2150 switch (cts) { 2151 case DW_ATE_unsigned: case DW_ATE_unsigned_char: 2152 case DW_ATE_UTF: /* since DWARF4, e.g. char16_t from C++ */ 2153 case DW_ATE_boolean:/* FIXME - is this correct? */ 2154 typeE.Te.TyBase.enc = 'U'; break; 2155 case DW_ATE_signed: case DW_ATE_signed_char: 2156 typeE.Te.TyBase.enc = 'S'; break; 2157 case DW_ATE_float: 2158 typeE.Te.TyBase.enc = 'F'; break; 2159 case DW_ATE_complex_float: 2160 typeE.Te.TyBase.enc = 'C'; break; 2161 default: 2162 goto bad_DIE; 2163 } 2164 } 2165 } 2166 2167 /* Invent a name if it doesn't have one. gcc-4.3 2168 -ftree-vectorize is observed to emit nameless base types. */ 2169 if (!typeE.Te.TyBase.name) 2170 typeE.Te.TyBase.name 2171 = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.2", 2172 "<anon_base_type>" ); 2173 2174 /* Do we have something that looks sane? */ 2175 if (/* must have a name */ 2176 typeE.Te.TyBase.name == NULL 2177 /* and a plausible size. 
Yes, really 32: "complex long 2178 double" apparently has size=32 */ 2179 || typeE.Te.TyBase.szB < 0 || typeE.Te.TyBase.szB > 32 2180 /* and a plausible encoding */ 2181 || (typeE.Te.TyBase.enc != 'U' 2182 && typeE.Te.TyBase.enc != 'S' 2183 && typeE.Te.TyBase.enc != 'F' 2184 && typeE.Te.TyBase.enc != 'C')) 2185 goto bad_DIE; 2186 /* Last minute hack: if we see this 2187 <1><515>: DW_TAG_base_type 2188 DW_AT_byte_size : 0 2189 DW_AT_encoding : 5 2190 DW_AT_name : void 2191 convert it into a real Void type. */ 2192 if (typeE.Te.TyBase.szB == 0 2193 && 0 == VG_(strcmp)("void", typeE.Te.TyBase.name)) { 2194 ML_(TyEnt__make_EMPTY)(&typeE); 2195 typeE.tag = Te_TyVoid; 2196 typeE.Te.TyVoid.isFake = False; /* it's a real one! */ 2197 } 2198 2199 goto acquire_Type; 2200 } 2201 2202 if (dtag == DW_TAG_pointer_type || dtag == DW_TAG_reference_type 2203 || dtag == DW_TAG_ptr_to_member_type) { 2204 /* This seems legit for _pointer_type and _reference_type. I 2205 don't know if rolling _ptr_to_member_type in here really is 2206 legit, but it's better than not handling it at all. */ 2207 VG_(memset)(&typeE, 0, sizeof(typeE)); 2208 typeE.cuOff = D3_INVALID_CUOFF; 2209 typeE.tag = Te_TyPorR; 2210 /* target type defaults to void */ 2211 typeE.Te.TyPorR.typeR = D3_FAKEVOID_CUOFF; 2212 typeE.Te.TyPorR.isPtr = dtag == DW_TAG_pointer_type 2213 || dtag == DW_TAG_ptr_to_member_type; 2214 /* These three type kinds don't *have* to specify their size, in 2215 which case we assume it's a machine word. But if they do 2216 specify it, it must be a machine word :-) This probably 2217 assumes that the word size of the Dwarf3 we're reading is the 2218 same size as that on the machine. gcc appears to give a size 2219 whereas icc9 doesn't. 
*/ 2220 typeE.Te.TyPorR.szB = sizeof(UWord); 2221 while (True) { 2222 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2223 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2224 if (attr == 0 && form == 0) break; 2225 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2226 cc, c_die, False/*td3*/, form ); 2227 if (attr == DW_AT_byte_size && ctsSzB > 0) { 2228 typeE.Te.TyPorR.szB = cts; 2229 } 2230 if (attr == DW_AT_type && ctsSzB > 0) { 2231 typeE.Te.TyPorR.typeR = (UWord)cts; 2232 } 2233 } 2234 /* Do we have something that looks sane? */ 2235 if (typeE.Te.TyPorR.szB != sizeof(UWord)) 2236 goto bad_DIE; 2237 else 2238 goto acquire_Type; 2239 } 2240 2241 if (dtag == DW_TAG_enumeration_type) { 2242 /* Create a new Type to hold the results. */ 2243 VG_(memset)(&typeE, 0, sizeof(typeE)); 2244 typeE.cuOff = posn; 2245 typeE.tag = Te_TyEnum; 2246 typeE.Te.TyEnum.atomRs 2247 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.enum_type.1", 2248 ML_(dinfo_free), 2249 sizeof(UWord) ); 2250 while (True) { 2251 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2252 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2253 if (attr == 0 && form == 0) break; 2254 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2255 cc, c_die, False/*td3*/, form ); 2256 if (attr == DW_AT_name && ctsMemSzB > 0) { 2257 typeE.Te.TyEnum.name 2258 = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.2", 2259 (UChar*)(UWord)cts ); 2260 } 2261 if (attr == DW_AT_byte_size && ctsSzB > 0) { 2262 typeE.Te.TyEnum.szB = cts; 2263 } 2264 } 2265 2266 if (!typeE.Te.TyEnum.name) 2267 typeE.Te.TyEnum.name 2268 = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.3", 2269 "<anon_enum_type>" ); 2270 2271 /* Do we have something that looks sane? */ 2272 if (typeE.Te.TyEnum.szB == 0 /* we must know the size */) 2273 goto bad_DIE; 2274 /* On't stack! 
*/ 2275 typestack_push( cc, parser, td3, &typeE, level ); 2276 goto acquire_Type; 2277 } 2278 2279 /* gcc (GCC) 4.4.0 20081017 (experimental) occasionally produces 2280 DW_TAG_enumerator with only a DW_AT_name but no 2281 DW_AT_const_value. This is in violation of the Dwarf3 standard, 2282 and appears to be a new "feature" of gcc - versions 4.3.x and 2283 earlier do not appear to do this. So accept DW_TAG_enumerator 2284 which only have a name but no value. An example: 2285 2286 <1><180>: Abbrev Number: 6 (DW_TAG_enumeration_type) 2287 <181> DW_AT_name : (indirect string, offset: 0xda70): 2288 QtMsgType 2289 <185> DW_AT_byte_size : 4 2290 <186> DW_AT_decl_file : 14 2291 <187> DW_AT_decl_line : 1480 2292 <189> DW_AT_sibling : <0x1a7> 2293 <2><18d>: Abbrev Number: 7 (DW_TAG_enumerator) 2294 <18e> DW_AT_name : (indirect string, offset: 0x9e18): 2295 QtDebugMsg 2296 <2><192>: Abbrev Number: 7 (DW_TAG_enumerator) 2297 <193> DW_AT_name : (indirect string, offset: 0x1505f): 2298 QtWarningMsg 2299 <2><197>: Abbrev Number: 7 (DW_TAG_enumerator) 2300 <198> DW_AT_name : (indirect string, offset: 0x16f4a): 2301 QtCriticalMsg 2302 <2><19c>: Abbrev Number: 7 (DW_TAG_enumerator) 2303 <19d> DW_AT_name : (indirect string, offset: 0x156dd): 2304 QtFatalMsg 2305 <2><1a1>: Abbrev Number: 7 (DW_TAG_enumerator) 2306 <1a2> DW_AT_name : (indirect string, offset: 0x13660): 2307 QtSystemMsg 2308 */ 2309 if (dtag == DW_TAG_enumerator) { 2310 VG_(memset)( &atomE, 0, sizeof(atomE) ); 2311 atomE.cuOff = posn; 2312 atomE.tag = Te_Atom; 2313 while (True) { 2314 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2315 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2316 if (attr == 0 && form == 0) break; 2317 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2318 cc, c_die, False/*td3*/, form ); 2319 if (attr == DW_AT_name && ctsMemSzB > 0) { 2320 atomE.Te.Atom.name 2321 = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enumerator.1", 2322 (UChar*)(UWord)cts ); 2323 } 2324 if (attr == DW_AT_const_value && ctsSzB > 0) { 
2325 atomE.Te.Atom.value = cts; 2326 atomE.Te.Atom.valueKnown = True; 2327 } 2328 } 2329 /* Do we have something that looks sane? */ 2330 if (atomE.Te.Atom.name == NULL) 2331 goto bad_DIE; 2332 /* Do we have a plausible parent? */ 2333 if (typestack_is_empty(parser)) goto bad_DIE; 2334 vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp])); 2335 vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF); 2336 if (level != parser->qlevel[parser->sp]+1) goto bad_DIE; 2337 if (parser->qparentE[parser->sp].tag != Te_TyEnum) goto bad_DIE; 2338 /* Record this child in the parent */ 2339 vg_assert(parser->qparentE[parser->sp].Te.TyEnum.atomRs); 2340 VG_(addToXA)( parser->qparentE[parser->sp].Te.TyEnum.atomRs, 2341 &atomE ); 2342 /* And record the child itself */ 2343 goto acquire_Atom; 2344 } 2345 2346 /* Treat DW_TAG_class_type as if it was a DW_TAG_structure_type. I 2347 don't know if this is correct, but it at least makes this reader 2348 usable for gcc-4.3 produced Dwarf3. */ 2349 if (dtag == DW_TAG_structure_type || dtag == DW_TAG_class_type 2350 || dtag == DW_TAG_union_type) { 2351 Bool have_szB = False; 2352 Bool is_decl = False; 2353 Bool is_spec = False; 2354 /* Create a new Type to hold the results. 
*/ 2355 VG_(memset)(&typeE, 0, sizeof(typeE)); 2356 typeE.cuOff = posn; 2357 typeE.tag = Te_TyStOrUn; 2358 typeE.Te.TyStOrUn.name = NULL; 2359 typeE.Te.TyStOrUn.fieldRs 2360 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.pTD.struct_type.1", 2361 ML_(dinfo_free), 2362 sizeof(UWord) ); 2363 typeE.Te.TyStOrUn.complete = True; 2364 typeE.Te.TyStOrUn.isStruct = dtag == DW_TAG_structure_type 2365 || dtag == DW_TAG_class_type; 2366 while (True) { 2367 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2368 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2369 if (attr == 0 && form == 0) break; 2370 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2371 cc, c_die, False/*td3*/, form ); 2372 if (attr == DW_AT_name && ctsMemSzB > 0) { 2373 typeE.Te.TyStOrUn.name 2374 = ML_(dinfo_strdup)( "di.readdwarf3.ptD.struct_type.2", 2375 (UChar*)(UWord)cts ); 2376 } 2377 if (attr == DW_AT_byte_size && ctsSzB >= 0) { 2378 typeE.Te.TyStOrUn.szB = cts; 2379 have_szB = True; 2380 } 2381 if (attr == DW_AT_declaration && ctsSzB > 0 && cts > 0) { 2382 is_decl = True; 2383 } 2384 if (attr == DW_AT_specification && ctsSzB > 0 && cts > 0) { 2385 is_spec = True; 2386 } 2387 } 2388 /* Do we have something that looks sane? */ 2389 if (is_decl && (!is_spec)) { 2390 /* It's a DW_AT_declaration. We require the name but 2391 nothing else. */ 2392 if (typeE.Te.TyStOrUn.name == NULL) 2393 goto bad_DIE; 2394 typeE.Te.TyStOrUn.complete = False; 2395 /* JRS 2009 Aug 10: <possible kludge>? */ 2396 /* Push this tyent on the stack, even though it's incomplete. 2397 It appears that gcc-4.4 on Fedora 11 will sometimes create 2398 DW_TAG_member entries for it, and so we need to have a 2399 plausible parent present in order for that to work. See 2400 #200029 comments 8 and 9. 
*/ 2401 typestack_push( cc, parser, td3, &typeE, level ); 2402 /* </possible kludge> */ 2403 goto acquire_Type; 2404 } 2405 if ((!is_decl) /* && (!is_spec) */) { 2406 /* this is the common, ordinary case */ 2407 if ((!have_szB) /* we must know the size */ 2408 /* But the name can be present, or not */) 2409 goto bad_DIE; 2410 /* On't stack! */ 2411 typestack_push( cc, parser, td3, &typeE, level ); 2412 goto acquire_Type; 2413 } 2414 else { 2415 /* don't know how to handle any other variants just now */ 2416 goto bad_DIE; 2417 } 2418 } 2419 2420 if (dtag == DW_TAG_member) { 2421 /* Acquire member entries for both DW_TAG_structure_type and 2422 DW_TAG_union_type. They differ minorly, in that struct 2423 members must have a DW_AT_data_member_location expression 2424 whereas union members must not. */ 2425 Bool parent_is_struct; 2426 VG_(memset)( &fieldE, 0, sizeof(fieldE) ); 2427 fieldE.cuOff = posn; 2428 fieldE.tag = Te_Field; 2429 fieldE.Te.Field.typeR = D3_INVALID_CUOFF; 2430 while (True) { 2431 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2432 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2433 if (attr == 0 && form == 0) break; 2434 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2435 cc, c_die, False/*td3*/, form ); 2436 if (attr == DW_AT_name && ctsMemSzB > 0) { 2437 fieldE.Te.Field.name 2438 = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.1", 2439 (UChar*)(UWord)cts ); 2440 } 2441 if (attr == DW_AT_type && ctsSzB > 0) { 2442 fieldE.Te.Field.typeR = (UWord)cts; 2443 } 2444 /* There are 2 different cases for DW_AT_data_member_location. 2445 If it is a constant class attribute, it contains byte offset 2446 from the beginning of the containing entity. 2447 Otherwise it is a location expression. 
*/ 2448 if (attr == DW_AT_data_member_location && ctsSzB > 0) { 2449 fieldE.Te.Field.nLoc = -1; 2450 fieldE.Te.Field.pos.offset = cts; 2451 } else if (attr == DW_AT_data_member_location && ctsMemSzB > 0) { 2452 fieldE.Te.Field.nLoc = (UWord)ctsMemSzB; 2453 fieldE.Te.Field.pos.loc 2454 = ML_(dinfo_memdup)( "di.readdwarf3.ptD.member.2", 2455 (UChar*)(UWord)cts, 2456 (SizeT)fieldE.Te.Field.nLoc ); 2457 } 2458 } 2459 /* Do we have a plausible parent? */ 2460 if (typestack_is_empty(parser)) goto bad_DIE; 2461 vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp])); 2462 vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF); 2463 if (level != parser->qlevel[parser->sp]+1) goto bad_DIE; 2464 if (parser->qparentE[parser->sp].tag != Te_TyStOrUn) goto bad_DIE; 2465 /* Do we have something that looks sane? If this a member of a 2466 struct, we must have a location expression; but if a member 2467 of a union that is irrelevant (D3 spec sec 5.6.6). We ought 2468 to reject in the latter case, but some compilers have been 2469 observed to emit constant-zero expressions. So just ignore 2470 them. */ 2471 parent_is_struct 2472 = parser->qparentE[parser->sp].Te.TyStOrUn.isStruct; 2473 if (!fieldE.Te.Field.name) 2474 fieldE.Te.Field.name 2475 = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.3", 2476 "<anon_field>" ); 2477 vg_assert(fieldE.Te.Field.name); 2478 if (fieldE.Te.Field.typeR == D3_INVALID_CUOFF) 2479 goto bad_DIE; 2480 if (fieldE.Te.Field.nLoc) { 2481 if (!parent_is_struct) { 2482 /* If this is a union type, pretend we haven't seen the data 2483 member location expression, as it is by definition 2484 redundant (it must be zero). 
*/ 2485 if (fieldE.Te.Field.nLoc > 0) 2486 ML_(dinfo_free)(fieldE.Te.Field.pos.loc); 2487 fieldE.Te.Field.pos.loc = NULL; 2488 fieldE.Te.Field.nLoc = 0; 2489 } 2490 /* Record this child in the parent */ 2491 fieldE.Te.Field.isStruct = parent_is_struct; 2492 vg_assert(parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs); 2493 VG_(addToXA)( parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs, 2494 &posn ); 2495 /* And record the child itself */ 2496 goto acquire_Field; 2497 } else { 2498 /* Member with no location - this can happen with static 2499 const members in C++ code which are compile time constants 2500 that do no exist in the class. They're not of any interest 2501 to us so we ignore them. */ 2502 } 2503 } 2504 2505 if (dtag == DW_TAG_array_type) { 2506 VG_(memset)(&typeE, 0, sizeof(typeE)); 2507 typeE.cuOff = posn; 2508 typeE.tag = Te_TyArray; 2509 typeE.Te.TyArray.typeR = D3_INVALID_CUOFF; 2510 typeE.Te.TyArray.boundRs 2511 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.array_type.1", 2512 ML_(dinfo_free), 2513 sizeof(UWord) ); 2514 while (True) { 2515 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2516 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2517 if (attr == 0 && form == 0) break; 2518 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2519 cc, c_die, False/*td3*/, form ); 2520 if (attr == DW_AT_type && ctsSzB > 0) { 2521 typeE.Te.TyArray.typeR = (UWord)cts; 2522 } 2523 } 2524 if (typeE.Te.TyArray.typeR == D3_INVALID_CUOFF) 2525 goto bad_DIE; 2526 /* On't stack! 
*/ 2527 typestack_push( cc, parser, td3, &typeE, level ); 2528 goto acquire_Type; 2529 } 2530 2531 if (dtag == DW_TAG_subrange_type) { 2532 Bool have_lower = False; 2533 Bool have_upper = False; 2534 Bool have_count = False; 2535 Long lower = 0; 2536 Long upper = 0; 2537 2538 switch (parser->language) { 2539 case 'C': have_lower = True; lower = 0; break; 2540 case 'F': have_lower = True; lower = 1; break; 2541 case '?': have_lower = False; break; 2542 default: vg_assert(0); /* assured us by handling of 2543 DW_TAG_compile_unit in this fn */ 2544 } 2545 2546 VG_(memset)( &boundE, 0, sizeof(boundE) ); 2547 boundE.cuOff = D3_INVALID_CUOFF; 2548 boundE.tag = Te_Bound; 2549 while (True) { 2550 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2551 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2552 if (attr == 0 && form == 0) break; 2553 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2554 cc, c_die, False/*td3*/, form ); 2555 if (attr == DW_AT_lower_bound && ctsSzB > 0) { 2556 lower = (Long)cts; 2557 have_lower = True; 2558 } 2559 if (attr == DW_AT_upper_bound && ctsSzB > 0) { 2560 upper = (Long)cts; 2561 have_upper = True; 2562 } 2563 if (attr == DW_AT_count && ctsSzB > 0) { 2564 /*count = (Long)cts;*/ 2565 have_count = True; 2566 } 2567 } 2568 /* FIXME: potentially skip the rest if no parent present, since 2569 it could be the case that this subrange type is free-standing 2570 (not being used to describe the bounds of a containing array 2571 type) */ 2572 /* Do we have a plausible parent? 
*/ 2573 if (typestack_is_empty(parser)) goto bad_DIE; 2574 vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp])); 2575 vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF); 2576 if (level != parser->qlevel[parser->sp]+1) goto bad_DIE; 2577 if (parser->qparentE[parser->sp].tag != Te_TyArray) goto bad_DIE; 2578 2579 /* Figure out if we have a definite range or not */ 2580 if (have_lower && have_upper && (!have_count)) { 2581 boundE.Te.Bound.knownL = True; 2582 boundE.Te.Bound.knownU = True; 2583 boundE.Te.Bound.boundL = lower; 2584 boundE.Te.Bound.boundU = upper; 2585 } 2586 else if (have_lower && (!have_upper) && (!have_count)) { 2587 boundE.Te.Bound.knownL = True; 2588 boundE.Te.Bound.knownU = False; 2589 boundE.Te.Bound.boundL = lower; 2590 boundE.Te.Bound.boundU = 0; 2591 } 2592 else if ((!have_lower) && have_upper && (!have_count)) { 2593 boundE.Te.Bound.knownL = False; 2594 boundE.Te.Bound.knownU = True; 2595 boundE.Te.Bound.boundL = 0; 2596 boundE.Te.Bound.boundU = upper; 2597 } 2598 else if ((!have_lower) && (!have_upper) && (!have_count)) { 2599 boundE.Te.Bound.knownL = False; 2600 boundE.Te.Bound.knownU = False; 2601 boundE.Te.Bound.boundL = 0; 2602 boundE.Te.Bound.boundU = 0; 2603 } else { 2604 /* FIXME: handle more cases */ 2605 goto bad_DIE; 2606 } 2607 2608 /* Record this bound in the parent */ 2609 boundE.cuOff = posn; 2610 vg_assert(parser->qparentE[parser->sp].Te.TyArray.boundRs); 2611 VG_(addToXA)( parser->qparentE[parser->sp].Te.TyArray.boundRs, 2612 &boundE ); 2613 /* And record the child itself */ 2614 goto acquire_Bound; 2615 } 2616 2617 if (dtag == DW_TAG_typedef) { 2618 /* We can pick up a new typedef any time. 
*/ 2619 VG_(memset)(&typeE, 0, sizeof(typeE)); 2620 typeE.cuOff = D3_INVALID_CUOFF; 2621 typeE.tag = Te_TyTyDef; 2622 typeE.Te.TyTyDef.name = NULL; 2623 typeE.Te.TyTyDef.typeR = D3_INVALID_CUOFF; 2624 while (True) { 2625 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2626 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2627 if (attr == 0 && form == 0) break; 2628 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2629 cc, c_die, False/*td3*/, form ); 2630 if (attr == DW_AT_name && ctsMemSzB > 0) { 2631 typeE.Te.TyTyDef.name 2632 = ML_(dinfo_strdup)( "di.readdwarf3.ptD.typedef.1", 2633 (UChar*)(UWord)cts ); 2634 } 2635 if (attr == DW_AT_type && ctsSzB > 0) { 2636 typeE.Te.TyTyDef.typeR = (UWord)cts; 2637 } 2638 } 2639 /* Do we have something that looks sane? */ 2640 if (/* must have a name */ 2641 typeE.Te.TyTyDef.name == NULL 2642 /* but the referred-to type can be absent */) 2643 goto bad_DIE; 2644 else 2645 goto acquire_Type; 2646 } 2647 2648 if (dtag == DW_TAG_subroutine_type) { 2649 /* function type? just record that one fact and ask no 2650 further questions. */ 2651 VG_(memset)(&typeE, 0, sizeof(typeE)); 2652 typeE.cuOff = D3_INVALID_CUOFF; 2653 typeE.tag = Te_TyFn; 2654 goto acquire_Type; 2655 } 2656 2657 if (dtag == DW_TAG_volatile_type || dtag == DW_TAG_const_type) { 2658 Int have_ty = 0; 2659 VG_(memset)(&typeE, 0, sizeof(typeE)); 2660 typeE.cuOff = D3_INVALID_CUOFF; 2661 typeE.tag = Te_TyQual; 2662 typeE.Te.TyQual.qual 2663 = dtag == DW_TAG_volatile_type ? 
'V' : 'C'; 2664 /* target type defaults to 'void' */ 2665 typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF; 2666 while (True) { 2667 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2668 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2669 if (attr == 0 && form == 0) break; 2670 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2671 cc, c_die, False/*td3*/, form ); 2672 if (attr == DW_AT_type && ctsSzB > 0) { 2673 typeE.Te.TyQual.typeR = (UWord)cts; 2674 have_ty++; 2675 } 2676 } 2677 /* gcc sometimes generates DW_TAG_const/volatile_type without 2678 DW_AT_type and GDB appears to interpret the type as 'const 2679 void' (resp. 'volatile void'). So just allow it .. */ 2680 if (have_ty == 1 || have_ty == 0) 2681 goto acquire_Type; 2682 else 2683 goto bad_DIE; 2684 } 2685 2686 /* else ignore this DIE */ 2687 return; 2688 /*NOTREACHED*/ 2689 2690 acquire_Type: 2691 if (0) VG_(printf)("YYYY Acquire Type\n"); 2692 vg_assert(ML_(TyEnt__is_type)( &typeE )); 2693 vg_assert(typeE.cuOff == D3_INVALID_CUOFF || typeE.cuOff == posn); 2694 typeE.cuOff = posn; 2695 VG_(addToXA)( tyents, &typeE ); 2696 return; 2697 /*NOTREACHED*/ 2698 2699 acquire_Atom: 2700 if (0) VG_(printf)("YYYY Acquire Atom\n"); 2701 vg_assert(atomE.tag == Te_Atom); 2702 vg_assert(atomE.cuOff == D3_INVALID_CUOFF || atomE.cuOff == posn); 2703 atomE.cuOff = posn; 2704 VG_(addToXA)( tyents, &atomE ); 2705 return; 2706 /*NOTREACHED*/ 2707 2708 acquire_Field: 2709 /* For union members, Expr should be absent */ 2710 if (0) VG_(printf)("YYYY Acquire Field\n"); 2711 vg_assert(fieldE.tag == Te_Field); 2712 vg_assert(fieldE.Te.Field.nLoc <= 0 || fieldE.Te.Field.pos.loc != NULL); 2713 vg_assert(fieldE.Te.Field.nLoc != 0 || fieldE.Te.Field.pos.loc == NULL); 2714 if (fieldE.Te.Field.isStruct) { 2715 vg_assert(fieldE.Te.Field.nLoc != 0); 2716 } else { 2717 vg_assert(fieldE.Te.Field.nLoc == 0); 2718 } 2719 vg_assert(fieldE.cuOff == D3_INVALID_CUOFF || fieldE.cuOff == posn); 2720 fieldE.cuOff = posn; 2721 VG_(addToXA)( tyents, &fieldE ); 2722 
return; 2723 /*NOTREACHED*/ 2724 2725 acquire_Bound: 2726 if (0) VG_(printf)("YYYY Acquire Bound\n"); 2727 vg_assert(boundE.tag == Te_Bound); 2728 vg_assert(boundE.cuOff == D3_INVALID_CUOFF || boundE.cuOff == posn); 2729 boundE.cuOff = posn; 2730 VG_(addToXA)( tyents, &boundE ); 2731 return; 2732 /*NOTREACHED*/ 2733 2734 bad_DIE: 2735 set_position_of_Cursor( c_die, saved_die_c_offset ); 2736 set_position_of_Cursor( c_abbv, saved_abbv_c_offset ); 2737 VG_(printf)("\nparse_type_DIE: confused by:\n"); 2738 VG_(printf)(" <%d><%lx>: %s\n", level, posn, ML_(pp_DW_TAG)( dtag ) ); 2739 while (True) { 2740 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2741 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2742 if (attr == 0 && form == 0) break; 2743 VG_(printf)(" %18s: ", ML_(pp_DW_AT)(attr)); 2744 /* Get the form contents, so as to print them */ 2745 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2746 cc, c_die, True, form ); 2747 VG_(printf)("\t\n"); 2748 } 2749 VG_(printf)("\n"); 2750 cc->barf("parse_type_DIE: confused by the above DIE"); 2751 /*NOTREACHED*/ 2752} 2753 2754 2755/*------------------------------------------------------------*/ 2756/*--- ---*/ 2757/*--- Compression of type DIE information ---*/ 2758/*--- ---*/ 2759/*------------------------------------------------------------*/ 2760 2761static UWord chase_cuOff ( Bool* changed, 2762 XArray* /* of TyEnt */ ents, 2763 TyEntIndexCache* ents_cache, 2764 UWord cuOff ) 2765{ 2766 TyEnt* ent; 2767 ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, cuOff ); 2768 2769 if (!ent) { 2770 VG_(printf)("chase_cuOff: no entry for 0x%05lx\n", cuOff); 2771 *changed = False; 2772 return cuOff; 2773 } 2774 2775 vg_assert(ent->tag != Te_EMPTY); 2776 if (ent->tag != Te_INDIR) { 2777 *changed = False; 2778 return cuOff; 2779 } else { 2780 vg_assert(ent->Te.INDIR.indR < cuOff); 2781 *changed = True; 2782 return ent->Te.INDIR.indR; 2783 } 2784} 2785 2786static 2787void chase_cuOffs_in_XArray ( Bool* changed, 2788 XArray* /* of TyEnt */ 
ents, 2789 TyEntIndexCache* ents_cache, 2790 /*MOD*/XArray* /* of UWord */ cuOffs ) 2791{ 2792 Bool b2 = False; 2793 Word i, n = VG_(sizeXA)( cuOffs ); 2794 for (i = 0; i < n; i++) { 2795 Bool b = False; 2796 UWord* p = VG_(indexXA)( cuOffs, i ); 2797 *p = chase_cuOff( &b, ents, ents_cache, *p ); 2798 if (b) 2799 b2 = True; 2800 } 2801 *changed = b2; 2802} 2803 2804static Bool TyEnt__subst_R_fields ( XArray* /* of TyEnt */ ents, 2805 TyEntIndexCache* ents_cache, 2806 /*MOD*/TyEnt* te ) 2807{ 2808 Bool b, changed = False; 2809 switch (te->tag) { 2810 case Te_EMPTY: 2811 break; 2812 case Te_INDIR: 2813 te->Te.INDIR.indR 2814 = chase_cuOff( &b, ents, ents_cache, te->Te.INDIR.indR ); 2815 if (b) changed = True; 2816 break; 2817 case Te_UNKNOWN: 2818 break; 2819 case Te_Atom: 2820 break; 2821 case Te_Field: 2822 te->Te.Field.typeR 2823 = chase_cuOff( &b, ents, ents_cache, te->Te.Field.typeR ); 2824 if (b) changed = True; 2825 break; 2826 case Te_Bound: 2827 break; 2828 case Te_TyBase: 2829 break; 2830 case Te_TyPorR: 2831 te->Te.TyPorR.typeR 2832 = chase_cuOff( &b, ents, ents_cache, te->Te.TyPorR.typeR ); 2833 if (b) changed = True; 2834 break; 2835 case Te_TyTyDef: 2836 te->Te.TyTyDef.typeR 2837 = chase_cuOff( &b, ents, ents_cache, te->Te.TyTyDef.typeR ); 2838 if (b) changed = True; 2839 break; 2840 case Te_TyStOrUn: 2841 chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyStOrUn.fieldRs ); 2842 if (b) changed = True; 2843 break; 2844 case Te_TyEnum: 2845 chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyEnum.atomRs ); 2846 if (b) changed = True; 2847 break; 2848 case Te_TyArray: 2849 te->Te.TyArray.typeR 2850 = chase_cuOff( &b, ents, ents_cache, te->Te.TyArray.typeR ); 2851 if (b) changed = True; 2852 chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyArray.boundRs ); 2853 if (b) changed = True; 2854 break; 2855 case Te_TyFn: 2856 break; 2857 case Te_TyQual: 2858 te->Te.TyQual.typeR 2859 = chase_cuOff( &b, ents, ents_cache, te->Te.TyQual.typeR ); 2860 if 
(b) changed = True; 2861 break; 2862 case Te_TyVoid: 2863 break; 2864 default: 2865 ML_(pp_TyEnt)(te); 2866 vg_assert(0); 2867 } 2868 return changed; 2869} 2870 2871/* Make a pass over 'ents'. For each tyent, inspect the target of any 2872 'R' or 'Rs' fields (those which refer to other tyents), and replace 2873 any which point to INDIR nodes with the target of the indirection 2874 (which should not itself be an indirection). In summary, this 2875 routine shorts out all references to indirection nodes. */ 2876static 2877Word dedup_types_substitution_pass ( /*MOD*/XArray* /* of TyEnt */ ents, 2878 TyEntIndexCache* ents_cache ) 2879{ 2880 Word i, n, nChanged = 0; 2881 Bool b; 2882 n = VG_(sizeXA)( ents ); 2883 for (i = 0; i < n; i++) { 2884 TyEnt* ent = VG_(indexXA)( ents, i ); 2885 vg_assert(ent->tag != Te_EMPTY); 2886 /* We have to substitute everything, even indirections, so as to 2887 ensure that chains of indirections don't build up. */ 2888 b = TyEnt__subst_R_fields( ents, ents_cache, ent ); 2889 if (b) 2890 nChanged++; 2891 } 2892 2893 return nChanged; 2894} 2895 2896 2897/* Make a pass over 'ents', building a dictionary of TyEnts as we go. 2898 Look up each new tyent in the dictionary in turn. If it is already 2899 in the dictionary, replace this tyent with an indirection to the 2900 existing one, and delete any malloc'd stuff hanging off this one. 2901 In summary, this routine commons up all tyents that are identical 2902 as defined by TyEnt__cmp_by_all_except_cuOff. 
*/ 2903static 2904Word dedup_types_commoning_pass ( /*MOD*/XArray* /* of TyEnt */ ents ) 2905{ 2906 Word n, i, nDeleted; 2907 WordFM* dict; /* TyEnt* -> void */ 2908 TyEnt* ent; 2909 UWord keyW, valW; 2910 2911 dict = VG_(newFM)( 2912 ML_(dinfo_zalloc), "di.readdwarf3.dtcp.1", 2913 ML_(dinfo_free), 2914 (Word(*)(UWord,UWord)) ML_(TyEnt__cmp_by_all_except_cuOff) 2915 ); 2916 2917 nDeleted = 0; 2918 n = VG_(sizeXA)( ents ); 2919 for (i = 0; i < n; i++) { 2920 ent = VG_(indexXA)( ents, i ); 2921 vg_assert(ent->tag != Te_EMPTY); 2922 2923 /* Ignore indirections, although check that they are 2924 not forming a cycle. */ 2925 if (ent->tag == Te_INDIR) { 2926 vg_assert(ent->Te.INDIR.indR < ent->cuOff); 2927 continue; 2928 } 2929 2930 keyW = valW = 0; 2931 if (VG_(lookupFM)( dict, &keyW, &valW, (UWord)ent )) { 2932 /* it's already in the dictionary. */ 2933 TyEnt* old = (TyEnt*)keyW; 2934 vg_assert(valW == 0); 2935 vg_assert(old != ent); 2936 vg_assert(old->tag != Te_INDIR); 2937 /* since we are traversing the array in increasing order of 2938 cuOff: */ 2939 vg_assert(old->cuOff < ent->cuOff); 2940 /* So anyway, dump this entry and replace it with an 2941 indirection to the one in the dictionary. Note that the 2942 assertion above guarantees that we cannot create cycles of 2943 indirections, since we are always creating an indirection 2944 to a tyent with a cuOff lower than this one. */ 2945 ML_(TyEnt__make_EMPTY)( ent ); 2946 ent->tag = Te_INDIR; 2947 ent->Te.INDIR.indR = old->cuOff; 2948 nDeleted++; 2949 } else { 2950 /* not in dictionary; add it and keep going. 
*/ 2951 VG_(addToFM)( dict, (UWord)ent, 0 ); 2952 } 2953 } 2954 2955 VG_(deleteFM)( dict, NULL, NULL ); 2956 2957 return nDeleted; 2958} 2959 2960 2961static 2962void dedup_types ( Bool td3, 2963 /*MOD*/XArray* /* of TyEnt */ ents, 2964 TyEntIndexCache* ents_cache ) 2965{ 2966 Word m, n, i, nDel, nSubst, nThresh; 2967 if (0) td3 = True; 2968 2969 n = VG_(sizeXA)( ents ); 2970 2971 /* If a commoning pass and a substitution pass both make fewer than 2972 this many changes, just stop. It's pointless to burn up CPU 2973 time trying to compress the last 1% or so out of the array. */ 2974 nThresh = n / 200; 2975 2976 /* First we must sort .ents by its .cuOff fields, so we 2977 can index into it. */ 2978 VG_(setCmpFnXA)( 2979 ents, 2980 (Int(*)(void*,void*)) ML_(TyEnt__cmp_by_cuOff_only) 2981 ); 2982 VG_(sortXA)( ents ); 2983 2984 /* Now repeatedly do commoning and substitution passes over 2985 the array, until there are no more changes. */ 2986 do { 2987 nDel = dedup_types_commoning_pass ( ents ); 2988 nSubst = dedup_types_substitution_pass ( ents, ents_cache ); 2989 vg_assert(nDel >= 0 && nSubst >= 0); 2990 TRACE_D3(" %ld deletions, %ld substitutions\n", nDel, nSubst); 2991 } while (nDel > nThresh || nSubst > nThresh); 2992 2993 /* Sanity check: all INDIR nodes should point at a non-INDIR thing. 2994 In fact this should be true at the end of every loop iteration 2995 above (a commoning pass followed by a substitution pass), but 2996 checking it on every iteration is excessively expensive. Note, 2997 this loop also computes 'm' for the stats printing below it. 
*/ 2998 m = 0; 2999 n = VG_(sizeXA)( ents ); 3000 for (i = 0; i < n; i++) { 3001 TyEnt *ent, *ind; 3002 ent = VG_(indexXA)( ents, i ); 3003 if (ent->tag != Te_INDIR) continue; 3004 m++; 3005 ind = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, 3006 ent->Te.INDIR.indR ); 3007 vg_assert(ind); 3008 vg_assert(ind->tag != Te_INDIR); 3009 } 3010 3011 TRACE_D3("Overall: %ld before, %ld after\n", n, n-m); 3012} 3013 3014 3015/*------------------------------------------------------------*/ 3016/*--- ---*/ 3017/*--- Resolution of references to type DIEs ---*/ 3018/*--- ---*/ 3019/*------------------------------------------------------------*/ 3020 3021/* Make a pass through the (temporary) variables array. Examine the 3022 type of each variable, check is it found, and chase any Te_INDIRs. 3023 Postcondition is: each variable has a typeR field that refers to a 3024 valid type in tyents, or a Te_UNKNOWN, and is certainly guaranteed 3025 not to refer to a Te_INDIR. (This is so that we can throw all the 3026 Te_INDIRs away later). */ 3027 3028__attribute__((noinline)) 3029static void resolve_variable_types ( 3030 void (*barf)( HChar* ) __attribute__((noreturn)), 3031 /*R-O*/XArray* /* of TyEnt */ ents, 3032 /*MOD*/TyEntIndexCache* ents_cache, 3033 /*MOD*/XArray* /* of TempVar* */ vars 3034 ) 3035{ 3036 Word i, n; 3037 n = VG_(sizeXA)( vars ); 3038 for (i = 0; i < n; i++) { 3039 TempVar* var = *(TempVar**)VG_(indexXA)( vars, i ); 3040 /* This is the stated type of the variable. But it might be 3041 an indirection, so be careful. 
*/ 3042 TyEnt* ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, 3043 var->typeR ); 3044 if (ent && ent->tag == Te_INDIR) { 3045 ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, 3046 ent->Te.INDIR.indR ); 3047 vg_assert(ent); 3048 vg_assert(ent->tag != Te_INDIR); 3049 } 3050 3051 /* Deal first with "normal" cases */ 3052 if (ent && ML_(TyEnt__is_type)(ent)) { 3053 var->typeR = ent->cuOff; 3054 continue; 3055 } 3056 3057 /* If there's no ent, it probably we did not manage to read a 3058 type at the cuOffset which is stated as being this variable's 3059 type. Maybe a deficiency in parse_type_DIE. Complain. */ 3060 if (ent == NULL) { 3061 VG_(printf)("\n: Invalid cuOff = 0x%05lx\n", var->typeR ); 3062 barf("resolve_variable_types: " 3063 "cuOff does not refer to a known type"); 3064 } 3065 vg_assert(ent); 3066 /* If ent has any other tag, something bad happened, along the 3067 lines of var->typeR not referring to a type at all. */ 3068 vg_assert(ent->tag == Te_UNKNOWN); 3069 /* Just accept it; the type will be useless, but at least keep 3070 going. 
*/ 3071 var->typeR = ent->cuOff; 3072 } 3073} 3074 3075 3076/*------------------------------------------------------------*/ 3077/*--- ---*/ 3078/*--- Parsing of Compilation Units ---*/ 3079/*--- ---*/ 3080/*------------------------------------------------------------*/ 3081 3082static Int cmp_TempVar_by_dioff ( void* v1, void* v2 ) { 3083 TempVar* t1 = *(TempVar**)v1; 3084 TempVar* t2 = *(TempVar**)v2; 3085 if (t1->dioff < t2->dioff) return -1; 3086 if (t1->dioff > t2->dioff) return 1; 3087 return 0; 3088} 3089 3090static void read_DIE ( 3091 /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree, 3092 /*MOD*/XArray* /* of TyEnt */ tyents, 3093 /*MOD*/XArray* /* of TempVar* */ tempvars, 3094 /*MOD*/XArray* /* of GExpr* */ gexprs, 3095 /*MOD*/D3TypeParser* typarser, 3096 /*MOD*/D3VarParser* varparser, 3097 Cursor* c, Bool td3, CUConst* cc, Int level 3098) 3099{ 3100 Cursor abbv; 3101 ULong atag, abbv_code; 3102 UWord posn; 3103 UInt has_children; 3104 UWord start_die_c_offset, start_abbv_c_offset; 3105 UWord after_die_c_offset, after_abbv_c_offset; 3106 3107 /* --- Deal with this DIE --- */ 3108 posn = get_position_of_Cursor( c ); 3109 abbv_code = get_ULEB128( c ); 3110 set_abbv_Cursor( &abbv, td3, cc, abbv_code ); 3111 atag = get_ULEB128( &abbv ); 3112 TRACE_D3("\n"); 3113 TRACE_D3(" <%d><%lx>: Abbrev Number: %llu (%s)\n", 3114 level, posn, abbv_code, ML_(pp_DW_TAG)( atag ) ); 3115 3116 if (atag == 0) 3117 cc->barf("read_DIE: invalid zero tag on DIE"); 3118 3119 has_children = get_UChar( &abbv ); 3120 if (has_children != DW_children_no && has_children != DW_children_yes) 3121 cc->barf("read_DIE: invalid has_children value"); 3122 3123 /* We're set up to look at the fields of this DIE. Hand it off to 3124 any parser(s) that want to see it. Since they will in general 3125 advance both the DIE and abbrev cursors, remember their current 3126 settings so that we can then back up and do one final pass over 3127 the DIE, to print out its contents. 
*/ 3128 3129 start_die_c_offset = get_position_of_Cursor( c ); 3130 start_abbv_c_offset = get_position_of_Cursor( &abbv ); 3131 3132 while (True) { 3133 ULong cts; 3134 Int ctsSzB; 3135 UWord ctsMemSzB; 3136 ULong at_name = get_ULEB128( &abbv ); 3137 ULong at_form = get_ULEB128( &abbv ); 3138 if (at_name == 0 && at_form == 0) break; 3139 TRACE_D3(" %18s: ", ML_(pp_DW_AT)(at_name)); 3140 /* Get the form contents, but ignore them; the only purpose is 3141 to print them, if td3 is True */ 3142 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 3143 cc, c, td3, (DW_FORM)at_form ); 3144 TRACE_D3("\t"); 3145 TRACE_D3("\n"); 3146 } 3147 3148 after_die_c_offset = get_position_of_Cursor( c ); 3149 after_abbv_c_offset = get_position_of_Cursor( &abbv ); 3150 3151 set_position_of_Cursor( c, start_die_c_offset ); 3152 set_position_of_Cursor( &abbv, start_abbv_c_offset ); 3153 3154 parse_type_DIE( tyents, 3155 typarser, 3156 (DW_TAG)atag, 3157 posn, 3158 level, 3159 c, /* DIE cursor */ 3160 &abbv, /* abbrev cursor */ 3161 cc, 3162 td3 ); 3163 3164 set_position_of_Cursor( c, start_die_c_offset ); 3165 set_position_of_Cursor( &abbv, start_abbv_c_offset ); 3166 3167 parse_var_DIE( rangestree, 3168 tempvars, 3169 gexprs, 3170 varparser, 3171 (DW_TAG)atag, 3172 posn, 3173 level, 3174 c, /* DIE cursor */ 3175 &abbv, /* abbrev cursor */ 3176 cc, 3177 td3 ); 3178 3179 set_position_of_Cursor( c, after_die_c_offset ); 3180 set_position_of_Cursor( &abbv, after_abbv_c_offset ); 3181 3182 /* --- Now recurse into its children, if any --- */ 3183 if (has_children == DW_children_yes) { 3184 if (0) TRACE_D3("BEGIN children of level %d\n", level); 3185 while (True) { 3186 atag = peek_ULEB128( c ); 3187 if (atag == 0) break; 3188 read_DIE( rangestree, tyents, tempvars, gexprs, 3189 typarser, varparser, 3190 c, td3, cc, level+1 ); 3191 } 3192 /* Now we need to eat the terminating zero */ 3193 atag = get_ULEB128( c ); 3194 vg_assert(atag == 0); 3195 if (0) TRACE_D3("END children of level %d\n", level); 
3196 } 3197 3198} 3199 3200 3201static 3202void new_dwarf3_reader_wrk ( 3203 struct _DebugInfo* di, 3204 __attribute__((noreturn)) void (*barf)( HChar* ), 3205 UChar* debug_info_img, SizeT debug_info_sz, 3206 UChar* debug_abbv_img, SizeT debug_abbv_sz, 3207 UChar* debug_line_img, SizeT debug_line_sz, 3208 UChar* debug_str_img, SizeT debug_str_sz, 3209 UChar* debug_ranges_img, SizeT debug_ranges_sz, 3210 UChar* debug_loc_img, SizeT debug_loc_sz 3211) 3212{ 3213 XArray* /* of TyEnt */ tyents; 3214 XArray* /* of TyEnt */ tyents_to_keep; 3215 XArray* /* of GExpr* */ gexprs; 3216 XArray* /* of TempVar* */ tempvars; 3217 WordFM* /* of (XArray* of AddrRange, void) */ rangestree; 3218 TyEntIndexCache* tyents_cache = NULL; 3219 TyEntIndexCache* tyents_to_keep_cache = NULL; 3220 TempVar *varp, *varp2; 3221 GExpr* gexpr; 3222 Cursor abbv; /* for showing .debug_abbrev */ 3223 Cursor info; /* primary cursor for parsing .debug_info */ 3224 Cursor ranges; /* for showing .debug_ranges */ 3225 D3TypeParser typarser; 3226 D3VarParser varparser; 3227 Addr dr_base; 3228 UWord dr_offset; 3229 Word i, j, n; 3230 Bool td3 = di->trace_symtab; 3231 XArray* /* of TempVar* */ dioff_lookup_tab; 3232#if 0 3233 /* This doesn't work properly because it assumes all entries are 3234 packed end to end, with no holes. But that doesn't always 3235 appear to be the case, so it loses sync. And the D3 spec 3236 doesn't appear to require a no-hole situation either. 
*/ 3237 /* Display .debug_loc */ 3238 Addr dl_base; 3239 UWord dl_offset; 3240 Cursor loc; /* for showing .debug_loc */ 3241 TRACE_SYMTAB("\n"); 3242 TRACE_SYMTAB("\n------ The contents of .debug_loc ------\n"); 3243 TRACE_SYMTAB(" Offset Begin End Expression\n"); 3244 init_Cursor( &loc, debug_loc_img, 3245 debug_loc_sz, 0, barf, 3246 "Overrun whilst reading .debug_loc section(1)" ); 3247 dl_base = 0; 3248 dl_offset = 0; 3249 while (True) { 3250 UWord w1, w2; 3251 UWord len; 3252 if (is_at_end_Cursor( &loc )) 3253 break; 3254 3255 /* Read a (host-)word pair. This is something of a hack since 3256 the word size to read is really dictated by the ELF file; 3257 however, we assume we're reading a file with the same 3258 word-sizeness as the host. Reasonably enough. */ 3259 w1 = get_UWord( &loc ); 3260 w2 = get_UWord( &loc ); 3261 3262 if (w1 == 0 && w2 == 0) { 3263 /* end of list. reset 'base' */ 3264 TRACE_D3(" %08lx <End of list>\n", dl_offset); 3265 dl_base = 0; 3266 dl_offset = get_position_of_Cursor( &loc ); 3267 continue; 3268 } 3269 3270 if (w1 == -1UL) { 3271 /* new value for 'base' */ 3272 TRACE_D3(" %08lx %16lx %08lx (base address)\n", 3273 dl_offset, w1, w2); 3274 dl_base = w2; 3275 continue; 3276 } 3277 3278 /* else a location expression follows */ 3279 TRACE_D3(" %08lx %08lx %08lx ", 3280 dl_offset, w1 + dl_base, w2 + dl_base); 3281 len = (UWord)get_UShort( &loc ); 3282 while (len > 0) { 3283 UChar byte = get_UChar( &loc ); 3284 TRACE_D3("%02x", (UInt)byte); 3285 len--; 3286 } 3287 TRACE_SYMTAB("\n"); 3288 } 3289#endif 3290 3291 /* Display .debug_ranges */ 3292 TRACE_SYMTAB("\n"); 3293 TRACE_SYMTAB("\n------ The contents of .debug_ranges ------\n"); 3294 TRACE_SYMTAB(" Offset Begin End\n"); 3295 init_Cursor( &ranges, debug_ranges_img, 3296 debug_ranges_sz, 0, barf, 3297 "Overrun whilst reading .debug_ranges section(1)" ); 3298 dr_base = 0; 3299 dr_offset = 0; 3300 while (True) { 3301 UWord w1, w2; 3302 3303 if (is_at_end_Cursor( &ranges )) 3304 break; 3305 
3306 /* Read a (host-)word pair. This is something of a hack since 3307 the word size to read is really dictated by the ELF file; 3308 however, we assume we're reading a file with the same 3309 word-sizeness as the host. Reasonably enough. */ 3310 w1 = get_UWord( &ranges ); 3311 w2 = get_UWord( &ranges ); 3312 3313 if (w1 == 0 && w2 == 0) { 3314 /* end of list. reset 'base' */ 3315 TRACE_D3(" %08lx <End of list>\n", dr_offset); 3316 dr_base = 0; 3317 dr_offset = get_position_of_Cursor( &ranges ); 3318 continue; 3319 } 3320 3321 if (w1 == -1UL) { 3322 /* new value for 'base' */ 3323 TRACE_D3(" %08lx %16lx %08lx (base address)\n", 3324 dr_offset, w1, w2); 3325 dr_base = w2; 3326 continue; 3327 } 3328 3329 /* else a range [w1+base, w2+base) is denoted */ 3330 TRACE_D3(" %08lx %08lx %08lx\n", 3331 dr_offset, w1 + dr_base, w2 + dr_base); 3332 } 3333 3334 /* Display .debug_abbrev */ 3335 init_Cursor( &abbv, debug_abbv_img, debug_abbv_sz, 0, barf, 3336 "Overrun whilst reading .debug_abbrev section" ); 3337 TRACE_SYMTAB("\n"); 3338 TRACE_SYMTAB("\n------ The contents of .debug_abbrev ------\n"); 3339 while (True) { 3340 if (is_at_end_Cursor( &abbv )) 3341 break; 3342 /* Read one abbreviation table */ 3343 TRACE_D3(" Number TAG\n"); 3344 while (True) { 3345 ULong atag; 3346 UInt has_children; 3347 ULong acode = get_ULEB128( &abbv ); 3348 if (acode == 0) break; /* end of the table */ 3349 atag = get_ULEB128( &abbv ); 3350 has_children = get_UChar( &abbv ); 3351 TRACE_D3(" %llu %s [%s]\n", 3352 acode, ML_(pp_DW_TAG)(atag), 3353 ML_(pp_DW_children)(has_children)); 3354 while (True) { 3355 ULong at_name = get_ULEB128( &abbv ); 3356 ULong at_form = get_ULEB128( &abbv ); 3357 if (at_name == 0 && at_form == 0) break; 3358 TRACE_D3(" %18s %s\n", 3359 ML_(pp_DW_AT)(at_name), ML_(pp_DW_FORM)(at_form)); 3360 } 3361 } 3362 } 3363 TRACE_SYMTAB("\n"); 3364 3365 /* Now loop over the Compilation Units listed in the .debug_info 3366 section (see D3SPEC sec 7.5) paras 1 and 2. 
Each compilation 3367 unit contains a Compilation Unit Header followed by precisely 3368 one DW_TAG_compile_unit or DW_TAG_partial_unit DIE. */ 3369 init_Cursor( &info, debug_info_img, debug_info_sz, 0, barf, 3370 "Overrun whilst reading .debug_info section" ); 3371 3372 /* We'll park the harvested type information in here. Also create 3373 a fake "void" entry with offset D3_FAKEVOID_CUOFF, so we always 3374 have at least one type entry to refer to. D3_FAKEVOID_CUOFF is 3375 huge and presumably will not occur in any valid DWARF3 file -- 3376 it would need to have a .debug_info section 4GB long for that to 3377 happen. These type entries end up in the DebugInfo. */ 3378 tyents = VG_(newXA)( ML_(dinfo_zalloc), 3379 "di.readdwarf3.ndrw.1 (TyEnt temp array)", 3380 ML_(dinfo_free), sizeof(TyEnt) ); 3381 { TyEnt tyent; 3382 VG_(memset)(&tyent, 0, sizeof(tyent)); 3383 tyent.tag = Te_TyVoid; 3384 tyent.cuOff = D3_FAKEVOID_CUOFF; 3385 tyent.Te.TyVoid.isFake = True; 3386 VG_(addToXA)( tyents, &tyent ); 3387 } 3388 { TyEnt tyent; 3389 VG_(memset)(&tyent, 0, sizeof(tyent)); 3390 tyent.tag = Te_UNKNOWN; 3391 tyent.cuOff = D3_INVALID_CUOFF; 3392 VG_(addToXA)( tyents, &tyent ); 3393 } 3394 3395 /* This is a tree used to unique-ify the range lists that are 3396 manufactured by parse_var_DIE. References to the keys in the 3397 tree wind up in .rngMany fields in TempVars. We'll need to 3398 delete this tree, and the XArrays attached to it, at the end of 3399 this function. */ 3400 rangestree = VG_(newFM)( ML_(dinfo_zalloc), 3401 "di.readdwarf3.ndrw.2 (rangestree)", 3402 ML_(dinfo_free), 3403 (Word(*)(UWord,UWord))cmp__XArrays_of_AddrRange ); 3404 3405 /* List of variables we're accumulating. These don't end up in the 3406 DebugInfo; instead their contents are handed to ML_(addVar) and 3407 the list elements are then deleted. 
*/ 3408 tempvars = VG_(newXA)( ML_(dinfo_zalloc), 3409 "di.readdwarf3.ndrw.3 (TempVar*s array)", 3410 ML_(dinfo_free), 3411 sizeof(TempVar*) ); 3412 3413 /* List of GExprs we're accumulating. These wind up in the 3414 DebugInfo. */ 3415 gexprs = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.4", 3416 ML_(dinfo_free), sizeof(GExpr*) ); 3417 3418 /* We need a D3TypeParser to keep track of partially constructed 3419 types. It'll be discarded as soon as we've completed the CU, 3420 since the resulting information is tipped in to 'tyents' as it 3421 is generated. */ 3422 VG_(memset)( &typarser, 0, sizeof(typarser) ); 3423 typarser.sp = -1; 3424 typarser.language = '?'; 3425 for (i = 0; i < N_D3_TYPE_STACK; i++) { 3426 typarser.qparentE[i].tag = Te_EMPTY; 3427 typarser.qparentE[i].cuOff = D3_INVALID_CUOFF; 3428 } 3429 3430 VG_(memset)( &varparser, 0, sizeof(varparser) ); 3431 varparser.sp = -1; 3432 3433 TRACE_D3("\n------ Parsing .debug_info section ------\n"); 3434 while (True) { 3435 UWord cu_start_offset, cu_offset_now; 3436 CUConst cc; 3437 /* It may be that the stated size of this CU is larger than the 3438 amount of stuff actually in it. icc9 seems to generate CUs 3439 thusly. We use these variables to figure out if this is 3440 indeed the case, and if so how many bytes we need to skip to 3441 get to the start of the next CU. Not skipping those bytes 3442 causes us to misidentify the start of the next CU, and it all 3443 goes badly wrong after that (not surprisingly). */ 3444 UWord cu_size_including_IniLen, cu_amount_used; 3445 3446 /* It seems icc9 finishes the DIE info before debug_info_sz 3447 bytes have been used up. So be flexible, and declare the 3448 sequence complete if there is not enough remaining bytes to 3449 hold even the smallest conceivable CU header. (11 bytes I 3450 reckon). */ 3451 /* JRS 23Jan09: I suspect this is no longer necessary now that 3452 the code below contains a 'while (cu_amount_used < 3453 cu_size_including_IniLen ...' 
style loop, which skips over 3454 any leftover bytes at the end of a CU in the case where the 3455 CU's stated size is larger than its actual size (as 3456 determined by reading all its DIEs). However, for prudence, 3457 I'll leave the following test in place. I can't see that a 3458 CU header can be smaller than 11 bytes, so I don't think 3459 there's any harm possible through the test -- it just adds 3460 robustness. */ 3461 Word avail = get_remaining_length_Cursor( &info ); 3462 if (avail < 11) { 3463 if (avail > 0) 3464 TRACE_D3("new_dwarf3_reader_wrk: warning: " 3465 "%ld unused bytes after end of DIEs\n", avail); 3466 break; 3467 } 3468 3469 /* Check the varparser's stack is in a sane state. */ 3470 vg_assert(varparser.sp == -1); 3471 for (i = 0; i < N_D3_VAR_STACK; i++) { 3472 vg_assert(varparser.ranges[i] == NULL); 3473 vg_assert(varparser.level[i] == 0); 3474 } 3475 for (i = 0; i < N_D3_TYPE_STACK; i++) { 3476 vg_assert(typarser.qparentE[i].cuOff == D3_INVALID_CUOFF); 3477 vg_assert(typarser.qparentE[i].tag == Te_EMPTY); 3478 vg_assert(typarser.qlevel[i] == 0); 3479 } 3480 3481 cu_start_offset = get_position_of_Cursor( &info ); 3482 TRACE_D3("\n"); 3483 TRACE_D3(" Compilation Unit @ offset 0x%lx:\n", cu_start_offset); 3484 /* parse_CU_header initialises the CU's set_abbv_Cursor cache 3485 (saC_cache) */ 3486 parse_CU_Header( &cc, td3, &info, 3487 (UChar*)debug_abbv_img, debug_abbv_sz ); 3488 cc.debug_str_img = debug_str_img; 3489 cc.debug_str_sz = debug_str_sz; 3490 cc.debug_ranges_img = debug_ranges_img; 3491 cc.debug_ranges_sz = debug_ranges_sz; 3492 cc.debug_loc_img = debug_loc_img; 3493 cc.debug_loc_sz = debug_loc_sz; 3494 cc.debug_line_img = debug_line_img; 3495 cc.debug_line_sz = debug_line_sz; 3496 cc.debug_info_img = debug_info_img; 3497 cc.debug_info_sz = debug_info_sz; 3498 cc.cu_start_offset = cu_start_offset; 3499 cc.di = di; 3500 /* The CU's svma can be deduced by looking at the AT_low_pc 3501 value in the top level TAG_compile_unit, which is 
the topmost 3502 DIE. We'll leave it for the 'varparser' to acquire that info 3503 and fill it in -- since it is the only party to want to know 3504 it. */ 3505 cc.cu_svma_known = False; 3506 cc.cu_svma = 0; 3507 3508 /* Create a fake outermost-level range covering the entire 3509 address range. So we always have *something* to catch all 3510 variable declarations. */ 3511 varstack_push( &cc, &varparser, td3, 3512 unitary_range_list(0UL, ~0UL), 3513 -1, False/*isFunc*/, NULL/*fbGX*/ ); 3514 3515 /* And set up the file name table. When we come across the top 3516 level DIE for this CU (which is what the next call to 3517 read_DIE should process) we will copy all the file names out 3518 of the .debug_line img area and use this table to look up the 3519 copies when we later see filename numbers in DW_TAG_variables 3520 etc. */ 3521 vg_assert(!varparser.filenameTable ); 3522 varparser.filenameTable 3523 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.5", 3524 ML_(dinfo_free), 3525 sizeof(UChar*) ); 3526 vg_assert(varparser.filenameTable); 3527 3528 /* Now read the one-and-only top-level DIE for this CU. */ 3529 vg_assert(varparser.sp == 0); 3530 read_DIE( rangestree, 3531 tyents, tempvars, gexprs, 3532 &typarser, &varparser, 3533 &info, td3, &cc, 0 ); 3534 3535 cu_offset_now = get_position_of_Cursor( &info ); 3536 3537 if (0) VG_(printf)("Travelled: %lu size %llu\n", 3538 cu_offset_now - cc.cu_start_offset, 3539 cc.unit_length + (cc.is_dw64 ? 12 : 4)); 3540 3541 /* How big the CU claims it is .. */ 3542 cu_size_including_IniLen = cc.unit_length + (cc.is_dw64 ? 12 : 4); 3543 /* .. vs how big we have found it to be */ 3544 cu_amount_used = cu_offset_now - cc.cu_start_offset; 3545 3546 if (1) TRACE_D3("offset now %ld, d-i-size %ld\n", 3547 cu_offset_now, debug_info_sz); 3548 if (cu_offset_now > debug_info_sz) 3549 barf("toplevel DIEs beyond end of CU"); 3550 3551 /* If the CU is bigger than it claims to be, we've got a serious 3552 problem. 
*/ 3553 if (cu_amount_used > cu_size_including_IniLen) 3554 barf("CU's actual size appears to be larger than it claims it is"); 3555 3556 /* If the CU is smaller than it claims to be, we need to skip some 3557 bytes. Loop updates cu_offset_new and cu_amount_used. */ 3558 while (cu_amount_used < cu_size_including_IniLen 3559 && get_remaining_length_Cursor( &info ) > 0) { 3560 if (0) VG_(printf)("SKIP\n"); 3561 (void)get_UChar( &info ); 3562 cu_offset_now = get_position_of_Cursor( &info ); 3563 cu_amount_used = cu_offset_now - cc.cu_start_offset; 3564 } 3565 3566 if (cu_offset_now == debug_info_sz) 3567 break; 3568 3569 /* Preen to level -2. DIEs have level >= 0 so -2 cannot occur 3570 anywhere else at all. Our fake the-entire-address-space 3571 range is at level -1, so preening to -2 should completely 3572 empty the stack out. */ 3573 TRACE_D3("\n"); 3574 varstack_preen( &varparser, td3, -2 ); 3575 /* Similarly, empty the type stack out. */ 3576 typestack_preen( &typarser, td3, -2 ); 3577 /* else keep going */ 3578 3579 TRACE_D3("set_abbv_Cursor cache: %lu queries, %lu misses\n", 3580 cc.saC_cache_queries, cc.saC_cache_misses); 3581 3582 vg_assert(varparser.filenameTable ); 3583 VG_(deleteXA)( varparser.filenameTable ); 3584 varparser.filenameTable = NULL; 3585 } 3586 3587 /* From here on we're post-processing the stuff we got 3588 out of the .debug_info section. 
*/ 3589 if (td3) { 3590 TRACE_D3("\n"); 3591 ML_(pp_TyEnts)(tyents, "Initial type entity (TyEnt) array"); 3592 TRACE_D3("\n"); 3593 TRACE_D3("------ Compressing type entries ------\n"); 3594 } 3595 3596 tyents_cache = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.6", 3597 sizeof(TyEntIndexCache) ); 3598 ML_(TyEntIndexCache__invalidate)( tyents_cache ); 3599 dedup_types( td3, tyents, tyents_cache ); 3600 if (td3) { 3601 TRACE_D3("\n"); 3602 ML_(pp_TyEnts)(tyents, "After type entity (TyEnt) compression"); 3603 } 3604 3605 TRACE_D3("\n"); 3606 TRACE_D3("------ Resolving the types of variables ------\n" ); 3607 resolve_variable_types( barf, tyents, tyents_cache, tempvars ); 3608 3609 /* Copy all the non-INDIR tyents into a new table. For large 3610 .so's, about 90% of the tyents will by now have been resolved to 3611 INDIRs, and we no longer need them, and so don't need to store 3612 them. */ 3613 tyents_to_keep 3614 = VG_(newXA)( ML_(dinfo_zalloc), 3615 "di.readdwarf3.ndrw.7 (TyEnt to-keep array)", 3616 ML_(dinfo_free), sizeof(TyEnt) ); 3617 n = VG_(sizeXA)( tyents ); 3618 for (i = 0; i < n; i++) { 3619 TyEnt* ent = VG_(indexXA)( tyents, i ); 3620 if (ent->tag != Te_INDIR) 3621 VG_(addToXA)( tyents_to_keep, ent ); 3622 } 3623 3624 VG_(deleteXA)( tyents ); 3625 tyents = NULL; 3626 ML_(dinfo_free)( tyents_cache ); 3627 tyents_cache = NULL; 3628 3629 /* Sort tyents_to_keep so we can lookup in it. A complete (if 3630 minor) waste of time, since tyents itself is sorted, but 3631 necessary since VG_(lookupXA) refuses to cooperate if we 3632 don't. */ 3633 VG_(setCmpFnXA)( 3634 tyents_to_keep, 3635 (Int(*)(void*,void*)) ML_(TyEnt__cmp_by_cuOff_only) 3636 ); 3637 VG_(sortXA)( tyents_to_keep ); 3638 3639 /* Enable cacheing on tyents_to_keep */ 3640 tyents_to_keep_cache 3641 = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.8", 3642 sizeof(TyEntIndexCache) ); 3643 ML_(TyEntIndexCache__invalidate)( tyents_to_keep_cache ); 3644 3645 /* And record the tyents in the DebugInfo. 
We do this before 3646 starting to hand variables to ML_(addVar), since if ML_(addVar) 3647 wants to do debug printing (of the types of said vars) then it 3648 will need the tyents.*/ 3649 vg_assert(!di->admin_tyents); 3650 di->admin_tyents = tyents_to_keep; 3651 3652 /* Bias all the location expressions. */ 3653 TRACE_D3("\n"); 3654 TRACE_D3("------ Biasing the location expressions ------\n" ); 3655 3656 n = VG_(sizeXA)( gexprs ); 3657 for (i = 0; i < n; i++) { 3658 gexpr = *(GExpr**)VG_(indexXA)( gexprs, i ); 3659 bias_GX( gexpr, di ); 3660 } 3661 3662 TRACE_D3("\n"); 3663 TRACE_D3("------ Acquired the following variables: ------\n\n"); 3664 3665 /* Park (pointers to) all the vars in an XArray, so we can look up 3666 abstract origins quickly. The array is sorted (hence, looked-up 3667 by) the .dioff fields. Since the .dioffs should be in strictly 3668 ascending order, there is no need to sort the array after 3669 construction. The ascendingness is however asserted for. */ 3670 dioff_lookup_tab 3671 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.9", 3672 ML_(dinfo_free), 3673 sizeof(TempVar*) ); 3674 vg_assert(dioff_lookup_tab); 3675 3676 n = VG_(sizeXA)( tempvars ); 3677 for (i = 0; i < n; i++) { 3678 varp = *(TempVar**)VG_(indexXA)( tempvars, i ); 3679 if (i > 0) { 3680 varp2 = *(TempVar**)VG_(indexXA)( tempvars, i-1 ); 3681 /* why should this hold? Only, I think, because we've 3682 constructed the array by reading .debug_info sequentially, 3683 and so the array .dioff fields should reflect that, and be 3684 strictly ascending. */ 3685 vg_assert(varp2->dioff < varp->dioff); 3686 } 3687 VG_(addToXA)( dioff_lookup_tab, &varp ); 3688 } 3689 VG_(setCmpFnXA)( dioff_lookup_tab, cmp_TempVar_by_dioff ); 3690 VG_(sortXA)( dioff_lookup_tab ); /* POINTLESS; FIXME: rm */ 3691 3692 /* Now visit each var. Collect up as much info as possible for 3693 each var and hand it to ML_(addVar). 
*/ 3694 n = VG_(sizeXA)( tempvars ); 3695 for (j = 0; j < n; j++) { 3696 TyEnt* ent; 3697 varp = *(TempVar**)VG_(indexXA)( tempvars, j ); 3698 3699 /* Possibly show .. */ 3700 if (td3) { 3701 VG_(printf)("<%lx> addVar: level %d: %s :: ", 3702 varp->dioff, 3703 varp->level, 3704 varp->name ? varp->name : (UChar*)"<anon_var>" ); 3705 if (varp->typeR) { 3706 ML_(pp_TyEnt_C_ishly)( tyents_to_keep, varp->typeR ); 3707 } else { 3708 VG_(printf)("NULL"); 3709 } 3710 VG_(printf)("\n Loc="); 3711 if (varp->gexpr) { 3712 ML_(pp_GX)(varp->gexpr); 3713 } else { 3714 VG_(printf)("NULL"); 3715 } 3716 VG_(printf)("\n"); 3717 if (varp->fbGX) { 3718 VG_(printf)(" FrB="); 3719 ML_(pp_GX)( varp->fbGX ); 3720 VG_(printf)("\n"); 3721 } else { 3722 VG_(printf)(" FrB=none\n"); 3723 } 3724 VG_(printf)(" declared at: %s:%d\n", 3725 varp->fName ? varp->fName : (UChar*)"NULL", 3726 varp->fLine ); 3727 if (varp->absOri != (UWord)D3_INVALID_CUOFF) 3728 VG_(printf)(" abstract origin: <%lx>\n", varp->absOri); 3729 } 3730 3731 /* Skip variables which have no location. These must be 3732 abstract instances; they are useless as-is since with no 3733 location they have no specified memory location. They will 3734 presumably be referred to via the absOri fields of other 3735 variables. */ 3736 if (!varp->gexpr) { 3737 TRACE_D3(" SKIP (no location)\n\n"); 3738 continue; 3739 } 3740 3741 /* So it has a location, at least. If it refers to some other 3742 entry through its absOri field, pull in further info through 3743 that. 
*/ 3744 if (varp->absOri != (UWord)D3_INVALID_CUOFF) { 3745 Bool found; 3746 Word ixFirst, ixLast; 3747 TempVar key; 3748 TempVar* keyp = &key; 3749 TempVar *varAI; 3750 VG_(memset)(&key, 0, sizeof(key)); /* not necessary */ 3751 key.dioff = varp->absOri; /* this is what we want to find */ 3752 found = VG_(lookupXA)( dioff_lookup_tab, &keyp, 3753 &ixFirst, &ixLast ); 3754 if (!found) { 3755 /* barf("DW_AT_abstract_origin can't be resolved"); */ 3756 TRACE_D3(" SKIP (DW_AT_abstract_origin can't be resolved)\n\n"); 3757 continue; 3758 } 3759 /* If the following fails, there is more than one entry with 3760 the same dioff. Which can't happen. */ 3761 vg_assert(ixFirst == ixLast); 3762 varAI = *(TempVar**)VG_(indexXA)( dioff_lookup_tab, ixFirst ); 3763 /* stay sane */ 3764 vg_assert(varAI); 3765 vg_assert(varAI->dioff == varp->absOri); 3766 3767 /* Copy what useful info we can. */ 3768 if (varAI->typeR && !varp->typeR) 3769 varp->typeR = varAI->typeR; 3770 if (varAI->name && !varp->name) 3771 varp->name = varAI->name; 3772 if (varAI->fName && !varp->fName) 3773 varp->fName = varAI->fName; 3774 if (varAI->fLine > 0 && varp->fLine == 0) 3775 varp->fLine = varAI->fLine; 3776 } 3777 3778 /* Give it a name if it doesn't have one. */ 3779 if (!varp->name) 3780 varp->name = ML_(addStr)( di, "<anon_var>", -1 ); 3781 3782 /* So now does it have enough info to be useful? */ 3783 /* NOTE: re typeR: this is a hack. If typeR is Te_UNKNOWN then 3784 the type didn't get resolved. Really, in that case 3785 something's broken earlier on, and should be fixed, rather 3786 than just skipping the variable. */ 3787 ent = ML_(TyEnts__index_by_cuOff)( tyents_to_keep, 3788 tyents_to_keep_cache, 3789 varp->typeR ); 3790 /* The next two assertions should be guaranteed by 3791 our previous call to resolve_variable_types. 
*/ 3792 vg_assert(ent); 3793 vg_assert(ML_(TyEnt__is_type)(ent) || ent->tag == Te_UNKNOWN); 3794 3795 if (ent->tag == Te_UNKNOWN) continue; 3796 3797 vg_assert(varp->gexpr); 3798 vg_assert(varp->name); 3799 vg_assert(varp->typeR); 3800 vg_assert(varp->level >= 0); 3801 3802 /* Ok. So we're going to keep it. Call ML_(addVar) once for 3803 each address range in which the variable exists. */ 3804 TRACE_D3(" ACQUIRE for range(s) "); 3805 { AddrRange oneRange; 3806 AddrRange* varPcRanges; 3807 Word nVarPcRanges; 3808 /* Set up to iterate over address ranges, however 3809 represented. */ 3810 if (varp->nRanges == 0 || varp->nRanges == 1) { 3811 vg_assert(!varp->rngMany); 3812 if (varp->nRanges == 0) { 3813 vg_assert(varp->rngOneMin == 0); 3814 vg_assert(varp->rngOneMax == 0); 3815 } 3816 nVarPcRanges = varp->nRanges; 3817 oneRange.aMin = varp->rngOneMin; 3818 oneRange.aMax = varp->rngOneMax; 3819 varPcRanges = &oneRange; 3820 } else { 3821 vg_assert(varp->rngMany); 3822 vg_assert(varp->rngOneMin == 0); 3823 vg_assert(varp->rngOneMax == 0); 3824 nVarPcRanges = VG_(sizeXA)(varp->rngMany); 3825 vg_assert(nVarPcRanges >= 2); 3826 vg_assert(nVarPcRanges == (Word)varp->nRanges); 3827 varPcRanges = VG_(indexXA)(varp->rngMany, 0); 3828 } 3829 if (varp->level == 0) 3830 vg_assert( nVarPcRanges == 1 ); 3831 /* and iterate */ 3832 for (i = 0; i < nVarPcRanges; i++) { 3833 Addr pcMin = varPcRanges[i].aMin; 3834 Addr pcMax = varPcRanges[i].aMax; 3835 vg_assert(pcMin <= pcMax); 3836 /* Level 0 is the global address range. So at level 0 we 3837 don't want to bias pcMin/pcMax; but at all other levels 3838 we do since those are derived from svmas in the Dwarf 3839 we're reading. Be paranoid ... */ 3840 if (varp->level == 0) { 3841 vg_assert(pcMin == (Addr)0); 3842 vg_assert(pcMax == ~(Addr)0); 3843 } else { 3844 /* vg_assert(pcMin > (Addr)0); 3845 No .. we can legitimately expect to see ranges like 3846 0x0-0x11D (pre-biasing, of course). 
*/ 3847 vg_assert(pcMax < ~(Addr)0); 3848 } 3849 3850 /* Apply text biasing, for non-global variables. */ 3851 if (varp->level > 0) { 3852 pcMin += di->text_debug_bias; 3853 pcMax += di->text_debug_bias; 3854 } 3855 3856 if (i > 0 && (i%2) == 0) 3857 TRACE_D3("\n "); 3858 TRACE_D3("[%#lx,%#lx] ", pcMin, pcMax ); 3859 3860 ML_(addVar)( 3861 di, varp->level, 3862 pcMin, pcMax, 3863 varp->name, varp->typeR, 3864 varp->gexpr, varp->fbGX, 3865 varp->fName, varp->fLine, td3 3866 ); 3867 } 3868 } 3869 3870 TRACE_D3("\n\n"); 3871 /* and move on to the next var */ 3872 } 3873 3874 /* Now free all the TempVars */ 3875 n = VG_(sizeXA)( tempvars ); 3876 for (i = 0; i < n; i++) { 3877 varp = *(TempVar**)VG_(indexXA)( tempvars, i ); 3878 ML_(dinfo_free)(varp); 3879 } 3880 VG_(deleteXA)( tempvars ); 3881 tempvars = NULL; 3882 3883 /* and the temp lookup table */ 3884 VG_(deleteXA)( dioff_lookup_tab ); 3885 3886 /* and the ranges tree. Note that we need to also free the XArrays 3887 which constitute the keys, hence pass VG_(deleteXA) as a 3888 key-finalizer. */ 3889 VG_(deleteFM)( rangestree, (void(*)(UWord))VG_(deleteXA), NULL ); 3890 3891 /* and the tyents_to_keep cache */ 3892 ML_(dinfo_free)( tyents_to_keep_cache ); 3893 tyents_to_keep_cache = NULL; 3894 3895 /* and the file name table (just the array, not the entries 3896 themselves). (Apparently, 2008-Oct-23, varparser.filenameTable 3897 can be NULL here, for icc9 generated Dwarf3. 
Not sure what that 3898 signifies (a deeper problem with the reader?)) */ 3899 if (varparser.filenameTable) { 3900 VG_(deleteXA)( varparser.filenameTable ); 3901 varparser.filenameTable = NULL; 3902 } 3903 3904 /* record the GExprs in di so they can be freed later */ 3905 vg_assert(!di->admin_gexprs); 3906 di->admin_gexprs = gexprs; 3907} 3908 3909 3910/*------------------------------------------------------------*/ 3911/*--- ---*/ 3912/*--- The "new" DWARF3 reader -- top level control logic ---*/ 3913/*--- ---*/ 3914/*------------------------------------------------------------*/ 3915 3916/* --- !!! --- EXTERNAL HEADERS start --- !!! --- */ 3917#include <setjmp.h> /* For jmp_buf */ 3918/* --- !!! --- EXTERNAL HEADERS end --- !!! --- */ 3919 3920static Bool d3rd_jmpbuf_valid = False; 3921static HChar* d3rd_jmpbuf_reason = NULL; 3922static jmp_buf d3rd_jmpbuf; 3923 3924static __attribute__((noreturn)) void barf ( HChar* reason ) { 3925 vg_assert(d3rd_jmpbuf_valid); 3926 d3rd_jmpbuf_reason = reason; 3927 __builtin_longjmp(&d3rd_jmpbuf, 1); 3928 /*NOTREACHED*/ 3929 vg_assert(0); 3930} 3931 3932 3933void 3934ML_(new_dwarf3_reader) ( 3935 struct _DebugInfo* di, 3936 UChar* debug_info_img, SizeT debug_info_sz, 3937 UChar* debug_abbv_img, SizeT debug_abbv_sz, 3938 UChar* debug_line_img, SizeT debug_line_sz, 3939 UChar* debug_str_img, SizeT debug_str_sz, 3940 UChar* debug_ranges_img, SizeT debug_ranges_sz, 3941 UChar* debug_loc_img, SizeT debug_loc_sz 3942) 3943{ 3944 volatile Int jumped; 3945 volatile Bool td3 = di->trace_symtab; 3946 3947 /* Run the _wrk function to read the dwarf3. If it succeeds, it 3948 just returns normally. If there is any failure, it longjmp's 3949 back here, having first set d3rd_jmpbuf_reason to something 3950 useful. */ 3951 vg_assert(d3rd_jmpbuf_valid == False); 3952 vg_assert(d3rd_jmpbuf_reason == NULL); 3953 3954 d3rd_jmpbuf_valid = True; 3955 jumped = __builtin_setjmp(&d3rd_jmpbuf); 3956 if (jumped == 0) { 3957 /* try this ... 
*/ 3958 new_dwarf3_reader_wrk( di, barf, 3959 debug_info_img, debug_info_sz, 3960 debug_abbv_img, debug_abbv_sz, 3961 debug_line_img, debug_line_sz, 3962 debug_str_img, debug_str_sz, 3963 debug_ranges_img, debug_ranges_sz, 3964 debug_loc_img, debug_loc_sz ); 3965 d3rd_jmpbuf_valid = False; 3966 TRACE_D3("\n------ .debug_info reading was successful ------\n"); 3967 } else { 3968 /* It longjmp'd. */ 3969 d3rd_jmpbuf_valid = False; 3970 /* Can't longjump without giving some sort of reason. */ 3971 vg_assert(d3rd_jmpbuf_reason != NULL); 3972 3973 TRACE_D3("\n------ .debug_info reading failed ------\n"); 3974 3975 ML_(symerr)(di, True, d3rd_jmpbuf_reason); 3976 } 3977 3978 d3rd_jmpbuf_valid = False; 3979 d3rd_jmpbuf_reason = NULL; 3980} 3981 3982 3983 3984/* --- Unused code fragments which might be useful one day. --- */ 3985 3986#if 0 3987 /* Read the arange tables */ 3988 TRACE_SYMTAB("\n"); 3989 TRACE_SYMTAB("\n------ The contents of .debug_arange ------\n"); 3990 init_Cursor( &aranges, debug_aranges_img, 3991 debug_aranges_sz, 0, barf, 3992 "Overrun whilst reading .debug_aranges section" ); 3993 while (True) { 3994 ULong len, d_i_offset; 3995 Bool is64; 3996 UShort version; 3997 UChar asize, segsize; 3998 3999 if (is_at_end_Cursor( &aranges )) 4000 break; 4001 /* Read one arange thingy */ 4002 /* initial_length field */ 4003 len = get_Initial_Length( &is64, &aranges, 4004 "in .debug_aranges: invalid initial-length field" ); 4005 version = get_UShort( &aranges ); 4006 d_i_offset = get_Dwarfish_UWord( &aranges, is64 ); 4007 asize = get_UChar( &aranges ); 4008 segsize = get_UChar( &aranges ); 4009 TRACE_D3(" Length: %llu\n", len); 4010 TRACE_D3(" Version: %d\n", (Int)version); 4011 TRACE_D3(" Offset into .debug_info: %llx\n", d_i_offset); 4012 TRACE_D3(" Pointer Size: %d\n", (Int)asize); 4013 TRACE_D3(" Segment Size: %d\n", (Int)segsize); 4014 TRACE_D3("\n"); 4015 TRACE_D3(" Address Length\n"); 4016 4017 while ((get_position_of_Cursor( &aranges ) % (2 * asize)) > 0) { 
4018 (void)get_UChar( & aranges ); 4019 } 4020 while (True) { 4021 ULong address = get_Dwarfish_UWord( &aranges, asize==8 ); 4022 ULong length = get_Dwarfish_UWord( &aranges, asize==8 ); 4023 TRACE_D3(" 0x%016llx 0x%llx\n", address, length); 4024 if (address == 0 && length == 0) break; 4025 } 4026 } 4027 TRACE_SYMTAB("\n"); 4028#endif 4029 4030#endif // defined(VGO_linux) || defined(VGO_darwin) 4031 4032/*--------------------------------------------------------------------*/ 4033/*--- end ---*/ 4034/*--------------------------------------------------------------------*/ 4035