readdwarf3.c revision bdee918842b4b2d4a09146a4642e999dc71b3652
1 2/*--------------------------------------------------------------------*/ 3/*--- Read DWARF3/4 ".debug_info" sections (DIE trees). ---*/ 4/*--- readdwarf3.c ---*/ 5/*--------------------------------------------------------------------*/ 6 7/* 8 This file is part of Valgrind, a dynamic binary instrumentation 9 framework. 10 11 Copyright (C) 2008-2010 OpenWorks LLP 12 info@open-works.co.uk 13 14 This program is free software; you can redistribute it and/or 15 modify it under the terms of the GNU General Public License as 16 published by the Free Software Foundation; either version 2 of the 17 License, or (at your option) any later version. 18 19 This program is distributed in the hope that it will be useful, but 20 WITHOUT ANY WARRANTY; without even the implied warranty of 21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 General Public License for more details. 23 24 You should have received a copy of the GNU General Public License 25 along with this program; if not, write to the Free Software 26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 27 02111-1307, USA. 28 29 The GNU General Public License is contained in the file COPYING. 30 31 Neither the names of the U.S. Department of Energy nor the 32 University of California nor the names of its contributors may be 33 used to endorse or promote products derived from this software 34 without prior written permission. 35*/ 36 37#if defined(VGO_linux) || defined(VGO_darwin) 38 39/* REFERENCE (without which this code will not make much sense): 40 41 DWARF Debugging Information Format, Version 3, 42 dated 20 December 2005 (the "D3 spec"). 43 44 Available at http://www.dwarfstd.org/Dwarf3.pdf. There's also a 45 .doc (MS Word) version, but for some reason the section numbers 46 between the Word and PDF versions differ by 1 in the first digit. 47 All section references in this code are to the PDF version. 

   CURRENT HACKS:

   DW_TAG_{const,volatile}_type with no DW_AT_type is allowed; it is
      assumed to mean "const void" or "volatile void" respectively.
      GDB appears to interpret them like this, anyway.

   In many cases it is important to know the svma of a CU (the "base
   address of the CU", as the D3 spec calls it).  There are some
   situations in which the spec implies this value is unknown, but the
   Dwarf3 produced by gcc-4.1 seems to assume it is not unknown but
   merely zero when not explicitly stated.  So we too have to make
   that assumption.

   POTENTIAL BUG?  Spotted 6 Sept 08.  Why doesn't
   unitary_range_list() bias the resulting range list in the same way
   that its more general cousin, get_range_list(), does?  I don't
   know.

   TODO, 2008 Feb 17:

   get rid of cu_svma_known and document the assumed-zero svma hack.

   ML_(sizeOfType): differentiate between zero sized types and types
   for which the size is unknown.  Is this important?  I don't know.

   DW_AT_array_types: deal with explicit sizes (currently we compute
   the size from the bounds and the element size, although that's
   fragile if the bounds are incompletely specified, or completely
   absent)

   Document reason for difference (by 1) of stack preening depth in
   parse_var_DIE vs parse_type_DIE.

   Don't hand to ML_(addVars), vars whose locations are entirely in
   registers (DW_OP_reg*).  This is merely a space-saving
   optimisation, as ML_(evaluate_Dwarf3_Expr) should handle these
   expressions correctly, by failing to evaluate them and hence
   effectively ignoring the variable with which they are associated.

   Deal with DW_AT_array_types which have element size != stride

   In some cases, the info for a variable is split between two
   different DIEs (generally a declarer and a definer).  We punt on
   these.  Could do better here.

   The 'data_bias' argument passed to the expression evaluator
   (ML_(evaluate_Dwarf3_Expr)) should really be changed to a
   MaybeUWord, to make it clear when we do vs don't know what it is
   for the evaluation of an expression.  At the moment zero is passed
   for this parameter in the don't know case.  That's a bit fragile
   and obscure; using a MaybeUWord would be clearer.

   POTENTIAL PERFORMANCE IMPROVEMENTS:

   Currently, duplicate removal and all other queries for the type
   entities array are done using cuOffset-based pointing, which
   involves a binary search (VG_(lookupXA)) for each access.  This is
   wildly inefficient, although simple.  It would be better to
   translate all the cuOffset-based references (iow, all the "R" and
   "Rs" fields in the TyEnts in 'tyents') to direct index numbers in
   'tyents' right at the start of dedup_types(), and use direct
   indexing (VG_(indexXA)) wherever possible after that.

   cmp__XArrays_of_AddrRange is also a performance bottleneck.  Move
   VG_(indexXA) into pub_tool_xarray.h so it can be inlined at all use
   points, and possibly also make an _UNCHECKED version which skips
   the range checks in performance-critical situations such as this.

   Handle interaction between read_DIE and parse_{var,type}_DIE
   better.  Currently read_DIE reads the entire DIE just to find where
   the end is (and for debug printing), so that it can later reliably
   move the cursor to the end regardless of what parse_{var,type}_DIE
   do.  This means many DIEs (most, even?) are read twice.  It would
   be smarter to make parse_{var,type}_DIE return a Bool indicating
   whether or not they advanced the DIE cursor, and only if they
   didn't should read_DIE itself read through the DIE.

   ML_(addVar) and add_var_to_arange: quite a lot of DiAddrRanges have
   zero variables in their .vars XArray.
Rather than have an XArray 128 with zero elements (which uses 2 malloc'd blocks), allow the .vars 129 pointer to be NULL in this case. 130 131 More generally, reduce the amount of memory allocated and freed 132 while reading Dwarf3 type/variable information. Even modest (20MB) 133 objects cause this module to allocate and free hundreds of 134 thousands of small blocks, and ML_(arena_malloc) and its various 135 groupies always show up at the top of performance profiles. */ 136 137#include "pub_core_basics.h" 138#include "pub_core_debuginfo.h" 139#include "pub_core_libcbase.h" 140#include "pub_core_libcassert.h" 141#include "pub_core_libcprint.h" 142#include "pub_core_options.h" 143#include "pub_core_tooliface.h" /* VG_(needs) */ 144#include "pub_core_xarray.h" 145#include "pub_core_wordfm.h" 146#include "priv_misc.h" /* dinfo_zalloc/free */ 147#include "priv_tytypes.h" 148#include "priv_d3basics.h" 149#include "priv_storage.h" 150#include "priv_readdwarf3.h" /* self */ 151 152 153/*------------------------------------------------------------*/ 154/*--- ---*/ 155/*--- Basic machinery for parsing DIEs. ---*/ 156/*--- ---*/ 157/*------------------------------------------------------------*/ 158 159#define TRACE_D3(format, args...) 
\ 160 if (td3) { VG_(printf)(format, ## args); } 161 162#define D3_INVALID_CUOFF ((UWord)(-1UL)) 163#define D3_FAKEVOID_CUOFF ((UWord)(-2UL)) 164 165typedef 166 struct { 167 UChar* region_start_img; 168 UWord region_szB; 169 UWord region_next; 170 void (*barf)( HChar* ) __attribute__((noreturn)); 171 HChar* barfstr; 172 } 173 Cursor; 174 175static inline Bool is_sane_Cursor ( Cursor* c ) { 176 if (!c) return False; 177 if (!c->barf) return False; 178 if (!c->barfstr) return False; 179 return True; 180} 181 182static void init_Cursor ( Cursor* c, 183 UChar* region_start_img, 184 UWord region_szB, 185 UWord region_next, 186 __attribute__((noreturn)) void (*barf)( HChar* ), 187 HChar* barfstr ) 188{ 189 vg_assert(c); 190 VG_(memset)(c, 0, sizeof(*c)); 191 c->region_start_img = region_start_img; 192 c->region_szB = region_szB; 193 c->region_next = region_next; 194 c->barf = barf; 195 c->barfstr = barfstr; 196 vg_assert(is_sane_Cursor(c)); 197} 198 199static Bool is_at_end_Cursor ( Cursor* c ) { 200 vg_assert(is_sane_Cursor(c)); 201 return c->region_next >= c->region_szB; 202} 203 204static inline UWord get_position_of_Cursor ( Cursor* c ) { 205 vg_assert(is_sane_Cursor(c)); 206 return c->region_next; 207} 208static inline void set_position_of_Cursor ( Cursor* c, UWord pos ) { 209 c->region_next = pos; 210 vg_assert(is_sane_Cursor(c)); 211} 212 213static /*signed*/Word get_remaining_length_Cursor ( Cursor* c ) { 214 vg_assert(is_sane_Cursor(c)); 215 return c->region_szB - c->region_next; 216} 217 218static UChar* get_address_of_Cursor ( Cursor* c ) { 219 vg_assert(is_sane_Cursor(c)); 220 return &c->region_start_img[ c->region_next ]; 221} 222 223__attribute__((noreturn)) 224static void failWith ( Cursor* c, HChar* str ) { 225 vg_assert(c); 226 vg_assert(c->barf); 227 c->barf(str); 228 /*NOTREACHED*/ 229 vg_assert(0); 230} 231 232/* FIXME: document assumptions on endianness for 233 get_UShort/UInt/ULong. 
*/ 234static inline UChar get_UChar ( Cursor* c ) { 235 UChar r; 236 /* vg_assert(is_sane_Cursor(c)); */ 237 if (c->region_next + sizeof(UChar) > c->region_szB) { 238 c->barf(c->barfstr); 239 /*NOTREACHED*/ 240 vg_assert(0); 241 } 242 r = * (UChar*) &c->region_start_img[ c->region_next ]; 243 c->region_next += sizeof(UChar); 244 return r; 245} 246static UShort get_UShort ( Cursor* c ) { 247 UShort r; 248 vg_assert(is_sane_Cursor(c)); 249 if (c->region_next + sizeof(UShort) > c->region_szB) { 250 c->barf(c->barfstr); 251 /*NOTREACHED*/ 252 vg_assert(0); 253 } 254 r = * (UShort*) &c->region_start_img[ c->region_next ]; 255 c->region_next += sizeof(UShort); 256 return r; 257} 258static UInt get_UInt ( Cursor* c ) { 259 UInt r; 260 vg_assert(is_sane_Cursor(c)); 261 if (c->region_next + sizeof(UInt) > c->region_szB) { 262 c->barf(c->barfstr); 263 /*NOTREACHED*/ 264 vg_assert(0); 265 } 266 r = * (UInt*) &c->region_start_img[ c->region_next ]; 267 c->region_next += sizeof(UInt); 268 return r; 269} 270static ULong get_ULong ( Cursor* c ) { 271 ULong r; 272 vg_assert(is_sane_Cursor(c)); 273 if (c->region_next + sizeof(ULong) > c->region_szB) { 274 c->barf(c->barfstr); 275 /*NOTREACHED*/ 276 vg_assert(0); 277 } 278 r = * (ULong*) &c->region_start_img[ c->region_next ]; 279 c->region_next += sizeof(ULong); 280 return r; 281} 282static inline ULong get_ULEB128 ( Cursor* c ) { 283 ULong result; 284 Int shift; 285 UChar byte; 286 /* unroll first iteration */ 287 byte = get_UChar( c ); 288 result = (ULong)(byte & 0x7f); 289 if (LIKELY(!(byte & 0x80))) return result; 290 shift = 7; 291 /* end unroll first iteration */ 292 do { 293 byte = get_UChar( c ); 294 result |= ((ULong)(byte & 0x7f)) << shift; 295 shift += 7; 296 } while (byte & 0x80); 297 return result; 298} 299static Long get_SLEB128 ( Cursor* c ) { 300 ULong result = 0; 301 Int shift = 0; 302 UChar byte; 303 do { 304 byte = get_UChar(c); 305 result |= ((ULong)(byte & 0x7f)) << shift; 306 shift += 7; 307 } while (byte & 
0x80); 308 if (shift < 64 && (byte & 0x40)) 309 result |= -(1ULL << shift); 310 return result; 311} 312 313/* Assume 'c' points to the start of a string. Return the absolute 314 address of whatever it points at, and advance it past the 315 terminating zero. This makes it safe for the caller to then copy 316 the string with ML_(addStr), since (w.r.t. image overruns) the 317 process of advancing past the terminating zero will already have 318 "vetted" the string. */ 319static UChar* get_AsciiZ ( Cursor* c ) { 320 UChar uc; 321 UChar* res = get_address_of_Cursor(c); 322 do { uc = get_UChar(c); } while (uc != 0); 323 return res; 324} 325 326static ULong peek_ULEB128 ( Cursor* c ) { 327 Word here = c->region_next; 328 ULong r = get_ULEB128( c ); 329 c->region_next = here; 330 return r; 331} 332static UChar peek_UChar ( Cursor* c ) { 333 Word here = c->region_next; 334 UChar r = get_UChar( c ); 335 c->region_next = here; 336 return r; 337} 338 339static ULong get_Dwarfish_UWord ( Cursor* c, Bool is_dw64 ) { 340 return is_dw64 ? 
get_ULong(c) : (ULong) get_UInt(c); 341} 342 343static UWord get_UWord ( Cursor* c ) { 344 vg_assert(sizeof(UWord) == sizeof(void*)); 345 if (sizeof(UWord) == 4) return get_UInt(c); 346 if (sizeof(UWord) == 8) return get_ULong(c); 347 vg_assert(0); 348} 349 350/* Read a DWARF3 'Initial Length' field */ 351static ULong get_Initial_Length ( /*OUT*/Bool* is64, 352 Cursor* c, 353 HChar* barfMsg ) 354{ 355 ULong w64; 356 UInt w32; 357 *is64 = False; 358 w32 = get_UInt( c ); 359 if (w32 >= 0xFFFFFFF0 && w32 < 0xFFFFFFFF) { 360 c->barf( barfMsg ); 361 } 362 else if (w32 == 0xFFFFFFFF) { 363 *is64 = True; 364 w64 = get_ULong( c ); 365 } else { 366 *is64 = False; 367 w64 = (ULong)w32; 368 } 369 return w64; 370} 371 372 373/*------------------------------------------------------------*/ 374/*--- ---*/ 375/*--- "CUConst" structure ---*/ 376/*--- ---*/ 377/*------------------------------------------------------------*/ 378 379#define N_ABBV_CACHE 32 380 381/* Holds information that is constant through the parsing of a 382 Compilation Unit. This is basically plumbed through to 383 everywhere. */ 384typedef 385 struct { 386 /* Call here if anything goes wrong */ 387 void (*barf)( HChar* ) __attribute__((noreturn)); 388 /* Is this 64-bit DWARF ? */ 389 Bool is_dw64; 390 /* Which DWARF version ? (2, 3 or 4) */ 391 UShort version; 392 /* Length of this Compilation Unit, as stated in the 393 .unit_length :: InitialLength field of the CU Header. 394 However, this size (as specified by the D3 spec) does not 395 include the size of the .unit_length field itself, which is 396 either 4 or 12 bytes (32-bit or 64-bit Dwarf3). That value 397 can be obtained through the expression ".is_dw64 ? 12 : 4". */ 398 ULong unit_length; 399 /* Offset of start of this unit in .debug_info */ 400 UWord cu_start_offset; 401 /* SVMA for this CU. In the D3 spec, is known as the "base 402 address of the compilation unit (last para sec 3.1.1). 
403 Needed for (amongst things) interpretation of location-list 404 values. */ 405 Addr cu_svma; 406 Bool cu_svma_known; 407 /* The debug_abbreviations table to be used for this Unit */ 408 UChar* debug_abbv; 409 /* Upper bound on size thereof (an overestimate, in general) */ 410 UWord debug_abbv_maxszB; 411 /* Where is .debug_str ? */ 412 UChar* debug_str_img; 413 UWord debug_str_sz; 414 /* Where is .debug_ranges ? */ 415 UChar* debug_ranges_img; 416 UWord debug_ranges_sz; 417 /* Where is .debug_loc ? */ 418 UChar* debug_loc_img; 419 UWord debug_loc_sz; 420 /* Where is .debug_line? */ 421 UChar* debug_line_img; 422 UWord debug_line_sz; 423 /* Where is .debug_info? */ 424 UChar* debug_info_img; 425 UWord debug_info_sz; 426 /* --- Needed so we can add stuff to the string table. --- */ 427 struct _DebugInfo* di; 428 /* --- a cache for set_abbv_Cursor --- */ 429 /* abbv_code == (ULong)-1 for an unused entry. */ 430 struct { ULong abbv_code; UWord posn; } saC_cache[N_ABBV_CACHE]; 431 UWord saC_cache_queries; 432 UWord saC_cache_misses; 433 } 434 CUConst; 435 436 437/*------------------------------------------------------------*/ 438/*--- ---*/ 439/*--- Helper functions for Guarded Expressions ---*/ 440/*--- ---*/ 441/*------------------------------------------------------------*/ 442 443/* Parse the location list starting at img-offset 'debug_loc_offset' 444 in .debug_loc. Results are biased with 'svma_of_referencing_CU' 445 and so I believe are correct SVMAs for the object as a whole. This 446 function allocates the UChar*, and the caller must deallocate it. 447 The resulting block is in so-called Guarded-Expression format. 448 449 Guarded-Expression format is similar but not identical to the DWARF3 450 location-list format. The format of each returned block is: 451 452 UChar biasMe; 453 UChar isEnd; 454 followed by zero or more of 455 456 (Addr aMin; Addr aMax; UShort nbytes; ..bytes..; UChar isEnd) 457 458 '..bytes..' 
is an standard DWARF3 location expression which is 459 valid when aMin <= pc <= aMax (possibly after suitable biasing). 460 461 The number of bytes in '..bytes..' is nbytes. 462 463 The end of the sequence is marked by an isEnd == 1 value. All 464 previous isEnd values must be zero. 465 466 biasMe is 1 if the aMin/aMax fields need this DebugInfo's 467 text_bias added before use, and 0 if the GX is this is not 468 necessary (is ready to go). 469 470 Hence the block can be quickly parsed and is self-describing. Note 471 that aMax is 1 less than the corresponding value in a DWARF3 472 location list. Zero length ranges, with aMax == aMin-1, are not 473 allowed. 474*/ 475/* 2008-sept-12: moved ML_(pp_GX) from here to d3basics.c, where 476 it more logically belongs. */ 477 478 479/* Apply a text bias to a GX. */ 480static void bias_GX ( /*MOD*/GExpr* gx, struct _DebugInfo* di ) 481{ 482 UShort nbytes; 483 Addr* pA; 484 UChar* p = &gx->payload[0]; 485 UChar uc; 486 uc = *p++; /*biasMe*/ 487 if (uc == 0) 488 return; 489 vg_assert(uc == 1); 490 p[-1] = 0; /* mark it as done */ 491 while (True) { 492 uc = *p++; 493 if (uc == 1) 494 break; /*isEnd*/ 495 vg_assert(uc == 0); 496 /* t-bias aMin */ 497 pA = (Addr*)p; 498 *pA += di->text_debug_bias; 499 p += sizeof(Addr); 500 /* t-bias aMax */ 501 pA = (Addr*)p; 502 *pA += di->text_debug_bias; 503 p += sizeof(Addr); 504 /* nbytes, and actual expression */ 505 nbytes = * (UShort*)p; p += sizeof(UShort); 506 p += nbytes; 507 } 508} 509 510__attribute__((noinline)) 511static GExpr* make_singleton_GX ( UChar* block, UWord nbytes ) 512{ 513 SizeT bytesReqd; 514 GExpr* gx; 515 UChar *p, *pstart; 516 517 vg_assert(sizeof(UWord) == sizeof(Addr)); 518 vg_assert(nbytes <= 0xFFFF); /* else we overflow the nbytes field */ 519 bytesReqd 520 = sizeof(UChar) /*biasMe*/ + sizeof(UChar) /*!isEnd*/ 521 + sizeof(UWord) /*aMin*/ + sizeof(UWord) /*aMax*/ 522 + sizeof(UShort) /*nbytes*/ + nbytes 523 + sizeof(UChar); /*isEnd*/ 524 525 gx = 
ML_(dinfo_zalloc)( "di.readdwarf3.msGX.1", 526 sizeof(GExpr) + bytesReqd ); 527 vg_assert(gx); 528 529 p = pstart = &gx->payload[0]; 530 531 * ((UChar*)p) = 0; /*biasMe*/ p += sizeof(UChar); 532 * ((UChar*)p) = 0; /*!isEnd*/ p += sizeof(UChar); 533 * ((Addr*)p) = 0; /*aMin*/ p += sizeof(Addr); 534 * ((Addr*)p) = ~((Addr)0); /*aMax */ p += sizeof(Addr); 535 * ((UShort*)p) = (UShort)nbytes; /*nbytes*/ p += sizeof(UShort); 536 VG_(memcpy)(p, block, nbytes); p += nbytes; 537 * ((UChar*)p) = 1; /*isEnd*/ p += sizeof(UChar); 538 539 vg_assert( (SizeT)(p - pstart) == bytesReqd); 540 vg_assert( &gx->payload[bytesReqd] 541 == ((UChar*)gx) + sizeof(GExpr) + bytesReqd ); 542 543 return gx; 544} 545 546__attribute__((noinline)) 547static GExpr* make_general_GX ( CUConst* cc, 548 Bool td3, 549 UWord debug_loc_offset, 550 Addr svma_of_referencing_CU ) 551{ 552 Addr base; 553 Cursor loc; 554 XArray* xa; /* XArray of UChar */ 555 GExpr* gx; 556 Word nbytes; 557 558 vg_assert(sizeof(UWord) == sizeof(Addr)); 559 if (cc->debug_loc_sz == 0) 560 cc->barf("make_general_GX: .debug_loc is empty/missing"); 561 562 init_Cursor( &loc, cc->debug_loc_img, 563 cc->debug_loc_sz, 0, cc->barf, 564 "Overrun whilst reading .debug_loc section(2)" ); 565 set_position_of_Cursor( &loc, debug_loc_offset ); 566 567 TRACE_D3("make_general_GX (.debug_loc_offset = %lu, img = %p) {\n", 568 debug_loc_offset, get_address_of_Cursor( &loc ) ); 569 570 /* Who frees this xa? It is freed before this fn exits. */ 571 xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.mgGX.1", 572 ML_(dinfo_free), 573 sizeof(UChar) ); 574 575 { UChar c = 1; /*biasMe*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); } 576 577 base = 0; 578 while (True) { 579 Bool acquire; 580 UWord len; 581 /* Read a (host-)word pair. This is something of a hack since 582 the word size to read is really dictated by the ELF file; 583 however, we assume we're reading a file with the same 584 word-sizeness as the host. Reasonably enough. 
*/ 585 UWord w1 = get_UWord( &loc ); 586 UWord w2 = get_UWord( &loc ); 587 588 TRACE_D3(" %08lx %08lx\n", w1, w2); 589 if (w1 == 0 && w2 == 0) 590 break; /* end of list */ 591 592 if (w1 == -1UL) { 593 /* new value for 'base' */ 594 base = w2; 595 continue; 596 } 597 598 /* else a location expression follows */ 599 /* else enumerate [w1+base, w2+base) */ 600 /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc" 601 (sec 2.17.2) */ 602 if (w1 > w2) { 603 TRACE_D3("negative range is for .debug_loc expr at " 604 "file offset %lu\n", 605 debug_loc_offset); 606 cc->barf( "negative range in .debug_loc section" ); 607 } 608 609 /* ignore zero length ranges */ 610 acquire = w1 < w2; 611 len = (UWord)get_UShort( &loc ); 612 613 if (acquire) { 614 UWord w; 615 UShort s; 616 UChar c; 617 c = 0; /* !isEnd*/ 618 VG_(addBytesToXA)( xa, &c, sizeof(c) ); 619 w = w1 + base + svma_of_referencing_CU; 620 VG_(addBytesToXA)( xa, &w, sizeof(w) ); 621 w = w2 -1 + base + svma_of_referencing_CU; 622 VG_(addBytesToXA)( xa, &w, sizeof(w) ); 623 s = (UShort)len; 624 VG_(addBytesToXA)( xa, &s, sizeof(s) ); 625 } 626 627 while (len > 0) { 628 UChar byte = get_UChar( &loc ); 629 TRACE_D3("%02x", (UInt)byte); 630 if (acquire) 631 VG_(addBytesToXA)( xa, &byte, 1 ); 632 len--; 633 } 634 TRACE_D3("\n"); 635 } 636 637 { UChar c = 1; /*isEnd*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); } 638 639 nbytes = VG_(sizeXA)( xa ); 640 vg_assert(nbytes >= 1); 641 642 gx = ML_(dinfo_zalloc)( "di.readdwarf3.mgGX.2", sizeof(GExpr) + nbytes ); 643 vg_assert(gx); 644 VG_(memcpy)( &gx->payload[0], (UChar*)VG_(indexXA)(xa,0), nbytes ); 645 vg_assert( &gx->payload[nbytes] 646 == ((UChar*)gx) + sizeof(GExpr) + nbytes ); 647 648 VG_(deleteXA)( xa ); 649 650 TRACE_D3("}\n"); 651 652 return gx; 653} 654 655 656/*------------------------------------------------------------*/ 657/*--- ---*/ 658/*--- Helper functions for range lists and CU headers ---*/ 659/*--- ---*/ 
660/*------------------------------------------------------------*/ 661 662/* Denotes an address range. Both aMin and aMax are included in the 663 range; hence a complete range is (0, ~0) and an empty range is any 664 (X, X-1) for X > 0.*/ 665typedef 666 struct { Addr aMin; Addr aMax; } 667 AddrRange; 668 669 670/* Generate an arbitrary structural total ordering on 671 XArray* of AddrRange. */ 672static Word cmp__XArrays_of_AddrRange ( XArray* rngs1, XArray* rngs2 ) 673{ 674 Word n1, n2, i; 675 tl_assert(rngs1 && rngs2); 676 n1 = VG_(sizeXA)( rngs1 ); 677 n2 = VG_(sizeXA)( rngs2 ); 678 if (n1 < n2) return -1; 679 if (n1 > n2) return 1; 680 for (i = 0; i < n1; i++) { 681 AddrRange* rng1 = (AddrRange*)VG_(indexXA)( rngs1, i ); 682 AddrRange* rng2 = (AddrRange*)VG_(indexXA)( rngs2, i ); 683 if (rng1->aMin < rng2->aMin) return -1; 684 if (rng1->aMin > rng2->aMin) return 1; 685 if (rng1->aMax < rng2->aMax) return -1; 686 if (rng1->aMax > rng2->aMax) return 1; 687 } 688 return 0; 689} 690 691 692__attribute__((noinline)) 693static XArray* /* of AddrRange */ empty_range_list ( void ) 694{ 695 XArray* xa; /* XArray of AddrRange */ 696 /* Who frees this xa? varstack_preen() does. */ 697 xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.erl.1", 698 ML_(dinfo_free), 699 sizeof(AddrRange) ); 700 return xa; 701} 702 703 704__attribute__((noinline)) 705static XArray* unitary_range_list ( Addr aMin, Addr aMax ) 706{ 707 XArray* xa; 708 AddrRange pair; 709 vg_assert(aMin <= aMax); 710 /* Who frees this xa? varstack_preen() does. */ 711 xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.url.1", 712 ML_(dinfo_free), 713 sizeof(AddrRange) ); 714 pair.aMin = aMin; 715 pair.aMax = aMax; 716 VG_(addToXA)( xa, &pair ); 717 return xa; 718} 719 720 721/* Enumerate the address ranges starting at img-offset 722 'debug_ranges_offset' in .debug_ranges. Results are biased with 723 'svma_of_referencing_CU' and so I believe are correct SVMAs for the 724 object as a whole. 
This function allocates the XArray, and the 725 caller must deallocate it. */ 726__attribute__((noinline)) 727static XArray* /* of AddrRange */ 728 get_range_list ( CUConst* cc, 729 Bool td3, 730 UWord debug_ranges_offset, 731 Addr svma_of_referencing_CU ) 732{ 733 Addr base; 734 Cursor ranges; 735 XArray* xa; /* XArray of AddrRange */ 736 AddrRange pair; 737 738 if (cc->debug_ranges_sz == 0) 739 cc->barf("get_range_list: .debug_ranges is empty/missing"); 740 741 init_Cursor( &ranges, cc->debug_ranges_img, 742 cc->debug_ranges_sz, 0, cc->barf, 743 "Overrun whilst reading .debug_ranges section(2)" ); 744 set_position_of_Cursor( &ranges, debug_ranges_offset ); 745 746 /* Who frees this xa? varstack_preen() does. */ 747 xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.grl.1", ML_(dinfo_free), 748 sizeof(AddrRange) ); 749 base = 0; 750 while (True) { 751 /* Read a (host-)word pair. This is something of a hack since 752 the word size to read is really dictated by the ELF file; 753 however, we assume we're reading a file with the same 754 word-sizeness as the host. Reasonably enough. */ 755 UWord w1 = get_UWord( &ranges ); 756 UWord w2 = get_UWord( &ranges ); 757 758 if (w1 == 0 && w2 == 0) 759 break; /* end of list. */ 760 761 if (w1 == -1UL) { 762 /* new value for 'base' */ 763 base = w2; 764 continue; 765 } 766 767 /* else enumerate [w1+base, w2+base) */ 768 /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc" 769 (sec 2.17.2) */ 770 if (w1 > w2) 771 cc->barf( "negative range in .debug_ranges section" ); 772 if (w1 < w2) { 773 pair.aMin = w1 + base + svma_of_referencing_CU; 774 pair.aMax = w2 - 1 + base + svma_of_referencing_CU; 775 vg_assert(pair.aMin <= pair.aMax); 776 VG_(addToXA)( xa, &pair ); 777 } 778 } 779 return xa; 780} 781 782 783/* Parse the Compilation Unit header indicated at 'c' and 784 initialise 'cc' accordingly. 
*/ 785static __attribute__((noinline)) 786void parse_CU_Header ( /*OUT*/CUConst* cc, 787 Bool td3, 788 Cursor* c, 789 UChar* debug_abbv_img, UWord debug_abbv_sz ) 790{ 791 UChar address_size; 792 UWord debug_abbrev_offset; 793 Int i; 794 795 VG_(memset)(cc, 0, sizeof(*cc)); 796 vg_assert(c && c->barf); 797 cc->barf = c->barf; 798 799 /* initial_length field */ 800 cc->unit_length 801 = get_Initial_Length( &cc->is_dw64, c, 802 "parse_CU_Header: invalid initial-length field" ); 803 804 TRACE_D3(" Length: %lld\n", cc->unit_length ); 805 806 /* version */ 807 cc->version = get_UShort( c ); 808 if (cc->version != 2 && cc->version != 3 && cc->version != 4) 809 cc->barf( "parse_CU_Header: is neither DWARF2 nor DWARF3 nor DWARF4" ); 810 TRACE_D3(" Version: %d\n", (Int)cc->version ); 811 812 /* debug_abbrev_offset */ 813 debug_abbrev_offset = get_Dwarfish_UWord( c, cc->is_dw64 ); 814 if (debug_abbrev_offset >= debug_abbv_sz) 815 cc->barf( "parse_CU_Header: invalid debug_abbrev_offset" ); 816 TRACE_D3(" Abbrev Offset: %ld\n", debug_abbrev_offset ); 817 818 /* address size. If this isn't equal to the host word size, just 819 give up. This makes it safe to assume elsewhere that 820 DW_FORM_addr and DW_FORM_ref_addr can be treated as a host 821 word. */ 822 address_size = get_UChar( c ); 823 if (address_size != sizeof(void*)) 824 cc->barf( "parse_CU_Header: invalid address_size" ); 825 TRACE_D3(" Pointer Size: %d\n", (Int)address_size ); 826 827 /* Set up so that cc->debug_abbv points to the relevant table for 828 this CU. Set the szB so that at least we can't read off the end 829 of the debug_abbrev section -- potentially (and quite likely) 830 too big, if this isn't the last table in the section, but at 831 least it's safe. 
*/ 832 cc->debug_abbv = debug_abbv_img + debug_abbrev_offset; 833 cc->debug_abbv_maxszB = debug_abbv_sz - debug_abbrev_offset; 834 /* and empty out the set_abbv_Cursor cache */ 835 if (0) VG_(printf)("XXXXXX initialise set_abbv_Cursor cache\n"); 836 for (i = 0; i < N_ABBV_CACHE; i++) { 837 cc->saC_cache[i].abbv_code = (ULong)-1; /* unused */ 838 cc->saC_cache[i].posn = 0; 839 } 840 cc->saC_cache_queries = 0; 841 cc->saC_cache_misses = 0; 842} 843 844 845/* Set up 'c' so it is ready to parse the abbv table entry code 846 'abbv_code' for this compilation unit. */ 847static __attribute__((noinline)) 848void set_abbv_Cursor ( /*OUT*/Cursor* c, Bool td3, 849 CUConst* cc, ULong abbv_code ) 850{ 851 Int i; 852 ULong acode; 853 854 if (abbv_code == 0) 855 cc->barf("set_abbv_Cursor: abbv_code == 0" ); 856 857 /* (ULong)-1 is used to represent an empty cache slot. So we can't 858 allow it. In any case no valid DWARF3 should make a reference 859 to a negative abbreviation code. [at least, they always seem to 860 be numbered upwards from zero as far as I have seen] */ 861 vg_assert(abbv_code != (ULong)-1); 862 863 /* First search the cache. */ 864 if (0) VG_(printf)("XXXXXX search set_abbv_Cursor cache\n"); 865 cc->saC_cache_queries++; 866 for (i = 0; i < N_ABBV_CACHE; i++) { 867 /* No need to test the cached abbv_codes for -1 (empty), since 868 we just asserted that abbv_code is not -1. */ 869 if (cc->saC_cache[i].abbv_code == abbv_code) { 870 /* Found it. Cool. Set up the parser using the cached 871 position, and move this cache entry 1 step closer to the 872 front. 
*/ 873 if (0) VG_(printf)("XXXXXX found in set_abbv_Cursor cache\n"); 874 init_Cursor( c, cc->debug_abbv, 875 cc->debug_abbv_maxszB, cc->saC_cache[i].posn, 876 cc->barf, 877 "Overrun whilst parsing .debug_abbrev section(1)" ); 878 if (i > 0) { 879 ULong t_abbv_code = cc->saC_cache[i].abbv_code; 880 UWord t_posn = cc->saC_cache[i].posn; 881 while (i > 0) { 882 cc->saC_cache[i] = cc->saC_cache[i-1]; 883 cc->saC_cache[0].abbv_code = t_abbv_code; 884 cc->saC_cache[0].posn = t_posn; 885 i--; 886 } 887 } 888 return; 889 } 890 } 891 892 /* No. It's not in the cache. We have to search through 893 .debug_abbrev, of course taking care to update the cache 894 when done. */ 895 896 cc->saC_cache_misses++; 897 init_Cursor( c, cc->debug_abbv, cc->debug_abbv_maxszB, 0, cc->barf, 898 "Overrun whilst parsing .debug_abbrev section(2)" ); 899 900 /* Now iterate though the table until we find the requested 901 entry. */ 902 while (True) { 903 //ULong atag; 904 //UInt has_children; 905 acode = get_ULEB128( c ); 906 if (acode == 0) break; /* end of the table */ 907 if (acode == abbv_code) break; /* found it */ 908 /*atag = */ get_ULEB128( c ); 909 /*has_children = */ get_UChar( c ); 910 //TRACE_D3(" %llu %s [%s]\n", 911 // acode, pp_DW_TAG(atag), pp_DW_children(has_children)); 912 while (True) { 913 ULong at_name = get_ULEB128( c ); 914 ULong at_form = get_ULEB128( c ); 915 if (at_name == 0 && at_form == 0) break; 916 //TRACE_D3(" %18s %s\n", 917 // pp_DW_AT(at_name), pp_DW_FORM(at_form)); 918 } 919 } 920 921 if (acode == 0) { 922 /* Not found. This is fatal. */ 923 cc->barf("set_abbv_Cursor: abbv_code not found"); 924 } 925 926 /* Otherwise, 'c' is now set correctly to parse the relevant entry, 927 starting from the abbreviation entry's tag. So just cache 928 the result, and return. 
*/ 929 for (i = N_ABBV_CACHE-1; i > N_ABBV_CACHE/2; i--) { 930 cc->saC_cache[i] = cc->saC_cache[i-1]; 931 } 932 if (0) VG_(printf)("XXXXXX update set_abbv_Cursor cache\n"); 933 cc->saC_cache[N_ABBV_CACHE/2].abbv_code = abbv_code; 934 cc->saC_cache[N_ABBV_CACHE/2].posn = get_position_of_Cursor(c); 935} 936 937 938/* From 'c', get the Form data into the lowest 1/2/4/8 bytes of *cts. 939 940 If *cts itself contains the entire result, then *ctsSzB is set to 941 1,2,4 or 8 accordingly and *ctsMemSzB is set to zero. 942 943 Alternatively, the result can be a block of data (in the 944 transiently mapped-in object, so-called "image" space). If so then 945 the lowest sizeof(void*)/8 bytes of *cts hold a pointer to said 946 image, *ctsSzB is zero, and *ctsMemSzB is the size of the block. 947 948 Unfortunately this means it is impossible to represent a zero-size 949 image block since that would have *ctsSzB == 0 and *ctsMemSzB == 0 950 and so is ambiguous (which case it is?) 951 952 Invariant on successful return: 953 (*ctsSzB > 0 && *ctsMemSzB == 0) 954 || (*ctsSzB == 0 && *ctsMemSzB > 0) 955*/ 956static 957void get_Form_contents ( /*OUT*/ULong* cts, 958 /*OUT*/Int* ctsSzB, 959 /*OUT*/UWord* ctsMemSzB, 960 CUConst* cc, Cursor* c, 961 Bool td3, DW_FORM form ) 962{ 963 *cts = 0; 964 *ctsSzB = 0; 965 *ctsMemSzB = 0; 966 switch (form) { 967 case DW_FORM_data1: 968 *cts = (ULong)(UChar)get_UChar(c); 969 *ctsSzB = 1; 970 TRACE_D3("%u", (UInt)*cts); 971 break; 972 case DW_FORM_data2: 973 *cts = (ULong)(UShort)get_UShort(c); 974 *ctsSzB = 2; 975 TRACE_D3("%u", (UInt)*cts); 976 break; 977 case DW_FORM_data4: 978 *cts = (ULong)(UInt)get_UInt(c); 979 *ctsSzB = 4; 980 TRACE_D3("%u", (UInt)*cts); 981 break; 982 case DW_FORM_data8: 983 *cts = get_ULong(c); 984 *ctsSzB = 8; 985 TRACE_D3("%llu", *cts); 986 break; 987 case DW_FORM_sec_offset: 988 *cts = (ULong)get_Dwarfish_UWord( c, cc->is_dw64 ); 989 *ctsSzB = cc->is_dw64 ? 
8 : 4; 990 TRACE_D3("%llu", *cts); 991 break; 992 case DW_FORM_sdata: 993 *cts = (ULong)(Long)get_SLEB128(c); 994 *ctsSzB = 8; 995 TRACE_D3("%lld", (Long)*cts); 996 break; 997 case DW_FORM_udata: 998 *cts = (ULong)(Long)get_ULEB128(c); 999 *ctsSzB = 8; 1000 TRACE_D3("%llu", (Long)*cts); 1001 break; 1002 case DW_FORM_addr: 1003 /* note, this is a hack. DW_FORM_addr is defined as getting 1004 a word the size of the target machine as defined by the 1005 address_size field in the CU Header. However, 1006 parse_CU_Header() rejects all inputs except those for 1007 which address_size == sizeof(Word), hence we can just 1008 treat it as a (host) Word. */ 1009 *cts = (ULong)(UWord)get_UWord(c); 1010 *ctsSzB = sizeof(UWord); 1011 TRACE_D3("0x%lx", (UWord)*cts); 1012 break; 1013 1014 case DW_FORM_ref_addr: 1015 /* We make the same word-size assumption as DW_FORM_addr. */ 1016 /* What does this really mean? From D3 Sec 7.5.4, 1017 description of "reference", it would appear to reference 1018 some other DIE, by specifying the offset from the 1019 beginning of a .debug_info section. The D3 spec mentions 1020 that this might be in some other shared object and 1021 executable. But I don't see how the name of the other 1022 object/exe is specified. 1023 1024 At least for the DW_FORM_ref_addrs created by icc11, the 1025 references seem to be within the same object/executable. 1026 So for the moment we merely range-check, to see that they 1027 actually do specify a plausible offset within this 1028 object's .debug_info, and return the value unchanged. 1029 */ 1030 *cts = (ULong)(UWord)get_UWord(c); 1031 *ctsSzB = sizeof(UWord); 1032 TRACE_D3("0x%lx", (UWord)*cts); 1033 if (0) VG_(printf)("DW_FORM_ref_addr 0x%lx\n", (UWord)*cts); 1034 if (/* the following 2 are surely impossible, but ... */ 1035 cc->debug_info_img == NULL || cc->debug_info_sz == 0 1036 || *cts >= (ULong)cc->debug_info_sz) { 1037 /* Hmm. Offset is nonsensical for this object's .debug_info 1038 section. 
Be safe and reject it. */ 1039 cc->barf("get_Form_contents: DW_FORM_ref_addr points " 1040 "outside .debug_info"); 1041 } 1042 break; 1043 1044 case DW_FORM_strp: { 1045 /* this is an offset into .debug_str */ 1046 UChar* str; 1047 UWord uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 ); 1048 if (cc->debug_str_img == NULL || uw >= cc->debug_str_sz) 1049 cc->barf("get_Form_contents: DW_FORM_strp " 1050 "points outside .debug_str"); 1051 /* FIXME: check the entire string lies inside debug_str, 1052 not just the first byte of it. */ 1053 str = (UChar*)cc->debug_str_img + uw; 1054 TRACE_D3("(indirect string, offset: 0x%lx): %s", uw, str); 1055 *cts = (ULong)(UWord)str; 1056 *ctsMemSzB = 1 + (ULong)VG_(strlen)(str); 1057 break; 1058 } 1059 case DW_FORM_string: { 1060 UChar* str = get_AsciiZ(c); 1061 TRACE_D3("%s", str); 1062 *cts = (ULong)(UWord)str; 1063 /* strlen is safe because get_AsciiZ already 'vetted' the 1064 entire string */ 1065 *ctsMemSzB = 1 + (ULong)VG_(strlen)(str); 1066 break; 1067 } 1068 case DW_FORM_ref1: { 1069 UChar u8 = get_UChar(c); 1070 UWord res = cc->cu_start_offset + (UWord)u8; 1071 *cts = (ULong)res; 1072 *ctsSzB = sizeof(UWord); 1073 TRACE_D3("<%lx>", res); 1074 break; 1075 } 1076 case DW_FORM_ref2: { 1077 UShort u16 = get_UShort(c); 1078 UWord res = cc->cu_start_offset + (UWord)u16; 1079 *cts = (ULong)res; 1080 *ctsSzB = sizeof(UWord); 1081 TRACE_D3("<%lx>", res); 1082 break; 1083 } 1084 case DW_FORM_ref4: { 1085 UInt u32 = get_UInt(c); 1086 UWord res = cc->cu_start_offset + (UWord)u32; 1087 *cts = (ULong)res; 1088 *ctsSzB = sizeof(UWord); 1089 TRACE_D3("<%lx>", res); 1090 break; 1091 } 1092 case DW_FORM_ref8: { 1093 ULong u64 = get_ULong(c); 1094 UWord res = cc->cu_start_offset + (UWord)u64; 1095 *cts = (ULong)res; 1096 *ctsSzB = sizeof(UWord); 1097 TRACE_D3("<%lx>", res); 1098 break; 1099 } 1100 case DW_FORM_ref_udata: { 1101 ULong u64 = get_ULEB128(c); 1102 UWord res = cc->cu_start_offset + (UWord)u64; 1103 *cts = (ULong)res; 1104 *ctsSzB = 
sizeof(UWord); 1105 TRACE_D3("<%lx>", res); 1106 break; 1107 } 1108 case DW_FORM_flag: { 1109 UChar u8 = get_UChar(c); 1110 TRACE_D3("%u", (UInt)u8); 1111 *cts = (ULong)u8; 1112 *ctsSzB = 1; 1113 break; 1114 } 1115 case DW_FORM_flag_present: 1116 TRACE_D3("1"); 1117 *cts = 1; 1118 *ctsSzB = 1; 1119 break; 1120 case DW_FORM_block1: { 1121 ULong u64b; 1122 ULong u64 = (ULong)get_UChar(c); 1123 UChar* block = get_address_of_Cursor(c); 1124 TRACE_D3("%llu byte block: ", u64); 1125 for (u64b = u64; u64b > 0; u64b--) { 1126 UChar u8 = get_UChar(c); 1127 TRACE_D3("%x ", (UInt)u8); 1128 } 1129 *cts = (ULong)(UWord)block; 1130 *ctsMemSzB = (UWord)u64; 1131 break; 1132 } 1133 case DW_FORM_block2: { 1134 ULong u64b; 1135 ULong u64 = (ULong)get_UShort(c); 1136 UChar* block = get_address_of_Cursor(c); 1137 TRACE_D3("%llu byte block: ", u64); 1138 for (u64b = u64; u64b > 0; u64b--) { 1139 UChar u8 = get_UChar(c); 1140 TRACE_D3("%x ", (UInt)u8); 1141 } 1142 *cts = (ULong)(UWord)block; 1143 *ctsMemSzB = (UWord)u64; 1144 break; 1145 } 1146 case DW_FORM_block4: { 1147 ULong u64b; 1148 ULong u64 = (ULong)get_UInt(c); 1149 UChar* block = get_address_of_Cursor(c); 1150 TRACE_D3("%llu byte block: ", u64); 1151 for (u64b = u64; u64b > 0; u64b--) { 1152 UChar u8 = get_UChar(c); 1153 TRACE_D3("%x ", (UInt)u8); 1154 } 1155 *cts = (ULong)(UWord)block; 1156 *ctsMemSzB = (UWord)u64; 1157 break; 1158 } 1159 case DW_FORM_exprloc: 1160 case DW_FORM_block: { 1161 ULong u64b; 1162 ULong u64 = (ULong)get_ULEB128(c); 1163 UChar* block = get_address_of_Cursor(c); 1164 TRACE_D3("%llu byte block: ", u64); 1165 for (u64b = u64; u64b > 0; u64b--) { 1166 UChar u8 = get_UChar(c); 1167 TRACE_D3("%x ", (UInt)u8); 1168 } 1169 *cts = (ULong)(UWord)block; 1170 *ctsMemSzB = (UWord)u64; 1171 break; 1172 } 1173 case DW_FORM_ref_sig8: { 1174 ULong u64b; 1175 UChar* block = get_address_of_Cursor(c); 1176 TRACE_D3("8 byte signature: "); 1177 for (u64b = 8; u64b > 0; u64b--) { 1178 UChar u8 = get_UChar(c); 1179 
TRACE_D3("%x ", (UInt)u8); 1180 } 1181 *cts = (ULong)(UWord)block; 1182 *ctsMemSzB = 8; 1183 break; 1184 } 1185 case DW_FORM_indirect: 1186 get_Form_contents (cts, ctsSzB, ctsMemSzB, cc, c, td3, 1187 (DW_FORM)get_ULEB128(c)); 1188 return; 1189 1190 default: 1191 VG_(printf)( 1192 "get_Form_contents: unhandled %d (%s) at <%lx>\n", 1193 form, ML_(pp_DW_FORM)(form), get_position_of_Cursor(c)); 1194 c->barf("get_Form_contents: unhandled DW_FORM"); 1195 } 1196} 1197 1198 1199/*------------------------------------------------------------*/ 1200/*--- ---*/ 1201/*--- Parsing of variable-related DIEs ---*/ 1202/*--- ---*/ 1203/*------------------------------------------------------------*/ 1204 1205typedef 1206 struct _TempVar { 1207 UChar* name; /* in DebugInfo's .strchunks */ 1208 /* Represent ranges economically. nRanges is the number of 1209 ranges. Cases: 1210 0: .rngOneMin .rngOneMax .manyRanges are all zero 1211 1: .rngOneMin .rngOneMax hold the range; .rngMany is NULL 1212 2: .rngOneMin .rngOneMax are zero; .rngMany holds the ranges. 1213 This is merely an optimisation to avoid having to allocate 1214 and free the XArray in the common (98%) of cases where there 1215 is zero or one address ranges. */ 1216 UWord nRanges; 1217 Addr rngOneMin; 1218 Addr rngOneMax; 1219 XArray* rngMany; /* of AddrRange. NON-UNIQUE PTR in AR_DINFO. */ 1220 /* Do not free .rngMany, since many TempVars will have the same 1221 value. Instead the associated storage is to be freed by 1222 deleting 'rangetree', which stores a single copy of each 1223 range. */ 1224 /* --- */ 1225 Int level; 1226 UWord typeR; /* a cuOff */ 1227 GExpr* gexpr; /* for this variable */ 1228 GExpr* fbGX; /* to find the frame base of the enclosing fn, if 1229 any */ 1230 UChar* fName; /* declaring file name, or NULL */ 1231 Int fLine; /* declaring file line number, or zero */ 1232 /* offset in .debug_info, so that abstract instances can be 1233 found to satisfy references from concrete instances. 
*/ 1234 UWord dioff; 1235 UWord absOri; /* so the absOri fields refer to dioff fields 1236 in some other, related TempVar. */ 1237 } 1238 TempVar; 1239 1240#define N_D3_VAR_STACK 48 1241 1242typedef 1243 struct { 1244 /* Contains the range stack: a stack of address ranges, one 1245 stack entry for each nested scope. 1246 1247 Some scope entries are created by function definitions 1248 (DW_AT_subprogram), and for those, we also note the GExpr 1249 derived from its DW_AT_frame_base attribute, if any. 1250 Consequently it should be possible to find, for any 1251 variable's DIE, the GExpr for the the containing function's 1252 DW_AT_frame_base by scanning back through the stack to find 1253 the nearest entry associated with a function. This somewhat 1254 elaborate scheme is provided so as to make it possible to 1255 obtain the correct DW_AT_frame_base expression even in the 1256 presence of nested functions (or to be more precise, in the 1257 presence of nested DW_AT_subprogram DIEs). 1258 */ 1259 Int sp; /* [sp] is innermost active entry; sp==-1 for empty 1260 stack */ 1261 XArray* ranges[N_D3_VAR_STACK]; /* XArray of AddrRange */ 1262 Int level[N_D3_VAR_STACK]; /* D3 DIE levels */ 1263 Bool isFunc[N_D3_VAR_STACK]; /* from DW_AT_subprogram? */ 1264 GExpr* fbGX[N_D3_VAR_STACK]; /* if isFunc, contains the FB 1265 expr, else NULL */ 1266 /* The file name table. Is a mapping from integer index to the 1267 (permanent) copy of the string, iow a non-img area. 
*/ 1268 XArray* /* of UChar* */ filenameTable; 1269 } 1270 D3VarParser; 1271 1272static void varstack_show ( D3VarParser* parser, HChar* str ) { 1273 Word i, j; 1274 VG_(printf)(" varstack (%s) {\n", str); 1275 for (i = 0; i <= parser->sp; i++) { 1276 XArray* xa = parser->ranges[i]; 1277 vg_assert(xa); 1278 VG_(printf)(" [%ld] (level %d)", i, parser->level[i]); 1279 if (parser->isFunc[i]) { 1280 VG_(printf)(" (fbGX=%p)", parser->fbGX[i]); 1281 } else { 1282 vg_assert(parser->fbGX[i] == NULL); 1283 } 1284 VG_(printf)(": "); 1285 if (VG_(sizeXA)( xa ) == 0) { 1286 VG_(printf)("** empty PC range array **"); 1287 } else { 1288 for (j = 0; j < VG_(sizeXA)( xa ); j++) { 1289 AddrRange* range = (AddrRange*) VG_(indexXA)( xa, j ); 1290 vg_assert(range); 1291 VG_(printf)("[%#lx,%#lx] ", range->aMin, range->aMax); 1292 } 1293 } 1294 VG_(printf)("\n"); 1295 } 1296 VG_(printf)(" }\n"); 1297} 1298 1299/* Remove from the stack, all entries with .level > 'level' */ 1300static 1301void varstack_preen ( D3VarParser* parser, Bool td3, Int level ) 1302{ 1303 Bool changed = False; 1304 vg_assert(parser->sp < N_D3_VAR_STACK); 1305 while (True) { 1306 vg_assert(parser->sp >= -1); 1307 if (parser->sp == -1) break; 1308 if (parser->level[parser->sp] <= level) break; 1309 if (0) 1310 TRACE_D3("BBBBAAAA varstack_pop [newsp=%d]\n", parser->sp-1); 1311 vg_assert(parser->ranges[parser->sp]); 1312 /* Who allocated this xa? get_range_list() or 1313 unitary_range_list(). 
*/ 1314 VG_(deleteXA)( parser->ranges[parser->sp] ); 1315 parser->ranges[parser->sp] = NULL; 1316 parser->level[parser->sp] = 0; 1317 parser->isFunc[parser->sp] = False; 1318 parser->fbGX[parser->sp] = NULL; 1319 parser->sp--; 1320 changed = True; 1321 } 1322 if (changed && td3) 1323 varstack_show( parser, "after preen" ); 1324} 1325 1326static void varstack_push ( CUConst* cc, 1327 D3VarParser* parser, 1328 Bool td3, 1329 XArray* ranges, Int level, 1330 Bool isFunc, GExpr* fbGX ) { 1331 if (0) 1332 TRACE_D3("BBBBAAAA varstack_push[newsp=%d]: %d %p\n", 1333 parser->sp+1, level, ranges); 1334 1335 /* First we need to zap everything >= 'level', as we are about to 1336 replace any previous entry at 'level', so .. */ 1337 varstack_preen(parser, /*td3*/False, level-1); 1338 1339 vg_assert(parser->sp >= -1); 1340 vg_assert(parser->sp < N_D3_VAR_STACK); 1341 if (parser->sp == N_D3_VAR_STACK-1) 1342 cc->barf("varstack_push: N_D3_VAR_STACK is too low; " 1343 "increase and recompile"); 1344 if (parser->sp >= 0) 1345 vg_assert(parser->level[parser->sp] < level); 1346 parser->sp++; 1347 vg_assert(parser->ranges[parser->sp] == NULL); 1348 vg_assert(parser->level[parser->sp] == 0); 1349 vg_assert(parser->isFunc[parser->sp] == False); 1350 vg_assert(parser->fbGX[parser->sp] == NULL); 1351 vg_assert(ranges != NULL); 1352 if (!isFunc) vg_assert(fbGX == NULL); 1353 parser->ranges[parser->sp] = ranges; 1354 parser->level[parser->sp] = level; 1355 parser->isFunc[parser->sp] = isFunc; 1356 parser->fbGX[parser->sp] = fbGX; 1357 if (td3) 1358 varstack_show( parser, "after push" ); 1359} 1360 1361 1362/* cts, ctsSzB, ctsMemSzB are derived from a DW_AT_location and so 1363 refer either to a location expression or to a location list. 1364 Figure out which, and in both cases bundle the expression or 1365 location list into a so-called GExpr (guarded expression). 
*/ 1366__attribute__((noinline)) 1367static GExpr* get_GX ( CUConst* cc, Bool td3, 1368 ULong cts, Int ctsSzB, UWord ctsMemSzB ) 1369{ 1370 GExpr* gexpr = NULL; 1371 if (ctsMemSzB > 0 && ctsSzB == 0) { 1372 /* represents an in-line location expression, and cts points 1373 right at it */ 1374 gexpr = make_singleton_GX( (UChar*)(UWord)cts, ctsMemSzB ); 1375 } 1376 else 1377 if (ctsMemSzB == 0 && ctsSzB > 0) { 1378 /* represents location list. cts is the offset of it in 1379 .debug_loc. */ 1380 if (!cc->cu_svma_known) 1381 cc->barf("get_GX: location list, but CU svma is unknown"); 1382 gexpr = make_general_GX( cc, td3, (UWord)cts, cc->cu_svma ); 1383 } 1384 else { 1385 vg_assert(0); /* else caller is bogus */ 1386 } 1387 return gexpr; 1388} 1389 1390 1391static 1392void read_filename_table( /*MOD*/D3VarParser* parser, 1393 CUConst* cc, UWord debug_line_offset, 1394 Bool td3 ) 1395{ 1396 Bool is_dw64; 1397 Cursor c; 1398 Word i; 1399 UShort version; 1400 UChar opcode_base; 1401 UChar* str; 1402 1403 vg_assert(parser && cc && cc->barf); 1404 if ((!cc->debug_line_img) 1405 || cc->debug_line_sz <= debug_line_offset) 1406 cc->barf("read_filename_table: .debug_line is missing?"); 1407 1408 init_Cursor( &c, cc->debug_line_img, 1409 cc->debug_line_sz, debug_line_offset, cc->barf, 1410 "Overrun whilst reading .debug_line section(1)" ); 1411 1412 /* unit_length = */ 1413 get_Initial_Length( &is_dw64, &c, 1414 "read_filename_table: invalid initial-length field" ); 1415 version = get_UShort( &c ); 1416 if (version != 2 && version != 3 && version != 4) 1417 cc->barf("read_filename_table: Only DWARF version 2, 3 and 4 line info " 1418 "is currently supported."); 1419 /*header_length = (ULong)*/ get_Dwarfish_UWord( &c, is_dw64 ); 1420 /*minimum_instruction_length = */ get_UChar( &c ); 1421 if (version >= 4) 1422 /*maximum_operations_per_insn = */ get_UChar( &c ); 1423 /*default_is_stmt = */ get_UChar( &c ); 1424 /*line_base = (Char)*/ get_UChar( &c ); 1425 /*line_range = */ 
get_UChar( &c ); 1426 opcode_base = get_UChar( &c ); 1427 /* skip over "standard_opcode_lengths" */ 1428 for (i = 1; i < (Word)opcode_base; i++) 1429 (void)get_UChar( &c ); 1430 1431 /* skip over the directory names table */ 1432 while (peek_UChar(&c) != 0) { 1433 (void)get_AsciiZ(&c); 1434 } 1435 (void)get_UChar(&c); /* skip terminating zero */ 1436 1437 /* Read and record the file names table */ 1438 vg_assert(parser->filenameTable); 1439 vg_assert( VG_(sizeXA)( parser->filenameTable ) == 0 ); 1440 /* Add a dummy index-zero entry. DWARF3 numbers its files 1441 from 1, for some reason. */ 1442 str = ML_(addStr)( cc->di, "<unknown_file>", -1 ); 1443 VG_(addToXA)( parser->filenameTable, &str ); 1444 while (peek_UChar(&c) != 0) { 1445 str = get_AsciiZ(&c); 1446 TRACE_D3(" read_filename_table: %ld %s\n", 1447 VG_(sizeXA)(parser->filenameTable), str); 1448 str = ML_(addStr)( cc->di, str, -1 ); 1449 VG_(addToXA)( parser->filenameTable, &str ); 1450 (void)get_ULEB128( &c ); /* skip directory index # */ 1451 (void)get_ULEB128( &c ); /* skip last mod time */ 1452 (void)get_ULEB128( &c ); /* file size */ 1453 } 1454 /* We're done! The rest of it is not interesting. 
*/ 1455} 1456 1457 1458__attribute__((noinline)) 1459static void parse_var_DIE ( 1460 /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree, 1461 /*MOD*/XArray* /* of TempVar* */ tempvars, 1462 /*MOD*/XArray* /* of GExpr* */ gexprs, 1463 /*MOD*/D3VarParser* parser, 1464 DW_TAG dtag, 1465 UWord posn, 1466 Int level, 1467 Cursor* c_die, 1468 Cursor* c_abbv, 1469 CUConst* cc, 1470 Bool td3 1471) 1472{ 1473 ULong cts; 1474 Int ctsSzB; 1475 UWord ctsMemSzB; 1476 1477 UWord saved_die_c_offset = get_position_of_Cursor( c_die ); 1478 UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv ); 1479 1480 varstack_preen( parser, td3, level-1 ); 1481 1482 if (dtag == DW_TAG_compile_unit) { 1483 Bool have_lo = False; 1484 Bool have_hi1 = False; 1485 Bool have_range = False; 1486 Addr ip_lo = 0; 1487 Addr ip_hi1 = 0; 1488 Addr rangeoff = 0; 1489 while (True) { 1490 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 1491 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 1492 if (attr == 0 && form == 0) break; 1493 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 1494 cc, c_die, False/*td3*/, form ); 1495 if (attr == DW_AT_low_pc && ctsSzB > 0) { 1496 ip_lo = cts; 1497 have_lo = True; 1498 } 1499 if (attr == DW_AT_high_pc && ctsSzB > 0) { 1500 ip_hi1 = cts; 1501 have_hi1 = True; 1502 } 1503 if (attr == DW_AT_ranges && ctsSzB > 0) { 1504 rangeoff = cts; 1505 have_range = True; 1506 } 1507 if (attr == DW_AT_stmt_list && ctsSzB > 0) { 1508 read_filename_table( parser, cc, (UWord)cts, td3 ); 1509 } 1510 } 1511 /* Now, does this give us an opportunity to find this 1512 CU's svma? */ 1513#if 0 1514 if (level == 0 && have_lo) { 1515 vg_assert(!cc->cu_svma_known); /* if this fails, it must be 1516 because we've already seen a DW_TAG_compile_unit DIE at level 1517 0. But that can't happen, because DWARF3 only allows exactly 1518 one top level DIE per CU. 
*/ 1519 cc->cu_svma_known = True; 1520 cc->cu_svma = ip_lo; 1521 if (1) 1522 TRACE_D3("BBBBAAAA acquire CU_SVMA of %p\n", cc->cu_svma); 1523 /* Now, it may be that this DIE doesn't tell us the CU's 1524 SVMA, by way of not having a DW_AT_low_pc. That's OK -- 1525 the CU doesn't *have* to have its SVMA specified. 1526 1527 But as per last para D3 spec sec 3.1.1 ("Normal and 1528 Partial Compilation Unit Entries", "If the base address 1529 (viz, the SVMA) is undefined, then any DWARF entry of 1530 structure defined interms of the base address of that 1531 compilation unit is not valid.". So that means, if whilst 1532 processing the children of this top level DIE (or their 1533 children, etc) we see a DW_AT_range, and cu_svma_known is 1534 False, then the DIE that contains it is (per the spec) 1535 invalid, and we can legitimately stop and complain. */ 1536 } 1537#else 1538 /* .. whereas The Reality is, simply assume the SVMA is zero 1539 if it isn't specified. */ 1540 if (level == 0) { 1541 vg_assert(!cc->cu_svma_known); 1542 cc->cu_svma_known = True; 1543 if (have_lo) 1544 cc->cu_svma = ip_lo; 1545 else 1546 cc->cu_svma = 0; 1547 } 1548#endif 1549 /* Do we have something that looks sane? */ 1550 if (have_lo && have_hi1 && (!have_range)) { 1551 if (ip_lo < ip_hi1) 1552 varstack_push( cc, parser, td3, 1553 unitary_range_list(ip_lo, ip_hi1 - 1), 1554 level, 1555 False/*isFunc*/, NULL/*fbGX*/ ); 1556 } else 1557 if ((!have_lo) && (!have_hi1) && have_range) { 1558 varstack_push( cc, parser, td3, 1559 get_range_list( cc, td3, 1560 rangeoff, cc->cu_svma ), 1561 level, 1562 False/*isFunc*/, NULL/*fbGX*/ ); 1563 } else 1564 if ((!have_lo) && (!have_hi1) && (!have_range)) { 1565 /* CU has no code, presumably? */ 1566 varstack_push( cc, parser, td3, 1567 empty_range_list(), 1568 level, 1569 False/*isFunc*/, NULL/*fbGX*/ ); 1570 } else 1571 if (have_lo && (!have_hi1) && have_range && ip_lo == 0) { 1572 /* broken DIE created by gcc-4.3.X ? 
Ignore the 1573 apparently-redundant DW_AT_low_pc and use the DW_AT_ranges 1574 instead. */ 1575 varstack_push( cc, parser, td3, 1576 get_range_list( cc, td3, 1577 rangeoff, cc->cu_svma ), 1578 level, 1579 False/*isFunc*/, NULL/*fbGX*/ ); 1580 } else { 1581 if (0) VG_(printf)("I got hlo %d hhi1 %d hrange %d\n", 1582 (Int)have_lo, (Int)have_hi1, (Int)have_range); 1583 goto bad_DIE; 1584 } 1585 } 1586 1587 if (dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram) { 1588 Bool have_lo = False; 1589 Bool have_hi1 = False; 1590 Bool have_range = False; 1591 Addr ip_lo = 0; 1592 Addr ip_hi1 = 0; 1593 Addr rangeoff = 0; 1594 Bool isFunc = dtag == DW_TAG_subprogram; 1595 GExpr* fbGX = NULL; 1596 while (True) { 1597 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 1598 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 1599 if (attr == 0 && form == 0) break; 1600 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 1601 cc, c_die, False/*td3*/, form ); 1602 if (attr == DW_AT_low_pc && ctsSzB > 0) { 1603 ip_lo = cts; 1604 have_lo = True; 1605 } 1606 if (attr == DW_AT_high_pc && ctsSzB > 0) { 1607 ip_hi1 = cts; 1608 have_hi1 = True; 1609 } 1610 if (attr == DW_AT_ranges && ctsSzB > 0) { 1611 rangeoff = cts; 1612 have_range = True; 1613 } 1614 if (isFunc 1615 && attr == DW_AT_frame_base 1616 && ((ctsMemSzB > 0 && ctsSzB == 0) 1617 || (ctsMemSzB == 0 && ctsSzB > 0))) { 1618 fbGX = get_GX( cc, False/*td3*/, cts, ctsSzB, ctsMemSzB ); 1619 vg_assert(fbGX); 1620 VG_(addToXA)(gexprs, &fbGX); 1621 } 1622 } 1623 /* Do we have something that looks sane? */ 1624 if (dtag == DW_TAG_subprogram 1625 && (!have_lo) && (!have_hi1) && (!have_range)) { 1626 /* This is legit - ignore it. Sec 3.3.3: "A subroutine entry 1627 representing a subroutine declaration that is not also a 1628 definition does not have code address or range 1629 attributes." 
*/ 1630 } else 1631 if (dtag == DW_TAG_lexical_block 1632 && (!have_lo) && (!have_hi1) && (!have_range)) { 1633 /* I believe this is legit, and means the lexical block 1634 contains no insns (whatever that might mean). Ignore. */ 1635 } else 1636 if (have_lo && have_hi1 && (!have_range)) { 1637 /* This scope supplies just a single address range. */ 1638 if (ip_lo < ip_hi1) 1639 varstack_push( cc, parser, td3, 1640 unitary_range_list(ip_lo, ip_hi1 - 1), 1641 level, isFunc, fbGX ); 1642 } else 1643 if ((!have_lo) && (!have_hi1) && have_range) { 1644 /* This scope supplies multiple address ranges via the use of 1645 a range list. */ 1646 varstack_push( cc, parser, td3, 1647 get_range_list( cc, td3, 1648 rangeoff, cc->cu_svma ), 1649 level, isFunc, fbGX ); 1650 } else 1651 if (have_lo && (!have_hi1) && (!have_range)) { 1652 /* This scope is bogus. The D3 spec sec 3.4 (Lexical Block 1653 Entries) says fairly clearly that a scope must have either 1654 _range or (_low_pc and _high_pc). */ 1655 /* The spec is a bit ambiguous though. Perhaps a single byte 1656 range is intended? 
See sec 2.17 (Code Addresses And Ranges) */ 1657 /* This case is here because icc9 produced this: 1658 <2><13bd>: DW_TAG_lexical_block 1659 DW_AT_decl_line : 5229 1660 DW_AT_decl_column : 37 1661 DW_AT_decl_file : 1 1662 DW_AT_low_pc : 0x401b03 1663 */ 1664 /* Ignore (seems safe than pushing a single byte range) */ 1665 } else 1666 goto bad_DIE; 1667 } 1668 1669 if (dtag == DW_TAG_variable || dtag == DW_TAG_formal_parameter) { 1670 UChar* name = NULL; 1671 UWord typeR = D3_INVALID_CUOFF; 1672 Bool external = False; 1673 GExpr* gexpr = NULL; 1674 Int n_attrs = 0; 1675 UWord abs_ori = (UWord)D3_INVALID_CUOFF; 1676 Int lineNo = 0; 1677 UChar* fileName = NULL; 1678 while (True) { 1679 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 1680 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 1681 if (attr == 0 && form == 0) break; 1682 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 1683 cc, c_die, False/*td3*/, form ); 1684 n_attrs++; 1685 if (attr == DW_AT_name && ctsMemSzB > 0) { 1686 name = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 ); 1687 } 1688 if (attr == DW_AT_location 1689 && ((ctsMemSzB > 0 && ctsSzB == 0) 1690 || (ctsMemSzB == 0 && ctsSzB > 0))) { 1691 gexpr = get_GX( cc, False/*td3*/, cts, ctsSzB, ctsMemSzB ); 1692 vg_assert(gexpr); 1693 VG_(addToXA)(gexprs, &gexpr); 1694 } 1695 if (attr == DW_AT_type && ctsSzB > 0) { 1696 typeR = (UWord)cts; 1697 } 1698 if (attr == DW_AT_external && ctsSzB > 0 && cts > 0) { 1699 external = True; 1700 } 1701 if (attr == DW_AT_abstract_origin && ctsSzB > 0) { 1702 abs_ori = (UWord)cts; 1703 } 1704 if (attr == DW_AT_declaration && ctsSzB > 0 && cts > 0) { 1705 /*declaration = True;*/ 1706 } 1707 if (attr == DW_AT_decl_line && ctsSzB > 0) { 1708 lineNo = (Int)cts; 1709 } 1710 if (attr == DW_AT_decl_file && ctsSzB > 0) { 1711 Int ftabIx = (Int)cts; 1712 if (ftabIx >= 1 1713 && ftabIx < VG_(sizeXA)( parser->filenameTable )) { 1714 fileName = *(UChar**) 1715 VG_(indexXA)( parser->filenameTable, ftabIx ); 1716 vg_assert(fileName); 1717 } 1718 
if (0) VG_(printf)("XXX filename = %s\n", fileName); 1719 } 1720 } 1721 /* We'll collect it under if one of the following three 1722 conditions holds: 1723 (1) has location and type -> completed 1724 (2) has type only -> is an abstract instance 1725 (3) has location and abs_ori -> is a concrete instance 1726 Name, filename and line number are all optional frills. 1727 */ 1728 if ( /* 1 */ (gexpr && typeR != D3_INVALID_CUOFF) 1729 /* 2 */ || (typeR != D3_INVALID_CUOFF) 1730 /* 3 */ || (gexpr && abs_ori != (UWord)D3_INVALID_CUOFF) ) { 1731 1732 /* Add this variable to the list of interesting looking 1733 variables. Crucially, note along with it the address 1734 range(s) associated with the variable, which for locals 1735 will be the address ranges at the top of the varparser's 1736 stack. */ 1737 GExpr* fbGX = NULL; 1738 Word i, nRanges; 1739 XArray* /* of AddrRange */ xa; 1740 TempVar* tv; 1741 /* Stack can't be empty; we put a dummy entry on it for the 1742 entire address range before starting with the DIEs for 1743 this CU. */ 1744 vg_assert(parser->sp >= 0); 1745 1746 /* If this is a local variable (non-external), try to find 1747 the GExpr for the DW_AT_frame_base of the containing 1748 function. It should have been pushed on the stack at the 1749 time we encountered its DW_TAG_subprogram DIE, so the way 1750 to find it is to scan back down the stack looking for it. 1751 If there isn't an enclosing stack entry marked 'isFunc' 1752 then we must be seeing variable or formal param DIEs 1753 outside of a function, so we deem the Dwarf to be 1754 malformed if that happens. Note that the fbGX may be NULL 1755 if the containing DT_TAG_subprogram didn't supply a 1756 DW_AT_frame_base -- that's OK, but there must actually be 1757 a containing DW_TAG_subprogram. 
*/ 1758 if (!external) { 1759 Bool found = False; 1760 for (i = parser->sp; i >= 0; i--) { 1761 if (parser->isFunc[i]) { 1762 fbGX = parser->fbGX[i]; 1763 found = True; 1764 break; 1765 } 1766 } 1767 if (!found) { 1768 if (0 && VG_(clo_verbosity) >= 0) { 1769 VG_(message)(Vg_DebugMsg, 1770 "warning: parse_var_DIE: non-external variable " 1771 "outside DW_TAG_subprogram\n"); 1772 } 1773 /* goto bad_DIE; */ 1774 /* This seems to happen a lot. Just ignore it -- if, 1775 when we come to evaluation of the location (guarded) 1776 expression, it requires a frame base value, and 1777 there's no expression for that, then evaluation as a 1778 whole will fail. Harmless - a bit of a waste of 1779 cycles but nothing more. */ 1780 } 1781 } 1782 1783 /* re "external ? 0 : parser->sp" (twice), if the var is 1784 marked 'external' then we must put it at the global scope, 1785 as only the global scope (level 0) covers the entire PC 1786 address space. It is asserted elsewhere that level 0 1787 always covers the entire address space. */ 1788 xa = parser->ranges[external ? 0 : parser->sp]; 1789 nRanges = VG_(sizeXA)(xa); 1790 vg_assert(nRanges >= 0); 1791 1792 tv = ML_(dinfo_zalloc)( "di.readdwarf3.pvD.1", sizeof(TempVar) ); 1793 tv->name = name; 1794 tv->level = external ? 0 : parser->sp; 1795 tv->typeR = typeR; 1796 tv->gexpr = gexpr; 1797 tv->fbGX = fbGX; 1798 tv->fName = fileName; 1799 tv->fLine = lineNo; 1800 tv->dioff = posn; 1801 tv->absOri = abs_ori; 1802 1803 /* See explanation on definition of type TempVar for the 1804 reason for this elaboration. */ 1805 tv->nRanges = nRanges; 1806 tv->rngOneMin = 0; 1807 tv->rngOneMax = 0; 1808 tv->rngMany = NULL; 1809 if (nRanges == 1) { 1810 AddrRange* range = VG_(indexXA)(xa, 0); 1811 tv->rngOneMin = range->aMin; 1812 tv->rngOneMax = range->aMax; 1813 } 1814 else if (nRanges > 1) { 1815 /* See if we already have a range list which is 1816 structurally identical. If so, use that; if not, clone 1817 this one, and add it to our collection. 
*/ 1818 UWord keyW, valW; 1819 if (VG_(lookupFM)( rangestree, &keyW, &valW, (UWord)xa )) { 1820 XArray* old = (XArray*)keyW; 1821 tl_assert(valW == 0); 1822 tl_assert(old != xa); 1823 tv->rngMany = old; 1824 } else { 1825 XArray* cloned = VG_(cloneXA)( "di.readdwarf3.pvD.2", xa ); 1826 tv->rngMany = cloned; 1827 VG_(addToFM)( rangestree, (UWord)cloned, 0 ); 1828 } 1829 } 1830 1831 VG_(addToXA)( tempvars, &tv ); 1832 1833 TRACE_D3(" Recording this variable, with %ld PC range(s)\n", 1834 VG_(sizeXA)(xa) ); 1835 /* collect stats on how effective the ->ranges special 1836 casing is */ 1837 if (0) { 1838 static Int ntot=0, ngt=0; 1839 ntot++; 1840 if (tv->rngMany) ngt++; 1841 if (0 == (ntot % 100000)) 1842 VG_(printf)("XXXX %d tot, %d cloned\n", ntot, ngt); 1843 } 1844 1845 } 1846 1847 /* Here are some other weird cases seen in the wild: 1848 1849 We have a variable with a name and a type, but no 1850 location. I guess that's a sign that it has been 1851 optimised away. Ignore it. Here's an example: 1852 1853 static Int lc_compar(void* n1, void* n2) { 1854 MC_Chunk* mc1 = *(MC_Chunk**)n1; 1855 MC_Chunk* mc2 = *(MC_Chunk**)n2; 1856 return (mc1->data < mc2->data ? -1 : 1); 1857 } 1858 1859 Both mc1 and mc2 are like this 1860 <2><5bc>: Abbrev Number: 21 (DW_TAG_variable) 1861 DW_AT_name : mc1 1862 DW_AT_decl_file : 1 1863 DW_AT_decl_line : 216 1864 DW_AT_type : <5d3> 1865 1866 whereas n1 and n2 do have locations specified. 1867 1868 --------------------------------------------- 1869 1870 We see a DW_TAG_formal_parameter with a type, but 1871 no name and no location. 
It's probably part of a function type 1872 construction, thusly, hence ignore it: 1873 <1><2b4>: Abbrev Number: 12 (DW_TAG_subroutine_type) 1874 DW_AT_sibling : <2c9> 1875 DW_AT_prototyped : 1 1876 DW_AT_type : <114> 1877 <2><2be>: Abbrev Number: 13 (DW_TAG_formal_parameter) 1878 DW_AT_type : <13e> 1879 <2><2c3>: Abbrev Number: 13 (DW_TAG_formal_parameter) 1880 DW_AT_type : <133> 1881 1882 --------------------------------------------- 1883 1884 Is very minimal, like this: 1885 <4><81d>: Abbrev Number: 44 (DW_TAG_variable) 1886 DW_AT_abstract_origin: <7ba> 1887 What that signifies I have no idea. Ignore. 1888 1889 ---------------------------------------------- 1890 1891 Is very minimal, like this: 1892 <200f>: DW_TAG_formal_parameter 1893 DW_AT_abstract_ori: <1f4c> 1894 DW_AT_location : 13440 1895 What that signifies I have no idea. Ignore. 1896 It might be significant, though: the variable at least 1897 has a location and so might exist somewhere. 1898 Maybe we should handle this. 1899 1900 --------------------------------------------- 1901 1902 <22407>: DW_TAG_variable 1903 DW_AT_name : (indirect string, offset: 0x6579): 1904 vgPlain_trampoline_stuff_start 1905 DW_AT_decl_file : 29 1906 DW_AT_decl_line : 56 1907 DW_AT_external : 1 1908 DW_AT_declaration : 1 1909 1910 Nameless and typeless variable that has a location? Who 1911 knows. Not me. 1912 <2><3d178>: Abbrev Number: 22 (DW_TAG_variable) 1913 DW_AT_location : 9 byte block: 3 c0 c7 13 38 0 0 0 0 1914 (DW_OP_addr: 3813c7c0) 1915 1916 No, really. Check it out. gcc is quite simply borked. 
1917 <3><168cc>: Abbrev Number: 141 (DW_TAG_variable) 1918 // followed by no attributes, and the next DIE is a sibling, 1919 // not a child 1920 */ 1921 } 1922 return; 1923 1924 bad_DIE: 1925 set_position_of_Cursor( c_die, saved_die_c_offset ); 1926 set_position_of_Cursor( c_abbv, saved_abbv_c_offset ); 1927 VG_(printf)("\nparse_var_DIE: confused by:\n"); 1928 VG_(printf)(" <%d><%lx>: %s\n", level, posn, ML_(pp_DW_TAG)( dtag ) ); 1929 while (True) { 1930 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 1931 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 1932 if (attr == 0 && form == 0) break; 1933 VG_(printf)(" %18s: ", ML_(pp_DW_AT)(attr)); 1934 /* Get the form contents, so as to print them */ 1935 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 1936 cc, c_die, True, form ); 1937 VG_(printf)("\t\n"); 1938 } 1939 VG_(printf)("\n"); 1940 cc->barf("parse_var_DIE: confused by the above DIE"); 1941 /*NOTREACHED*/ 1942} 1943 1944 1945/*------------------------------------------------------------*/ 1946/*--- ---*/ 1947/*--- Parsing of type-related DIEs ---*/ 1948/*--- ---*/ 1949/*------------------------------------------------------------*/ 1950 1951#define N_D3_TYPE_STACK 16 1952 1953typedef 1954 struct { 1955 /* What source language? 'C'=C/C++, 'F'=Fortran, '?'=other 1956 Established once per compilation unit. */ 1957 UChar language; 1958 /* A stack of types which are currently under construction */ 1959 Int sp; /* [sp] is innermost active entry; sp==-1 for empty 1960 stack */ 1961 /* Note that the TyEnts in qparentE are temporary copies of the 1962 ones accumulating in the main tyent array. So it is not safe 1963 to free up anything on them when popping them off the stack 1964 (iow, it isn't safe to use TyEnt__make_EMPTY on them). Just 1965 memset them to zero when done. 
*/ 1966 TyEnt qparentE[N_D3_TYPE_STACK]; /* parent TyEnts */ 1967 Int qlevel[N_D3_TYPE_STACK]; 1968 1969 } 1970 D3TypeParser; 1971 1972static void typestack_show ( D3TypeParser* parser, HChar* str ) { 1973 Word i; 1974 VG_(printf)(" typestack (%s) {\n", str); 1975 for (i = 0; i <= parser->sp; i++) { 1976 VG_(printf)(" [%ld] (level %d): ", i, parser->qlevel[i]); 1977 ML_(pp_TyEnt)( &parser->qparentE[i] ); 1978 VG_(printf)("\n"); 1979 } 1980 VG_(printf)(" }\n"); 1981} 1982 1983/* Remove from the stack, all entries with .level > 'level' */ 1984static 1985void typestack_preen ( D3TypeParser* parser, Bool td3, Int level ) 1986{ 1987 Bool changed = False; 1988 vg_assert(parser->sp < N_D3_TYPE_STACK); 1989 while (True) { 1990 vg_assert(parser->sp >= -1); 1991 if (parser->sp == -1) break; 1992 if (parser->qlevel[parser->sp] <= level) break; 1993 if (0) 1994 TRACE_D3("BBBBAAAA typestack_pop [newsp=%d]\n", parser->sp-1); 1995 vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp])); 1996 VG_(memset)(&parser->qparentE[parser->sp], 0, sizeof(TyEnt)); 1997 parser->qparentE[parser->sp].cuOff = D3_INVALID_CUOFF; 1998 parser->qparentE[parser->sp].tag = Te_EMPTY; 1999 parser->qlevel[parser->sp] = 0; 2000 parser->sp--; 2001 changed = True; 2002 } 2003 if (changed && td3) 2004 typestack_show( parser, "after preen" ); 2005} 2006 2007static Bool typestack_is_empty ( D3TypeParser* parser ) { 2008 vg_assert(parser->sp >= -1 && parser->sp < N_D3_TYPE_STACK); 2009 return parser->sp == -1; 2010} 2011 2012static void typestack_push ( CUConst* cc, 2013 D3TypeParser* parser, 2014 Bool td3, 2015 TyEnt* parentE, Int level ) { 2016 if (0) 2017 TRACE_D3("BBBBAAAA typestack_push[newsp=%d]: %d %05lx\n", 2018 parser->sp+1, level, parentE->cuOff); 2019 2020 /* First we need to zap everything >= 'level', as we are about to 2021 replace any previous entry at 'level', so .. 
*/ 2022 typestack_preen(parser, /*td3*/False, level-1); 2023 2024 vg_assert(parser->sp >= -1); 2025 vg_assert(parser->sp < N_D3_TYPE_STACK); 2026 if (parser->sp == N_D3_TYPE_STACK-1) 2027 cc->barf("typestack_push: N_D3_TYPE_STACK is too low; " 2028 "increase and recompile"); 2029 if (parser->sp >= 0) 2030 vg_assert(parser->qlevel[parser->sp] < level); 2031 parser->sp++; 2032 vg_assert(parser->qparentE[parser->sp].tag == Te_EMPTY); 2033 vg_assert(parser->qlevel[parser->sp] == 0); 2034 vg_assert(parentE); 2035 vg_assert(ML_(TyEnt__is_type)(parentE)); 2036 vg_assert(parentE->cuOff != D3_INVALID_CUOFF); 2037 parser->qparentE[parser->sp] = *parentE; 2038 parser->qlevel[parser->sp] = level; 2039 if (td3) 2040 typestack_show( parser, "after push" ); 2041} 2042 2043 2044/* Parse a type-related DIE. 'parser' holds the current parser state. 2045 'admin' is where the completed types are dumped. 'dtag' is the tag 2046 for this DIE. 'c_die' points to the start of the data fields (FORM 2047 stuff) for the DIE. c_abbv points to the start of the (name,form) 2048 pairs which describe the DIE. 2049 2050 We may find the DIE uninteresting, in which case we should ignore 2051 it. 2052 2053 What happens: the DIE is examined. If uninteresting, it is ignored. 2054 Otherwise, the DIE gives rise to two things: 2055 2056 (1) the offset of this DIE in the CU -- the cuOffset, a UWord 2057 (2) a TyAdmin structure, which holds the type, or related stuff 2058 2059 (2) is added at the end of 'tyadmins', at some index, say 'i'. 2060 2061 A pair (cuOffset, i) is added to 'tydict'. 2062 2063 Hence 'tyadmins' holds the actual type entities, and 'tydict' holds 2064 a mapping from cuOffset to the index of the corresponding entry in 2065 'tyadmin'. 2066 2067 When resolving a cuOffset to a TyAdmin, first look up the cuOffset 2068 in the tydict (by binary search). This gives an index into 2069 tyadmins, and the required entity lives in tyadmins at that index. 
2070*/ 2071__attribute__((noinline)) 2072static void parse_type_DIE ( /*MOD*/XArray* /* of TyEnt */ tyents, 2073 /*MOD*/D3TypeParser* parser, 2074 DW_TAG dtag, 2075 UWord posn, 2076 Int level, 2077 Cursor* c_die, 2078 Cursor* c_abbv, 2079 CUConst* cc, 2080 Bool td3 ) 2081{ 2082 ULong cts; 2083 Int ctsSzB; 2084 UWord ctsMemSzB; 2085 TyEnt typeE; 2086 TyEnt atomE; 2087 TyEnt fieldE; 2088 TyEnt boundE; 2089 2090 UWord saved_die_c_offset = get_position_of_Cursor( c_die ); 2091 UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv ); 2092 2093 VG_(memset)( &typeE, 0xAA, sizeof(typeE) ); 2094 VG_(memset)( &atomE, 0xAA, sizeof(atomE) ); 2095 VG_(memset)( &fieldE, 0xAA, sizeof(fieldE) ); 2096 VG_(memset)( &boundE, 0xAA, sizeof(boundE) ); 2097 2098 /* If we've returned to a level at or above any previously noted 2099 parent, un-note it, so we don't believe we're still collecting 2100 its children. */ 2101 typestack_preen( parser, td3, level-1 ); 2102 2103 if (dtag == DW_TAG_compile_unit) { 2104 /* See if we can find DW_AT_language, since it is important for 2105 establishing array bounds (see DW_TAG_subrange_type below in 2106 this fn) */ 2107 while (True) { 2108 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2109 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2110 if (attr == 0 && form == 0) break; 2111 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2112 cc, c_die, False/*td3*/, form ); 2113 if (attr != DW_AT_language) 2114 continue; 2115 if (ctsSzB == 0) 2116 goto bad_DIE; 2117 switch (cts) { 2118 case DW_LANG_C89: case DW_LANG_C: 2119 case DW_LANG_C_plus_plus: case DW_LANG_ObjC: 2120 case DW_LANG_ObjC_plus_plus: case DW_LANG_UPC: 2121 case DW_LANG_Upc: case DW_LANG_C99: 2122 parser->language = 'C'; break; 2123 case DW_LANG_Fortran77: case DW_LANG_Fortran90: 2124 case DW_LANG_Fortran95: 2125 parser->language = 'F'; break; 2126 case DW_LANG_Ada83: case DW_LANG_Cobol74: 2127 case DW_LANG_Cobol85: case DW_LANG_Pascal83: 2128 case DW_LANG_Modula2: case DW_LANG_Java: 2129 
case DW_LANG_Ada95: case DW_LANG_PLI: 2130 case DW_LANG_D: case DW_LANG_Python: 2131 case DW_LANG_Mips_Assembler: 2132 parser->language = '?'; break; 2133 default: 2134 goto bad_DIE; 2135 } 2136 } 2137 } 2138 2139 if (dtag == DW_TAG_base_type) { 2140 /* We can pick up a new base type any time. */ 2141 VG_(memset)(&typeE, 0, sizeof(typeE)); 2142 typeE.cuOff = D3_INVALID_CUOFF; 2143 typeE.tag = Te_TyBase; 2144 while (True) { 2145 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2146 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2147 if (attr == 0 && form == 0) break; 2148 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2149 cc, c_die, False/*td3*/, form ); 2150 if (attr == DW_AT_name && ctsMemSzB > 0) { 2151 typeE.Te.TyBase.name 2152 = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.1", 2153 (UChar*)(UWord)cts ); 2154 } 2155 if (attr == DW_AT_byte_size && ctsSzB > 0) { 2156 typeE.Te.TyBase.szB = cts; 2157 } 2158 if (attr == DW_AT_encoding && ctsSzB > 0) { 2159 switch (cts) { 2160 case DW_ATE_unsigned: case DW_ATE_unsigned_char: 2161 case DW_ATE_boolean:/* FIXME - is this correct? */ 2162 typeE.Te.TyBase.enc = 'U'; break; 2163 case DW_ATE_signed: case DW_ATE_signed_char: 2164 typeE.Te.TyBase.enc = 'S'; break; 2165 case DW_ATE_float: 2166 typeE.Te.TyBase.enc = 'F'; break; 2167 case DW_ATE_complex_float: 2168 typeE.Te.TyBase.enc = 'C'; break; 2169 default: 2170 goto bad_DIE; 2171 } 2172 } 2173 } 2174 2175 /* Invent a name if it doesn't have one. gcc-4.3 2176 -ftree-vectorize is observed to emit nameless base types. */ 2177 if (!typeE.Te.TyBase.name) 2178 typeE.Te.TyBase.name 2179 = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.2", 2180 "<anon_base_type>" ); 2181 2182 /* Do we have something that looks sane? */ 2183 if (/* must have a name */ 2184 typeE.Te.TyBase.name == NULL 2185 /* and a plausible size. 
Yes, really 32: "complex long 2186 double" apparently has size=32 */ 2187 || typeE.Te.TyBase.szB < 0 || typeE.Te.TyBase.szB > 32 2188 /* and a plausible encoding */ 2189 || (typeE.Te.TyBase.enc != 'U' 2190 && typeE.Te.TyBase.enc != 'S' 2191 && typeE.Te.TyBase.enc != 'F' 2192 && typeE.Te.TyBase.enc != 'C')) 2193 goto bad_DIE; 2194 /* Last minute hack: if we see this 2195 <1><515>: DW_TAG_base_type 2196 DW_AT_byte_size : 0 2197 DW_AT_encoding : 5 2198 DW_AT_name : void 2199 convert it into a real Void type. */ 2200 if (typeE.Te.TyBase.szB == 0 2201 && 0 == VG_(strcmp)("void", typeE.Te.TyBase.name)) { 2202 ML_(TyEnt__make_EMPTY)(&typeE); 2203 typeE.tag = Te_TyVoid; 2204 typeE.Te.TyVoid.isFake = False; /* it's a real one! */ 2205 } 2206 2207 goto acquire_Type; 2208 } 2209 2210 if (dtag == DW_TAG_pointer_type || dtag == DW_TAG_reference_type 2211 || dtag == DW_TAG_ptr_to_member_type) { 2212 /* This seems legit for _pointer_type and _reference_type. I 2213 don't know if rolling _ptr_to_member_type in here really is 2214 legit, but it's better than not handling it at all. */ 2215 VG_(memset)(&typeE, 0, sizeof(typeE)); 2216 typeE.cuOff = D3_INVALID_CUOFF; 2217 typeE.tag = Te_TyPorR; 2218 /* target type defaults to void */ 2219 typeE.Te.TyPorR.typeR = D3_FAKEVOID_CUOFF; 2220 typeE.Te.TyPorR.isPtr = dtag == DW_TAG_pointer_type 2221 || dtag == DW_TAG_ptr_to_member_type; 2222 /* These three type kinds don't *have* to specify their size, in 2223 which case we assume it's a machine word. But if they do 2224 specify it, it must be a machine word :-) This probably 2225 assumes that the word size of the Dwarf3 we're reading is the 2226 same size as that on the machine. gcc appears to give a size 2227 whereas icc9 doesn't. 
*/ 2228 typeE.Te.TyPorR.szB = sizeof(UWord); 2229 while (True) { 2230 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2231 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2232 if (attr == 0 && form == 0) break; 2233 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2234 cc, c_die, False/*td3*/, form ); 2235 if (attr == DW_AT_byte_size && ctsSzB > 0) { 2236 typeE.Te.TyPorR.szB = cts; 2237 } 2238 if (attr == DW_AT_type && ctsSzB > 0) { 2239 typeE.Te.TyPorR.typeR = (UWord)cts; 2240 } 2241 } 2242 /* Do we have something that looks sane? */ 2243 if (typeE.Te.TyPorR.szB != sizeof(UWord)) 2244 goto bad_DIE; 2245 else 2246 goto acquire_Type; 2247 } 2248 2249 if (dtag == DW_TAG_enumeration_type) { 2250 /* Create a new Type to hold the results. */ 2251 VG_(memset)(&typeE, 0, sizeof(typeE)); 2252 typeE.cuOff = posn; 2253 typeE.tag = Te_TyEnum; 2254 typeE.Te.TyEnum.atomRs 2255 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.enum_type.1", 2256 ML_(dinfo_free), 2257 sizeof(UWord) ); 2258 while (True) { 2259 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2260 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2261 if (attr == 0 && form == 0) break; 2262 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2263 cc, c_die, False/*td3*/, form ); 2264 if (attr == DW_AT_name && ctsMemSzB > 0) { 2265 typeE.Te.TyEnum.name 2266 = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.2", 2267 (UChar*)(UWord)cts ); 2268 } 2269 if (attr == DW_AT_byte_size && ctsSzB > 0) { 2270 typeE.Te.TyEnum.szB = cts; 2271 } 2272 } 2273 2274 if (!typeE.Te.TyEnum.name) 2275 typeE.Te.TyEnum.name 2276 = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.3", 2277 "<anon_enum_type>" ); 2278 2279 /* Do we have something that looks sane? */ 2280 if (typeE.Te.TyEnum.szB == 0 /* we must know the size */) 2281 goto bad_DIE; 2282 /* On't stack! 
*/ 2283 typestack_push( cc, parser, td3, &typeE, level ); 2284 goto acquire_Type; 2285 } 2286 2287 /* gcc (GCC) 4.4.0 20081017 (experimental) occasionally produces 2288 DW_TAG_enumerator with only a DW_AT_name but no 2289 DW_AT_const_value. This is in violation of the Dwarf3 standard, 2290 and appears to be a new "feature" of gcc - versions 4.3.x and 2291 earlier do not appear to do this. So accept DW_TAG_enumerator 2292 which only have a name but no value. An example: 2293 2294 <1><180>: Abbrev Number: 6 (DW_TAG_enumeration_type) 2295 <181> DW_AT_name : (indirect string, offset: 0xda70): 2296 QtMsgType 2297 <185> DW_AT_byte_size : 4 2298 <186> DW_AT_decl_file : 14 2299 <187> DW_AT_decl_line : 1480 2300 <189> DW_AT_sibling : <0x1a7> 2301 <2><18d>: Abbrev Number: 7 (DW_TAG_enumerator) 2302 <18e> DW_AT_name : (indirect string, offset: 0x9e18): 2303 QtDebugMsg 2304 <2><192>: Abbrev Number: 7 (DW_TAG_enumerator) 2305 <193> DW_AT_name : (indirect string, offset: 0x1505f): 2306 QtWarningMsg 2307 <2><197>: Abbrev Number: 7 (DW_TAG_enumerator) 2308 <198> DW_AT_name : (indirect string, offset: 0x16f4a): 2309 QtCriticalMsg 2310 <2><19c>: Abbrev Number: 7 (DW_TAG_enumerator) 2311 <19d> DW_AT_name : (indirect string, offset: 0x156dd): 2312 QtFatalMsg 2313 <2><1a1>: Abbrev Number: 7 (DW_TAG_enumerator) 2314 <1a2> DW_AT_name : (indirect string, offset: 0x13660): 2315 QtSystemMsg 2316 */ 2317 if (dtag == DW_TAG_enumerator) { 2318 VG_(memset)( &atomE, 0, sizeof(atomE) ); 2319 atomE.cuOff = posn; 2320 atomE.tag = Te_Atom; 2321 while (True) { 2322 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2323 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2324 if (attr == 0 && form == 0) break; 2325 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2326 cc, c_die, False/*td3*/, form ); 2327 if (attr == DW_AT_name && ctsMemSzB > 0) { 2328 atomE.Te.Atom.name 2329 = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enumerator.1", 2330 (UChar*)(UWord)cts ); 2331 } 2332 if (attr == DW_AT_const_value && ctsSzB > 0) { 
2333 atomE.Te.Atom.value = cts; 2334 atomE.Te.Atom.valueKnown = True; 2335 } 2336 } 2337 /* Do we have something that looks sane? */ 2338 if (atomE.Te.Atom.name == NULL) 2339 goto bad_DIE; 2340 /* Do we have a plausible parent? */ 2341 if (typestack_is_empty(parser)) goto bad_DIE; 2342 vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp])); 2343 vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF); 2344 if (level != parser->qlevel[parser->sp]+1) goto bad_DIE; 2345 if (parser->qparentE[parser->sp].tag != Te_TyEnum) goto bad_DIE; 2346 /* Record this child in the parent */ 2347 vg_assert(parser->qparentE[parser->sp].Te.TyEnum.atomRs); 2348 VG_(addToXA)( parser->qparentE[parser->sp].Te.TyEnum.atomRs, 2349 &atomE ); 2350 /* And record the child itself */ 2351 goto acquire_Atom; 2352 } 2353 2354 /* Treat DW_TAG_class_type as if it was a DW_TAG_structure_type. I 2355 don't know if this is correct, but it at least makes this reader 2356 usable for gcc-4.3 produced Dwarf3. */ 2357 if (dtag == DW_TAG_structure_type || dtag == DW_TAG_class_type 2358 || dtag == DW_TAG_union_type) { 2359 Bool have_szB = False; 2360 Bool is_decl = False; 2361 Bool is_spec = False; 2362 /* Create a new Type to hold the results. 
*/ 2363 VG_(memset)(&typeE, 0, sizeof(typeE)); 2364 typeE.cuOff = posn; 2365 typeE.tag = Te_TyStOrUn; 2366 typeE.Te.TyStOrUn.name = NULL; 2367 typeE.Te.TyStOrUn.fieldRs 2368 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.pTD.struct_type.1", 2369 ML_(dinfo_free), 2370 sizeof(UWord) ); 2371 typeE.Te.TyStOrUn.complete = True; 2372 typeE.Te.TyStOrUn.isStruct = dtag == DW_TAG_structure_type 2373 || dtag == DW_TAG_class_type; 2374 while (True) { 2375 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2376 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2377 if (attr == 0 && form == 0) break; 2378 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2379 cc, c_die, False/*td3*/, form ); 2380 if (attr == DW_AT_name && ctsMemSzB > 0) { 2381 typeE.Te.TyStOrUn.name 2382 = ML_(dinfo_strdup)( "di.readdwarf3.ptD.struct_type.2", 2383 (UChar*)(UWord)cts ); 2384 } 2385 if (attr == DW_AT_byte_size && ctsSzB >= 0) { 2386 typeE.Te.TyStOrUn.szB = cts; 2387 have_szB = True; 2388 } 2389 if (attr == DW_AT_declaration && ctsSzB > 0 && cts > 0) { 2390 is_decl = True; 2391 } 2392 if (attr == DW_AT_specification && ctsSzB > 0 && cts > 0) { 2393 is_spec = True; 2394 } 2395 } 2396 /* Do we have something that looks sane? */ 2397 if (is_decl && (!is_spec)) { 2398 /* It's a DW_AT_declaration. We require the name but 2399 nothing else. */ 2400 if (typeE.Te.TyStOrUn.name == NULL) 2401 goto bad_DIE; 2402 typeE.Te.TyStOrUn.complete = False; 2403 /* JRS 2009 Aug 10: <possible kludge>? */ 2404 /* Push this tyent on the stack, even though it's incomplete. 2405 It appears that gcc-4.4 on Fedora 11 will sometimes create 2406 DW_TAG_member entries for it, and so we need to have a 2407 plausible parent present in order for that to work. See 2408 #200029 comments 8 and 9. 
*/ 2409 typestack_push( cc, parser, td3, &typeE, level ); 2410 /* </possible kludge> */ 2411 goto acquire_Type; 2412 } 2413 if ((!is_decl) /* && (!is_spec) */) { 2414 /* this is the common, ordinary case */ 2415 if ((!have_szB) /* we must know the size */ 2416 /* But the name can be present, or not */) 2417 goto bad_DIE; 2418 /* On't stack! */ 2419 typestack_push( cc, parser, td3, &typeE, level ); 2420 goto acquire_Type; 2421 } 2422 else { 2423 /* don't know how to handle any other variants just now */ 2424 goto bad_DIE; 2425 } 2426 } 2427 2428 if (dtag == DW_TAG_member) { 2429 /* Acquire member entries for both DW_TAG_structure_type and 2430 DW_TAG_union_type. They differ minorly, in that struct 2431 members must have a DW_AT_data_member_location expression 2432 whereas union members must not. */ 2433 Bool parent_is_struct; 2434 VG_(memset)( &fieldE, 0, sizeof(fieldE) ); 2435 fieldE.cuOff = posn; 2436 fieldE.tag = Te_Field; 2437 fieldE.Te.Field.typeR = D3_INVALID_CUOFF; 2438 while (True) { 2439 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2440 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2441 if (attr == 0 && form == 0) break; 2442 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2443 cc, c_die, False/*td3*/, form ); 2444 if (attr == DW_AT_name && ctsMemSzB > 0) { 2445 fieldE.Te.Field.name 2446 = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.1", 2447 (UChar*)(UWord)cts ); 2448 } 2449 if (attr == DW_AT_type && ctsSzB > 0) { 2450 fieldE.Te.Field.typeR = (UWord)cts; 2451 } 2452 /* There are 2 different cases for DW_AT_data_member_location. 2453 If it is a constant class attribute, it contains byte offset 2454 from the beginning of the containing entity. 2455 Otherwise it is a location expression. 
*/ 2456 if (attr == DW_AT_data_member_location && ctsSzB > 0) { 2457 fieldE.Te.Field.nLoc = -1; 2458 fieldE.Te.Field.pos.offset = cts; 2459 } else if (attr == DW_AT_data_member_location && ctsMemSzB > 0) { 2460 fieldE.Te.Field.nLoc = (UWord)ctsMemSzB; 2461 fieldE.Te.Field.pos.loc 2462 = ML_(dinfo_memdup)( "di.readdwarf3.ptD.member.2", 2463 (UChar*)(UWord)cts, 2464 (SizeT)fieldE.Te.Field.nLoc ); 2465 } 2466 } 2467 /* Do we have a plausible parent? */ 2468 if (typestack_is_empty(parser)) goto bad_DIE; 2469 vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp])); 2470 vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF); 2471 if (level != parser->qlevel[parser->sp]+1) goto bad_DIE; 2472 if (parser->qparentE[parser->sp].tag != Te_TyStOrUn) goto bad_DIE; 2473 /* Do we have something that looks sane? If this a member of a 2474 struct, we must have a location expression; but if a member 2475 of a union that is irrelevant (D3 spec sec 5.6.6). We ought 2476 to reject in the latter case, but some compilers have been 2477 observed to emit constant-zero expressions. So just ignore 2478 them. */ 2479 parent_is_struct 2480 = parser->qparentE[parser->sp].Te.TyStOrUn.isStruct; 2481 if (!fieldE.Te.Field.name) 2482 fieldE.Te.Field.name 2483 = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.3", 2484 "<anon_field>" ); 2485 vg_assert(fieldE.Te.Field.name); 2486 if (fieldE.Te.Field.typeR == D3_INVALID_CUOFF) 2487 goto bad_DIE; 2488 if (fieldE.Te.Field.nLoc) { 2489 if (!parent_is_struct) { 2490 /* If this is a union type, pretend we haven't seen the data 2491 member location expression, as it is by definition 2492 redundant (it must be zero). 
*/ 2493 if (fieldE.Te.Field.nLoc > 0) 2494 ML_(dinfo_free)(fieldE.Te.Field.pos.loc); 2495 fieldE.Te.Field.pos.loc = NULL; 2496 fieldE.Te.Field.nLoc = 0; 2497 } 2498 /* Record this child in the parent */ 2499 fieldE.Te.Field.isStruct = parent_is_struct; 2500 vg_assert(parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs); 2501 VG_(addToXA)( parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs, 2502 &posn ); 2503 /* And record the child itself */ 2504 goto acquire_Field; 2505 } else { 2506 /* Member with no location - this can happen with static 2507 const members in C++ code which are compile time constants 2508 that do no exist in the class. They're not of any interest 2509 to us so we ignore them. */ 2510 } 2511 } 2512 2513 if (dtag == DW_TAG_array_type) { 2514 VG_(memset)(&typeE, 0, sizeof(typeE)); 2515 typeE.cuOff = posn; 2516 typeE.tag = Te_TyArray; 2517 typeE.Te.TyArray.typeR = D3_INVALID_CUOFF; 2518 typeE.Te.TyArray.boundRs 2519 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.array_type.1", 2520 ML_(dinfo_free), 2521 sizeof(UWord) ); 2522 while (True) { 2523 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2524 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2525 if (attr == 0 && form == 0) break; 2526 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2527 cc, c_die, False/*td3*/, form ); 2528 if (attr == DW_AT_type && ctsSzB > 0) { 2529 typeE.Te.TyArray.typeR = (UWord)cts; 2530 } 2531 } 2532 if (typeE.Te.TyArray.typeR == D3_INVALID_CUOFF) 2533 goto bad_DIE; 2534 /* On't stack! 
*/ 2535 typestack_push( cc, parser, td3, &typeE, level ); 2536 goto acquire_Type; 2537 } 2538 2539 if (dtag == DW_TAG_subrange_type) { 2540 Bool have_lower = False; 2541 Bool have_upper = False; 2542 Bool have_count = False; 2543 Long lower = 0; 2544 Long upper = 0; 2545 2546 switch (parser->language) { 2547 case 'C': have_lower = True; lower = 0; break; 2548 case 'F': have_lower = True; lower = 1; break; 2549 case '?': have_lower = False; break; 2550 default: vg_assert(0); /* assured us by handling of 2551 DW_TAG_compile_unit in this fn */ 2552 } 2553 2554 VG_(memset)( &boundE, 0, sizeof(boundE) ); 2555 boundE.cuOff = D3_INVALID_CUOFF; 2556 boundE.tag = Te_Bound; 2557 while (True) { 2558 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2559 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2560 if (attr == 0 && form == 0) break; 2561 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2562 cc, c_die, False/*td3*/, form ); 2563 if (attr == DW_AT_lower_bound && ctsSzB > 0) { 2564 lower = (Long)cts; 2565 have_lower = True; 2566 } 2567 if (attr == DW_AT_upper_bound && ctsSzB > 0) { 2568 upper = (Long)cts; 2569 have_upper = True; 2570 } 2571 if (attr == DW_AT_count && ctsSzB > 0) { 2572 /*count = (Long)cts;*/ 2573 have_count = True; 2574 } 2575 } 2576 /* FIXME: potentially skip the rest if no parent present, since 2577 it could be the case that this subrange type is free-standing 2578 (not being used to describe the bounds of a containing array 2579 type) */ 2580 /* Do we have a plausible parent? 
*/ 2581 if (typestack_is_empty(parser)) goto bad_DIE; 2582 vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp])); 2583 vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF); 2584 if (level != parser->qlevel[parser->sp]+1) goto bad_DIE; 2585 if (parser->qparentE[parser->sp].tag != Te_TyArray) goto bad_DIE; 2586 2587 /* Figure out if we have a definite range or not */ 2588 if (have_lower && have_upper && (!have_count)) { 2589 boundE.Te.Bound.knownL = True; 2590 boundE.Te.Bound.knownU = True; 2591 boundE.Te.Bound.boundL = lower; 2592 boundE.Te.Bound.boundU = upper; 2593 } 2594 else if (have_lower && (!have_upper) && (!have_count)) { 2595 boundE.Te.Bound.knownL = True; 2596 boundE.Te.Bound.knownU = False; 2597 boundE.Te.Bound.boundL = lower; 2598 boundE.Te.Bound.boundU = 0; 2599 } 2600 else if ((!have_lower) && have_upper && (!have_count)) { 2601 boundE.Te.Bound.knownL = False; 2602 boundE.Te.Bound.knownU = True; 2603 boundE.Te.Bound.boundL = 0; 2604 boundE.Te.Bound.boundU = upper; 2605 } 2606 else if ((!have_lower) && (!have_upper) && (!have_count)) { 2607 boundE.Te.Bound.knownL = False; 2608 boundE.Te.Bound.knownU = False; 2609 boundE.Te.Bound.boundL = 0; 2610 boundE.Te.Bound.boundU = 0; 2611 } else { 2612 /* FIXME: handle more cases */ 2613 goto bad_DIE; 2614 } 2615 2616 /* Record this bound in the parent */ 2617 boundE.cuOff = posn; 2618 vg_assert(parser->qparentE[parser->sp].Te.TyArray.boundRs); 2619 VG_(addToXA)( parser->qparentE[parser->sp].Te.TyArray.boundRs, 2620 &boundE ); 2621 /* And record the child itself */ 2622 goto acquire_Bound; 2623 } 2624 2625 if (dtag == DW_TAG_typedef) { 2626 /* We can pick up a new typedef any time. 
*/ 2627 VG_(memset)(&typeE, 0, sizeof(typeE)); 2628 typeE.cuOff = D3_INVALID_CUOFF; 2629 typeE.tag = Te_TyTyDef; 2630 typeE.Te.TyTyDef.name = NULL; 2631 typeE.Te.TyTyDef.typeR = D3_INVALID_CUOFF; 2632 while (True) { 2633 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2634 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2635 if (attr == 0 && form == 0) break; 2636 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2637 cc, c_die, False/*td3*/, form ); 2638 if (attr == DW_AT_name && ctsMemSzB > 0) { 2639 typeE.Te.TyTyDef.name 2640 = ML_(dinfo_strdup)( "di.readdwarf3.ptD.typedef.1", 2641 (UChar*)(UWord)cts ); 2642 } 2643 if (attr == DW_AT_type && ctsSzB > 0) { 2644 typeE.Te.TyTyDef.typeR = (UWord)cts; 2645 } 2646 } 2647 /* Do we have something that looks sane? */ 2648 if (/* must have a name */ 2649 typeE.Te.TyTyDef.name == NULL 2650 /* but the referred-to type can be absent */) 2651 goto bad_DIE; 2652 else 2653 goto acquire_Type; 2654 } 2655 2656 if (dtag == DW_TAG_subroutine_type) { 2657 /* function type? just record that one fact and ask no 2658 further questions. */ 2659 VG_(memset)(&typeE, 0, sizeof(typeE)); 2660 typeE.cuOff = D3_INVALID_CUOFF; 2661 typeE.tag = Te_TyFn; 2662 goto acquire_Type; 2663 } 2664 2665 if (dtag == DW_TAG_volatile_type || dtag == DW_TAG_const_type) { 2666 Int have_ty = 0; 2667 VG_(memset)(&typeE, 0, sizeof(typeE)); 2668 typeE.cuOff = D3_INVALID_CUOFF; 2669 typeE.tag = Te_TyQual; 2670 typeE.Te.TyQual.qual 2671 = dtag == DW_TAG_volatile_type ? 
'V' : 'C'; 2672 /* target type defaults to 'void' */ 2673 typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF; 2674 while (True) { 2675 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2676 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2677 if (attr == 0 && form == 0) break; 2678 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2679 cc, c_die, False/*td3*/, form ); 2680 if (attr == DW_AT_type && ctsSzB > 0) { 2681 typeE.Te.TyQual.typeR = (UWord)cts; 2682 have_ty++; 2683 } 2684 } 2685 /* gcc sometimes generates DW_TAG_const/volatile_type without 2686 DW_AT_type and GDB appears to interpret the type as 'const 2687 void' (resp. 'volatile void'). So just allow it .. */ 2688 if (have_ty == 1 || have_ty == 0) 2689 goto acquire_Type; 2690 else 2691 goto bad_DIE; 2692 } 2693 2694 /* else ignore this DIE */ 2695 return; 2696 /*NOTREACHED*/ 2697 2698 acquire_Type: 2699 if (0) VG_(printf)("YYYY Acquire Type\n"); 2700 vg_assert(ML_(TyEnt__is_type)( &typeE )); 2701 vg_assert(typeE.cuOff == D3_INVALID_CUOFF || typeE.cuOff == posn); 2702 typeE.cuOff = posn; 2703 VG_(addToXA)( tyents, &typeE ); 2704 return; 2705 /*NOTREACHED*/ 2706 2707 acquire_Atom: 2708 if (0) VG_(printf)("YYYY Acquire Atom\n"); 2709 vg_assert(atomE.tag == Te_Atom); 2710 vg_assert(atomE.cuOff == D3_INVALID_CUOFF || atomE.cuOff == posn); 2711 atomE.cuOff = posn; 2712 VG_(addToXA)( tyents, &atomE ); 2713 return; 2714 /*NOTREACHED*/ 2715 2716 acquire_Field: 2717 /* For union members, Expr should be absent */ 2718 if (0) VG_(printf)("YYYY Acquire Field\n"); 2719 vg_assert(fieldE.tag == Te_Field); 2720 vg_assert(fieldE.Te.Field.nLoc <= 0 || fieldE.Te.Field.pos.loc != NULL); 2721 vg_assert(fieldE.Te.Field.nLoc != 0 || fieldE.Te.Field.pos.loc == NULL); 2722 if (fieldE.Te.Field.isStruct) { 2723 vg_assert(fieldE.Te.Field.nLoc != 0); 2724 } else { 2725 vg_assert(fieldE.Te.Field.nLoc == 0); 2726 } 2727 vg_assert(fieldE.cuOff == D3_INVALID_CUOFF || fieldE.cuOff == posn); 2728 fieldE.cuOff = posn; 2729 VG_(addToXA)( tyents, &fieldE ); 2730 
return; 2731 /*NOTREACHED*/ 2732 2733 acquire_Bound: 2734 if (0) VG_(printf)("YYYY Acquire Bound\n"); 2735 vg_assert(boundE.tag == Te_Bound); 2736 vg_assert(boundE.cuOff == D3_INVALID_CUOFF || boundE.cuOff == posn); 2737 boundE.cuOff = posn; 2738 VG_(addToXA)( tyents, &boundE ); 2739 return; 2740 /*NOTREACHED*/ 2741 2742 bad_DIE: 2743 set_position_of_Cursor( c_die, saved_die_c_offset ); 2744 set_position_of_Cursor( c_abbv, saved_abbv_c_offset ); 2745 VG_(printf)("\nparse_type_DIE: confused by:\n"); 2746 VG_(printf)(" <%d><%lx>: %s\n", level, posn, ML_(pp_DW_TAG)( dtag ) ); 2747 while (True) { 2748 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2749 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2750 if (attr == 0 && form == 0) break; 2751 VG_(printf)(" %18s: ", ML_(pp_DW_AT)(attr)); 2752 /* Get the form contents, so as to print them */ 2753 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2754 cc, c_die, True, form ); 2755 VG_(printf)("\t\n"); 2756 } 2757 VG_(printf)("\n"); 2758 cc->barf("parse_type_DIE: confused by the above DIE"); 2759 /*NOTREACHED*/ 2760} 2761 2762 2763/*------------------------------------------------------------*/ 2764/*--- ---*/ 2765/*--- Compression of type DIE information ---*/ 2766/*--- ---*/ 2767/*------------------------------------------------------------*/ 2768 2769static UWord chase_cuOff ( Bool* changed, 2770 XArray* /* of TyEnt */ ents, 2771 TyEntIndexCache* ents_cache, 2772 UWord cuOff ) 2773{ 2774 TyEnt* ent; 2775 ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, cuOff ); 2776 2777 if (!ent) { 2778 VG_(printf)("chase_cuOff: no entry for 0x%05lx\n", cuOff); 2779 *changed = False; 2780 return cuOff; 2781 } 2782 2783 vg_assert(ent->tag != Te_EMPTY); 2784 if (ent->tag != Te_INDIR) { 2785 *changed = False; 2786 return cuOff; 2787 } else { 2788 vg_assert(ent->Te.INDIR.indR < cuOff); 2789 *changed = True; 2790 return ent->Te.INDIR.indR; 2791 } 2792} 2793 2794static 2795void chase_cuOffs_in_XArray ( Bool* changed, 2796 XArray* /* of TyEnt */ 
ents, 2797 TyEntIndexCache* ents_cache, 2798 /*MOD*/XArray* /* of UWord */ cuOffs ) 2799{ 2800 Bool b2 = False; 2801 Word i, n = VG_(sizeXA)( cuOffs ); 2802 for (i = 0; i < n; i++) { 2803 Bool b = False; 2804 UWord* p = VG_(indexXA)( cuOffs, i ); 2805 *p = chase_cuOff( &b, ents, ents_cache, *p ); 2806 if (b) 2807 b2 = True; 2808 } 2809 *changed = b2; 2810} 2811 2812static Bool TyEnt__subst_R_fields ( XArray* /* of TyEnt */ ents, 2813 TyEntIndexCache* ents_cache, 2814 /*MOD*/TyEnt* te ) 2815{ 2816 Bool b, changed = False; 2817 switch (te->tag) { 2818 case Te_EMPTY: 2819 break; 2820 case Te_INDIR: 2821 te->Te.INDIR.indR 2822 = chase_cuOff( &b, ents, ents_cache, te->Te.INDIR.indR ); 2823 if (b) changed = True; 2824 break; 2825 case Te_UNKNOWN: 2826 break; 2827 case Te_Atom: 2828 break; 2829 case Te_Field: 2830 te->Te.Field.typeR 2831 = chase_cuOff( &b, ents, ents_cache, te->Te.Field.typeR ); 2832 if (b) changed = True; 2833 break; 2834 case Te_Bound: 2835 break; 2836 case Te_TyBase: 2837 break; 2838 case Te_TyPorR: 2839 te->Te.TyPorR.typeR 2840 = chase_cuOff( &b, ents, ents_cache, te->Te.TyPorR.typeR ); 2841 if (b) changed = True; 2842 break; 2843 case Te_TyTyDef: 2844 te->Te.TyTyDef.typeR 2845 = chase_cuOff( &b, ents, ents_cache, te->Te.TyTyDef.typeR ); 2846 if (b) changed = True; 2847 break; 2848 case Te_TyStOrUn: 2849 chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyStOrUn.fieldRs ); 2850 if (b) changed = True; 2851 break; 2852 case Te_TyEnum: 2853 chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyEnum.atomRs ); 2854 if (b) changed = True; 2855 break; 2856 case Te_TyArray: 2857 te->Te.TyArray.typeR 2858 = chase_cuOff( &b, ents, ents_cache, te->Te.TyArray.typeR ); 2859 if (b) changed = True; 2860 chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyArray.boundRs ); 2861 if (b) changed = True; 2862 break; 2863 case Te_TyFn: 2864 break; 2865 case Te_TyQual: 2866 te->Te.TyQual.typeR 2867 = chase_cuOff( &b, ents, ents_cache, te->Te.TyQual.typeR ); 2868 if 
(b) changed = True; 2869 break; 2870 case Te_TyVoid: 2871 break; 2872 default: 2873 ML_(pp_TyEnt)(te); 2874 vg_assert(0); 2875 } 2876 return changed; 2877} 2878 2879/* Make a pass over 'ents'. For each tyent, inspect the target of any 2880 'R' or 'Rs' fields (those which refer to other tyents), and replace 2881 any which point to INDIR nodes with the target of the indirection 2882 (which should not itself be an indirection). In summary, this 2883 routine shorts out all references to indirection nodes. */ 2884static 2885Word dedup_types_substitution_pass ( /*MOD*/XArray* /* of TyEnt */ ents, 2886 TyEntIndexCache* ents_cache ) 2887{ 2888 Word i, n, nChanged = 0; 2889 Bool b; 2890 n = VG_(sizeXA)( ents ); 2891 for (i = 0; i < n; i++) { 2892 TyEnt* ent = VG_(indexXA)( ents, i ); 2893 vg_assert(ent->tag != Te_EMPTY); 2894 /* We have to substitute everything, even indirections, so as to 2895 ensure that chains of indirections don't build up. */ 2896 b = TyEnt__subst_R_fields( ents, ents_cache, ent ); 2897 if (b) 2898 nChanged++; 2899 } 2900 2901 return nChanged; 2902} 2903 2904 2905/* Make a pass over 'ents', building a dictionary of TyEnts as we go. 2906 Look up each new tyent in the dictionary in turn. If it is already 2907 in the dictionary, replace this tyent with an indirection to the 2908 existing one, and delete any malloc'd stuff hanging off this one. 2909 In summary, this routine commons up all tyents that are identical 2910 as defined by TyEnt__cmp_by_all_except_cuOff. 
*/ 2911static 2912Word dedup_types_commoning_pass ( /*MOD*/XArray* /* of TyEnt */ ents ) 2913{ 2914 Word n, i, nDeleted; 2915 WordFM* dict; /* TyEnt* -> void */ 2916 TyEnt* ent; 2917 UWord keyW, valW; 2918 2919 dict = VG_(newFM)( 2920 ML_(dinfo_zalloc), "di.readdwarf3.dtcp.1", 2921 ML_(dinfo_free), 2922 (Word(*)(UWord,UWord)) ML_(TyEnt__cmp_by_all_except_cuOff) 2923 ); 2924 2925 nDeleted = 0; 2926 n = VG_(sizeXA)( ents ); 2927 for (i = 0; i < n; i++) { 2928 ent = VG_(indexXA)( ents, i ); 2929 vg_assert(ent->tag != Te_EMPTY); 2930 2931 /* Ignore indirections, although check that they are 2932 not forming a cycle. */ 2933 if (ent->tag == Te_INDIR) { 2934 vg_assert(ent->Te.INDIR.indR < ent->cuOff); 2935 continue; 2936 } 2937 2938 keyW = valW = 0; 2939 if (VG_(lookupFM)( dict, &keyW, &valW, (UWord)ent )) { 2940 /* it's already in the dictionary. */ 2941 TyEnt* old = (TyEnt*)keyW; 2942 vg_assert(valW == 0); 2943 vg_assert(old != ent); 2944 vg_assert(old->tag != Te_INDIR); 2945 /* since we are traversing the array in increasing order of 2946 cuOff: */ 2947 vg_assert(old->cuOff < ent->cuOff); 2948 /* So anyway, dump this entry and replace it with an 2949 indirection to the one in the dictionary. Note that the 2950 assertion above guarantees that we cannot create cycles of 2951 indirections, since we are always creating an indirection 2952 to a tyent with a cuOff lower than this one. */ 2953 ML_(TyEnt__make_EMPTY)( ent ); 2954 ent->tag = Te_INDIR; 2955 ent->Te.INDIR.indR = old->cuOff; 2956 nDeleted++; 2957 } else { 2958 /* not in dictionary; add it and keep going. 
*/ 2959 VG_(addToFM)( dict, (UWord)ent, 0 ); 2960 } 2961 } 2962 2963 VG_(deleteFM)( dict, NULL, NULL ); 2964 2965 return nDeleted; 2966} 2967 2968 2969static 2970void dedup_types ( Bool td3, 2971 /*MOD*/XArray* /* of TyEnt */ ents, 2972 TyEntIndexCache* ents_cache ) 2973{ 2974 Word m, n, i, nDel, nSubst, nThresh; 2975 if (0) td3 = True; 2976 2977 n = VG_(sizeXA)( ents ); 2978 2979 /* If a commoning pass and a substitution pass both make fewer than 2980 this many changes, just stop. It's pointless to burn up CPU 2981 time trying to compress the last 1% or so out of the array. */ 2982 nThresh = n / 200; 2983 2984 /* First we must sort .ents by its .cuOff fields, so we 2985 can index into it. */ 2986 VG_(setCmpFnXA)( 2987 ents, 2988 (Int(*)(void*,void*)) ML_(TyEnt__cmp_by_cuOff_only) 2989 ); 2990 VG_(sortXA)( ents ); 2991 2992 /* Now repeatedly do commoning and substitution passes over 2993 the array, until there are no more changes. */ 2994 do { 2995 nDel = dedup_types_commoning_pass ( ents ); 2996 nSubst = dedup_types_substitution_pass ( ents, ents_cache ); 2997 vg_assert(nDel >= 0 && nSubst >= 0); 2998 TRACE_D3(" %ld deletions, %ld substitutions\n", nDel, nSubst); 2999 } while (nDel > nThresh || nSubst > nThresh); 3000 3001 /* Sanity check: all INDIR nodes should point at a non-INDIR thing. 3002 In fact this should be true at the end of every loop iteration 3003 above (a commoning pass followed by a substitution pass), but 3004 checking it on every iteration is excessively expensive. Note, 3005 this loop also computes 'm' for the stats printing below it. 
*/ 3006 m = 0; 3007 n = VG_(sizeXA)( ents ); 3008 for (i = 0; i < n; i++) { 3009 TyEnt *ent, *ind; 3010 ent = VG_(indexXA)( ents, i ); 3011 if (ent->tag != Te_INDIR) continue; 3012 m++; 3013 ind = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, 3014 ent->Te.INDIR.indR ); 3015 vg_assert(ind); 3016 vg_assert(ind->tag != Te_INDIR); 3017 } 3018 3019 TRACE_D3("Overall: %ld before, %ld after\n", n, n-m); 3020} 3021 3022 3023/*------------------------------------------------------------*/ 3024/*--- ---*/ 3025/*--- Resolution of references to type DIEs ---*/ 3026/*--- ---*/ 3027/*------------------------------------------------------------*/ 3028 3029/* Make a pass through the (temporary) variables array. Examine the 3030 type of each variable, check is it found, and chase any Te_INDIRs. 3031 Postcondition is: each variable has a typeR field that refers to a 3032 valid type in tyents, or a Te_UNKNOWN, and is certainly guaranteed 3033 not to refer to a Te_INDIR. (This is so that we can throw all the 3034 Te_INDIRs away later). */ 3035 3036__attribute__((noinline)) 3037static void resolve_variable_types ( 3038 void (*barf)( HChar* ) __attribute__((noreturn)), 3039 /*R-O*/XArray* /* of TyEnt */ ents, 3040 /*MOD*/TyEntIndexCache* ents_cache, 3041 /*MOD*/XArray* /* of TempVar* */ vars 3042 ) 3043{ 3044 Word i, n; 3045 n = VG_(sizeXA)( vars ); 3046 for (i = 0; i < n; i++) { 3047 TempVar* var = *(TempVar**)VG_(indexXA)( vars, i ); 3048 /* This is the stated type of the variable. But it might be 3049 an indirection, so be careful. 
*/ 3050 TyEnt* ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, 3051 var->typeR ); 3052 if (ent && ent->tag == Te_INDIR) { 3053 ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, 3054 ent->Te.INDIR.indR ); 3055 vg_assert(ent); 3056 vg_assert(ent->tag != Te_INDIR); 3057 } 3058 3059 /* Deal first with "normal" cases */ 3060 if (ent && ML_(TyEnt__is_type)(ent)) { 3061 var->typeR = ent->cuOff; 3062 continue; 3063 } 3064 3065 /* If there's no ent, it probably we did not manage to read a 3066 type at the cuOffset which is stated as being this variable's 3067 type. Maybe a deficiency in parse_type_DIE. Complain. */ 3068 if (ent == NULL) { 3069 VG_(printf)("\n: Invalid cuOff = 0x%05lx\n", var->typeR ); 3070 barf("resolve_variable_types: " 3071 "cuOff does not refer to a known type"); 3072 } 3073 vg_assert(ent); 3074 /* If ent has any other tag, something bad happened, along the 3075 lines of var->typeR not referring to a type at all. */ 3076 vg_assert(ent->tag == Te_UNKNOWN); 3077 /* Just accept it; the type will be useless, but at least keep 3078 going. 
*/ 3079 var->typeR = ent->cuOff; 3080 } 3081} 3082 3083 3084/*------------------------------------------------------------*/ 3085/*--- ---*/ 3086/*--- Parsing of Compilation Units ---*/ 3087/*--- ---*/ 3088/*------------------------------------------------------------*/ 3089 3090static Int cmp_TempVar_by_dioff ( void* v1, void* v2 ) { 3091 TempVar* t1 = *(TempVar**)v1; 3092 TempVar* t2 = *(TempVar**)v2; 3093 if (t1->dioff < t2->dioff) return -1; 3094 if (t1->dioff > t2->dioff) return 1; 3095 return 0; 3096} 3097 3098static void read_DIE ( 3099 /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree, 3100 /*MOD*/XArray* /* of TyEnt */ tyents, 3101 /*MOD*/XArray* /* of TempVar* */ tempvars, 3102 /*MOD*/XArray* /* of GExpr* */ gexprs, 3103 /*MOD*/D3TypeParser* typarser, 3104 /*MOD*/D3VarParser* varparser, 3105 Cursor* c, Bool td3, CUConst* cc, Int level 3106) 3107{ 3108 Cursor abbv; 3109 ULong atag, abbv_code; 3110 UWord posn; 3111 UInt has_children; 3112 UWord start_die_c_offset, start_abbv_c_offset; 3113 UWord after_die_c_offset, after_abbv_c_offset; 3114 3115 /* --- Deal with this DIE --- */ 3116 posn = get_position_of_Cursor( c ); 3117 abbv_code = get_ULEB128( c ); 3118 set_abbv_Cursor( &abbv, td3, cc, abbv_code ); 3119 atag = get_ULEB128( &abbv ); 3120 TRACE_D3("\n"); 3121 TRACE_D3(" <%d><%lx>: Abbrev Number: %llu (%s)\n", 3122 level, posn, abbv_code, ML_(pp_DW_TAG)( atag ) ); 3123 3124 if (atag == 0) 3125 cc->barf("read_DIE: invalid zero tag on DIE"); 3126 3127 has_children = get_UChar( &abbv ); 3128 if (has_children != DW_children_no && has_children != DW_children_yes) 3129 cc->barf("read_DIE: invalid has_children value"); 3130 3131 /* We're set up to look at the fields of this DIE. Hand it off to 3132 any parser(s) that want to see it. Since they will in general 3133 advance both the DIE and abbrev cursors, remember their current 3134 settings so that we can then back up and do one final pass over 3135 the DIE, to print out its contents. 
*/ 3136 3137 start_die_c_offset = get_position_of_Cursor( c ); 3138 start_abbv_c_offset = get_position_of_Cursor( &abbv ); 3139 3140 while (True) { 3141 ULong cts; 3142 Int ctsSzB; 3143 UWord ctsMemSzB; 3144 ULong at_name = get_ULEB128( &abbv ); 3145 ULong at_form = get_ULEB128( &abbv ); 3146 if (at_name == 0 && at_form == 0) break; 3147 TRACE_D3(" %18s: ", ML_(pp_DW_AT)(at_name)); 3148 /* Get the form contents, but ignore them; the only purpose is 3149 to print them, if td3 is True */ 3150 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 3151 cc, c, td3, (DW_FORM)at_form ); 3152 TRACE_D3("\t"); 3153 TRACE_D3("\n"); 3154 } 3155 3156 after_die_c_offset = get_position_of_Cursor( c ); 3157 after_abbv_c_offset = get_position_of_Cursor( &abbv ); 3158 3159 set_position_of_Cursor( c, start_die_c_offset ); 3160 set_position_of_Cursor( &abbv, start_abbv_c_offset ); 3161 3162 parse_type_DIE( tyents, 3163 typarser, 3164 (DW_TAG)atag, 3165 posn, 3166 level, 3167 c, /* DIE cursor */ 3168 &abbv, /* abbrev cursor */ 3169 cc, 3170 td3 ); 3171 3172 set_position_of_Cursor( c, start_die_c_offset ); 3173 set_position_of_Cursor( &abbv, start_abbv_c_offset ); 3174 3175 parse_var_DIE( rangestree, 3176 tempvars, 3177 gexprs, 3178 varparser, 3179 (DW_TAG)atag, 3180 posn, 3181 level, 3182 c, /* DIE cursor */ 3183 &abbv, /* abbrev cursor */ 3184 cc, 3185 td3 ); 3186 3187 set_position_of_Cursor( c, after_die_c_offset ); 3188 set_position_of_Cursor( &abbv, after_abbv_c_offset ); 3189 3190 /* --- Now recurse into its children, if any --- */ 3191 if (has_children == DW_children_yes) { 3192 if (0) TRACE_D3("BEGIN children of level %d\n", level); 3193 while (True) { 3194 atag = peek_ULEB128( c ); 3195 if (atag == 0) break; 3196 read_DIE( rangestree, tyents, tempvars, gexprs, 3197 typarser, varparser, 3198 c, td3, cc, level+1 ); 3199 } 3200 /* Now we need to eat the terminating zero */ 3201 atag = get_ULEB128( c ); 3202 vg_assert(atag == 0); 3203 if (0) TRACE_D3("END children of level %d\n", level); 
3204 } 3205 3206} 3207 3208 3209static 3210void new_dwarf3_reader_wrk ( 3211 struct _DebugInfo* di, 3212 __attribute__((noreturn)) void (*barf)( HChar* ), 3213 UChar* debug_info_img, SizeT debug_info_sz, 3214 UChar* debug_abbv_img, SizeT debug_abbv_sz, 3215 UChar* debug_line_img, SizeT debug_line_sz, 3216 UChar* debug_str_img, SizeT debug_str_sz, 3217 UChar* debug_ranges_img, SizeT debug_ranges_sz, 3218 UChar* debug_loc_img, SizeT debug_loc_sz 3219) 3220{ 3221 XArray* /* of TyEnt */ tyents; 3222 XArray* /* of TyEnt */ tyents_to_keep; 3223 XArray* /* of GExpr* */ gexprs; 3224 XArray* /* of TempVar* */ tempvars; 3225 WordFM* /* of (XArray* of AddrRange, void) */ rangestree; 3226 TyEntIndexCache* tyents_cache = NULL; 3227 TyEntIndexCache* tyents_to_keep_cache = NULL; 3228 TempVar *varp, *varp2; 3229 GExpr* gexpr; 3230 Cursor abbv; /* for showing .debug_abbrev */ 3231 Cursor info; /* primary cursor for parsing .debug_info */ 3232 Cursor ranges; /* for showing .debug_ranges */ 3233 D3TypeParser typarser; 3234 D3VarParser varparser; 3235 Addr dr_base; 3236 UWord dr_offset; 3237 Word i, j, n; 3238 Bool td3 = di->trace_symtab; 3239 XArray* /* of TempVar* */ dioff_lookup_tab; 3240#if 0 3241 /* This doesn't work properly because it assumes all entries are 3242 packed end to end, with no holes. But that doesn't always 3243 appear to be the case, so it loses sync. And the D3 spec 3244 doesn't appear to require a no-hole situation either. 
*/ 3245 /* Display .debug_loc */ 3246 Addr dl_base; 3247 UWord dl_offset; 3248 Cursor loc; /* for showing .debug_loc */ 3249 TRACE_SYMTAB("\n"); 3250 TRACE_SYMTAB("\n------ The contents of .debug_loc ------\n"); 3251 TRACE_SYMTAB(" Offset Begin End Expression\n"); 3252 init_Cursor( &loc, debug_loc_img, 3253 debug_loc_sz, 0, barf, 3254 "Overrun whilst reading .debug_loc section(1)" ); 3255 dl_base = 0; 3256 dl_offset = 0; 3257 while (True) { 3258 UWord w1, w2; 3259 UWord len; 3260 if (is_at_end_Cursor( &loc )) 3261 break; 3262 3263 /* Read a (host-)word pair. This is something of a hack since 3264 the word size to read is really dictated by the ELF file; 3265 however, we assume we're reading a file with the same 3266 word-sizeness as the host. Reasonably enough. */ 3267 w1 = get_UWord( &loc ); 3268 w2 = get_UWord( &loc ); 3269 3270 if (w1 == 0 && w2 == 0) { 3271 /* end of list. reset 'base' */ 3272 TRACE_D3(" %08lx <End of list>\n", dl_offset); 3273 dl_base = 0; 3274 dl_offset = get_position_of_Cursor( &loc ); 3275 continue; 3276 } 3277 3278 if (w1 == -1UL) { 3279 /* new value for 'base' */ 3280 TRACE_D3(" %08lx %16lx %08lx (base address)\n", 3281 dl_offset, w1, w2); 3282 dl_base = w2; 3283 continue; 3284 } 3285 3286 /* else a location expression follows */ 3287 TRACE_D3(" %08lx %08lx %08lx ", 3288 dl_offset, w1 + dl_base, w2 + dl_base); 3289 len = (UWord)get_UShort( &loc ); 3290 while (len > 0) { 3291 UChar byte = get_UChar( &loc ); 3292 TRACE_D3("%02x", (UInt)byte); 3293 len--; 3294 } 3295 TRACE_SYMTAB("\n"); 3296 } 3297#endif 3298 3299 /* Display .debug_ranges */ 3300 TRACE_SYMTAB("\n"); 3301 TRACE_SYMTAB("\n------ The contents of .debug_ranges ------\n"); 3302 TRACE_SYMTAB(" Offset Begin End\n"); 3303 init_Cursor( &ranges, debug_ranges_img, 3304 debug_ranges_sz, 0, barf, 3305 "Overrun whilst reading .debug_ranges section(1)" ); 3306 dr_base = 0; 3307 dr_offset = 0; 3308 while (True) { 3309 UWord w1, w2; 3310 3311 if (is_at_end_Cursor( &ranges )) 3312 break; 3313 
3314 /* Read a (host-)word pair. This is something of a hack since 3315 the word size to read is really dictated by the ELF file; 3316 however, we assume we're reading a file with the same 3317 word-sizeness as the host. Reasonably enough. */ 3318 w1 = get_UWord( &ranges ); 3319 w2 = get_UWord( &ranges ); 3320 3321 if (w1 == 0 && w2 == 0) { 3322 /* end of list. reset 'base' */ 3323 TRACE_D3(" %08lx <End of list>\n", dr_offset); 3324 dr_base = 0; 3325 dr_offset = get_position_of_Cursor( &ranges ); 3326 continue; 3327 } 3328 3329 if (w1 == -1UL) { 3330 /* new value for 'base' */ 3331 TRACE_D3(" %08lx %16lx %08lx (base address)\n", 3332 dr_offset, w1, w2); 3333 dr_base = w2; 3334 continue; 3335 } 3336 3337 /* else a range [w1+base, w2+base) is denoted */ 3338 TRACE_D3(" %08lx %08lx %08lx\n", 3339 dr_offset, w1 + dr_base, w2 + dr_base); 3340 } 3341 3342 /* Display .debug_abbrev */ 3343 init_Cursor( &abbv, debug_abbv_img, debug_abbv_sz, 0, barf, 3344 "Overrun whilst reading .debug_abbrev section" ); 3345 TRACE_SYMTAB("\n"); 3346 TRACE_SYMTAB("\n------ The contents of .debug_abbrev ------\n"); 3347 while (True) { 3348 if (is_at_end_Cursor( &abbv )) 3349 break; 3350 /* Read one abbreviation table */ 3351 TRACE_D3(" Number TAG\n"); 3352 while (True) { 3353 ULong atag; 3354 UInt has_children; 3355 ULong acode = get_ULEB128( &abbv ); 3356 if (acode == 0) break; /* end of the table */ 3357 atag = get_ULEB128( &abbv ); 3358 has_children = get_UChar( &abbv ); 3359 TRACE_D3(" %llu %s [%s]\n", 3360 acode, ML_(pp_DW_TAG)(atag), 3361 ML_(pp_DW_children)(has_children)); 3362 while (True) { 3363 ULong at_name = get_ULEB128( &abbv ); 3364 ULong at_form = get_ULEB128( &abbv ); 3365 if (at_name == 0 && at_form == 0) break; 3366 TRACE_D3(" %18s %s\n", 3367 ML_(pp_DW_AT)(at_name), ML_(pp_DW_FORM)(at_form)); 3368 } 3369 } 3370 } 3371 TRACE_SYMTAB("\n"); 3372 3373 /* Now loop over the Compilation Units listed in the .debug_info 3374 section (see D3SPEC sec 7.5) paras 1 and 2. 
Each compilation 3375 unit contains a Compilation Unit Header followed by precisely 3376 one DW_TAG_compile_unit or DW_TAG_partial_unit DIE. */ 3377 init_Cursor( &info, debug_info_img, debug_info_sz, 0, barf, 3378 "Overrun whilst reading .debug_info section" ); 3379 3380 /* We'll park the harvested type information in here. Also create 3381 a fake "void" entry with offset D3_FAKEVOID_CUOFF, so we always 3382 have at least one type entry to refer to. D3_FAKEVOID_CUOFF is 3383 huge and presumably will not occur in any valid DWARF3 file -- 3384 it would need to have a .debug_info section 4GB long for that to 3385 happen. These type entries end up in the DebugInfo. */ 3386 tyents = VG_(newXA)( ML_(dinfo_zalloc), 3387 "di.readdwarf3.ndrw.1 (TyEnt temp array)", 3388 ML_(dinfo_free), sizeof(TyEnt) ); 3389 { TyEnt tyent; 3390 VG_(memset)(&tyent, 0, sizeof(tyent)); 3391 tyent.tag = Te_TyVoid; 3392 tyent.cuOff = D3_FAKEVOID_CUOFF; 3393 tyent.Te.TyVoid.isFake = True; 3394 VG_(addToXA)( tyents, &tyent ); 3395 } 3396 { TyEnt tyent; 3397 VG_(memset)(&tyent, 0, sizeof(tyent)); 3398 tyent.tag = Te_UNKNOWN; 3399 tyent.cuOff = D3_INVALID_CUOFF; 3400 VG_(addToXA)( tyents, &tyent ); 3401 } 3402 3403 /* This is a tree used to unique-ify the range lists that are 3404 manufactured by parse_var_DIE. References to the keys in the 3405 tree wind up in .rngMany fields in TempVars. We'll need to 3406 delete this tree, and the XArrays attached to it, at the end of 3407 this function. */ 3408 rangestree = VG_(newFM)( ML_(dinfo_zalloc), 3409 "di.readdwarf3.ndrw.2 (rangestree)", 3410 ML_(dinfo_free), 3411 (Word(*)(UWord,UWord))cmp__XArrays_of_AddrRange ); 3412 3413 /* List of variables we're accumulating. These don't end up in the 3414 DebugInfo; instead their contents are handed to ML_(addVar) and 3415 the list elements are then deleted. 
*/ 3416 tempvars = VG_(newXA)( ML_(dinfo_zalloc), 3417 "di.readdwarf3.ndrw.3 (TempVar*s array)", 3418 ML_(dinfo_free), 3419 sizeof(TempVar*) ); 3420 3421 /* List of GExprs we're accumulating. These wind up in the 3422 DebugInfo. */ 3423 gexprs = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.4", 3424 ML_(dinfo_free), sizeof(GExpr*) ); 3425 3426 /* We need a D3TypeParser to keep track of partially constructed 3427 types. It'll be discarded as soon as we've completed the CU, 3428 since the resulting information is tipped in to 'tyents' as it 3429 is generated. */ 3430 VG_(memset)( &typarser, 0, sizeof(typarser) ); 3431 typarser.sp = -1; 3432 typarser.language = '?'; 3433 for (i = 0; i < N_D3_TYPE_STACK; i++) { 3434 typarser.qparentE[i].tag = Te_EMPTY; 3435 typarser.qparentE[i].cuOff = D3_INVALID_CUOFF; 3436 } 3437 3438 VG_(memset)( &varparser, 0, sizeof(varparser) ); 3439 varparser.sp = -1; 3440 3441 TRACE_D3("\n------ Parsing .debug_info section ------\n"); 3442 while (True) { 3443 UWord cu_start_offset, cu_offset_now; 3444 CUConst cc; 3445 /* It may be that the stated size of this CU is larger than the 3446 amount of stuff actually in it. icc9 seems to generate CUs 3447 thusly. We use these variables to figure out if this is 3448 indeed the case, and if so how many bytes we need to skip to 3449 get to the start of the next CU. Not skipping those bytes 3450 causes us to misidentify the start of the next CU, and it all 3451 goes badly wrong after that (not surprisingly). */ 3452 UWord cu_size_including_IniLen, cu_amount_used; 3453 3454 /* It seems icc9 finishes the DIE info before debug_info_sz 3455 bytes have been used up. So be flexible, and declare the 3456 sequence complete if there is not enough remaining bytes to 3457 hold even the smallest conceivable CU header. (11 bytes I 3458 reckon). */ 3459 /* JRS 23Jan09: I suspect this is no longer necessary now that 3460 the code below contains a 'while (cu_amount_used < 3461 cu_size_including_IniLen ...' 
style loop, which skips over 3462 any leftover bytes at the end of a CU in the case where the 3463 CU's stated size is larger than its actual size (as 3464 determined by reading all its DIEs). However, for prudence, 3465 I'll leave the following test in place. I can't see that a 3466 CU header can be smaller than 11 bytes, so I don't think 3467 there's any harm possible through the test -- it just adds 3468 robustness. */ 3469 Word avail = get_remaining_length_Cursor( &info ); 3470 if (avail < 11) { 3471 if (avail > 0) 3472 TRACE_D3("new_dwarf3_reader_wrk: warning: " 3473 "%ld unused bytes after end of DIEs\n", avail); 3474 break; 3475 } 3476 3477 /* Check the varparser's stack is in a sane state. */ 3478 vg_assert(varparser.sp == -1); 3479 for (i = 0; i < N_D3_VAR_STACK; i++) { 3480 vg_assert(varparser.ranges[i] == NULL); 3481 vg_assert(varparser.level[i] == 0); 3482 } 3483 for (i = 0; i < N_D3_TYPE_STACK; i++) { 3484 vg_assert(typarser.qparentE[i].cuOff == D3_INVALID_CUOFF); 3485 vg_assert(typarser.qparentE[i].tag == Te_EMPTY); 3486 vg_assert(typarser.qlevel[i] == 0); 3487 } 3488 3489 cu_start_offset = get_position_of_Cursor( &info ); 3490 TRACE_D3("\n"); 3491 TRACE_D3(" Compilation Unit @ offset 0x%lx:\n", cu_start_offset); 3492 /* parse_CU_header initialises the CU's set_abbv_Cursor cache 3493 (saC_cache) */ 3494 parse_CU_Header( &cc, td3, &info, 3495 (UChar*)debug_abbv_img, debug_abbv_sz ); 3496 cc.debug_str_img = debug_str_img; 3497 cc.debug_str_sz = debug_str_sz; 3498 cc.debug_ranges_img = debug_ranges_img; 3499 cc.debug_ranges_sz = debug_ranges_sz; 3500 cc.debug_loc_img = debug_loc_img; 3501 cc.debug_loc_sz = debug_loc_sz; 3502 cc.debug_line_img = debug_line_img; 3503 cc.debug_line_sz = debug_line_sz; 3504 cc.debug_info_img = debug_info_img; 3505 cc.debug_info_sz = debug_info_sz; 3506 cc.cu_start_offset = cu_start_offset; 3507 cc.di = di; 3508 /* The CU's svma can be deduced by looking at the AT_low_pc 3509 value in the top level TAG_compile_unit, which is 
the topmost 3510 DIE. We'll leave it for the 'varparser' to acquire that info 3511 and fill it in -- since it is the only party to want to know 3512 it. */ 3513 cc.cu_svma_known = False; 3514 cc.cu_svma = 0; 3515 3516 /* Create a fake outermost-level range covering the entire 3517 address range. So we always have *something* to catch all 3518 variable declarations. */ 3519 varstack_push( &cc, &varparser, td3, 3520 unitary_range_list(0UL, ~0UL), 3521 -1, False/*isFunc*/, NULL/*fbGX*/ ); 3522 3523 /* And set up the file name table. When we come across the top 3524 level DIE for this CU (which is what the next call to 3525 read_DIE should process) we will copy all the file names out 3526 of the .debug_line img area and use this table to look up the 3527 copies when we later see filename numbers in DW_TAG_variables 3528 etc. */ 3529 vg_assert(!varparser.filenameTable ); 3530 varparser.filenameTable 3531 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.5", 3532 ML_(dinfo_free), 3533 sizeof(UChar*) ); 3534 vg_assert(varparser.filenameTable); 3535 3536 /* Now read the one-and-only top-level DIE for this CU. */ 3537 vg_assert(varparser.sp == 0); 3538 read_DIE( rangestree, 3539 tyents, tempvars, gexprs, 3540 &typarser, &varparser, 3541 &info, td3, &cc, 0 ); 3542 3543 cu_offset_now = get_position_of_Cursor( &info ); 3544 3545 if (0) VG_(printf)("Travelled: %lu size %llu\n", 3546 cu_offset_now - cc.cu_start_offset, 3547 cc.unit_length + (cc.is_dw64 ? 12 : 4)); 3548 3549 /* How big the CU claims it is .. */ 3550 cu_size_including_IniLen = cc.unit_length + (cc.is_dw64 ? 12 : 4); 3551 /* .. vs how big we have found it to be */ 3552 cu_amount_used = cu_offset_now - cc.cu_start_offset; 3553 3554 if (1) TRACE_D3("offset now %ld, d-i-size %ld\n", 3555 cu_offset_now, debug_info_sz); 3556 if (cu_offset_now > debug_info_sz) 3557 barf("toplevel DIEs beyond end of CU"); 3558 3559 /* If the CU is bigger than it claims to be, we've got a serious 3560 problem. 
*/ 3561 if (cu_amount_used > cu_size_including_IniLen) 3562 barf("CU's actual size appears to be larger than it claims it is"); 3563 3564 /* If the CU is smaller than it claims to be, we need to skip some 3565 bytes. Loop updates cu_offset_new and cu_amount_used. */ 3566 while (cu_amount_used < cu_size_including_IniLen 3567 && get_remaining_length_Cursor( &info ) > 0) { 3568 if (0) VG_(printf)("SKIP\n"); 3569 (void)get_UChar( &info ); 3570 cu_offset_now = get_position_of_Cursor( &info ); 3571 cu_amount_used = cu_offset_now - cc.cu_start_offset; 3572 } 3573 3574 if (cu_offset_now == debug_info_sz) 3575 break; 3576 3577 /* Preen to level -2. DIEs have level >= 0 so -2 cannot occur 3578 anywhere else at all. Our fake the-entire-address-space 3579 range is at level -1, so preening to -2 should completely 3580 empty the stack out. */ 3581 TRACE_D3("\n"); 3582 varstack_preen( &varparser, td3, -2 ); 3583 /* Similarly, empty the type stack out. */ 3584 typestack_preen( &typarser, td3, -2 ); 3585 /* else keep going */ 3586 3587 TRACE_D3("set_abbv_Cursor cache: %lu queries, %lu misses\n", 3588 cc.saC_cache_queries, cc.saC_cache_misses); 3589 3590 vg_assert(varparser.filenameTable ); 3591 VG_(deleteXA)( varparser.filenameTable ); 3592 varparser.filenameTable = NULL; 3593 } 3594 3595 /* From here on we're post-processing the stuff we got 3596 out of the .debug_info section. 
*/ 3597 if (td3) { 3598 TRACE_D3("\n"); 3599 ML_(pp_TyEnts)(tyents, "Initial type entity (TyEnt) array"); 3600 TRACE_D3("\n"); 3601 TRACE_D3("------ Compressing type entries ------\n"); 3602 } 3603 3604 tyents_cache = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.6", 3605 sizeof(TyEntIndexCache) ); 3606 ML_(TyEntIndexCache__invalidate)( tyents_cache ); 3607 dedup_types( td3, tyents, tyents_cache ); 3608 if (td3) { 3609 TRACE_D3("\n"); 3610 ML_(pp_TyEnts)(tyents, "After type entity (TyEnt) compression"); 3611 } 3612 3613 TRACE_D3("\n"); 3614 TRACE_D3("------ Resolving the types of variables ------\n" ); 3615 resolve_variable_types( barf, tyents, tyents_cache, tempvars ); 3616 3617 /* Copy all the non-INDIR tyents into a new table. For large 3618 .so's, about 90% of the tyents will by now have been resolved to 3619 INDIRs, and we no longer need them, and so don't need to store 3620 them. */ 3621 tyents_to_keep 3622 = VG_(newXA)( ML_(dinfo_zalloc), 3623 "di.readdwarf3.ndrw.7 (TyEnt to-keep array)", 3624 ML_(dinfo_free), sizeof(TyEnt) ); 3625 n = VG_(sizeXA)( tyents ); 3626 for (i = 0; i < n; i++) { 3627 TyEnt* ent = VG_(indexXA)( tyents, i ); 3628 if (ent->tag != Te_INDIR) 3629 VG_(addToXA)( tyents_to_keep, ent ); 3630 } 3631 3632 VG_(deleteXA)( tyents ); 3633 tyents = NULL; 3634 ML_(dinfo_free)( tyents_cache ); 3635 tyents_cache = NULL; 3636 3637 /* Sort tyents_to_keep so we can lookup in it. A complete (if 3638 minor) waste of time, since tyents itself is sorted, but 3639 necessary since VG_(lookupXA) refuses to cooperate if we 3640 don't. */ 3641 VG_(setCmpFnXA)( 3642 tyents_to_keep, 3643 (Int(*)(void*,void*)) ML_(TyEnt__cmp_by_cuOff_only) 3644 ); 3645 VG_(sortXA)( tyents_to_keep ); 3646 3647 /* Enable cacheing on tyents_to_keep */ 3648 tyents_to_keep_cache 3649 = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.8", 3650 sizeof(TyEntIndexCache) ); 3651 ML_(TyEntIndexCache__invalidate)( tyents_to_keep_cache ); 3652 3653 /* And record the tyents in the DebugInfo. 
We do this before 3654 starting to hand variables to ML_(addVar), since if ML_(addVar) 3655 wants to do debug printing (of the types of said vars) then it 3656 will need the tyents.*/ 3657 vg_assert(!di->admin_tyents); 3658 di->admin_tyents = tyents_to_keep; 3659 3660 /* Bias all the location expressions. */ 3661 TRACE_D3("\n"); 3662 TRACE_D3("------ Biasing the location expressions ------\n" ); 3663 3664 n = VG_(sizeXA)( gexprs ); 3665 for (i = 0; i < n; i++) { 3666 gexpr = *(GExpr**)VG_(indexXA)( gexprs, i ); 3667 bias_GX( gexpr, di ); 3668 } 3669 3670 TRACE_D3("\n"); 3671 TRACE_D3("------ Acquired the following variables: ------\n\n"); 3672 3673 /* Park (pointers to) all the vars in an XArray, so we can look up 3674 abstract origins quickly. The array is sorted (hence, looked-up 3675 by) the .dioff fields. Since the .dioffs should be in strictly 3676 ascending order, there is no need to sort the array after 3677 construction. The ascendingness is however asserted for. */ 3678 dioff_lookup_tab 3679 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.9", 3680 ML_(dinfo_free), 3681 sizeof(TempVar*) ); 3682 vg_assert(dioff_lookup_tab); 3683 3684 n = VG_(sizeXA)( tempvars ); 3685 for (i = 0; i < n; i++) { 3686 varp = *(TempVar**)VG_(indexXA)( tempvars, i ); 3687 if (i > 0) { 3688 varp2 = *(TempVar**)VG_(indexXA)( tempvars, i-1 ); 3689 /* why should this hold? Only, I think, because we've 3690 constructed the array by reading .debug_info sequentially, 3691 and so the array .dioff fields should reflect that, and be 3692 strictly ascending. */ 3693 vg_assert(varp2->dioff < varp->dioff); 3694 } 3695 VG_(addToXA)( dioff_lookup_tab, &varp ); 3696 } 3697 VG_(setCmpFnXA)( dioff_lookup_tab, cmp_TempVar_by_dioff ); 3698 VG_(sortXA)( dioff_lookup_tab ); /* POINTLESS; FIXME: rm */ 3699 3700 /* Now visit each var. Collect up as much info as possible for 3701 each var and hand it to ML_(addVar). 
*/ 3702 n = VG_(sizeXA)( tempvars ); 3703 for (j = 0; j < n; j++) { 3704 TyEnt* ent; 3705 varp = *(TempVar**)VG_(indexXA)( tempvars, j ); 3706 3707 /* Possibly show .. */ 3708 if (td3) { 3709 VG_(printf)("<%lx> addVar: level %d: %s :: ", 3710 varp->dioff, 3711 varp->level, 3712 varp->name ? varp->name : (UChar*)"<anon_var>" ); 3713 if (varp->typeR) { 3714 ML_(pp_TyEnt_C_ishly)( tyents_to_keep, varp->typeR ); 3715 } else { 3716 VG_(printf)("NULL"); 3717 } 3718 VG_(printf)("\n Loc="); 3719 if (varp->gexpr) { 3720 ML_(pp_GX)(varp->gexpr); 3721 } else { 3722 VG_(printf)("NULL"); 3723 } 3724 VG_(printf)("\n"); 3725 if (varp->fbGX) { 3726 VG_(printf)(" FrB="); 3727 ML_(pp_GX)( varp->fbGX ); 3728 VG_(printf)("\n"); 3729 } else { 3730 VG_(printf)(" FrB=none\n"); 3731 } 3732 VG_(printf)(" declared at: %s:%d\n", 3733 varp->fName ? varp->fName : (UChar*)"NULL", 3734 varp->fLine ); 3735 if (varp->absOri != (UWord)D3_INVALID_CUOFF) 3736 VG_(printf)(" abstract origin: <%lx>\n", varp->absOri); 3737 } 3738 3739 /* Skip variables which have no location. These must be 3740 abstract instances; they are useless as-is since with no 3741 location they have no specified memory location. They will 3742 presumably be referred to via the absOri fields of other 3743 variables. */ 3744 if (!varp->gexpr) { 3745 TRACE_D3(" SKIP (no location)\n\n"); 3746 continue; 3747 } 3748 3749 /* So it has a location, at least. If it refers to some other 3750 entry through its absOri field, pull in further info through 3751 that. 
*/ 3752 if (varp->absOri != (UWord)D3_INVALID_CUOFF) { 3753 Bool found; 3754 Word ixFirst, ixLast; 3755 TempVar key; 3756 TempVar* keyp = &key; 3757 TempVar *varAI; 3758 VG_(memset)(&key, 0, sizeof(key)); /* not necessary */ 3759 key.dioff = varp->absOri; /* this is what we want to find */ 3760 found = VG_(lookupXA)( dioff_lookup_tab, &keyp, 3761 &ixFirst, &ixLast ); 3762 if (!found) { 3763 /* barf("DW_AT_abstract_origin can't be resolved"); */ 3764 TRACE_D3(" SKIP (DW_AT_abstract_origin can't be resolved)\n\n"); 3765 continue; 3766 } 3767 /* If the following fails, there is more than one entry with 3768 the same dioff. Which can't happen. */ 3769 vg_assert(ixFirst == ixLast); 3770 varAI = *(TempVar**)VG_(indexXA)( dioff_lookup_tab, ixFirst ); 3771 /* stay sane */ 3772 vg_assert(varAI); 3773 vg_assert(varAI->dioff == varp->absOri); 3774 3775 /* Copy what useful info we can. */ 3776 if (varAI->typeR && !varp->typeR) 3777 varp->typeR = varAI->typeR; 3778 if (varAI->name && !varp->name) 3779 varp->name = varAI->name; 3780 if (varAI->fName && !varp->fName) 3781 varp->fName = varAI->fName; 3782 if (varAI->fLine > 0 && varp->fLine == 0) 3783 varp->fLine = varAI->fLine; 3784 } 3785 3786 /* Give it a name if it doesn't have one. */ 3787 if (!varp->name) 3788 varp->name = ML_(addStr)( di, "<anon_var>", -1 ); 3789 3790 /* So now does it have enough info to be useful? */ 3791 /* NOTE: re typeR: this is a hack. If typeR is Te_UNKNOWN then 3792 the type didn't get resolved. Really, in that case 3793 something's broken earlier on, and should be fixed, rather 3794 than just skipping the variable. */ 3795 ent = ML_(TyEnts__index_by_cuOff)( tyents_to_keep, 3796 tyents_to_keep_cache, 3797 varp->typeR ); 3798 /* The next two assertions should be guaranteed by 3799 our previous call to resolve_variable_types. 
*/ 3800 vg_assert(ent); 3801 vg_assert(ML_(TyEnt__is_type)(ent) || ent->tag == Te_UNKNOWN); 3802 3803 if (ent->tag == Te_UNKNOWN) continue; 3804 3805 vg_assert(varp->gexpr); 3806 vg_assert(varp->name); 3807 vg_assert(varp->typeR); 3808 vg_assert(varp->level >= 0); 3809 3810 /* Ok. So we're going to keep it. Call ML_(addVar) once for 3811 each address range in which the variable exists. */ 3812 TRACE_D3(" ACQUIRE for range(s) "); 3813 { AddrRange oneRange; 3814 AddrRange* varPcRanges; 3815 Word nVarPcRanges; 3816 /* Set up to iterate over address ranges, however 3817 represented. */ 3818 if (varp->nRanges == 0 || varp->nRanges == 1) { 3819 vg_assert(!varp->rngMany); 3820 if (varp->nRanges == 0) { 3821 vg_assert(varp->rngOneMin == 0); 3822 vg_assert(varp->rngOneMax == 0); 3823 } 3824 nVarPcRanges = varp->nRanges; 3825 oneRange.aMin = varp->rngOneMin; 3826 oneRange.aMax = varp->rngOneMax; 3827 varPcRanges = &oneRange; 3828 } else { 3829 vg_assert(varp->rngMany); 3830 vg_assert(varp->rngOneMin == 0); 3831 vg_assert(varp->rngOneMax == 0); 3832 nVarPcRanges = VG_(sizeXA)(varp->rngMany); 3833 vg_assert(nVarPcRanges >= 2); 3834 vg_assert(nVarPcRanges == (Word)varp->nRanges); 3835 varPcRanges = VG_(indexXA)(varp->rngMany, 0); 3836 } 3837 if (varp->level == 0) 3838 vg_assert( nVarPcRanges == 1 ); 3839 /* and iterate */ 3840 for (i = 0; i < nVarPcRanges; i++) { 3841 Addr pcMin = varPcRanges[i].aMin; 3842 Addr pcMax = varPcRanges[i].aMax; 3843 vg_assert(pcMin <= pcMax); 3844 /* Level 0 is the global address range. So at level 0 we 3845 don't want to bias pcMin/pcMax; but at all other levels 3846 we do since those are derived from svmas in the Dwarf 3847 we're reading. Be paranoid ... */ 3848 if (varp->level == 0) { 3849 vg_assert(pcMin == (Addr)0); 3850 vg_assert(pcMax == ~(Addr)0); 3851 } else { 3852 /* vg_assert(pcMin > (Addr)0); 3853 No .. we can legitimately expect to see ranges like 3854 0x0-0x11D (pre-biasing, of course). 
*/ 3855 vg_assert(pcMax < ~(Addr)0); 3856 } 3857 3858 /* Apply text biasing, for non-global variables. */ 3859 if (varp->level > 0) { 3860 pcMin += di->text_debug_bias; 3861 pcMax += di->text_debug_bias; 3862 } 3863 3864 if (i > 0 && (i%2) == 0) 3865 TRACE_D3("\n "); 3866 TRACE_D3("[%#lx,%#lx] ", pcMin, pcMax ); 3867 3868 ML_(addVar)( 3869 di, varp->level, 3870 pcMin, pcMax, 3871 varp->name, varp->typeR, 3872 varp->gexpr, varp->fbGX, 3873 varp->fName, varp->fLine, td3 3874 ); 3875 } 3876 } 3877 3878 TRACE_D3("\n\n"); 3879 /* and move on to the next var */ 3880 } 3881 3882 /* Now free all the TempVars */ 3883 n = VG_(sizeXA)( tempvars ); 3884 for (i = 0; i < n; i++) { 3885 varp = *(TempVar**)VG_(indexXA)( tempvars, i ); 3886 ML_(dinfo_free)(varp); 3887 } 3888 VG_(deleteXA)( tempvars ); 3889 tempvars = NULL; 3890 3891 /* and the temp lookup table */ 3892 VG_(deleteXA)( dioff_lookup_tab ); 3893 3894 /* and the ranges tree. Note that we need to also free the XArrays 3895 which constitute the keys, hence pass VG_(deleteXA) as a 3896 key-finalizer. */ 3897 VG_(deleteFM)( rangestree, (void(*)(UWord))VG_(deleteXA), NULL ); 3898 3899 /* and the tyents_to_keep cache */ 3900 ML_(dinfo_free)( tyents_to_keep_cache ); 3901 tyents_to_keep_cache = NULL; 3902 3903 /* and the file name table (just the array, not the entries 3904 themselves). (Apparently, 2008-Oct-23, varparser.filenameTable 3905 can be NULL here, for icc9 generated Dwarf3. 
Not sure what that 3906 signifies (a deeper problem with the reader?)) */ 3907 if (varparser.filenameTable) { 3908 VG_(deleteXA)( varparser.filenameTable ); 3909 varparser.filenameTable = NULL; 3910 } 3911 3912 /* record the GExprs in di so they can be freed later */ 3913 vg_assert(!di->admin_gexprs); 3914 di->admin_gexprs = gexprs; 3915} 3916 3917 3918/*------------------------------------------------------------*/ 3919/*--- ---*/ 3920/*--- The "new" DWARF3 reader -- top level control logic ---*/ 3921/*--- ---*/ 3922/*------------------------------------------------------------*/ 3923 3924/* --- !!! --- EXTERNAL HEADERS start --- !!! --- */ 3925#include <setjmp.h> /* For jmp_buf */ 3926/* --- !!! --- EXTERNAL HEADERS end --- !!! --- */ 3927 3928static Bool d3rd_jmpbuf_valid = False; 3929static HChar* d3rd_jmpbuf_reason = NULL; 3930static jmp_buf d3rd_jmpbuf; 3931 3932static __attribute__((noreturn)) void barf ( HChar* reason ) { 3933 vg_assert(d3rd_jmpbuf_valid); 3934 d3rd_jmpbuf_reason = reason; 3935 __builtin_longjmp(&d3rd_jmpbuf, 1); 3936 /*NOTREACHED*/ 3937 vg_assert(0); 3938} 3939 3940 3941void 3942ML_(new_dwarf3_reader) ( 3943 struct _DebugInfo* di, 3944 UChar* debug_info_img, SizeT debug_info_sz, 3945 UChar* debug_abbv_img, SizeT debug_abbv_sz, 3946 UChar* debug_line_img, SizeT debug_line_sz, 3947 UChar* debug_str_img, SizeT debug_str_sz, 3948 UChar* debug_ranges_img, SizeT debug_ranges_sz, 3949 UChar* debug_loc_img, SizeT debug_loc_sz 3950) 3951{ 3952 volatile Int jumped; 3953 volatile Bool td3 = di->trace_symtab; 3954 3955 /* Run the _wrk function to read the dwarf3. If it succeeds, it 3956 just returns normally. If there is any failure, it longjmp's 3957 back here, having first set d3rd_jmpbuf_reason to something 3958 useful. */ 3959 vg_assert(d3rd_jmpbuf_valid == False); 3960 vg_assert(d3rd_jmpbuf_reason == NULL); 3961 3962 d3rd_jmpbuf_valid = True; 3963 jumped = __builtin_setjmp(&d3rd_jmpbuf); 3964 if (jumped == 0) { 3965 /* try this ... 
*/ 3966 new_dwarf3_reader_wrk( di, barf, 3967 debug_info_img, debug_info_sz, 3968 debug_abbv_img, debug_abbv_sz, 3969 debug_line_img, debug_line_sz, 3970 debug_str_img, debug_str_sz, 3971 debug_ranges_img, debug_ranges_sz, 3972 debug_loc_img, debug_loc_sz ); 3973 d3rd_jmpbuf_valid = False; 3974 TRACE_D3("\n------ .debug_info reading was successful ------\n"); 3975 } else { 3976 /* It longjmp'd. */ 3977 d3rd_jmpbuf_valid = False; 3978 /* Can't longjump without giving some sort of reason. */ 3979 vg_assert(d3rd_jmpbuf_reason != NULL); 3980 3981 TRACE_D3("\n------ .debug_info reading failed ------\n"); 3982 3983 ML_(symerr)(di, True, d3rd_jmpbuf_reason); 3984 } 3985 3986 d3rd_jmpbuf_valid = False; 3987 d3rd_jmpbuf_reason = NULL; 3988} 3989 3990 3991 3992/* --- Unused code fragments which might be useful one day. --- */ 3993 3994#if 0 3995 /* Read the arange tables */ 3996 TRACE_SYMTAB("\n"); 3997 TRACE_SYMTAB("\n------ The contents of .debug_arange ------\n"); 3998 init_Cursor( &aranges, debug_aranges_img, 3999 debug_aranges_sz, 0, barf, 4000 "Overrun whilst reading .debug_aranges section" ); 4001 while (True) { 4002 ULong len, d_i_offset; 4003 Bool is64; 4004 UShort version; 4005 UChar asize, segsize; 4006 4007 if (is_at_end_Cursor( &aranges )) 4008 break; 4009 /* Read one arange thingy */ 4010 /* initial_length field */ 4011 len = get_Initial_Length( &is64, &aranges, 4012 "in .debug_aranges: invalid initial-length field" ); 4013 version = get_UShort( &aranges ); 4014 d_i_offset = get_Dwarfish_UWord( &aranges, is64 ); 4015 asize = get_UChar( &aranges ); 4016 segsize = get_UChar( &aranges ); 4017 TRACE_D3(" Length: %llu\n", len); 4018 TRACE_D3(" Version: %d\n", (Int)version); 4019 TRACE_D3(" Offset into .debug_info: %llx\n", d_i_offset); 4020 TRACE_D3(" Pointer Size: %d\n", (Int)asize); 4021 TRACE_D3(" Segment Size: %d\n", (Int)segsize); 4022 TRACE_D3("\n"); 4023 TRACE_D3(" Address Length\n"); 4024 4025 while ((get_position_of_Cursor( &aranges ) % (2 * asize)) > 0) { 
4026 (void)get_UChar( & aranges ); 4027 } 4028 while (True) { 4029 ULong address = get_Dwarfish_UWord( &aranges, asize==8 ); 4030 ULong length = get_Dwarfish_UWord( &aranges, asize==8 ); 4031 TRACE_D3(" 0x%016llx 0x%llx\n", address, length); 4032 if (address == 0 && length == 0) break; 4033 } 4034 } 4035 TRACE_SYMTAB("\n"); 4036#endif 4037 4038#endif // defined(VGO_linux) || defined(VGO_darwin) 4039 4040/*--------------------------------------------------------------------*/ 4041/*--- end ---*/ 4042/*--------------------------------------------------------------------*/ 4043