readdwarf3.c revision 0b5bf911d9d40b8dd3130f6043ef7ba68a9f446e
1 2/*--------------------------------------------------------------------*/ 3/*--- Read DWARF3 ".debug_info" sections (DIE trees). ---*/ 4/*--- readdwarf3.c ---*/ 5/*--------------------------------------------------------------------*/ 6 7/* 8 This file is part of Valgrind, a dynamic binary instrumentation 9 framework. 10 11 Copyright (C) 2008-2008 OpenWorks LLP 12 info@open-works.co.uk 13 14 This program is free software; you can redistribute it and/or 15 modify it under the terms of the GNU General Public License as 16 published by the Free Software Foundation; either version 2 of the 17 License, or (at your option) any later version. 18 19 This program is distributed in the hope that it will be useful, but 20 WITHOUT ANY WARRANTY; without even the implied warranty of 21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 General Public License for more details. 23 24 You should have received a copy of the GNU General Public License 25 along with this program; if not, write to the Free Software 26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 27 02111-1307, USA. 28 29 The GNU General Public License is contained in the file COPYING. 30 31 Neither the names of the U.S. Department of Energy nor the 32 University of California nor the names of its contributors may be 33 used to endorse or promote products derived from this software 34 without prior written permission. 35*/ 36 37/* REFERENCE (without which this code will not make much sense): 38 39 DWARF Debugging Information Format, Version 3, 40 dated 20 December 2005 (the "D3 spec"). 41 42 Available at http://www.dwarfstd.org/Dwarf3.pdf. There's also a 43 .doc (MS Word) version, but for some reason the section numbers 44 between the Word and PDF versions differ by 1 in the first digit. 45 All section references in this code are to the PDF version. 

   CURRENT HACKS:

   DW_TAG_{const,volatile}_type with no DW_AT_type is allowed; it is
      assumed to mean "const void" or "volatile void" respectively.
      GDB appears to interpret them like this, anyway.

   In many cases it is important to know the svma of a CU (the "base
   address of the CU", as the D3 spec calls it).  There are some
   situations in which the spec implies this value is unknown, but the
   Dwarf3 produced by gcc-4.1 seems to assume it is not unknown but
   merely zero when not explicitly stated.  So we too have to make
   that assumption.

   TODO, 2008 Feb 17:

   get rid of cu_svma_known and document the assumed-zero svma hack.

   ML_(sizeOfType): differentiate between zero sized types and types
   for which the size is unknown.  Is this important?  I don't know.

   DW_AT_array_types: deal with explicit sizes (currently we compute
   the size from the bounds and the element size, although that's
   fragile if the bounds are incompletely specified, or completely
   absent)

   Document reason for difference (by 1) of stack preening depth in
   parse_var_DIE vs parse_type_DIE.

   Don't hand to ML_(addVars), vars whose locations are entirely in
   registers (DW_OP_reg*).  This is merely a space-saving
   optimisation, as ML_(evaluate_Dwarf3_Expr) should handle these
   expressions correctly, by failing to evaluate them and hence
   effectively ignoring the variable with which they are associated.

   Deal with DW_AT_array_types which have element size != stride

   In some cases, the info for a variable is split between two
   different DIEs (generally a declarer and a definer).  We punt on
   these.  Could do better here.

   The 'data_bias' argument passed to the expression evaluator
   (ML_(evaluate_Dwarf3_Expr)) should really be changed to a
   MaybeUWord, to make it clear when we do vs don't know what it is
   for the evaluation of an expression.  At the moment zero is passed
   for this parameter in the don't know case.
That's a bit fragile 92 and obscure; using a MaybeUWord would be clearer. 93 94 POTENTIAL PERFORMANCE IMPROVEMENTS: 95 96 The number of type entities that end up in the list of TyAdmins 97 rapidly becomes huge (eg, for libQtGui.so.4.3.2 (amd64-linux, size 98 80729047 bytes), there are 786860 entries in the list). Mostly 99 this seems to be caused by g++ adding type DIEs for all the basic 100 types once for each source file contributing to the compilation 101 unit, and for a large library they add up quickly. That causes 102 both a lot of work for this reader module, and also wastes vast 103 amounts of memory storing this duplicated information. We could 104 surely do a lot better here. 105 106 Handle interaction between read_DIE and parse_{var,type}_DIE 107 better. Currently read_DIE reads the entire DIE just to find where 108 the end is (and for debug printing), so that it can later reliably 109 move the cursor to the end regardless of what parse_{var,type}_DIE 110 do. This means many DIEs (most, even?) are read twice. It would 111 be smarter to make parse_{var,type}_DIE return a Bool indicating 112 whether or not they advanced the DIE cursor, and only if they 113 didn't should read_DIE itself read through the DIE. 114 115 ML_(addVar) and add_var_to_arange: quite a lot of DiAddrRanges have 116 zero variables in their .vars XArray. Rather than have an XArray 117 with zero elements (which uses 2 malloc'd blocks), allow the .vars 118 pointer to be NULL in this case. 119 120 More generally, reduce the amount of memory allocated and freed 121 while reading Dwarf3 type/variable information. Even modest (20MB) 122 objects cause this module to allocate and free hundreds of 123 thousands of small blocks, and ML_(arena_malloc) and its various 124 groupies always show up at the top of performance profiles. 
*/ 125 126#include "pub_core_basics.h" 127#include "pub_core_libcbase.h" 128#include "pub_core_libcassert.h" 129#include "pub_core_libcprint.h" 130#include "pub_core_options.h" 131#include "pub_core_xarray.h" 132#include "priv_misc.h" /* dinfo_zalloc/free */ 133#include "priv_tytypes.h" 134#include "priv_d3basics.h" 135#include "priv_storage.h" 136#include "priv_readdwarf3.h" /* self */ 137 138 139/*------------------------------------------------------------*/ 140/*--- ---*/ 141/*--- Basic machinery for parsing DIEs. ---*/ 142/*--- ---*/ 143/*------------------------------------------------------------*/ 144 145#define TRACE_D3(format, args...) \ 146 if (td3) { VG_(printf)(format, ## args); } 147 148#define D3_INVALID_CUOFF ((void*)(-1UL)) 149#define D3_FAKEVOID_CUOFF ((void*)(-2UL)) 150 151typedef 152 struct { 153 UChar* region_start_img; 154 UWord region_szB; 155 UWord region_next; 156 void (*barf)( HChar* ) __attribute__((noreturn)); 157 HChar* barfstr; 158 } 159 Cursor; 160 161static inline Bool is_sane_Cursor ( Cursor* c ) { 162 if (!c) return False; 163 if (!c->barf) return False; 164 if (!c->barfstr) return False; 165 return True; 166} 167 168static void init_Cursor ( Cursor* c, 169 UChar* region_start_img, 170 UWord region_szB, 171 UWord region_next, 172 __attribute__((noreturn)) void (*barf)( HChar* ), 173 HChar* barfstr ) 174{ 175 vg_assert(c); 176 VG_(memset)(c, 0, sizeof(*c)); 177 c->region_start_img = region_start_img; 178 c->region_szB = region_szB; 179 c->region_next = region_next; 180 c->barf = barf; 181 c->barfstr = barfstr; 182 vg_assert(is_sane_Cursor(c)); 183} 184 185static Bool is_at_end_Cursor ( Cursor* c ) { 186 vg_assert(is_sane_Cursor(c)); 187 return c->region_next >= c->region_szB; 188} 189 190static inline UWord get_position_of_Cursor ( Cursor* c ) { 191 vg_assert(is_sane_Cursor(c)); 192 return c->region_next; 193} 194static inline void set_position_of_Cursor ( Cursor* c, UWord pos ) { 195 c->region_next = pos; 196 
vg_assert(is_sane_Cursor(c)); 197} 198 199static /*signed*/Word get_remaining_length_Cursor ( Cursor* c ) { 200 vg_assert(is_sane_Cursor(c)); 201 return c->region_szB - c->region_next; 202} 203 204static UChar* get_address_of_Cursor ( Cursor* c ) { 205 vg_assert(is_sane_Cursor(c)); 206 return &c->region_start_img[ c->region_next ]; 207} 208 209__attribute__((noreturn)) 210static void failWith ( Cursor* c, HChar* str ) { 211 vg_assert(c); 212 vg_assert(c->barf); 213 c->barf(str); 214 /*NOTREACHED*/ 215 vg_assert(0); 216} 217 218/* FIXME: document assumptions on endianness for 219 get_UShort/UInt/ULong. */ 220static inline UChar get_UChar ( Cursor* c ) { 221 UChar r; 222 /* vg_assert(is_sane_Cursor(c)); */ 223 if (c->region_next + sizeof(UChar) > c->region_szB) { 224 c->barf(c->barfstr); 225 /*NOTREACHED*/ 226 vg_assert(0); 227 } 228 r = * (UChar*) &c->region_start_img[ c->region_next ]; 229 c->region_next += sizeof(UChar); 230 return r; 231} 232static UShort get_UShort ( Cursor* c ) { 233 UShort r; 234 vg_assert(is_sane_Cursor(c)); 235 if (c->region_next + sizeof(UShort) > c->region_szB) { 236 c->barf(c->barfstr); 237 /*NOTREACHED*/ 238 vg_assert(0); 239 } 240 r = * (UShort*) &c->region_start_img[ c->region_next ]; 241 c->region_next += sizeof(UShort); 242 return r; 243} 244static UInt get_UInt ( Cursor* c ) { 245 UInt r; 246 vg_assert(is_sane_Cursor(c)); 247 if (c->region_next + sizeof(UInt) > c->region_szB) { 248 c->barf(c->barfstr); 249 /*NOTREACHED*/ 250 vg_assert(0); 251 } 252 r = * (UInt*) &c->region_start_img[ c->region_next ]; 253 c->region_next += sizeof(UInt); 254 return r; 255} 256static ULong get_ULong ( Cursor* c ) { 257 ULong r; 258 vg_assert(is_sane_Cursor(c)); 259 if (c->region_next + sizeof(ULong) > c->region_szB) { 260 c->barf(c->barfstr); 261 /*NOTREACHED*/ 262 vg_assert(0); 263 } 264 r = * (ULong*) &c->region_start_img[ c->region_next ]; 265 c->region_next += sizeof(ULong); 266 return r; 267} 268static inline ULong get_ULEB128 ( Cursor* c ) { 269 
ULong result; 270 Int shift; 271 UChar byte; 272 /* unroll first iteration */ 273 byte = get_UChar( c ); 274 result = (ULong)(byte & 0x7f); 275 if (LIKELY(!(byte & 0x80))) return result; 276 shift = 7; 277 /* end unroll first iteration */ 278 do { 279 byte = get_UChar( c ); 280 result |= ((ULong)(byte & 0x7f)) << shift; 281 shift += 7; 282 } while (byte & 0x80); 283 return result; 284} 285static Long get_SLEB128 ( Cursor* c ) { 286 ULong result = 0; 287 Int shift = 0; 288 UChar byte; 289 do { 290 byte = get_UChar(c); 291 result |= ((ULong)(byte & 0x7f)) << shift; 292 shift += 7; 293 } while (byte & 0x80); 294 if (shift < 64 && (byte & 0x40)) 295 result |= -(1ULL << shift); 296 return result; 297} 298 299/* Assume 'c' points to the start of a string. Return the absolute 300 address of whatever it points at, and advance it past the 301 terminating zero. This makes it safe for the caller to then copy 302 the string with ML_(addStr), since (w.r.t. image overruns) the 303 process of advancing past the terminating zero will already have 304 "vetted" the string. */ 305static UChar* get_AsciiZ ( Cursor* c ) { 306 UChar uc; 307 UChar* res = get_address_of_Cursor(c); 308 do { uc = get_UChar(c); } while (uc != 0); 309 return res; 310} 311 312static ULong peek_ULEB128 ( Cursor* c ) { 313 Word here = c->region_next; 314 ULong r = get_ULEB128( c ); 315 c->region_next = here; 316 return r; 317} 318static UChar peek_UChar ( Cursor* c ) { 319 Word here = c->region_next; 320 UChar r = get_UChar( c ); 321 c->region_next = here; 322 return r; 323} 324 325static ULong get_Dwarfish_UWord ( Cursor* c, Bool is_dw64 ) { 326 return is_dw64 ? 
get_ULong(c) : (ULong) get_UInt(c); 327} 328 329static UWord get_UWord ( Cursor* c ) { 330 vg_assert(sizeof(UWord) == sizeof(void*)); 331 if (sizeof(UWord) == 4) return get_UInt(c); 332 if (sizeof(UWord) == 8) return get_ULong(c); 333 vg_assert(0); 334} 335 336 337/* Read a DWARF3 'Initial Length' field */ 338static ULong get_Initial_Length ( /*OUT*/Bool* is64, 339 Cursor* c, 340 HChar* barfMsg ) 341{ 342 ULong w64; 343 UInt w32; 344 *is64 = False; 345 w32 = get_UInt( c ); 346 if (w32 >= 0xFFFFFFF0 && w32 < 0xFFFFFFFF) { 347 c->barf( barfMsg ); 348 } 349 else if (w32 == 0xFFFFFFFF) { 350 *is64 = True; 351 w64 = get_ULong( c ); 352 } else { 353 *is64 = False; 354 w64 = (ULong)w32; 355 } 356 return w64; 357} 358 359 360/*------------------------------------------------------------*/ 361/*--- ---*/ 362/*--- "CUConst" structure ---*/ 363/*--- ---*/ 364/*------------------------------------------------------------*/ 365 366#define N_ABBV_CACHE 32 367 368/* Holds information that is constant through the parsing of a 369 Compilation Unit. This is basically plumbed through to 370 everywhere. */ 371typedef 372 struct { 373 /* Call here if anything goes wrong */ 374 void (*barf)( HChar* ) __attribute__((noreturn)); 375 /* Is this 64-bit DWARF ? */ 376 Bool is_dw64; 377 /* Which DWARF version ? (2 or 3) */ 378 UShort version; 379 /* Length of this Compilation Unit, excluding its Header */ 380 ULong unit_length; 381 /* Offset of start of this unit in .debug_info */ 382 UWord cu_start_offset; 383 /* SVMA for this CU. In the D3 spec, is known as the "base 384 address of the compilation unit (last para sec 3.1.1). 385 Needed for (amongst things) interpretation of location-list 386 values. */ 387 Addr cu_svma; 388 Bool cu_svma_known; 389 /* The debug_abbreviations table to be used for this Unit */ 390 UChar* debug_abbv; 391 /* Upper bound on size thereof (an overestimate, in general) */ 392 UWord debug_abbv_maxszB; 393 /* Where is .debug_str ? 
*/ 394 UChar* debug_str_img; 395 UWord debug_str_sz; 396 /* Where is .debug_ranges ? */ 397 UChar* debug_ranges_img; 398 UWord debug_ranges_sz; 399 /* Where is .debug_loc ? */ 400 UChar* debug_loc_img; 401 UWord debug_loc_sz; 402 /* Where is .debug_line? */ 403 UChar* debug_line_img; 404 UWord debug_line_sz; 405 /* --- Needed so we can add stuff to the string table. --- */ 406 struct _DebugInfo* di; 407 /* --- a cache for set_abbv_Cursor --- */ 408 /* abbv_code == (ULong)-1 for an unused entry. */ 409 struct { ULong abbv_code; UWord posn; } saC_cache[N_ABBV_CACHE]; 410 UWord saC_cache_queries; 411 UWord saC_cache_misses; 412 } 413 CUConst; 414 415 416/*------------------------------------------------------------*/ 417/*--- ---*/ 418/*--- Helper functions for Guarded Expressions ---*/ 419/*--- ---*/ 420/*------------------------------------------------------------*/ 421 422/* Parse the location list starting at img-offset 'debug_loc_offset' 423 in .debug_loc. Results are biased with 'svma_of_referencing_CU' 424 and so I believe are correct SVMAs for the object as a whole. This 425 function allocates the UChar*, and the caller must deallocate it. 426 The resulting block is in so-called Guarded-Expression format. 427 428 Guarded-Expression format is similar but not identical to the DWARF3 429 location-list format. The format of each returned block is: 430 431 UChar biasMe; 432 UChar isEnd; 433 followed by zero or more of 434 435 (Addr aMin; Addr aMax; UShort nbytes; ..bytes..; UChar isEnd) 436 437 '..bytes..' is an standard DWARF3 location expression which is 438 valid when aMin <= pc <= aMax (possibly after suitable biasing). 439 440 The number of bytes in '..bytes..' is nbytes. 441 442 The end of the sequence is marked by an isEnd == 1 value. All 443 previous isEnd values must be zero. 444 445 biasMe is 1 if the aMin/aMax fields need this DebugInfo's 446 text_bias added before use, and 0 if the GX is this is not 447 necessary (is ready to go). 
448 449 Hence the block can be quickly parsed and is self-describing. Note 450 that aMax is 1 less than the corresponding value in a DWARF3 451 location list. Zero length ranges, with aMax == aMin-1, are not 452 allowed. 453*/ 454void ML_(pp_GX) ( GExpr* gx ) { 455 Addr aMin, aMax; 456 UChar uc; 457 UShort nbytes; 458 UChar* p = &gx->payload[0]; 459 uc = *p++; 460 VG_(printf)("GX(%s){", uc == 0 ? "final" : "Breqd" ); 461 vg_assert(uc == 0 || uc == 1); 462 while (True) { 463 uc = *p++; 464 if (uc == 1) 465 break; /*isEnd*/ 466 vg_assert(uc == 0); 467 aMin = * (Addr*)p; p += sizeof(Addr); 468 aMax = * (Addr*)p; p += sizeof(Addr); 469 nbytes = * (UShort*)p; p += sizeof(UShort); 470 VG_(printf)("[%p,%p]=", aMin, aMax); 471 while (nbytes > 0) { 472 VG_(printf)("%02x", (UInt)*p++); 473 nbytes--; 474 } 475 if (*p == 0) 476 VG_(printf)(","); 477 } 478 VG_(printf)("}"); 479} 480 481static void bias_GX ( /*MOD*/GExpr* gx, Addr bias ) 482{ 483 UShort nbytes; 484 UChar* p = &gx->payload[0]; 485 UChar uc; 486 uc = *p++; /*biasMe*/ 487 if (uc == 0) 488 return; 489 vg_assert(uc == 1); 490 p[-1] = 0; /* mark it as done */ 491 while (True) { 492 uc = *p++; 493 if (uc == 1) 494 break; /*isEnd*/ 495 vg_assert(uc == 0); 496 * ((Addr*)p) += bias; /*aMin*/ p += sizeof(Addr); 497 * ((Addr*)p) += bias; /*aMax*/ p += sizeof(Addr); 498 nbytes = * (UShort*)p; p += sizeof(UShort); 499 p += nbytes; 500 } 501} 502 503__attribute__((noinline)) 504static GExpr* make_singleton_GX ( UChar* block, UWord nbytes ) 505{ 506 SizeT bytesReqd; 507 GExpr* gx; 508 UChar *p, *pstart; 509 510 vg_assert(sizeof(UWord) == sizeof(Addr)); 511 vg_assert(nbytes <= 0xFFFF); /* else we overflow the nbytes field */ 512 bytesReqd 513 = sizeof(UChar) /*biasMe*/ + sizeof(UChar) /*!isEnd*/ 514 + sizeof(UWord) /*aMin*/ + sizeof(UWord) /*aMax*/ 515 + sizeof(UShort) /*nbytes*/ + nbytes 516 + sizeof(UChar); /*isEnd*/ 517 518 gx = ML_(dinfo_zalloc)( sizeof(GExpr) + bytesReqd ); 519 vg_assert(gx); 520 521 p = pstart = 
&gx->payload[0]; 522 523 * ((UChar*)p) = 0; /*biasMe*/ p += sizeof(UChar); 524 * ((UChar*)p) = 0; /*!isEnd*/ p += sizeof(UChar); 525 * ((Addr*)p) = 0; /*aMin*/ p += sizeof(Addr); 526 * ((Addr*)p) = ~((Addr)0); /*aMax */ p += sizeof(Addr); 527 * ((UShort*)p) = (UShort)nbytes; /*nbytes*/ p += sizeof(UShort); 528 VG_(memcpy)(p, block, nbytes); p += nbytes; 529 * ((UChar*)p) = 1; /*isEnd*/ p += sizeof(UChar); 530 531 vg_assert( (SizeT)(p - pstart) == bytesReqd); 532 vg_assert( &gx->payload[bytesReqd] 533 == ((UChar*)gx) + sizeof(GExpr) + bytesReqd ); 534 535 gx->next = NULL; 536 return gx; 537} 538 539__attribute__((noinline)) 540static GExpr* make_general_GX ( CUConst* cc, 541 Bool td3, 542 UWord debug_loc_offset, 543 Addr svma_of_referencing_CU ) 544{ 545 Addr base; 546 Cursor loc; 547 XArray* xa; /* XArray of UChar */ 548 GExpr* gx; 549 Word nbytes; 550 551 vg_assert(sizeof(UWord) == sizeof(Addr)); 552 if (cc->debug_loc_sz == 0) 553 cc->barf("make_general_GX: .debug_loc is empty/missing"); 554 555 init_Cursor( &loc, cc->debug_loc_img, 556 cc->debug_loc_sz, 0, cc->barf, 557 "Overrun whilst reading .debug_loc section(2)" ); 558 set_position_of_Cursor( &loc, debug_loc_offset ); 559 560 TRACE_D3("make_general_GX (.debug_loc_offset = %lu, img = %p) {\n", 561 debug_loc_offset, get_address_of_Cursor( &loc ) ); 562 563 /* Who frees this xa? It is freed before this fn exits. */ 564 xa = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free), 565 sizeof(UChar) ); 566 567 { UChar c = 1; /*biasMe*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); } 568 569 base = 0; 570 while (True) { 571 Bool acquire; 572 UWord len; 573 /* Read a (host-)word pair. This is something of a hack since 574 the word size to read is really dictated by the ELF file; 575 however, we assume we're reading a file with the same 576 word-sizeness as the host. Reasonably enough. 
*/ 577 UWord w1 = get_UWord( &loc ); 578 UWord w2 = get_UWord( &loc ); 579 580 TRACE_D3(" %08lx %08lx\n", w1, w2); 581 if (w1 == 0 && w2 == 0) 582 break; /* end of list */ 583 584 if (w1 == -1UL) { 585 /* new value for 'base' */ 586 base = w2; 587 continue; 588 } 589 590 /* else a location expression follows */ 591 /* else enumerate [w1+base, w2+base) */ 592 /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc" 593 (sec 2.17.2) */ 594 if (w1 > w2) { 595 TRACE_D3("negative range is for .debug_loc expr at " 596 "file offset %lu\n", 597 debug_loc_offset); 598 cc->barf( "negative range in .debug_loc section" ); 599 } 600 601 /* ignore zero length ranges */ 602 acquire = w1 < w2; 603 len = (UWord)get_UShort( &loc ); 604 605 if (acquire) { 606 UWord w; 607 UShort s; 608 UChar c; 609 c = 0; /* !isEnd*/ 610 VG_(addBytesToXA)( xa, &c, sizeof(c) ); 611 w = w1 + base + svma_of_referencing_CU; 612 VG_(addBytesToXA)( xa, &w, sizeof(w) ); 613 w = w2 -1 + base + svma_of_referencing_CU; 614 VG_(addBytesToXA)( xa, &w, sizeof(w) ); 615 s = (UShort)len; 616 VG_(addBytesToXA)( xa, &s, sizeof(s) ); 617 } 618 619 while (len > 0) { 620 UChar byte = get_UChar( &loc ); 621 TRACE_D3("%02x", (UInt)byte); 622 if (acquire) 623 VG_(addBytesToXA)( xa, &byte, 1 ); 624 len--; 625 } 626 TRACE_D3("\n"); 627 } 628 629 { UChar c = 1; /*isEnd*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); } 630 631 nbytes = VG_(sizeXA)( xa ); 632 vg_assert(nbytes >= 1); 633 634 gx = ML_(dinfo_zalloc)( sizeof(GExpr) + nbytes ); 635 vg_assert(gx); 636 VG_(memcpy)( &gx->payload[0], (UChar*)VG_(indexXA)(xa,0), nbytes ); 637 vg_assert( &gx->payload[nbytes] 638 == ((UChar*)gx) + sizeof(GExpr) + nbytes ); 639 640 VG_(deleteXA)( xa ); 641 642 gx->next = NULL; 643 644 TRACE_D3("}\n"); 645 646 return gx; 647} 648 649 650/*------------------------------------------------------------*/ 651/*--- ---*/ 652/*--- Helper functions for range lists and CU headers ---*/ 653/*--- ---*/ 
654/*------------------------------------------------------------*/ 655 656/* Denotes an address range. Both aMin and aMax are included in the 657 range; hence a complete range is (0, ~0) and an empty range is any 658 (X, X-1) for X > 0.*/ 659typedef 660 struct { Addr aMin; Addr aMax; } 661 AddrRange; 662 663 664__attribute__((noinline)) 665static XArray* /* of AddrRange */ empty_range_list ( void ) 666{ 667 XArray* xa; /* XArray of AddrRange */ 668 /* Who frees this xa? varstack_preen() does. */ 669 xa = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free), 670 sizeof(AddrRange) ); 671 return xa; 672} 673 674 675static XArray* unitary_range_list ( Addr aMin, Addr aMax ) 676{ 677 XArray* xa; 678 AddrRange pair; 679 vg_assert(aMin <= aMax); 680 /* Who frees this xa? varstack_preen() does. */ 681 xa = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free), 682 sizeof(AddrRange) ); 683 pair.aMin = aMin; 684 pair.aMax = aMax; 685 VG_(addToXA)( xa, &pair ); 686 return xa; 687} 688 689 690/* Enumerate the address ranges starting at img-offset 691 'debug_ranges_offset' in .debug_ranges. Results are biased with 692 'svma_of_referencing_CU' and so I believe are correct SVMAs for the 693 object as a whole. This function allocates the XArray, and the 694 caller must deallocate it. */ 695__attribute__((noinline)) 696static XArray* /* of AddrRange */ 697 get_range_list ( CUConst* cc, 698 Bool td3, 699 UWord debug_ranges_offset, 700 Addr svma_of_referencing_CU ) 701{ 702 Addr base; 703 Cursor ranges; 704 XArray* xa; /* XArray of AddrRange */ 705 AddrRange pair; 706 707 if (cc->debug_ranges_sz == 0) 708 cc->barf("get_range_list: .debug_ranges is empty/missing"); 709 710 init_Cursor( &ranges, cc->debug_ranges_img, 711 cc->debug_ranges_sz, 0, cc->barf, 712 "Overrun whilst reading .debug_ranges section(2)" ); 713 set_position_of_Cursor( &ranges, debug_ranges_offset ); 714 715 /* Who frees this xa? varstack_preen() does. 
*/ 716 xa = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free), 717 sizeof(AddrRange) ); 718 base = 0; 719 while (True) { 720 /* Read a (host-)word pair. This is something of a hack since 721 the word size to read is really dictated by the ELF file; 722 however, we assume we're reading a file with the same 723 word-sizeness as the host. Reasonably enough. */ 724 UWord w1 = get_UWord( &ranges ); 725 UWord w2 = get_UWord( &ranges ); 726 727 if (w1 == 0 && w2 == 0) 728 break; /* end of list. */ 729 730 if (w1 == -1UL) { 731 /* new value for 'base' */ 732 base = w2; 733 continue; 734 } 735 736 /* else enumerate [w1+base, w2+base) */ 737 /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc" 738 (sec 2.17.2) */ 739 if (w1 > w2) 740 cc->barf( "negative range in .debug_ranges section" ); 741 if (w1 < w2) { 742 pair.aMin = w1 + base + svma_of_referencing_CU; 743 pair.aMax = w2 - 1 + base + svma_of_referencing_CU; 744 vg_assert(pair.aMin <= pair.aMax); 745 VG_(addToXA)( xa, &pair ); 746 } 747 } 748 return xa; 749} 750 751 752/* Parse the Compilation Unit header indicated at 'c' and 753 initialise 'cc' accordingly. 
*/ 754static __attribute__((noinline)) 755void parse_CU_Header ( /*OUT*/CUConst* cc, 756 Bool td3, 757 Cursor* c, 758 UChar* debug_abbv_img, UWord debug_abbv_sz ) 759{ 760 UChar address_size; 761 UWord debug_abbrev_offset; 762 Int i; 763 764 VG_(memset)(cc, 0, sizeof(*cc)); 765 vg_assert(c && c->barf); 766 cc->barf = c->barf; 767 768 /* initial_length field */ 769 cc->unit_length 770 = get_Initial_Length( &cc->is_dw64, c, 771 "parse_CU_Header: invalid initial-length field" ); 772 773 TRACE_D3(" Length: %lld\n", cc->unit_length ); 774 775 /* version */ 776 cc->version = get_UShort( c ); 777 if (cc->version != 2 && cc->version != 3) 778 cc->barf( "parse_CU_Header: is neither DWARF2 nor DWARF3" ); 779 TRACE_D3(" Version: %d\n", (Int)cc->version ); 780 781 /* debug_abbrev_offset */ 782 debug_abbrev_offset = get_Dwarfish_UWord( c, cc->is_dw64 ); 783 if (debug_abbrev_offset >= debug_abbv_sz) 784 cc->barf( "parse_CU_Header: invalid debug_abbrev_offset" ); 785 TRACE_D3(" Abbrev Offset: %ld\n", debug_abbrev_offset ); 786 787 /* address size. If this isn't equal to the host word size, just 788 give up. This makes it safe to assume elsewhere that 789 DW_FORM_addr can be treated as a host word. */ 790 address_size = get_UChar( c ); 791 if (address_size != sizeof(void*)) 792 cc->barf( "parse_CU_Header: invalid address_size" ); 793 TRACE_D3(" Pointer Size: %d\n", (Int)address_size ); 794 795 /* Set up so that cc->debug_abbv points to the relevant table for 796 this CU. Set the szB so that at least we can't read off the end 797 of the debug_abbrev section -- potentially (and quite likely) 798 too big, if this isn't the last table in the section, but at 799 least it's safe. 
*/ 800 cc->debug_abbv = debug_abbv_img + debug_abbrev_offset; 801 cc->debug_abbv_maxszB = debug_abbv_sz - debug_abbrev_offset; 802 /* and empty out the set_abbv_Cursor cache */ 803 if (0) VG_(printf)("XXXXXX initialise set_abbv_Cursor cache\n"); 804 for (i = 0; i < N_ABBV_CACHE; i++) { 805 cc->saC_cache[i].abbv_code = (ULong)-1; /* unused */ 806 cc->saC_cache[i].posn = 0; 807 } 808 cc->saC_cache_queries = 0; 809 cc->saC_cache_misses = 0; 810} 811 812 813/* Set up 'c' so it is ready to parse the abbv table entry code 814 'abbv_code' for this compilation unit. */ 815static __attribute__((noinline)) 816void set_abbv_Cursor ( /*OUT*/Cursor* c, Bool td3, 817 CUConst* cc, ULong abbv_code ) 818{ 819 Int i; 820 ULong acode; 821 822 if (abbv_code == 0) 823 cc->barf("set_abbv_Cursor: abbv_code == 0" ); 824 825 /* (ULong)-1 is used to represent an empty cache slot. So we can't 826 allow it. In any case no valid DWARF3 should make a reference 827 to a negative abbreviation code. [at least, they always seem to 828 be numbered upwards from zero as far as I have seen] */ 829 vg_assert(abbv_code != (ULong)-1); 830 831 /* First search the cache. */ 832 if (0) VG_(printf)("XXXXXX search set_abbv_Cursor cache\n"); 833 cc->saC_cache_queries++; 834 for (i = 0; i < N_ABBV_CACHE; i++) { 835 /* No need to test the cached abbv_codes for -1 (empty), since 836 we just asserted that abbv_code is not -1. */ 837 if (cc->saC_cache[i].abbv_code == abbv_code) { 838 /* Found it. Cool. Set up the parser using the cached 839 position, and move this cache entry 1 step closer to the 840 front. 
*/ 841 if (0) VG_(printf)("XXXXXX found in set_abbv_Cursor cache\n"); 842 init_Cursor( c, cc->debug_abbv, 843 cc->debug_abbv_maxszB, cc->saC_cache[i].posn, 844 cc->barf, 845 "Overrun whilst parsing .debug_abbrev section(1)" ); 846 if (i > 0) { 847 ULong t_abbv_code = cc->saC_cache[i].abbv_code; 848 UWord t_posn = cc->saC_cache[i].posn; 849 while (i > 0) { 850 cc->saC_cache[i] = cc->saC_cache[i-1]; 851 cc->saC_cache[0].abbv_code = t_abbv_code; 852 cc->saC_cache[0].posn = t_posn; 853 i--; 854 } 855 } 856 return; 857 } 858 } 859 860 /* No. It's not in the cache. We have to search through 861 .debug_abbrev, of course taking care to update the cache 862 when done. */ 863 864 cc->saC_cache_misses++; 865 init_Cursor( c, cc->debug_abbv, cc->debug_abbv_maxszB, 0, cc->barf, 866 "Overrun whilst parsing .debug_abbrev section(2)" ); 867 868 /* Now iterate though the table until we find the requested 869 entry. */ 870 while (True) { 871 ULong atag; 872 UInt has_children; 873 acode = get_ULEB128( c ); 874 if (acode == 0) break; /* end of the table */ 875 if (acode == abbv_code) break; /* found it */ 876 atag = get_ULEB128( c ); 877 has_children = get_UChar( c ); 878 //TRACE_D3(" %llu %s [%s]\n", 879 // acode, pp_DW_TAG(atag), pp_DW_children(has_children)); 880 while (True) { 881 ULong at_name = get_ULEB128( c ); 882 ULong at_form = get_ULEB128( c ); 883 if (at_name == 0 && at_form == 0) break; 884 //TRACE_D3(" %18s %s\n", 885 // pp_DW_AT(at_name), pp_DW_FORM(at_form)); 886 } 887 } 888 889 if (acode == 0) { 890 /* Not found. This is fatal. */ 891 cc->barf("set_abbv_Cursor: abbv_code not found"); 892 } 893 894 /* Otherwise, 'c' is now set correctly to parse the relevant entry, 895 starting from the abbreviation entry's tag. So just cache 896 the result, and return. 
*/ 897 for (i = N_ABBV_CACHE-1; i > N_ABBV_CACHE/2; i--) { 898 cc->saC_cache[i] = cc->saC_cache[i-1]; 899 } 900 if (0) VG_(printf)("XXXXXX update set_abbv_Cursor cache\n"); 901 cc->saC_cache[N_ABBV_CACHE/2].abbv_code = abbv_code; 902 cc->saC_cache[N_ABBV_CACHE/2].posn = get_position_of_Cursor(c); 903} 904 905 906/* From 'c', get the Form data into the lowest 1/2/4/8 bytes of *cts. 907 908 If *cts itself contains the entire result, then *ctsSzB is set to 909 1,2,4 or 8 accordingly and *ctsMemSzB is set to zero. 910 911 Alternatively, the result can be a block of data (in the 912 transiently mapped-in object, so-called "image" space). If so then 913 the lowest sizeof(void*)/8 bytes of *cts hold a pointer to said 914 image, *ctsSzB is zero, and *ctsMemSzB is the size of the block. 915 916 Unfortunately this means it is impossible to represent a zero-size 917 image block since that would have *ctsSzB == 0 and *ctsMemSzB == 0 918 and so is ambiguous (which case it is?) 919 920 Invariant on successful return: 921 (*ctsSzB > 0 && *ctsMemSzB == 0) 922 || (*ctsSzB == 0 && *ctsMemSzB > 0) 923*/ 924static 925void get_Form_contents ( /*OUT*/ULong* cts, 926 /*OUT*/Int* ctsSzB, 927 /*OUT*/UWord* ctsMemSzB, 928 CUConst* cc, Cursor* c, 929 Bool td3, DW_FORM form ) 930{ 931 *cts = 0; 932 *ctsSzB = 0; 933 *ctsMemSzB = 0; 934 switch (form) { 935 case DW_FORM_data1: 936 *cts = (ULong)(UChar)get_UChar(c); 937 *ctsSzB = 1; 938 TRACE_D3("%u", (UInt)*cts); 939 break; 940 case DW_FORM_data2: 941 *cts = (ULong)(UShort)get_UShort(c); 942 *ctsSzB = 2; 943 TRACE_D3("%u", (UInt)*cts); 944 break; 945 case DW_FORM_data4: 946 *cts = (ULong)(UInt)get_UInt(c); 947 *ctsSzB = 4; 948 TRACE_D3("%u", (UInt)*cts); 949 break; 950 case DW_FORM_data8: 951 *cts = get_ULong(c); 952 *ctsSzB = 8; 953 TRACE_D3("%llu", *cts); 954 break; 955 case DW_FORM_sdata: 956 *cts = (ULong)(Long)get_SLEB128(c); 957 *ctsSzB = 8; 958 TRACE_D3("%lld", (Long)*cts); 959 break; 960 case DW_FORM_addr: 961 /* note, this is a hack. 
DW_FORM_addr is defined as getting 962 a word the size of the target machine as defined by the 963 address_size field in the CU Header. However, 964 parse_CU_Header() rejects all inputs except those for 965 which address_size == sizeof(Word), hence we can just 966 treat it as a (host) Word. */ 967 *cts = (ULong)(UWord)get_UWord(c); 968 *ctsSzB = sizeof(UWord); 969 TRACE_D3("0x%lx", (UWord)*cts); 970 break; 971 case DW_FORM_strp: { 972 /* this is an offset into .debug_str */ 973 UChar* str; 974 UWord uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 ); 975 if (cc->debug_str_img == NULL || uw >= cc->debug_str_sz) 976 cc->barf("read_and_show_Form: DW_FORM_strp " 977 "points outside .debug_str"); 978 /* FIXME: check the entire string lies inside debug_str, 979 not just the first byte of it. */ 980 str = (UChar*)cc->debug_str_img + uw; 981 TRACE_D3("(indirect string, offset: 0x%lx): %s", uw, str); 982 *cts = (ULong)(UWord)str; 983 *ctsMemSzB = 1 + (ULong)VG_(strlen)(str); 984 break; 985 } 986 case DW_FORM_string: { 987 UChar* str = get_AsciiZ(c); 988 TRACE_D3("%s", str); 989 *cts = (ULong)(UWord)str; 990 /* strlen is safe because get_AsciiZ already 'vetted' the 991 entire string */ 992 *ctsMemSzB = 1 + (ULong)VG_(strlen)(str); 993 break; 994 } 995 case DW_FORM_ref4: { 996 UInt u32 = get_UInt(c); 997 UWord res = cc->cu_start_offset + (UWord)u32; 998 *cts = (ULong)res; 999 *ctsSzB = sizeof(UWord); 1000 TRACE_D3("<%lx>", res); 1001 break; 1002 } 1003 case DW_FORM_flag: { 1004 UChar u8 = get_UChar(c); 1005 TRACE_D3("%u", (UInt)u8); 1006 *cts = (ULong)u8; 1007 *ctsSzB = 1; 1008 break; 1009 } 1010 case DW_FORM_block1: { 1011 ULong u64b; 1012 ULong u64 = (ULong)get_UChar(c); 1013 UChar* block = get_address_of_Cursor(c); 1014 TRACE_D3("%llu byte block: ", u64); 1015 for (u64b = u64; u64b > 0; u64b--) { 1016 UChar u8 = get_UChar(c); 1017 TRACE_D3("%x ", (UInt)u8); 1018 } 1019 *cts = (ULong)(UWord)block; 1020 *ctsMemSzB = (UWord)u64; 1021 break; 1022 } 1023 default: 1024 
VG_(printf)("get_Form_contents: unhandled %lld (%s)\n", 1025 form, ML_(pp_DW_FORM)(form)); 1026 c->barf("get_Form_contents: unhandled DW_FORM"); 1027 } 1028} 1029 1030 1031/*------------------------------------------------------------*/ 1032/*--- ---*/ 1033/*--- Parsing of variable-related DIEs ---*/ 1034/*--- ---*/ 1035/*------------------------------------------------------------*/ 1036 1037typedef 1038 struct _TempVar { 1039 struct _TempVar* next; 1040 UChar* name; /* in DebugInfo's .strchunks */ 1041 /* Represent ranges economically. nRanges is the number of 1042 ranges. Cases: 1043 0: .rngOneMin .rngOneMax .manyRanges are all zero 1044 1: .rngOneMin .rngOneMax hold the range; .rngMany is NULL 1045 2: .rngOneMin .rngOneMax are zero; .rngMany holds the ranges. 1046 This is merely an optimisation to avoid having to allocate 1047 and free the XArray in the common (98%) of cases where there 1048 is zero or one address ranges. */ 1049 UWord nRanges; 1050 Addr rngOneMin; 1051 Addr rngOneMax; 1052 XArray* rngMany; /* of AddrRange. UNIQUE PTR in AR_DINFO. */ 1053 /* --- */ 1054 Int level; 1055 Type* typeR; 1056 GExpr* gexpr; /* for this variable */ 1057 GExpr* fbGX; /* to find the frame base of the enclosing fn, if 1058 any */ 1059 UChar* fName; /* declaring file name, or NULL */ 1060 Int fLine; /* declaring file line number, or zero */ 1061 /* offset in .debug_info, so that abstract instances can be 1062 found to satisfy references from concrete instances. */ 1063 UWord dioff; 1064 UWord absOri; /* so the absOri fields refer to dioff fields 1065 in some other, related TempVar. */ 1066 } 1067 TempVar; 1068 1069#define N_D3_VAR_STACK 24 1070 1071typedef 1072 struct { 1073 /* Contains the range stack: a stack of address ranges, one 1074 stack entry for each nested scope. 1075 1076 Some scope entries are created by function definitions 1077 (DW_AT_subprogram), and for those, we also note the GExpr 1078 derived from its DW_AT_frame_base attribute, if any. 
1079 Consequently it should be possible to find, for any 1080 variable's DIE, the GExpr for the the containing function's 1081 DW_AT_frame_base by scanning back through the stack to find 1082 the nearest entry associated with a function. This somewhat 1083 elaborate scheme is provided so as to make it possible to 1084 obtain the correct DW_AT_frame_base expression even in the 1085 presence of nested functions (or to be more precise, in the 1086 presence of nested DW_AT_subprogram DIEs). 1087 */ 1088 Int sp; /* [sp] is innermost active entry; sp==-1 for empty 1089 stack */ 1090 XArray* ranges[N_D3_VAR_STACK]; /* XArray of AddrRange */ 1091 Int level[N_D3_VAR_STACK]; /* D3 DIE levels */ 1092 Bool isFunc[N_D3_VAR_STACK]; /* from DW_AT_subprogram? */ 1093 GExpr* fbGX[N_D3_VAR_STACK]; /* if isFunc, contains the FB 1094 expr, else NULL */ 1095 /* The file name table. Is a mapping from integer index to the 1096 (permanent) copy of the string, iow a non-img area. */ 1097 XArray* /* of UChar* */ filenameTable; 1098 } 1099 D3VarParser; 1100 1101static void varstack_show ( D3VarParser* parser, HChar* str ) { 1102 Word i, j; 1103 VG_(printf)(" varstack (%s) {\n", str); 1104 for (i = 0; i <= parser->sp; i++) { 1105 XArray* xa = parser->ranges[i]; 1106 vg_assert(xa); 1107 VG_(printf)(" [%ld] (level %d)", i, parser->level[i]); 1108 if (parser->isFunc[i]) { 1109 VG_(printf)(" (fbGX=%p)", parser->fbGX[i]); 1110 } else { 1111 vg_assert(parser->fbGX[i] == NULL); 1112 } 1113 VG_(printf)(": "); 1114 if (VG_(sizeXA)( xa ) == 0) { 1115 VG_(printf)("** empty PC range array **"); 1116 } else { 1117 for (j = 0; j < VG_(sizeXA)( xa ); j++) { 1118 AddrRange* range = (AddrRange*) VG_(indexXA)( xa, j ); 1119 vg_assert(range); 1120 VG_(printf)("[%p,%p] ", range->aMin, range->aMax); 1121 } 1122 } 1123 VG_(printf)("\n"); 1124 } 1125 VG_(printf)(" }\n"); 1126} 1127 1128/* Remove from the stack, all entries with .level > 'level' */ 1129static 1130void varstack_preen ( D3VarParser* parser, Bool td3, 
Int level ) 1131{ 1132 Bool changed = False; 1133 vg_assert(parser->sp < N_D3_VAR_STACK); 1134 while (True) { 1135 vg_assert(parser->sp >= -1); 1136 if (parser->sp == -1) break; 1137 if (parser->level[parser->sp] <= level) break; 1138 if (0) 1139 TRACE_D3("BBBBAAAA varstack_pop [newsp=%d]\n", parser->sp-1); 1140 vg_assert(parser->ranges[parser->sp]); 1141 /* Who allocated this xa? get_range_list() or 1142 unitary_range_list(). */ 1143 VG_(deleteXA)( parser->ranges[parser->sp] ); 1144 parser->ranges[parser->sp] = NULL; 1145 parser->level[parser->sp] = 0; 1146 parser->isFunc[parser->sp] = False; 1147 parser->fbGX[parser->sp] = NULL; 1148 parser->sp--; 1149 changed = True; 1150 } 1151 if (changed && td3) 1152 varstack_show( parser, "after preen" ); 1153} 1154 1155static void varstack_push ( CUConst* cc, 1156 D3VarParser* parser, 1157 Bool td3, 1158 XArray* ranges, Int level, 1159 Bool isFunc, GExpr* fbGX ) { 1160 if (0) 1161 TRACE_D3("BBBBAAAA varstack_push[newsp=%d]: %d %p\n", 1162 parser->sp+1, level, ranges); 1163 1164 /* First we need to zap everything >= 'level', as we are about to 1165 replace any previous entry at 'level', so .. 
*/ 1166 varstack_preen(parser, /*td3*/False, level-1); 1167 1168 vg_assert(parser->sp >= -1); 1169 vg_assert(parser->sp < N_D3_VAR_STACK); 1170 if (parser->sp == N_D3_VAR_STACK-1) 1171 cc->barf("varstack_push: N_D3_VAR_STACK is too low; " 1172 "increase and recompile"); 1173 if (parser->sp >= 0) 1174 vg_assert(parser->level[parser->sp] < level); 1175 parser->sp++; 1176 vg_assert(parser->ranges[parser->sp] == NULL); 1177 vg_assert(parser->level[parser->sp] == 0); 1178 vg_assert(parser->isFunc[parser->sp] == False); 1179 vg_assert(parser->fbGX[parser->sp] == NULL); 1180 vg_assert(ranges != NULL); 1181 if (!isFunc) vg_assert(fbGX == NULL); 1182 parser->ranges[parser->sp] = ranges; 1183 parser->level[parser->sp] = level; 1184 parser->isFunc[parser->sp] = isFunc; 1185 parser->fbGX[parser->sp] = fbGX; 1186 if (td3) 1187 varstack_show( parser, "after push" ); 1188} 1189 1190 1191/* cts, ctsSzB, ctsMemSzB are derived from a DW_AT_location and so 1192 refer either to a location expression or to a location list. 1193 Figure out which, and in both cases bundle the expression or 1194 location list into a so-called GExpr (guarded expression). */ 1195__attribute__((noinline)) 1196static GExpr* get_GX ( CUConst* cc, Bool td3, 1197 ULong cts, Int ctsSzB, UWord ctsMemSzB ) 1198{ 1199 GExpr* gexpr = NULL; 1200 if (ctsMemSzB > 0 && ctsSzB == 0) { 1201 /* represents an in-line location expression, and cts points 1202 right at it */ 1203 gexpr = make_singleton_GX( (UChar*)(UWord)cts, ctsMemSzB ); 1204 } 1205 else 1206 if (ctsMemSzB == 0 && ctsSzB > 0) { 1207 /* represents location list. cts is the offset of it in 1208 .debug_loc. 
*/ 1209 if (!cc->cu_svma_known) 1210 cc->barf("get_GX: location list, but CU svma is unknown"); 1211 gexpr = make_general_GX( cc, td3, (UWord)cts, cc->cu_svma ); 1212 } 1213 else { 1214 vg_assert(0); /* else caller is bogus */ 1215 } 1216 return gexpr; 1217} 1218 1219 1220static 1221void read_filename_table( /*MOD*/D3VarParser* parser, 1222 CUConst* cc, UWord debug_line_offset, 1223 Bool td3 ) 1224{ 1225 Bool is_dw64; 1226 Cursor c; 1227 Word i; 1228 ULong unit_length; 1229 UShort version; 1230 ULong header_length; 1231 UChar minimum_instruction_length; 1232 UChar default_is_stmt; 1233 Char line_base; 1234 UChar line_range; 1235 UChar opcode_base; 1236 UChar* str; 1237 1238 vg_assert(parser && cc && cc->barf); 1239 if ((!cc->debug_line_img) 1240 || cc->debug_line_sz <= debug_line_offset) 1241 cc->barf("read_filename_table: .debug_line is missing?"); 1242 1243 init_Cursor( &c, cc->debug_line_img, 1244 cc->debug_line_sz, debug_line_offset, cc->barf, 1245 "Overrun whilst reading .debug_line section(1)" ); 1246 1247 unit_length 1248 = get_Initial_Length( &is_dw64, &c, 1249 "read_filename_table: invalid initial-length field" ); 1250 version = get_UShort( &c ); 1251 if (version != 2) 1252 cc->barf("read_filename_table: Only DWARF version 2 line info " 1253 "is currently supported."); 1254 header_length = (ULong)get_Dwarfish_UWord( &c, is_dw64 ); 1255 minimum_instruction_length = get_UChar( &c ); 1256 default_is_stmt = get_UChar( &c ); 1257 line_base = (Char)get_UChar( &c ); 1258 line_range = get_UChar( &c ); 1259 opcode_base = get_UChar( &c ); 1260 /* skip over "standard_opcode_lengths" */ 1261 for (i = 1; i < (Word)opcode_base; i++) 1262 (void)get_UChar( &c ); 1263 1264 /* skip over the directory names table */ 1265 while (peek_UChar(&c) != 0) { 1266 (void)get_AsciiZ(&c); 1267 } 1268 (void)get_UChar(&c); /* skip terminating zero */ 1269 1270 /* Read and record the file names table */ 1271 vg_assert(parser->filenameTable); 1272 vg_assert( VG_(sizeXA)( 
parser->filenameTable ) == 0 ); 1273 /* Add a dummy index-zero entry. DWARF3 numbers its files 1274 from 1, for some reason. */ 1275 str = ML_(addStr)( cc->di, "<unknown_file>", -1 ); 1276 VG_(addToXA)( parser->filenameTable, &str ); 1277 while (peek_UChar(&c) != 0) { 1278 str = get_AsciiZ(&c); 1279 TRACE_D3(" read_filename_table: %ld %s\n", 1280 VG_(sizeXA)(parser->filenameTable), str); 1281 str = ML_(addStr)( cc->di, str, -1 ); 1282 VG_(addToXA)( parser->filenameTable, &str ); 1283 (void)get_ULEB128( &c ); /* skip directory index # */ 1284 (void)get_ULEB128( &c ); /* skip last mod time */ 1285 (void)get_ULEB128( &c ); /* file size */ 1286 } 1287 /* We're done! The rest of it is not interesting. */ 1288} 1289 1290 1291__attribute__((noinline)) 1292static void parse_var_DIE ( /*OUT*/TempVar** tempvars, 1293 /*OUT*/GExpr** gexprs, 1294 /*MOD*/D3VarParser* parser, 1295 DW_TAG dtag, 1296 UWord posn, 1297 Int level, 1298 Cursor* c_die, 1299 Cursor* c_abbv, 1300 CUConst* cc, 1301 Bool td3 ) 1302{ 1303 ULong cts; 1304 Int ctsSzB; 1305 UWord ctsMemSzB; 1306 1307 UWord saved_die_c_offset = get_position_of_Cursor( c_die ); 1308 UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv ); 1309 1310 varstack_preen( parser, td3, level-1 ); 1311 1312 if (dtag == DW_TAG_compile_unit) { 1313 Bool have_lo = False; 1314 Bool have_hi1 = False; 1315 Bool have_range = False; 1316 Addr ip_lo = 0; 1317 Addr ip_hi1 = 0; 1318 Addr rangeoff = 0; 1319 while (True) { 1320 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 1321 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 1322 if (attr == 0 && form == 0) break; 1323 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 1324 cc, c_die, False/*td3*/, form ); 1325 if (attr == DW_AT_low_pc && ctsSzB > 0) { 1326 ip_lo = cts; 1327 have_lo = True; 1328 } 1329 if (attr == DW_AT_high_pc && ctsSzB > 0) { 1330 ip_hi1 = cts; 1331 have_hi1 = True; 1332 } 1333 if (attr == DW_AT_ranges && ctsSzB > 0) { 1334 rangeoff = cts; 1335 have_range = True; 1336 } 1337 if (attr 
== DW_AT_stmt_list && ctsSzB > 0) { 1338 read_filename_table( parser, cc, (UWord)cts, td3 ); 1339 } 1340 } 1341 /* Now, does this give us an opportunity to find this 1342 CU's svma? */ 1343#if 0 1344 if (level == 0 && have_lo) { 1345 vg_assert(!cc->cu_svma_known); /* if this fails, it must be 1346 because we've already seen a DW_TAG_compile_unit DIE at level 1347 0. But that can't happen, because DWARF3 only allows exactly 1348 one top level DIE per CU. */ 1349 cc->cu_svma_known = True; 1350 cc->cu_svma = ip_lo; 1351 if (1) 1352 TRACE_D3("BBBBAAAA acquire CU_SVMA of %p\n", cc->cu_svma); 1353 /* Now, it may be that this DIE doesn't tell us the CU's 1354 SVMA, by way of not having a DW_AT_low_pc. That's OK -- 1355 the CU doesn't *have* to have its SVMA specified. 1356 1357 But as per last para D3 spec sec 3.1.1 ("Normal and 1358 Partial Compilation Unit Entries", "If the base address 1359 (viz, the SVMA) is undefined, then any DWARF entry of 1360 structure defined interms of the base address of that 1361 compilation unit is not valid.". So that means, if whilst 1362 processing the children of this top level DIE (or their 1363 children, etc) we see a DW_AT_range, and cu_svma_known is 1364 False, then the DIE that contains it is (per the spec) 1365 invalid, and we can legitimately stop and complain. */ 1366 } 1367#else 1368 /* .. whereas The Reality is, simply assume the SVMA is zero 1369 if it isn't specified. */ 1370 if (level == 0) { 1371 vg_assert(!cc->cu_svma_known); 1372 cc->cu_svma_known = True; 1373 if (have_lo) 1374 cc->cu_svma = ip_lo; 1375 else 1376 cc->cu_svma = 0; 1377 } 1378#endif 1379 /* Do we have something that looks sane? 
*/ 1380 if (have_lo && have_hi1 && (!have_range)) { 1381 if (ip_lo < ip_hi1) 1382 varstack_push( cc, parser, td3, 1383 unitary_range_list(ip_lo, ip_hi1 - 1), 1384 level, 1385 False/*isFunc*/, NULL/*fbGX*/ ); 1386 } else 1387 if ((!have_lo) && (!have_hi1) && have_range) { 1388 varstack_push( cc, parser, td3, 1389 get_range_list( cc, td3, 1390 rangeoff, cc->cu_svma ), 1391 level, 1392 False/*isFunc*/, NULL/*fbGX*/ ); 1393 } else 1394 if ((!have_lo) && (!have_hi1) && (!have_range)) { 1395 /* CU has no code, presumably? */ 1396 varstack_push( cc, parser, td3, 1397 empty_range_list(), 1398 level, 1399 False/*isFunc*/, NULL/*fbGX*/ ); 1400 } else 1401 goto bad_DIE; 1402 } 1403 1404 if (dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram) { 1405 Bool have_lo = False; 1406 Bool have_hi1 = False; 1407 Bool have_range = False; 1408 Addr ip_lo = 0; 1409 Addr ip_hi1 = 0; 1410 Addr rangeoff = 0; 1411 Bool isFunc = dtag == DW_TAG_subprogram; 1412 GExpr* fbGX = NULL; 1413 while (True) { 1414 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 1415 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 1416 if (attr == 0 && form == 0) break; 1417 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 1418 cc, c_die, False/*td3*/, form ); 1419 if (attr == DW_AT_low_pc && ctsSzB > 0) { 1420 ip_lo = cts; 1421 have_lo = True; 1422 } 1423 if (attr == DW_AT_high_pc && ctsSzB > 0) { 1424 ip_hi1 = cts; 1425 have_hi1 = True; 1426 } 1427 if (attr == DW_AT_ranges && ctsSzB > 0) { 1428 rangeoff = cts; 1429 have_range = True; 1430 } 1431 if (isFunc 1432 && attr == DW_AT_frame_base 1433 && ((ctsMemSzB > 0 && ctsSzB == 0) 1434 || (ctsMemSzB == 0 && ctsSzB > 0))) { 1435 fbGX = get_GX( cc, False/*td3*/, cts, ctsSzB, ctsMemSzB ); 1436 vg_assert(fbGX); 1437 vg_assert(!fbGX->next); 1438 fbGX->next = *gexprs; 1439 *gexprs = fbGX; 1440 } 1441 } 1442 /* Do we have something that looks sane? */ 1443 if (dtag == DW_TAG_subprogram 1444 && (!have_lo) && (!have_hi1) && (!have_range)) { 1445 /* This is legit - ignore it. 
Sec 3.3.3: "A subroutine entry 1446 representing a subroutine declaration that is not also a 1447 definition does not have code address or range 1448 attributes." */ 1449 } else 1450 if (dtag == DW_TAG_lexical_block 1451 && (!have_lo) && (!have_hi1) && (!have_range)) { 1452 /* I believe this is legit, and means the lexical block 1453 contains no insns (whatever that might mean). Ignore. */ 1454 } else 1455 if (have_lo && have_hi1 && (!have_range)) { 1456 /* This scope supplies just a single address range. */ 1457 if (ip_lo < ip_hi1) 1458 varstack_push( cc, parser, td3, 1459 unitary_range_list(ip_lo, ip_hi1 - 1), 1460 level, isFunc, fbGX ); 1461 } else 1462 if ((!have_lo) && (!have_hi1) && have_range) { 1463 /* This scope supplies multiple address ranges via the use of 1464 a range list. */ 1465 varstack_push( cc, parser, td3, 1466 get_range_list( cc, td3, 1467 rangeoff, cc->cu_svma ), 1468 level, isFunc, fbGX ); 1469 } else 1470 if (have_lo && (!have_hi1) && (!have_range)) { 1471 /* This scope is bogus. The D3 spec sec 3.4 (Lexical Block 1472 Entries) says fairly clearly that a scope must have either 1473 _range or (_low_pc and _high_pc). */ 1474 /* The spec is a bit ambiguous though. Perhaps a single byte 1475 range is intended? 
See sec 2.17 (Code Addresses And Ranges) */ 1476 /* This case is here because icc9 produced this: 1477 <2><13bd>: DW_TAG_lexical_block 1478 DW_AT_decl_line : 5229 1479 DW_AT_decl_column : 37 1480 DW_AT_decl_file : 1 1481 DW_AT_low_pc : 0x401b03 1482 */ 1483 /* Ignore (seems safe than pushing a single byte range) */ 1484 } else 1485 goto bad_DIE; 1486 } 1487 1488 if (dtag == DW_TAG_variable || dtag == DW_TAG_formal_parameter) { 1489 UChar* name = NULL; 1490 Type* typeR = D3_INVALID_CUOFF; 1491 Bool external = False; 1492 GExpr* gexpr = NULL; 1493 Int n_attrs = 0; 1494 UWord abs_ori = (UWord)D3_INVALID_CUOFF; 1495 Bool declaration = False; 1496 Int lineNo = 0; 1497 UChar* fileName = NULL; 1498 while (True) { 1499 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 1500 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 1501 if (attr == 0 && form == 0) break; 1502 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 1503 cc, c_die, False/*td3*/, form ); 1504 n_attrs++; 1505 if (attr == DW_AT_name && ctsMemSzB > 0) { 1506 name = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 ); 1507 } 1508 if (attr == DW_AT_location 1509 && ((ctsMemSzB > 0 && ctsSzB == 0) 1510 || (ctsMemSzB == 0 && ctsSzB > 0))) { 1511 gexpr = get_GX( cc, False/*td3*/, cts, ctsSzB, ctsMemSzB ); 1512 vg_assert(gexpr); 1513 vg_assert(!gexpr->next); 1514 gexpr->next = *gexprs; 1515 *gexprs = gexpr; 1516 } 1517 if (attr == DW_AT_type && ctsSzB > 0) { 1518 typeR = (Type*)(UWord)cts; 1519 } 1520 if (attr == DW_AT_external && ctsSzB > 0 && cts > 0) { 1521 external = True; 1522 } 1523 if (attr == DW_AT_abstract_origin && ctsSzB > 0) { 1524 abs_ori = (UWord)cts; 1525 } 1526 if (attr == DW_AT_declaration && ctsSzB > 0 && cts > 0) { 1527 declaration = True; 1528 } 1529 if (attr == DW_AT_decl_line && ctsSzB > 0) { 1530 lineNo = (Int)cts; 1531 } 1532 if (attr == DW_AT_decl_file && ctsSzB > 0) { 1533 Int ftabIx = (Int)cts; 1534 if (ftabIx >= 1 1535 && ftabIx < VG_(sizeXA)( parser->filenameTable )) { 1536 fileName = *(UChar**) 1537 
VG_(indexXA)( parser->filenameTable, ftabIx ); 1538 vg_assert(fileName); 1539 } 1540 if (0) VG_(printf)("XXX filename = %s\n", fileName); 1541 } 1542 } 1543 /* We'll collect it under if one of the following three 1544 conditions holds: 1545 (1) has location and type -> completed 1546 (2) has type only -> is an abstract instance 1547 (3) has location and abs_ori -> is a concrete instance 1548 Name, filename and line number are all option frills. 1549 */ 1550 if ( /* 1 */ (gexpr && typeR != D3_INVALID_CUOFF) 1551 /* 2 */ || (typeR != D3_INVALID_CUOFF) 1552 /* 3 */ || (gexpr && abs_ori != (UWord)D3_INVALID_CUOFF) ) { 1553 1554 /* Add this variable to the list of interesting looking 1555 variables. Crucially, note along with it the address 1556 range(s) associated with the variable, which for locals 1557 will be the address ranges at the top of the varparser's 1558 stack. */ 1559 GExpr* fbGX = NULL; 1560 Word i, nRanges; 1561 XArray* /* of AddrRange */ xa; 1562 TempVar* tv; 1563 /* Stack can't be empty; we put a dummy entry on it for the 1564 entire address range before starting with the DIEs for 1565 this CU. */ 1566 vg_assert(parser->sp >= 0); 1567 1568 /* If this is a local variable (non-external), try to find 1569 the GExpr for the DW_AT_frame_base of the containing 1570 function. It should have been pushed on the stack at the 1571 time we encountered its DW_TAG_subprogram DIE, so the way 1572 to find it is to scan back down the stack looking for it. 1573 If there isn't an enclosing stack entry marked 'isFunc' 1574 then we must be seeing variable or formal param DIEs 1575 outside of a function, so we deem the Dwarf to be 1576 malformed if that happens. Note that the fbGX may be NULL 1577 if the containing DT_TAG_subprogram didn't supply a 1578 DW_AT_frame_base -- that's OK, but there must actually be 1579 a containing DW_TAG_subprogram. 
*/ 1580 if (!external) { 1581 Bool found = False; 1582 for (i = parser->sp; i >= 0; i--) { 1583 if (parser->isFunc[i]) { 1584 fbGX = parser->fbGX[i]; 1585 found = True; 1586 break; 1587 } 1588 } 1589 if (!found) { 1590 if (0 && VG_(clo_verbosity) >= 0) { 1591 VG_(message)(Vg_DebugMsg, 1592 "warning: parse_var_DIE: non-external variable " 1593 "outside DW_TAG_subprogram"); 1594 } 1595 /* goto bad_DIE; */ 1596 /* This seems to happen a lot. Just ignore it -- if, 1597 when we come to evaluation of the location (guarded) 1598 expression, it requires a frame base value, and 1599 there's no expression for that, then evaluation as a 1600 whole will fail. Harmless - a bit of a waste of 1601 cycles but nothing more. */ 1602 } 1603 } 1604 1605 /* re "external ? 0 : parser->sp" (twice), if the var is 1606 marked 'external' then we must put it at the global scope, 1607 as only the global scope (level 0) covers the entire PC 1608 address space. It is asserted elsewhere that level 0 1609 always covers the entire address space. */ 1610 xa = parser->ranges[external ? 0 : parser->sp]; 1611 nRanges = VG_(sizeXA)(xa); 1612 vg_assert(nRanges >= 0); 1613 1614 tv = ML_(dinfo_zalloc)( sizeof(TempVar) ); 1615 tv->name = name; 1616 tv->level = external ? 0 : parser->sp; 1617 tv->typeR = typeR; 1618 tv->gexpr = gexpr; 1619 tv->fbGX = fbGX; 1620 tv->fName = fileName; 1621 tv->fLine = lineNo; 1622 tv->dioff = posn; 1623 tv->absOri = abs_ori; 1624 1625 /* See explanation on definition of type TempVar for the 1626 reason for this elaboration. 
*/ 1627 tv->nRanges = nRanges; 1628 tv->rngOneMin = 0; 1629 tv->rngOneMax = 0; 1630 tv->rngMany = NULL; 1631 if (nRanges == 1) { 1632 AddrRange* range = VG_(indexXA)(xa, 0); 1633 tv->rngOneMin = range->aMin; 1634 tv->rngOneMax = range->aMax; 1635 } 1636 else if (nRanges > 1) { 1637 tv->rngMany = VG_(cloneXA)( xa ); /* free when 'tv' freed */ 1638 } 1639 1640 tv->next = *tempvars; 1641 *tempvars = tv; 1642 1643 TRACE_D3(" Recording this variable, with %ld PC range(s)\n", 1644 VG_(sizeXA)(xa) ); 1645 /* collect stats on how effective the ->ranges special 1646 casing is */ 1647 if (0) { 1648 static Int ntot=0, ngt=0; 1649 ntot++; 1650 if (tv->rngMany) ngt++; 1651 if (0 == (ntot % 100000)) 1652 VG_(printf)("XXXX %d tot, %d cloned\n", ntot, ngt); 1653 } 1654 1655 } 1656 1657 /* Here are some other weird cases seen in the wild: 1658 1659 We have a variable with a name and a type, but no 1660 location. I guess that's a sign that it has been 1661 optimised away. Ignore it. Here's an example: 1662 1663 static Int lc_compar(void* n1, void* n2) { 1664 MC_Chunk* mc1 = *(MC_Chunk**)n1; 1665 MC_Chunk* mc2 = *(MC_Chunk**)n2; 1666 return (mc1->data < mc2->data ? -1 : 1); 1667 } 1668 1669 Both mc1 and mc2 are like this 1670 <2><5bc>: Abbrev Number: 21 (DW_TAG_variable) 1671 DW_AT_name : mc1 1672 DW_AT_decl_file : 1 1673 DW_AT_decl_line : 216 1674 DW_AT_type : <5d3> 1675 1676 whereas n1 and n2 do have locations specified. 1677 1678 --------------------------------------------- 1679 1680 We see a DW_TAG_formal_parameter with a type, but 1681 no name and no location. 
It's probably part of a function type 1682 construction, thusly, hence ignore it: 1683 <1><2b4>: Abbrev Number: 12 (DW_TAG_subroutine_type) 1684 DW_AT_sibling : <2c9> 1685 DW_AT_prototyped : 1 1686 DW_AT_type : <114> 1687 <2><2be>: Abbrev Number: 13 (DW_TAG_formal_parameter) 1688 DW_AT_type : <13e> 1689 <2><2c3>: Abbrev Number: 13 (DW_TAG_formal_parameter) 1690 DW_AT_type : <133> 1691 1692 --------------------------------------------- 1693 1694 Is very minimal, like this: 1695 <4><81d>: Abbrev Number: 44 (DW_TAG_variable) 1696 DW_AT_abstract_origin: <7ba> 1697 What that signifies I have no idea. Ignore. 1698 1699 ---------------------------------------------- 1700 1701 Is very minimal, like this: 1702 <200f>: DW_TAG_formal_parameter 1703 DW_AT_abstract_ori: <1f4c> 1704 DW_AT_location : 13440 1705 What that signifies I have no idea. Ignore. 1706 It might be significant, though: the variable at least 1707 has a location and so might exist somewhere. 1708 Maybe we should handle this. 1709 1710 --------------------------------------------- 1711 1712 <22407>: DW_TAG_variable 1713 DW_AT_name : (indirect string, offset: 0x6579): 1714 vgPlain_trampoline_stuff_start 1715 DW_AT_decl_file : 29 1716 DW_AT_decl_line : 56 1717 DW_AT_external : 1 1718 DW_AT_declaration : 1 1719 1720 Nameless and typeless variable that has a location? Who 1721 knows. Not me. 1722 <2><3d178>: Abbrev Number: 22 (DW_TAG_variable) 1723 DW_AT_location : 9 byte block: 3 c0 c7 13 38 0 0 0 0 1724 (DW_OP_addr: 3813c7c0) 1725 1726 No, really. Check it out. gcc is quite simply borked. 
1727 <3><168cc>: Abbrev Number: 141 (DW_TAG_variable) 1728 // followed by no attributes, and the next DIE is a sibling, 1729 // not a child 1730 */ 1731 } 1732 return; 1733 1734 bad_DIE: 1735 set_position_of_Cursor( c_die, saved_die_c_offset ); 1736 set_position_of_Cursor( c_abbv, saved_abbv_c_offset ); 1737 VG_(printf)("\nparse_var_DIE: confused by:\n"); 1738 VG_(printf)(" <%d><%lx>: %s\n", level, posn, ML_(pp_DW_TAG)( dtag ) ); 1739 while (True) { 1740 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 1741 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 1742 if (attr == 0 && form == 0) break; 1743 VG_(printf)(" %18s: ", ML_(pp_DW_AT)(attr)); 1744 /* Get the form contents, so as to print them */ 1745 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 1746 cc, c_die, True, form ); 1747 VG_(printf)("\t\n"); 1748 } 1749 VG_(printf)("\n"); 1750 cc->barf("parse_var_DIE: confused by the above DIE"); 1751 /*NOTREACHED*/ 1752} 1753 1754 1755/*------------------------------------------------------------*/ 1756/*--- ---*/ 1757/*--- Parsing of type-related DIEs ---*/ 1758/*--- ---*/ 1759/*------------------------------------------------------------*/ 1760 1761#define N_D3_TYPE_STACK 16 1762 1763typedef 1764 struct { 1765 /* What source language? 'C'=C/C++, 'F'=Fortran, '?'=other 1766 Established once per compilation unit. 
*/ 1767 UChar language; 1768 /* A stack of types which are currently under construction */ 1769 Int sp; /* [sp] is innermost active entry; sp==-1 for empty 1770 stack */ 1771 Type* qparent[N_D3_TYPE_STACK]; 1772 Int qlevel[N_D3_TYPE_STACK]; 1773 1774 } 1775 D3TypeParser; 1776 1777static void typestack_show ( D3TypeParser* parser, HChar* str ) { 1778 Word i; 1779 VG_(printf)(" typestack (%s) {\n", str); 1780 for (i = 0; i <= parser->sp; i++) { 1781 VG_(printf)(" [%ld] (level %d): ", i, parser->qlevel[i]); 1782 ML_(pp_Type)( parser->qparent[i] ); 1783 VG_(printf)("\n"); 1784 } 1785 VG_(printf)(" }\n"); 1786} 1787 1788/* Remove from the stack, all entries with .level > 'level' */ 1789static 1790void typestack_preen ( D3TypeParser* parser, Bool td3, Int level ) 1791{ 1792 Bool changed = False; 1793 vg_assert(parser->sp < N_D3_TYPE_STACK); 1794 while (True) { 1795 vg_assert(parser->sp >= -1); 1796 if (parser->sp == -1) break; 1797 if (parser->qlevel[parser->sp] <= level) break; 1798 if (0) 1799 TRACE_D3("BBBBAAAA typestack_pop [newsp=%d]\n", parser->sp-1); 1800 vg_assert(parser->qparent[parser->sp]); 1801 parser->qparent[parser->sp] = NULL; 1802 parser->qlevel[parser->sp] = 0; 1803 parser->sp--; 1804 changed = True; 1805 } 1806 if (changed && td3) 1807 typestack_show( parser, "after preen" ); 1808} 1809 1810static Bool typestack_is_empty ( D3TypeParser* parser ) { 1811 vg_assert(parser->sp >= -1 && parser->sp < N_D3_TYPE_STACK); 1812 return parser->sp == -1; 1813} 1814 1815static void typestack_push ( CUConst* cc, 1816 D3TypeParser* parser, 1817 Bool td3, 1818 Type* parent, Int level ) { 1819 if (0) 1820 TRACE_D3("BBBBAAAA typestack_push[newsp=%d]: %d %p\n", 1821 parser->sp+1, level, parent); 1822 1823 /* First we need to zap everything >= 'level', as we are about to 1824 replace any previous entry at 'level', so .. 
*/ 1825 typestack_preen(parser, /*td3*/False, level-1); 1826 1827 vg_assert(parser->sp >= -1); 1828 vg_assert(parser->sp < N_D3_TYPE_STACK); 1829 if (parser->sp == N_D3_TYPE_STACK-1) 1830 cc->barf("typestack_push: N_D3_TYPE_STACK is too low; " 1831 "increase and recompile"); 1832 if (parser->sp >= 0) 1833 vg_assert(parser->qlevel[parser->sp] < level); 1834 parser->sp++; 1835 vg_assert(parser->qparent[parser->sp] == NULL); 1836 vg_assert(parser->qlevel[parser->sp] == 0); 1837 vg_assert(parent != NULL); 1838 parser->qparent[parser->sp] = parent; 1839 parser->qlevel[parser->sp] = level; 1840 if (td3) 1841 typestack_show( parser, "after push" ); 1842} 1843 1844 1845/* Parse a type-related DIE. 'parser' holds the current parser state. 1846 'admin' is where the completed types are dumped. 'dtag' is the tag 1847 for this DIE. 'c_die' points to the start of the data fields (FORM 1848 stuff) for the DIE. c_abbv points to the start of the (name,form) 1849 pairs which describe the DIE. 1850 1851 We may find the DIE uninteresting, in which case we should ignore 1852 it. 1853*/ 1854__attribute__((noinline)) 1855static void parse_type_DIE ( /*OUT*/TyAdmin** admin, 1856 /*MOD*/D3TypeParser* parser, 1857 DW_TAG dtag, 1858 UWord posn, 1859 Int level, 1860 Cursor* c_die, 1861 Cursor* c_abbv, 1862 CUConst* cc, 1863 Bool td3 ) 1864{ 1865 ULong cts; 1866 Int ctsSzB; 1867 UWord ctsMemSzB; 1868 Type* type = NULL; 1869 TyAtom* atom = NULL; 1870 TyField* field = NULL; 1871 D3Expr* expr = NULL; 1872 TyBounds* bounds = NULL; 1873 1874 UWord saved_die_c_offset = get_position_of_Cursor( c_die ); 1875 UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv ); 1876 1877 /* If we've returned to a level at or above any previously noted 1878 parent, un-note it, so we don't believe we're still collecting 1879 its children. 
*/ 1880 typestack_preen( parser, td3, level-1 ); 1881 1882 if (dtag == DW_TAG_compile_unit) { 1883 /* See if we can find DW_AT_language, since it is important for 1884 establishing array bounds (see DW_TAG_subrange_type below in 1885 this fn) */ 1886 while (True) { 1887 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 1888 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 1889 if (attr == 0 && form == 0) break; 1890 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 1891 cc, c_die, False/*td3*/, form ); 1892 if (attr != DW_AT_language) 1893 continue; 1894 if (ctsSzB == 0) 1895 goto bad_DIE; 1896 switch (cts) { 1897 case DW_LANG_C89: case DW_LANG_C: 1898 case DW_LANG_C_plus_plus: case DW_LANG_ObjC: 1899 case DW_LANG_ObjC_plus_plus: case DW_LANG_UPC: 1900 case DW_LANG_Upc: 1901 parser->language = 'C'; break; 1902 case DW_LANG_Fortran77: case DW_LANG_Fortran90: 1903 case DW_LANG_Fortran95: 1904 parser->language = 'F'; break; 1905 case DW_LANG_Ada83: case DW_LANG_Cobol74: 1906 case DW_LANG_Cobol85: case DW_LANG_Pascal83: 1907 case DW_LANG_Modula2: case DW_LANG_Java: 1908 case DW_LANG_C99: case DW_LANG_Ada95: 1909 case DW_LANG_PLI: case DW_LANG_D: 1910 case DW_LANG_Mips_Assembler: 1911 parser->language = '?'; break; 1912 default: 1913 goto bad_DIE; 1914 } 1915 } 1916 } 1917 1918 if (dtag == DW_TAG_base_type) { 1919 /* We can pick up a new base type any time. 
*/ 1920 type = ML_(new_Type)(); 1921 type->tag = Ty_Base; 1922 while (True) { 1923 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 1924 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 1925 if (attr == 0 && form == 0) break; 1926 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 1927 cc, c_die, False/*td3*/, form ); 1928 if (attr == DW_AT_name && ctsMemSzB > 0) { 1929 type->Ty.Base.name 1930 = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 ); 1931 } 1932 if (attr == DW_AT_byte_size && ctsSzB > 0) { 1933 type->Ty.Base.szB = cts; 1934 } 1935 if (attr == DW_AT_encoding && ctsSzB > 0) { 1936 switch (cts) { 1937 case DW_ATE_unsigned: case DW_ATE_unsigned_char: 1938 case DW_ATE_boolean:/* FIXME - is this correct? */ 1939 type->Ty.Base.enc = 'U'; break; 1940 case DW_ATE_signed: case DW_ATE_signed_char: 1941 type->Ty.Base.enc = 'S'; break; 1942 case DW_ATE_float: 1943 type->Ty.Base.enc = 'F'; break; 1944 case DW_ATE_complex_float: 1945 type->Ty.Base.enc = 'C'; break; 1946 default: 1947 goto bad_DIE; 1948 } 1949 } 1950 } 1951 1952 /* Invent a name if it doesn't have one. gcc-4.3 1953 -ftree-vectorize is observed to emit nameless base types. */ 1954 if (!type->Ty.Base.name) 1955 type->Ty.Base.name 1956 = ML_(addStr)( cc->di, "<anon_base_type>", -1 ); 1957 1958 /* Do we have something that looks sane? */ 1959 if (/* must have a name */ 1960 type->Ty.Base.name == NULL 1961 /* and a plausible size. Yes, really 32: "complex long 1962 double" apparently has size=32 */ 1963 || type->Ty.Base.szB < 0 || type->Ty.Base.szB > 32 1964 /* and a plausible encoding */ 1965 || (type->Ty.Base.enc != 'U' 1966 && type->Ty.Base.enc != 'S' 1967 && type->Ty.Base.enc != 'F' 1968 && type->Ty.Base.enc != 'C')) 1969 goto bad_DIE; 1970 /* Last minute hack: if we see this 1971 <1><515>: DW_TAG_base_type 1972 DW_AT_byte_size : 0 1973 DW_AT_encoding : 5 1974 DW_AT_name : void 1975 convert it into a real Void type. 
*/ 1976 if (type->Ty.Base.szB == 0 1977 && 0 == VG_(strcmp)("void", type->Ty.Base.name)) { 1978 VG_(memset)(type, 0, sizeof(*type)); 1979 type->tag = Ty_Void; 1980 type->Ty.Void.isFake = False; /* it's a real one! */ 1981 } 1982 goto acquire_Type; 1983 } 1984 1985 if (dtag == DW_TAG_pointer_type || dtag == DW_TAG_reference_type 1986 || dtag == DW_TAG_ptr_to_member_type) { 1987 /* This seems legit for _pointer_type and _reference_type. I 1988 don't know if rolling _ptr_to_member_type in here really is 1989 legit, but it's better than not handling it at all. */ 1990 type = ML_(new_Type)(); 1991 type->tag = Ty_PorR; 1992 /* target type defaults to void */ 1993 type->Ty.PorR.typeR = D3_FAKEVOID_CUOFF; 1994 type->Ty.PorR.isPtr = dtag == DW_TAG_pointer_type 1995 || dtag == DW_TAG_ptr_to_member_type; 1996 /* Pointer types don't *have* to specify their size, in which 1997 case we assume it's a machine word. But if they do specify 1998 it, it must be a machine word :-) This probably assumes that 1999 the word size of the Dwarf3 we're reading is the same size as 2000 that on the machine. gcc appears to give a size whereas icc9 2001 doesn't. */ 2002 if (type->Ty.PorR.isPtr) 2003 type->Ty.PorR.szB = sizeof(Word); 2004 while (True) { 2005 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2006 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2007 if (attr == 0 && form == 0) break; 2008 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2009 cc, c_die, False/*td3*/, form ); 2010 if (attr == DW_AT_byte_size && ctsSzB > 0) { 2011 type->Ty.PorR.szB = cts; 2012 } 2013 if (attr == DW_AT_type && ctsSzB > 0) { 2014 type->Ty.PorR.typeR = (Type*)(UWord)cts; 2015 } 2016 } 2017 /* Do we have something that looks sane? */ 2018 if (type->Ty.PorR.szB != sizeof(Word)) 2019 goto bad_DIE; 2020 else 2021 goto acquire_Type; 2022 } 2023 2024 if (dtag == DW_TAG_enumeration_type) { 2025 /* Create a new Type to hold the results. 
*/ 2026 type = ML_(new_Type)(); 2027 type->tag = Ty_Enum; 2028 type->Ty.Enum.name = NULL; 2029 type->Ty.Enum.atomRs 2030 = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free), 2031 sizeof(TyAtom*) ); 2032 while (True) { 2033 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2034 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2035 if (attr == 0 && form == 0) break; 2036 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2037 cc, c_die, False/*td3*/, form ); 2038 if (attr == DW_AT_name && ctsMemSzB > 0) { 2039 type->Ty.Enum.name 2040 = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 ); 2041 } 2042 if (attr == DW_AT_byte_size && ctsSzB > 0) { 2043 type->Ty.Enum.szB = cts; 2044 } 2045 } 2046 /* Do we have something that looks sane? */ 2047 if (type->Ty.Enum.szB == 0 /* we must know the size */ 2048 /* But the name can be present, or not */) 2049 goto bad_DIE; 2050 /* On't stack! */ 2051 typestack_push( cc, parser, td3, type, level ); 2052 goto acquire_Type; 2053 } 2054 2055 if (dtag == DW_TAG_enumerator) { 2056 Bool have_value = False; 2057 atom = ML_(new_TyAtom)( NULL, 0 ); 2058 while (True) { 2059 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2060 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2061 if (attr == 0 && form == 0) break; 2062 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2063 cc, c_die, False/*td3*/, form ); 2064 if (attr == DW_AT_name && ctsMemSzB > 0) { 2065 atom->name = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 ); 2066 } 2067 if (attr == DW_AT_const_value && ctsSzB > 0) { 2068 atom->value = cts; 2069 have_value = True; 2070 } 2071 } 2072 /* Do we have something that looks sane? */ 2073 if ((!have_value) || atom->name == NULL) 2074 goto bad_DIE; 2075 /* Do we have a plausible parent? 
*/ 2076 if (typestack_is_empty(parser)) goto bad_DIE; 2077 vg_assert(parser->qparent[parser->sp]); 2078 if (level != parser->qlevel[parser->sp]+1) goto bad_DIE; 2079 if (parser->qparent[parser->sp]->tag != Ty_Enum) goto bad_DIE; 2080 /* Record this child in the parent */ 2081 vg_assert(parser->qparent[parser->sp]->Ty.Enum.atomRs); 2082 VG_(addToXA)( parser->qparent[parser->sp]->Ty.Enum.atomRs, &atom ); 2083 /* And record the child itself */ 2084 goto acquire_Atom; 2085 } 2086 2087 if (dtag == DW_TAG_structure_type || dtag == DW_TAG_union_type) { 2088 Bool have_szB = False; 2089 Bool is_decl = False; 2090 Bool is_spec = False; 2091 /* Create a new Type to hold the results. */ 2092 type = ML_(new_Type)(); 2093 type->tag = Ty_StOrUn; 2094 type->Ty.StOrUn.name = NULL; 2095 type->Ty.StOrUn.fields 2096 = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free), 2097 sizeof(TyAtom*) ); 2098 type->Ty.StOrUn.complete = True; 2099 type->Ty.StOrUn.isStruct = dtag == DW_TAG_structure_type; 2100 while (True) { 2101 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2102 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2103 if (attr == 0 && form == 0) break; 2104 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2105 cc, c_die, False/*td3*/, form ); 2106 if (attr == DW_AT_name && ctsMemSzB > 0) { 2107 type->Ty.StOrUn.name 2108 = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 ); 2109 } 2110 if (attr == DW_AT_byte_size && ctsSzB >= 0) { 2111 type->Ty.StOrUn.szB = cts; 2112 have_szB = True; 2113 } 2114 if (attr == DW_AT_declaration && ctsSzB > 0 && cts > 0) { 2115 is_decl = True; 2116 } 2117 if (attr == DW_AT_specification && ctsSzB > 0 && cts > 0) { 2118 is_spec = True; 2119 } 2120 } 2121 /* Do we have something that looks sane? */ 2122 if (is_decl && (!is_spec)) { 2123 /* It's a DW_AT_declaration. We require the name but 2124 nothing else. 
*/ 2125 if (type->Ty.StOrUn.name == NULL) 2126 goto bad_DIE; 2127 type->Ty.StOrUn.complete = False; 2128 goto acquire_Type; 2129 } 2130 if ((!is_decl) /* && (!is_spec) */) { 2131 /* this is the common, ordinary case */ 2132 if ((!have_szB) /* we must know the size */ 2133 /* But the name can be present, or not */) 2134 goto bad_DIE; 2135 /* On't stack! */ 2136 typestack_push( cc, parser, td3, type, level ); 2137 goto acquire_Type; 2138 } 2139 else { 2140 /* don't know how to handle any other variants just now */ 2141 goto bad_DIE; 2142 } 2143 } 2144 2145 if (dtag == DW_TAG_member) { 2146 /* Acquire member entries for both DW_TAG_structure_type and 2147 DW_TAG_union_type. They differ minorly, in that struct 2148 members must have a DW_AT_data_member_location expression 2149 whereas union members must not. */ 2150 Bool parent_is_struct; 2151 field = ML_(new_TyField)( NULL, NULL, NULL ); 2152 field->typeR = D3_INVALID_CUOFF; 2153 expr = NULL; 2154 while (True) { 2155 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2156 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2157 if (attr == 0 && form == 0) break; 2158 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2159 cc, c_die, False/*td3*/, form ); 2160 if (attr == DW_AT_name && ctsMemSzB > 0) { 2161 field->name = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 ); 2162 } 2163 if (attr == DW_AT_type && ctsSzB > 0) { 2164 field->typeR = (Type*)(UWord)cts; 2165 } 2166 if (attr == DW_AT_data_member_location && ctsMemSzB > 0) { 2167 UChar* copy = ML_(addStr)( cc->di, (UChar*)(UWord)cts, 2168 (Int)ctsMemSzB ); 2169 expr = ML_(new_D3Expr)( copy, (UWord)ctsMemSzB ); 2170 } 2171 } 2172 /* Do we have a plausible parent? */ 2173 if (typestack_is_empty(parser)) goto bad_DIE; 2174 vg_assert(parser->qparent[parser->sp]); 2175 if (level != parser->qlevel[parser->sp]+1) goto bad_DIE; 2176 if (parser->qparent[parser->sp]->tag != Ty_StOrUn) goto bad_DIE; 2177 /* Do we have something that looks sane? 
If this a member of a 2178 struct, we must have a location expression; but if a member 2179 of a union that is irrelevant (D3 spec sec 5.6.6). We ought 2180 to reject in the latter case, but some compilers have been 2181 observed to emit constant-zero expressions. So just ignore 2182 them. */ 2183 parent_is_struct 2184 = parser->qparent[parser->sp]->Ty.StOrUn.isStruct; 2185 if (!field->name) 2186 field->name = ML_(addStr)(cc->di, "<anon_field>", -1); 2187 if ((!field->name) || (field->typeR == D3_INVALID_CUOFF)) 2188 goto bad_DIE; 2189 if (parent_is_struct && (!expr)) 2190 goto bad_DIE; 2191 if ((!parent_is_struct) && expr) { 2192 /* If this is a union type, pretend we haven't seen the data 2193 member location expression, as it is by definition 2194 redundant (it must be zero). */ 2195 expr = NULL; 2196 } 2197 /* Record this child in the parent */ 2198 field->isStruct = parent_is_struct; 2199 if (expr) 2200 field->loc = expr; 2201 vg_assert(parser->qparent[parser->sp]->Ty.StOrUn.fields); 2202 VG_(addToXA)( parser->qparent[parser->sp]->Ty.StOrUn.fields, 2203 &field ); 2204 /* And record the child itself */ 2205 goto acquire_Field_and_Expr; 2206 } 2207 2208 if (dtag == DW_TAG_array_type) { 2209 type = ML_(new_Type)(); 2210 type->tag = Ty_Array; 2211 type->Ty.Array.typeR = D3_INVALID_CUOFF; 2212 type->Ty.Array.bounds 2213 = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free), 2214 sizeof(TyBounds*) ); 2215 while (True) { 2216 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2217 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2218 if (attr == 0 && form == 0) break; 2219 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2220 cc, c_die, False/*td3*/, form ); 2221 if (attr == DW_AT_type && ctsSzB > 0) { 2222 type->Ty.Array.typeR = (Type*)(UWord)cts; 2223 } 2224 } 2225 if (type->Ty.Array.typeR == D3_INVALID_CUOFF) 2226 goto bad_DIE; 2227 /* On't stack! 
*/ 2228 typestack_push( cc, parser, td3, type, level ); 2229 goto acquire_Type; 2230 } 2231 2232 if (dtag == DW_TAG_subrange_type) { 2233 Bool have_lower = False; 2234 Bool have_upper = False; 2235 Bool have_count = False; 2236 Long lower = 0; 2237 Long upper = 0; 2238 Long count = 0; 2239 2240 switch (parser->language) { 2241 case 'C': have_lower = True; lower = 0; break; 2242 case 'F': have_lower = True; lower = 1; break; 2243 case '?': have_lower = False; break; 2244 default: vg_assert(0); /* assured us by handling of 2245 DW_TAG_compile_unit in this fn */ 2246 } 2247 bounds = ML_(new_TyBounds)(); 2248 while (True) { 2249 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2250 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2251 if (attr == 0 && form == 0) break; 2252 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2253 cc, c_die, False/*td3*/, form ); 2254 if (attr == DW_AT_lower_bound && ctsSzB > 0) { 2255 lower = (Long)cts; 2256 have_lower = True; 2257 } 2258 if (attr == DW_AT_upper_bound && ctsSzB > 0) { 2259 upper = (Long)cts; 2260 have_upper = True; 2261 } 2262 if (attr == DW_AT_count && ctsSzB > 0) { 2263 count = cts; 2264 have_count = True; 2265 } 2266 } 2267 /* FIXME: potentially skip the rest if no parent present, since 2268 it could be the case that this subrange type is free-standing 2269 (not being used to describe the bounds of a containing array 2270 type) */ 2271 /* Do we have a plausible parent? 
*/ 2272 if (typestack_is_empty(parser)) goto bad_DIE; 2273 vg_assert(parser->qparent[parser->sp]); 2274 if (level != parser->qlevel[parser->sp]+1) goto bad_DIE; 2275 if (parser->qparent[parser->sp]->tag != Ty_Array) goto bad_DIE; 2276 2277 /* Figure out if we have a definite range or not */ 2278 if (have_lower && have_upper && (!have_count)) { 2279 bounds->knownL = True; 2280 bounds->knownU = True; 2281 bounds->boundL = lower; 2282 bounds->boundU = upper; 2283 } 2284 else if (have_lower && (!have_upper) && (!have_count)) { 2285 bounds->knownL = True; 2286 bounds->knownU = False; 2287 bounds->boundL = lower; 2288 bounds->boundU = 0; 2289 } else { 2290 /* FIXME: handle more cases */ 2291 goto bad_DIE; 2292 } 2293 2294 /* Record this bound in the parent */ 2295 vg_assert(parser->qparent[parser->sp]->Ty.Array.bounds); 2296 VG_(addToXA)( parser->qparent[parser->sp]->Ty.Array.bounds, 2297 &bounds ); 2298 /* And record the child itself */ 2299 goto acquire_Bounds; 2300 } 2301 2302 if (dtag == DW_TAG_typedef) { 2303 /* We can pick up a new base type any time. */ 2304 type = ML_(new_Type)(); 2305 type->tag = Ty_TyDef; 2306 type->Ty.TyDef.name = NULL; 2307 type->Ty.TyDef.typeR = D3_INVALID_CUOFF; 2308 while (True) { 2309 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2310 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2311 if (attr == 0 && form == 0) break; 2312 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2313 cc, c_die, False/*td3*/, form ); 2314 if (attr == DW_AT_name && ctsMemSzB > 0) { 2315 type->Ty.TyDef.name 2316 = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 ); 2317 } 2318 if (attr == DW_AT_type && ctsSzB > 0) { 2319 type->Ty.TyDef.typeR = (Type*)(UWord)cts; 2320 } 2321 } 2322 /* Do we have something that looks sane? */ 2323 if (/* must have a name */ 2324 type->Ty.TyDef.name == NULL 2325 /* but the referred-to type can be absent */) 2326 goto bad_DIE; 2327 else 2328 goto acquire_Type; 2329 } 2330 2331 if (dtag == DW_TAG_subroutine_type) { 2332 /* function type? 
just record that one fact and ask no 2333 further questions. */ 2334 type = ML_(new_Type)(); 2335 type->tag = Ty_Fn; 2336 goto acquire_Type; 2337 } 2338 2339 if (dtag == DW_TAG_volatile_type || dtag == DW_TAG_const_type) { 2340 Int have_ty = 0; 2341 type = ML_(new_Type)(); 2342 type->tag = Ty_Qual; 2343 type->Ty.Qual.qual 2344 = dtag == DW_TAG_volatile_type ? 'V' : 'C'; 2345 /* target type defaults to 'void' */ 2346 type->Ty.Qual.typeR = D3_FAKEVOID_CUOFF; 2347 while (True) { 2348 DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); 2349 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2350 if (attr == 0 && form == 0) break; 2351 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2352 cc, c_die, False/*td3*/, form ); 2353 if (attr == DW_AT_type && ctsSzB > 0) { 2354 type->Ty.Qual.typeR = (Type*)(UWord)cts; 2355 have_ty++; 2356 } 2357 } 2358 /* gcc sometimes generates DW_TAG_const/volatile_type without 2359 DW_AT_type and GDB appears to interpret the type as 'const 2360 void' (resp. 'volatile void'). So just allow it .. 
*/ 2361 if (have_ty == 1 || have_ty == 0) 2362 goto acquire_Type; 2363 else 2364 goto bad_DIE; 2365 } 2366 2367 /* else ignore this DIE */ 2368 return; 2369 /*NOTREACHED*/ 2370 2371 acquire_Type: 2372 if (0) VG_(printf)("YYYY Acquire Type\n"); 2373 vg_assert(type); vg_assert(!atom); vg_assert(!field); 2374 vg_assert(!expr); vg_assert(!bounds); 2375 *admin = ML_(new_TyAdmin)( posn, *admin ); 2376 (*admin)->payload = type; 2377 (*admin)->tag = TyA_Type; 2378 return; 2379 /*NOTREACHED*/ 2380 2381 acquire_Atom: 2382 if (0) VG_(printf)("YYYY Acquire Atom\n"); 2383 vg_assert(!type); vg_assert(atom); vg_assert(!field); 2384 vg_assert(!expr); vg_assert(!bounds); 2385 *admin = ML_(new_TyAdmin)( posn, *admin ); 2386 (*admin)->payload = atom; 2387 (*admin)->tag = TyA_Atom; 2388 return; 2389 /*NOTREACHED*/ 2390 2391 acquire_Field_and_Expr: 2392 /* For union members, Expr should be absent */ 2393 if (0) VG_(printf)("YYYY Acquire Field and Expr\n"); 2394 vg_assert(!type); vg_assert(!atom); vg_assert(field); 2395 /*vg_assert(expr);*/ vg_assert(!bounds); 2396 if (expr) { 2397 *admin = ML_(new_TyAdmin)( (UWord)D3_INVALID_CUOFF, 2398 *admin ); 2399 (*admin)->payload = expr; 2400 (*admin)->tag = TyA_Expr; 2401 } 2402 *admin = ML_(new_TyAdmin)( posn, *admin ); 2403 (*admin)->payload = field; 2404 (*admin)->tag = TyA_Field; 2405 return; 2406 /*NOTREACHED*/ 2407 2408 acquire_Bounds: 2409 if (0) VG_(printf)("YYYY Acquire Bounds\n"); 2410 vg_assert(!type); vg_assert(!atom); vg_assert(!field); 2411 vg_assert(!expr); vg_assert(bounds); 2412 *admin = ML_(new_TyAdmin)( posn, *admin ); 2413 (*admin)->payload = bounds; 2414 (*admin)->tag = TyA_Bounds; 2415 return; 2416 /*NOTREACHED*/ 2417 2418 bad_DIE: 2419 set_position_of_Cursor( c_die, saved_die_c_offset ); 2420 set_position_of_Cursor( c_abbv, saved_abbv_c_offset ); 2421 VG_(printf)("\nparse_type_DIE: confused by:\n"); 2422 VG_(printf)(" <%d><%lx>: %s\n", level, posn, ML_(pp_DW_TAG)( dtag ) ); 2423 while (True) { 2424 DW_AT attr = (DW_AT) 
get_ULEB128( c_abbv ); 2425 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); 2426 if (attr == 0 && form == 0) break; 2427 VG_(printf)(" %18s: ", ML_(pp_DW_AT)(attr)); 2428 /* Get the form contents, so as to print them */ 2429 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2430 cc, c_die, True, form ); 2431 VG_(printf)("\t\n"); 2432 } 2433 VG_(printf)("\n"); 2434 cc->barf("parse_type_DIE: confused by the above DIE"); 2435 /*NOTREACHED*/ 2436} 2437 2438 2439/*------------------------------------------------------------*/ 2440/*--- ---*/ 2441/*--- Resolution of references to type DIEs ---*/ 2442/*--- ---*/ 2443/*------------------------------------------------------------*/ 2444 2445static Int cmp_D3TyAdmin_by_cuOff ( void* v1, void* v2 ) { 2446 TyAdmin* a1 = *(TyAdmin**)v1; 2447 TyAdmin* a2 = *(TyAdmin**)v2; 2448 if (a1->cuOff < a2->cuOff) return -1; 2449 if (a1->cuOff > a2->cuOff) return 1; 2450 return 0; 2451} 2452 2453/* Look up 'cuOff' in 'map', to find the associated D3TyAdmin*. Check 2454 that the found D3TyAdmin has tag 'adtag'. Sets *payload to be the 2455 resulting payload pointer and returns True on success. 2456 2457 Also, if 'allow_invalid' is True, then if cuOff is 2458 D3_INVALID_CUOFF, return NULL in *payload. 2459 2460 Otherwise (conceptually fails) and returns False. */ 2461__attribute__((noinline)) 2462static Bool resolve_binding ( /*OUT*/void** payload, 2463 XArray* map, void* cuOff, 2464 TyAdminTag tag, 2465 Bool allow_invalid ) { 2466 Bool found; 2467 Word ixLo, ixHi; 2468 TyAdmin dummy, *dummyP, *admin; 2469 2470 if (cuOff == D3_INVALID_CUOFF && allow_invalid) { 2471 *payload = NULL; 2472 return True; 2473 } 2474 2475 VG_(memset)(&dummy, 0, sizeof(dummy)); 2476 dummy.cuOff = (UWord)cuOff; 2477 dummyP = &dummy; 2478 found = VG_(lookupXA)( map, &dummyP, &ixLo, &ixHi ); 2479 if (!found) 2480 return False; 2481 /* If this doesn't hold, we must have seen more than one DIE with 2482 the same cuOff(set). Which isn't possible. 
*/ 2483 vg_assert(ixLo == ixHi); 2484 admin = *(TyAdmin**)VG_(indexXA)( map, ixLo ); 2485 /* All payload pointers should be non-NULL. Ensured by assertion in 2486 loop in resolve_type_entities that creates 'map'. Hence it is 2487 safe to return NULL to indicate 'not found'. */ 2488 vg_assert(admin->payload); 2489 vg_assert(admin->cuOff == (UWord)cuOff); /* stay sane */ 2490 2491 if (admin->tag != tag) 2492 return False; 2493 2494 *payload = admin->payload; 2495 return True; 2496} 2497 2498__attribute__((noinline)) 2499static void resolve_type_entities ( /*MOD*/TyAdmin* admin, 2500 /*MOD*/TempVar* vars ) 2501{ 2502 Bool ok; 2503 void* payload; 2504 TyAdmin* adp; 2505 XArray* /* of D3TyAdmin* */ map; 2506 2507 map = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free), 2508 sizeof(TyAdmin*) ); 2509 for (adp = admin; adp; adp = adp->next) { 2510 vg_assert(adp); 2511 vg_assert(adp->payload != NULL); 2512 if (adp->cuOff != (UWord)D3_INVALID_CUOFF) { 2513 VG_(addToXA)( map, &adp ); 2514 } 2515 } 2516 2517 VG_(setCmpFnXA)( map, cmp_D3TyAdmin_by_cuOff ); 2518 if (0) 2519 VG_(printf)("XXXXXX sorting map with %d entries\n", 2520 (Int)VG_(sizeXA)(map)); 2521 VG_(sortXA)( map ); 2522 2523 for (adp = admin; adp; adp = adp->next) { 2524 vg_assert(adp->payload); 2525 switch (adp->tag) { 2526 case TyA_Bounds: { 2527 TyBounds* bounds = (TyBounds*)adp->payload; 2528 if (bounds->knownL && bounds->knownU 2529 && bounds->knownL > bounds->knownU) goto baaad; 2530 break; 2531 } 2532 case TyA_Atom: { 2533 TyAtom* atom = (TyAtom*)adp->payload; 2534 if (!atom->name) goto baaad; 2535 break; 2536 } 2537 case TyA_Expr: { 2538 D3Expr* expr = (D3Expr*)adp->payload; 2539 if (!expr->bytes) goto baaad; 2540 break; 2541 } 2542 case TyA_Field: { 2543 TyField* field = (TyField*)adp->payload; 2544 if (!field->name) goto baaad; 2545 if ( (field->isStruct && (!field->loc)) 2546 || ((!field->isStruct) && field->loc)) 2547 goto baaad; 2548 ok = resolve_binding( &payload, map, field->typeR, 2549 TyA_Type, 
False/*!allow_invalid*/ ); 2550 if (!ok) goto baaad; 2551 field->typeR = payload; 2552 break; 2553 } 2554 case TyA_Type: { 2555 UChar enc; 2556 XArray* xa; 2557 Type* ty = (Type*)adp->payload; 2558 switch (ty->tag) { 2559 case Ty_Base: 2560 enc = ty->Ty.Base.enc; 2561 if ((!ty->Ty.Base.name) 2562 || ty->Ty.Base.szB < 1 || ty->Ty.Base.szB > 32 2563 || (enc != 'S' && enc != 'U' && enc != 'F' && enc != 'C')) 2564 goto baaad; 2565 break; 2566 case Ty_TyDef: 2567 if (!ty->Ty.TyDef.name) goto baaad; 2568 ok = resolve_binding( &payload, map, 2569 ty->Ty.TyDef.typeR, 2570 TyA_Type, 2571 True/*allow_invalid*/ ); 2572 if (!ok) goto baaad; 2573 ty->Ty.TyDef.typeR = payload; 2574 break; 2575 case Ty_PorR: 2576 if (ty->Ty.PorR.szB != sizeof(Word)) goto baaad; 2577 ok = resolve_binding( &payload, map, 2578 ty->Ty.PorR.typeR, 2579 TyA_Type, 2580 False/*!allow_invalid*/ ); 2581 if (!ok) goto baaad; 2582 ty->Ty.PorR.typeR = payload; 2583 break; 2584 case Ty_Array: 2585 if (!ty->Ty.Array.bounds) goto baaad; 2586 ok = resolve_binding( &payload, map, 2587 ty->Ty.Array.typeR, 2588 TyA_Type, 2589 False/*!allow_invalid*/ ); 2590 if (!ok) goto baaad; 2591 ty->Ty.Array.typeR = payload; 2592 break; 2593 case Ty_Enum: 2594 if ((!ty->Ty.Enum.atomRs) 2595 || ty->Ty.Enum.szB < 1 2596 || ty->Ty.Enum.szB > 8) goto baaad; 2597 xa = ty->Ty.Enum.atomRs; 2598 break; 2599 case Ty_StOrUn: 2600 xa = ty->Ty.StOrUn.fields; 2601 if (!xa) goto baaad; 2602 break; 2603 case Ty_Fn: 2604 break; 2605 case Ty_Qual: 2606 if (ty->Ty.Qual.qual != 'C' 2607 && ty->Ty.Qual.qual != 'V') goto baaad; 2608 ok = resolve_binding( &payload, map, 2609 ty->Ty.Qual.typeR, 2610 TyA_Type, 2611 False/*!allow_invalid*/ ); 2612 if (!ok) goto baaad; 2613 ty->Ty.Qual.typeR = payload; 2614 break; 2615 case Ty_Void: 2616 if (ty->Ty.Void.isFake != False 2617 && ty->Ty.Void.isFake != True) goto baaad; 2618 break; 2619 default: 2620 goto baaad; 2621 } 2622 break; 2623 } 2624 baaad: 2625 default: 2626 VG_(printf)("valgrind: bad D3TyAdmin: 
"); 2627 ML_(pp_TyAdmin)(adp); 2628 VG_(printf)("\n"); 2629 } 2630 } 2631 2632 /* Now resolve the variables list */ 2633 for (; vars; vars = vars->next) { 2634 payload = NULL; 2635 ok = resolve_binding( &payload, map, vars->typeR, 2636 TyA_Type, True/*allow_invalid*/ ); 2637 2638 if (0 && !ok) 2639 VG_(printf)("Can't resolve type reference 0x%lx\n", 2640 (UWord)vars->typeR); 2641 //vg_assert(ok); 2642 vars->typeR = payload; 2643 } 2644 2645 VG_(deleteXA)( map ); 2646} 2647 2648 2649/*------------------------------------------------------------*/ 2650/*--- ---*/ 2651/*--- Parsing of Compilation Units ---*/ 2652/*--- ---*/ 2653/*------------------------------------------------------------*/ 2654 2655static Int cmp_TempVar_by_dioff ( void* v1, void* v2 ) { 2656 TempVar* t1 = *(TempVar**)v1; 2657 TempVar* t2 = *(TempVar**)v2; 2658 if (t1->dioff < t2->dioff) return -1; 2659 if (t1->dioff > t2->dioff) return 1; 2660 return 0; 2661} 2662 2663static void read_DIE ( /*OUT*/TyAdmin** admin, 2664 /*OUT*/TempVar** tempvars, 2665 /*OUT*/GExpr** gexprs, 2666 /*MOD*/D3TypeParser* typarser, 2667 /*MOD*/D3VarParser* varparser, 2668 Cursor* c, Bool td3, CUConst* cc, Int level ) 2669{ 2670 Cursor abbv; 2671 ULong atag, abbv_code; 2672 UWord posn; 2673 UInt has_children; 2674 UWord start_die_c_offset, start_abbv_c_offset; 2675 UWord after_die_c_offset, after_abbv_c_offset; 2676 2677 /* --- Deal with this DIE --- */ 2678 posn = get_position_of_Cursor( c ); 2679 abbv_code = get_ULEB128( c ); 2680 set_abbv_Cursor( &abbv, td3, cc, abbv_code ); 2681 atag = get_ULEB128( &abbv ); 2682 TRACE_D3("\n"); 2683 TRACE_D3(" <%d><%lx>: Abbrev Number: %llu (%s)\n", 2684 level, posn, abbv_code, ML_(pp_DW_TAG)( atag ) ); 2685 2686 if (atag == 0) 2687 cc->barf("read_DIE: invalid zero tag on DIE"); 2688 2689 has_children = get_UChar( &abbv ); 2690 if (has_children != DW_children_no && has_children != DW_children_yes) 2691 cc->barf("read_DIE: invalid has_children value"); 2692 2693 /* We're set up to look 
at the fields of this DIE. Hand it off to 2694 any parser(s) that want to see it. Since they will in general 2695 advance both the DIE and abbrev cursors, remember their current 2696 settings so that we can then back up and do one final pass over 2697 the DIE, to print out its contents. */ 2698 2699 start_die_c_offset = get_position_of_Cursor( c ); 2700 start_abbv_c_offset = get_position_of_Cursor( &abbv ); 2701 2702 while (True) { 2703 ULong cts; 2704 Int ctsSzB; 2705 UWord ctsMemSzB; 2706 ULong at_name = get_ULEB128( &abbv ); 2707 ULong at_form = get_ULEB128( &abbv ); 2708 if (at_name == 0 && at_form == 0) break; 2709 TRACE_D3(" %18s: ", ML_(pp_DW_AT)(at_name)); 2710 /* Get the form contents, but ignore them; the only purpose is 2711 to print them, if td3 is True */ 2712 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, 2713 cc, c, td3, (DW_FORM)at_form ); 2714 TRACE_D3("\t"); 2715 TRACE_D3("\n"); 2716 } 2717 2718 after_die_c_offset = get_position_of_Cursor( c ); 2719 after_abbv_c_offset = get_position_of_Cursor( &abbv ); 2720 2721 set_position_of_Cursor( c, start_die_c_offset ); 2722 set_position_of_Cursor( &abbv, start_abbv_c_offset ); 2723 2724 parse_type_DIE( admin, 2725 typarser, 2726 (DW_TAG)atag, 2727 posn, 2728 level, 2729 c, /* DIE cursor */ 2730 &abbv, /* abbrev cursor */ 2731 cc, 2732 td3 ); 2733 2734 set_position_of_Cursor( c, start_die_c_offset ); 2735 set_position_of_Cursor( &abbv, start_abbv_c_offset ); 2736 2737 parse_var_DIE( tempvars, 2738 gexprs, 2739 varparser, 2740 (DW_TAG)atag, 2741 posn, 2742 level, 2743 c, /* DIE cursor */ 2744 &abbv, /* abbrev cursor */ 2745 cc, 2746 td3 ); 2747 2748 set_position_of_Cursor( c, after_die_c_offset ); 2749 set_position_of_Cursor( &abbv, after_abbv_c_offset ); 2750 2751 /* --- Now recurse into its children, if any --- */ 2752 if (has_children == DW_children_yes) { 2753 if (0) TRACE_D3("BEGIN children of level %d\n", level); 2754 while (True) { 2755 atag = peek_ULEB128( c ); 2756 if (atag == 0) break; 2757 
read_DIE( admin, tempvars, gexprs, typarser, varparser, 2758 c, td3, cc, level+1 ); 2759 } 2760 /* Now we need to eat the terminating zero */ 2761 atag = get_ULEB128( c ); 2762 vg_assert(atag == 0); 2763 if (0) TRACE_D3("END children of level %d\n", level); 2764 } 2765 2766} 2767 2768 2769static 2770void new_dwarf3_reader_wrk ( 2771 struct _DebugInfo* di, 2772 __attribute__((noreturn)) 2773 void (*barf)( HChar* ), 2774 UChar* debug_info_img, SizeT debug_info_sz, 2775 UChar* debug_abbv_img, SizeT debug_abbv_sz, 2776 UChar* debug_line_img, SizeT debug_line_sz, 2777 UChar* debug_str_img, SizeT debug_str_sz, 2778 UChar* debug_ranges_img, SizeT debug_ranges_sz, 2779 UChar* debug_loc_img, SizeT debug_loc_sz 2780) 2781{ 2782 TyAdmin *admin, *adminp; 2783 TempVar *tempvars, *varp, *varp2; 2784 GExpr *gexprs, *gexpr; 2785 Cursor abbv; /* for showing .debug_abbrev */ 2786 Cursor info; /* primary cursor for parsing .debug_info */ 2787 Cursor ranges; /* for showing .debug_ranges */ 2788 D3TypeParser typarser; 2789 D3VarParser varparser; 2790 Addr dr_base; 2791 UWord dr_offset; 2792 Word i; 2793 Bool td3 = di->trace_symtab; 2794 XArray* /* of TempVar* */ dioff_lookup_tab; 2795 2796#if 0 2797 /* This doesn't work properly because it assumes all entries are 2798 packed end to end, with no holes. But that doesn't always 2799 appear to be the case, so it loses sync. And the D3 spec 2800 doesn't appear to require a no-hole situation either. 
*/ 2801 /* Display .debug_loc */ 2802 Addr dl_base; 2803 UWord dl_offset; 2804 Cursor loc; /* for showing .debug_loc */ 2805 TRACE_SYMTAB("\n"); 2806 TRACE_SYMTAB("\n------ The contents of .debug_loc ------\n"); 2807 TRACE_SYMTAB(" Offset Begin End Expression\n"); 2808 init_Cursor( &loc, debug_loc_img, 2809 debug_loc_sz, 0, barf, 2810 "Overrun whilst reading .debug_loc section(1)" ); 2811 dl_base = 0; 2812 dl_offset = 0; 2813 while (True) { 2814 UWord w1, w2; 2815 UWord len; 2816 if (is_at_end_Cursor( &loc )) 2817 break; 2818 2819 /* Read a (host-)word pair. This is something of a hack since 2820 the word size to read is really dictated by the ELF file; 2821 however, we assume we're reading a file with the same 2822 word-sizeness as the host. Reasonably enough. */ 2823 w1 = get_UWord( &loc ); 2824 w2 = get_UWord( &loc ); 2825 2826 if (w1 == 0 && w2 == 0) { 2827 /* end of list. reset 'base' */ 2828 TRACE_D3(" %08lx <End of list>\n", dl_offset); 2829 dl_base = 0; 2830 dl_offset = get_position_of_Cursor( &loc ); 2831 continue; 2832 } 2833 2834 if (w1 == -1UL) { 2835 /* new value for 'base' */ 2836 TRACE_D3(" %08lx %16lx %08lx (base address)\n", 2837 dl_offset, w1, w2); 2838 dl_base = w2; 2839 continue; 2840 } 2841 2842 /* else a location expression follows */ 2843 TRACE_D3(" %08lx %08lx %08lx ", 2844 dl_offset, w1 + dl_base, w2 + dl_base); 2845 len = (UWord)get_UShort( &loc ); 2846 while (len > 0) { 2847 UChar byte = get_UChar( &loc ); 2848 TRACE_D3("%02x", (UInt)byte); 2849 len--; 2850 } 2851 TRACE_SYMTAB("\n"); 2852 } 2853#endif 2854 2855 /* Display .debug_ranges */ 2856 TRACE_SYMTAB("\n"); 2857 TRACE_SYMTAB("\n------ The contents of .debug_ranges ------\n"); 2858 TRACE_SYMTAB(" Offset Begin End\n"); 2859 init_Cursor( &ranges, debug_ranges_img, 2860 debug_ranges_sz, 0, barf, 2861 "Overrun whilst reading .debug_ranges section(1)" ); 2862 dr_base = 0; 2863 dr_offset = 0; 2864 while (True) { 2865 UWord w1, w2; 2866 2867 if (is_at_end_Cursor( &ranges )) 2868 break; 2869 
2870 /* Read a (host-)word pair. This is something of a hack since 2871 the word size to read is really dictated by the ELF file; 2872 however, we assume we're reading a file with the same 2873 word-sizeness as the host. Reasonably enough. */ 2874 w1 = get_UWord( &ranges ); 2875 w2 = get_UWord( &ranges ); 2876 2877 if (w1 == 0 && w2 == 0) { 2878 /* end of list. reset 'base' */ 2879 TRACE_D3(" %08lx <End of list>\n", dr_offset); 2880 dr_base = 0; 2881 dr_offset = get_position_of_Cursor( &ranges ); 2882 continue; 2883 } 2884 2885 if (w1 == -1UL) { 2886 /* new value for 'base' */ 2887 TRACE_D3(" %08lx %16lx %08lx (base address)\n", 2888 dr_offset, w1, w2); 2889 dr_base = w2; 2890 continue; 2891 } 2892 2893 /* else a range [w1+base, w2+base) is denoted */ 2894 TRACE_D3(" %08lx %08lx %08lx\n", 2895 dr_offset, w1 + dr_base, w2 + dr_base); 2896 } 2897 2898 2899 /* Display .debug_abbrev */ 2900 init_Cursor( &abbv, debug_abbv_img, debug_abbv_sz, 0, barf, 2901 "Overrun whilst reading .debug_abbrev section" ); 2902 TRACE_SYMTAB("\n"); 2903 TRACE_SYMTAB("\n------ The contents of .debug_abbrev ------\n"); 2904 while (True) { 2905 if (is_at_end_Cursor( &abbv )) 2906 break; 2907 /* Read one abbreviation table */ 2908 TRACE_D3(" Number TAG\n"); 2909 while (True) { 2910 ULong atag; 2911 UInt has_children; 2912 ULong acode = get_ULEB128( &abbv ); 2913 if (acode == 0) break; /* end of the table */ 2914 atag = get_ULEB128( &abbv ); 2915 has_children = get_UChar( &abbv ); 2916 TRACE_D3(" %llu %s [%s]\n", 2917 acode, ML_(pp_DW_TAG)(atag), 2918 ML_(pp_DW_children)(has_children)); 2919 while (True) { 2920 ULong at_name = get_ULEB128( &abbv ); 2921 ULong at_form = get_ULEB128( &abbv ); 2922 if (at_name == 0 && at_form == 0) break; 2923 TRACE_D3(" %18s %s\n", 2924 ML_(pp_DW_AT)(at_name), ML_(pp_DW_FORM)(at_form)); 2925 } 2926 } 2927 } 2928 TRACE_SYMTAB("\n"); 2929 2930 /* Now loop over the Compilation Units listed in the .debug_info 2931 section (see D3SPEC sec 7.5) paras 1 and 2. 
Each compilation 2932 unit contains a Compilation Unit Header followed by precisely 2933 one DW_TAG_compile_unit or DW_TAG_partial_unit DIE. */ 2934 init_Cursor( &info, debug_info_img, debug_info_sz, 0, barf, 2935 "Overrun whilst reading .debug_info section" ); 2936 2937 /* We'll park the harvested type information in here. Also create 2938 a fake "void" entry with offset D3_FAKEVOID_CUOFF, so we always 2939 have at least one type entry to refer to. D3_FAKEVOID_CUOFF is 2940 huge and presumably will not occur in any valid DWARF3 file -- 2941 it would need to have a .debug_info section 4GB long for that to 2942 happen. These type entries end up in the DebugInfo. */ 2943 admin = NULL; 2944 { Type* tVoid = ML_(new_Type)(); 2945 tVoid->tag = Ty_Void; 2946 tVoid->Ty.Void.isFake = True; 2947 admin = ML_(new_TyAdmin)( (UWord)D3_FAKEVOID_CUOFF, admin ); 2948 admin->payload = tVoid; 2949 admin->tag = TyA_Type; 2950 } 2951 2952 /* List of variables we're accumulating. These don't end up in the 2953 DebugInfo; instead their contents are handed to ML_(addVar) and 2954 the list elements are then deleted. */ 2955 tempvars = NULL; 2956 2957 /* List of GExprs we're accumulating. These wind up in the 2958 DebugInfo. */ 2959 gexprs = NULL; 2960 2961 /* We need a D3TypeParser to keep track of partially constructed 2962 types. It'll be discarded as soon as we've completed the CU, 2963 since the resulting information is tipped in to 'admin' as it is 2964 generated. */ 2965 VG_(memset)( &typarser, 0, sizeof(typarser) ); 2966 typarser.sp = -1; 2967 typarser.language = '?'; 2968 2969 VG_(memset)( &varparser, 0, sizeof(varparser) ); 2970 varparser.sp = -1; 2971 2972 TRACE_D3("\n------ Parsing .debug_info section ------\n"); 2973 while (True) { 2974 UWord cu_start_offset, cu_offset_now; 2975 CUConst cc; 2976 2977 /* It seems icc9 finishes the DIE info before debug_info_sz 2978 bytes have been used up. 
So be flexible, and declare the 2979 sequence complete if there is not enough remaining bytes to 2980 hold even the smallest conceivable CU header. (11 bytes I 2981 reckon). */ 2982 Word avail = get_remaining_length_Cursor( &info ); 2983 if (avail < 11) { 2984 if (avail > 0) 2985 TRACE_D3("new_dwarf3_reader_wrk: warning: " 2986 "%ld unused bytes after end of DIEs\n", avail); 2987 break; 2988 } 2989 2990 /* Check the varparser's stack is in a sane state. */ 2991 vg_assert(varparser.sp == -1); 2992 for (i = 0; i < N_D3_VAR_STACK; i++) { 2993 vg_assert(varparser.ranges[i] == NULL); 2994 vg_assert(varparser.level[i] == 0); 2995 } 2996 for (i = 0; i < N_D3_TYPE_STACK; i++) { 2997 vg_assert(typarser.qparent[i] == NULL); 2998 vg_assert(typarser.qlevel[i] == 0); 2999 } 3000 3001 cu_start_offset = get_position_of_Cursor( &info ); 3002 TRACE_D3("\n"); 3003 TRACE_D3(" Compilation Unit @ offset 0x%lx:\n", cu_start_offset); 3004 /* parse_CU_header initialises the CU's set_abbv_Cursor cache 3005 (saC_cache) */ 3006 parse_CU_Header( &cc, td3, &info, 3007 (UChar*)debug_abbv_img, debug_abbv_sz ); 3008 cc.debug_str_img = debug_str_img; 3009 cc.debug_str_sz = debug_str_sz; 3010 cc.debug_ranges_img = debug_ranges_img; 3011 cc.debug_ranges_sz = debug_ranges_sz; 3012 cc.debug_loc_img = debug_loc_img; 3013 cc.debug_loc_sz = debug_loc_sz; 3014 cc.debug_line_img = debug_line_img; 3015 cc.debug_line_sz = debug_line_sz; 3016 cc.cu_start_offset = cu_start_offset; 3017 cc.di = di; 3018 /* The CU's svma can be deduced by looking at the AT_low_pc 3019 value in the top level TAG_compile_unit, which is the topmost 3020 DIE. We'll leave it for the 'varparser' to acquire that info 3021 and fill it in -- since it is the only party to want to know 3022 it. */ 3023 cc.cu_svma_known = False; 3024 cc.cu_svma = 0; 3025 3026 /* Create a fake outermost-level range covering the entire 3027 address range. So we always have *something* to catch all 3028 variable declarations. 
*/ 3029 varstack_push( &cc, &varparser, td3, 3030 unitary_range_list(0UL, ~0UL), 3031 -1, False/*isFunc*/, NULL/*fbGX*/ ); 3032 3033 /* And set up the file name table. When we come across the top 3034 level DIE for this CU (which is what the next call to 3035 read_DIE should process) we will copy all the file names out 3036 of the .debug_line img area and use this table to look up the 3037 copies when we later see filename numbers in DW_TAG_variables 3038 etc. */ 3039 vg_assert(!varparser.filenameTable ); 3040 varparser.filenameTable 3041 = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free), 3042 sizeof(UChar*) ); 3043 vg_assert(varparser.filenameTable ); 3044 3045 /* Now read the one-and-only top-level DIE for this CU. */ 3046 vg_assert(varparser.sp == 0); 3047 read_DIE( &admin, &tempvars, &gexprs, &typarser, &varparser, 3048 &info, td3, &cc, 0 ); 3049 3050 cu_offset_now = get_position_of_Cursor( &info ); 3051 if (1) TRACE_D3("offset now %ld, d-i-size %ld\n", 3052 cu_offset_now, debug_info_sz); 3053 if (cu_offset_now > debug_info_sz) 3054 barf("toplevel DIEs beyond end of CU"); 3055 if (cu_offset_now == debug_info_sz) 3056 break; 3057 3058 /* Preen to level -2. DIEs have level >= 0 so -2 cannot occur 3059 anywhere else at all. Our fake the-entire-address-space 3060 range is at level -1, so preening to -2 should completely 3061 empty the stack out. */ 3062 TRACE_D3("\n"); 3063 varstack_preen( &varparser, td3, -2 ); 3064 /* Similarly, empty the type stack out. */ 3065 typestack_preen( &typarser, td3, -2 ); 3066 /* else keep going */ 3067 3068 TRACE_D3("set_abbv_Cursor cache: %lu queries, %lu misses\n", 3069 cc.saC_cache_queries, cc.saC_cache_misses); 3070 3071 vg_assert(varparser.filenameTable ); 3072 VG_(deleteXA)( varparser.filenameTable ); 3073 varparser.filenameTable = NULL; 3074 } 3075 3076 /* Put the type entry list the right way round. Not strictly 3077 necessary, but makes it easier to read. 
*/ 3078 vg_assert(admin); 3079 if (admin) { 3080 TyAdmin *next, *prev = NULL; 3081 for (adminp = admin; adminp; adminp = next) { 3082 next = adminp->next; 3083 adminp->next = prev; 3084 prev = adminp; 3085 } 3086 admin = prev; 3087 } 3088 3089 /* Put the variable list the right way round. Not strictly 3090 necessary, but makes it easier to read. */ 3091 if (tempvars) { 3092 TempVar *next, *prev = NULL; 3093 for (varp = tempvars; varp; varp = next) { 3094 next = varp->next; 3095 varp->next = prev; 3096 prev = varp; 3097 } 3098 tempvars = prev; 3099 } 3100 3101 TRACE_D3("\n"); 3102 TRACE_D3("------ Acquired the following type entities: ------\n"); 3103 for (adminp = admin; adminp; adminp = adminp->next) { 3104 TRACE_D3(" "); 3105 if (td3) ML_(pp_TyAdmin)( adminp ); 3106 TRACE_D3("\n"); 3107 } 3108 TRACE_D3("\n"); 3109 TRACE_D3("------ Resolving type entries ------\n"); 3110 3111 resolve_type_entities( admin, tempvars ); 3112 for (gexpr = gexprs; gexpr; gexpr = gexpr->next) { 3113 bias_GX( gexpr, di->text_bias ); 3114 } 3115 3116 TRACE_D3("\n"); 3117 TRACE_D3("------ Acquired the following variables: ------\n\n"); 3118 3119 /* Park (pointers to) all the vars in an XArray, so we can look up 3120 abstract origins quickly. The array is sorted (hence, looked-up 3121 by) the .dioff fields. Since the .dioffs should be instrictly 3122 ascending order, there is no need to sort the array after 3123 construction. The ascendingness is however asserted for. */ 3124 dioff_lookup_tab 3125 = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free), 3126 sizeof(TempVar*) ); 3127 vg_assert(dioff_lookup_tab); 3128 varp2 = NULL; 3129 for (varp = tempvars; varp; varp = varp->next) { 3130 if (varp2) 3131 vg_assert(varp2->dioff < varp->dioff); 3132 VG_(addToXA)( dioff_lookup_tab, &varp ); 3133 varp2 = varp; 3134 } 3135 VG_(setCmpFnXA)( dioff_lookup_tab, cmp_TempVar_by_dioff ); 3136 VG_(sortXA)( dioff_lookup_tab ); /* POINTLESS; FIXME: rm */ 3137 3138 /* Now visit each var. 
Collect up as much info as possible for 3139 each var and hand it to ML_(addVar). */ 3140 for (varp = tempvars; varp; varp = varp->next) { 3141 3142 /* Possibly show .. */ 3143 if (td3) { 3144 VG_(printf)("<%lx> addVar: level %d: %s :: ", 3145 varp->dioff, 3146 varp->level, 3147 varp->name ? varp->name : (UChar*)"<anon_var>" ); 3148 if (varp->typeR) { 3149 ML_(pp_Type_C_ishly)( varp->typeR ); 3150 } else { 3151 VG_(printf)("NULL"); 3152 } 3153 VG_(printf)("\n Loc="); 3154 if (varp->gexpr) { 3155 ML_(pp_GX)(varp->gexpr); 3156 } else { 3157 VG_(printf)("NULL"); 3158 } 3159 VG_(printf)("\n"); 3160 if (varp->fbGX) { 3161 VG_(printf)(" FrB="); 3162 ML_(pp_GX)( varp->fbGX ); 3163 VG_(printf)("\n"); 3164 } else { 3165 VG_(printf)(" FrB=none\n"); 3166 } 3167 VG_(printf)(" declared at: %s:%d\n", 3168 varp->fName ? varp->fName : (UChar*)"NULL", 3169 varp->fLine ); 3170 if (varp->absOri != (UWord)D3_INVALID_CUOFF) 3171 VG_(printf)(" abstract origin: <%lx>\n", varp->absOri); 3172 } 3173 3174 /* Skip variables which have no location. These must be 3175 abstract instances; they are useless as-is since with no 3176 location they have no specified memory location. They will 3177 presumably be referred to via the absOri fields of other 3178 variables. */ 3179 if (!varp->gexpr) { 3180 TRACE_D3(" SKIP (no location)\n\n"); 3181 continue; 3182 } 3183 3184 /* So it has a location, at least. If it refers to some other 3185 entry through its absOri field, pull in further info through 3186 that. 
*/ 3187 if (varp->absOri != (UWord)D3_INVALID_CUOFF) { 3188 Bool found; 3189 Word ixFirst, ixLast; 3190 TempVar key; 3191 TempVar* keyp = &key; 3192 TempVar *varAI; 3193 VG_(memset)(&key, 0, sizeof(key)); /* not necessary */ 3194 key.dioff = varp->absOri; /* this is what we want to find */ 3195 found = VG_(lookupXA)( dioff_lookup_tab, &keyp, 3196 &ixFirst, &ixLast ); 3197 if (!found) 3198 barf("DW_AT_abstract_origin can't be resolved"); 3199 /* If the following fails, there is more than one entry with 3200 the same dioff. Which can't happen. */ 3201 vg_assert(ixFirst == ixLast); 3202 varAI = *(TempVar**)VG_(indexXA)( dioff_lookup_tab, ixFirst ); 3203 /* stay sane */ 3204 vg_assert(varAI); 3205 vg_assert(varAI->dioff == varp->absOri); 3206 3207 /* Copy what useful info we can. */ 3208 if (varAI->typeR && !varp->typeR) 3209 varp->typeR = varAI->typeR; 3210 if (varAI->name && !varp->name) 3211 varp->name = varAI->name; 3212 if (varAI->fName && !varp->fName) 3213 varp->fName = varAI->fName; 3214 if (varAI->fLine > 0 && varp->fLine == 0) 3215 varp->fLine = varAI->fLine; 3216 } 3217 3218 /* Give it a name if it doesn't have one. */ 3219 if (!varp->name) 3220 varp->name = ML_(addStr)( di, "<anon_var>", -1 ); 3221 3222 /* So now does it have enough info to be useful? */ 3223 /* NOTE: re typeR: this is a hack. If typeR is NULL then the 3224 type didn't get resolved. Really, in that case something's 3225 broken earlier on, and should be fixed, rather than just 3226 skipping the variable. */ 3227 if (!varp->typeR) continue; 3228 vg_assert(varp->gexpr); 3229 vg_assert(varp->name); 3230 vg_assert(varp->typeR); 3231 vg_assert(varp->level >= 0); 3232 3233 /* Ok. So we're going to keep it. Call ML_(addVar) once for 3234 each address range in which the variable exists. */ 3235 TRACE_D3(" ACQUIRE for range(s) "); 3236 { AddrRange oneRange; 3237 AddrRange* varPcRanges; 3238 Word nVarPcRanges; 3239 /* Set up to iterate over address ranges, however 3240 represented. 
*/ 3241 if (varp->nRanges == 0 || varp->nRanges == 1) { 3242 vg_assert(!varp->rngMany); 3243 if (varp->nRanges == 0) { 3244 vg_assert(varp->rngOneMin == 0); 3245 vg_assert(varp->rngOneMax == 0); 3246 } 3247 nVarPcRanges = varp->nRanges; 3248 oneRange.aMin = varp->rngOneMin; 3249 oneRange.aMax = varp->rngOneMax; 3250 varPcRanges = &oneRange; 3251 } else { 3252 vg_assert(varp->rngMany); 3253 vg_assert(varp->rngOneMin == 0); 3254 vg_assert(varp->rngOneMax == 0); 3255 nVarPcRanges = VG_(sizeXA)(varp->rngMany); 3256 vg_assert(nVarPcRanges >= 2); 3257 vg_assert(nVarPcRanges == (Word)varp->nRanges); 3258 varPcRanges = VG_(indexXA)(varp->rngMany, 0); 3259 } 3260 if (varp->level == 0) 3261 vg_assert( nVarPcRanges == 1 ); 3262 /* and iterate */ 3263 for (i = 0; i < nVarPcRanges; i++) { 3264 Addr pcMin = varPcRanges[i].aMin; 3265 Addr pcMax = varPcRanges[i].aMax; 3266 vg_assert(pcMin <= pcMax); 3267 /* Level 0 is the global address range. So at level 0 we 3268 don't want to bias pcMin/pcMax; but at all other levels 3269 we do since those are derived from svmas in the Dwarf 3270 we're reading. Be paranoid ... */ 3271 if (varp->level == 0) { 3272 vg_assert(pcMin == (Addr)0); 3273 vg_assert(pcMax == ~(Addr)0); 3274 } else { 3275 /* vg_assert(pcMin > (Addr)0); 3276 No .. we can legitimately expect to see ranges like 3277 0x0-0x11D (pre-biasing, of course). */ 3278 vg_assert(pcMax < ~(Addr)0); 3279 } 3280 3281 if (i > 0 && (i%2) == 0) TRACE_D3("\n "); 3282 TRACE_D3("[%p,%p] ", pcMin, pcMax ); 3283 3284 ML_(addVar)( 3285 di, varp->level, 3286 pcMin + (varp->level==0 ? 0 : di->text_bias), 3287 pcMax + (varp->level==0 ? 
0 : di->text_bias), 3288 varp->name, (void*)varp->typeR, 3289 varp->gexpr, varp->fbGX, 3290 varp->fName, varp->fLine, td3 3291 ); 3292 } 3293 } 3294 3295 TRACE_D3("\n\n"); 3296 /* and move on to the next var */ 3297 } 3298 3299 /* Now free all the TempVars */ 3300 for (varp = tempvars; varp; varp = varp2) { 3301 varp2 = varp->next; 3302 if (varp->rngMany) 3303 VG_(deleteXA)(varp->rngMany); 3304 ML_(dinfo_free)(varp); 3305 } 3306 tempvars = NULL; 3307 3308 /* And get rid of the temporary mapping table. */ 3309 VG_(deleteXA)( dioff_lookup_tab ); 3310 3311 /* record the TyAdmins and the GExprs in di so they can be freed 3312 later */ 3313 vg_assert(!di->admin_tyadmins); 3314 di->admin_tyadmins = admin; 3315 vg_assert(!di->admin_gexprs); 3316 di->admin_gexprs = gexprs; 3317} 3318 3319 3320/*------------------------------------------------------------*/ 3321/*--- ---*/ 3322/*--- The "new" DWARF3 reader -- top level control logic ---*/ 3323/*--- ---*/ 3324/*------------------------------------------------------------*/ 3325 3326/* --- !!! --- EXTERNAL HEADERS start --- !!! --- */ 3327#include <setjmp.h> /* For jmp_buf */ 3328/* --- !!! --- EXTERNAL HEADERS end --- !!! 
--- */ 3329 3330static Bool d3rd_jmpbuf_valid = False; 3331static HChar* d3rd_jmpbuf_reason = NULL; 3332static jmp_buf d3rd_jmpbuf; 3333 3334static __attribute__((noreturn)) void barf ( HChar* reason ) { 3335 vg_assert(d3rd_jmpbuf_valid); 3336 d3rd_jmpbuf_reason = reason; 3337 __builtin_longjmp(&d3rd_jmpbuf, 1); 3338 /*NOTREACHED*/ 3339 vg_assert(0); 3340} 3341 3342 3343void 3344ML_(new_dwarf3_reader) ( 3345 struct _DebugInfo* di, 3346 UChar* debug_info_img, SizeT debug_info_sz, 3347 UChar* debug_abbv_img, SizeT debug_abbv_sz, 3348 UChar* debug_line_img, SizeT debug_line_sz, 3349 UChar* debug_str_img, SizeT debug_str_sz, 3350 UChar* debug_ranges_img, SizeT debug_ranges_sz, 3351 UChar* debug_loc_img, SizeT debug_loc_sz 3352) 3353{ 3354 volatile Int jumped; 3355 volatile Bool td3 = di->trace_symtab; 3356 3357 /* Run the _wrk function to read the dwarf3. If it succeeds, it 3358 just returns normally. If there is any failure, it longjmp's 3359 back here, having first set d3rd_jmpbuf_reason to something 3360 useful. */ 3361 vg_assert(d3rd_jmpbuf_valid == False); 3362 vg_assert(d3rd_jmpbuf_reason == NULL); 3363 3364 d3rd_jmpbuf_valid = True; 3365 jumped = __builtin_setjmp(&d3rd_jmpbuf); 3366 if (jumped == 0) { 3367 /* try this ... */ 3368 new_dwarf3_reader_wrk( di, barf, 3369 debug_info_img, debug_info_sz, 3370 debug_abbv_img, debug_abbv_sz, 3371 debug_line_img, debug_line_sz, 3372 debug_str_img, debug_str_sz, 3373 debug_ranges_img, debug_ranges_sz, 3374 debug_loc_img, debug_loc_sz ); 3375 d3rd_jmpbuf_valid = False; 3376 TRACE_D3("\n------ .debug_info reading was successful ------\n"); 3377 } else { 3378 /* It longjmp'd. */ 3379 d3rd_jmpbuf_valid = False; 3380 /* Can't longjump without giving some sort of reason. 
*/ 3381 vg_assert(d3rd_jmpbuf_reason != NULL); 3382 3383 TRACE_D3("\n------ .debug_info reading failed ------\n"); 3384 3385 ML_(symerr)(di, True, d3rd_jmpbuf_reason); 3386 } 3387 3388 d3rd_jmpbuf_valid = False; 3389 d3rd_jmpbuf_reason = NULL; 3390} 3391 3392 3393 3394/* --- Unused code fragments which might be useful one day. --- */ 3395 3396#if 0 3397 /* Read the arange tables */ 3398 TRACE_SYMTAB("\n"); 3399 TRACE_SYMTAB("\n------ The contents of .debug_arange ------\n"); 3400 init_Cursor( &aranges, debug_aranges_img, 3401 debug_aranges_sz, 0, barf, 3402 "Overrun whilst reading .debug_aranges section" ); 3403 while (True) { 3404 ULong len, d_i_offset; 3405 Bool is64; 3406 UShort version; 3407 UChar asize, segsize; 3408 3409 if (is_at_end_Cursor( &aranges )) 3410 break; 3411 /* Read one arange thingy */ 3412 /* initial_length field */ 3413 len = get_Initial_Length( &is64, &aranges, 3414 "in .debug_aranges: invalid initial-length field" ); 3415 version = get_UShort( &aranges ); 3416 d_i_offset = get_Dwarfish_UWord( &aranges, is64 ); 3417 asize = get_UChar( &aranges ); 3418 segsize = get_UChar( &aranges ); 3419 TRACE_D3(" Length: %llu\n", len); 3420 TRACE_D3(" Version: %d\n", (Int)version); 3421 TRACE_D3(" Offset into .debug_info: %llx\n", d_i_offset); 3422 TRACE_D3(" Pointer Size: %d\n", (Int)asize); 3423 TRACE_D3(" Segment Size: %d\n", (Int)segsize); 3424 TRACE_D3("\n"); 3425 TRACE_D3(" Address Length\n"); 3426 3427 while ((get_position_of_Cursor( &aranges ) % (2 * asize)) > 0) { 3428 (void)get_UChar( & aranges ); 3429 } 3430 while (True) { 3431 ULong address = get_Dwarfish_UWord( &aranges, asize==8 ); 3432 ULong length = get_Dwarfish_UWord( &aranges, asize==8 ); 3433 TRACE_D3(" 0x%016llx 0x%llx\n", address, length); 3434 if (address == 0 && length == 0) break; 3435 } 3436 } 3437 TRACE_SYMTAB("\n"); 3438#endif 3439 3440/*--------------------------------------------------------------------*/ 3441/*--- end readdwarf3.c ---*/ 
3442/*--------------------------------------------------------------------*/ 3443