dwfl_segment_report_module.c revision 387654d501eabd17d46e1a7d2a2f27388ed52943
1/* Sniff out modules from ELF headers visible in memory segments. 2 Copyright (C) 2008-2012 Red Hat, Inc. 3 This file is part of elfutils. 4 5 This file is free software; you can redistribute it and/or modify 6 it under the terms of either 7 8 * the GNU Lesser General Public License as published by the Free 9 Software Foundation; either version 3 of the License, or (at 10 your option) any later version 11 12 or 13 14 * the GNU General Public License as published by the Free 15 Software Foundation; either version 2 of the License, or (at 16 your option) any later version 17 18 or both in parallel, as here. 19 20 elfutils is distributed in the hope that it will be useful, but 21 WITHOUT ANY WARRANTY; without even the implied warranty of 22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 23 General Public License for more details. 24 25 You should have received copies of the GNU General Public License and 26 the GNU Lesser General Public License along with this program. If 27 not, see <http://www.gnu.org/licenses/>. */ 28 29#include <config.h> 30#include "../libelf/libelfP.h" /* For NOTE_ALIGN. */ 31#undef _ 32#include "libdwflP.h" 33 34#include <elf.h> 35#include <gelf.h> 36#include <inttypes.h> 37#include <sys/param.h> 38#include <alloca.h> 39#include <endian.h> 40 41 42/* A good size for the initial read from memory, if it's not too costly. 43 This more than covers the phdrs and note segment in the average 64-bit 44 binary. */ 45 46#define INITIAL_READ 1024 47 48#if __BYTE_ORDER == __LITTLE_ENDIAN 49# define MY_ELFDATA ELFDATA2LSB 50#else 51# define MY_ELFDATA ELFDATA2MSB 52#endif 53 54 55/* Return user segment index closest to ADDR but not above it. 56 If NEXT, return the closest to ADDR but not below it. */ 57static int 58addr_segndx (Dwfl *dwfl, size_t segment, GElf_Addr addr, bool next) 59{ 60 int ndx = -1; 61 do 62 { 63 if (dwfl->lookup_segndx[segment] >= 0) 64 ndx = dwfl->lookup_segndx[segment]; 65 if (++segment >= dwfl->lookup_elts - 1) 66 return next ? ndx + 1 : ndx; 67 } 68 while (dwfl->lookup_addr[segment] < addr); 69 70 if (next) 71 { 72 while (dwfl->lookup_segndx[segment] < 0) 73 if (++segment >= dwfl->lookup_elts - 1) 74 return ndx + 1; 75 ndx = dwfl->lookup_segndx[segment]; 76 } 77 78 return ndx; 79} 80 81int 82dwfl_segment_report_module (Dwfl *dwfl, int ndx, const char *name, 83 Dwfl_Memory_Callback *memory_callback, 84 void *memory_callback_arg, 85 Dwfl_Module_Callback *read_eagerly, 86 void *read_eagerly_arg) 87{ 88 size_t segment = ndx; 89 90 if (segment >= dwfl->lookup_elts) 91 segment = dwfl->lookup_elts - 1; 92 93 while (segment > 0 94 && (dwfl->lookup_segndx[segment] > ndx 95 || dwfl->lookup_segndx[segment] == -1)) 96 --segment; 97 98 while (dwfl->lookup_segndx[segment] < ndx) 99 if (++segment == dwfl->lookup_elts) 100 return 0; 101 102 GElf_Addr start = dwfl->lookup_addr[segment]; 103 104 inline bool segment_read (int segndx, 105 void **buffer, size_t *buffer_available, 106 GElf_Addr addr, size_t minread) 107 { 108 return ! (*memory_callback) (dwfl, segndx, buffer, buffer_available, 109 addr, minread, memory_callback_arg); 110 } 111 112 inline void release_buffer (void **buffer, size_t *buffer_available) 113 { 114 if (*buffer != NULL) 115 (void) segment_read (-1, buffer, buffer_available, 0, 0); 116 } 117 118 /* First read in the file header and check its sanity. */ 119 120 void *buffer = NULL; 121 size_t buffer_available = INITIAL_READ; 122 123 inline int finish (void) 124 { 125 release_buffer (&buffer, &buffer_available); 126 return ndx; 127 } 128 129 if (segment_read (ndx, &buffer, &buffer_available, 130 start, sizeof (Elf64_Ehdr)) 131 || memcmp (buffer, ELFMAG, SELFMAG) != 0) 132 return finish (); 133 134 inline bool read_portion (void **data, size_t *data_size, 135 GElf_Addr vaddr, size_t filesz) 136 { 137 if (vaddr - start + filesz > buffer_available 138 /* If we're in string mode, then don't consider the buffer we have 139 sufficient unless it contains the terminator of the string. */ 140 || (filesz == 0 && memchr (vaddr - start + buffer, '\0', 141 buffer_available - (vaddr - start)) == NULL)) 142 { 143 *data = NULL; 144 *data_size = filesz; 145 return segment_read (addr_segndx (dwfl, segment, vaddr, false), 146 data, data_size, vaddr, filesz); 147 } 148 149 /* We already have this whole note segment from our initial read. */ 150 *data = vaddr - start + buffer; 151 *data_size = 0; 152 return false; 153 } 154 155 inline void finish_portion (void **data, size_t *data_size) 156 { 157 if (*data_size != 0) 158 release_buffer (data, data_size); 159 } 160 161 /* Extract the information we need from the file header. */ 162 union 163 { 164 Elf32_Ehdr e32; 165 Elf64_Ehdr e64; 166 } ehdr; 167 GElf_Off phoff; 168 uint_fast16_t phnum; 169 uint_fast16_t phentsize; 170 GElf_Off shdrs_end; 171 Elf_Data xlatefrom = 172 { 173 .d_type = ELF_T_EHDR, 174 .d_buf = (void *) buffer, 175 .d_version = EV_CURRENT, 176 }; 177 Elf_Data xlateto = 178 { 179 .d_type = ELF_T_EHDR, 180 .d_buf = &ehdr, 181 .d_size = sizeof ehdr, 182 .d_version = EV_CURRENT, 183 }; 184 switch (((const unsigned char *) buffer)[EI_CLASS]) 185 { 186 case ELFCLASS32: 187 xlatefrom.d_size = sizeof (Elf32_Ehdr); 188 if (elf32_xlatetom (&xlateto, &xlatefrom, 189 ((const unsigned char *) buffer)[EI_DATA]) == NULL) 190 return finish (); 191 phoff = ehdr.e32.e_phoff; 192 phnum = ehdr.e32.e_phnum; 193 phentsize = ehdr.e32.e_phentsize; 194 if (phentsize != sizeof (Elf32_Phdr)) 195 return finish (); 196 shdrs_end = ehdr.e32.e_shoff + ehdr.e32.e_shnum * ehdr.e32.e_shentsize; 197 break; 198 199 case ELFCLASS64: 200 xlatefrom.d_size = sizeof (Elf64_Ehdr); 201 if (elf64_xlatetom (&xlateto, &xlatefrom, 202 ((const unsigned char *) buffer)[EI_DATA]) == NULL) 203 return finish (); 204 phoff = ehdr.e64.e_phoff; 205 phnum = ehdr.e64.e_phnum; 206 phentsize = ehdr.e64.e_phentsize; 207 if (phentsize != sizeof (Elf64_Phdr)) 208 return finish (); 209 shdrs_end = ehdr.e64.e_shoff + ehdr.e64.e_shnum * ehdr.e64.e_shentsize; 210 break; 211 212 default: 213 return finish (); 214 } 215 216 /* The file header tells where to find the program headers. 217 These are what we need to find the boundaries of the module. 218 Without them, we don't have a module to report. */ 219 220 if (phnum == 0) 221 return finish (); 222 223 xlatefrom.d_type = xlateto.d_type = ELF_T_PHDR; 224 xlatefrom.d_size = phnum * phentsize; 225 226 void *ph_buffer = NULL; 227 size_t ph_buffer_size = 0; 228 if (read_portion (&ph_buffer, &ph_buffer_size, 229 start + phoff, xlatefrom.d_size)) 230 return finish (); 231 232 xlatefrom.d_buf = ph_buffer; 233 234 union 235 { 236 Elf32_Phdr p32[phnum]; 237 Elf64_Phdr p64[phnum]; 238 } phdrs; 239 240 xlateto.d_buf = &phdrs; 241 xlateto.d_size = sizeof phdrs; 242 243 /* Track the bounds of the file visible in memory. */ 244 GElf_Off file_trimmed_end = 0; /* Proper p_vaddr + p_filesz end. */ 245 GElf_Off file_end = 0; /* Rounded up to effective page size. */ 246 GElf_Off contiguous = 0; /* Visible as contiguous file from START. */ 247 GElf_Off total_filesz = 0; /* Total size of data to read. */ 248 249 /* Collect the bias between START and the containing PT_LOAD's p_vaddr. */ 250 GElf_Addr bias = 0; 251 bool found_bias = false; 252 253 /* Collect the unbiased bounds of the module here. */ 254 GElf_Addr module_start = -1l; 255 GElf_Addr module_end = 0; 256 GElf_Addr module_address_sync = 0; 257 258 /* If we see PT_DYNAMIC, record it here. */ 259 GElf_Addr dyn_vaddr = 0; 260 GElf_Xword dyn_filesz = 0; 261 262 /* Collect the build ID bits here. */ 263 void *build_id = NULL; 264 size_t build_id_len = 0; 265 GElf_Addr build_id_vaddr = 0; 266 267 /* Consider a PT_NOTE we've found in the image. */ 268 inline void consider_notes (GElf_Addr vaddr, GElf_Xword filesz) 269 { 270 /* If we have already seen a build ID, we don't care any more. */ 271 if (build_id != NULL || filesz == 0) 272 return; 273 274 void *data; 275 size_t data_size; 276 if (read_portion (&data, &data_size, vaddr, filesz)) 277 return; 278 279 assert (sizeof (Elf32_Nhdr) == sizeof (Elf64_Nhdr)); 280 281 void *notes; 282 if (ehdr.e32.e_ident[EI_DATA] == MY_ELFDATA) 283 notes = data; 284 else 285 { 286 notes = malloc (filesz); 287 if (unlikely (notes == NULL)) 288 return; 289 xlatefrom.d_type = xlateto.d_type = ELF_T_NHDR; 290 xlatefrom.d_buf = (void *) data; 291 xlatefrom.d_size = filesz; 292 xlateto.d_buf = notes; 293 xlateto.d_size = filesz; 294 if (elf32_xlatetom (&xlateto, &xlatefrom, 295 ehdr.e32.e_ident[EI_DATA]) == NULL) 296 goto done; 297 } 298 299 const GElf_Nhdr *nh = notes; 300 while ((const void *) nh < (const void *) notes + filesz) 301 { 302 const void *note_name = nh + 1; 303 const void *note_desc = note_name + NOTE_ALIGN (nh->n_namesz); 304 if (unlikely ((size_t) ((const void *) notes + filesz 305 - note_desc) < nh->n_descsz)) 306 break; 307 308 if (nh->n_type == NT_GNU_BUILD_ID 309 && nh->n_descsz > 0 310 && nh->n_namesz == sizeof "GNU" 311 && !memcmp (note_name, "GNU", sizeof "GNU")) 312 { 313 build_id_vaddr = note_desc - (const void *) notes + vaddr; 314 build_id_len = nh->n_descsz; 315 build_id = malloc (nh->n_descsz); 316 if (likely (build_id != NULL)) 317 memcpy (build_id, note_desc, build_id_len); 318 break; 319 } 320 321 nh = note_desc + NOTE_ALIGN (nh->n_descsz); 322 } 323 324 done: 325 if (notes != data) 326 free (notes); 327 finish_portion (&data, &data_size); 328 } 329 330 /* Consider each of the program headers we've read from the image. */ 331 inline void consider_phdr (GElf_Word type, 332 GElf_Addr vaddr, GElf_Xword memsz, 333 GElf_Off offset, GElf_Xword filesz, 334 GElf_Xword align) 335 { 336 switch (type) 337 { 338 case PT_DYNAMIC: 339 dyn_vaddr = vaddr; 340 dyn_filesz = filesz; 341 break; 342 343 case PT_NOTE: 344 /* We calculate from the p_offset of the note segment, 345 because we don't yet know the bias for its p_vaddr. */ 346 consider_notes (start + offset, filesz); 347 break; 348 349 case PT_LOAD: 350 align = dwfl->segment_align > 1 ? dwfl->segment_align : align ?: 1; 351 352 GElf_Addr vaddr_end = (vaddr + memsz + align - 1) & -align; 353 GElf_Addr filesz_vaddr = filesz < memsz ? vaddr + filesz : vaddr_end; 354 GElf_Off filesz_offset = filesz_vaddr - vaddr + offset; 355 356 if (file_trimmed_end < offset + filesz) 357 { 358 file_trimmed_end = offset + filesz; 359 360 /* Trim the last segment so we don't bother with zeros 361 in the last page that are off the end of the file. 362 However, if the extra bit in that page includes the 363 section headers, keep them. */ 364 if (shdrs_end <= filesz_offset && shdrs_end > file_trimmed_end) 365 { 366 filesz += shdrs_end - file_trimmed_end; 367 file_trimmed_end = shdrs_end; 368 } 369 } 370 371 total_filesz += filesz; 372 373 if (file_end < filesz_offset) 374 { 375 file_end = filesz_offset; 376 if (filesz_vaddr - start == filesz_offset) 377 contiguous = file_end; 378 } 379 380 if (!found_bias && (offset & -align) == 0 381 && likely (filesz_offset >= phoff + phnum * phentsize)) 382 { 383 bias = start - vaddr; 384 found_bias = true; 385 } 386 387 if ((vaddr & -align) < module_start) 388 { 389 module_start = vaddr & -align; 390 module_address_sync = vaddr + memsz; 391 } 392 393 if (module_end < vaddr_end) 394 module_end = vaddr_end; 395 break; 396 } 397 } 398 if (ehdr.e32.e_ident[EI_CLASS] == ELFCLASS32) 399 { 400 if (elf32_xlatetom (&xlateto, &xlatefrom, 401 ehdr.e32.e_ident[EI_DATA]) == NULL) 402 found_bias = false; /* Trigger error check. */ 403 else 404 for (uint_fast16_t i = 0; i < phnum; ++i) 405 consider_phdr (phdrs.p32[i].p_type, 406 phdrs.p32[i].p_vaddr, phdrs.p32[i].p_memsz, 407 phdrs.p32[i].p_offset, phdrs.p32[i].p_filesz, 408 phdrs.p32[i].p_align); 409 } 410 else 411 { 412 if (elf64_xlatetom (&xlateto, &xlatefrom, 413 ehdr.e32.e_ident[EI_DATA]) == NULL) 414 found_bias = false; /* Trigger error check. */ 415 else 416 for (uint_fast16_t i = 0; i < phnum; ++i) 417 consider_phdr (phdrs.p64[i].p_type, 418 phdrs.p64[i].p_vaddr, phdrs.p64[i].p_memsz, 419 phdrs.p64[i].p_offset, phdrs.p64[i].p_filesz, 420 phdrs.p64[i].p_align); 421 } 422 423 finish_portion (&ph_buffer, &ph_buffer_size); 424 425 /* We must have seen the segment covering offset 0, or else the ELF 426 header we read at START was not produced by these program headers. */ 427 if (unlikely (!found_bias)) 428 return finish (); 429 430 /* Now we know enough to report a module for sure: its bounds. */ 431 module_start += bias; 432 module_end += bias; 433 434 dyn_vaddr += bias; 435 436 /* Our return value now says to skip the segments contained 437 within the module. */ 438 ndx = addr_segndx (dwfl, segment, module_end, true); 439 440 /* Examine its .dynamic section to get more interesting details. 441 If it has DT_SONAME, we'll use that as the module name. 442 If it has a DT_DEBUG, then it's actually a PIE rather than a DSO. 443 We need its DT_STRTAB and DT_STRSZ to decipher DT_SONAME, 444 and they also tell us the essential portion of the file 445 for fetching symbols. */ 446 GElf_Addr soname_stroff = 0; 447 GElf_Addr dynstr_vaddr = 0; 448 GElf_Xword dynstrsz = 0; 449 bool execlike = false; 450 inline bool consider_dyn (GElf_Sxword tag, GElf_Xword val) 451 { 452 switch (tag) 453 { 454 default: 455 return false; 456 457 case DT_DEBUG: 458 execlike = true; 459 break; 460 461 case DT_SONAME: 462 soname_stroff = val; 463 break; 464 465 case DT_STRTAB: 466 dynstr_vaddr = val; 467 break; 468 469 case DT_STRSZ: 470 dynstrsz = val; 471 break; 472 } 473 474 return soname_stroff != 0 && dynstr_vaddr != 0 && dynstrsz != 0; 475 } 476 477 const size_t dyn_entsize = (ehdr.e32.e_ident[EI_CLASS] == ELFCLASS32 478 ? sizeof (Elf32_Dyn) : sizeof (Elf64_Dyn)); 479 void *dyn_data = NULL; 480 size_t dyn_data_size = 0; 481 if (dyn_filesz != 0 && dyn_filesz % dyn_entsize == 0 482 && ! read_portion (&dyn_data, &dyn_data_size, dyn_vaddr, dyn_filesz)) 483 { 484 union 485 { 486 Elf32_Dyn d32[dyn_filesz / sizeof (Elf32_Dyn)]; 487 Elf64_Dyn d64[dyn_filesz / sizeof (Elf64_Dyn)]; 488 } dyn; 489 490 xlatefrom.d_type = xlateto.d_type = ELF_T_DYN; 491 xlatefrom.d_buf = (void *) dyn_data; 492 xlatefrom.d_size = dyn_filesz; 493 xlateto.d_buf = &dyn; 494 xlateto.d_size = sizeof dyn; 495 496 if (ehdr.e32.e_ident[EI_CLASS] == ELFCLASS32) 497 { 498 if (elf32_xlatetom (&xlateto, &xlatefrom, 499 ehdr.e32.e_ident[EI_DATA]) != NULL) 500 for (size_t i = 0; i < dyn_filesz / sizeof dyn.d32[0]; ++i) 501 if (consider_dyn (dyn.d32[i].d_tag, dyn.d32[i].d_un.d_val)) 502 break; 503 } 504 else 505 { 506 if (elf64_xlatetom (&xlateto, &xlatefrom, 507 ehdr.e32.e_ident[EI_DATA]) != NULL) 508 for (size_t i = 0; i < dyn_filesz / sizeof dyn.d64[0]; ++i) 509 if (consider_dyn (dyn.d64[i].d_tag, dyn.d64[i].d_un.d_val)) 510 break; 511 } 512 } 513 finish_portion (&dyn_data, &dyn_data_size); 514 515 /* We'll use the name passed in or a stupid default if not DT_SONAME. */ 516 if (name == NULL) 517 name = ehdr.e32.e_type == ET_EXEC ? "[exe]" : execlike ? "[pie]" : "[dso]"; 518 519 void *soname = NULL; 520 size_t soname_size = 0; 521 if (dynstrsz != 0 && dynstr_vaddr != 0) 522 { 523 /* We know the bounds of the .dynstr section. 524 525 The DYNSTR_VADDR pointer comes from the .dynamic section 526 (DT_STRTAB, detected above). Ordinarily the dynamic linker 527 will have adjusted this pointer in place so it's now an 528 absolute address. But sometimes .dynamic is read-only (in 529 vDSOs and odd architectures), and sometimes the adjustment 530 just hasn't happened yet in the memory image we looked at. 531 So treat DYNSTR_VADDR as an absolute address if it falls 532 within the module bounds, or try applying the phdr bias 533 when that adjusts it to fall within the module bounds. */ 534 535 if ((dynstr_vaddr < module_start || dynstr_vaddr >= module_end) 536 && dynstr_vaddr + bias >= module_start 537 && dynstr_vaddr + bias < module_end) 538 dynstr_vaddr += bias; 539 540 if (unlikely (dynstr_vaddr + dynstrsz > module_end)) 541 dynstrsz = 0; 542 543 /* Try to get the DT_SONAME string. */ 544 if (soname_stroff != 0 && soname_stroff + 1 < dynstrsz 545 && ! read_portion (&soname, &soname_size, 546 dynstr_vaddr + soname_stroff, 0)) 547 name = soname; 548 } 549 550 /* Now that we have chosen the module's name and bounds, report it. 551 If we found a build ID, report that too. */ 552 553 Dwfl_Module *mod = INTUSE(dwfl_report_module) (dwfl, name, 554 module_start, module_end); 555 if (likely (mod != NULL) && build_id != NULL 556 && unlikely (INTUSE(dwfl_module_report_build_id) (mod, 557 build_id, 558 build_id_len, 559 build_id_vaddr))) 560 { 561 mod->gc = true; 562 mod = NULL; 563 } 564 565 /* At this point we do not need BUILD_ID or NAME any more. 566 They have been copied. */ 567 free (build_id); 568 finish_portion (&soname, &soname_size); 569 570 if (unlikely (mod == NULL)) 571 { 572 ndx = -1; 573 return finish (); 574 } 575 576 /* We have reported the module. Now let the caller decide whether we 577 should read the whole thing in right now. */ 578 579 const GElf_Off cost = (contiguous < file_trimmed_end ? total_filesz 580 : buffer_available >= contiguous ? 0 581 : contiguous - buffer_available); 582 const GElf_Off worthwhile = ((dynstr_vaddr == 0 || dynstrsz == 0) ? 0 583 : dynstr_vaddr + dynstrsz - start); 584 const GElf_Off whole = MAX (file_trimmed_end, shdrs_end); 585 586 Elf *elf = NULL; 587 if ((*read_eagerly) (MODCB_ARGS (mod), &buffer, &buffer_available, 588 cost, worthwhile, whole, contiguous, 589 read_eagerly_arg, &elf) 590 && elf == NULL) 591 { 592 /* The caller wants to read the whole file in right now, but hasn't 593 done it for us. Fill in a local image of the virtual file. */ 594 595 void *contents = calloc (1, file_trimmed_end); 596 if (unlikely (contents == NULL)) 597 return finish (); 598 599 inline void final_read (size_t offset, GElf_Addr vaddr, size_t size) 600 { 601 void *into = contents + offset; 602 size_t read_size = size; 603 (void) segment_read (addr_segndx (dwfl, segment, vaddr, false), 604 &into, &read_size, vaddr, size); 605 } 606 607 if (contiguous < file_trimmed_end) 608 { 609 /* We can't use the memory image verbatim as the file image. 610 So we'll be reading into a local image of the virtual file. */ 611 612 inline void read_phdr (GElf_Word type, GElf_Addr vaddr, 613 GElf_Off offset, GElf_Xword filesz) 614 { 615 if (type == PT_LOAD) 616 final_read (offset, vaddr + bias, filesz); 617 } 618 619 if (ehdr.e32.e_ident[EI_CLASS] == ELFCLASS32) 620 for (uint_fast16_t i = 0; i < phnum; ++i) 621 read_phdr (phdrs.p32[i].p_type, phdrs.p32[i].p_vaddr, 622 phdrs.p32[i].p_offset, phdrs.p32[i].p_filesz); 623 else 624 for (uint_fast16_t i = 0; i < phnum; ++i) 625 read_phdr (phdrs.p64[i].p_type, phdrs.p64[i].p_vaddr, 626 phdrs.p64[i].p_offset, phdrs.p64[i].p_filesz); 627 } 628 else 629 { 630 /* The whole file sits contiguous in memory, 631 but the caller didn't want to just do it. */ 632 633 const size_t have = MIN (buffer_available, file_trimmed_end); 634 memcpy (contents, buffer, have); 635 636 if (have < file_trimmed_end) 637 final_read (have, start + have, file_trimmed_end - have); 638 } 639 640 elf = elf_memory (contents, file_trimmed_end); 641 if (unlikely (elf == NULL)) 642 free (contents); 643 else 644 elf->flags |= ELF_F_MALLOCED; 645 } 646 647 if (elf != NULL) 648 { 649 /* Install the file in the module. */ 650 mod->main.elf = elf; 651 mod->main.vaddr = module_start - bias; 652 mod->main.address_sync = module_address_sync; 653 mod->main_bias = bias; 654 } 655 656 return finish (); 657} 658