plt.c revision cb9a28da448439eab4bf554810fd1004fbc00885
1#include <gelf.h> 2#include <sys/ptrace.h> 3#include <errno.h> 4#include <error.h> 5#include <inttypes.h> 6#include <assert.h> 7#include <string.h> 8 9#include "proc.h" 10#include "common.h" 11#include "library.h" 12#include "breakpoint.h" 13#include "linux-gnu/trace.h" 14 15/* There are two PLT types on 32-bit PPC: old-style, BSS PLT, and 16 * new-style "secure" PLT. We can tell one from the other by the 17 * flags on the .plt section. If it's +X (executable), it's BSS PLT, 18 * otherwise it's secure. 19 * 20 * BSS PLT works the same way as most architectures: the .plt section 21 * contains trampolines and we put breakpoints to those. With secure 22 * PLT, the .plt section doesn't contain instructions but addresses. 23 * The real PLT table is stored in .text. Addresses of those PLT 24 * entries can be computed, and it fact that's what the glink deal 25 * below does. 26 * 27 * If not prelinked, BSS PLT entries in the .plt section contain 28 * zeroes that are overwritten by the dynamic linker during start-up. 29 * For that reason, ltrace realizes those breakpoints only after 30 * .start is hit. 31 * 32 * 64-bit PPC is more involved. Program linker creates for each 33 * library call a _stub_ symbol named xxxxxxxx.plt_call.<callee> 34 * (where xxxxxxxx is a hexadecimal number). That stub does the call 35 * dispatch: it loads an address of a function to call from the 36 * section .plt, and branches. PLT entries themselves are essentially 37 * a curried call to the resolver. When the symbol is resolved, the 38 * resolver updates the value stored in .plt, and the next time 39 * around, the stub calls the library function directly. So we make 40 * at most one trip (none if the binary is prelinked) through each PLT 41 * entry, and correspondingly that is useless as a breakpoint site. 42 * 43 * Note the three confusing terms: stubs (that play the role of PLT 44 * entries), PLT entries, .plt section. 45 * 46 * We first check symbol tables and see if we happen to have stub 47 * symbols available. If yes we just put breakpoints to those, and 48 * treat them as usual breakpoints. The only tricky part is realizing 49 * that there can be more than one breakpoint per symbol. 50 * 51 * The case that we don't have the stub symbols available is harder. 52 * The following scheme uses two kinds of PLT breakpoints: unresolved 53 * and resolved (to some address). When the process starts (or when 54 * we attach), we distribute unresolved PLT breakpoints to the PLT 55 * entries (not stubs). Then we look in .plt, and for each entry 56 * whose value is different than the corresponding PLT entry address, 57 * we assume it was already resolved, and convert the breakpoint to 58 * resolved. We also rewrite the resolved value in .plt back to the 59 * PLT address. 60 * 61 * When a PLT entry hits a resolved breakpoint (which happens because 62 * we put back the unresolved addresses to .plt), we move the 63 * instruction pointer to the corresponding address and continue the 64 * process as if nothing happened. 65 * 66 * When unresolved PLT entry is called for the first time, we need to 67 * catch the new value that the resolver will write to a .plt slot. 68 * We also need to prevent another thread from racing through and 69 * taking the branch without ltrace noticing. So when unresolved PLT 70 * entry hits, we have to stop all threads. We then single-step 71 * through the resolver, until the .plt slot changes. When it does, 72 * we treat it the same way as above: convert the PLT breakpoint to 73 * resolved, and rewrite the .plt value back to PLT address. We then 74 * start all threads again. 75 * 76 * In theory we might find the exact instruction that will update the 77 * .plt slot, and emulate it, updating the PLT breakpoint immediately, 78 * and then just skip it. But that's even messier than the thread 79 * stopping business and single stepping that needs to be done. 80 * 81 * Short of doing this we really have to stop everyone. There is no 82 * way around that. Unless we know where the stubs are, we don't have 83 * a way to catch a thread that would use the window of opportunity 84 * between updating .plt and notifying ltrace about the singlestep. 85 */ 86 87#define PPC_PLT_STUB_SIZE 16 88#define PPC64_PLT_STUB_SIZE 8 //xxx 89 90static inline int 91host_powerpc64() 92{ 93#ifdef __powerpc64__ 94 return 1; 95#else 96 return 0; 97#endif 98} 99 100GElf_Addr 101arch_plt_sym_val(struct ltelf *lte, size_t ndx, GElf_Rela *rela) 102{ 103 if (lte->ehdr.e_machine == EM_PPC && lte->arch.secure_plt) { 104 assert(lte->arch.plt_stub_vma != 0); 105 return lte->arch.plt_stub_vma + PPC_PLT_STUB_SIZE * ndx; 106 107 } else if (lte->ehdr.e_machine == EM_PPC) { 108 return rela->r_offset; 109 110 } else { 111 /* If we get here, we don't have stub symbols. In 112 * that case we put brakpoints to PLT entries the same 113 * as the PPC32 secure PLT case does. */ 114 assert(lte->arch.plt_stub_vma != 0); 115 return lte->arch.plt_stub_vma + PPC64_PLT_STUB_SIZE * ndx; 116 } 117} 118 119int 120arch_translate_address(struct Process *proc, 121 target_address_t addr, target_address_t *ret) 122{ 123 if (proc->e_machine == EM_PPC64) { 124 assert(host_powerpc64()); 125 long l = ptrace(PTRACE_PEEKTEXT, proc->pid, addr, 0); 126 fprintf(stderr, "arch_translate_address %p->%#lx\n", 127 addr, l); 128 if (l == -1 && errno) { 129 error(0, errno, ".opd translation of %p", addr); 130 return -1; 131 } 132 *ret = (target_address_t)l; 133 return 0; 134 } 135 136 *ret = addr; 137 return 0; 138} 139 140/* XXX Apparently PPC64 doesn't support PLT breakpoints. */ 141void * 142sym2addr(Process *proc, struct library_symbol *sym) { 143 void *addr = sym->enter_addr; 144 long pt_ret; 145 146 debug(3, 0); 147 148 if (sym->plt_type != LS_TOPLT_POINT) { 149 return addr; 150 } 151 152 if (proc->pid == 0) { 153 return 0; 154 } 155 156 if (options.debug >= 3) { 157 xinfdump(proc->pid, (void *)(((long)addr-32)&0xfffffff0), 158 sizeof(void*)*8); 159 } 160 161 // On a PowerPC-64 system, a plt is three 64-bit words: the first is the 162 // 64-bit address of the routine. Before the PLT has been initialized, 163 // this will be 0x0. In fact, the symbol table won't have the plt's 164 // address even. Ater the PLT has been initialized, but before it has 165 // been resolved, the first word will be the address of the function in 166 // the dynamic linker that will reslove the PLT. After the PLT is 167 // resolved, this will will be the address of the routine whose symbol 168 // is in the symbol table. 169 170 // On a PowerPC-32 system, there are two types of PLTs: secure (new) and 171 // non-secure (old). For the secure case, the PLT is simply a pointer 172 // and we can treat it much as we do for the PowerPC-64 case. For the 173 // non-secure case, the PLT is executable code and we can put the 174 // break-point right in the PLT. 175 176 pt_ret = ptrace(PTRACE_PEEKTEXT, proc->pid, addr, 0); 177 178#if SIZEOF_LONG == 8 179 if (proc->mask_32bit) { 180 // Assume big-endian. 181 addr = (void *)((pt_ret >> 32) & 0xffffffff); 182 } else { 183 addr = (void *)pt_ret; 184 } 185#else 186 /* XXX Um, so where exactly are we dealing with the non-secure 187 PLT thing? */ 188 addr = (void *)pt_ret; 189#endif 190 191 return addr; 192} 193 194static GElf_Addr 195get_glink_vma(struct ltelf *lte, GElf_Addr ppcgot, Elf_Data *plt_data) 196{ 197 Elf_Scn *ppcgot_sec = NULL; 198 GElf_Shdr ppcgot_shdr; 199 if (ppcgot != 0 200 && elf_get_section_covering(lte, ppcgot, 201 &ppcgot_sec, &ppcgot_shdr) < 0) 202 // xxx should be the log out 203 fprintf(stderr, 204 "DT_PPC_GOT=%#" PRIx64 ", but no such section found.\n", 205 ppcgot); 206 207 if (ppcgot_sec != NULL) { 208 Elf_Data *data = elf_loaddata(ppcgot_sec, &ppcgot_shdr); 209 if (data == NULL || data->d_size < 8 ) { 210 fprintf(stderr, "Couldn't read GOT data.\n"); 211 } else { 212 // where PPCGOT begins in .got 213 size_t offset = ppcgot - ppcgot_shdr.sh_addr; 214 assert(offset % 4 == 0); 215 uint32_t glink_vma; 216 if (elf_read_u32(data, offset + 4, &glink_vma) < 0) { 217 fprintf(stderr, 218 "Couldn't read glink VMA address" 219 " at %zd@GOT\n", offset); 220 return 0; 221 } 222 if (glink_vma != 0) { 223 debug(1, "PPC GOT glink_vma address: %#" PRIx32, 224 glink_vma); 225 fprintf(stderr, "PPC GOT glink_vma " 226 "address: %#"PRIx32"\n", glink_vma); 227 return (GElf_Addr)glink_vma; 228 } 229 } 230 } 231 232 if (plt_data != NULL) { 233 uint32_t glink_vma; 234 if (elf_read_u32(plt_data, 0, &glink_vma) < 0) { 235 fprintf(stderr, 236 "Couldn't read glink VMA address at 0@.plt\n"); 237 return 0; 238 } 239 debug(1, ".plt glink_vma address: %#" PRIx32, glink_vma); 240 fprintf(stderr, ".plt glink_vma address: " 241 "%#"PRIx32"\n", glink_vma); 242 return (GElf_Addr)glink_vma; 243 } 244 245 return 0; 246} 247 248static int 249load_dynamic_entry(struct ltelf *lte, int tag, GElf_Addr *valuep) 250{ 251 Elf_Scn *scn; 252 GElf_Shdr shdr; 253 if (elf_get_section_type(lte, SHT_DYNAMIC, &scn, &shdr) < 0 254 || scn == NULL) { 255 fail: 256 error(0, 0, "Couldn't get SHT_DYNAMIC: %s", 257 elf_errmsg(-1)); 258 return -1; 259 } 260 261 Elf_Data *data = elf_loaddata(scn, &shdr); 262 if (data == NULL) 263 goto fail; 264 265 size_t j; 266 for (j = 0; j < shdr.sh_size / shdr.sh_entsize; ++j) { 267 GElf_Dyn dyn; 268 if (gelf_getdyn(data, j, &dyn) == NULL) 269 goto fail; 270 271 if(dyn.d_tag == tag) { 272 *valuep = dyn.d_un.d_ptr; 273 return 0; 274 } 275 } 276 277 return -1; 278} 279 280static int 281load_ppcgot(struct ltelf *lte, GElf_Addr *ppcgotp) 282{ 283 return load_dynamic_entry(lte, DT_PPC_GOT, ppcgotp); 284} 285 286static int 287load_ppc64_glink(struct ltelf *lte, GElf_Addr *glinkp) 288{ 289 return load_dynamic_entry(lte, DT_PPC64_GLINK, glinkp); 290} 291 292int 293arch_elf_init(struct ltelf *lte) 294{ 295 lte->arch.secure_plt = !(lte->lte_flags & LTE_PLT_EXECUTABLE); 296 if (lte->ehdr.e_machine == EM_PPC && lte->arch.secure_plt) { 297 GElf_Addr ppcgot; 298 if (load_ppcgot(lte, &ppcgot) < 0) { 299 fprintf(stderr, "Couldn't find DT_PPC_GOT.\n"); 300 return -1; 301 } 302 GElf_Addr glink_vma = get_glink_vma(lte, ppcgot, lte->plt_data); 303 304 assert (lte->relplt_size % 12 == 0); 305 size_t count = lte->relplt_size / 12; // size of RELA entry 306 lte->arch.plt_stub_vma = glink_vma 307 - (GElf_Addr)count * PPC_PLT_STUB_SIZE; 308 debug(1, "stub_vma is %#" PRIx64, lte->arch.plt_stub_vma); 309 310 } else if (lte->ehdr.e_machine == EM_PPC64) { 311 GElf_Addr glink_vma; 312 if (load_ppc64_glink(lte, &glink_vma) < 0) { 313 fprintf(stderr, "Couldn't find DT_PPC64_GLINK.\n"); 314 return -1; 315 } 316 317 /* The first glink stub starts at offset 32. */ 318 lte->arch.plt_stub_vma = glink_vma + 32; 319 } 320 321 /* Override the value that we gleaned from flags on the .plt 322 * section. The PLT entries are in fact executable, they are 323 * just not in .plt. */ 324 lte->lte_flags |= LTE_PLT_EXECUTABLE; 325 326 /* On PPC64, look for stub symbols in symbol table. These are 327 * called: xxxxxxxx.plt_call.callee_name@version+addend. */ 328 if (lte->ehdr.e_machine == EM_PPC64 329 && lte->symtab != NULL && lte->strtab != NULL) { 330 331 /* N.B. We can't simply skip the symbols that we fail 332 * to read or malloc. There may be more than one stub 333 * per symbol name, and if we failed in one but 334 * succeeded in another, the PLT enabling code would 335 * have no way to tell that something is missing. We 336 * could work around that, of course, but it doesn't 337 * seem worth the trouble. So if anything fails, we 338 * just pretend that we don't have stub symbols at 339 * all, as if the binary is stripped. */ 340 341 size_t i; 342 for (i = 0; i < lte->symtab_count; ++i) { 343 GElf_Sym sym; 344 if (gelf_getsym(lte->symtab, i, &sym) == NULL) { 345 struct library_symbol *sym, *next; 346 fail: 347 for (sym = lte->arch.stubs; sym != NULL; ) { 348 next = sym->next; 349 library_symbol_destroy(sym); 350 free(sym); 351 sym = next; 352 } 353 lte->arch.stubs = NULL; 354 break; 355 } 356 357 const char *name = lte->strtab + sym.st_name; 358 359#define STUBN ".plt_call." 360 if ((name = strstr(name, STUBN)) == NULL) 361 continue; 362 name += sizeof(STUBN) - 1; 363#undef STUBN 364 365 size_t len; 366 const char *ver = strchr(name, '@'); 367 if (ver != NULL) { 368 len = ver - name; 369 370 } else { 371 /* If there is "+" at all, check that 372 * the symbol name ends in "+0". */ 373 const char *add = strrchr(name, '+'); 374 if (add != NULL) { 375 assert(strcmp(add, "+0") == 0); 376 len = add - name; 377 } else { 378 len = strlen(name); 379 } 380 } 381 382 char *sym_name = strndup(name, len); 383 struct library_symbol *libsym = malloc(sizeof(*libsym)); 384 if (sym_name == NULL || libsym == NULL) { 385 free(sym_name); 386 free(libsym); 387 goto fail; 388 } 389 390 target_address_t addr 391 = (target_address_t)sym.st_value + lte->bias; 392 library_symbol_init(libsym, addr, sym_name, 1, 393 LS_TOPLT_EXEC); 394 libsym->arch.type = PPC64PLT_STUB; 395 libsym->next = lte->arch.stubs; 396 lte->arch.stubs = libsym; 397 } 398 } 399 400 return 0; 401} 402 403static int 404read_plt_slot_value(struct Process *proc, GElf_Addr addr, GElf_Addr *valp) 405{ 406 /* on PPC32 we need to do things differently, but PPC64/PPC32 407 * is currently not supported anyway. */ 408 assert(host_powerpc64()); 409 410 long l = ptrace(PTRACE_PEEKTEXT, proc->pid, addr, 0); 411 if (l == -1 && errno != 0) { 412 error(0, errno, "ptrace .plt slot value @%#" PRIx64, addr); 413 return -1; 414 } 415 416 *valp = (GElf_Addr)l; 417 return 0; 418} 419 420static int 421unresolve_plt_slot(struct Process *proc, GElf_Addr addr, GElf_Addr value) 422{ 423 /* We only modify plt_entry[0], which holds the resolved 424 * address of the routine. We keep the TOC and environment 425 * pointers intact. Hence the only adjustment that we need to 426 * do is to IP. */ 427 if (ptrace(PTRACE_POKETEXT, proc->pid, addr, value) < 0) { 428 error(0, errno, "unresolve .plt slot"); 429 return -1; 430 } 431 return 0; 432} 433 434enum plt_status 435arch_elf_add_plt_entry(struct Process *proc, struct ltelf *lte, 436 const char *a_name, GElf_Rela *rela, size_t ndx, 437 struct library_symbol **ret) 438{ 439 if (lte->ehdr.e_machine == EM_PPC) 440 return plt_default; 441 442 /* PPC64. If we have stubs, we return a chain of breakpoint 443 * sites, one for each stub that corresponds to this PLT 444 * entry. */ 445 struct library_symbol *chain = NULL; 446 struct library_symbol **symp; 447 for (symp = <e->arch.stubs; *symp != NULL; ) { 448 struct library_symbol *sym = *symp; 449 if (strcmp(sym->name, a_name) != 0) { 450 symp = &(*symp)->next; 451 continue; 452 } 453 454 /* Re-chain the symbol from stubs to CHAIN. */ 455 *symp = sym->next; 456 sym->next = chain; 457 chain = sym; 458 } 459 460 if (chain != NULL) { 461 struct library_symbol *sym; 462 for (sym = chain; sym != NULL; sym = sym->next) 463 fprintf(stderr, "match %s --> %p\n", 464 sym->name, sym->enter_addr); 465 for (sym = lte->arch.stubs; sym != NULL; sym = sym->next) 466 fprintf(stderr, "remains %s --> %p\n", 467 sym->name, sym->enter_addr); 468 469 *ret = chain; 470 return plt_ok; 471 } 472 473 /* We don't have stub symbols. Find corresponding .plt slot, 474 * and check whether it contains the corresponding PLT address 475 * (or 0 if the dynamic linker hasn't run yet). N.B. we don't 476 * want read this from ELF file, but from process image. That 477 * makes a difference if we are attaching to a running 478 * process. */ 479 480 GElf_Addr plt_entry_addr = arch_plt_sym_val(lte, ndx, rela); 481 GElf_Addr plt_slot_addr = rela->r_offset; 482 assert(plt_slot_addr >= lte->plt_addr 483 || plt_slot_addr < lte->plt_addr + lte->plt_size); 484 485 GElf_Addr plt_slot_value; 486 if (read_plt_slot_value(proc, plt_slot_addr, &plt_slot_value) < 0) 487 return plt_fail; 488 489 char *name = strdup(a_name); 490 struct library_symbol *libsym = malloc(sizeof(*libsym)); 491 if (name == NULL || libsym == NULL) { 492 error(0, errno, "allocation for .plt slot"); 493 fail: 494 free(name); 495 free(libsym); 496 return plt_fail; 497 } 498 499 library_symbol_init(libsym, (target_address_t)plt_entry_addr, 500 name, 1, LS_TOPLT_EXEC); 501 libsym->arch.plt_slot_addr = plt_slot_addr; 502 503 if (plt_slot_value == plt_entry_addr || plt_slot_value == 0) { 504 libsym->arch.type = PPC64PLT_UNRESOLVED; 505 libsym->arch.resolved_value = plt_entry_addr; 506 507 } else { 508 /* Unresolve the .plt slot. If the binary was 509 * prelinked, this makes the code invalid, because in 510 * case of prelinked binary, the dynamic linker 511 * doesn't update .plt[0] and .plt[1] with addresses 512 * of the resover. But we don't care, we will never 513 * need to enter the resolver. That just means that 514 * we have to un-un-resolve this back before we 515 * detach, which is nothing new: we already need to 516 * retract breakpoints. */ 517 518 if (unresolve_plt_slot(proc, plt_slot_addr, plt_entry_addr) < 0) 519 goto fail; 520 libsym->arch.type = PPC64PLT_RESOLVED; 521 libsym->arch.resolved_value = plt_slot_value; 522 } 523 524 *ret = libsym; 525 return plt_ok; 526} 527 528void 529arch_elf_destroy(struct ltelf *lte) 530{ 531 struct library_symbol *sym; 532 for (sym = lte->arch.stubs; sym != NULL; ) { 533 struct library_symbol *next = sym->next; 534 library_symbol_destroy(sym); 535 free(sym); 536 sym = next; 537 } 538} 539 540static enum callback_status 541keep_stepping_p(struct process_stopping_handler *self) 542{ 543 struct Process *proc = self->task_enabling_breakpoint; 544 struct library_symbol *libsym = self->breakpoint_being_enabled->libsym; 545 GElf_Addr value; 546 if (read_plt_slot_value(proc, libsym->arch.plt_slot_addr, &value) < 0) 547 return CBS_FAIL; 548 549 /* In UNRESOLVED state, the RESOLVED_VALUE in fact contains 550 * the PLT entry value. */ 551 if (value == libsym->arch.resolved_value) 552 return CBS_CONT; 553 554 /* The .plt slot got resolved! We can migrate the breakpoint 555 * to RESOLVED and stop single-stepping. */ 556 if (unresolve_plt_slot(proc, libsym->arch.plt_slot_addr, 557 libsym->arch.resolved_value) < 0) 558 return CBS_FAIL; 559 libsym->arch.type = PPC64PLT_RESOLVED; 560 libsym->arch.resolved_value = value; 561 562 return CBS_STOP; 563} 564 565static enum callback_status 566yes(struct process_stopping_handler *self) 567{ 568 return CBS_CONT; 569} 570 571static void 572ppc64_plt_bp_continue(struct breakpoint *bp, struct Process *proc) 573{ 574 fprintf(stderr, "ppc64_plt_bp_continue\n"); 575 576 switch (bp->libsym->arch.type) { 577 target_address_t rv; 578 579 case PPC64PLT_STUB: 580 /* We should never get here. */ 581 abort(); 582 583 case PPC64PLT_UNRESOLVED: 584 if (process_install_stopping_handler(proc, bp, NULL, 585 &keep_stepping_p, 586 &yes) < 0) { 587 perror("ppc64_unresolved_bp_continue: couldn't install" 588 " event handler"); 589 continue_after_breakpoint(proc, bp); 590 } 591 return; 592 593 case PPC64PLT_RESOLVED: 594 fprintf(stderr, "ppc64_resolved_bp_continue\n"); 595 rv = (target_address_t)bp->libsym->arch.resolved_value; 596 set_instruction_pointer(proc, rv); 597 continue_process(proc->pid); 598 } 599} 600 601/* For some symbol types, we need to set up custom callbacks. */ 602int 603arch_breakpoint_init(struct Process *proc, struct breakpoint *bp) 604{ 605 if (proc->e_machine == EM_PPC 606 || bp->libsym == NULL 607 || bp->libsym->arch.type == PPC64PLT_STUB) 608 return 0; 609 610 static struct bp_callbacks cbs = { 611 .on_continue = ppc64_plt_bp_continue, 612 }; 613 breakpoint_set_callbacks(bp, &cbs); 614 return 0; 615} 616 617void 618arch_breakpoint_destroy(struct breakpoint *bp) 619{ 620} 621