plt.c revision 73b85aadbf377541ac336914e5ff8ec521226a97
1/* 2 * This file is part of ltrace. 3 * Copyright (C) 2012,2013 Petr Machata, Red Hat Inc. 4 * Copyright (C) 2004,2008,2009 Juan Cespedes 5 * Copyright (C) 2006 Paul Gilliam 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License as 9 * published by the Free Software Foundation; either version 2 of the 10 * License, or (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, but 13 * WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 20 * 02110-1301 USA 21 */ 22 23#include <gelf.h> 24#include <sys/ptrace.h> 25#include <errno.h> 26#include <inttypes.h> 27#include <assert.h> 28#include <stdbool.h> 29#include <string.h> 30 31#include "proc.h" 32#include "common.h" 33#include "insn.h" 34#include "library.h" 35#include "breakpoint.h" 36#include "linux-gnu/trace.h" 37#include "backend.h" 38 39/* There are two PLT types on 32-bit PPC: old-style, BSS PLT, and 40 * new-style "secure" PLT. We can tell one from the other by the 41 * flags on the .plt section. If it's +X (executable), it's BSS PLT, 42 * otherwise it's secure. 43 * 44 * BSS PLT works the same way as most architectures: the .plt section 45 * contains trampolines and we put breakpoints to those. If not 46 * prelinked, .plt contains zeroes, and dynamic linker fills in the 47 * initial set of trampolines, which means that we need to delay 48 * enabling breakpoints until after binary entry point is hit. 49 * Additionally, after first call, dynamic linker updates .plt with 50 * branch to resolved address. 
That means that on first hit, we must
 * do something similar to the PPC64 gambit described below.
 *
 * With secure PLT, the .plt section doesn't contain instructions but
 * addresses.  The real PLT table is stored in .text.  Addresses of
 * those PLT entries can be computed, and apart from the fact that
 * they are in .text, they are ordinary PLT entries.
 *
 * 64-bit PPC is more involved.  Program linker creates for each
 * library call a _stub_ symbol named xxxxxxxx.plt_call.<callee>
 * (where xxxxxxxx is a hexadecimal number).  That stub does the call
 * dispatch: it loads an address of a function to call from the
 * section .plt, and branches.  PLT entries themselves are essentially
 * a curried call to the resolver.  When the symbol is resolved, the
 * resolver updates the value stored in .plt, and the next time
 * around, the stub calls the library function directly.  So we make
 * at most one trip (none if the binary is prelinked) through each PLT
 * entry, and correspondingly that is useless as a breakpoint site.
 *
 * Note the three confusing terms: stubs (that play the role of PLT
 * entries), PLT entries, .plt section.
 *
 * We first check symbol tables and see if we happen to have stub
 * symbols available.  If yes we just put breakpoints to those, and
 * treat them as usual breakpoints.  The only tricky part is realizing
 * that there can be more than one breakpoint per symbol.
 *
 * The case that we don't have the stub symbols available is harder.
 * The following scheme uses two kinds of PLT breakpoints: unresolved
 * and resolved (to some address).  When the process starts (or when
 * we attach), we distribute unresolved PLT breakpoints to the PLT
 * entries (not stubs).  Then we look in .plt, and for each entry
 * whose value is different than the corresponding PLT entry address,
 * we assume it was already resolved, and convert the breakpoint to
 * resolved.  We also rewrite the resolved value in .plt back to the
 * PLT address.
 *
 * When a PLT entry hits a resolved breakpoint (which happens because
 * we rewrite .plt with the original unresolved addresses), we move
 * the instruction pointer to the corresponding address and continue
 * the process as if nothing happened.
 *
 * When unresolved PLT entry is called for the first time, we need to
 * catch the new value that the resolver will write to a .plt slot.
 * We also need to prevent another thread from racing through and
 * taking the branch without ltrace noticing.  So when unresolved PLT
 * entry hits, we have to stop all threads.  We then single-step
 * through the resolver, until the .plt slot changes.  When it does,
 * we treat it the same way as above: convert the PLT breakpoint to
 * resolved, and rewrite the .plt value back to PLT address.  We then
 * start all threads again.
 *
 * As an optimization, we remember the address where the address was
 * resolved, and put a breakpoint there.  The next time around (when
 * the next PLT entry is to be resolved), instead of single-stepping
 * through half the dynamic linker, we just let the thread run and hit
 * this breakpoint.  When it hits, we know the PLT entry was resolved.
 *
 * Another twist comes from tracing slots corresponding to
 * R_PPC64_JMP_IREL relocations.  These have no dedicated PLT entry.
 * The calls are done directly from stubs, and the .plt entry
 * (actually .iplt entry, these live in a special section) is resolved
 * in advance before the binary starts.  Because there's no PLT entry,
 * we put the PLT breakpoints directly to the IFUNC resolver code, and
 * then would like them to behave like ordinary PLT slots, including
 * catching the point where these get resolved to unresolve them.
So 116 * for the first call (which is the actual resolver call), we pretend 117 * that this breakpoint is artificial and has no associated symbol, 118 * and turn it on fully only after the first hit. Ideally we would 119 * trace that first call as well, but then the stepper, which tries to 120 * catch the point where the slot is resolved, would hit the return 121 * breakpoint and that's not currently handled well. 122 * 123 * XXX TODO If we have hardware watch point, we might put a read watch 124 * on .plt slot, and discover the offenders this way. I don't know 125 * the details, but I assume at most a handful (like, one or two, if 126 * available at all) addresses may be watched at a time, and thus this 127 * would be used as an amendment of the above rather than full-on 128 * solution to PLT tracing on PPC. 129 */ 130 131#define PPC_PLT_STUB_SIZE 16 132#define PPC64_PLT_STUB_SIZE 8 //xxx 133 134static inline int 135host_powerpc64() 136{ 137#ifdef __powerpc64__ 138 return 1; 139#else 140 return 0; 141#endif 142} 143 144static void 145mark_as_resolved(struct library_symbol *libsym, GElf_Addr value) 146{ 147 libsym->arch.type = PPC_PLT_RESOLVED; 148 libsym->arch.resolved_value = value; 149} 150 151static void 152ppc32_delayed_symbol(struct library_symbol *libsym) 153{ 154 /* arch_dynlink_done is called on attach as well. In that 155 * case some slots will have been resolved already. 156 * Unresolved PLT looks like this: 157 * 158 * <sleep@plt>: li r11,0 159 * <sleep@plt+4>: b "resolve" 160 * 161 * "resolve" is another address in PLTGOT (the same block that 162 * all the PLT slots are it). When resolved, it looks either 163 * this way: 164 * 165 * <sleep@plt>: b 0xfea88d0 <sleep> 166 * 167 * Which is easy to detect. It can also look this way: 168 * 169 * <sleep@plt>: li r11,0 170 * <sleep@plt+4>: b "dispatch" 171 * 172 * The "dispatch" address lies in PLTGOT as well. In current 173 * GNU toolchain, "dispatch" address is the same as PLTGOT 174 * address. 
We rely on this to figure out whether the address 175 * is resolved or not. */ 176 177 uint32_t insn1 = libsym->arch.resolved_value >> 32; 178 uint32_t insn2 = (uint32_t) libsym->arch.resolved_value; 179 if ((insn1 & BRANCH_MASK) == B_INSN 180 || ((insn2 & BRANCH_MASK) == B_INSN 181 /* XXX double cast */ 182 && (ppc_branch_dest(libsym->enter_addr + 4, insn2) 183 == (arch_addr_t) (long) libsym->lib->arch.pltgot_addr))) 184 { 185 mark_as_resolved(libsym, libsym->arch.resolved_value); 186 } 187} 188 189void 190arch_dynlink_done(struct process *proc) 191{ 192 /* We may need to activate delayed symbols. */ 193 struct library_symbol *libsym = NULL; 194 while ((libsym = proc_each_symbol(proc, libsym, 195 library_symbol_delayed_cb, NULL))) { 196 if (proc_read_64(proc, libsym->enter_addr, 197 &libsym->arch.resolved_value) < 0) { 198 fprintf(stderr, 199 "couldn't read PLT value for %s(%p): %s\n", 200 libsym->name, libsym->enter_addr, 201 strerror(errno)); 202 return; 203 } 204 205 if (proc->e_machine == EM_PPC) 206 ppc32_delayed_symbol(libsym); 207 208 fprintf(stderr, "activating %s\n", libsym->name); 209 if (proc_activate_delayed_symbol(proc, libsym) < 0) 210 return; 211 212 if (proc->e_machine == EM_PPC) 213 /* XXX double cast */ 214 libsym->arch.plt_slot_addr 215 = (GElf_Addr) (uintptr_t) libsym->enter_addr; 216 } 217} 218 219static bool 220reloc_is_irelative(int machine, GElf_Rela *rela) 221{ 222 bool irelative = false; 223 if (machine == EM_PPC64) { 224#ifdef R_PPC64_JMP_IREL 225 irelative = GELF_R_TYPE(rela->r_info) == R_PPC64_JMP_IREL; 226#endif 227 } else { 228 assert(machine == EM_PPC); 229#ifdef R_PPC_IRELATIVE 230 irelative = GELF_R_TYPE(rela->r_info) == R_PPC_IRELATIVE; 231#endif 232 } 233 return irelative; 234} 235 236GElf_Addr 237arch_plt_sym_val(struct ltelf *lte, size_t ndx, GElf_Rela *rela) 238{ 239 if (lte->ehdr.e_machine == EM_PPC && lte->arch.secure_plt) { 240 assert(lte->arch.plt_stub_vma != 0); 241 return lte->arch.plt_stub_vma + PPC_PLT_STUB_SIZE * 
ndx; 242 243 } else if (lte->ehdr.e_machine == EM_PPC) { 244 return rela->r_offset; 245 246 /* Beyond this point, we are on PPC64, but don't have stub 247 * symbols. */ 248 249 } else if (reloc_is_irelative(lte->ehdr.e_machine, rela)) { 250 251 /* Put JMP_IREL breakpoint to resolver, since there's 252 * no dedicated PLT entry. */ 253 254 assert(rela->r_addend != 0); 255 /* XXX double cast */ 256 arch_addr_t res_addr = (arch_addr_t) (uintptr_t) rela->r_addend; 257 if (arch_translate_address(lte, res_addr, &res_addr) < 0) { 258 fprintf(stderr, "Couldn't OPD-translate IRELATIVE " 259 "resolver address.\n"); 260 return 0; 261 } 262 /* XXX double cast */ 263 return (GElf_Addr) (uintptr_t) res_addr; 264 265 } else { 266 /* We put brakpoints to PLT entries the same as the 267 * PPC32 secure PLT case does. */ 268 assert(lte->arch.plt_stub_vma != 0); 269 return lte->arch.plt_stub_vma + PPC64_PLT_STUB_SIZE * ndx; 270 } 271} 272 273/* This entry point is called when ltelf is not available 274 * anymore--during runtime. At that point we don't have to concern 275 * ourselves with bias, as the values in OPD have been resolved 276 * already. */ 277int 278arch_translate_address_dyn(struct process *proc, 279 arch_addr_t addr, arch_addr_t *ret) 280{ 281 if (proc->e_machine == EM_PPC64) { 282 uint64_t value; 283 if (proc_read_64(proc, addr, &value) < 0) { 284 fprintf(stderr, 285 "dynamic .opd translation of %p: %s\n", 286 addr, strerror(errno)); 287 return -1; 288 } 289 /* XXX The double cast should be removed when 290 * arch_addr_t becomes integral type. */ 291 *ret = (arch_addr_t)(uintptr_t)value; 292 return 0; 293 } 294 295 *ret = addr; 296 return 0; 297} 298 299int 300arch_translate_address(struct ltelf *lte, 301 arch_addr_t addr, arch_addr_t *ret) 302{ 303 if (lte->ehdr.e_machine == EM_PPC64) { 304 /* XXX The double cast should be removed when 305 * arch_addr_t becomes integral type. 
*/ 306 GElf_Xword offset 307 = (GElf_Addr)(uintptr_t)addr - lte->arch.opd_base; 308 uint64_t value; 309 if (elf_read_u64(lte->arch.opd_data, offset, &value) < 0) { 310 fprintf(stderr, "static .opd translation of %p: %s\n", 311 addr, elf_errmsg(-1)); 312 return -1; 313 } 314 *ret = (arch_addr_t)(uintptr_t)(value + lte->bias); 315 return 0; 316 } 317 318 *ret = addr; 319 return 0; 320} 321 322static int 323load_opd_data(struct ltelf *lte, struct library *lib) 324{ 325 Elf_Scn *sec; 326 GElf_Shdr shdr; 327 if (elf_get_section_named(lte, ".opd", &sec, &shdr) < 0 328 || sec == NULL) { 329 fail: 330 fprintf(stderr, "couldn't find .opd data\n"); 331 return -1; 332 } 333 334 lte->arch.opd_data = elf_rawdata(sec, NULL); 335 if (lte->arch.opd_data == NULL) 336 goto fail; 337 338 lte->arch.opd_base = shdr.sh_addr + lte->bias; 339 lte->arch.opd_size = shdr.sh_size; 340 341 return 0; 342} 343 344void * 345sym2addr(struct process *proc, struct library_symbol *sym) 346{ 347 return sym->enter_addr; 348} 349 350static GElf_Addr 351get_glink_vma(struct ltelf *lte, GElf_Addr ppcgot, Elf_Data *plt_data) 352{ 353 Elf_Scn *ppcgot_sec = NULL; 354 GElf_Shdr ppcgot_shdr; 355 if (ppcgot != 0 356 && (elf_get_section_covering(lte, ppcgot, 357 &ppcgot_sec, &ppcgot_shdr) < 0 358 || ppcgot_sec == NULL)) 359 fprintf(stderr, 360 "DT_PPC_GOT=%#"PRIx64", but no such section found\n", 361 ppcgot); 362 363 if (ppcgot_sec != NULL) { 364 Elf_Data *data = elf_loaddata(ppcgot_sec, &ppcgot_shdr); 365 if (data == NULL || data->d_size < 8 ) { 366 fprintf(stderr, "couldn't read GOT data\n"); 367 } else { 368 // where PPCGOT begins in .got 369 size_t offset = ppcgot - ppcgot_shdr.sh_addr; 370 assert(offset % 4 == 0); 371 uint32_t glink_vma; 372 if (elf_read_u32(data, offset + 4, &glink_vma) < 0) { 373 fprintf(stderr, "couldn't read glink VMA" 374 " address at %zd@GOT\n", offset); 375 return 0; 376 } 377 if (glink_vma != 0) { 378 debug(1, "PPC GOT glink_vma address: %#" PRIx32, 379 glink_vma); 380 return 
(GElf_Addr)glink_vma; 381 } 382 } 383 } 384 385 if (plt_data != NULL) { 386 uint32_t glink_vma; 387 if (elf_read_u32(plt_data, 0, &glink_vma) < 0) { 388 fprintf(stderr, "couldn't read glink VMA address\n"); 389 return 0; 390 } 391 debug(1, ".plt glink_vma address: %#" PRIx32, glink_vma); 392 return (GElf_Addr)glink_vma; 393 } 394 395 return 0; 396} 397 398static int 399load_dynamic_entry(struct ltelf *lte, int tag, GElf_Addr *valuep) 400{ 401 Elf_Scn *scn; 402 GElf_Shdr shdr; 403 if (elf_get_section_type(lte, SHT_DYNAMIC, &scn, &shdr) < 0 404 || scn == NULL) { 405 fail: 406 fprintf(stderr, "Couldn't get SHT_DYNAMIC: %s\n", 407 elf_errmsg(-1)); 408 return -1; 409 } 410 411 Elf_Data *data = elf_loaddata(scn, &shdr); 412 if (data == NULL) 413 goto fail; 414 415 size_t j; 416 for (j = 0; j < shdr.sh_size / shdr.sh_entsize; ++j) { 417 GElf_Dyn dyn; 418 if (gelf_getdyn(data, j, &dyn) == NULL) 419 goto fail; 420 421 if(dyn.d_tag == tag) { 422 *valuep = dyn.d_un.d_ptr; 423 return 0; 424 } 425 } 426 427 return -1; 428} 429 430static int 431nonzero_data(Elf_Data *data) 432{ 433 /* We are not supposed to get here if there's no PLT. */ 434 assert(data != NULL); 435 436 unsigned char *buf = data->d_buf; 437 if (buf == NULL) 438 return 0; 439 440 size_t i; 441 for (i = 0; i < data->d_size; ++i) 442 if (buf[i] != 0) 443 return 1; 444 return 0; 445} 446 447static enum callback_status 448reloc_copy_if_irelative(GElf_Rela *rela, void *data) 449{ 450 struct ltelf *lte = data; 451 452 return CBS_STOP_IF(reloc_is_irelative(lte->ehdr.e_machine, rela) 453 && VECT_PUSHBACK(<e->plt_relocs, rela) < 0); 454} 455 456int 457arch_elf_init(struct ltelf *lte, struct library *lib) 458{ 459 if (lte->ehdr.e_machine == EM_PPC64 460 && load_opd_data(lte, lib) < 0) 461 return -1; 462 463 lte->arch.secure_plt = !(lte->plt_flags & SHF_EXECINSTR); 464 465 /* For PPC32 BSS, it is important whether the binary was 466 * prelinked. 
If .plt section is NODATA, or if it contains 467 * zeroes, then this library is not prelinked, and we need to 468 * delay breakpoints. */ 469 if (lte->ehdr.e_machine == EM_PPC && !lte->arch.secure_plt) 470 lib->arch.bss_plt_prelinked = nonzero_data(lte->plt_data); 471 else 472 /* For cases where it's irrelevant, initialize the 473 * value to something conspicuous. */ 474 lib->arch.bss_plt_prelinked = -1; 475 476 if (lte->ehdr.e_machine == EM_PPC && lte->arch.secure_plt) { 477 GElf_Addr ppcgot; 478 if (load_dynamic_entry(lte, DT_PPC_GOT, &ppcgot) < 0) { 479 fprintf(stderr, "couldn't find DT_PPC_GOT\n"); 480 return -1; 481 } 482 GElf_Addr glink_vma = get_glink_vma(lte, ppcgot, lte->plt_data); 483 484 size_t count = vect_size(<e->plt_relocs); 485 lte->arch.plt_stub_vma = glink_vma 486 - (GElf_Addr)count * PPC_PLT_STUB_SIZE; 487 debug(1, "stub_vma is %#" PRIx64, lte->arch.plt_stub_vma); 488 489 } else if (lte->ehdr.e_machine == EM_PPC64) { 490 GElf_Addr glink_vma; 491 if (load_dynamic_entry(lte, DT_PPC64_GLINK, &glink_vma) < 0) { 492 fprintf(stderr, "couldn't find DT_PPC64_GLINK\n"); 493 return -1; 494 } 495 496 /* The first glink stub starts at offset 32. */ 497 lte->arch.plt_stub_vma = glink_vma + 32; 498 499 } else { 500 /* By exhaustion--PPC32 BSS. */ 501 if (load_dynamic_entry(lte, DT_PLTGOT, 502 &lib->arch.pltgot_addr) < 0) { 503 fprintf(stderr, "couldn't find DT_PLTGOT\n"); 504 return -1; 505 } 506 } 507 508 /* On PPC64, look for stub symbols in symbol table. These are 509 * called: xxxxxxxx.plt_call.callee_name@version+addend. */ 510 if (lte->ehdr.e_machine == EM_PPC64 511 && lte->symtab != NULL && lte->strtab != NULL) { 512 513 /* N.B. We can't simply skip the symbols that we fail 514 * to read or malloc. There may be more than one stub 515 * per symbol name, and if we failed in one but 516 * succeeded in another, the PLT enabling code would 517 * have no way to tell that something is missing. 
We 518 * could work around that, of course, but it doesn't 519 * seem worth the trouble. So if anything fails, we 520 * just pretend that we don't have stub symbols at 521 * all, as if the binary is stripped. */ 522 523 size_t i; 524 for (i = 0; i < lte->symtab_count; ++i) { 525 GElf_Sym sym; 526 if (gelf_getsym(lte->symtab, i, &sym) == NULL) { 527 struct library_symbol *sym, *next; 528 fail: 529 for (sym = lte->arch.stubs; sym != NULL; ) { 530 next = sym->next; 531 library_symbol_destroy(sym); 532 free(sym); 533 sym = next; 534 } 535 lte->arch.stubs = NULL; 536 break; 537 } 538 539 const char *name = lte->strtab + sym.st_name; 540 541#define STUBN ".plt_call." 542 if ((name = strstr(name, STUBN)) == NULL) 543 continue; 544 name += sizeof(STUBN) - 1; 545#undef STUBN 546 547 size_t len; 548 const char *ver = strchr(name, '@'); 549 if (ver != NULL) { 550 len = ver - name; 551 552 } else { 553 /* If there is "+" at all, check that 554 * the symbol name ends in "+0". */ 555 const char *add = strrchr(name, '+'); 556 if (add != NULL) { 557 assert(strcmp(add, "+0") == 0); 558 len = add - name; 559 } else { 560 len = strlen(name); 561 } 562 } 563 564 char *sym_name = strndup(name, len); 565 struct library_symbol *libsym = malloc(sizeof(*libsym)); 566 if (sym_name == NULL || libsym == NULL) { 567 fail2: 568 free(sym_name); 569 free(libsym); 570 goto fail; 571 } 572 573 /* XXX The double cast should be removed when 574 * arch_addr_t becomes integral type. */ 575 arch_addr_t addr = (arch_addr_t) 576 (uintptr_t)sym.st_value + lte->bias; 577 if (library_symbol_init(libsym, addr, sym_name, 1, 578 LS_TOPLT_EXEC) < 0) 579 goto fail2; 580 libsym->arch.type = PPC64_PLT_STUB; 581 libsym->next = lte->arch.stubs; 582 lte->arch.stubs = libsym; 583 } 584 } 585 586 /* On PPC64, IRELATIVE relocations actually relocate .iplt 587 * section, and as such are stored in .rela.dyn (where all 588 * non-PLT relocations are stored) instead of .rela.plt. Add 589 * these to lte->plt_relocs. 
*/ 590 extern int read_relplt(struct ltelf *lte, Elf_Scn *scn, GElf_Shdr *shdr, 591 struct vect *ret); 592 593 GElf_Addr rela, relasz; 594 Elf_Scn *rela_sec; 595 GElf_Shdr rela_shdr; 596 if (lte->ehdr.e_machine == EM_PPC64 597 && load_dynamic_entry(lte, DT_RELA, &rela) == 0 598 && load_dynamic_entry(lte, DT_RELASZ, &relasz) == 0 599 && elf_get_section_covering(lte, rela, &rela_sec, &rela_shdr) == 0 600 && rela_sec != NULL) { 601 602 struct vect v; 603 VECT_INIT(&v, GElf_Rela); 604 int ret = read_relplt(lte, rela_sec, &rela_shdr, &v); 605 if (ret >= 0 606 && VECT_EACH(&v, GElf_Rela, NULL, 607 reloc_copy_if_irelative, lte) != NULL) 608 ret = -1; 609 610 VECT_DESTROY(&v, GElf_Rela, NULL, NULL); 611 612 if (ret < 0) 613 return ret; 614 } 615 return 0; 616} 617 618static int 619read_plt_slot_value(struct process *proc, GElf_Addr addr, GElf_Addr *valp) 620{ 621 /* On PPC64, we read from .plt, which contains 8 byte 622 * addresses. On PPC32 we read from .plt, which contains 4 623 * byte instructions, but the PLT is two instructions, and 624 * either can change. */ 625 uint64_t l; 626 /* XXX double cast. */ 627 if (proc_read_64(proc, (arch_addr_t)(uintptr_t)addr, &l) < 0) { 628 fprintf(stderr, "ptrace .plt slot value @%#" PRIx64": %s\n", 629 addr, strerror(errno)); 630 return -1; 631 } 632 633 *valp = (GElf_Addr)l; 634 return 0; 635} 636 637static int 638unresolve_plt_slot(struct process *proc, GElf_Addr addr, GElf_Addr value) 639{ 640 /* We only modify plt_entry[0], which holds the resolved 641 * address of the routine. We keep the TOC and environment 642 * pointers intact. Hence the only adjustment that we need to 643 * do is to IP. 
*/ 644 if (ptrace(PTRACE_POKETEXT, proc->pid, addr, value) < 0) { 645 fprintf(stderr, "failed to unresolve .plt slot: %s\n", 646 strerror(errno)); 647 return -1; 648 } 649 return 0; 650} 651 652enum plt_status 653arch_elf_add_plt_entry(struct process *proc, struct ltelf *lte, 654 const char *a_name, GElf_Rela *rela, size_t ndx, 655 struct library_symbol **ret) 656{ 657 if (lte->ehdr.e_machine == EM_PPC) { 658 if (lte->arch.secure_plt) 659 return PLT_DEFAULT; 660 661 struct library_symbol *libsym = NULL; 662 if (default_elf_add_plt_entry(proc, lte, a_name, rela, ndx, 663 &libsym) < 0) 664 return PLT_FAIL; 665 666 /* On PPC32 with BSS PLT, delay the symbol until 667 * dynamic linker is done. */ 668 assert(!libsym->delayed); 669 libsym->delayed = 1; 670 671 *ret = libsym; 672 return PLT_OK; 673 674 } 675 676 bool is_irelative = reloc_is_irelative(lte->ehdr.e_machine, rela); 677 char *name; 678 if (is_irelative) 679 name = linux_elf_find_irelative_name(lte, rela); 680 else 681 name = strdup(a_name); 682 683 if (name == NULL) 684 return PLT_FAIL; 685 686 /* PPC64. If we have stubs, we return a chain of breakpoint 687 * sites, one for each stub that corresponds to this PLT 688 * entry. */ 689 struct library_symbol *chain = NULL; 690 struct library_symbol **symp; 691 for (symp = <e->arch.stubs; *symp != NULL; ) { 692 struct library_symbol *sym = *symp; 693 if (strcmp(sym->name, name) != 0) { 694 symp = &(*symp)->next; 695 continue; 696 } 697 698 /* Re-chain the symbol from stubs to CHAIN. */ 699 *symp = sym->next; 700 sym->next = chain; 701 chain = sym; 702 } 703 704 if (chain != NULL) { 705 *ret = chain; 706 free(name); 707 return PLT_OK; 708 } 709 710 /* We don't have stub symbols. Find corresponding .plt slot, 711 * and check whether it contains the corresponding PLT address 712 * (or 0 if the dynamic linker hasn't run yet). N.B. we don't 713 * want read this from ELF file, but from process image. 
That 714 * makes a difference if we are attaching to a running 715 * process. */ 716 717 GElf_Addr plt_entry_addr = arch_plt_sym_val(lte, ndx, rela); 718 GElf_Addr plt_slot_addr = rela->r_offset; 719 720 assert(plt_slot_addr >= lte->plt_addr 721 || plt_slot_addr < lte->plt_addr + lte->plt_size); 722 723 GElf_Addr plt_slot_value; 724 if (read_plt_slot_value(proc, plt_slot_addr, &plt_slot_value) < 0) { 725 free(name); 726 return PLT_FAIL; 727 } 728 729 struct library_symbol *libsym = malloc(sizeof(*libsym)); 730 if (libsym == NULL) { 731 fprintf(stderr, "allocation for .plt slot: %s\n", 732 strerror(errno)); 733 fail: 734 free(name); 735 free(libsym); 736 return PLT_FAIL; 737 } 738 739 /* XXX The double cast should be removed when 740 * arch_addr_t becomes integral type. */ 741 if (library_symbol_init(libsym, 742 (arch_addr_t) (uintptr_t) plt_entry_addr, 743 name, 1, LS_TOPLT_EXEC) < 0) 744 goto fail; 745 libsym->arch.plt_slot_addr = plt_slot_addr; 746 747 if (! is_irelative 748 && (plt_slot_value == plt_entry_addr || plt_slot_value == 0)) { 749 libsym->arch.type = PPC_PLT_UNRESOLVED; 750 libsym->arch.resolved_value = plt_entry_addr; 751 752 } else { 753 /* Unresolve the .plt slot. If the binary was 754 * prelinked, this makes the code invalid, because in 755 * case of prelinked binary, the dynamic linker 756 * doesn't update .plt[0] and .plt[1] with addresses 757 * of the resover. But we don't care, we will never 758 * need to enter the resolver. That just means that 759 * we have to un-un-resolve this back before we 760 * detach. */ 761 762 if (unresolve_plt_slot(proc, plt_slot_addr, plt_entry_addr) < 0) { 763 library_symbol_destroy(libsym); 764 goto fail; 765 } 766 767 if (! 
is_irelative) { 768 mark_as_resolved(libsym, plt_slot_value); 769 } else { 770 libsym->arch.type = PPC_PLT_IRELATIVE; 771 libsym->arch.resolved_value = plt_entry_addr; 772 } 773 } 774 775 *ret = libsym; 776 return PLT_OK; 777} 778 779void 780arch_elf_destroy(struct ltelf *lte) 781{ 782 struct library_symbol *sym; 783 for (sym = lte->arch.stubs; sym != NULL; ) { 784 struct library_symbol *next = sym->next; 785 library_symbol_destroy(sym); 786 free(sym); 787 sym = next; 788 } 789} 790 791static void 792dl_plt_update_bp_on_hit(struct breakpoint *bp, struct process *proc) 793{ 794 debug(DEBUG_PROCESS, "pid=%d dl_plt_update_bp_on_hit %s(%p)", 795 proc->pid, breakpoint_name(bp), bp->addr); 796 struct process_stopping_handler *self = proc->arch.handler; 797 assert(self != NULL); 798 799 struct library_symbol *libsym = self->breakpoint_being_enabled->libsym; 800 GElf_Addr value; 801 if (read_plt_slot_value(proc, libsym->arch.plt_slot_addr, &value) < 0) 802 return; 803 804 /* On PPC64, we rewrite the slot value. */ 805 if (proc->e_machine == EM_PPC64) 806 unresolve_plt_slot(proc, libsym->arch.plt_slot_addr, 807 libsym->arch.resolved_value); 808 /* We mark the breakpoint as resolved on both arches. */ 809 mark_as_resolved(libsym, value); 810 811 /* cb_on_all_stopped looks if HANDLER is set to NULL as a way 812 * to check that this was run. It's an error if it 813 * wasn't. */ 814 proc->arch.handler = NULL; 815 816 breakpoint_turn_off(bp, proc); 817} 818 819static void 820cb_on_all_stopped(struct process_stopping_handler *self) 821{ 822 /* Put that in for dl_plt_update_bp_on_hit to see. 
*/ 823 assert(self->task_enabling_breakpoint->arch.handler == NULL); 824 self->task_enabling_breakpoint->arch.handler = self; 825 826 linux_ptrace_disable_and_continue(self); 827} 828 829static enum callback_status 830cb_keep_stepping_p(struct process_stopping_handler *self) 831{ 832 struct process *proc = self->task_enabling_breakpoint; 833 struct library_symbol *libsym = self->breakpoint_being_enabled->libsym; 834 835 GElf_Addr value; 836 if (read_plt_slot_value(proc, libsym->arch.plt_slot_addr, &value) < 0) 837 return CBS_FAIL; 838 839 /* In UNRESOLVED state, the RESOLVED_VALUE in fact contains 840 * the PLT entry value. */ 841 if (value == libsym->arch.resolved_value) 842 return CBS_CONT; 843 844 debug(DEBUG_PROCESS, "pid=%d PLT got resolved to value %#"PRIx64, 845 proc->pid, value); 846 847 /* The .plt slot got resolved! We can migrate the breakpoint 848 * to RESOLVED and stop single-stepping. */ 849 if (proc->e_machine == EM_PPC64 850 && unresolve_plt_slot(proc, libsym->arch.plt_slot_addr, 851 libsym->arch.resolved_value) < 0) 852 return CBS_FAIL; 853 854 /* Resolving on PPC64 consists of overwriting a doubleword in 855 * .plt. That doubleword is than read back by a stub, and 856 * jumped on. Hopefully we can assume that double word update 857 * is done on a single place only, as it contains a final 858 * address. We still need to look around for any sync 859 * instruction, but essentially it is safe to optimize away 860 * the single stepping next time and install a post-update 861 * breakpoint. 862 * 863 * The situation on PPC32 BSS is more complicated. The 864 * dynamic linker here updates potentially several 865 * instructions (XXX currently we assume two) and the rules 866 * are more complicated. Sometimes it's enough to adjust just 867 * one of the addresses--the logic for generating optimal 868 * dispatch depends on relative addresses of the .plt entry 869 * and the jump destination. 
We can't assume that the same
	 * instruction block does the update every time.  So on PPC32,
	 * we turn the optimization off and just step through it each
	 * time.  */
	if (proc->e_machine == EM_PPC)
		goto done;

	/* Install breakpoint to the address where the change takes
	 * place.  If we fail, then that just means that we'll have to
	 * singlestep the next time around as well.  */
	struct process *leader = proc->leader;
	if (leader == NULL || leader->arch.dl_plt_update_bp != NULL)
		goto done;

	/* We need to install to the next instruction.  ADDR points to
	 * a store instruction, so moving the breakpoint one
	 * instruction forward is safe.  */
	arch_addr_t addr = get_instruction_pointer(proc) + 4;
	leader->arch.dl_plt_update_bp = insert_breakpoint_at(proc, addr, NULL);
	if (leader->arch.dl_plt_update_bp == NULL)
		goto done;

	static struct bp_callbacks dl_plt_update_cbs = {
		.on_hit = dl_plt_update_bp_on_hit,
	};
	leader->arch.dl_plt_update_bp->cbs = &dl_plt_update_cbs;

	/* Turn it off for now.  We will turn it on again when we hit
	 * the PLT entry that needs this.  */
	breakpoint_turn_off(leader->arch.dl_plt_update_bp, proc);

done:
	mark_as_resolved(libsym, value);

	return CBS_STOP;
}

/* Redirect PROC's instruction pointer to the resolved address cached
 * in BP's symbol, bypassing the PLT dispatch entirely.  */
static void
jump_to_entry_point(struct process *proc, struct breakpoint *bp)
{
	/* XXX The double cast should be removed when
	 * arch_addr_t becomes integral type.  */
	arch_addr_t rv = (arch_addr_t)
		(uintptr_t)bp->libsym->arch.resolved_value;
	set_instruction_pointer(proc, rv);
}

/* The on_continue callback for PLT breakpoints.  Depending on the
 * current resolution state of the symbol, either install a stopping
 * handler to track the resolver (single-stepping, or waiting for the
 * dl_plt_update breakpoint), jump straight to the already-resolved
 * entry point, or just continue past the breakpoint.  */
static void
ppc_plt_bp_continue(struct breakpoint *bp, struct process *proc)
{
	/* If this is a first call through IREL breakpoint, enable the
	 * symbol so that it doesn't look like an artificial
	 * breakpoint anymore.  */
	if (bp->libsym == NULL) {
		assert(bp->arch.irel_libsym != NULL);
		bp->libsym = bp->arch.irel_libsym;
		bp->arch.irel_libsym = NULL;
	}

	switch (bp->libsym->arch.type) {
		struct process *leader;
		void (*on_all_stopped)(struct process_stopping_handler *);
		enum callback_status (*keep_stepping_p)
			(struct process_stopping_handler *);

	case PPC_DEFAULT:
		assert(proc->e_machine == EM_PPC);
		assert(bp->libsym != NULL);
		assert(bp->libsym->lib->arch.bss_plt_prelinked == 0);
		/* Fall through.  */

	case PPC_PLT_IRELATIVE:
	case PPC_PLT_UNRESOLVED:
		on_all_stopped = NULL;
		keep_stepping_p = NULL;
		leader = proc->leader;

		/* If the dl_plt_update breakpoint can be turned on,
		 * it will tell us when the .plt slot changes and we
		 * need not single-step; otherwise fall back to
		 * stepping through the resolver.  */
		if (leader != NULL && leader->arch.dl_plt_update_bp != NULL
		    && breakpoint_turn_on(leader->arch.dl_plt_update_bp,
					  proc) >= 0)
			on_all_stopped = cb_on_all_stopped;
		else
			keep_stepping_p = cb_keep_stepping_p;

		if (process_install_stopping_handler
		    (proc, bp, on_all_stopped, keep_stepping_p, NULL) < 0) {
			fprintf(stderr, "ppc_plt_bp_continue: "
				"couldn't install event handler\n");
			continue_after_breakpoint(proc, bp);
		}
		return;

	case PPC_PLT_RESOLVED:
		if (proc->e_machine == EM_PPC) {
			continue_after_breakpoint(proc, bp);
			return;
		}

		/* On PPC64, skip the (rewritten) PLT machinery and
		 * land directly on the recorded entry point.  */
		jump_to_entry_point(proc, bp);
		continue_process(proc->pid);
		return;

	case PPC64_PLT_STUB:
		/* These should never hit here.  */
		break;
	}

	/* Deliberately self-contradictory assert: always fails, to
	 * flag an unhandled symbol type before aborting.  */
	assert(bp->libsym->arch.type != bp->libsym->arch.type);
	abort();
}

/* When a process is in a PLT stub, it may have already read the data
 * in .plt that we changed.  If we detach now, it will jump to PLT
 * entry and continue to the dynamic linker, where it will SIGSEGV,
 * because zeroth .plt slot is not filled in prelinked binaries, and
 * the dynamic linker needs that data.  Moreover, the process may
 * actually have hit the breakpoint already.  This function tries to
 * detect both cases and do any fix-ups necessary to mend this
 * situation.  */
static enum callback_status
detach_task_cb(struct process *task, void *data)
{
	struct breakpoint *bp = data;

	/* TASK sitting exactly on the breakpoint address: move it to
	 * the real entry point so it doesn't re-enter the PLT after
	 * the breakpoint is removed.  */
	if (get_instruction_pointer(task) == bp->addr) {
		debug(DEBUG_PROCESS, "%d at %p, which is PLT slot",
		      task->pid, bp->addr);
		jump_to_entry_point(task, bp);
		return CBS_CONT;
	}

	/* XXX There's still a window of several instructions where we
	 * might catch the task inside a stub such that it has already
	 * read destination address from .plt, but hasn't jumped yet,
	 * thus avoiding the breakpoint.  */

	return CBS_CONT;
}

/* The on_retract callback: before detaching, undo our .plt rewrites
 * and fix up any task currently stopped on the breakpoint.  */
static void
ppc_plt_bp_retract(struct breakpoint *bp, struct process *proc)
{
	/* On PPC64, we rewrite .plt with PLT entry addresses.  This
	 * needs to be undone.  Unfortunately, the program may have
	 * made decisions based on that value.  */
	if (proc->e_machine == EM_PPC64
	    && bp->libsym != NULL
	    && bp->libsym->arch.type == PPC_PLT_RESOLVED) {
		each_task(proc->leader, NULL, detach_task_cb, bp);
		unresolve_plt_slot(proc, bp->libsym->arch.plt_slot_addr,
				   bp->libsym->arch.resolved_value);
	}
}

/* No per-library arch data needs initialization on this backend.  */
int
arch_library_init(struct library *lib)
{
	return 0;
}

/* Nothing to release: arch_library_init allocates nothing.  */
void
arch_library_destroy(struct library *lib)
{
}

/* Per-library arch data is trivially copyable; nothing to do.  */
int
arch_library_clone(struct library *retp, struct library *lib)
{
	return 0;
}

/* Default per-symbol arch state for symbols created outside this
 * file.  */
int
arch_library_symbol_init(struct library_symbol *libsym)
{
	/* We set type explicitly in the code above, where we have the
	 * necessary context.  This is for calls from ltrace-elf.c and
	 * such.  */
	libsym->arch.type = PPC_DEFAULT;
	return 0;
}

/* Per-symbol arch state owns no resources.  */
void
arch_library_symbol_destroy(struct library_symbol *libsym)
{
}

/* Shallow copy is sufficient: arch state holds only plain values.  */
int
arch_library_symbol_clone(struct library_symbol *retp,
			  struct library_symbol *libsym)
{
	retp->arch = libsym->arch;
	return 0;
}

/* For some symbol types, we need to set up custom callbacks.  XXX we
 * don't need PROC here, we can store the data in BP if it is of
 * interest to us.  */
int
arch_breakpoint_init(struct process *proc, struct breakpoint *bp)
{
	bp->arch.irel_libsym = NULL;

	/* Artificial and entry-point breakpoints are plain.  */
	if (bp->libsym == NULL || bp->libsym->plt_type != LS_TOPLT_EXEC)
		return 0;

	/* On PPC, secure PLT and prelinked BSS PLT are plain.  */
	if (proc->e_machine == EM_PPC
	    && bp->libsym->lib->arch.bss_plt_prelinked != 0)
		return 0;

	/* On PPC64, stub PLT breakpoints are plain.  */
	if (proc->e_machine == EM_PPC64
	    && bp->libsym->arch.type == PPC64_PLT_STUB)
		return 0;

	static struct bp_callbacks cbs = {
		.on_continue = ppc_plt_bp_continue,
		.on_retract = ppc_plt_bp_retract,
	};
	breakpoint_set_callbacks(bp, &cbs);

	/* For JMP_IREL breakpoints, make the breakpoint look
	 * artificial by hiding the symbol.  */
	if (bp->libsym->arch.type == PPC_PLT_IRELATIVE) {
		bp->arch.irel_libsym = bp->libsym;
		bp->libsym = NULL;
	}

	return 0;
}

/* Per-breakpoint arch state owns no resources.  */
void
arch_breakpoint_destroy(struct breakpoint *bp)
{
}

/* Shallow copy: arch state is a pair of plain pointers, ownership
 * stays with the cloned process tree.  */
int
arch_breakpoint_clone(struct breakpoint *retp, struct breakpoint *sbp)
{
	retp->arch = sbp->arch;
	return 0;
}

/* Fresh per-process arch state: no dl_plt_update breakpoint and no
 * stopping handler exist yet.  */
int
arch_process_init(struct process *proc)
{
	proc->arch.dl_plt_update_bp = NULL;
	proc->arch.handler = NULL;
	return 0;
}

/* Per-process arch state owns no resources of its own.  */
void
arch_process_destroy(struct process *proc)
{
}

/* Leader's arch state (including dl_plt_update_bp) is shared by
 * shallow copy.  */
int
arch_process_clone(struct process *retp, struct process *proc)
{
	retp->arch = proc->arch;
	return 0;
}

/* After exec the image is new, so reset arch state exactly as for a
 * freshly started process.  */
int
arch_process_exec(struct process *proc)
{
	return arch_process_init(proc);
}