/* plt.c revision b64b5c7b6f4a368ccaf60507090192845221a3be */
1#include <gelf.h> 2#include <sys/ptrace.h> 3#include <errno.h> 4#include <error.h> 5#include <inttypes.h> 6#include <assert.h> 7#include <string.h> 8 9#include "proc.h" 10#include "common.h" 11#include "library.h" 12#include "breakpoint.h" 13 14/* There are two PLT types on 32-bit PPC: old-style, BSS PLT, and 15 * new-style "secure" PLT. We can tell one from the other by the 16 * flags on the .plt section. If it's +X (executable), it's BSS PLT, 17 * otherwise it's secure. 18 * 19 * BSS PLT works the same way as most architectures: the .plt section 20 * contains trampolines and we put breakpoints to those. With secure 21 * PLT, the .plt section doesn't contain instructions but addresses. 22 * The real PLT table is stored in .text. Addresses of those PLT 23 * entries can be computed, and it fact that's what the glink deal 24 * below does. 25 * 26 * If not prelinked, BSS PLT entries in the .plt section contain 27 * zeroes that are overwritten by the dynamic linker during start-up. 28 * For that reason, ltrace realizes those breakpoints only after 29 * .start is hit. 30 * 31 * 64-bit PPC is more involved. Program linker creates for each 32 * library call a _stub_ symbol named xxxxxxxx.plt_call.<callee> 33 * (where xxxxxxxx is a hexadecimal number). That stub does the call 34 * dispatch: it loads an address of a function to call from the 35 * section .plt, and branches. PLT entries themselves are essentially 36 * a curried call to the resolver. When the symbol is resolved, the 37 * resolver updates the value stored in .plt, and the next time 38 * around, the stub calls the library function directly. So we make 39 * at most one trip (none if the binary is prelinked) through each PLT 40 * entry, and correspondingly that is useless as a breakpoint site. 41 * 42 * Note the three confusing terms: stubs (that play the role of PLT 43 * entries), PLT entries, .plt section. 44 * 45 * We first check symbol tables and see if we happen to have stub 46 * symbols available. 
If yes we just put breakpoints to those, and 47 * treat them as usual breakpoints. The only tricky part is realizing 48 * that there can be more than one breakpoint per symbol. 49 * 50 * The case that we don't have the stub symbols available is harder. 51 * The following scheme uses two kinds of PLT breakpoints: unresolved 52 * and resolved (to some address). When the process starts (or when 53 * we attach), we distribute unresolved PLT breakpoints to the PLT 54 * entries (not stubs). Then we look in .plt, and for each entry 55 * whose value is different than the corresponding PLT entry address, 56 * we assume it was already resolved, and convert the breakpoint to 57 * resolved. We also rewrite the resolved value in .plt back to the 58 * PLT address. 59 * 60 * When a PLT entry hits a resolved breakpoint (which happens because 61 * we put back the unresolved addresses to .plt), we move the 62 * instruction pointer to the corresponding address and continue the 63 * process as if nothing happened. 64 * 65 * When unresolved PLT entry is called for the first time, we need to 66 * catch the new value that the resolver will write to a .plt slot. 67 * We also need to prevent another thread from racing through and 68 * taking the branch without ltrace noticing. So when unresolved PLT 69 * entry hits, we have to stop all threads. We then single-step 70 * through the resolver, until the .plt slot changes. When it does, 71 * we treat it the same way as above: convert the PLT breakpoint to 72 * resolved, and rewrite the .plt value back to PLT address. We then 73 * start all threads again. 74 * 75 * In theory we might find the exact instruction that will update the 76 * .plt slot, and emulate it, updating the PLT breakpoint immediately, 77 * and then just skip it. But that's even messier than the thread 78 * stopping business and single stepping that needs to be done. 
79 */ 80 81#define PPC_PLT_STUB_SIZE 16 82#define PPC64_PLT_STUB_SIZE 8 //xxx 83 84static inline int 85host_powerpc64() 86{ 87#ifdef __powerpc64__ 88 return 1; 89#else 90 return 0; 91#endif 92} 93 94GElf_Addr 95arch_plt_sym_val(struct ltelf *lte, size_t ndx, GElf_Rela *rela) 96{ 97 if (lte->ehdr.e_machine == EM_PPC && lte->arch.secure_plt) { 98 assert(lte->arch.plt_stub_vma != 0); 99 return lte->arch.plt_stub_vma + PPC_PLT_STUB_SIZE * ndx; 100 101 } else if (lte->ehdr.e_machine == EM_PPC) { 102 return rela->r_offset; 103 104 } else { 105 /* If we get here, we don't have stub symbols. In 106 * that case we put brakpoints to PLT entries the same 107 * as the PPC32 secure PLT case does. */ 108 assert(lte->arch.plt_stub_vma != 0); 109 return lte->arch.plt_stub_vma + PPC64_PLT_STUB_SIZE * ndx; 110 } 111} 112 113int 114arch_translate_address(struct Process *proc, 115 target_address_t addr, target_address_t *ret) 116{ 117 if (proc->e_machine == EM_PPC64) { 118 assert(host_powerpc64()); 119 long l = ptrace(PTRACE_PEEKTEXT, proc->pid, addr, 0); 120 fprintf(stderr, "arch_translate_address %p->%#lx\n", 121 addr, l); 122 if (l == -1 && errno) { 123 error(0, errno, ".opd translation of %p", addr); 124 return -1; 125 } 126 *ret = (target_address_t)l; 127 return 0; 128 } 129 130 *ret = addr; 131 return 0; 132} 133 134/* XXX Apparently PPC64 doesn't support PLT breakpoints. */ 135void * 136sym2addr(Process *proc, struct library_symbol *sym) { 137 void *addr = sym->enter_addr; 138 long pt_ret; 139 140 debug(3, 0); 141 142 if (sym->plt_type != LS_TOPLT_POINT) { 143 return addr; 144 } 145 146 if (proc->pid == 0) { 147 return 0; 148 } 149 150 if (options.debug >= 3) { 151 xinfdump(proc->pid, (void *)(((long)addr-32)&0xfffffff0), 152 sizeof(void*)*8); 153 } 154 155 // On a PowerPC-64 system, a plt is three 64-bit words: the first is the 156 // 64-bit address of the routine. Before the PLT has been initialized, 157 // this will be 0x0. 
In fact, the symbol table won't have the plt's 158 // address even. Ater the PLT has been initialized, but before it has 159 // been resolved, the first word will be the address of the function in 160 // the dynamic linker that will reslove the PLT. After the PLT is 161 // resolved, this will will be the address of the routine whose symbol 162 // is in the symbol table. 163 164 // On a PowerPC-32 system, there are two types of PLTs: secure (new) and 165 // non-secure (old). For the secure case, the PLT is simply a pointer 166 // and we can treat it much as we do for the PowerPC-64 case. For the 167 // non-secure case, the PLT is executable code and we can put the 168 // break-point right in the PLT. 169 170 pt_ret = ptrace(PTRACE_PEEKTEXT, proc->pid, addr, 0); 171 172#if SIZEOF_LONG == 8 173 if (proc->mask_32bit) { 174 // Assume big-endian. 175 addr = (void *)((pt_ret >> 32) & 0xffffffff); 176 } else { 177 addr = (void *)pt_ret; 178 } 179#else 180 /* XXX Um, so where exactly are we dealing with the non-secure 181 PLT thing? 
*/ 182 addr = (void *)pt_ret; 183#endif 184 185 return addr; 186} 187 188static GElf_Addr 189get_glink_vma(struct ltelf *lte, GElf_Addr ppcgot, Elf_Data *plt_data) 190{ 191 Elf_Scn *ppcgot_sec = NULL; 192 GElf_Shdr ppcgot_shdr; 193 if (ppcgot != 0 194 && elf_get_section_covering(lte, ppcgot, 195 &ppcgot_sec, &ppcgot_shdr) < 0) 196 // xxx should be the log out 197 fprintf(stderr, 198 "DT_PPC_GOT=%#" PRIx64 ", but no such section found.\n", 199 ppcgot); 200 201 if (ppcgot_sec != NULL) { 202 Elf_Data *data = elf_loaddata(ppcgot_sec, &ppcgot_shdr); 203 if (data == NULL || data->d_size < 8 ) { 204 fprintf(stderr, "Couldn't read GOT data.\n"); 205 } else { 206 // where PPCGOT begins in .got 207 size_t offset = ppcgot - ppcgot_shdr.sh_addr; 208 assert(offset % 4 == 0); 209 uint32_t glink_vma; 210 if (elf_read_u32(data, offset + 4, &glink_vma) < 0) { 211 fprintf(stderr, 212 "Couldn't read glink VMA address" 213 " at %zd@GOT\n", offset); 214 return 0; 215 } 216 if (glink_vma != 0) { 217 debug(1, "PPC GOT glink_vma address: %#" PRIx32, 218 glink_vma); 219 fprintf(stderr, "PPC GOT glink_vma " 220 "address: %#"PRIx32"\n", glink_vma); 221 return (GElf_Addr)glink_vma; 222 } 223 } 224 } 225 226 if (plt_data != NULL) { 227 uint32_t glink_vma; 228 if (elf_read_u32(plt_data, 0, &glink_vma) < 0) { 229 fprintf(stderr, 230 "Couldn't read glink VMA address at 0@.plt\n"); 231 return 0; 232 } 233 debug(1, ".plt glink_vma address: %#" PRIx32, glink_vma); 234 fprintf(stderr, ".plt glink_vma address: " 235 "%#"PRIx32"\n", glink_vma); 236 return (GElf_Addr)glink_vma; 237 } 238 239 return 0; 240} 241 242static int 243load_dynamic_entry(struct ltelf *lte, int tag, GElf_Addr *valuep) 244{ 245 Elf_Scn *scn; 246 GElf_Shdr shdr; 247 if (elf_get_section_type(lte, SHT_DYNAMIC, &scn, &shdr) < 0 248 || scn == NULL) { 249 fail: 250 error(0, 0, "Couldn't get SHT_DYNAMIC: %s", 251 elf_errmsg(-1)); 252 return -1; 253 } 254 255 Elf_Data *data = elf_loaddata(scn, &shdr); 256 if (data == NULL) 257 goto fail; 
258 259 size_t j; 260 for (j = 0; j < shdr.sh_size / shdr.sh_entsize; ++j) { 261 GElf_Dyn dyn; 262 if (gelf_getdyn(data, j, &dyn) == NULL) 263 goto fail; 264 265 if(dyn.d_tag == tag) { 266 *valuep = dyn.d_un.d_ptr; 267 return 0; 268 } 269 } 270 271 return -1; 272} 273 274static int 275load_ppcgot(struct ltelf *lte, GElf_Addr *ppcgotp) 276{ 277 return load_dynamic_entry(lte, DT_PPC_GOT, ppcgotp); 278} 279 280static int 281load_ppc64_glink(struct ltelf *lte, GElf_Addr *glinkp) 282{ 283 return load_dynamic_entry(lte, DT_PPC64_GLINK, glinkp); 284} 285 286int 287arch_elf_init(struct ltelf *lte) 288{ 289 lte->arch.secure_plt = !(lte->lte_flags & LTE_PLT_EXECUTABLE); 290 if (lte->ehdr.e_machine == EM_PPC && lte->arch.secure_plt) { 291 GElf_Addr ppcgot; 292 if (load_ppcgot(lte, &ppcgot) < 0) { 293 fprintf(stderr, "Couldn't find DT_PPC_GOT.\n"); 294 return -1; 295 } 296 GElf_Addr glink_vma = get_glink_vma(lte, ppcgot, lte->plt_data); 297 298 assert (lte->relplt_size % 12 == 0); 299 size_t count = lte->relplt_size / 12; // size of RELA entry 300 lte->arch.plt_stub_vma = glink_vma 301 - (GElf_Addr)count * PPC_PLT_STUB_SIZE; 302 debug(1, "stub_vma is %#" PRIx64, lte->arch.plt_stub_vma); 303 304 } else if (lte->ehdr.e_machine == EM_PPC64) { 305 GElf_Addr glink_vma; 306 if (load_ppc64_glink(lte, &glink_vma) < 0) { 307 fprintf(stderr, "Couldn't find DT_PPC64_GLINK.\n"); 308 return -1; 309 } 310 311 /* The first glink stub starts at offset 32. */ 312 lte->arch.plt_stub_vma = glink_vma + 32; 313 } 314 315 /* Override the value that we gleaned from flags on the .plt 316 * section. The PLT entries are in fact executable, they are 317 * just not in .plt. */ 318 lte->lte_flags |= LTE_PLT_EXECUTABLE; 319 320 /* On PPC64, look for stub symbols in symbol table. These are 321 * called: xxxxxxxx.plt_call.callee_name@version+addend. */ 322 if (lte->ehdr.e_machine == EM_PPC64 323 && lte->symtab != NULL && lte->strtab != NULL) { 324 325 /* N.B. 
We can't simply skip the symbols that we fail 326 * to read or malloc. There may be more than one stub 327 * per symbol name, and if we failed in one but 328 * succeeded in another, the PLT enabling code would 329 * have no way to tell that something is missing. We 330 * could work around that, of course, but it doesn't 331 * seem worth the trouble. So if anything fails, we 332 * just pretend that we don't have stub symbols at 333 * all, as if the binary is stripped. */ 334 335 size_t i; 336 for (i = 0; i < lte->symtab_count; ++i) { 337 GElf_Sym sym; 338 if (gelf_getsym(lte->symtab, i, &sym) == NULL) { 339 struct library_symbol *sym, *next; 340 fail: 341 for (sym = lte->arch.stubs; sym != NULL; ) { 342 next = sym->next; 343 library_symbol_destroy(sym); 344 free(sym); 345 sym = next; 346 } 347 lte->arch.stubs = NULL; 348 break; 349 } 350 351 const char *name = lte->strtab + sym.st_name; 352 353#define STUBN ".plt_call." 354 if ((name = strstr(name, STUBN)) == NULL) 355 continue; 356 name += sizeof(STUBN) - 1; 357#undef STUBN 358 359 size_t len; 360 const char *ver = strchr(name, '@'); 361 if (ver != NULL) { 362 len = ver - name; 363 364 } else { 365 /* If there is "+" at all, check that 366 * the symbol name ends in "+0". 
*/ 367 const char *add = strrchr(name, '+'); 368 if (add != NULL) { 369 assert(strcmp(add, "+0") == 0); 370 len = add - name; 371 } else { 372 len = strlen(name); 373 } 374 } 375 376 char *sym_name = strndup(name, len); 377 struct library_symbol *libsym = malloc(sizeof(*libsym)); 378 if (sym_name == NULL || libsym == NULL) { 379 free(sym_name); 380 free(libsym); 381 goto fail; 382 } 383 384 target_address_t addr 385 = (target_address_t)sym.st_value + lte->bias; 386 library_symbol_init(libsym, addr, sym_name, 1, 387 LS_TOPLT_EXEC); 388 libsym->arch.type = PPC64PLT_STUB; 389 libsym->next = lte->arch.stubs; 390 lte->arch.stubs = libsym; 391 } 392 } 393 394 return 0; 395} 396 397enum plt_status 398arch_elf_add_plt_entry(struct Process *proc, struct ltelf *lte, 399 const char *a_name, GElf_Rela *rela, size_t ndx, 400 struct library_symbol **ret) 401{ 402 if (lte->ehdr.e_machine == EM_PPC) 403 return plt_default; 404 405 /* PPC64. If we have stubs, we return a chain of breakpoint 406 * sites, one for each stub that corresponds to this PLT 407 * entry. */ 408 struct library_symbol *chain = NULL; 409 struct library_symbol **symp; 410 for (symp = <e->arch.stubs; *symp != NULL; ) { 411 struct library_symbol *sym = *symp; 412 if (strcmp(sym->name, a_name) != 0) { 413 symp = &(*symp)->next; 414 continue; 415 } 416 417 /* Re-chain the symbol from stubs to CHAIN. */ 418 *symp = sym->next; 419 sym->next = chain; 420 chain = sym; 421 } 422 423 if (chain != NULL) { 424 struct library_symbol *sym; 425 for (sym = chain; sym != NULL; sym = sym->next) 426 fprintf(stderr, "match %s --> %p\n", 427 sym->name, sym->enter_addr); 428 for (sym = lte->arch.stubs; sym != NULL; sym = sym->next) 429 fprintf(stderr, "remains %s --> %p\n", 430 sym->name, sym->enter_addr); 431 432 *ret = chain; 433 return plt_ok; 434 } 435 436 /* We don't have stub symbols. 
Find corresponding .plt slot, 437 * and check whether it contains the corresponding PLT address 438 * (or 0 if the dynamic linker hasn't run yet). N.B. we don't 439 * want read this from ELF file, but from process image. That 440 * makes a difference if we are attaching to a running 441 * process. */ 442 443 GElf_Addr plt_entry_addr = arch_plt_sym_val(lte, ndx, rela); 444 GElf_Addr plt_slot_addr = rela->r_offset; 445 assert(plt_slot_addr >= lte->plt_addr 446 || plt_slot_addr < lte->plt_addr + lte->plt_size); 447 448 long plt_slot_value = ptrace(PTRACE_PEEKTEXT, proc->pid, 449 plt_slot_addr, 0); 450 if (plt_slot_value == -1 && errno != 0) { 451 error(0, errno, "ptrace .plt slot value @%#" PRIx64, 452 plt_slot_addr); 453 return plt_fail; 454 } 455 456 char *name = strdup(a_name); 457 struct library_symbol *libsym = malloc(sizeof(*libsym)); 458 if (name == NULL || libsym == NULL) { 459 error(0, errno, "allocation for .plt slot"); 460 fail: 461 free(name); 462 free(libsym); 463 return plt_fail; 464 } 465 466 library_symbol_init(libsym, (target_address_t)plt_entry_addr, 467 name, 1, LS_TOPLT_EXEC); 468 if ((GElf_Addr)plt_slot_value == plt_entry_addr 469 || plt_slot_value == 0) { 470 libsym->arch.type = PPC64PLT_UNRESOLVED; 471 libsym->arch.orig_addr = 0; 472 } else { 473 /* Unresolve the .plt slot. If the binary was 474 * prelinked, this makes the code invalid, because in 475 * case of prelinked binary, the dynamic linker 476 * doesn't update .plt[0] and .plt[1] with addresses 477 * of the resover. But we don't care, we will never 478 * need to enter the resolver. That just means that 479 * we have to un-un-resolve this back before we 480 * detach, which is nothing new: we already need to 481 * retract breakpoints. */ 482 /* We only modify plt_entry[0], which holds the 483 * resolved address of the routine. We keep the TOC 484 * and environment pointers intact. Hence the only 485 * adjustment that we need to do is to IP. 
*/ 486 if (ptrace(PTRACE_POKETEXT, proc->pid, 487 plt_slot_addr, plt_entry_addr) < 0) { 488 error(0, errno, "unresolve .plt slot"); 489 goto fail; 490 } 491 libsym->arch.type = PPC64PLT_RESOLVED; 492 libsym->arch.orig_addr = plt_slot_value; 493 } 494 495 *ret = libsym; 496 return plt_ok; 497} 498 499void 500arch_elf_destroy(struct ltelf *lte) 501{ 502 struct library_symbol *sym; 503 for (sym = lte->arch.stubs; sym != NULL; ) { 504 struct library_symbol *next = sym->next; 505 library_symbol_destroy(sym); 506 free(sym); 507 sym = next; 508 } 509} 510 511static void 512ppc64_resolved_bp_continue(struct breakpoint *bp, struct Process *proc) 513{ 514 fprintf(stderr, "ppc64_resolved_bp_continue\n"); 515 set_instruction_pointer(proc, 516 (target_address_t)bp->libsym->arch.orig_addr); 517 continue_process(proc->pid); 518} 519 520int 521arch_breakpoint_init(struct Process *proc, struct breakpoint *bp) 522{ 523 if (proc->e_machine == EM_PPC 524 || bp->libsym == NULL 525 || bp->libsym->arch.type == PPC64PLT_STUB) 526 return 0; 527 528 if (bp->libsym->arch.type == PPC64PLT_RESOLVED) { 529 fprintf(stderr, "arch_breakpoint_init RESOLVED\n"); 530 static struct bp_callbacks resolved_cbs = { 531 .on_continue = ppc64_resolved_bp_continue, 532 }; 533 breakpoint_set_callbacks(bp, &resolved_cbs); 534 535 } else { 536 fprintf(stderr, "arch_breakpoint_init UNRESOLVED\n"); 537 fprintf(stderr, "a.k.a the insane case\n"); 538 abort(); 539 } 540 541 return 0; 542} 543 544void 545arch_breakpoint_destroy(struct breakpoint *bp) 546{ 547} 548