single_step.c revision 0707ad30d10110aebc01a5a64fb63f4b32d20b73
/*
 * Copyright 2010 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for
 * more details.
 *
 * A code-rewriter that enables instruction single-stepping.
 * Derived from iLib's single-stepping code.
 */

#ifndef __tilegx__   /* No support for single-step yet. */

/* These functions are only used on the TILE platform */
#include <linux/slab.h>
#include <linux/thread_info.h>
#include <linux/uaccess.h>
#include <linux/mman.h>
#include <linux/types.h>
#include <linux/err.h>
#include <asm/cacheflush.h>
#include <asm/opcode-tile.h>
#include <asm/opcode_constants.h>
#include <arch/abi.h>

#define signExtend17(val) sign_extend((val), 17)
#define TILE_X1_MASK (0xffffffffULL << 31)

int unaligned_printk;

static int __init setup_unaligned_printk(char *str)
{
	long val;
	if (strict_strtol(str, 0, &val) != 0)
		return 0;
	unaligned_printk = val;
	pr_info("Printk for each unaligned data access is %s\n",
		unaligned_printk ? "enabled" : "disabled");
	return 1;
}
__setup("unaligned_printk=", setup_unaligned_printk);

unsigned int unaligned_fixup_count;

enum mem_op {
	MEMOP_NONE,
	MEMOP_LOAD,
	MEMOP_STORE,
	MEMOP_LOAD_POSTINCR,
	MEMOP_STORE_POSTINCR
};

/* Rewrite the branch-offset field of the X1 slot of a bundle. */
static inline tile_bundle_bits set_BrOff_X1(tile_bundle_bits n, int32_t offset)
{
	tile_bundle_bits result;

	/* mask out the old offset */
	tile_bundle_bits mask = create_BrOff_X1(-1);
	result = n & (~mask);

	/* or in the new offset */
	result |= create_BrOff_X1(offset);

	return result;
}

/* Replace the X1 slot of a bundle with a "move dest, src". */
static inline tile_bundle_bits move_X1(tile_bundle_bits n, int dest, int src)
{
	tile_bundle_bits result;
	tile_bundle_bits op;

	result = n & (~TILE_X1_MASK);

	op = create_Opcode_X1(SPECIAL_0_OPCODE_X1) |
		create_RRROpcodeExtension_X1(OR_SPECIAL_0_OPCODE_X1) |
		create_Dest_X1(dest) |
		create_SrcB_X1(TREG_ZERO) |
		create_SrcA_X1(src);

	result |= op;
	return result;
}

/* Replace the X1 slot of a bundle with a no-op ("move zero, zero"). */
static inline tile_bundle_bits nop_X1(tile_bundle_bits n)
{
	return move_X1(n, TREG_ZERO, TREG_ZERO);
}

/* Replace the X1 slot of a bundle with an "addi dest, src, imm". */
static inline tile_bundle_bits addi_X1(
	tile_bundle_bits n, int dest, int src, int imm)
{
	n &= ~TILE_X1_MASK;

	n |= (create_SrcA_X1(src) |
		create_Dest_X1(dest) |
		create_Imm8_X1(imm) |
		create_S_X1(0) |
		create_Opcode_X1(IMM_0_OPCODE_X1) |
		create_ImmOpcodeExtension_X1(ADDI_IMM_0_OPCODE_X1));

	return n;
}

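/*
 * Handle an unaligned load or store: perform the memory access here
 * in the kernel (via copy_from_user/copy_to_user), then rewrite the
 * bundle so the copy placed in the single-step buffer no longer
 * touches memory.  Returns the rewritten bundle, or 0 if a signal
 * was posted and the caller should bail out.
 */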
static tile_bundle_bits rewrite_load_store_unaligned(
	struct single_step_state *state,
	tile_bundle_bits bundle,
	struct pt_regs *regs,
	enum mem_op mem_op,
	int size, int sign_ext)
{
	unsigned char __user *addr;
	int val_reg, addr_reg, err, val;

	/* Get address and value registers */
	if (bundle & TILE_BUNDLE_Y_ENCODING_MASK) {
		addr_reg = get_SrcA_Y2(bundle);
		val_reg = get_SrcBDest_Y2(bundle);
	} else if (mem_op == MEMOP_LOAD || mem_op == MEMOP_LOAD_POSTINCR) {
		addr_reg = get_SrcA_X1(bundle);
		val_reg = get_Dest_X1(bundle);
	} else {
		addr_reg = get_SrcA_X1(bundle);
		val_reg = get_SrcB_X1(bundle);
	}

	/*
	 * If registers are not GPRs, don't try to handle it.
	 *
	 * FIXME: we could handle non-GPR loads by getting the real value
	 * from memory, writing it to the single step buffer, using a
	 * temp_reg to hold a pointer to that memory, then executing that
	 * instruction and resetting temp_reg.  For non-GPR stores, it's a
	 * little trickier; we could use the single step buffer for that
	 * too, but we'd have to add some more state bits so that we could
	 * call back in here to copy that value to the real target.  For
	 * now, we just handle the simple case.
	 */
	if ((val_reg >= PTREGS_NR_GPRS &&
	     (val_reg != TREG_ZERO ||
	      mem_op == MEMOP_LOAD ||
	      mem_op == MEMOP_LOAD_POSTINCR)) ||
	    addr_reg >= PTREGS_NR_GPRS)
		return bundle;

	/* If it's aligned, don't handle it specially */
	addr = (void __user *)regs->regs[addr_reg];
	if (((unsigned long)addr % size) == 0)
		return bundle;

#ifndef __LITTLE_ENDIAN
# error We assume little-endian representation with copy_xx_user size 2 here
#endif
	/* Handle unaligned load/store */
	if (mem_op == MEMOP_LOAD || mem_op == MEMOP_LOAD_POSTINCR) {
		unsigned short val_16;
		switch (size) {
		case 2:
			err = copy_from_user(&val_16, addr, sizeof(val_16));
			val = sign_ext ? ((short)val_16) : val_16;
			break;
		case 4:
			err = copy_from_user(&val, addr, sizeof(val));
			break;
		default:
			BUG();
		}
		if (err == 0) {
			state->update_reg = val_reg;
			state->update_value = val;
			state->update = 1;
		}
	} else {
		val = (val_reg == TREG_ZERO) ? 0 : regs->regs[val_reg];
		err = copy_to_user(addr, &val, size);
	}

	if (err) {
		siginfo_t info = {
			.si_signo = SIGSEGV,
			.si_code = SEGV_MAPERR,
			.si_addr = addr
		};
		force_sig_info(info.si_signo, &info, current);
		return (tile_bundle_bits) 0;
	}

	if (unaligned_fixup == 0) {
		siginfo_t info = {
			.si_signo = SIGBUS,
			.si_code = BUS_ADRALN,
			.si_addr = addr
		};
		force_sig_info(info.si_signo, &info, current);
		return (tile_bundle_bits) 0;
	}

	if (unaligned_printk || unaligned_fixup_count == 0) {
		pr_info("Process %d/%s: PC %#lx: Fixup of"
			" unaligned %s at %#lx.\n",
			current->pid, current->comm, regs->pc,
			(mem_op == MEMOP_LOAD ||
			 mem_op == MEMOP_LOAD_POSTINCR) ?
			"load" : "store",
			(unsigned long)addr);
		if (!unaligned_printk) {
#define P pr_info
P("\n");
P("Unaligned fixups in the kernel will slow your application considerably.\n");
P("To find them, write a \"1\" to /proc/sys/tile/unaligned_fixup/printk,\n");
P("which requests the kernel show all unaligned fixups, or write a \"0\"\n");
P("to /proc/sys/tile/unaligned_fixup/enabled, in which case each unaligned\n");
P("access will become a SIGBUS you can debug. No further warnings will be\n");
P("shown so as to avoid additional slowdown, but you can track the number\n");
P("of fixups performed via /proc/sys/tile/unaligned_fixup/count.\n");
P("Use the tile-addr2line command (see \"info addr2line\") to decode PCs.\n");
P("\n");
#undef P
		}
	}
	++unaligned_fixup_count;

	if (bundle & TILE_BUNDLE_Y_ENCODING_MASK) {
		/* Convert the Y2 instruction to a prefetch. */
		bundle &= ~(create_SrcBDest_Y2(-1) |
			    create_Opcode_Y2(-1));
		bundle |= (create_SrcBDest_Y2(TREG_ZERO) |
			   create_Opcode_Y2(LW_OPCODE_Y2));
	/* Replace the load postincr with an addi */
	} else if (mem_op == MEMOP_LOAD_POSTINCR) {
		bundle = addi_X1(bundle, addr_reg, addr_reg,
				 get_Imm8_X1(bundle));
	/* Replace the store postincr with an addi */
	} else if (mem_op == MEMOP_STORE_POSTINCR) {
		bundle = addi_X1(bundle, addr_reg, addr_reg,
				 get_Dest_Imm8_X1(bundle));
	} else {
		/* Convert the X1 instruction to a nop. */
		bundle &= ~(create_Opcode_X1(-1) |
			    create_UnShOpcodeExtension_X1(-1) |
			    create_UnOpcodeExtension_X1(-1));
		bundle |= (create_Opcode_X1(SHUN_0_OPCODE_X1) |
			   create_UnShOpcodeExtension_X1(
				   UN_0_SHUN_0_OPCODE_X1) |
			   create_UnOpcodeExtension_X1(
				   NOP_UN_0_SHUN_0_OPCODE_X1));
	}

	return bundle;
}

/**
 * single_step_once() - entry point when single stepping has been triggered.
 * @regs: The machine register state
 *
 * When we arrive at this routine via a trampoline, the single step
 * engine copies the executing bundle to the single step buffer.
 * If the instruction is a conditional branch, then the target is
 * reset to one past the next instruction.  If the instruction
 * sets the lr, then that is noted.  If the instruction is a jump
 * or call, then the new target pc is preserved and the current
 * bundle instruction set to null.
 *
 * The necessary post-single-step rewriting information is stored in
 * single_step_state.  We use data segment values because the
 * stack will be rewound when we run the rewritten single-stepped
 * instruction.
 */
void single_step_once(struct pt_regs *regs)
{
	extern tile_bundle_bits __single_step_ill_insn;
	extern tile_bundle_bits __single_step_j_insn;
	extern tile_bundle_bits __single_step_addli_insn;
	extern tile_bundle_bits __single_step_auli_insn;
	struct thread_info *info = (void *)current_thread_info();
	struct single_step_state *state = info->step_state;
	int is_single_step = test_ti_thread_flag(info, TIF_SINGLESTEP);
	tile_bundle_bits __user *buffer, *pc;
	tile_bundle_bits bundle;
	int temp_reg;
	int target_reg = TREG_LR;
	int err;
	enum mem_op mem_op = MEMOP_NONE;
	int size = 0, sign_ext = 0;	/* happy compiler */

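	/*
	 * Reference instruction templates, kept in .rodata.  Copies of
	 * the addli/auli/j bundles are patched with real operands (and
	 * the "ill" bundle copied verbatim) when the single-step buffer
	 * is filled in below.
	 */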
	asm(
"	.pushsection .rodata.single_step\n"
"	.align 8\n"
"	.globl	__single_step_ill_insn\n"
"__single_step_ill_insn:\n"
"	ill\n"
"	.globl	__single_step_addli_insn\n"
"__single_step_addli_insn:\n"
"	{ nop; addli r0, zero, 0 }\n"
"	.globl	__single_step_auli_insn\n"
"__single_step_auli_insn:\n"
"	{ nop; auli r0, r0, 0 }\n"
"	.globl	__single_step_j_insn\n"
"__single_step_j_insn:\n"
"	j .\n"
"	.popsection\n"
	);

	if (state == NULL) {
		/* allocate the per-thread single-step state struct */
		state = kmalloc(sizeof(struct single_step_state), GFP_KERNEL);
		if (state == NULL) {
			pr_err("Out of kernel memory trying to single-step\n");
			return;
		}

		/* allocate a cache line of writable, executable memory */
		down_write(&current->mm->mmap_sem);
		buffer = (void __user *) do_mmap(NULL, 0, 64,
				PROT_EXEC | PROT_READ | PROT_WRITE,
				MAP_PRIVATE | MAP_ANONYMOUS,
				0);
		up_write(&current->mm->mmap_sem);

		if (IS_ERR((void __force *)buffer)) {
			kfree(state);
			pr_err("Out of kernel pages trying to single-step\n");
			return;
		}

		state->buffer = buffer;
		state->is_enabled = 0;

		info->step_state = state;

		/* Validate our stored instruction patterns */
		BUG_ON(get_Opcode_X1(__single_step_addli_insn) !=
		       ADDLI_OPCODE_X1);
		BUG_ON(get_Opcode_X1(__single_step_auli_insn) !=
		       AULI_OPCODE_X1);
		BUG_ON(get_SrcA_X1(__single_step_addli_insn) != TREG_ZERO);
		BUG_ON(get_Dest_X1(__single_step_addli_insn) != 0);
		BUG_ON(get_JOffLong_X1(__single_step_j_insn) != 0);
	}

	/*
	 * If we are returning from a syscall, we still haven't hit the
	 * "ill" for the swint1 instruction.  So back the PC up to be
	 * pointing at the swint1, but we'll actually return directly
	 * back to the "ill" so we come back in via SIGILL as if we
	 * had "executed" the swint1 without ever being in kernel space.
	 */
	if (regs->faultnum == INT_SWINT_1)
		regs->pc -= 8;

	pc = (tile_bundle_bits __user *)(regs->pc);
	if (get_user(bundle, pc) != 0) {
		pr_err("Couldn't read instruction at %p trying to step\n", pc);
		return;
	}

	/* We'll follow the instruction with 2 ill op bundles */
	state->orig_pc = (unsigned long)pc;
	state->next_pc = (unsigned long)(pc + 1);
	state->branch_next_pc = 0;
	state->update = 0;

	if (!(bundle & TILE_BUNDLE_Y_ENCODING_MASK)) {
		/* two wide, check for control flow */
		int opcode = get_Opcode_X1(bundle);

		switch (opcode) {
		/* branches */
		case BRANCH_OPCODE_X1:
		{
			int32_t offset = signExtend17(get_BrOff_X1(bundle));

			/*
			 * For branches, we use a rewriting trick to let the
			 * hardware evaluate whether the branch is taken or
			 * untaken.  We record the target offset and then
			 * rewrite the branch instruction to target 1 insn
			 * ahead if the branch is taken.  We then follow the
			 * rewritten branch with two bundles, each containing
			 * an "ill" instruction.  The supervisor examines the
			 * pc after the single step code is executed, and if
			 * the pc is the first ill instruction, then the
			 * branch (if any) was not taken.  If the pc is the
			 * second ill instruction, then the branch was
			 * taken.  The new pc is computed for these cases, and
			 * inserted into the registers for the thread.  If
			 * the pc is the start of the single step code, then
			 * an exception or interrupt was taken before the
			 * code started processing, and the same "original"
			 * pc is restored.  This change, different from the
			 * original implementation, has the advantage of
			 * executing a single user instruction.
			 */
			state->branch_next_pc = (unsigned long)(pc + offset);

			/* rewrite branch offset to go forward one bundle */
			bundle = set_BrOff_X1(bundle, 2);
		}
		break;

		/* jumps */
		case JALB_OPCODE_X1:
		case JALF_OPCODE_X1:
			state->update = 1;
			state->next_pc =
				(unsigned long) (pc + get_JOffLong_X1(bundle));
			break;

		case JB_OPCODE_X1:
		case JF_OPCODE_X1:
			state->next_pc =
				(unsigned long) (pc + get_JOffLong_X1(bundle));
			bundle = nop_X1(bundle);
			break;

		case SPECIAL_0_OPCODE_X1:
			switch (get_RRROpcodeExtension_X1(bundle)) {
			/* jump-register */
			case JALRP_SPECIAL_0_OPCODE_X1:
			case JALR_SPECIAL_0_OPCODE_X1:
				state->update = 1;
				state->next_pc =
					regs->regs[get_SrcA_X1(bundle)];
				break;

			case JRP_SPECIAL_0_OPCODE_X1:
			case JR_SPECIAL_0_OPCODE_X1:
				state->next_pc =
					regs->regs[get_SrcA_X1(bundle)];
				bundle = nop_X1(bundle);
				break;

			case LNK_SPECIAL_0_OPCODE_X1:
				state->update = 1;
				target_reg = get_Dest_X1(bundle);
				break;

			/* stores */
			case SH_SPECIAL_0_OPCODE_X1:
				mem_op = MEMOP_STORE;
				size = 2;
				break;

			case SW_SPECIAL_0_OPCODE_X1:
				mem_op = MEMOP_STORE;
				size = 4;
				break;
			}
			break;

		/* loads and iret */
		case SHUN_0_OPCODE_X1:
			if (get_UnShOpcodeExtension_X1(bundle) ==
			    UN_0_SHUN_0_OPCODE_X1) {
				switch (get_UnOpcodeExtension_X1(bundle)) {
				case LH_UN_0_SHUN_0_OPCODE_X1:
					mem_op = MEMOP_LOAD;
					size = 2;
					sign_ext = 1;
					break;

				case LH_U_UN_0_SHUN_0_OPCODE_X1:
					mem_op = MEMOP_LOAD;
					size = 2;
					sign_ext = 0;
					break;

				case LW_UN_0_SHUN_0_OPCODE_X1:
					mem_op = MEMOP_LOAD;
					size = 4;
					break;

				case IRET_UN_0_SHUN_0_OPCODE_X1:
				{
					unsigned long ex0_0 = __insn_mfspr(
						SPR_EX_CONTEXT_0_0);
					unsigned long ex0_1 = __insn_mfspr(
						SPR_EX_CONTEXT_0_1);
					/*
					 * Special-case it if we're iret'ing
					 * to PL0 again.  Otherwise just let
					 * it run and it will generate SIGILL.
					 */
					if (EX1_PL(ex0_1) == USER_PL) {
						state->next_pc = ex0_0;
						regs->ex1 = ex0_1;
						bundle = nop_X1(bundle);
					}
				}
				}
			}
			break;

#if CHIP_HAS_WH64()
		/* postincrement operations */
		case IMM_0_OPCODE_X1:
			switch (get_ImmOpcodeExtension_X1(bundle)) {
			case LWADD_IMM_0_OPCODE_X1:
				mem_op = MEMOP_LOAD_POSTINCR;
				size = 4;
				break;

			case LHADD_IMM_0_OPCODE_X1:
				mem_op = MEMOP_LOAD_POSTINCR;
				size = 2;
				sign_ext = 1;
				break;

			case LHADD_U_IMM_0_OPCODE_X1:
				mem_op = MEMOP_LOAD_POSTINCR;
				size = 2;
				sign_ext = 0;
				break;

			case SWADD_IMM_0_OPCODE_X1:
				mem_op = MEMOP_STORE_POSTINCR;
				size = 4;
				break;

			case SHADD_IMM_0_OPCODE_X1:
				mem_op = MEMOP_STORE_POSTINCR;
				size = 2;
				break;

			default:
				break;
			}
			break;
#endif /* CHIP_HAS_WH64() */
		}

		if (state->update) {
			/*
			 * Get an available register.  We start with a
			 * bitmask with 1's for available registers.
			 * We truncate to the low 32 registers since
			 * we are guaranteed to have set bits in the
			 * low 32 bits, then use ctz to pick the first.
			 */
			u32 mask = (u32) ~((1ULL << get_Dest_X0(bundle)) |
					   (1ULL << get_SrcA_X0(bundle)) |
					   (1ULL << get_SrcB_X0(bundle)) |
					   (1ULL << target_reg));
			temp_reg = __builtin_ctz(mask);
			state->update_reg = temp_reg;
			state->update_value = regs->regs[temp_reg];
			regs->regs[temp_reg] = (unsigned long) (pc+1);
			regs->flags |= PT_FLAGS_RESTORE_REGS;
			bundle = move_X1(bundle, target_reg, temp_reg);
		}
	} else {
		int opcode = get_Opcode_Y2(bundle);

		switch (opcode) {
		/* loads */
		case LH_OPCODE_Y2:
			mem_op = MEMOP_LOAD;
			size = 2;
			sign_ext = 1;
			break;

		case LH_U_OPCODE_Y2:
			mem_op = MEMOP_LOAD;
			size = 2;
			sign_ext = 0;
			break;

		case LW_OPCODE_Y2:
			mem_op = MEMOP_LOAD;
			size = 4;
			break;

		/* stores */
		case SH_OPCODE_Y2:
			mem_op = MEMOP_STORE;
			size = 2;
			break;

		case SW_OPCODE_Y2:
			mem_op = MEMOP_STORE;
			size = 4;
			break;
		}
	}

	/*
	 * Check if we need to rewrite an unaligned load/store.
	 * Returning zero is a special value meaning we need to SIGSEGV.
	 */
	if (mem_op != MEMOP_NONE && unaligned_fixup >= 0) {
		bundle = rewrite_load_store_unaligned(state, bundle, regs,
						      mem_op, size, sign_ext);
		if (bundle == 0)
			return;
	}

	/* write the bundle to our execution area */
	buffer = state->buffer;
	err = __put_user(bundle, buffer++);

	/*
	 * If we're really single-stepping, we take an INT_ILL after.
	 * If we're just handling an unaligned access, we can just
	 * jump directly back to where we were in user code.
	 */
	if (is_single_step) {
		err |= __put_user(__single_step_ill_insn, buffer++);
		err |= __put_user(__single_step_ill_insn, buffer++);
	} else {
		long delta;

		if (state->update) {
			/* We have some state to update; do it inline */
			int ha16;
			bundle = __single_step_addli_insn;
			bundle |= create_Dest_X1(state->update_reg);
			bundle |= create_Imm16_X1(state->update_value);
			err |= __put_user(bundle, buffer++);
			bundle = __single_step_auli_insn;
			bundle |= create_Dest_X1(state->update_reg);
			bundle |= create_SrcA_X1(state->update_reg);
			ha16 = (state->update_value + 0x8000) >> 16;
			bundle |= create_Imm16_X1(ha16);
			err |= __put_user(bundle, buffer++);
			state->update = 0;
		}

		/* End with a jump back to the next instruction */
		delta = ((regs->pc + TILE_BUNDLE_SIZE_IN_BYTES) -
			 (unsigned long)buffer) >>
			TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES;
		bundle = __single_step_j_insn;
		bundle |= create_JOffLong_X1(delta);
		err |= __put_user(bundle, buffer++);
	}

	if (err) {
		pr_err("Fault when writing to single-step buffer\n");
		return;
	}

	/*
	 * Flush the buffer.
	 * We do a local flush only, since this is a thread-specific buffer.
	 */
	__flush_icache_range((unsigned long)state->buffer,
			     (unsigned long)buffer);

	/* Indicate enabled */
	state->is_enabled = is_single_step;
	regs->pc = (unsigned long)state->buffer;

	/* Fault immediately if we are coming back from a syscall. */
	if (regs->faultnum == INT_SWINT_1)
		regs->pc += 8;
}

#endif /* !__tilegx__ */