
/*--------------------------------------------------------------------*/
/*--- The core dispatch loop, for jumping to a code address.       ---*/
/*---                                       dispatch-ppc64-linux.S ---*/
/*--------------------------------------------------------------------*/

/*
  This file is part of Valgrind, a dynamic binary instrumentation
  framework.

  Copyright (C) 2005-2011 Cerion Armour-Brown <cerion@open-works.co.uk>

  This program is free software; you can redistribute it and/or
  modify it under the terms of the GNU General Public License as
  published by the Free Software Foundation; either version 2 of the
  License, or (at your option) any later version.

  This program is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  02111-1307, USA.

  The GNU General Public License is contained in the file COPYING.
*/

#if defined(VGP_ppc64_linux)

#include "pub_core_basics_asm.h"
#include "pub_core_dispatch_asm.h"
#include "pub_core_transtab_asm.h"
#include "libvex_guest_offsets.h"	/* for OFFSET_ppc64_CIA */


/* References to globals via the TOC.  Each .tocent__* label names a
   TOC slot holding the ADDRESS of the corresponding global; code
   below loads that address with "ld rN,.tocent__X@toc(2)" and then
   dereferences it. */

/*
        .globl  vgPlain_tt_fast
        .lcomm  vgPlain_tt_fast,4,4
        .type   vgPlain_tt_fast, @object
*/
        .section        ".toc","aw"
.tocent__vgPlain_tt_fast:
        .tc vgPlain_tt_fast[TC],vgPlain_tt_fast
.tocent__vgPlain_tt_fastN:
        .tc vgPlain_tt_fastN[TC],vgPlain_tt_fastN
.tocent__vgPlain_dispatch_ctr:
        .tc vgPlain_dispatch_ctr[TC],vgPlain_dispatch_ctr
.tocent__vgPlain_machine_ppc64_has_VMX:
        .tc vgPlain_machine_ppc64_has_VMX[TC],vgPlain_machine_ppc64_has_VMX

/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- The dispatch loop.  VG_(run_innerloop) is used to    ---*/
/*--- run all translations except no-redir ones.           ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/*----------------------------------------------------*/
/*--- Preamble (set everything up)                 ---*/
/*----------------------------------------------------*/

/* signature:
UWord VG_(run_innerloop) ( void* guest_state, UWord do_profiling );
*/

/* Overview of VG_(run_innerloop):

   In:   r3 = guest_state
         r4 = do_profiling (zero selects the unprofiled dispatcher)
   Out:  r3 = a VG_TRC_* code, or the modified guest state pointer
              if generated code changed r31 (see .gsp_changed)

   Register roles while the dispatcher is running:
         r29 = dispatch_ctr (32-bit value, zero-extended)
         r31 = guest_state pointer
         r3  = next guest address (CIA)

   Layout of the 624-byte frame created below (offsets from the new
   sp, before the additional 48-byte frame is pushed):

         480..623  f14..f31                 (144 bytes)
         336..479  r14..r31                 (144 bytes)
         328..335  r13
         324..327  VRSAVE                   (32-bit word)
         320..323  alignment padding
         128..319  v20..v31, 16-byte aligned (192 bytes)
         104..111  original guest_state pointer
          96.. 103 reserved for FPSCR[RM] check
          88..  95 scratch used to load a zero into FPSCR
          48..  87 free
           0..  47 linkage area

   The caller's CR and LR are stored in the caller's frame at
   8(old sp) and 16(old sp), i.e. at 632 and 640 from the new sp.
*/

.section ".text"
.align   2
.globl VG_(run_innerloop)
.section ".opd","aw"
.align   3
VG_(run_innerloop):
.quad    .VG_(run_innerloop),.TOC.@tocbase,0
.previous
.type    .VG_(run_innerloop),@function
.globl   .VG_(run_innerloop)
.VG_(run_innerloop):
        /* r3 holds guest_state */
        /* r4 holds do_profiling */

        /* ----- entry point to VG_(run_innerloop) ----- */
        /* PPC64 ABI saves LR->16(parent sp), CR->8(parent sp) */

        /* Save lr, cr */
        mflr    0
        std     0,16(1)
        mfcr    0
        std     0,8(1)

        /* New stack frame */
        stdu    1,-624(1)  /* sp should maintain 16-byte alignment */

        /* Save callee-saved registers... */

        /* Floating-point reg save area : 144 bytes */
        stfd    31,616(1)
        stfd    30,608(1)
        stfd    29,600(1)
        stfd    28,592(1)
        stfd    27,584(1)
        stfd    26,576(1)
        stfd    25,568(1)
        stfd    24,560(1)
        stfd    23,552(1)
        stfd    22,544(1)
        stfd    21,536(1)
        stfd    20,528(1)
        stfd    19,520(1)
        stfd    18,512(1)
        stfd    17,504(1)
        stfd    16,496(1)
        stfd    15,488(1)
        stfd    14,480(1)

        /* General reg save area : 144 bytes */
        std     31,472(1)
        std     30,464(1)
        std     29,456(1)
        std     28,448(1)
        std     27,440(1)
        std     26,432(1)
        std     25,424(1)
        std     24,416(1)
        std     23,408(1)
        std     22,400(1)
        std     21,392(1)
        std     20,384(1)
        std     19,376(1)
        std     18,368(1)
        std     17,360(1)
        std     16,352(1)
        std     15,344(1)
        std     14,336(1)
        /* Probably not necessary to save r13 (thread-specific ptr),
           as VEX stays clear of it... but what the hey. */
        std     13,328(1)

        /* It's necessary to save/restore VRSAVE in the AIX / Darwin ABI.
           The Linux kernel might not actually use VRSAVE for its intended
           purpose, but it should be harmless to preserve anyway. */
        /* r3, r4 are live here, so use r5 */
        ld      5,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
        ld      5,0(5)
        cmpldi  5,0
        beq     .LafterVMX1     /* no VMX: skip all vector state */

        /* VRSAVE save word : 32 bits */
        mfspr   5,256         /* vrsave reg is spr number 256 */
        stw     5,324(1)

        /* Alignment padding : 4 bytes */

        /* Vector reg save area (quadword aligned) : 192 bytes.
           stvx only takes register+register addressing, hence the
           offset is materialised in r5 before each store. */
        li      5,304
        stvx    31,5,1
        li      5,288
        stvx    30,5,1
        li      5,272
        stvx    29,5,1
        li      5,256
        stvx    28,5,1
        li      5,240
        stvx    27,5,1
        li      5,224
        stvx    26,5,1
        li      5,208
        stvx    25,5,1
        li      5,192
        stvx    24,5,1
        li      5,176
        stvx    23,5,1
        li      5,160
        stvx    22,5,1
        li      5,144
        stvx    21,5,1
        li      5,128
        stvx    20,5,1
.LafterVMX1:

        /* Local variable space... */

        /* r3 holds guest_state */
        /* r4 holds do_profiling */
        mr      31,3
        std     3,104(1)       /* spill orig guest_state ptr */

        /* 96(sp) used later to check FPSCR[RM] */
        /* 88(sp) used later to load fpscr with zero */
        /* 48:87(sp) free */

        /* Linkage Area (reserved)
           40(sp) : TOC
           32(sp) : link editor doubleword
           24(sp) : compiler doubleword
           16(sp) : LR
           8(sp)  : CR
           0(sp)  : back-chain
        */

// CAB TODO: Use a caller-saved reg for orig guest_state ptr
// - rem to set non-allocateable in isel.c

        /* hold dispatch_ctr (=32bit value) in r29 */
        ld      29,.tocent__vgPlain_dispatch_ctr@toc(2)
        lwz     29,0(29)  /* 32-bit zero-extending load */

        /* set host FPU control word to the default mode expected
           by VEX-generated code.  See comments in libvex.h for
           more info. */
        /* => get zero into f3 (tedious)
           fsub 3,3,3 is not a reliable way to do this, since if
           f3 holds a NaN or similar then we don't necessarily
           wind up with zero. */
        li      5,0
        stw     5,88(1)
        lfs     3,88(1)
        mtfsf   0xFF,3   /* fpscr = lo32 of f3 */

        /* set host AltiVec control word to the default mode expected
           by VEX-generated code. */
        ld      5,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
        ld      5,0(5)
        cmpldi  5,0
        beq     .LafterVMX2

        vspltisw 3,0x0  /* generate zero */
        mtvscr  3
.LafterVMX2:

        /* make a stack frame for the code we are calling.  From here
           until .run_innerloop_exit_REALLY pops it, every offset into
           the 624-byte frame above is 48 larger (e.g. the spilled
           guest_state pointer is at 152(sp), not 104(sp)). */
        stdu    1,-48(1)

        /* fetch %CIA into r3 */
        ld      3,OFFSET_ppc64_CIA(31)

        /* fall into main loop (the right one) */
        /* r4 = do_profiling.  It's probably trashed after here,
           but that's OK: we don't need it after here. */
        cmplwi  4,0
        beq     .VG_(run_innerloop__dispatch_unprofiled)
        b       .VG_(run_innerloop__dispatch_profiled)
        /*NOTREACHED*/


/*----------------------------------------------------*/
/*--- NO-PROFILING (standard) dispatcher           ---*/
/*----------------------------------------------------*/

        .section        ".text"
        .align 2
        .globl VG_(run_innerloop__dispatch_unprofiled)
        .section        ".opd","aw"
        .align 3
VG_(run_innerloop__dispatch_unprofiled):
        .quad   .VG_(run_innerloop__dispatch_unprofiled),.TOC.@tocbase,0
        .previous
        .type   .VG_(run_innerloop__dispatch_unprofiled),@function
        .globl  .VG_(run_innerloop__dispatch_unprofiled)
.VG_(run_innerloop__dispatch_unprofiled):
        /* At entry: Live regs:
                r1  (=sp)
                r2  (toc pointer)
                r3  (=CIA = next guest address)
                r29 (=dispatch_ctr)
                r31 (=guest_state)
           Stack state:
                144(r1) (=var space for FPSCR[RM])
        */
        /* Has the guest state pointer been messed with?  If yes, exit.
           Also set up & VG_(tt_fast) early in an attempt at better
           scheduling. */
        ld      5, .tocent__vgPlain_tt_fast@toc(2) /* &VG_(tt_fast) */
        rldicl. 0,31,0,63       /* r0 = r31 & 1, setting CR0 */
        bne     .gsp_changed

        /* save the jump address in the guest state */
        std     3,OFFSET_ppc64_CIA(31)

        /* Are we out of timeslice?  If yes, defer to scheduler. */
        subi    29,29,1
        cmpldi  29,0
        beq     .counter_is_zero

        /* try a fast lookup in the translation cache */
        /* r4 = VG_TT_FAST_HASH(addr)           * sizeof(FastCacheEntry)
              = ((r3 >>u 2) & VG_TT_FAST_MASK)  << 4 */
        rldicl  4,3, 62, 64-VG_TT_FAST_BITS   /* entry# */
        sldi    4,4,4      /* entry# * sizeof(FastCacheEntry) */
        add     5,5,4      /* & VG_(tt_fast)[entry#] */
        ld      6,0(5)     /* .guest */
        ld      7,8(5)     /* .host */
        cmpd    3,6
        bne     .fast_lookup_failed

        /* Found a match.  Call .host. */
        mtctr   7
        bctrl

        /* On return from guest code:
           r3  holds destination (original) address.
           r31 may be unchanged (guest_state), or may indicate further
           details of the control transfer requested to *r3.
        */
        /* start over */
        b       .VG_(run_innerloop__dispatch_unprofiled)
        /*NOTREACHED*/
        .size .VG_(run_innerloop), .-.VG_(run_innerloop)


/*----------------------------------------------------*/
/*--- PROFILING dispatcher (can be much slower)    ---*/
/*----------------------------------------------------*/

        .section        ".text"
        .align 2
        .globl VG_(run_innerloop__dispatch_profiled)
        .section        ".opd","aw"
        .align 3
VG_(run_innerloop__dispatch_profiled):
        .quad   .VG_(run_innerloop__dispatch_profiled),.TOC.@tocbase,0
        .previous
        .type   .VG_(run_innerloop__dispatch_profiled),@function
        .globl  .VG_(run_innerloop__dispatch_profiled)
.VG_(run_innerloop__dispatch_profiled):
        /* At entry: Live regs:
                r1  (=sp)
                r2  (toc pointer)
                r3  (=CIA = next guest address)
                r29 (=dispatch_ctr)
                r31 (=guest_state)
           Stack state:
                144(r1) (=var space for FPSCR[RM])
        */
        /* Has the guest state pointer been messed with?  If yes, exit.
           Also set up & VG_(tt_fast) early in an attempt at better
           scheduling. */
        ld      5, .tocent__vgPlain_tt_fast@toc(2) /* &VG_(tt_fast) */
        rldicl. 0,31,0,63       /* r0 = r31 & 1, setting CR0 */
        bne     .gsp_changed

        /* save the jump address in the guest state */
        std     3,OFFSET_ppc64_CIA(31)

        /* Are we out of timeslice?  If yes, defer to scheduler. */
        subi    29,29,1
        cmpldi  29,0
        beq     .counter_is_zero

        /* try a fast lookup in the translation cache */
        /* r4 = VG_TT_FAST_HASH(addr)           * sizeof(FastCacheEntry)
              = ((r3 >>u 2) & VG_TT_FAST_MASK)  << 4 */
        rldicl  4,3, 62, 64-VG_TT_FAST_BITS   /* entry# */
        sldi    4,4,4      /* entry# * sizeof(FastCacheEntry) */
        add     5,5,4      /* & VG_(tt_fast)[entry#] */
        ld      6,0(5)     /* .guest */
        ld      7,8(5)     /* .host */
        cmpd    3,6
        bne     .fast_lookup_failed

        /* increment bb profile counter VG_(tt_fastN)[x] (=32bit val) */
        ld      9, .tocent__vgPlain_tt_fastN@toc(2)
        srdi    4, 4,1     /* (entry# * 16) >> 1 = entry# * sizeof(UInt*) */
        ldx     9, 9,4     /* r9 = VG_(tt_fastN)[VG_TT_HASH(addr)] */
        lwz     6, 0(9)    /* *(UInt*)r9 ++ */
        addi    6, 6,1
        stw     6, 0(9)

        /* Found a match.  Call .host. */
        mtctr   7
        bctrl

        /* On return from guest code:
           r3  holds destination (original) address.
           r31 may be unchanged (guest_state), or may indicate further
           details of the control transfer requested to *r3.
        */
        /* start over */
        b       .VG_(run_innerloop__dispatch_profiled)
        /*NOTREACHED*/
        /* (A .size directive for .VG_(run_a_noredir_translation) used
           to sit here, ahead of that symbol's definition; it now
           follows the routine it describes.) */


/*----------------------------------------------------*/
/*--- exit points                                  ---*/
/*----------------------------------------------------*/

.gsp_changed:
        /* Someone messed with the gsp (in r31).  Have to
           defer to scheduler to resolve this.  dispatch ctr
           is not yet decremented, so no need to increment. */
        /* %CIA is NOT up to date here.  First, need to write
           %r3 back to %CIA, but without trashing %r31 since
           that holds the value we want to return to the scheduler.
           Hence use %r5 transiently for the guest state pointer. */
        ld      5,152(1)         /* original guest_state ptr (104 + 48) */
        std     3,OFFSET_ppc64_CIA(5)
        mr      3,31            /* r3 = new gsp value */
        b       .run_innerloop_exit
        /*NOTREACHED*/

.counter_is_zero:
        /* %CIA is up to date */
        /* back out decrement of the dispatch counter */
        addi    29,29,1
        li      3,VG_TRC_INNER_COUNTERZERO
        b       .run_innerloop_exit

.fast_lookup_failed:
        /* %CIA is up to date */
        /* back out decrement of the dispatch counter */
        addi    29,29,1
        li      3,VG_TRC_INNER_FASTMISS
        b       .run_innerloop_exit



/* All exits from the dispatcher go through here.
   r3 holds the return value.
*/
.run_innerloop_exit:
        /* We're leaving.  Check that nobody messed with
           VSCR or FPSCR. */

        /* Set fpscr back to a known state, since vex-generated code
           may have messed with fpscr[rm]. */
        li      5,0
        addi    1,1,-16         /* temporary scratch below sp */
        stw     5,0(1)
        lfs     3,0(1)
        addi    1,1,16
        mtfsf   0xFF,3   /* fpscr = f3 */

        /* Using r11 - value used again further on, so don't trash! */
        ld      11,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
        ld      11,0(11)
        cmpldi  11,0
        beq     .LafterVMX8

        /* Check whether VSCR[NJ] == 1.  VSCR was zeroed on entry, so
           finding NJ set on the way out is an invariant violation. */
        /* first generate 4x 0x00010000 */
        vspltisw  4,0x1                   /* 4x 0x00000001 */
        vspltisw  5,0x0                   /* zero */
        vsldoi    6,4,5,0x2               /* <<2*8 => 4x 0x00010000 */
        /* retrieve VSCR and mask wanted bits */
        mfvscr    7
        vand      7,7,6                   /* gives NJ flag */
        vspltw    7,7,0x3                 /* flags-word to all lanes */
        vcmpequw. 8,6,7                   /* CR[24] = 1 if v6 == v7 */
        bt        24,.invariant_violation /* branch if all_equal */
.LafterVMX8:

        /* otherwise we're OK */
        b       .run_innerloop_exit_REALLY


.invariant_violation:
        li      3,VG_TRC_INVARIANT_FAILED
        b       .run_innerloop_exit_REALLY

.run_innerloop_exit_REALLY:
        /* r3 holds VG_TRC_* value to return */

        /* Return to parent stack (pop the 48-byte callee frame) */
        addi    1,1,48

        /* Write ctr to VG_(dispatch_ctr) (=32bit value) */
        ld      5,.tocent__vgPlain_dispatch_ctr@toc(2)
        stw     29,0(5)

        /* CR is restored exactly once, from 632(sp), just before
           returning.  An earlier "lwz 0,44(1) ; mtcr 0" pair was
           removed from here: nothing in this file stores to 44(sp),
           which lies inside the linkage-area TOC slot at 40(sp), so it
           only loaded an unrelated word into CR. */

        /* Restore callee-saved registers... */

        /* Floating-point regs */
        lfd     31,616(1)
        lfd     30,608(1)
        lfd     29,600(1)
        lfd     28,592(1)
        lfd     27,584(1)
        lfd     26,576(1)
        lfd     25,568(1)
        lfd     24,560(1)
        lfd     23,552(1)
        lfd     22,544(1)
        lfd     21,536(1)
        lfd     20,528(1)
        lfd     19,520(1)
        lfd     18,512(1)
        lfd     17,504(1)
        lfd     16,496(1)
        lfd     15,488(1)
        lfd     14,480(1)

        /* General regs */
        ld      31,472(1)
        ld      30,464(1)
        ld      29,456(1)
        ld      28,448(1)
        ld      27,440(1)
        ld      26,432(1)
        ld      25,424(1)
        ld      24,416(1)
        ld      23,408(1)
        ld      22,400(1)
        ld      21,392(1)
        ld      20,384(1)
        ld      19,376(1)
        ld      18,368(1)
        ld      17,360(1)
        ld      16,352(1)
        ld      15,344(1)
        ld      14,336(1)
        ld      13,328(1)

        /* r11 already holds VG_(machine_ppc64_has_VMX) value */
        cmpldi  11,0
        beq     .LafterVMX9

        /* VRSAVE: write the saved word back to the SPR.  (This was
           previously "mfspr 4,256", which read VRSAVE into r4 and so
           discarded the value just loaded instead of restoring it.) */
        lwz     4,324(1)
        mtspr   256,4         /* VRSAVE reg is spr number 256 */

        /* Vector regs */
        li      4,304
        lvx     31,4,1
        li      4,288
        lvx     30,4,1
        li      4,272
        lvx     29,4,1
        li      4,256
        lvx     28,4,1
        li      4,240
        lvx     27,4,1
        li      4,224
        lvx     26,4,1
        li      4,208
        lvx     25,4,1
        li      4,192
        lvx     24,4,1
        li      4,176
        lvx     23,4,1
        li      4,160
        lvx     22,4,1
        li      4,144
        lvx     21,4,1
        li      4,128
        lvx     20,4,1
.LafterVMX9:

        /* reset cr, lr, sp */
        ld      0,632(1)  /* stack_size + 8 */
        mtcr    0
        ld      0,640(1)  /* stack_size + 16 */
        mtlr    0
        addi    1,1,624   /* stack_size */
        blr


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- A special dispatcher, for running no-redir           ---*/
/*--- translations.  Just runs the given translation once. ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* signature:
void VG_(run_a_noredir_translation) ( UWord* argblock );
*/

/* Run a no-redir translation.  argblock points to 4 UWords, 2 to carry args
   and 2 to carry results:
      0: input:  ptr to translation
      1: input:  ptr to guest state
      2: output: next guest PC
      3: output: guest state pointer afterwards (== thread return code)

   Layout of the 512-byte frame used below (offsets from the new sp):
      256..399  r14..r31
      400       LR
      408       r2 (TOC pointer)
      416       argblock pointer (incoming r3)
*/
.section ".text"
.align   2
.globl VG_(run_a_noredir_translation)
.section ".opd","aw"
.align   3
VG_(run_a_noredir_translation):
.quad    .VG_(run_a_noredir_translation),.TOC.@tocbase,0
.previous
.type    .VG_(run_a_noredir_translation),@function
.globl   .VG_(run_a_noredir_translation)
.VG_(run_a_noredir_translation):
        /* save callee-save int regs, & lr */
        stdu 1,-512(1)
        std  14,256(1)
        std  15,264(1)
        std  16,272(1)
        std  17,280(1)
        std  18,288(1)
        std  19,296(1)
        std  20,304(1)
        std  21,312(1)
        std  22,320(1)
        std  23,328(1)
        std  24,336(1)
        std  25,344(1)
        std  26,352(1)
        std  27,360(1)
        std  28,368(1)
        std  29,376(1)
        std  30,384(1)
        std  31,392(1)
        mflr 31                 /* r31 is free now that it is saved */
        std  31,400(1)
        std   2,408(1)  /* also preserve R2, just in case .. */

        std  3,416(1)           /* keep argblock for after the call */
        ld   31,8(3)            /* r31 = argblock[1] = guest state ptr */
        ld   30,0(3)            /* r30 = argblock[0] = translation */
        mtlr 30
        blrl                    /* run the translation once */

        ld   4,416(1)           /* r4 = argblock */
        std  3, 16(4)           /* argblock[2] = next guest PC */
        std  31,24(4)           /* argblock[3] = guest state ptr afterwards */

        ld   14,256(1)
        ld   15,264(1)
        ld   16,272(1)
        ld   17,280(1)
        ld   18,288(1)
        ld   19,296(1)
        ld   20,304(1)
        ld   21,312(1)
        ld   22,320(1)
        ld   23,328(1)
        ld   24,336(1)
        ld   25,344(1)
        ld   26,352(1)
        ld   27,360(1)
        ld   28,368(1)
        ld   29,376(1)
        ld   30,384(1)
        ld   31,400(1)          /* saved LR, via r31 as a temporary */
        mtlr 31
        ld   31,392(1)          /* now the real r31 */
        ld    2,408(1)  /* also preserve R2, just in case .. */

        addi 1,1,512
        blr
        .size .VG_(run_a_noredir_translation), .-.VG_(run_a_noredir_translation)


/* Let the linker know we don't need an executable stack */
.section .note.GNU-stack,"",@progbits

#endif // defined(VGP_ppc64_linux)

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/