/* dispatch-ppc64be-linux.S revision 8b68b64759254d514d98328c496cbd88cde4c9a5 */
/*--------------------------------------------------------------------*/
/*--- The core dispatch loop, for jumping to a code address.       ---*/
/*---                                       dispatch-ppc64-linux.S ---*/
/*--------------------------------------------------------------------*/

/*
  This file is part of Valgrind, a dynamic binary instrumentation
  framework.

  Copyright (C) 2005-2009 Cerion Armour-Brown <cerion@open-works.co.uk>

  This program is free software; you can redistribute it and/or
  modify it under the terms of the GNU General Public License as
  published by the Free Software Foundation; either version 2 of the
  License, or (at your option) any later version.

  This program is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  02111-1307, USA.

  The GNU General Public License is contained in the file COPYING.
*/

#if defined(VGP_ppc64_linux)

#include "pub_core_basics_asm.h"
#include "pub_core_dispatch_asm.h"
#include "pub_core_transtab_asm.h"
#include "libvex_guest_offsets.h"       /* for OFFSET_ppc64_CIA */


/* References to globals via the TOC */

/*
        .globl  vgPlain_tt_fast
        .lcomm  vgPlain_tt_fast,4,4
        .type   vgPlain_tt_fast, @object
*/
        .section ".toc","aw"
.tocent__vgPlain_tt_fast:
        .tc vgPlain_tt_fast[TC],vgPlain_tt_fast
.tocent__vgPlain_tt_fastN:
        .tc vgPlain_tt_fastN[TC],vgPlain_tt_fastN
.tocent__vgPlain_dispatch_ctr:
        .tc vgPlain_dispatch_ctr[TC],vgPlain_dispatch_ctr
.tocent__vgPlain_machine_ppc64_has_VMX:
        .tc vgPlain_machine_ppc64_has_VMX[TC],vgPlain_machine_ppc64_has_VMX

/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- The dispatch loop.  VG_(run_innerloop) is used to    ---*/
/*--- run all translations except no-redir ones.           ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/*----------------------------------------------------*/
/*--- Preamble (set everything up)                 ---*/
/*----------------------------------------------------*/

/* signature:
UWord VG_(run_innerloop) ( void* guest_state, UWord do_profiling );
*/

/* In:   r3 = guest_state, r4 = do_profiling, r2 = TOC pointer.
   Out:  r3 = VG_TRC_* code, or the changed guest state pointer
         (see .gsp_changed).

   Frame layout built below (624 bytes, offsets from the new sp):
     480..623   f14-f31
     336..479   r14-r31
     328        r13
     324        VRSAVE word         (only written if VMX is present)
     128..319   v20-v31             (only written if VMX is present)
     104        original guest_state pointer
      96        used later to check FPSCR[RM]
      88        scratch word used to load zero into the FPSCR
      48..87    free
       0..47    linkage area
   A further 48-byte frame is pushed before entering the dispatchers,
   so inside them every offset above is 48 larger (e.g. 104 -> 152).

   Register roles while dispatching:
     r29 = dispatch counter, r31 = guest state pointer.
*/

.section ".text"
.align   2
.globl VG_(run_innerloop)
.section ".opd","aw"
.align   3
VG_(run_innerloop):
.quad    .VG_(run_innerloop),.TOC.@tocbase,0
.previous
.type    .VG_(run_innerloop),@function
.globl   .VG_(run_innerloop)
.VG_(run_innerloop):
        /* r3 holds guest_state */
        /* r4 holds do_profiling */

        /* ----- entry point to VG_(run_innerloop) ----- */
        /* PPC64 ABI saves LR at 16(parent sp), CR at 8(parent sp) */

        /* Save lr, cr */
        mflr    0
        std     0,16(1)
        mfcr    0
        std     0,8(1)

        /* New stack frame */
        stdu    1,-624(1)  /* sp should maintain 16-byte alignment */

        /* Save callee-saved registers... */

        /* Floating-point reg save area : 144 bytes */
        stfd    31,616(1)
        stfd    30,608(1)
        stfd    29,600(1)
        stfd    28,592(1)
        stfd    27,584(1)
        stfd    26,576(1)
        stfd    25,568(1)
        stfd    24,560(1)
        stfd    23,552(1)
        stfd    22,544(1)
        stfd    21,536(1)
        stfd    20,528(1)
        stfd    19,520(1)
        stfd    18,512(1)
        stfd    17,504(1)
        stfd    16,496(1)
        stfd    15,488(1)
        stfd    14,480(1)

        /* General reg save area : 144 bytes */
        std     31,472(1)
        std     30,464(1)
        std     29,456(1)
        std     28,448(1)
        std     27,440(1)
        std     26,432(1)
        std     25,424(1)
        std     24,416(1)
        std     23,408(1)
        std     22,400(1)
        std     21,392(1)
        std     20,384(1)
        std     19,376(1)
        std     18,368(1)
        std     17,360(1)
        std     16,352(1)
        std     15,344(1)
        std     14,336(1)
        /* Probably not necessary to save r13 (thread-specific ptr),
           as VEX stays clear of it... but what the hey. */
        std     13,328(1)

        /* It's necessary to save/restore VRSAVE in the AIX / Darwin ABI.
           The Linux kernel might not actually use VRSAVE for its intended
           purpose, but it should be harmless to preserve anyway. */
        /* r3, r4 are live here, so use r5 */
        ld      5,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
        ld      5,0(5)
        cmpldi  5,0
        beq     .LafterVMX1

        /* VRSAVE save word : 4 bytes (one 32-bit word) */
        mfspr   5,256         /* vrsave reg is spr number 256 */
        stw     5,324(1)

        /* Alignment padding : 4 bytes */

        /* Vector reg save area (quadword aligned) : 192 bytes */
        li      5,304
        stvx    31,5,1
        li      5,288
        stvx    30,5,1
        li      5,272
        stvx    29,5,1
        li      5,256
        stvx    28,5,1
        li      5,240
        stvx    27,5,1
        li      5,224
        stvx    26,5,1
        li      5,208
        stvx    25,5,1
        li      5,192
        stvx    24,5,1
        li      5,176
        stvx    23,5,1
        li      5,160
        stvx    22,5,1
        li      5,144
        stvx    21,5,1
        li      5,128
        stvx    20,5,1
.LafterVMX1:

        /* Local variable space... */

        /* r3 holds guest_state */
        /* r4 holds do_profiling */
        mr      31,3
        std     3,104(1)       /* spill orig guest_state ptr */

        /* 96(sp) used later to check FPSCR[RM] */
        /* 88(sp) used later to load fpscr with zero */
        /* 48:87(sp) free */

        /* Linkage Area (reserved)
           40(sp) : TOC
           32(sp) : link editor doubleword
           24(sp) : compiler doubleword
           16(sp) : LR
           8(sp)  : CR
           0(sp)  : back-chain
        */

// CAB TODO: Use a caller-saved reg for orig guest_state ptr
// - rem to set non-allocateable in isel.c

        /* hold dispatch_ctr (=32bit value) in r29 */
        ld      29,.tocent__vgPlain_dispatch_ctr@toc(2)
        lwz     29,0(29)  /* 32-bit zero-extending load */

        /* set host FPU control word to the default mode expected
           by VEX-generated code.  See comments in libvex.h for
           more info. */
        /* => get zero into f3 (tedious)
           fsub 3,3,3 is not a reliable way to do this, since if
           f3 holds a NaN or similar then we don't necessarily
           wind up with zero. */
        li      5,0
        stw     5,88(1)
        lfs     3,88(1)
        mtfsf   0xFF,3   /* fpscr = lo32 of f3 */

        /* set host AltiVec control word to the default mode expected
           by VEX-generated code. */
        ld      5,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
        ld      5,0(5)
        cmpldi  5,0
        beq     .LafterVMX2

        vspltisw 3,0x0  /* generate zero */
        mtvscr  3
.LafterVMX2:

        /* make a stack frame for the code we are calling */
        stdu    1,-48(1)

        /* fetch %CIA into r3 */
        ld      3,OFFSET_ppc64_CIA(31)

        /* fall into main loop (the right one) */
        /* r4 = do_profiling.  It's probably trashed after here,
           but that's OK: we don't need it after here. */
        /* do_profiling is a UWord, so test all 64 bits of r4 rather
           than only the low word. */
        cmpldi  4,0
        beq     .VG_(run_innerloop__dispatch_unprofiled)
        b       .VG_(run_innerloop__dispatch_profiled)
        /*NOTREACHED*/


/*----------------------------------------------------*/
/*--- NO-PROFILING (standard) dispatcher           ---*/
/*----------------------------------------------------*/

        .section ".text"
        .align   2
        .globl   VG_(run_innerloop__dispatch_unprofiled)
        .section ".opd","aw"
        .align   3
VG_(run_innerloop__dispatch_unprofiled):
        .quad    .VG_(run_innerloop__dispatch_unprofiled),.TOC.@tocbase,0
        .previous
        .type    .VG_(run_innerloop__dispatch_unprofiled),@function
        .globl   .VG_(run_innerloop__dispatch_unprofiled)
.VG_(run_innerloop__dispatch_unprofiled):
        /* At entry: Live regs:
                r1  (=sp)
                r2  (toc pointer)
                r3  (=CIA = next guest address)
                r29 (=dispatch_ctr)
                r31 (=guest_state)
           Stack state:
                152(r1) (=orig guest_state)
                144(r1) (=var space for FPSCR[RM])
        */
        /* Has the guest state pointer been messed with?  If yes, exit.
           Also set up & VG_(tt_fast) early in an attempt at better
           scheduling. */
        ld      9,152(1)        /* original guest_state ptr */
        ld      5, .tocent__vgPlain_tt_fast@toc(2) /* &VG_(tt_fast) */
        cmpd    9,31
        bne     .gsp_changed

        /* save the jump address in the guest state */
        std     3,OFFSET_ppc64_CIA(31)

        /* Are we out of timeslice?  If yes, defer to scheduler. */
        subi    29,29,1
        cmpldi  29,0
        beq     .counter_is_zero

        /* try a fast lookup in the translation cache */
        /* r4 = VG_TT_FAST_HASH(addr)           * sizeof(FastCacheEntry)
              = ((r3 >>u 2) & VG_TT_FAST_MASK)  << 4 */
        rldicl  4,3, 62, 64-VG_TT_FAST_BITS   /* entry# */
        sldi    4,4,4      /* entry# * sizeof(FastCacheEntry) */
        add     5,5,4      /* & VG_(tt_fast)[entry#] */
        ld      6,0(5)     /* .guest */
        ld      7,8(5)     /* .host */
        cmpd    3,6
        bne     .fast_lookup_failed

        /* Found a match.  Call .host. */
        mtctr   7
        bctrl

        /* On return from guest code:
           r3  holds destination (original) address.
           r31 may be unchanged (guest_state), or may indicate further
           details of the control transfer requested to *r3.
        */
        /* start over */
        b       .VG_(run_innerloop__dispatch_unprofiled)
        /*NOTREACHED*/
        .size VG_(run_innerloop), .-VG_(run_innerloop)


/*----------------------------------------------------*/
/*--- PROFILING dispatcher (can be much slower)    ---*/
/*----------------------------------------------------*/

        .section ".text"
        .align   2
        .globl   VG_(run_innerloop__dispatch_profiled)
        .section ".opd","aw"
        .align   3
VG_(run_innerloop__dispatch_profiled):
        .quad    .VG_(run_innerloop__dispatch_profiled),.TOC.@tocbase,0
        .previous
        .type    .VG_(run_innerloop__dispatch_profiled),@function
        .globl   .VG_(run_innerloop__dispatch_profiled)
.VG_(run_innerloop__dispatch_profiled):
        /* At entry: Live regs:
                r1  (=sp)
                r2  (toc pointer)
                r3  (=CIA = next guest address)
                r29 (=dispatch_ctr)
                r31 (=guest_state)
           Stack state:
                152(r1) (=orig guest_state)
                144(r1) (=var space for FPSCR[RM])
        */
        /* Has the guest state pointer been messed with?  If yes, exit.
           Also set up & VG_(tt_fast) early in an attempt at better
           scheduling. */
        ld      9,152(1)        /* original guest_state ptr */
        ld      5, .tocent__vgPlain_tt_fast@toc(2) /* &VG_(tt_fast) */
        cmpd    9,31
        bne     .gsp_changed

        /* save the jump address in the guest state */
        std     3,OFFSET_ppc64_CIA(31)

        /* Are we out of timeslice?  If yes, defer to scheduler. */
        subi    29,29,1
        cmpldi  29,0
        beq     .counter_is_zero

        /* try a fast lookup in the translation cache */
        /* r4 = VG_TT_FAST_HASH(addr)           * sizeof(FastCacheEntry)
              = ((r3 >>u 2) & VG_TT_FAST_MASK)  << 4 */
        rldicl  4,3, 62, 64-VG_TT_FAST_BITS   /* entry# */
        sldi    4,4,4      /* entry# * sizeof(FastCacheEntry) */
        add     5,5,4      /* & VG_(tt_fast)[entry#] */
        ld      6,0(5)     /* .guest */
        ld      7,8(5)     /* .host */
        cmpd    3,6
        bne     .fast_lookup_failed

        /* increment bb profile counter VG_(tt_fastN)[x] (=32bit val) */
        ld      9, .tocent__vgPlain_tt_fastN@toc(2)
        srdi    4, 4,1     /* entry# * sizeof(UInt*) */
        ldx     9, 9,4     /* r9 = VG_(tt_fastN)[VG_TT_HASH(addr)] */
        lwz     6, 0(9)    /* *(UInt*)r9 ++ */
        addi    6, 6,1
        stw     6, 0(9)

        /* Found a match.  Call .host. */
        mtctr   7
        bctrl

        /* On return from guest code:
           r3  holds destination (original) address.
           r31 may be unchanged (guest_state), or may indicate further
           details of the control transfer requested to *r3.
        */
        /* start over */
        b       .VG_(run_innerloop__dispatch_profiled)
        /*NOTREACHED*/
        /* NOTE(review): this .size names VG_(run_a_noredir_translation),
           which is defined much further down, although it sits at the
           end of the profiled dispatcher.  Looks like a copy/paste
           leftover -- confirm intent before changing it. */
        .size VG_(run_a_noredir_translation), .-VG_(run_a_noredir_translation)


/*----------------------------------------------------*/
/*--- exit points                                  ---*/
/*----------------------------------------------------*/

.gsp_changed:
        /* Someone messed with the gsp (in r31).  Have to
           defer to scheduler to resolve this.  dispatch ctr
           is not yet decremented, so no need to increment. */
        /* %CIA is NOT up to date here.  First, need to write
           %r3 back to %CIA, but without trashing %r31 since
           that holds the value we want to return to the scheduler.
           Hence use %r5 transiently for the guest state pointer. */
        ld      5,152(1)         /* original guest_state ptr */
        std     3,OFFSET_ppc64_CIA(5)
        mr      3,31             /* r3 = new gsp value */
        b       .run_innerloop_exit
        /*NOTREACHED*/

.counter_is_zero:
        /* %CIA is up to date */
        /* back out decrement of the dispatch counter */
        addi    29,29,1
        li      3,VG_TRC_INNER_COUNTERZERO
        b       .run_innerloop_exit

.fast_lookup_failed:
        /* %CIA is up to date */
        /* back out decrement of the dispatch counter */
        addi    29,29,1
        li      3,VG_TRC_INNER_FASTMISS
        b       .run_innerloop_exit



/* All exits from the dispatcher go through here.
   r3 holds the return value.
*/
.run_innerloop_exit:
        /* We're leaving.  Check that nobody messed with
           VSCR or FPSCR. */

        /* Set fpscr back to a known state, since vex-generated code
           may have messed with fpscr[rm]. */
        li      5,0
        addi    1,1,-16
        stw     5,0(1)
        lfs     3,0(1)
        addi    1,1,16
        mtfsf   0xFF,3   /* fpscr = f3 */

        /* Using r11 - value used again further on, so don't trash! */
        ld      11,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
        ld      11,0(11)
        cmpldi  11,0
        beq     .LafterVMX8

        /* Check VSCR[NJ] == 1 */
        /* first generate 4x 0x00010000 */
        vspltisw  4,0x1                   /* 4x 0x00000001 */
        vspltisw  5,0x0                   /* zero */
        vsldoi    6,4,5,0x2               /* <<2*8 => 4x 0x00010000 */
        /* retrieve VSCR and mask wanted bits */
        mfvscr    7
        vand      7,7,6                   /* gives NJ flag */
        vspltw    7,7,0x3                 /* flags-word to all lanes */
        vcmpequw. 8,6,7                   /* CR[24] = 1 if v6 == v7 */
        bt        24,.invariant_violation /* branch if all_equal */
.LafterVMX8:

        /* otherwise we're OK */
        b       .run_innerloop_exit_REALLY


.invariant_violation:
        li      3,VG_TRC_INVARIANT_FAILED
        b       .run_innerloop_exit_REALLY

.run_innerloop_exit_REALLY:
        /* r3 holds VG_TRC_* value to return */

        /* Return to parent stack */
        addi    1,1,48

        /* Write ctr to VG_(dispatch_ctr) (=32bit value) */
        ld      5,.tocent__vgPlain_dispatch_ctr@toc(2)
        stw     29,0(5)

        /* CR is restored at the very end, from the slot the prologue
           stored it in (8 bytes above the caller's sp, i.e. 632(1)
           here).  Nothing in this frame holds a saved CR, so there is
           no intermediate restore. */

        /* Restore callee-saved registers... */

        /* Floating-point regs */
        lfd     31,616(1)
        lfd     30,608(1)
        lfd     29,600(1)
        lfd     28,592(1)
        lfd     27,584(1)
        lfd     26,576(1)
        lfd     25,568(1)
        lfd     24,560(1)
        lfd     23,552(1)
        lfd     22,544(1)
        lfd     21,536(1)
        lfd     20,528(1)
        lfd     19,520(1)
        lfd     18,512(1)
        lfd     17,504(1)
        lfd     16,496(1)
        lfd     15,488(1)
        lfd     14,480(1)

        /* General regs */
        ld      31,472(1)
        ld      30,464(1)
        ld      29,456(1)
        ld      28,448(1)
        ld      27,440(1)
        ld      26,432(1)
        ld      25,424(1)
        ld      24,416(1)
        ld      23,408(1)
        ld      22,400(1)
        ld      21,392(1)
        ld      20,384(1)
        ld      19,376(1)
        ld      18,368(1)
        ld      17,360(1)
        ld      16,352(1)
        ld      15,344(1)
        ld      14,336(1)
        ld      13,328(1)

        /* r11 already holds VG_(machine_ppc64_has_VMX) value */
        cmpldi  11,0
        beq     .LafterVMX9

        /* VRSAVE: write the value saved by the prologue back into the
           SPR (mtspr, not mfspr -- reading the SPR here would discard
           the value just loaded and leave VRSAVE unrestored). */
        lwz     4,324(1)
        mtspr   256,4         /* VRSAVE reg is spr number 256 */

        /* Vector regs */
        li      4,304
        lvx     31,4,1
        li      4,288
        lvx     30,4,1
        li      4,272
        lvx     29,4,1
        li      4,256
        lvx     28,4,1
        li      4,240
        lvx     27,4,1
        li      4,224
        lvx     26,4,1
        li      4,208
        lvx     25,4,1
        li      4,192
        lvx     24,4,1
        li      4,176
        lvx     23,4,1
        li      4,160
        lvx     22,4,1
        li      4,144
        lvx     21,4,1
        li      4,128
        lvx     20,4,1
.LafterVMX9:

        /* reset cr, lr, sp */
        ld      0,632(1)  /* stack_size + 8 */
        mtcr    0
        ld      0,640(1)  /* stack_size + 16 */
        mtlr    0
        addi    1,1,624   /* stack_size */
        blr


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- A special dispatcher, for running no-redir           ---*/
/*--- translations.  Just runs the given translation once. ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* signature:
void VG_(run_a_noredir_translation) ( UWord* argblock );
*/

/* Run a no-redir translation.  argblock points to 4 UWords, 2 to carry args
   and 2 to carry results:
      0: input:  ptr to translation
      1: input:  ptr to guest state
      2: output: next guest PC
      3: output: guest state pointer afterwards (== thread return code)
*/
/* In:  r3 = argblock.
   Uses a 512-byte frame: r14-r31 at 256..399, LR at 400, r2 at 408,
   argblock pointer at 416.  The translation is entered via LR with
   r31 = guest state pointer; on return r3 and r31 are written to
   argblock[2] and argblock[3]. */
.section ".text"
.align   2
.globl VG_(run_a_noredir_translation)
.section ".opd","aw"
.align   3
VG_(run_a_noredir_translation):
.quad    .VG_(run_a_noredir_translation),.TOC.@tocbase,0
.previous
.type    .VG_(run_a_noredir_translation),@function
.globl   .VG_(run_a_noredir_translation)
.VG_(run_a_noredir_translation):
        /* save callee-save int regs, & lr */
        stdu    1,-512(1)
        std     14,256(1)
        std     15,264(1)
        std     16,272(1)
        std     17,280(1)
        std     18,288(1)
        std     19,296(1)
        std     20,304(1)
        std     21,312(1)
        std     22,320(1)
        std     23,328(1)
        std     24,336(1)
        std     25,344(1)
        std     26,352(1)
        std     27,360(1)
        std     28,368(1)
        std     29,376(1)
        std     30,384(1)
        std     31,392(1)
        mflr    31
        std     31,400(1)
        std     2,408(1)  /* also preserve R2, just in case .. */

        std     3,416(1)  /* keep argblock ptr across the call */
        ld      31,8(3)   /* r31 = argblock[1] = guest state ptr */
        ld      30,0(3)   /* r30 = argblock[0] = translation */
        mtlr    30
        blrl

        ld      4,416(1)  /* r4 = argblock */
        std     3, 16(4)  /* argblock[2] = next guest PC */
        std     31,24(4)  /* argblock[3] = guest state ptr afterwards */

        ld      14,256(1)
        ld      15,264(1)
        ld      16,272(1)
        ld      17,280(1)
        ld      18,288(1)
        ld      19,296(1)
        ld      20,304(1)
        ld      21,312(1)
        ld      22,320(1)
        ld      23,328(1)
        ld      24,336(1)
        ld      25,344(1)
        ld      26,352(1)
        ld      27,360(1)
        ld      28,368(1)
        ld      29,376(1)
        ld      30,384(1)
        ld      31,400(1)  /* saved LR, via r31 as scratch */
        mtlr    31
        ld      31,392(1)
        ld      2,408(1)  /* also preserve R2, just in case .. */

        addi    1,1,512
        blr


/* Let the linker know we don't need an executable stack */
.section .note.GNU-stack,"",@progbits

#endif // defined(VGP_ppc64_linux)

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/