/*--------------------------------------------------------------------*/
/*--- The core dispatch loop, for jumping to a code address.       ---*/
/*---                                       dispatch-ppc64-linux.S ---*/
/*--------------------------------------------------------------------*/

/*
  This file is part of Valgrind, a dynamic binary instrumentation
  framework.

  Copyright (C) 2005-2015 Cerion Armour-Brown <cerion@open-works.co.uk>

  This program is free software; you can redistribute it and/or
  modify it under the terms of the GNU General Public License as
  published by the Free Software Foundation; either version 2 of the
  License, or (at your option) any later version.

  This program is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  02111-1307, USA.

  The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics_asm.h"

#if defined(VGP_ppc64be_linux)

#include "pub_core_dispatch_asm.h"
#include "pub_core_transtab_asm.h"
#include "libvex_guest_offsets.h"       /* for OFFSET_ppc64_CIA */


/* References to globals via the TOC */

/*
        .globl  vgPlain_tt_fast
        .lcomm  vgPlain_tt_fast,4,4
        .type   vgPlain_tt_fast, @object
*/
.section ".toc","aw"
.tocent__vgPlain_tt_fast:
        .tc vgPlain_tt_fast[TC],vgPlain_tt_fast
.tocent__vgPlain_stats__n_xindirs_32:
        .tc vgPlain_stats__n_xindirs_32[TC],vgPlain_stats__n_xindirs_32
.tocent__vgPlain_stats__n_xindir_misses_32:
        .tc vgPlain_stats__n_xindir_misses_32[TC],vgPlain_stats__n_xindir_misses_32
.tocent__vgPlain_machine_ppc64_has_VMX:
        .tc vgPlain_machine_ppc64_has_VMX[TC],vgPlain_machine_ppc64_has_VMX

/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- The dispatch loop.  VG_(disp_run_translations) is    ---*/
/*--- used to run all translations,                        ---*/
/*--- including no-redir ones.                             ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/*----------------------------------------------------*/
/*--- Entry and preamble (set everything up)       ---*/
/*----------------------------------------------------*/

/* signature:
void VG_(disp_run_translations)( UWord* two_words,
                                 void*  guest_state,
                                 Addr   host_addr );

   In:   r3 = two_words, r4 = guest_state, r5 = host_addr
   Out:  two_words[0] = TRC value (from r6 at .postamble)
         two_words[1] = optional second word (from r7 at .postamble)

   Layout of the 624-byte frame created below (offsets from the new sp):
       0 ..  47 : linkage area (back-chain, CR, LR, 2 reserved dwords, TOC)
      48 ..  87 : free
      88        : scratch word used to load zero into FPSCR
      96        : reserved scratch slot (not referenced in this file)
     104        : saved r3 (two_words)
     128 .. 319 : v20..v31  (only if VMX is present)
     320 .. 323 : alignment padding
     324 .. 327 : VRSAVE    (only if VMX is present)
     328 .. 479 : r13..r31
     480 .. 623 : f14..f31
*/

.section ".text"
.align   2
.globl   VG_(disp_run_translations)
.section ".opd","aw"
.align   3
VG_(disp_run_translations):
.quad    .VG_(disp_run_translations),.TOC.@tocbase,0
.previous
.type    .VG_(disp_run_translations),@function
.globl   .VG_(disp_run_translations)
.VG_(disp_run_translations):
        /* r3 holds two_words */
        /* r4 holds guest_state */
        /* r5 holds host_addr */

        /* ----- entry point to VG_(disp_run_translations) ----- */
        /* LR and CR are saved into the caller's frame:
           LR -> 16(caller sp), CR -> 8(caller sp) */

        /* Save lr, cr */
        mflr    6
        std     6,16(1)
        mfcr    6
        std     6,8(1)

        /* New stack frame */
        stdu    1,-624(1)  /* sp should maintain 16-byte alignment */

        /* Save callee-saved registers... */
        /* General reg save area : 152 bytes */
        std     31,472(1)
        std     30,464(1)
        std     29,456(1)
        std     28,448(1)
        std     27,440(1)
        std     26,432(1)
        std     25,424(1)
        std     24,416(1)
        std     23,408(1)
        std     22,400(1)
        std     21,392(1)
        std     20,384(1)
        std     19,376(1)
        std     18,368(1)
        std     17,360(1)
        std     16,352(1)
        std     15,344(1)
        std     14,336(1)
        std     13,328(1)
        std     3,104(1)   /* save two_words for later */

        /* Floating-point reg save area : 144 bytes */
        stfd    31,616(1)
        stfd    30,608(1)
        stfd    29,600(1)
        stfd    28,592(1)
        stfd    27,584(1)
        stfd    26,576(1)
        stfd    25,568(1)
        stfd    24,560(1)
        stfd    23,552(1)
        stfd    22,544(1)
        stfd    21,536(1)
        stfd    20,528(1)
        stfd    19,520(1)
        stfd    18,512(1)
        stfd    17,504(1)
        stfd    16,496(1)
        stfd    15,488(1)
        stfd    14,480(1)

        /* It's necessary to save/restore VRSAVE in the AIX / Darwin ABI.
           The Linux kernel might not actually use VRSAVE for its intended
           purpose, but it should be harmless to preserve anyway. */
        /* r3, r4, r5 are live here, so use r6 */
        ld      6,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
        ld      6,0(6)
        cmpldi  6,0
        beq     .LafterVMX1

        /* VRSAVE save word : 32 bits */
        mfspr   6,256         /* vrsave reg is spr number 256 */
        stw     6,324(1)

        /* Alignment padding : 4 bytes */

        /* Vector reg save area (quadword aligned) : 192 bytes */
        li      6,304
        stvx    31,6,1
        li      6,288
        stvx    30,6,1
        li      6,272
        stvx    29,6,1
        li      6,256
        stvx    28,6,1
        li      6,240
        stvx    27,6,1
        li      6,224
        stvx    26,6,1
        li      6,208
        stvx    25,6,1
        li      6,192
        stvx    24,6,1
        li      6,176
        stvx    23,6,1
        li      6,160
        stvx    22,6,1
        li      6,144
        stvx    21,6,1
        li      6,128
        stvx    20,6,1
.LafterVMX1:

        /* Local variable space... */

        /* r3 holds two_words */
        /* r4 holds guest_state */
        /* r5 holds host_addr */

        /* 96(sp) reserved scratch slot (not referenced in this file) */
        /* 88(sp) used just below to load fpscr with zero */
        /* 48:87(sp) free */

        /* Linkage Area (reserved)  BE ABI
           40(sp) : TOC
           32(sp) : link editor doubleword
           24(sp) : compiler doubleword
           16(sp) : LR
           8(sp)  : CR
           0(sp)  : back-chain
        */

        /* set host FPU control word to the default mode expected
           by VEX-generated code.  See comments in libvex.h for
           more info. */
        /* => get zero into f3 (tedious)
           fsub 3,3,3 is not a reliable way to do this, since if
           f3 holds a NaN or similar then we don't necessarily
           wind up with zero. */
        li      6,0
        stw     6,88(1)
        lfs     3,88(1)
        mtfsf   0xFF,3   /* fpscr = lo32 of f3 */

        /* set host AltiVec control word to the default mode expected
           by VEX-generated code. */
        ld      6,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
        ld      6,0(6)
        cmpldi  6,0
        beq     .LafterVMX2

        vspltisw 3,0x0  /* generate zero */
        mtvscr   3      /* VSCR = 0 */
.LafterVMX2:

        /* make a stack frame for the code we are calling */
        stdu    1,-48(1)

        /* Set up the guest state ptr */
        mr      31,4      /* r31 (generated code gsp) = r4 */

        /* and jump into the code cache.  Chained translations in
           the code cache run, until for whatever reason, they can't
           continue.  When that happens, the translation in question
           will jump (or call) to one of the continuation points
           VG_(cp_...) below. */
        mtctr   5
        bctr
        /*NOTREACHED*/

/*----------------------------------------------------*/
/*--- Postamble and exit.                          ---*/
/*----------------------------------------------------*/

.postamble:
        /* At this point, r6 and r7 contain two
           words to be returned to the caller.  r6
           holds a TRC value, and r7 optionally may
           hold another word (for CHAIN_ME exits, the
           address of the place to patch.) */

        /* undo the "make a stack frame for the code we are calling" */
        addi    1,1,48

        /* We're leaving.  Check that nobody messed with
           VSCR or FPSCR in ways we don't expect. */
        /* Using r11 - value used again further on, so don't trash! */
        ld      11,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
        ld      11,0(11)

        /* Set fpscr back to a known state, since vex-generated code
           may have messed with fpscr[rm]. */
        li      5,0
        addi    1,1,-16  /* borrow a scratch word 16 bytes below sp */
        stw     5,0(1)
        lfs     3,0(1)
        addi    1,1,16   /* sp back to the 624-byte frame */
        mtfsf   0xFF,3   /* fpscr = f3 */

        cmpldi  11,0    /* Do we have altivec? */
        beq     .LafterVMX8

        /* Check whether VSCR[NJ] has become 1.  The preamble cleared
           VSCR, so NJ == 1 here is treated as an invariant violation. */
        /* first generate 4x 0x00010000 */
        vspltisw  4,0x1                   /* 4x 0x00000001 */
        vspltisw  5,0x0                   /* zero */
        vsldoi    6,4,5,0x2               /* <<2*8 => 4x 0x00010000 */
        /* retrieve VSCR and mask wanted bits */
        mfvscr    7
        vand      7,7,6                   /* gives NJ flag */
        vspltw    7,7,0x3                 /* flags-word to all lanes */
        vcmpequw. 8,6,7                   /* CR[24] = 1 if v6 == v7 */
        bt        24,.invariant_violation /* branch if all_equal */

.LafterVMX8:
        /* otherwise we're OK */
        b       .remove_frame

.invariant_violation:
        li      6,VG_TRC_INVARIANT_FAILED
        li      7,0
        /* fall through */

.remove_frame:
        /* r11 already holds VG_(machine_ppc64_has_VMX) value.  It was
           loaded with a 64-bit ld, so test all 64 bits, exactly as the
           preamble did when deciding whether to save the vector state. */
        cmpldi  11,0
        beq     .LafterVMX9

        /* Restore Altivec regs.
           Use r5 as scratch since r6/r7 are live. */
        /* VRSAVE: write the saved word back into the SPR */
        lwz     5,324(1)
        mtspr   256,5         /* VRSAVE reg is spr number 256 */

        /* Vector regs */
        li      5,304
        lvx     31,5,1
        li      5,288
        lvx     30,5,1
        li      5,272
        lvx     29,5,1
        li      5,256
        lvx     28,5,1
        li      5,240
        lvx     27,5,1
        li      5,224
        lvx     26,5,1
        li      5,208
        lvx     25,5,1
        li      5,192
        lvx     24,5,1
        li      5,176
        lvx     23,5,1
        li      5,160
        lvx     22,5,1
        li      5,144
        lvx     21,5,1
        li      5,128
        lvx     20,5,1
.LafterVMX9:

        /* Restore FP regs */
        /* Floating-point regs */
        lfd     31,616(1)
        lfd     30,608(1)
        lfd     29,600(1)
        lfd     28,592(1)
        lfd     27,584(1)
        lfd     26,576(1)
        lfd     25,568(1)
        lfd     24,560(1)
        lfd     23,552(1)
        lfd     22,544(1)
        lfd     21,536(1)
        lfd     20,528(1)
        lfd     19,520(1)
        lfd     18,512(1)
        lfd     17,504(1)
        lfd     16,496(1)
        lfd     15,488(1)
        lfd     14,480(1)

        /* restore int regs, including importantly r3 (two_words) */
        ld      31,472(1)
        ld      30,464(1)
        ld      29,456(1)
        ld      28,448(1)
        ld      27,440(1)
        ld      26,432(1)
        ld      25,424(1)
        ld      24,416(1)
        ld      23,408(1)
        ld      22,400(1)
        ld      21,392(1)
        ld      20,384(1)
        ld      19,376(1)
        ld      18,368(1)
        ld      17,360(1)
        ld      16,352(1)
        ld      15,344(1)
        ld      14,336(1)
        ld      13,328(1)
        ld      3,104(1)
        /* Stash return values */
        std     6,0(3)    /* two_words[0] = TRC */
        std     7,8(3)    /* two_words[1] = optional second word */

        /* restore cr, lr & sp, and leave */
        ld      0,632(1)  /* stack_size + 8 */
        mtcr    0
        ld      0,640(1)  /* stack_size + 16 */
        mtlr    0
        addi    1,1,624   /* stack_size */
        blr


/*----------------------------------------------------*/
/*--- Continuation points                          ---*/
/*----------------------------------------------------*/

/* ------ Chain me to slow entry point ------ */
        .section ".text"
        .align   2
        .globl   VG_(disp_cp_chain_me_to_slowEP)
        .section ".opd","aw"
        .align   3
VG_(disp_cp_chain_me_to_slowEP):
        .quad    .VG_(disp_cp_chain_me_to_slowEP),.TOC.@tocbase,0
        .previous
        .type    .VG_(disp_cp_chain_me_to_slowEP),@function
        .globl   .VG_(disp_cp_chain_me_to_slowEP)
.VG_(disp_cp_chain_me_to_slowEP):
        /* We got called.  The return address indicates
           where the patching needs to happen.  Collect
           the return address and, exit back to C land,
           handing the caller the pair (Chain_me_S, RA) */
        li      6, VG_TRC_CHAIN_ME_TO_SLOW_EP
        mflr    7
        /* 20 = imm64-fixed5 r30, disp_cp_chain_me_to_slowEP
           4  = mtctr r30
           4  = bctr
        */
        subi    7,7,20+4+4
        b       .postamble

/* ------ Chain me to fast entry point ------ */
        .section ".text"
        .align   2
        .globl   VG_(disp_cp_chain_me_to_fastEP)
        .section ".opd","aw"
        .align   3
VG_(disp_cp_chain_me_to_fastEP):
        .quad    .VG_(disp_cp_chain_me_to_fastEP),.TOC.@tocbase,0
        .previous
        .type    .VG_(disp_cp_chain_me_to_fastEP),@function
        .globl   .VG_(disp_cp_chain_me_to_fastEP)
.VG_(disp_cp_chain_me_to_fastEP):
        /* We got called.  The return address indicates
           where the patching needs to happen.  Collect
           the return address and, exit back to C land,
           handing the caller the pair (Chain_me_F, RA) */
        li      6, VG_TRC_CHAIN_ME_TO_FAST_EP
        mflr    7
        /* 20 = imm64-fixed5 r30, disp_cp_chain_me_to_fastEP
           4  = mtctr r30
           4  = bctr
        */
        subi    7,7,20+4+4
        b       .postamble

/* ------ Indirect but boring jump ------ */
        .section ".text"
        .align   2
        .globl   VG_(disp_cp_xindir)
        .section ".opd","aw"
        .align   3
VG_(disp_cp_xindir):
        .quad    .VG_(disp_cp_xindir),.TOC.@tocbase,0
        .previous
        .type    .VG_(disp_cp_xindir),@function
        .globl   .VG_(disp_cp_xindir)
.VG_(disp_cp_xindir):
        /* Where are we going? */
        ld      3,OFFSET_ppc64_CIA(31)

        /* stats only */
        ld      5, .tocent__vgPlain_stats__n_xindirs_32@toc(2)
        lwz     6,0(5)
        addi    6,6,1
        stw     6,0(5)

        /* r5 = &VG_(tt_fast) */
        ld      5, .tocent__vgPlain_tt_fast@toc(2) /* &VG_(tt_fast) */

        /* try a fast lookup in the translation cache */
        /* r4 = VG_TT_FAST_HASH(addr)           * sizeof(FastCacheEntry)
              = ((r3 >>u 2) & VG_TT_FAST_MASK)  << 4 */
        rldicl  4,3, 62, 64-VG_TT_FAST_BITS   /* entry# */
        sldi    4,4,4      /* entry# * sizeof(FastCacheEntry) */
        add     5,5,4      /* & VG_(tt_fast)[entry#] */
        ld      6,0(5)     /* .guest */
        ld      7,8(5)     /* .host */
        cmpd    3,6
        bne     .fast_lookup_failed

        /* Found a match.  Jump to .host. */
        mtctr   7
        bctr

.fast_lookup_failed:
        /* stats only */
        ld      5, .tocent__vgPlain_stats__n_xindir_misses_32@toc(2)
        lwz     6,0(5)
        addi    6,6,1
        stw     6,0(5)

        li      6,VG_TRC_INNER_FASTMISS
        li      7,0
        b       .postamble
        /*NOTREACHED*/

/* ------ Assisted jump ------ */
        .section ".text"
        .align   2
        .globl   VG_(disp_cp_xassisted)
        .section ".opd","aw"
        .align   3
VG_(disp_cp_xassisted):
        .quad    .VG_(disp_cp_xassisted),.TOC.@tocbase,0
        .previous
        .type    .VG_(disp_cp_xassisted),@function
        .globl   .VG_(disp_cp_xassisted)
.VG_(disp_cp_xassisted):
        /* r31 contains the TRC */
        mr      6,31
        li      7,0
        b       .postamble

/* ------ Event check failed ------ */
        .section ".text"
        .align   2
        .globl   VG_(disp_cp_evcheck_fail)
        .section ".opd","aw"
        .align   3
VG_(disp_cp_evcheck_fail):
        .quad    .VG_(disp_cp_evcheck_fail),.TOC.@tocbase,0
        .previous
        .type    .VG_(disp_cp_evcheck_fail),@function
        .globl   .VG_(disp_cp_evcheck_fail)
.VG_(disp_cp_evcheck_fail):
        li      6,VG_TRC_INNER_COUNTERZERO
        li      7,0
        b       .postamble


.size .VG_(disp_run_translations), .-.VG_(disp_run_translations)

#endif // defined(VGP_ppc64be_linux)

/* Let the linker know we don't need an executable stack */
MARK_STACK_NO_EXEC

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/