1
2/*--------------------------------------------------------------------*/
3/*--- The core dispatch loop, for jumping to a code address.       ---*/
4/*---                                       dispatch-ppc64-linux.S ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8  This file is part of Valgrind, a dynamic binary instrumentation
9  framework.
10
11  Copyright (C) 2005-2012 Cerion Armour-Brown <cerion@open-works.co.uk>
12
13  This program is free software; you can redistribute it and/or
14  modify it under the terms of the GNU General Public License as
15  published by the Free Software Foundation; either version 2 of the
16  License, or (at your option) any later version.
17
18  This program is distributed in the hope that it will be useful, but
19  WITHOUT ANY WARRANTY; without even the implied warranty of
20  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  General Public License for more details.
22
23  You should have received a copy of the GNU General Public License
24  along with this program; if not, write to the Free Software
25  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26  02111-1307, USA.
27
28  The GNU General Public License is contained in the file COPYING.
29*/
30
31#if defined(VGP_ppc64_linux)
32
33#include "pub_core_basics_asm.h"
34#include "pub_core_dispatch_asm.h"
35#include "pub_core_transtab_asm.h"
36#include "libvex_guest_offsets.h"	/* for OFFSET_ppc64_CIA */
37
38
39/* References to globals via the TOC */
40
41/*
42        .globl  vgPlain_tt_fast
43        .lcomm  vgPlain_tt_fast,4,4
44        .type   vgPlain_tt_fast, @object
45*/
46.section ".toc","aw"
47.tocent__vgPlain_tt_fast:
48        .tc vgPlain_tt_fast[TC],vgPlain_tt_fast
49.tocent__vgPlain_stats__n_xindirs_32:
50        .tc vgPlain_stats__n_xindirs_32[TC],vgPlain_stats__n_xindirs_32
51.tocent__vgPlain_stats__n_xindir_misses_32:
52        .tc vgPlain_stats__n_xindir_misses_32[TC],vgPlain_stats__n_xindir_misses_32
53.tocent__vgPlain_machine_ppc64_has_VMX:
54        .tc vgPlain_machine_ppc64_has_VMX[TC],vgPlain_machine_ppc64_has_VMX
55
56/*------------------------------------------------------------*/
57/*---                                                      ---*/
58/*--- The dispatch loop.  VG_(disp_run_translations) is    ---*/
59/*--- used to run all translations,                        ---*/
60/*--- including no-redir ones.                             ---*/
61/*---                                                      ---*/
62/*------------------------------------------------------------*/
63
64/*----------------------------------------------------*/
65/*--- Entry and preamble (set everything up)       ---*/
66/*----------------------------------------------------*/
67
68/* signature:
69void VG_(disp_run_translations)( UWord* two_words,
70                                 void*  guest_state,
71                                 Addr   host_addr );
72*/
73
74.section ".text"
75.align   2
76.globl   VG_(disp_run_translations)
77.section ".opd","aw"
78.align   3
79VG_(disp_run_translations):
80.quad    .VG_(disp_run_translations),.TOC.@tocbase,0
81.previous
82.type    .VG_(disp_run_translations),@function
83.globl   .VG_(disp_run_translations)
84.VG_(disp_run_translations):
85	/* r3 holds two_words */
86	/* r4 holds guest_state */
87        /* r5 holds host_addr */
88
89        /* ----- entry point to VG_(disp_run_translations) ----- */
90        /* PPC64 ABI saves LR->16(prt_sp), CR->8(prt_sp)) */
91
92        /* Save lr, cr */
93        mflr    6
94        std     6,16(1)
95        mfcr    6
96        std     6,8(1)
97
98        /* New stack frame */
99        stdu    1,-624(1)  /* sp should maintain 16-byte alignment */
100
101        /* General reg save area : 152 bytes */
102        std     31,472(1)
103        std     30,464(1)
104        std     29,456(1)
105        std     28,448(1)
106        std     27,440(1)
107        std     26,432(1)
108        std     25,424(1)
109        std     24,416(1)
110        std     23,408(1)
111        std     22,400(1)
112        std     21,392(1)
113        std     20,384(1)
114        std     19,376(1)
115        std     18,368(1)
116        std     17,360(1)
117        std     16,352(1)
118        std     15,344(1)
119        std     14,336(1)
120        std     13,328(1)
121        std     3,104(1)  /* save two_words for later */
122
123        /* Save callee-saved registers... */
124        /* Floating-point reg save area : 144 bytes */
125        stfd    31,616(1)
126        stfd    30,608(1)
127        stfd    29,600(1)
128        stfd    28,592(1)
129        stfd    27,584(1)
130        stfd    26,576(1)
131        stfd    25,568(1)
132        stfd    24,560(1)
133        stfd    23,552(1)
134        stfd    22,544(1)
135        stfd    21,536(1)
136        stfd    20,528(1)
137        stfd    19,520(1)
138        stfd    18,512(1)
139        stfd    17,504(1)
140        stfd    16,496(1)
141        stfd    15,488(1)
142        stfd    14,480(1)
143
144        /* It's necessary to save/restore VRSAVE in the AIX / Darwin ABI.
145           The Linux kernel might not actually use VRSAVE for its intended
146           purpose, but it should be harmless to preserve anyway. */
147	/* r3, r4, r5 are live here, so use r6 */
148	ld	6,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
149	ld	6,0(6)
150        cmpldi  6,0
151        beq     .LafterVMX1
152
153        /* VRSAVE save word : 32 bytes */
154        mfspr   6,256         /* vrsave reg is spr number 256 */
155        stw     6,324(1)
156
157        /* Alignment padding : 4 bytes */
158
159        /* Vector reg save area (quadword aligned) : 192 bytes */
160        li      6,304
161        stvx    31,6,1
162        li      6,288
163        stvx    30,6,1
164        li      6,272
165        stvx    29,6,1
166        li      6,256
167        stvx    28,6,1
168        li      6,240
169        stvx    27,6,1
170        li      6,224
171        stvx    26,6,1
172        li      6,208
173        stvx    25,6,1
174        li      6,192
175        stvx    24,6,1
176        li      6,176
177        stvx    23,6,1
178        li      6,160
179        stvx    22,6,1
180        li      6,144
181        stvx    21,6,1
182        li      6,128
183        stvx    20,6,1
184.LafterVMX1:
185
186        /* Local variable space... */
187
188	/* r3 holds two_words */
189	/* r4 holds guest_state */
190        /* r5 holds host_addr */
191
192        /* 96(sp) used later to check FPSCR[RM] */
193        /* 88(sp) used later to load fpscr with zero */
194	/* 48:87(sp) free */
195
196        /* Linkage Area (reserved)
197           40(sp) : TOC
198           32(sp) : link editor doubleword
199           24(sp) : compiler doubleword
200           16(sp) : LR
201           8(sp)  : CR
202           0(sp)  : back-chain
203        */
204
205        /* set host FPU control word to the default mode expected
206           by VEX-generated code.  See comments in libvex.h for
207           more info. */
208        /* => get zero into f3 (tedious)
209           fsub 3,3,3 is not a reliable way to do this, since if
210           f3 holds a NaN or similar then we don't necessarily
211           wind up with zero. */
212        li      6,0
213        stw     6,88(1)
214        lfs     3,88(1)
215        mtfsf   0xFF,3   /* fpscr = lo32 of f3 */
216
217        /* set host AltiVec control word to the default mode expected
218           by VEX-generated code. */
219	ld	6,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
220	ld	6,0(6)
221        cmpldi  6,0
222        beq     .LafterVMX2
223
224        vspltisw 3,0x0  /* generate zero */
225        mtvscr  3
226.LafterVMX2:
227
228        /* make a stack frame for the code we are calling */
229        stdu    1,-48(1)
230
231        /* Set up the guest state ptr */
232        mr      31,4      /* r31 (generated code gsp) = r4 */
233
234        /* and jump into the code cache.  Chained translations in
235           the code cache run, until for whatever reason, they can't
236           continue.  When that happens, the translation in question
237           will jump (or call) to one of the continuation points
238           VG_(cp_...) below. */
239        mtctr   5
240        bctr
241	/*NOTREACHED*/
242
243/*----------------------------------------------------*/
244/*--- Postamble and exit.                          ---*/
245/*----------------------------------------------------*/
246
247.postamble:
248        /* At this point, r6 and r7 contain two
249           words to be returned to the caller.  r6
250           holds a TRC value, and r7 optionally may
251           hold another word (for CHAIN_ME exits, the
252           address of the place to patch.) */
253
254        /* undo the "make a stack frame for the code we are calling" */
255        addi    1,1,48
256
257        /* We're leaving.  Check that nobody messed with
258           VSCR or FPSCR in ways we don't expect. */
259	/* Using r11 - value used again further on, so don't trash! */
260	ld	11,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
261	ld	11,0(11)
262
263	/* Set fpscr back to a known state, since vex-generated code
264	   may have messed with fpscr[rm]. */
265        li      5,0
266        addi    1,1,-16
267        stw     5,0(1)
268        lfs     3,0(1)
269        addi    1,1,16
270        mtfsf   0xFF,3   /* fpscr = f3 */
271
272        cmpldi  11,0    /* Do we have altivec? */
273        beq     .LafterVMX8
274
275        /* Check VSCR[NJ] == 1 */
276        /* first generate 4x 0x00010000 */
277        vspltisw  4,0x1                   /* 4x 0x00000001 */
278        vspltisw  5,0x0                   /* zero */
279        vsldoi    6,4,5,0x2               /* <<2*8 => 4x 0x00010000 */
280        /* retrieve VSCR and mask wanted bits */
281        mfvscr    7
282        vand      7,7,6                   /* gives NJ flag */
283        vspltw    7,7,0x3                 /* flags-word to all lanes */
284        vcmpequw. 8,6,7                   /* CR[24] = 1 if v6 == v7 */
285        bt        24,.invariant_violation /* branch if all_equal */
286
287.LafterVMX8:
288	/* otherwise we're OK */
289        b       .remove_frame
290
291.invariant_violation:
292        li      6,VG_TRC_INVARIANT_FAILED
293        li      7,0
294        /* fall through */
295
296.remove_frame:
297        /* r11 already holds VG_(machine_ppc32_has_VMX) value */
298        cmplwi  11,0
299        beq     .LafterVMX9
300
301        /* Restore Altivec regs.
302           Use r5 as scratch since r6/r7 are live. */
303        /* VRSAVE */
304        lwz     5,324(1)
305        mfspr   5,256         /* VRSAVE reg is spr number 256 */
306
307        /* Vector regs */
308        li      5,304
309        lvx     31,5,1
310        li      5,288
311        lvx     30,5,1
312        li      5,272
313        lvx     29,5,1
314        li      5,256
315        lvx     28,5,1
316        li      5,240
317        lvx     27,5,1
318        li      5,224
319        lvx     26,5,1
320        li      5,208
321        lvx     25,5,1
322        li      5,192
323        lvx     24,5,1
324        li      5,176
325        lvx     23,5,1
326        li      5,160
327        lvx     22,5,1
328        li      5,144
329        lvx     21,5,1
330        li      5,128
331        lvx     20,5,1
332.LafterVMX9:
333
334        /* Restore FP regs */
335        /* Floating-point regs */
336        lfd     31,616(1)
337        lfd     30,608(1)
338        lfd     29,600(1)
339        lfd     28,592(1)
340        lfd     27,584(1)
341        lfd     26,576(1)
342        lfd     25,568(1)
343        lfd     24,560(1)
344        lfd     23,552(1)
345        lfd     22,544(1)
346        lfd     21,536(1)
347        lfd     20,528(1)
348        lfd     19,520(1)
349        lfd     18,512(1)
350        lfd     17,504(1)
351        lfd     16,496(1)
352        lfd     15,488(1)
353        lfd     14,480(1)
354
355        /* restore int regs, including importantly r3 (two_words) */
356        ld      31,472(1)
357        ld      30,464(1)
358        ld      29,456(1)
359        ld      28,448(1)
360        ld      27,440(1)
361        ld      26,432(1)
362        ld      25,424(1)
363        ld      24,416(1)
364        ld      23,408(1)
365        ld      22,400(1)
366        ld      21,392(1)
367        ld      20,384(1)
368        ld      19,376(1)
369        ld      18,368(1)
370        ld      17,360(1)
371        ld      16,352(1)
372        ld      15,344(1)
373        ld      14,336(1)
374        ld      13,328(1)
375        ld      3,104(1)
376        /* Stash return values */
377        std     6,0(3)
378        std     7,8(3)
379
380        /* restore lr & sp, and leave */
381        ld      0,632(1)  /* stack_size + 8 */
382        mtcr    0
383        ld      0,640(1)  /* stack_size + 16 */
384        mtlr    0
385        addi    1,1,624   /* stack_size */
386        blr
387
388
389/*----------------------------------------------------*/
390/*--- Continuation points                          ---*/
391/*----------------------------------------------------*/
392
393/* ------ Chain me to slow entry point ------ */
394        .section ".text"
395        .align   2
396        .globl   VG_(disp_cp_chain_me_to_slowEP)
397        .section ".opd","aw"
398        .align   3
399VG_(disp_cp_chain_me_to_slowEP):
400        .quad    .VG_(disp_cp_chain_me_to_slowEP),.TOC.@tocbase,0
401        .previous
402        .type    .VG_(disp_cp_chain_me_to_slowEP),@function
403        .globl   .VG_(disp_cp_chain_me_to_slowEP)
404.VG_(disp_cp_chain_me_to_slowEP):
405        /* We got called.  The return address indicates
406           where the patching needs to happen.  Collect
407           the return address and, exit back to C land,
408           handing the caller the pair (Chain_me_S, RA) */
409        li   6, VG_TRC_CHAIN_ME_TO_SLOW_EP
410        mflr 7
411        /* 20 = imm64-fixed5 r30, disp_cp_chain_me_to_slowEP
412           4  = mtctr r30
413           4  = btctr
414        */
415        subi 7,7,20+4+4
416        b    .postamble
417
418/* ------ Chain me to fast entry point ------ */
419        .section ".text"
420        .align   2
421        .globl   VG_(disp_cp_chain_me_to_fastEP)
422        .section ".opd","aw"
423        .align   3
424VG_(disp_cp_chain_me_to_fastEP):
425        .quad    .VG_(disp_cp_chain_me_to_fastEP),.TOC.@tocbase,0
426        .previous
427        .type    .VG_(disp_cp_chain_me_to_fastEP),@function
428        .globl   .VG_(disp_cp_chain_me_to_fastEP)
429.VG_(disp_cp_chain_me_to_fastEP):
430        /* We got called.  The return address indicates
431           where the patching needs to happen.  Collect
432           the return address and, exit back to C land,
433           handing the caller the pair (Chain_me_S, RA) */
434        li   6, VG_TRC_CHAIN_ME_TO_FAST_EP
435        mflr 7
436        /* 20 = imm64-fixed5 r30, disp_cp_chain_me_to_fastEP
437           4  = mtctr r30
438           4  = btctr
439        */
440        subi 7,7,20+4+4
441        b    .postamble
442
443/* ------ Indirect but boring jump ------ */
444        .section ".text"
445        .align   2
446        .globl   VG_(disp_cp_xindir)
447        .section ".opd","aw"
448        .align   3
449VG_(disp_cp_xindir):
450        .quad    .VG_(disp_cp_xindir),.TOC.@tocbase,0
451        .previous
452        .type    .VG_(disp_cp_xindir),@function
453        .globl   .VG_(disp_cp_xindir)
454.VG_(disp_cp_xindir):
455        /* Where are we going? */
456        ld      3,OFFSET_ppc64_CIA(31)
457
458        /* stats only */
459	ld	5, .tocent__vgPlain_stats__n_xindirs_32@toc(2)
460        lwz     6,0(5)
461        addi    6,6,1
462        stw     6,0(5)
463
464	/* r5 = &VG_(tt_fast) */
465	ld	5, .tocent__vgPlain_tt_fast@toc(2) /* &VG_(tt_fast) */
466
467        /* try a fast lookup in the translation cache */
468        /* r4 = VG_TT_FAST_HASH(addr)           * sizeof(FastCacheEntry)
469              = ((r3 >>u 2) & VG_TT_FAST_MASK)  << 4 */
470	rldicl	4,3, 62, 64-VG_TT_FAST_BITS   /* entry# */
471	sldi	4,4,4      /* entry# * sizeof(FastCacheEntry) */
472	add	5,5,4      /* & VG_(tt_fast)[entry#] */
473	ld	6,0(5)     /* .guest */
474	ld	7,8(5)     /* .host */
475        cmpd    3,6
476        bne     .fast_lookup_failed
477
478        /* Found a match.  Jump to .host. */
479        mtctr   7
480        bctr
481
482.fast_lookup_failed:
483        /* stats only */
484	ld	5, .tocent__vgPlain_stats__n_xindir_misses_32@toc(2)
485        lwz     6,0(5)
486        addi    6,6,1
487        stw     6,0(5)
488
489        li      6,VG_TRC_INNER_FASTMISS
490        li      7,0
491        b       .postamble
492	/*NOTREACHED*/
493
494/* ------ Assisted jump ------ */
495.section ".text"
496        .align   2
497        .globl   VG_(disp_cp_xassisted)
498        .section ".opd","aw"
499        .align   3
500VG_(disp_cp_xassisted):
501        .quad    .VG_(disp_cp_xassisted),.TOC.@tocbase,0
502        .previous
503        .type    .VG_(disp_cp_xassisted),@function
504        .globl   .VG_(disp_cp_xassisted)
505.VG_(disp_cp_xassisted):
506        /* r31 contains the TRC */
507        mr      6,31
508        li      7,0
509        b       .postamble
510
511/* ------ Event check failed ------ */
512        .section ".text"
513        .align   2
514        .globl   VG_(disp_cp_evcheck_fail)
515        .section ".opd","aw"
516        .align   3
517VG_(disp_cp_evcheck_fail):
518        .quad    .VG_(disp_cp_evcheck_fail),.TOC.@tocbase,0
519        .previous
520        .type    .VG_(disp_cp_evcheck_fail),@function
521        .globl   .VG_(disp_cp_evcheck_fail)
522.VG_(disp_cp_evcheck_fail):
523        li      6,VG_TRC_INNER_COUNTERZERO
524        li      7,0
525        b       .postamble
526
527
528.size .VG_(disp_run_translations), .-.VG_(disp_run_translations)
529
530/* Let the linker know we don't need an executable stack */
531.section .note.GNU-stack,"",@progbits
532
533#endif // defined(VGP_ppc64_linux)
534
535/*--------------------------------------------------------------------*/
536/*--- end                                                          ---*/
537/*--------------------------------------------------------------------*/
538