/*--------------------------------------------------------------------*/
/*--- The core dispatch loop, for jumping to a code address.       ---*/
/*---                                       dispatch-ppc64-linux.S ---*/
/*--------------------------------------------------------------------*/

/*
  This file is part of Valgrind, a dynamic binary instrumentation
  framework.

  Copyright (C) 2005-2015 Cerion Armour-Brown <cerion@open-works.co.uk>

  This program is free software; you can redistribute it and/or
  modify it under the terms of the GNU General Public License as
  published by the Free Software Foundation; either version 2 of the
  License, or (at your option) any later version.

  This program is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  02111-1307, USA.

  The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics_asm.h"

#if defined(VGP_ppc64be_linux)

#include "pub_core_dispatch_asm.h"
#include "pub_core_transtab_asm.h"
#include "libvex_guest_offsets.h"	/* for OFFSET_ppc64_CIA */


/* References to globals via the TOC */

/*
        .globl  vgPlain_tt_fast
        .lcomm  vgPlain_tt_fast,4,4
        .type   vgPlain_tt_fast, @object
*/
.section ".toc","aw"
.tocent__vgPlain_tt_fast:
        .tc vgPlain_tt_fast[TC],vgPlain_tt_fast
.tocent__vgPlain_stats__n_xindirs_32:
        .tc vgPlain_stats__n_xindirs_32[TC],vgPlain_stats__n_xindirs_32
.tocent__vgPlain_stats__n_xindir_misses_32:
        .tc vgPlain_stats__n_xindir_misses_32[TC],vgPlain_stats__n_xindir_misses_32
.tocent__vgPlain_machine_ppc64_has_VMX:
        .tc vgPlain_machine_ppc64_has_VMX[TC],vgPlain_machine_ppc64_has_VMX

/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- The dispatch loop.  VG_(disp_run_translations) is    ---*/
/*--- used to run all translations,                        ---*/
/*--- including no-redir ones.                             ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/*----------------------------------------------------*/
/*--- Entry and preamble (set everything up)       ---*/
/*----------------------------------------------------*/

/* signature:
void VG_(disp_run_translations)( UWord* two_words,
                                 void*  guest_state,
                                 Addr   host_addr );
*/
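/* For orientation only: the scheduler calls this routine roughly as in
   the sketch below (illustrative; the placeholder names guest_state_ptr
   and host_code_addr are not the exact call-site identifiers):

       UWord two_words[2];
       VG_(disp_run_translations)( two_words, guest_state_ptr,
                                   host_code_addr );
       // On return, two_words[0] holds the TRC that the postamble
       // below wrote to r6, and two_words[1] holds the optional extra
       // word from r7 (e.g. the patch address for CHAIN_ME exits).
*/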

.section ".text"
.align   2
.globl   VG_(disp_run_translations)
.section ".opd","aw"
.align   3
VG_(disp_run_translations):
.quad    .VG_(disp_run_translations),.TOC.@tocbase,0
.previous
.type    .VG_(disp_run_translations),@function
.globl   .VG_(disp_run_translations)
.VG_(disp_run_translations):
	/* r3 holds two_words */
	/* r4 holds guest_state */
        /* r5 holds host_addr */

        /* ----- entry point to VG_(disp_run_translations) ----- */
        /* PPC64 ABI saves LR->16(parent_sp), CR->8(parent_sp) */

        /* Save lr, cr */
        mflr    6
        std     6,16(1)
        mfcr    6
        std     6,8(1)

        /* New stack frame */
        stdu    1,-624(1)  /* sp should maintain 16-byte alignment */

        /* General reg save area : 152 bytes */
        std     31,472(1)
        std     30,464(1)
        std     29,456(1)
        std     28,448(1)
        std     27,440(1)
        std     26,432(1)
        std     25,424(1)
        std     24,416(1)
        std     23,408(1)
        std     22,400(1)
        std     21,392(1)
        std     20,384(1)
        std     19,376(1)
        std     18,368(1)
        std     17,360(1)
        std     16,352(1)
        std     15,344(1)
        std     14,336(1)
        std     13,328(1)
        std     3,104(1)  /* save two_words for later */

        /* Save callee-saved registers... */
        /* Floating-point reg save area : 144 bytes */
        stfd    31,616(1)
        stfd    30,608(1)
        stfd    29,600(1)
        stfd    28,592(1)
        stfd    27,584(1)
        stfd    26,576(1)
        stfd    25,568(1)
        stfd    24,560(1)
        stfd    23,552(1)
        stfd    22,544(1)
        stfd    21,536(1)
        stfd    20,528(1)
        stfd    19,520(1)
        stfd    18,512(1)
        stfd    17,504(1)
        stfd    16,496(1)
        stfd    15,488(1)
        stfd    14,480(1)

        /* It's necessary to save/restore VRSAVE in the AIX / Darwin ABI.
           The Linux kernel might not actually use VRSAVE for its intended
           purpose, but it should be harmless to preserve anyway. */
	/* r3, r4, r5 are live here, so use r6 */
	ld	6,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
	ld	6,0(6)
        cmpldi  6,0
        beq     .LafterVMX1

        /* VRSAVE save word : 4 bytes */
        mfspr   6,256         /* vrsave reg is spr number 256 */
        stw     6,324(1)

        /* Alignment padding : 4 bytes */

        /* Vector reg save area (quadword aligned) : 192 bytes */
        li      6,304
        stvx    31,6,1
        li      6,288
        stvx    30,6,1
        li      6,272
        stvx    29,6,1
        li      6,256
        stvx    28,6,1
        li      6,240
        stvx    27,6,1
        li      6,224
        stvx    26,6,1
        li      6,208
        stvx    25,6,1
        li      6,192
        stvx    24,6,1
        li      6,176
        stvx    23,6,1
        li      6,160
        stvx    22,6,1
        li      6,144
        stvx    21,6,1
        li      6,128
        stvx    20,6,1
.LafterVMX1:

        /* Local variable space... */

	/* r3 holds two_words */
	/* r4 holds guest_state */
        /* r5 holds host_addr */

        /* 96(sp) used later to check FPSCR[RM] */
        /* 88(sp) used later to load fpscr with zero */
	/* 48:87(sp) free */

        /* Linkage Area (reserved)  BE ABI
           40(sp) : TOC
           32(sp) : link editor doubleword
           24(sp) : compiler doubleword
           16(sp) : LR
           8(sp)  : CR
           0(sp)  : back-chain
        */
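        /* For reference, the layout of the 624-byte frame built above,
           derived from the stores in this file (offsets from sp):
             480 .. 623  FP reg save area        (f14-f31, 144 bytes)
             328 .. 479  integer reg save area   (r13-r31, 152 bytes)
             324 .. 327  VRSAVE save word
             320 .. 323  alignment padding
             128 .. 319  vector reg save area    (v20-v31, 192 bytes)
             104 .. 111  saved two_words pointer (r3)
              96,  88    scratch words for FPSCR handling
              48 ..  87  free
               0 ..  47  linkage area (see comment above)
        */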

        /* set host FPU control word to the default mode expected
           by VEX-generated code.  See comments in libvex.h for
           more info. */
        /* => get zero into f3 (tedious)
           fsub 3,3,3 is not a reliable way to do this, since if
           f3 holds a NaN or similar then we don't necessarily
           wind up with zero. */
        li      6,0
        stw     6,88(1)
        lfs     3,88(1)
        mtfsf   0xFF,3   /* fpscr = lo32 of f3 */
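        /* Net effect: f3 holds +0.0, so mtfsf writes all-zero FPSCR
           fields, i.e. round-to-nearest with all FP exception enables
           cleared -- the state VEX-generated code expects on entry. */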

        /* set host AltiVec control word to the default mode expected
           by VEX-generated code. */
	ld	6,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
	ld	6,0(6)
        cmpldi  6,0
        beq     .LafterVMX2

        vspltisw 3,0x0  /* generate zero */
        mtvscr  3
.LafterVMX2:

        /* make a stack frame for the code we are calling */
        stdu    1,-48(1)

        /* Set up the guest state ptr */
        mr      31,4      /* r31 (generated code gsp) = r4 */

        /* and jump into the code cache.  Chained translations in
           the code cache run until, for whatever reason, they can't
           continue.  When that happens, the translation in question
           will jump (or call) to one of the continuation points
           VG_(disp_cp_...) below. */
        mtctr   5
        bctr
	/*NOTREACHED*/
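        /* Control never falls through here.  Every exit from the code
           cache comes back via one of the VG_(disp_cp_*) entry points
           below, each of which loads a TRC into r6 (and an optional
           second word into r7) and branches to .postamble. */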

/*----------------------------------------------------*/
/*--- Postamble and exit.                          ---*/
/*----------------------------------------------------*/

.postamble:
        /* At this point, r6 and r7 contain two
           words to be returned to the caller.  r6
           holds a TRC value, and r7 optionally may
           hold another word (for CHAIN_ME exits, the
           address of the place to patch). */

        /* undo the "make a stack frame for the code we are calling" */
        addi    1,1,48

        /* We're leaving.  Check that nobody messed with
           VSCR or FPSCR in ways we don't expect. */
	/* Using r11 - value used again further on, so don't trash! */
	ld	11,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
	ld	11,0(11)

	/* Set fpscr back to a known state, since vex-generated code
	   may have messed with fpscr[rm]. */
        li      5,0
        addi    1,1,-16
        stw     5,0(1)
        lfs     3,0(1)
        addi    1,1,16
        mtfsf   0xFF,3   /* fpscr = f3 */

        cmpldi  11,0    /* Do we have altivec? */
        beq     .LafterVMX8

        /* Check VSCR[NJ] == 1 */
        /* first generate 4x 0x00010000 */
        vspltisw  4,0x1                   /* 4x 0x00000001 */
        vspltisw  5,0x0                   /* zero */
        vsldoi    6,4,5,0x2               /* <<2*8 => 4x 0x00010000 */
        /* retrieve VSCR and mask wanted bits */
        mfvscr    7
        vand      7,7,6                   /* gives NJ flag */
        vspltw    7,7,0x3                 /* flags-word to all lanes */
        vcmpequw. 8,6,7                   /* CR[24] = 1 if v6 == v7 */
        bt        24,.invariant_violation /* branch if all_equal */
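        /* Rationale: the preamble above set VSCR to zero, so NJ
           (vector non-Java mode, the 0x00010000 bit) is expected to
           still be clear here.  If the masked VSCR equals the NJ mask
           in all lanes, NJ was set behind our back, so report
           VG_TRC_INVARIANT_FAILED. */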

.LafterVMX8:
	/* otherwise we're OK */
        b       .remove_frame

.invariant_violation:
        li      6,VG_TRC_INVARIANT_FAILED
        li      7,0
        /* fall through */

.remove_frame:
        /* r11 already holds VG_(machine_ppc64_has_VMX) value */
        cmplwi  11,0
        beq     .LafterVMX9

        /* Restore Altivec regs.
           Use r5 as scratch since r6/r7 are live. */
        /* VRSAVE */
        lwz     5,324(1)
        mtspr   256,5         /* VRSAVE reg is spr number 256 */

        /* Vector regs */
        li      5,304
        lvx     31,5,1
        li      5,288
        lvx     30,5,1
        li      5,272
        lvx     29,5,1
        li      5,256
        lvx     28,5,1
        li      5,240
        lvx     27,5,1
        li      5,224
        lvx     26,5,1
        li      5,208
        lvx     25,5,1
        li      5,192
        lvx     24,5,1
        li      5,176
        lvx     23,5,1
        li      5,160
        lvx     22,5,1
        li      5,144
        lvx     21,5,1
        li      5,128
        lvx     20,5,1
.LafterVMX9:

        /* Restore FP regs */
        /* Floating-point regs */
        lfd     31,616(1)
        lfd     30,608(1)
        lfd     29,600(1)
        lfd     28,592(1)
        lfd     27,584(1)
        lfd     26,576(1)
        lfd     25,568(1)
        lfd     24,560(1)
        lfd     23,552(1)
        lfd     22,544(1)
        lfd     21,536(1)
        lfd     20,528(1)
        lfd     19,520(1)
        lfd     18,512(1)
        lfd     17,504(1)
        lfd     16,496(1)
        lfd     15,488(1)
        lfd     14,480(1)

        /* restore int regs, including importantly r3 (two_words) */
        ld      31,472(1)
        ld      30,464(1)
        ld      29,456(1)
        ld      28,448(1)
        ld      27,440(1)
        ld      26,432(1)
        ld      25,424(1)
        ld      24,416(1)
        ld      23,408(1)
        ld      22,400(1)
        ld      21,392(1)
        ld      20,384(1)
        ld      19,376(1)
        ld      18,368(1)
        ld      17,360(1)
        ld      16,352(1)
        ld      15,344(1)
        ld      14,336(1)
        ld      13,328(1)
        ld      3,104(1)
        /* Stash return values */
        std     6,0(3)
        std     7,8(3)

        /* restore cr, lr & sp, and leave */
        ld      0,632(1)  /* stack_size + 8 */
        mtcr    0
        ld      0,640(1)  /* stack_size + 16 */
        mtlr    0
        addi    1,1,624   /* stack_size */
        blr


/*----------------------------------------------------*/
/*--- Continuation points                          ---*/
/*----------------------------------------------------*/

/* ------ Chain me to slow entry point ------ */
        .section ".text"
        .align   2
        .globl   VG_(disp_cp_chain_me_to_slowEP)
        .section ".opd","aw"
        .align   3
VG_(disp_cp_chain_me_to_slowEP):
        .quad    .VG_(disp_cp_chain_me_to_slowEP),.TOC.@tocbase,0
        .previous
        .type    .VG_(disp_cp_chain_me_to_slowEP),@function
        .globl   .VG_(disp_cp_chain_me_to_slowEP)
.VG_(disp_cp_chain_me_to_slowEP):
        /* We got called.  The return address indicates
           where the patching needs to happen.  Collect
           the return address and exit back to C land,
           handing the caller the pair (Chain_me_S, RA) */
        li   6, VG_TRC_CHAIN_ME_TO_SLOW_EP
        mflr 7
        /* 20 = imm64-fixed5 r30, disp_cp_chain_me_to_slowEP
           4  = mtctr r30
           4  = bctrl
        */
        subi 7,7,20+4+4
        b    .postamble
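        /* The 28 bytes subtracted above cover the whole call sequence
           the translation used to get here (a 5-instruction 64-bit
           immediate load of this routine's address into r30, then
           mtctr/bctrl), so r7 ends up pointing at the start of the
           code that the chainer will later overwrite with a direct
           jump to the target translation. */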

/* ------ Chain me to fast entry point ------ */
        .section ".text"
        .align   2
        .globl   VG_(disp_cp_chain_me_to_fastEP)
        .section ".opd","aw"
        .align   3
VG_(disp_cp_chain_me_to_fastEP):
        .quad    .VG_(disp_cp_chain_me_to_fastEP),.TOC.@tocbase,0
        .previous
        .type    .VG_(disp_cp_chain_me_to_fastEP),@function
        .globl   .VG_(disp_cp_chain_me_to_fastEP)
.VG_(disp_cp_chain_me_to_fastEP):
        /* We got called.  The return address indicates
           where the patching needs to happen.  Collect
           the return address and exit back to C land,
           handing the caller the pair (Chain_me_F, RA) */
        li   6, VG_TRC_CHAIN_ME_TO_FAST_EP
        mflr 7
        /* 20 = imm64-fixed5 r30, disp_cp_chain_me_to_fastEP
           4  = mtctr r30
           4  = bctrl
        */
        subi 7,7,20+4+4
        b    .postamble

/* ------ Indirect but boring jump ------ */
        .section ".text"
        .align   2
        .globl   VG_(disp_cp_xindir)
        .section ".opd","aw"
        .align   3
VG_(disp_cp_xindir):
        .quad    .VG_(disp_cp_xindir),.TOC.@tocbase,0
        .previous
        .type    .VG_(disp_cp_xindir),@function
        .globl   .VG_(disp_cp_xindir)
.VG_(disp_cp_xindir):
        /* Where are we going? */
        ld      3,OFFSET_ppc64_CIA(31)

        /* stats only */
	ld	5, .tocent__vgPlain_stats__n_xindirs_32@toc(2)
        lwz     6,0(5)
        addi    6,6,1
        stw     6,0(5)

	/* r5 = &VG_(tt_fast) */
	ld	5, .tocent__vgPlain_tt_fast@toc(2) /* &VG_(tt_fast) */

        /* try a fast lookup in the translation cache */
        /* r4 = VG_TT_FAST_HASH(addr)           * sizeof(FastCacheEntry)
              = ((r3 >>u 2) & VG_TT_FAST_MASK)  << 4 */
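        /* In other words (a sketch, assuming VG_TT_FAST_MASK ==
           (1 << VG_TT_FAST_BITS) - 1 as per pub_core_transtab_asm.h):
           drop the two always-zero low bits of the guest address, keep
           the bottom VG_TT_FAST_BITS bits as the table index, then
           scale by 16 because each FastCacheEntry is a {guest, host}
           pair of 8-byte words (loaded just below). */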
	rldicl	4,3, 62, 64-VG_TT_FAST_BITS   /* entry# */
	sldi	4,4,4      /* entry# * sizeof(FastCacheEntry) */
	add	5,5,4      /* & VG_(tt_fast)[entry#] */
	ld	6,0(5)     /* .guest */
	ld	7,8(5)     /* .host */
        cmpd    3,6
        bne     .fast_lookup_failed

        /* Found a match.  Jump to .host. */
        mtctr   7
        bctr

.fast_lookup_failed:
        /* stats only */
	ld	5, .tocent__vgPlain_stats__n_xindir_misses_32@toc(2)
        lwz     6,0(5)
        addi    6,6,1
        stw     6,0(5)

        li      6,VG_TRC_INNER_FASTMISS
        li      7,0
        b       .postamble
	/*NOTREACHED*/

/* ------ Assisted jump ------ */
        .section ".text"
        .align   2
        .globl   VG_(disp_cp_xassisted)
        .section ".opd","aw"
        .align   3
VG_(disp_cp_xassisted):
        .quad    .VG_(disp_cp_xassisted),.TOC.@tocbase,0
        .previous
        .type    .VG_(disp_cp_xassisted),@function
        .globl   .VG_(disp_cp_xassisted)
.VG_(disp_cp_xassisted):
        /* r31 contains the TRC */
        mr      6,31
        li      7,0
        b       .postamble

/* ------ Event check failed ------ */
        .section ".text"
        .align   2
        .globl   VG_(disp_cp_evcheck_fail)
        .section ".opd","aw"
        .align   3
VG_(disp_cp_evcheck_fail):
        .quad    .VG_(disp_cp_evcheck_fail),.TOC.@tocbase,0
        .previous
        .type    .VG_(disp_cp_evcheck_fail),@function
        .globl   .VG_(disp_cp_evcheck_fail)
.VG_(disp_cp_evcheck_fail):
        li      6,VG_TRC_INNER_COUNTERZERO
        li      7,0
        b       .postamble
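/* Summary of the TRC values this file can hand back in r6 (the
   VG_TRC_* constants come from pub_core_dispatch_asm.h):
     VG_TRC_CHAIN_ME_TO_SLOW_EP / _FAST_EP : please patch the calling
         translation so it jumps directly to the slow/fast entry point
         (r7 = address of the code to patch)
     VG_TRC_INNER_FASTMISS    : fast-cache lookup missed; do a full lookup
     VG_TRC_INNER_COUNTERZERO : the event counter reached zero
     VG_TRC_INVARIANT_FAILED  : the FPSCR/VSCR invariant check failed
   Assisted exits pass through whatever TRC the translation left in r31. */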


.size .VG_(disp_run_translations), .-.VG_(disp_run_translations)

#endif // defined(VGP_ppc64be_linux)

/* Let the linker know we don't need an executable stack */
MARK_STACK_NO_EXEC

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/
