1
2/*--------------------------------------------------------------------*/
3/*--- The core dispatch loop, for jumping to a code address.       ---*/
4/*---                                       dispatch-ppc64-linux.S ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8  This file is part of Valgrind, a dynamic binary instrumentation
9  framework.
10
11  Copyright (C) 2005-2017 Cerion Armour-Brown <cerion@open-works.co.uk>
12
13  This program is free software; you can redistribute it and/or
14  modify it under the terms of the GNU General Public License as
15  published by the Free Software Foundation; either version 2 of the
16  License, or (at your option) any later version.
17
18  This program is distributed in the hope that it will be useful, but
19  WITHOUT ANY WARRANTY; without even the implied warranty of
20  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  General Public License for more details.
22
23  You should have received a copy of the GNU General Public License
24  along with this program; if not, write to the Free Software
25  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26  02111-1307, USA.
27
28  The GNU General Public License is contained in the file COPYING.
29*/
30
31#include "pub_core_basics_asm.h"
32
33#if  defined(VGP_ppc64le_linux)
34
35#include "pub_core_dispatch_asm.h"
36#include "pub_core_transtab_asm.h"
37#include "libvex_guest_offsets.h"	/* for OFFSET_ppc64_CIA */
38
39/* NOTE: PPC64 supports Big Endian and Little Endian.  It also supports the
40	ELF version 1 and ELF version 2 APIs.
41
42	Currently LE uses ELF version 2 and BE uses ELF version 1.  However,
43	BE and LE may support the other ELF version in the future.  So, the
44	_CALL_ELF is used in the assembly function to enable code for a
45	specific ELF version independently of the Enianess of the machine.
46	The test "#if  _CALL_ELF == 2" checks if ELF version 2 is being used.
47*/
48
49/* References to globals via the TOC */
50
51/*
52        .globl  vgPlain_tt_fast
53        .lcomm  vgPlain_tt_fast,4,4
54        .type   vgPlain_tt_fast, @object
55*/
56.section ".toc","aw"
57.tocent__vgPlain_tt_fast:
58        .tc vgPlain_tt_fast[TC],vgPlain_tt_fast
59.tocent__vgPlain_stats__n_xindirs_32:
60        .tc vgPlain_stats__n_xindirs_32[TC],vgPlain_stats__n_xindirs_32
61.tocent__vgPlain_stats__n_xindir_misses_32:
62        .tc vgPlain_stats__n_xindir_misses_32[TC],vgPlain_stats__n_xindir_misses_32
63.tocent__vgPlain_machine_ppc64_has_VMX:
64        .tc vgPlain_machine_ppc64_has_VMX[TC],vgPlain_machine_ppc64_has_VMX
65
66/*------------------------------------------------------------*/
67/*---                                                      ---*/
68/*--- The dispatch loop.  VG_(disp_run_translations) is    ---*/
69/*--- used to run all translations,                        ---*/
70/*--- including no-redir ones.                             ---*/
71/*---                                                      ---*/
72/*------------------------------------------------------------*/
73
74/*----------------------------------------------------*/
75/*--- Entry and preamble (set everything up)       ---*/
76/*----------------------------------------------------*/
77
78/* signature:
79void VG_(disp_run_translations)( UWord* two_words,
80                                 void*  guest_state,
81                                 Addr   host_addr );
82*/
83
84.section ".text"
85.align   2
86.globl   VG_(disp_run_translations)
87#if _CALL_ELF == 2
88.type VG_(disp_run_translations),@function
89VG_(disp_run_translations):
90.type    .VG_(disp_run_translations),@function
91#else
92.section ".opd","aw"
93.align   3
94VG_(disp_run_translations):
95.quad    .VG_(disp_run_translations),.TOC.@tocbase,0
96.previous
97.type    .VG_(disp_run_translations),@function
98#endif
99.globl   .VG_(disp_run_translations)
100.VG_(disp_run_translations):
101#if  _CALL_ELF == 2
1020:      addis 2, 12,.TOC.-0b@ha
103        addi  2,2,.TOC.-0b@l
104        .localentry VG_(disp_run_translations), .-VG_(disp_run_translations)
105#endif
106
107	/* r3 holds two_words */
108	/* r4 holds guest_state */
109        /* r5 holds host_addr */
110
111        /* ----- entry point to VG_(disp_run_translations) ----- */
112        /* PPC64 ABI saves LR->16(prt_sp), CR->8(prt_sp)) */
113
114        /* Save lr, cr */
115        mflr    6
116        std     6,16(1)
117        mfcr    6
118        std     6,8(1)
119
120        /* New stack frame */
121        stdu    1,-624(1)  /* sp should maintain 16-byte alignment */
122
123        /* General reg save area : 152 bytes */
124        std     31,472(1)
125        std     30,464(1)
126        std     29,456(1)
127        std     28,448(1)
128        std     27,440(1)
129        std     26,432(1)
130        std     25,424(1)
131        std     24,416(1)
132        std     23,408(1)
133        std     22,400(1)
134        std     21,392(1)
135        std     20,384(1)
136        std     19,376(1)
137        std     18,368(1)
138        std     17,360(1)
139        std     16,352(1)
140        std     15,344(1)
141        std     14,336(1)
142        std     13,328(1)
143        std     3,104(1)  /* save two_words for later */
144
145        /* Save callee-saved registers... */
146        /* Floating-point reg save area : 144 bytes */
147        stfd    31,616(1)
148        stfd    30,608(1)
149        stfd    29,600(1)
150        stfd    28,592(1)
151        stfd    27,584(1)
152        stfd    26,576(1)
153        stfd    25,568(1)
154        stfd    24,560(1)
155        stfd    23,552(1)
156        stfd    22,544(1)
157        stfd    21,536(1)
158        stfd    20,528(1)
159        stfd    19,520(1)
160        stfd    18,512(1)
161        stfd    17,504(1)
162        stfd    16,496(1)
163        stfd    15,488(1)
164        stfd    14,480(1)
165
166        /* It's necessary to save/restore VRSAVE in the AIX / Darwin ABI.
167           The Linux kernel might not actually use VRSAVE for its intended
168           purpose, but it should be harmless to preserve anyway. */
169	/* r3, r4, r5 are live here, so use r6 */
170	ld	6,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
171	ld	6,0(6)
172        cmpldi  6,0
173        beq     .LafterVMX1
174
175        /* VRSAVE save word : 32 bytes */
176        mfspr   6,256         /* vrsave reg is spr number 256 */
177        stw     6,324(1)
178
179        /* Alignment padding : 4 bytes */
180
181        /* Vector reg save area (quadword aligned) : 192 bytes */
182        li      6,304
183        stvx    31,6,1
184        li      6,288
185        stvx    30,6,1
186        li      6,272
187        stvx    29,6,1
188        li      6,256
189        stvx    28,6,1
190        li      6,240
191        stvx    27,6,1
192        li      6,224
193        stvx    26,6,1
194        li      6,208
195        stvx    25,6,1
196        li      6,192
197        stvx    24,6,1
198        li      6,176
199        stvx    23,6,1
200        li      6,160
201        stvx    22,6,1
202        li      6,144
203        stvx    21,6,1
204        li      6,128
205        stvx    20,6,1
206.LafterVMX1:
207
208        /* Local variable space... */
209
210	/* r3 holds two_words */
211	/* r4 holds guest_state */
212        /* r5 holds host_addr */
213
214        /* 96(sp) used later to check FPSCR[RM] */
215        /* 88(sp) used later to load fpscr with zero */
216	/* 48:87(sp) free */
217
218        /* Linkage Area (reserved)  BE ABI
219           40(sp) : TOC
220           32(sp) : link editor doubleword
221           24(sp) : compiler doubleword
222           16(sp) : LR
223           8(sp)  : CR
224           0(sp)  : back-chain
225        */
226
227        /* set host FPU control word to the default mode expected
228           by VEX-generated code.  See comments in libvex.h for
229           more info. */
230        /* => get zero into f3 (tedious)
231           fsub 3,3,3 is not a reliable way to do this, since if
232           f3 holds a NaN or similar then we don't necessarily
233           wind up with zero. */
234        li      6,0
235        stw     6,88(1)
236        lfs     3,88(1)
237        mtfsf   0xFF,3   /* fpscr = lo32 of f3 */
238
239        /* set host AltiVec control word to the default mode expected
240           by VEX-generated code. */
241	ld	6,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
242	ld	6,0(6)
243        cmpldi  6,0
244        beq     .LafterVMX2
245
246        vspltisw 3,0x0  /* generate zero */
247        mtvscr  3
248.LafterVMX2:
249
250        /* make a stack frame for the code we are calling */
251        stdu    1,-48(1)
252
253	/* Set up the guest state ptr */
254        mr      31,4      /* r31 (generated code gsp) = r4 */
255#if  _CALL_ELF == 2
256/*  for the LE ABI need to setup r2 and r12 */
2570:      addis 2, 12,.TOC.-0b@ha
258        addi  2,2,.TOC.-0b@l
259#endif
260
261        /* and jump into the code cache.  Chained translations in
262           the code cache run, until for whatever reason, they can't
263           continue.  When that happens, the translation in question
264           will jump (or call) to one of the continuation points
265           VG_(cp_...) below. */
266        mtctr   5
267        bctr
268	/*NOTREACHED*/
269
270/*----------------------------------------------------*/
271/*--- Postamble and exit.                          ---*/
272/*----------------------------------------------------*/
273
274.postamble:
275        /* At this point, r6 and r7 contain two
276           words to be returned to the caller.  r6
277           holds a TRC value, and r7 optionally may
278           hold another word (for CHAIN_ME exits, the
279           address of the place to patch.) */
280
281        /* undo the "make a stack frame for the code we are calling" */
282        addi    1,1,48
283
284        /* We're leaving.  Check that nobody messed with
285           VSCR or FPSCR in ways we don't expect. */
286	/* Using r11 - value used again further on, so don't trash! */
287	ld	11,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
288	ld	11,0(11)
289
290	/* Set fpscr back to a known state, since vex-generated code
291	   may have messed with fpscr[rm]. */
292        li      5,0
293        addi    1,1,-16
294        stw     5,0(1)
295        lfs     3,0(1)
296        addi    1,1,16
297        mtfsf   0xFF,3   /* fpscr = f3 */
298
299        cmpldi  11,0    /* Do we have altivec? */
300        beq     .LafterVMX8
301
302        /* Check VSCR[NJ] == 1 */
303        /* first generate 4x 0x00010000 */
304        vspltisw  4,0x1                   /* 4x 0x00000001 */
305        vspltisw  5,0x0                   /* zero */
306        vsldoi    6,4,5,0x2               /* <<2*8 => 4x 0x00010000 */
307        /* retrieve VSCR and mask wanted bits */
308        mfvscr    7
309        vand      7,7,6                   /* gives NJ flag */
310        vspltw    7,7,0x3                 /* flags-word to all lanes */
311        vcmpequw. 8,6,7                   /* CR[24] = 1 if v6 == v7 */
312        bt        24,.invariant_violation /* branch if all_equal */
313
314.LafterVMX8:
315	/* otherwise we're OK */
316        b       .remove_frame
317
318.invariant_violation:
319        li      6,VG_TRC_INVARIANT_FAILED
320        li      7,0
321        /* fall through */
322
323.remove_frame:
324        /* r11 already holds VG_(machine_ppc32_has_VMX) value */
325        cmplwi  11,0
326        beq     .LafterVMX9
327
328        /* Restore Altivec regs.
329           Use r5 as scratch since r6/r7 are live. */
330        /* VRSAVE */
331        lwz     5,324(1)
332        mfspr   5,256         /* VRSAVE reg is spr number 256 */
333
334        /* Vector regs */
335        li      5,304
336        lvx     31,5,1
337        li      5,288
338        lvx     30,5,1
339        li      5,272
340        lvx     29,5,1
341        li      5,256
342        lvx     28,5,1
343        li      5,240
344        lvx     27,5,1
345        li      5,224
346        lvx     26,5,1
347        li      5,208
348        lvx     25,5,1
349        li      5,192
350        lvx     24,5,1
351        li      5,176
352        lvx     23,5,1
353        li      5,160
354        lvx     22,5,1
355        li      5,144
356        lvx     21,5,1
357        li      5,128
358        lvx     20,5,1
359.LafterVMX9:
360
361        /* Restore FP regs */
362        /* Floating-point regs */
363        lfd     31,616(1)
364        lfd     30,608(1)
365        lfd     29,600(1)
366        lfd     28,592(1)
367        lfd     27,584(1)
368        lfd     26,576(1)
369        lfd     25,568(1)
370        lfd     24,560(1)
371        lfd     23,552(1)
372        lfd     22,544(1)
373        lfd     21,536(1)
374        lfd     20,528(1)
375        lfd     19,520(1)
376        lfd     18,512(1)
377        lfd     17,504(1)
378        lfd     16,496(1)
379        lfd     15,488(1)
380        lfd     14,480(1)
381
382        /* restore int regs, including importantly r3 (two_words) */
383        ld      31,472(1)
384        ld      30,464(1)
385        ld      29,456(1)
386        ld      28,448(1)
387        ld      27,440(1)
388        ld      26,432(1)
389        ld      25,424(1)
390        ld      24,416(1)
391        ld      23,408(1)
392        ld      22,400(1)
393        ld      21,392(1)
394        ld      20,384(1)
395        ld      19,376(1)
396        ld      18,368(1)
397        ld      17,360(1)
398        ld      16,352(1)
399        ld      15,344(1)
400        ld      14,336(1)
401        ld      13,328(1)
402        ld      3,104(1)
403        /* Stash return values */
404        std     6,0(3)
405        std     7,8(3)
406
407        /* restore lr & sp, and leave */
408        ld      0,632(1)  /* stack_size + 8 */
409        mtcr    0
410        ld      0,640(1)  /* stack_size + 16 */
411        mtlr    0
412        addi    1,1,624   /* stack_size */
413        blr
414#if _CALL_ELF == 2
415	.size VG_(disp_run_translations),.-VG_(disp_run_translations)
416#endif
417
418
419/*----------------------------------------------------*/
420/*--- Continuation points                          ---*/
421/*----------------------------------------------------*/
422
423/* ------ Chain me to slow entry point ------ */
424        .section ".text"
425        .align   2
426        .globl   VG_(disp_cp_chain_me_to_slowEP)
427#if  _CALL_ELF == 2
428        .type VG_(disp_cp_chain_me_to_slowEP),@function
429	VG_(disp_cp_chain_me_to_slowEP):
430#else
431	.section ".opd","aw"
432        .align   3
433VG_(disp_cp_chain_me_to_slowEP):
434        .quad    .VG_(disp_cp_chain_me_to_slowEP),.TOC.@tocbase,0
435        .previous
436#endif
437        .type    .VG_(disp_cp_chain_me_to_slowEP),@function
438        .globl   .VG_(disp_cp_chain_me_to_slowEP)
439.VG_(disp_cp_chain_me_to_slowEP):
440#if  _CALL_ELF == 2
4410:      addis 2, 12,.TOC.-0b@ha
442        addi  2,2,.TOC.-0b@l
443        .localentry VG_(disp_cp_chain_me_to_slowEP), .-VG_(disp_cp_chain_me_to_slowEP)
444#endif
445	/* We got called.  The return address indicates
446           where the patching needs to happen.  Collect
447           the return address and, exit back to C land,
448           handing the caller the pair (Chain_me_S, RA) */
449        li   6, VG_TRC_CHAIN_ME_TO_SLOW_EP
450        mflr 7
451        /* 20 = imm64-fixed5 r30, disp_cp_chain_me_to_slowEP
452           4  = mtctr r30
453           4  = btctr
454        */
455        subi 7,7,20+4+4
456        b    .postamble
457#if  _CALL_ELF == 2
458        .size VG_(disp_cp_chain_me_to_slowEP),.-VG_(disp_cp_chain_me_to_slowEP)
459#endif
460
461/* ------ Chain me to fast entry point ------ */
462        .section ".text"
463        .align   2
464        .globl   VG_(disp_cp_chain_me_to_fastEP)
465#if  _CALL_ELF == 2
466        .type VG_(disp_cp_chain_me_to_fastEP),@function
467VG_(disp_cp_chain_me_to_fastEP):
468#else
469	.section ".opd","aw"
470        .align   3
471VG_(disp_cp_chain_me_to_fastEP):
472        .quad    .VG_(disp_cp_chain_me_to_fastEP),.TOC.@tocbase,0
473        .previous
474#endif
475        .type    .VG_(disp_cp_chain_me_to_fastEP),@function
476        .globl   .VG_(disp_cp_chain_me_to_fastEP)
477.VG_(disp_cp_chain_me_to_fastEP):
478#if  _CALL_ELF == 2
4790:      addis 2, 12,.TOC.-0b@ha
480        addi  2,2,.TOC.-0b@l
481        .localentry VG_(disp_cp_chain_me_to_fastEP), .-VG_(disp_cp_chain_me_to_fastEP)
482#endif
483	/* We got called.  The return address indicates
484           where the patching needs to happen.  Collect
485           the return address and, exit back to C land,
486           handing the caller the pair (Chain_me_S, RA) */
487        li   6, VG_TRC_CHAIN_ME_TO_FAST_EP
488        mflr 7
489        /* 20 = imm64-fixed5 r30, disp_cp_chain_me_to_fastEP
490           4  = mtctr r30
491           4  = btctr
492        */
493        subi 7,7,20+4+4
494        b    .postamble
495#if _CALL_ELF == 2
496        .size VG_(disp_cp_chain_me_to_fastEP),.-VG_(disp_cp_chain_me_to_fastEP)
497#endif
498
499/* ------ Indirect but boring jump ------ */
500        .section ".text"
501        .align   2
502        .globl   VG_(disp_cp_xindir)
503#if _CALL_ELF == 2
504        .type VG_(disp_cp_xindir),@function
505VG_(disp_cp_xindir):
506#else
507	.section ".opd","aw"
508        .align   3
509VG_(disp_cp_xindir):
510        .quad    .VG_(disp_cp_xindir),.TOC.@tocbase,0
511        .previous
512#endif
513        .type    .VG_(disp_cp_xindir),@function
514        .globl   .VG_(disp_cp_xindir)
515.VG_(disp_cp_xindir):
516#if  _CALL_ELF == 2
5170:      addis 2, 12,.TOC.-0b@ha
518        addi  2,2,.TOC.-0b@l
519        .localentry VG_(disp_cp_xindir), .-VG_(disp_cp_xindir)
520#endif
521	/* Where are we going? */
522        ld      3,OFFSET_ppc64_CIA(31)
523
524        /* stats only */
525	ld	5, .tocent__vgPlain_stats__n_xindirs_32@toc(2)
526        lwz     6,0(5)
527        addi    6,6,1
528        stw     6,0(5)
529
530	/* r5 = &VG_(tt_fast) */
531	ld	5, .tocent__vgPlain_tt_fast@toc(2) /* &VG_(tt_fast) */
532
533        /* try a fast lookup in the translation cache */
534        /* r4 = VG_TT_FAST_HASH(addr)           * sizeof(FastCacheEntry)
535              = ((r3 >>u 2) & VG_TT_FAST_MASK)  << 4 */
536	rldicl	4,3, 62, 64-VG_TT_FAST_BITS   /* entry# */
537	sldi	4,4,4      /* entry# * sizeof(FastCacheEntry) */
538	add	5,5,4      /* & VG_(tt_fast)[entry#] */
539	ld	6,0(5)     /* .guest */
540	ld	7,8(5)     /* .host */
541        cmpd    3,6
542        bne     .fast_lookup_failed
543
544        /* Found a match.  Jump to .host. */
545        mtctr   7
546        bctr
547#if _CALL_ELF == 2
548        .size VG_(disp_cp_xindir),.-VG_(disp_cp_xindir)
549#endif
550
551.fast_lookup_failed:
552        /* stats only */
553	ld	5, .tocent__vgPlain_stats__n_xindir_misses_32@toc(2)
554        lwz     6,0(5)
555        addi    6,6,1
556        stw     6,0(5)
557
558        li      6,VG_TRC_INNER_FASTMISS
559        li      7,0
560        b       .postamble
561	/*NOTREACHED*/
562
563/* ------ Assisted jump ------ */
564.section ".text"
565        .align   2
566        .globl   VG_(disp_cp_xassisted)
567#if _CALL_ELF == 2
568        .type VG_(disp_cp_xassisted),@function
569VG_(disp_cp_xassisted):
570#else
571	.section ".opd","aw"
572        .align   3
573VG_(disp_cp_xassisted):
574        .quad    .VG_(disp_cp_xassisted),.TOC.@tocbase,0
575        .previous
576#endif
577#if  _CALL_ELF == 2
5780:      addis 2, 12,.TOC.-0b@ha
579        addi  2,2,.TOC.-0b@l
580        .localentry VG_(disp_cp_xassisted), .-VG_(disp_cp_xassisted)
581#endif
582	.type    .VG_(disp_cp_xassisted),@function
583        .globl   .VG_(disp_cp_xassisted)
584.VG_(disp_cp_xassisted):
585        /* r31 contains the TRC */
586        mr      6,31
587        li      7,0
588        b       .postamble
589#if _CALL_ELF == 2
590        .size VG_(disp_cp_xassisted),.-VG_(disp_cp_xassisted)
591#endif
592
593/* ------ Event check failed ------ */
594        .section ".text"
595        .align   2
596        .globl   VG_(disp_cp_evcheck_fail)
597#if _CALL_ELF == 2
598        .type VG_(disp_cp_evcheck_fail),@function
599VG_(disp_cp_evcheck_fail):
600#else
601	.section ".opd","aw"
602        .align   3
603VG_(disp_cp_evcheck_fail):
604        .quad    .VG_(disp_cp_evcheck_fail),.TOC.@tocbase,0
605        .previous
606#endif
607#if  _CALL_ELF == 2
6080:      addis 2, 12,.TOC.-0b@ha
609        addi  2,2,.TOC.-0b@l
610        .localentry VG_(disp_cp_evcheck_fail), .-VG_(disp_cp_evcheck_fail)
611#endif
612        .type    .VG_(disp_cp_evcheck_fail),@function
613        .globl   .VG_(disp_cp_evcheck_fail)
614.VG_(disp_cp_evcheck_fail):
615        li      6,VG_TRC_INNER_COUNTERZERO
616        li      7,0
617        b       .postamble
618#if  _CALL_ELF == 2
619       .size VG_(disp_cp_evcheck_fail),.-VG_(disp_cp_evcheck_fail)
620#endif
621
622.size .VG_(disp_run_translations), .-.VG_(disp_run_translations)
623
624#endif // defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
625
626/* Let the linker know we don't need an executable stack */
627MARK_STACK_NO_EXEC
628
629/*--------------------------------------------------------------------*/
630/*--- end                                                          ---*/
631/*--------------------------------------------------------------------*/
632