1
2/*--------------------------------------------------------------------*/
3/*--- The core dispatch loop, for jumping to a code address.       ---*/
4/*---                                       dispatch-ppc64-linux.S ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8  This file is part of Valgrind, a dynamic binary instrumentation
9  framework.
10
11  Copyright (C) 2005-2011 Cerion Armour-Brown <cerion@open-works.co.uk>
12
13  This program is free software; you can redistribute it and/or
14  modify it under the terms of the GNU General Public License as
15  published by the Free Software Foundation; either version 2 of the
16  License, or (at your option) any later version.
17
18  This program is distributed in the hope that it will be useful, but
19  WITHOUT ANY WARRANTY; without even the implied warranty of
20  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  General Public License for more details.
22
23  You should have received a copy of the GNU General Public License
24  along with this program; if not, write to the Free Software
25  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26  02111-1307, USA.
27
28  The GNU General Public License is contained in the file COPYING.
29*/
30
31#if defined(VGP_ppc64_linux)
32
33#include "pub_core_basics_asm.h"
34#include "pub_core_dispatch_asm.h"
35#include "pub_core_transtab_asm.h"
36#include "libvex_guest_offsets.h"	/* for OFFSET_ppc64_CIA */
37
38
39/* References to globals via the TOC */
40
41/*
42        .globl vgPlain_tt_fast
43        .lcomm  vgPlain_tt_fast,4,4
44        .type   vgPlain_tt_fast, @object
45*/
/* TOC slots for the globals used by the dispatcher.  Each .tc entry
   holds the address of the named symbol; code fetches that address
   with "ld rN,<label>@toc(2)" and then dereferences it. */
        .section ".toc","aw"

        /* &VG_(tt_fast): fast translation-lookup table */
.tocent__vgPlain_tt_fast:
        .tc     vgPlain_tt_fast[TC],vgPlain_tt_fast

        /* &VG_(tt_fastN): per-entry profile counter pointers */
.tocent__vgPlain_tt_fastN:
        .tc     vgPlain_tt_fastN[TC],vgPlain_tt_fastN

        /* &VG_(dispatch_ctr): timeslice counter (accessed as 32 bits) */
.tocent__vgPlain_dispatch_ctr:
        .tc     vgPlain_dispatch_ctr[TC],vgPlain_dispatch_ctr

        /* &VG_(machine_ppc64_has_VMX): nonzero selects the AltiVec paths */
.tocent__vgPlain_machine_ppc64_has_VMX:
        .tc     vgPlain_machine_ppc64_has_VMX[TC],vgPlain_machine_ppc64_has_VMX
55
56/*------------------------------------------------------------*/
57/*---                                                      ---*/
58/*--- The dispatch loop.  VG_(run_innerloop) is used to    ---*/
59/*--- run all translations except no-redir ones.           ---*/
60/*---                                                      ---*/
61/*------------------------------------------------------------*/
62
63/*----------------------------------------------------*/
64/*--- Preamble (set everything up)                 ---*/
65/*----------------------------------------------------*/
66
67/* signature:
68UWord VG_(run_innerloop) ( void* guest_state, UWord do_profiling );
69*/
70
/* VG_(run_innerloop): the .opd entry is the descriptor triple
   (entry address, TOC base, 0); the code itself lives in .text under
   the dot-prefixed name. */
        .section ".text"
        .align   2
        .globl   VG_(run_innerloop)
        .section ".opd","aw"
        .align   3
VG_(run_innerloop):
        .quad    .VG_(run_innerloop),.TOC.@tocbase,0
        .previous
        .type    .VG_(run_innerloop),@function
        .globl   .VG_(run_innerloop)
.VG_(run_innerloop):
        /* On entry: r3 = guest_state, r4 = do_profiling. */

        /* Stash LR and CR in the caller's frame: LR at 16(sp),
           CR at 8(sp).  The exit path reloads them from there. */
        mflr    0
        std     0,16(1)
        mfcr    0
        std     0,8(1)

        /* Open a 624-byte frame (a multiple of 16, so the stack pointer
           keeps its 16-byte alignment). */
        stdu    1,-624(1)

        /* ---- Callee-saved floating-point registers f14..f31 ----
           18 doublewords at 480(sp)..616(sp). */
        stfd    14,480(1)
        stfd    15,488(1)
        stfd    16,496(1)
        stfd    17,504(1)
        stfd    18,512(1)
        stfd    19,520(1)
        stfd    20,528(1)
        stfd    21,536(1)
        stfd    22,544(1)
        stfd    23,552(1)
        stfd    24,560(1)
        stfd    25,568(1)
        stfd    26,576(1)
        stfd    27,584(1)
        stfd    28,592(1)
        stfd    29,600(1)
        stfd    30,608(1)
        stfd    31,616(1)

        /* ---- General registers r13..r31 ----
           19 doublewords at 328(sp)..472(sp).  r13 is the
           thread-specific pointer; saving it is belt-and-braces. */
        std     13,328(1)
        std     14,336(1)
        std     15,344(1)
        std     16,352(1)
        std     17,360(1)
        std     18,368(1)
        std     19,376(1)
        std     20,384(1)
        std     21,392(1)
        std     22,400(1)
        std     23,408(1)
        std     24,416(1)
        std     25,424(1)
        std     26,432(1)
        std     27,440(1)
        std     28,448(1)
        std     29,456(1)
        std     30,464(1)
        std     31,472(1)

        /* ---- AltiVec state, only if VG_(machine_ppc64_has_VMX) != 0 ----
           r3 and r4 are still live, so r5 is the scratch register. */
        ld      5,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
        ld      5,0(5)
        cmpldi  5,0
        beq     .LafterVMX1

        /* VRSAVE (SPR 256): one 32-bit word at 324(sp).  The 4 bytes
           at 320(sp) are alignment padding. */
        mfspr   5,256
        stw     5,324(1)

        /* v20..v31: twelve quadwords at 128(sp)..304(sp).  stvx takes
           its offset from a register, hence the li before each store. */
        li      5,128
        stvx    20,5,1
        li      5,144
        stvx    21,5,1
        li      5,160
        stvx    22,5,1
        li      5,176
        stvx    23,5,1
        li      5,192
        stvx    24,5,1
        li      5,208
        stvx    25,5,1
        li      5,224
        stvx    26,5,1
        li      5,240
        stvx    27,5,1
        li      5,256
        stvx    28,5,1
        li      5,272
        stvx    29,5,1
        li      5,288
        stvx    30,5,1
        li      5,304
        stvx    31,5,1
.LafterVMX1:

        /* ---- Local variable space ----
           104(sp)   : spilled original guest_state pointer
            96(sp)   : set aside for checking FPSCR[RM]
            88(sp)   : scratch word used below to zero the FPSCR
            48:87(sp): free

           Linkage area (reserved):
            40(sp) : TOC
            32(sp) : link editor doubleword
            24(sp) : compiler doubleword
            16(sp) : LR
             8(sp) : CR
             0(sp) : back-chain
        */
        std     3,104(1)                /* keep a copy of the original pointer */
        mr      31,3                    /* r31 = guest_state from here on */

        /* CAB TODO: Use a caller-saved reg for orig guest_state ptr
           - rem to set non-allocateable in isel.c */

        /* r29 = VG_(dispatch_ctr), a 32-bit value (lwz zero-extends). */
        ld      29,.tocent__vgPlain_dispatch_ctr@toc(2)
        lwz     29,0(29)

        /* Put the host FPSCR into the default mode that VEX-generated
           code expects (see libvex.h).  A true zero is bounced through
           memory into f3, because "fsub 3,3,3" does not reliably give
           zero when f3 holds a NaN or similar. */
        li      5,0
        stw     5,88(1)
        lfs     3,88(1)
        mtfsf   0xFF,3                  /* fpscr = lo32 of f3 */

        /* Likewise zero the AltiVec control word (VSCR), when VMX is
           present. */
        ld      5,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
        ld      5,0(5)
        cmpldi  5,0
        beq     .LafterVMX2

        vspltisw 3,0x0                  /* v3 = all zeroes */
        mtvscr  3
.LafterVMX2:

        /* Frame for the translations we are about to call. */
        stdu    1,-48(1)

        /* r3 = guest CIA, i.e. the first address to dispatch to. */
        ld      3,OFFSET_ppc64_CIA(31)

        /* Choose the dispatcher.  r4 (do_profiling) is not needed
           after this test and is free to be trashed. */
        cmplwi  4,0
        bne     .VG_(run_innerloop__dispatch_profiled)
        b       .VG_(run_innerloop__dispatch_unprofiled)
        /*NOTREACHED*/
247
248
249/*----------------------------------------------------*/
250/*--- NO-PROFILING (standard) dispatcher           ---*/
251/*----------------------------------------------------*/
252
/* Standard (non-profiling) dispatcher: descriptor in .opd, code in .text. */
        .section ".text"
        .align   2
        .globl   VG_(run_innerloop__dispatch_unprofiled)
        .section ".opd","aw"
        .align   3
VG_(run_innerloop__dispatch_unprofiled):
        .quad    .VG_(run_innerloop__dispatch_unprofiled),.TOC.@tocbase,0
        .previous
        .type    .VG_(run_innerloop__dispatch_unprofiled),@function
        .globl   .VG_(run_innerloop__dispatch_unprofiled)
.VG_(run_innerloop__dispatch_unprofiled):
        /* Live registers at the top of every iteration:
              r1  = sp
              r2  = TOC pointer
              r3  = CIA, the next guest address to run
              r29 = dispatch_ctr
              r31 = guest_state
           Stack state:
              144(r1) = var space for FPSCR[RM]
        */

        /* r5 = &VG_(tt_fast), fetched early to help scheduling. */
        ld      5,.tocent__vgPlain_tt_fast@toc(2)

        /* If the low bit of r31 is set, the guest state pointer has
           been replaced by a request code: leave the loop. */
        andi.   0,31,1
        bne     .gsp_changed

        /* Record the jump target in the guest state. */
        std     3,OFFSET_ppc64_CIA(31)

        /* Spend one unit of timeslice; at zero, hand back to the
           scheduler. */
        addi    29,29,-1
        cmpldi  29,0
        beq     .counter_is_zero

        /* Fast translation-cache probe:
              r4 = VG_TT_FAST_HASH(addr) * sizeof(FastCacheEntry)
                 = ((r3 >>u 2) & VG_TT_FAST_MASK) << 4 */
        rldicl  4,3,62,64-VG_TT_FAST_BITS       /* entry# */
        sldi    4,4,4                           /* entry# * 16 */
        add     5,5,4                           /* &VG_(tt_fast)[entry#] */
        ld      7,8(5)                          /* .host */
        ld      6,0(5)                          /* .guest */
        cmpd    3,6
        bne     .fast_lookup_failed

        /* Hit: call the translation at .host. */
        mtctr   7
        bctrl

        /* Back from guest code:
              r3  = next (original) guest address
              r31 = guest_state, unchanged, or a value describing the
                    control transfer requested to *r3
        */
        b       .VG_(run_innerloop__dispatch_unprofiled)
        /*NOTREACHED*/
        .size .VG_(run_innerloop), .-.VG_(run_innerloop)
312
313
314/*----------------------------------------------------*/
315/*--- PROFILING dispatcher (can be much slower)    ---*/
316/*----------------------------------------------------*/
317
/* Profiling dispatcher (can be much slower): same loop as the
   unprofiled one, plus a per-entry hit counter bump before each call. */
        .section        ".text"
        .align 2
        .globl VG_(run_innerloop__dispatch_profiled)
        .section        ".opd","aw"
        .align 3
VG_(run_innerloop__dispatch_profiled):
        .quad   .VG_(run_innerloop__dispatch_profiled),.TOC.@tocbase,0
        .previous
        .type   .VG_(run_innerloop__dispatch_profiled),@function
        .globl  .VG_(run_innerloop__dispatch_profiled)
.VG_(run_innerloop__dispatch_profiled):
        /* At entry: Live regs:
                r1  (=sp)
                r2  (toc pointer)
                r3  (=CIA = next guest address)
                r29 (=dispatch_ctr)
                r31 (=guest_state)
           Stack state:
                144(r1) (=var space for FPSCR[RM])
        */
        /* Has the guest state pointer been messed with?  If yes, exit.
           Also set up & VG_(tt_fast) early in an attempt at better
           scheduling. */
        ld      5, .tocent__vgPlain_tt_fast@toc(2) /* &VG_(tt_fast) */
        rldicl. 0,31,0,63       /* r0 = r31 & 1, sets CR0 */
        bne     .gsp_changed

        /* save the jump address in the guest state */
        std     3,OFFSET_ppc64_CIA(31)

        /* Are we out of timeslice?  If yes, defer to scheduler. */
        subi    29,29,1
        cmpldi  29,0
        beq     .counter_is_zero

        /* try a fast lookup in the translation cache */
        /* r4 = VG_TT_FAST_HASH(addr)           * sizeof(FastCacheEntry)
              = ((r3 >>u 2) & VG_TT_FAST_MASK)  << 4 */
        rldicl  4,3, 62, 64-VG_TT_FAST_BITS   /* entry# */
        sldi    4,4,4      /* entry# * sizeof(FastCacheEntry) */
        add     5,5,4      /* & VG_(tt_fast)[entry#] */
        ld      6,0(5)     /* .guest */
        ld      7,8(5)     /* .host */
        cmpd    3,6
        bne     .fast_lookup_failed

        /* increment bb profile counter *VG_(tt_fastN)[x] (=32bit val).
           r4 still holds entry# * 16 from the lookup above; halving it
           gives the byte offset into the array of 8-byte pointers. */
        ld      9, .tocent__vgPlain_tt_fastN@toc(2)
        srdi    4, 4,1     /* entry# * sizeof(UInt*) */
        ldx     9, 9,4     /* r9 = VG_(tt_fastN)[VG_TT_HASH(addr)] */
        lwz     6, 0(9)    /* *(UInt*)r9 ++ */
        addi    6, 6,1
        stw     6, 0(9)

        /* Found a match.  Call .host. */
        mtctr   7
        bctrl

        /* On return from guest code:
           r3  holds destination (original) address.
           r31 may be unchanged (guest_state), or may indicate further
           details of the control transfer requested to *r3.
        */
        /* start over */
        b       .VG_(run_innerloop__dispatch_profiled)
        /*NOTREACHED*/
        /* Size this function.  The directive used to name
           .VG_(run_a_noredir_translation), a symbol defined further
           down, which gave that symbol a negative span and left this
           one without a size. */
        .size .VG_(run_innerloop__dispatch_profiled), .-.VG_(run_innerloop__dispatch_profiled)
385
386
387/*----------------------------------------------------*/
388/*--- exit points                                  ---*/
389/*----------------------------------------------------*/
390
.gsp_changed:
        /* r31 no longer holds the guest state pointer, so the
           scheduler has to sort things out.  The dispatch counter has
           not been decremented yet on this path, so there is nothing
           to back out.

           %CIA is stale here: r3 must be written back to it.  r31
           carries the value to return, so the original guest_state
           pointer is reloaded into r5 from its spill slot (104 in the
           main frame, plus the 48-byte callee frame = 152). */
        ld      5,152(1)                /* r5 = original guest_state */
        std     3,OFFSET_ppc64_CIA(5)   /* bring %CIA up to date */
        mr      3,31                    /* return value = new gsp */
        b       .run_innerloop_exit
        /*NOTREACHED*/

.counter_is_zero:
        /* Timeslice used up.  %CIA is already current; undo the
           decrement so the stored counter reflects work actually done. */
        li      3,VG_TRC_INNER_COUNTERZERO
        addi    29,29,1
        b       .run_innerloop_exit

.fast_lookup_failed:
        /* No entry in the fast cache.  %CIA is already current; undo
           the decrement since no translation was run. */
        li      3,VG_TRC_INNER_FASTMISS
        addi    29,29,1
        b       .run_innerloop_exit
418
419
420
421/* All exits from the dispatcher go through here.
422   r3 holds the return value.
423*/
.run_innerloop_exit:
        /* Common exit.  r3 = value to return.  Before leaving, put the
           FPSCR back to a known state and verify that the VSCR was not
           disturbed. */

        /* FPSCR := 0, since VEX-generated code may have changed
           fpscr[rm].  The zero is bounced through a temporary word
           16 bytes below the current stack pointer. */
        addi    1,1,-16
        li      5,0
        stw     5,0(1)
        lfs     3,0(1)
        addi    1,1,16
        mtfsf   0xFF,3                  /* fpscr = f3 */

        /* r11 = VG_(machine_ppc64_has_VMX).  The value is tested again
           in .run_innerloop_exit_REALLY, so r11 must stay intact. */
        ld      11,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
        ld      11,0(11)
        cmpldi  11,0
        beq     .LafterVMX8

        /* VSCR[NJ] check: fail if the NJ bit (0x00010000) is set. */
        vspltisw  4,0x1                 /* v4 = 4x 0x00000001 */
        vspltisw  5,0x0                 /* v5 = zero */
        vsldoi    6,4,5,0x2             /* v6 = 4x 0x00010000 (<< 2 bytes) */
        mfvscr    7                     /* v7 = VSCR */
        vand      7,7,6                 /* isolate the NJ bit */
        vspltw    7,7,0x3               /* copy the flags word to all lanes */
        vcmpequw. 8,6,7                 /* CR[24] = 1 if v6 == v7 in every lane */
        bt        24,.invariant_violation
.LafterVMX8:

        /* All checks passed. */
        b       .run_innerloop_exit_REALLY


.invariant_violation:
        /* Replace the return value with the failure code. */
        li      3,VG_TRC_INVARIANT_FAILED
        b       .run_innerloop_exit_REALLY

463
.run_innerloop_exit_REALLY:
        /* Final teardown for VG_(run_innerloop).
           In:  r3  = VG_TRC_* value (or new gsp) to return
                r11 = VG_(machine_ppc64_has_VMX), loaded in
                      .run_innerloop_exit
                r29 = current dispatch counter
           r3 is left untouched; r0, r4 and r5 are used as scratch. */

        /* Drop the 48-byte frame made for the translations; sp now
           points at the 624-byte save frame again. */
        addi    1,1,48

        /* Write ctr to VG_(dispatch_ctr) (=32bit value).  This has to
           happen before r29 is reloaded below. */
        ld      5,.tocent__vgPlain_dispatch_ctr@toc(2)
        stw     29,0(5)

        /* CR is restored once, from the caller's frame, at the end of
           this routine.  An earlier "lwz 0,44(1); mtcr 0" pair that
           used to sit here read the TOC save slot and was always
           overwritten by that final restore, so it has been dropped. */

        /* Restore callee-saved registers... */

        /* Floating-point regs f14..f31 */
        lfd     14,480(1)
        lfd     15,488(1)
        lfd     16,496(1)
        lfd     17,504(1)
        lfd     18,512(1)
        lfd     19,520(1)
        lfd     20,528(1)
        lfd     21,536(1)
        lfd     22,544(1)
        lfd     23,552(1)
        lfd     24,560(1)
        lfd     25,568(1)
        lfd     26,576(1)
        lfd     27,584(1)
        lfd     28,592(1)
        lfd     29,600(1)
        lfd     30,608(1)
        lfd     31,616(1)

        /* General regs r13..r31 */
        ld      13,328(1)
        ld      14,336(1)
        ld      15,344(1)
        ld      16,352(1)
        ld      17,360(1)
        ld      18,368(1)
        ld      19,376(1)
        ld      20,384(1)
        ld      21,392(1)
        ld      22,400(1)
        ld      23,408(1)
        ld      24,416(1)
        ld      25,424(1)
        ld      26,432(1)
        ld      27,440(1)
        ld      28,448(1)
        ld      29,456(1)
        ld      30,464(1)
        ld      31,472(1)

        /* r11 already holds VG_(machine_ppc64_has_VMX) value */
        cmpldi  11,0
        beq     .LafterVMX9

        /* VRSAVE (SPR 256): write the saved word back.  This was
           "mfspr 4,256", which read the SPR over the value just loaded
           and so never restored VRSAVE. */
        lwz     4,324(1)
        mtspr   256,4

        /* Vector regs v20..v31 (lvx takes its offset from a register) */
        li      4,128
        lvx     20,4,1
        li      4,144
        lvx     21,4,1
        li      4,160
        lvx     22,4,1
        li      4,176
        lvx     23,4,1
        li      4,192
        lvx     24,4,1
        li      4,208
        lvx     25,4,1
        li      4,224
        lvx     26,4,1
        li      4,240
        lvx     27,4,1
        li      4,256
        lvx     28,4,1
        li      4,272
        lvx     29,4,1
        li      4,288
        lvx     30,4,1
        li      4,304
        lvx     31,4,1
.LafterVMX9:

        /* reset cr, lr, sp from the slots written at entry */
        ld      0,632(1)  /* stack_size + 8  : saved CR */
        mtcr    0
        ld      0,640(1)  /* stack_size + 16 : saved LR */
        mtlr    0
        addi    1,1,624   /* stack_size */
        blr
563
564
565/*------------------------------------------------------------*/
566/*---                                                      ---*/
567/*--- A special dispatcher, for running no-redir           ---*/
568/*--- translations.  Just runs the given translation once. ---*/
569/*---                                                      ---*/
570/*------------------------------------------------------------*/
571
572/* signature:
573void VG_(run_a_noredir_translation) ( UWord* argblock );
574*/
575
576/* Run a no-redir translation.  argblock points to 4 UWords, 2 to carry args
577   and 2 to carry results:
578      0: input:  ptr to translation
579      1: input:  ptr to guest state
580      2: output: next guest PC
581      3: output: guest state pointer afterwards (== thread return code)
582*/
/* VG_(run_a_noredir_translation): descriptor in .opd, code in .text. */
        .section ".text"
        .align   2
        .globl   VG_(run_a_noredir_translation)
        .section ".opd","aw"
        .align   3
VG_(run_a_noredir_translation):
        .quad    .VG_(run_a_noredir_translation),.TOC.@tocbase,0
        .previous
        .type    .VG_(run_a_noredir_translation),@function
        .globl   .VG_(run_a_noredir_translation)
.VG_(run_a_noredir_translation):
        /* In: r3 = argblock (4 UWords, see the description above).

           Frame layout (512 bytes):
              256..392(sp) : r14..r31
              400(sp)      : LR
              408(sp)      : r2 (TOC pointer)
              416(sp)      : argblock pointer
        */

        /* save callee-save int regs, & lr */
        stdu    1,-512(1)
        std     14,256(1)
        std     15,264(1)
        std     16,272(1)
        std     17,280(1)
        std     18,288(1)
        std     19,296(1)
        std     20,304(1)
        std     21,312(1)
        std     22,320(1)
        std     23,328(1)
        std     24,336(1)
        std     25,344(1)
        std     26,352(1)
        std     27,360(1)
        std     28,368(1)
        std     29,376(1)
        std     30,384(1)
        std     31,392(1)
        mflr    31
        std     31,400(1)
        std     2,408(1)        /* also preserve R2, just in case .. */

        /* Keep argblock for afterwards, then call the translation
           with r31 = guest state pointer. */
        std     3,416(1)
        ld      31,8(3)         /* argblock[1]: ptr to guest state */
        ld      30,0(3)         /* argblock[0]: ptr to translation */
        mtlr    30
        blrl

        /* Back from the translation: r3 = next guest PC, r31 = guest
           state pointer afterwards.  Store both into argblock. */
        ld      4,416(1)
        std     3,16(4)         /* argblock[2] */
        std     31,24(4)        /* argblock[3] */

        /* restore callee-save int regs, lr and r2 */
        ld      14,256(1)
        ld      15,264(1)
        ld      16,272(1)
        ld      17,280(1)
        ld      18,288(1)
        ld      19,296(1)
        ld      20,304(1)
        ld      21,312(1)
        ld      22,320(1)
        ld      23,328(1)
        ld      24,336(1)
        ld      25,344(1)
        ld      26,352(1)
        ld      27,360(1)
        ld      28,368(1)
        ld      29,376(1)
        ld      30,384(1)
        ld      31,400(1)       /* saved LR, via r31 ... */
        mtlr    31
        ld      31,392(1)       /* ... then the real r31 */
        ld      2,408(1)        /* also preserve R2, just in case .. */

        addi    1,1,512
        blr
        /* Give the function symbol its real extent. */
        .size .VG_(run_a_noredir_translation), .-.VG_(run_a_noredir_translation)
652
653
654/* Let the linker know we don't need an executable stack */
655.section .note.GNU-stack,"",@progbits
656
657#endif // defined(VGP_ppc64_linux)
658
659/*--------------------------------------------------------------------*/
660/*--- end                                                          ---*/
661/*--------------------------------------------------------------------*/
662