dispatch-ppc64be-linux.S revision 8b68b64759254d514d98328c496cbd88cde4c9a5
1
2/*--------------------------------------------------------------------*/
3/*--- The core dispatch loop, for jumping to a code address.       ---*/
4/*---                                       dispatch-ppc64-linux.S ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8  This file is part of Valgrind, a dynamic binary instrumentation
9  framework.
10
11  Copyright (C) 2005-2009 Cerion Armour-Brown <cerion@open-works.co.uk>
12
13  This program is free software; you can redistribute it and/or
14  modify it under the terms of the GNU General Public License as
15  published by the Free Software Foundation; either version 2 of the
16  License, or (at your option) any later version.
17
18  This program is distributed in the hope that it will be useful, but
19  WITHOUT ANY WARRANTY; without even the implied warranty of
20  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  General Public License for more details.
22
23  You should have received a copy of the GNU General Public License
24  along with this program; if not, write to the Free Software
25  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26  02111-1307, USA.
27
28  The GNU General Public License is contained in the file COPYING.
29*/
30
31#if defined(VGP_ppc64_linux)
32
33#include "pub_core_basics_asm.h"
34#include "pub_core_dispatch_asm.h"
35#include "pub_core_transtab_asm.h"
36#include "libvex_guest_offsets.h"	/* for OFFSET_ppc64_CIA */
37
38
39/* References to globals via the TOC */
40
41/*
42        .globl vgPlain_tt_fast
43        .lcomm  vgPlain_tt_fast,4,4
44        .type   vgPlain_tt_fast, @object
45*/
        /* TOC entries for the globals this file touches.  Each .tc
           directive below emits one TOC slot holding the address of the
           named symbol; code in this file fetches that address with
              ld rX,.tocent__<name>@toc(2)
           (r2 = TOC pointer) and then dereferences rX to reach the
           variable itself. */
        .section        ".toc","aw"
/* address of vgPlain_tt_fast (the fast translation-lookup table) */
.tocent__vgPlain_tt_fast:
        .tc vgPlain_tt_fast[TC],vgPlain_tt_fast
/* address of vgPlain_tt_fastN (per-entry profile counter pointers) */
.tocent__vgPlain_tt_fastN:
        .tc vgPlain_tt_fastN[TC],vgPlain_tt_fastN
/* address of vgPlain_dispatch_ctr (accessed as a 32-bit value) */
.tocent__vgPlain_dispatch_ctr:
        .tc vgPlain_dispatch_ctr[TC],vgPlain_dispatch_ctr
/* address of vgPlain_machine_ppc64_has_VMX (nonzero => AltiVec present) */
.tocent__vgPlain_machine_ppc64_has_VMX:
        .tc vgPlain_machine_ppc64_has_VMX[TC],vgPlain_machine_ppc64_has_VMX
55
56/*------------------------------------------------------------*/
57/*---                                                      ---*/
58/*--- The dispatch loop.  VG_(run_innerloop) is used to    ---*/
59/*--- run all translations except no-redir ones.           ---*/
60/*---                                                      ---*/
61/*------------------------------------------------------------*/
62
63/*----------------------------------------------------*/
64/*--- Preamble (set everything up)                 ---*/
65/*----------------------------------------------------*/
66
67/* signature:
68UWord VG_(run_innerloop) ( void* guest_state, UWord do_profiling );
69*/
70
.section ".text"
.align   2
.globl VG_(run_innerloop)
.section ".opd","aw"
.align   3
VG_(run_innerloop):
.quad    .VG_(run_innerloop),.TOC.@tocbase,0
.previous
.type    .VG_(run_innerloop),@function
.globl   .VG_(run_innerloop)
.VG_(run_innerloop):
	/* Dispatcher preamble.
	   In:   r3 = guest_state, r4 = do_profiling (UWord), r2 = TOC.
	   Does: saves LR/CR in the caller's frame, builds a 624-byte
	         frame, saves r13-r31, f14-f31 and (if VMX is present)
	         VRSAVE and v20-v31, zeroes FPSCR and VSCR, pushes a
	         further 48-byte frame, loads the guest CIA into r3 and
	         branches to the profiled or unprofiled dispatcher.
	   The matching teardown is at .run_innerloop_exit_REALLY.

	   Layout of the 624-byte frame (offsets from sp after the stdu):
	      480..623 : f14-f31
	      336..479 : r14-r31
	      328      : r13
	      324      : VRSAVE (32-bit word)
	      320..323 : padding
	      128..319 : v20-v31 (16-byte aligned)
	      104      : original guest_state pointer
	       96      : var space for FPSCR[RM]
	       88      : scratch word used to load zero into the FPSCR
	       48..87  : free
	        0..47  : linkage area
	*/
	/* r3 holds guest_state */
	/* r4 holds do_profiling */

        /* ----- entry point to VG_(run_innerloop) ----- */
        /* PPC64 ABI saves LR->16(parent sp), CR->8(parent sp) */

        /* Save lr, cr (into the caller's frame, before we move sp) */
        mflr    0
        std     0,16(1)
        mfcr    0
        std     0,8(1)

        /* New stack frame */
        stdu    1,-624(1)  /* sp should maintain 16-byte alignment */

        /* Save callee-saved registers... */

        /* Floating-point reg save area : 144 bytes */
        stfd    31,616(1)
        stfd    30,608(1)
        stfd    29,600(1)
        stfd    28,592(1)
        stfd    27,584(1)
        stfd    26,576(1)
        stfd    25,568(1)
        stfd    24,560(1)
        stfd    23,552(1)
        stfd    22,544(1)
        stfd    21,536(1)
        stfd    20,528(1)
        stfd    19,520(1)
        stfd    18,512(1)
        stfd    17,504(1)
        stfd    16,496(1)
        stfd    15,488(1)
        stfd    14,480(1)

        /* General reg save area : 144 bytes */
        std     31,472(1)
        std     30,464(1)
        std     29,456(1)
        std     28,448(1)
        std     27,440(1)
        std     26,432(1)
        std     25,424(1)
        std     24,416(1)
        std     23,408(1)
        std     22,400(1)
        std     21,392(1)
        std     20,384(1)
        std     19,376(1)
        std     18,368(1)
        std     17,360(1)
        std     16,352(1)
        std     15,344(1)
        std     14,336(1)
        /* Probably not necessary to save r13 (thread-specific ptr),
           as VEX stays clear of it... but what the hey. */
        std     13,328(1)

        /* It's necessary to save/restore VRSAVE in the AIX / Darwin ABI.
           The Linux kernel might not actually use VRSAVE for its intended
           purpose, but it should be harmless to preserve anyway. */
	/* r3, r4 are live here, so use r5 */
	ld	5,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)  /* r5 = &has_VMX */
	ld	5,0(5)                                           /* r5 = has_VMX  */
        cmpldi  5,0
        beq     .LafterVMX1   /* no VMX: skip all vector state */

        /* VRSAVE save word : 32 bits */
        mfspr   5,256         /* vrsave reg is spr number 256 */
        stw     5,324(1)

        /* Alignment padding : 4 bytes */

        /* Vector reg save area (quadword aligned) : 192 bytes */
        /* stvx only has an indexed form, hence the li/stvx pairs */
        li      5,304
        stvx    31,5,1
        li      5,288
        stvx    30,5,1
        li      5,272
        stvx    29,5,1
        li      5,256
        stvx    28,5,1
        li      5,240
        stvx    27,5,1
        li      5,224
        stvx    26,5,1
        li      5,208
        stvx    25,5,1
        li      5,192
        stvx    24,5,1
        li      5,176
        stvx    23,5,1
        li      5,160
        stvx    22,5,1
        li      5,144
        stvx    21,5,1
        li      5,128
        stvx    20,5,1
.LafterVMX1:

        /* Local variable space... */

        /* r3 holds guest_state */
        /* r4 holds do_profiling */
        mr      31,3           /* r31 = guest_state for the dispatch loop */
        std     3,104(1)       /* spill orig guest_state ptr */

        /* 96(sp) used later to check FPSCR[RM] */
        /* 88(sp) used later to load fpscr with zero */
	/* 48:87(sp) free */

        /* Linkage Area (reserved)
           40(sp) : TOC
           32(sp) : link editor doubleword
           24(sp) : compiler doubleword
           16(sp) : LR
           8(sp)  : CR
           0(sp)  : back-chain
        */

// CAB TODO: Use a caller-saved reg for orig guest_state ptr
// - rem to set non-allocateable in isel.c

        /* hold dispatch_ctr (=32bit value) in r29 */
	ld	29,.tocent__vgPlain_dispatch_ctr@toc(2)
	lwz	29,0(29)  /* 32-bit zero-extending load */

        /* set host FPU control word to the default mode expected
           by VEX-generated code.  See comments in libvex.h for
           more info. */
        /* => get zero into f3 (tedious)
           fsub 3,3,3 is not a reliable way to do this, since if
           f3 holds a NaN or similar then we don't necessarily
           wind up with zero. */
        li      5,0
        stw     5,88(1)
        lfs     3,88(1)
        mtfsf   0xFF,3   /* fpscr = lo32 of f3 */

        /* set host AltiVec control word to the default mode expected
           by VEX-generated code. */
	ld	5,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
	ld	5,0(5)
        cmpldi  5,0
        beq     .LafterVMX2

        vspltisw 3,0x0  /* generate zero */
        mtvscr  3
.LafterVMX2:

        /* make a stack frame for the code we are calling.  From here
           on every offset into the 624-byte frame is 48 larger, e.g.
           the spilled guest_state ptr is at 152(sp). */
        stdu    1,-48(1)

        /* fetch %CIA into r3 */
        ld      3,OFFSET_ppc64_CIA(31)

        /* fall into main loop (the right one) */
	/* r4 = do_profiling.  It's probably trashed after here,
           but that's OK: we don't need it after here. */
	/* do_profiling is a UWord, so test all 64 bits of r4; a 32-bit
	   compare would ignore the upper half of the argument. */
	cmpldi	4,0
	beq	.VG_(run_innerloop__dispatch_unprofiled)
	b	.VG_(run_innerloop__dispatch_profiled)
	/*NOTREACHED*/
246	/*NOTREACHED*/
247
248
249/*----------------------------------------------------*/
250/*--- NO-PROFILING (standard) dispatcher           ---*/
251/*----------------------------------------------------*/
252
253        .section        ".text"
254        .align 2
255        .globl VG_(run_innerloop__dispatch_unprofiled)
256        .section        ".opd","aw"
257        .align 3
258VG_(run_innerloop__dispatch_unprofiled):
259        .quad   .VG_(run_innerloop__dispatch_unprofiled),.TOC.@tocbase,0
260        .previous
261        .type   .VG_(run_innerloop__dispatch_unprofiled),@function
262        .globl  .VG_(run_innerloop__dispatch_unprofiled)
263.VG_(run_innerloop__dispatch_unprofiled):
264	/* At entry: Live regs:
265		r1  (=sp)
266		r2  (toc pointer)
267		r3  (=CIA = next guest address)
268		r29 (=dispatch_ctr)
269		r31 (=guest_state)
270	   Stack state:
271		152(r1) (=orig guest_state)
272		144(r1) (=var space for FPSCR[RM])
273	*/
274	/* Has the guest state pointer been messed with?  If yes, exit.
275           Also set up & VG_(tt_fast) early in an attempt at better
276           scheduling. */
277        ld      9,152(1)        /* original guest_state ptr */
278	ld	5, .tocent__vgPlain_tt_fast@toc(2) /* &VG_(tt_fast) */
279        cmpd    9,31
280        bne	.gsp_changed
281
282        /* save the jump address in the guest state */
283        std     3,OFFSET_ppc64_CIA(31)
284
285        /* Are we out of timeslice?  If yes, defer to scheduler. */
286	subi	29,29,1
287	cmpldi	29,0
288        beq	.counter_is_zero
289
290        /* try a fast lookup in the translation cache */
291        /* r4 = VG_TT_FAST_HASH(addr)           * sizeof(FastCacheEntry)
292              = ((r3 >>u 2) & VG_TT_FAST_MASK)  << 4 */
293	rldicl	4,3, 62, 64-VG_TT_FAST_BITS   /* entry# */
294	sldi	4,4,4      /* entry# * sizeof(FastCacheEntry) */
295	add	5,5,4      /* & VG_(tt_fast)[entry#] */
296	ld	6,0(5)     /* .guest */
297	ld	7,8(5)     /* .host */
298        cmpd    3,6
299        bne     .fast_lookup_failed
300
301        /* Found a match.  Call .host. */
302        mtctr   7
303        bctrl
304
305        /* On return from guest code:
306	   r3  holds destination (original) address.
307           r31 may be unchanged (guest_state), or may indicate further
308           details of the control transfer requested to *r3.
309        */
310	/* start over */
311	b	.VG_(run_innerloop__dispatch_unprofiled)
312	/*NOTREACHED*/
313        .size VG_(run_innerloop), .-VG_(run_innerloop)
314
315
316/*----------------------------------------------------*/
317/*--- PROFILING dispatcher (can be much slower)    ---*/
318/*----------------------------------------------------*/
319
320        .section        ".text"
321        .align 2
322        .globl VG_(run_innerloop__dispatch_profiled)
323        .section        ".opd","aw"
324        .align 3
325VG_(run_innerloop__dispatch_profiled):
326        .quad   .VG_(run_innerloop__dispatch_profiled),.TOC.@tocbase,0
327        .previous
328        .type   .VG_(run_innerloop__dispatch_profiled),@function
329        .globl  .VG_(run_innerloop__dispatch_profiled)
330.VG_(run_innerloop__dispatch_profiled):
331	/* At entry: Live regs:
332		r1  (=sp)
333		r2  (toc pointer)
334		r3  (=CIA = next guest address)
335		r29 (=dispatch_ctr)
336		r31 (=guest_state)
337	   Stack state:
338		152(r1) (=orig guest_state)
339		144(r1) (=var space for FPSCR[RM])
340	*/
341	/* Has the guest state pointer been messed with?  If yes, exit.
342           Also set up & VG_(tt_fast) early in an attempt at better
343           scheduling. */
344        ld      9,152(1)        /* original guest_state ptr */
345	ld	5, .tocent__vgPlain_tt_fast@toc(2) /* &VG_(tt_fast) */
346        cmpd    9,31
347        bne	.gsp_changed
348
349        /* save the jump address in the guest state */
350        std     3,OFFSET_ppc64_CIA(31)
351
352        /* Are we out of timeslice?  If yes, defer to scheduler. */
353	subi	29,29,1
354	cmpldi	29,0
355        beq	.counter_is_zero
356
357        /* try a fast lookup in the translation cache */
358        /* r4 = VG_TT_FAST_HASH(addr)           * sizeof(FastCacheEntry)
359              = ((r3 >>u 2) & VG_TT_FAST_MASK)  << 4 */
360	rldicl	4,3, 62, 64-VG_TT_FAST_BITS   /* entry# */
361	sldi	4,4,4      /* entry# * sizeof(FastCacheEntry) */
362	add	5,5,4      /* & VG_(tt_fast)[entry#] */
363	ld	6,0(5)     /* .guest */
364	ld	7,8(5)     /* .host */
365        cmpd    3,6
366        bne     .fast_lookup_failed
367
368        /* increment bb profile counter VG_(tt_fastN)[x] (=32bit val) */
369	ld	9, .tocent__vgPlain_tt_fastN@toc(2)
370	srdi	4, 4,1     /* entry# * sizeof(UInt*) */
371	ldx	9, 9,4     /* r7 = VG_(tt_fastN)[VG_TT_HASH(addr)] */
372	lwz	6, 0(9)    /* *(UInt*)r7 ++ */
373	addi	6, 6,1
374	stw	6, 0(9)
375
376        /* Found a match.  Call .host. */
377        mtctr   7
378        bctrl
379
380        /* On return from guest code:
381	   r3  holds destination (original) address.
382           r31 may be unchanged (guest_state), or may indicate further
383           details of the control transfer requested to *r3.
384        */
385	/* start over */
386	b	.VG_(run_innerloop__dispatch_profiled)
387	/*NOTREACHED*/
388        .size VG_(run_a_noredir_translation), .-VG_(run_a_noredir_translation)
389
390
391/*----------------------------------------------------*/
392/*--- exit points                                  ---*/
393/*----------------------------------------------------*/
394
/* The three stubs below are the ways out of the dispatch loops.  Each
   one puts the dispatcher's return value in r3 and branches to
   .run_innerloop_exit.  sp still points at the inner 48-byte frame, so
   the spilled guest_state pointer is at 152(sp). */
.gsp_changed:
	/* Someone messed with the gsp (in r31).  Have to
           defer to scheduler to resolve this.  dispatch ctr
	   is not yet decremented, so no need to increment. */
	/* %CIA is NOT up to date here.  First, need to write
	   %r3 back to %CIA, but without trashing %r31 since
	   that holds the value we want to return to the scheduler.
	   Hence use %r5 transiently for the guest state pointer. */
        ld      5,152(1)         /* original guest_state ptr */
        std     3,OFFSET_ppc64_CIA(5)
	mr	3,31		/* r3 = new gsp value */
	b	.run_innerloop_exit
	/*NOTREACHED*/

.counter_is_zero:
	/* Reached when the decremented dispatch counter hit zero. */
	/* %CIA is up to date */
	/* back out decrement of the dispatch counter */
        addi    29,29,1
        li      3,VG_TRC_INNER_COUNTERZERO
        b       .run_innerloop_exit

.fast_lookup_failed:
	/* Reached when the VG_(tt_fast) entry's .guest field did not
	   match the address in r3. */
	/* %CIA is up to date */
	/* back out decrement of the dispatch counter */
        addi    29,29,1
        li      3,VG_TRC_INNER_FASTMISS
	b       .run_innerloop_exit
422
423
424
/* All exits from the dispatcher go through here.
   r3 holds the return value.

   On entry sp still points at the inner 48-byte frame pushed by the
   preamble and r29 holds the current dispatch counter.  This code
   resets the FPSCR, checks VSCR[NJ] (when VMX is present), writes r29
   back to VG_(dispatch_ctr), restores every register the preamble
   saved, pops both frames and returns to the caller of
   VG_(run_innerloop) with the result in r3.
*/
.run_innerloop_exit:
        /* We're leaving.  Check that nobody messed with
           VSCR or FPSCR. */

	/* Set fpscr back to a known state, since vex-generated code
	   may have messed with fpscr[rm]. */
        li      5,0
        addi    1,1,-16          /* borrow 16 bytes below sp as scratch */
        stw     5,0(1)
        lfs     3,0(1)           /* f3 = 0.0 */
        addi    1,1,16
        mtfsf   0xFF,3   /* fpscr = f3 */

	/* Using r11 - value used again further on, so don't trash! */
	ld	11,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
        ld      11,0(11)
        cmpldi  11,0
        beq     .LafterVMX8

        /* Check VSCR[NJ] == 1 */
        /* first generate 4x 0x00010000 */
        vspltisw  4,0x1                   /* 4x 0x00000001 */
        vspltisw  5,0x0                   /* zero */
        vsldoi    6,4,5,0x2               /* <<2*8 => 4x 0x00010000 */
        /* retrieve VSCR and mask wanted bits */
        mfvscr    7
        vand      7,7,6                   /* gives NJ flag */
        vspltw    7,7,0x3                 /* flags-word to all lanes */
        vcmpequw. 8,6,7                   /* CR[24] = 1 if v6 == v7 */
        bt        24,.invariant_violation /* branch if all_equal */
.LafterVMX8:

	/* otherwise we're OK */
        b       .run_innerloop_exit_REALLY


.invariant_violation:
        /* VSCR[NJ] was found set: report that instead of the
           return value we were asked to deliver. */
        li      3,VG_TRC_INVARIANT_FAILED
        b       .run_innerloop_exit_REALLY

.run_innerloop_exit_REALLY:
        /* r3 holds VG_TRC_* value to return */

        /* Return to parent stack: drop the 48-byte frame so that the
           offsets below match the ones used by the preamble. */
        addi    1,1,48

        /* Write ctr to VG_(dispatch_ctr) (=32bit value) */
	ld	5,.tocent__vgPlain_dispatch_ctr@toc(2)
        stw     29,0(5)

        /* CR is not touched here: the caller's CR was saved at
           8(parent sp), i.e. 632(sp), and is restored from there just
           before returning.  (Nothing is ever stored at 44(sp), and
           the compare below clobbers cr0 anyway.) */

        /* Restore callee-saved registers... */

        /* Floating-point regs */
        lfd     31,616(1)
        lfd     30,608(1)
        lfd     29,600(1)
        lfd     28,592(1)
        lfd     27,584(1)
        lfd     26,576(1)
        lfd     25,568(1)
        lfd     24,560(1)
        lfd     23,552(1)
        lfd     22,544(1)
        lfd     21,536(1)
        lfd     20,528(1)
        lfd     19,520(1)
        lfd     18,512(1)
        lfd     17,504(1)
        lfd     16,496(1)
        lfd     15,488(1)
        lfd     14,480(1)

        /* General regs */
        ld      31,472(1)
        ld      30,464(1)
        ld      29,456(1)
        ld      28,448(1)
        ld      27,440(1)
        ld      26,432(1)
        ld      25,424(1)
        ld      24,416(1)
        ld      23,408(1)
        ld      22,400(1)
        ld      21,392(1)
        ld      20,384(1)
        ld      19,376(1)
        ld      18,368(1)
        ld      17,360(1)
        ld      16,352(1)
        ld      15,344(1)
        ld      14,336(1)
        ld      13,328(1)

        /* r11 already holds VG_(machine_ppc64_has_VMX) value */
        cmpldi  11,0
        beq     .LafterVMX9

        /* VRSAVE: reload the word the preamble stored at 324(sp) and
           write it back to the SPR.  This has to be mtspr; an mfspr
           here would overwrite r4 and leave VRSAVE unrestored. */
        lwz     4,324(1)
        mtspr   256,4         /* VRSAVE reg is spr number 256 */

        /* Vector regs (lvx only has an indexed form; r4 is scratch) */
        li      4,304
        lvx     31,4,1
        li      4,288
        lvx     30,4,1
        li      4,272
        lvx     29,4,1
        li      4,256
        lvx     28,4,1
        li      4,240
        lvx     27,4,1
        li      4,224
        lvx     26,4,1
        li      4,208
        lvx     25,4,1
        li      4,192
        lvx     24,4,1
        li      4,176
        lvx     23,4,1
        li      4,160
        lvx     22,4,1
        li      4,144
        lvx     21,4,1
        li      4,128
        lvx     20,4,1
.LafterVMX9:

        /* reset cr, lr, sp */
        ld      0,632(1)  /* stack_size + 8 */
        mtcr    0
        ld      0,640(1)  /* stack_size + 16 */
        mtlr    0
        addi    1,1,624   /* stack_size */
        blr
567
568
569/*------------------------------------------------------------*/
570/*---                                                      ---*/
571/*--- A special dispatcher, for running no-redir           ---*/
572/*--- translations.  Just runs the given translation once. ---*/
573/*---                                                      ---*/
574/*------------------------------------------------------------*/
575
576/* signature:
577void VG_(run_a_noredir_translation) ( UWord* argblock );
578*/
579
580/* Run a no-redir translation.  argblock points to 4 UWords, 2 to carry args
581   and 2 to carry results:
582      0: input:  ptr to translation
583      1: input:  ptr to guest state
584      2: output: next guest PC
585      3: output: guest state pointer afterwards (== thread return code)
586*/
.section ".text"
.align   2
.globl VG_(run_a_noredir_translation)
.section ".opd","aw"
.align   3
VG_(run_a_noredir_translation):
.quad    .VG_(run_a_noredir_translation),.TOC.@tocbase,0
.previous
.type    .VG_(run_a_noredir_translation),@function
.globl   .VG_(run_a_noredir_translation)
.VG_(run_a_noredir_translation):
	/* In:  r3 = argblock (pointer to 4 doublewords).
	   Calls the code at argblock[0] once with r31 = argblock[1],
	   then stores the returned r3 into argblock[2] and the
	   resulting r31 into argblock[3].
	   Uses a private 512-byte frame: r14-r31 at 256..392(sp),
	   LR at 400(sp), r2 at 408(sp), argblock at 416(sp).
	   NOTE(review): unlike VG_(run_innerloop), CR and the FP/vector
	   callee-saved registers are not saved here -- confirm that the
	   translations run through this path cannot clobber them. */
	/* save callee-save int regs, & lr */
	stdu 1,-512(1)
	std  14,256(1)
	std  15,264(1)
	std  16,272(1)
	std  17,280(1)
	std  18,288(1)
	std  19,296(1)
	std  20,304(1)
	std  21,312(1)
	std  22,320(1)
	std  23,328(1)
	std  24,336(1)
	std  25,344(1)
	std  26,352(1)
	std  27,360(1)
	std  28,368(1)
	std  29,376(1)
	std  30,384(1)
	std  31,392(1)
	mflr 31         /* r31 is already saved, so use it to carry LR */
	std  31,400(1)
	std   2,408(1)  /* also preserve R2, just in case .. */

	std  3,416(1)   /* keep argblock: r3 is overwritten by the call */
	ld   31,8(3)    /* r31 = argblock[1] = guest state pointer */
	ld   30,0(3)    /* r30 = argblock[0] = translation address */
	mtlr 30
	blrl            /* call the translation; it returns here */

	ld   4,416(1)   /* r4 = argblock */
	std  3, 16(4)   /* argblock[2] = next guest PC */
	std  31,24(4)   /* argblock[3] = guest state pointer afterwards */

	ld   14,256(1)
	ld   15,264(1)
	ld   16,272(1)
	ld   17,280(1)
	ld   18,288(1)
	ld   19,296(1)
	ld   20,304(1)
	ld   21,312(1)
	ld   22,320(1)
	ld   23,328(1)
	ld   24,336(1)
	ld   25,344(1)
	ld   26,352(1)
	ld   27,360(1)
	ld   28,368(1)
	ld   29,376(1)
	ld   30,384(1)
	ld   31,400(1)  /* saved LR, via r31 ... */
	mtlr 31
	ld   31,392(1)  /* ... then the real r31 */
	ld    2,408(1)  /* also preserve R2, just in case .. */

	addi 1,1,512
	blr
656
657
658/* Let the linker know we don't need an executable stack */
659.section .note.GNU-stack,"",@progbits
660
661#endif // defined(VGP_ppc64_linux)
662
663/*--------------------------------------------------------------------*/
664/*--- end                                                          ---*/
665/*--------------------------------------------------------------------*/
666