1dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj
2dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj#include <stdio.h>
3dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj#include <stdlib.h>
483b62cbbab29bde83eba40231f307c2a311e73c8njn#include "tests/asm.h"
583b62cbbab29bde83eba40231f307c2a311e73c8njn#include "tests/malloc.h"
6dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj#include <string.h>
7dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj
/* 128-bit test patterns for the SSE registers, stored as four 32-bit words.
   The inline assembly in this file refers to these objects by symbol name
   (through VG_SYM), so they must keep external linkage and their exact
   names. */

/* Pattern loaded into %xmm0 by do_setup_then_fxsave(). */
const unsigned int vec0[4] = {
   0x12345678u, 0x11223344u, 0x55667788u, 0x87654321u
};

/* Pattern loaded into %xmm1 by do_setup_then_fxsave(). */
const unsigned int vec1[4] = {
   0xABCDEF01u, 0xAABBCCDDu, 0xEEFF0011u, 0x10FEDCBAu
};

/* All-zero pattern loaded into %xmm0..%xmm7 by do_zeroise(). */
const unsigned int vecZ[4] = {
   0u, 0u, 0u, 0u
};
16dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj
/* Execute FXSAVE, writing the current x87/MMX/SSE state image to *p.

   p: destination buffer.  The caller in this file passes a 512-byte,
      16-byte-aligned region, which is what the instruction expects.

   The "memory" clobber tells the compiler that the buffer is written
   behind its back, so later reads of *p are not served from stale values.

   __asm__ is used rather than asm so that the function also compiles in
   strict ISO modes (-std=c99 / -std=c11), where asm is not a keyword. */
void do_fxsave ( void* p ) {
   __asm__ __volatile__("fxsave (%0)" : : "r" (p) : "memory" );
}
20dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj
/* Execute FXRSTOR, reloading the x87/MMX/SSE state from the image at *p.

   p: source buffer holding an image previously produced by FXSAVE.  The
      caller in this file passes a 512-byte, 16-byte-aligned region.

   The "memory" clobber keeps the compiler from moving stores to *p past
   the instruction.

   __asm__ is used rather than asm so that the function also compiles in
   strict ISO modes (-std=c99 / -std=c11), where asm is not a keyword. */
void do_fxrstor ( void* p ) {
   __asm__ __volatile__("fxrstor (%0)" : : "r" (p) : "memory" );
}
24dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj
25dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardjvoid do_zeroise ( void )
26dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj{
27dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj   asm __volatile__("finit");
28dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj   asm __volatile__(
29dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj    "fldz\n\t"
30dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj    "fldz\n\t"
31dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj    "fldz\n\t"
32dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj    "fldz\n\t"
33dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj    "fldz\n\t"
34dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj    "fldz\n\t"
35dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj    "fldz\n\t"
36dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj    "fldz\n\t"
37dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj    "finit\n");
3883b62cbbab29bde83eba40231f307c2a311e73c8njn   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm0");
3983b62cbbab29bde83eba40231f307c2a311e73c8njn   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm1");
4083b62cbbab29bde83eba40231f307c2a311e73c8njn   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm2");
4183b62cbbab29bde83eba40231f307c2a311e73c8njn   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm3");
4283b62cbbab29bde83eba40231f307c2a311e73c8njn   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm4");
4383b62cbbab29bde83eba40231f307c2a311e73c8njn   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm5");
4483b62cbbab29bde83eba40231f307c2a311e73c8njn   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm6");
4583b62cbbab29bde83eba40231f307c2a311e73c8njn   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm7");
46dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj   asm __volatile__(
47dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj      "pushl $0\n\t"
48dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj      "ldmxcsr 0(%esp)\n\t"
49dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj      "addl $4,%esp\n");
50dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj}
51dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj
52dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj/* set up the FP and SSE state, and then dump it. */
53dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardjvoid do_setup_then_fxsave ( void* p )
54dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj{
55dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj   asm __volatile__("finit");
56dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj   asm __volatile__("fldpi");
57dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj   asm __volatile__("fld1");
58dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj   asm __volatile__("fldln2");
59dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj   asm __volatile__("fldlg2");
60dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj   asm __volatile__("fld %st(3)");
61dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj   asm __volatile__("fld %st(3)");
6283b62cbbab29bde83eba40231f307c2a311e73c8njn   asm __volatile__("movups " VG_SYM(vec0) ", %xmm0");
6383b62cbbab29bde83eba40231f307c2a311e73c8njn   asm __volatile__("movups " VG_SYM(vec1) ", %xmm1");
64dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj   asm __volatile__("xorps %xmm2, %xmm2");
65dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj   asm __volatile__("movaps %xmm2, %xmm3");
66dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj   asm __volatile__("movaps %xmm2, %xmm4");
67dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj   asm __volatile__("movaps %xmm2, %xmm5");
68dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj   asm __volatile__("movaps %xmm2, %xmm6");
69dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj   asm __volatile__("movaps %xmm1, %xmm7");
70dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj   asm __volatile__("xorps %xmm0, %xmm7");
71dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj   do_fxsave (p);
72dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj}
73dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj
/* Return 1 if byte offset i is one of the two lowest bytes of any of the
   eight 16-byte slots starting at offsets 32, 48, ..., 144, and 0
   otherwise.  show() uses this to mask the least-significant 16 bits of
   each FP number in the dump. */
int isFPLsbs ( int i )
{
   int slot;
   for (slot = 0; slot < 8; slot++) {
      int base = 32 + 16 * slot;
      if (i == base || i == base + 1)
         return 1;
   }
   return 0;
}
87dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj
/* Hex-dump the first 512 bytes of buf to stdout, 16 bytes per row, each
   row prefixed with the decimal offset of its first byte.

   buf: buffer to dump; must hold at least 512 readable bytes.
   xx:  if non-zero, bytes for which isFPLsbs() returns true are printed
        as "xx" instead of their value. */
void show ( unsigned char* buf, int xx )
{
   int row, col;
   for (row = 0; row < 512; row += 16) {
      printf("%3d   ", row);
      for (col = 0; col < 16; col++) {
         int off = row + col;
         if (xx && isFPLsbs(off))
            printf("xx ");
         else
            printf("%02x ", buf[off]);
      }
      printf("\n");
   }
}
102dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj
103dc056b7a82b6a3a570bb9e6c136dd3cee9185cbfsewardj
/* Test driver: save a known x87/SSE state, wipe the registers, restore
   the saved state, and print the FXSAVE image taken at each stage.

   Running with any command-line argument masks the least-significant
   16 bits of each FP number in the dumps.  Always returns 0. */
int main ( int argc, char** argv )
{
   enum { IMAGE_BYTES = 512 };

   unsigned char* before   = memalign16(IMAGE_BYTES);
   unsigned char* zeroed   = memalign16(IMAGE_BYTES);
   unsigned char* restored = memalign16(IMAGE_BYTES);
   int xx = (argc > 1);

   (void)argv;

   fputs("Re-run with any arg to suppress least-significant\n"
         "   16 bits of FP numbers\n", stdout);

   /* Pre-fill every buffer with a recognisable pattern so that bytes the
      instruction leaves untouched stand out in the dumps. */
   memset(before,   0x55, IMAGE_BYTES);
   memset(zeroed,   0x55, IMAGE_BYTES);
   memset(restored, 0x55, IMAGE_BYTES);

   /* Stage 1: establish a known x87/xmm state and capture it. */
   do_setup_then_fxsave(before);
   fputs("\nBEFORE\n", stdout);
   show(before, xx);

   /* Stage 2: clear the x87/xmm state and capture it, demonstrating that
      the registers really have been wiped. */
   do_zeroise();
   do_fxsave(zeroed);
   fputs("\nZEROED\n", stdout);
   show(zeroed, xx);

   /* Stage 3: reload the stage-1 image and capture the resulting state. */
   do_fxrstor(before);
   do_fxsave(restored);
   fputs("\nRESTORED\n", stdout);
   show(restored, xx);

   free(before);
   free(zeroed);
   free(restored);

   return 0;
}
138