1#include <config.h>
2#include <stdio.h>
3#include <stdlib.h>
4#include "tests/asm.h"
5#include "tests/malloc.h"
6#include <string.h>
7
/* 128-bit bit patterns used to populate the SSE registers.
   do_setup_then_fxsave() loads vec0 into %xmm0 and vec1 into %xmm1;
   do_zeroise() loads vecZ (all zeroes) into every %xmm register. */
const unsigned int vec0[4] = {
   0x12345678, 0x11223344, 0x55667788, 0x87654321
};

const unsigned int vec1[4] = {
   0xABCDEF01, 0xAABBCCDD, 0xEEFF0011, 0x10FEDCBA
};

const unsigned int vecZ[4] = {
   0, 0, 0, 0
};
16
/* Dump the current x87/SSE state into the area at P with FXSAVE.

   p:    destination for the state image.  FXSAVE writes a 512-byte
         image and needs a 16-byte aligned address; main() passes
         buffers from memalign16(512).
   rexw: non-zero selects the REX.W=1 form of the instruction
         (fxsave64), zero selects the plain REX.W=0 form.

   The "memory" clobber tells the compiler that *P is written.
   __asm__ is used rather than asm so the file also builds in strict
   ISO modes (-std=c11), matching the __volatile__ spelling. */
__attribute__((noinline))
void do_fxsave ( void* p, int rexw ) {
   if (rexw) {
#ifdef HAVE_AS_AMD64_FXSAVE64
      __asm__ __volatile__("fxsave64 (%0)" : : "r" (p) : "memory" );
#else
      /* No fxsave64 mnemonic available (HAVE_AS_AMD64_FXSAVE64 is
         presumably set by configure): request REX.W via a prefix. */
      __asm__ __volatile__("rex64/fxsave (%0)" : : "r" (p) : "memory" );
#endif
   } else {
      __asm__ __volatile__("fxsave (%0)" : : "r" (p) : "memory" );
   }
}
29
/* Reload the x87/SSE state from the image at P with FXRSTOR.

   p:    source image, in the format written by FXSAVE.  It needs a
         16-byte aligned address; main() passes buffers from
         memalign16(512).
   rexw: non-zero selects the REX.W=1 form of the instruction
         (fxrstor64), zero selects the plain REX.W=0 form.

   __asm__ is used rather than asm so the file also builds in strict
   ISO modes (-std=c11), matching the __volatile__ spelling. */
__attribute__((noinline))
void do_fxrstor ( void* p, int rexw ) {
   if (rexw) {
#ifdef HAVE_AS_AMD64_FXSAVE64
      __asm__ __volatile__("fxrstor64 (%0)" : : "r" (p) : "memory" );
#else
      /* No fxrstor64 mnemonic available (HAVE_AS_AMD64_FXSAVE64 is
         presumably set by configure): request REX.W via a prefix. */
      __asm__ __volatile__("rex64/fxrstor (%0)" : : "r" (p) : "memory" );
#endif
   } else {
      __asm__ __volatile__("fxrstor (%0)" : : "r" (p) : "memory" );
   }
}
42
43void do_zeroise ( void )
44{
45   asm __volatile__("finit");
46   asm __volatile__(
47    "fldz\n\t"
48    "fldz\n\t"
49    "fldz\n\t"
50    "fldz\n\t"
51    "fldz\n\t"
52    "fldz\n\t"
53    "fldz\n\t"
54    "fldz\n\t"
55    "finit\n");
56#ifndef VGP_amd64_darwin
57   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm0");
58   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm1");
59   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm2");
60   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm3");
61   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm4");
62   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm5");
63   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm6");
64   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm7");
65   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm8");
66   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm9");
67   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm10");
68   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm11");
69   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm12");
70   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm13");
71   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm14");
72   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm15");
73#else
74   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm0");
75   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm1");
76   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm2");
77   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm3");
78   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm4");
79   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm5");
80   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm6");
81   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm7");
82   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm8");
83   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm9");
84   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm10");
85   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm11");
86   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm12");
87   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm13");
88   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm14");
89   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm15");
90#endif
91   asm __volatile__(
92      "pushq $0\n\t"
93      "ldmxcsr 0(%rsp)\n\t"
94      "addq $8,%rsp\n");
95}
96
97/* set up the FP and SSE state, and then dump it. */
98void do_setup_then_fxsave ( void* p, int rexw )
99{
100   asm __volatile__("finit");
101   asm __volatile__("fldpi");
102   asm __volatile__("fld1");
103   asm __volatile__("fldln2");
104   asm __volatile__("fldlg2");
105   asm __volatile__("fld %st(3)");
106   asm __volatile__("fld %st(3)");
107   asm __volatile__("fld1");
108   asm __volatile__("movups (%0), %%xmm0" : : "r"(&vec0[0]) : "xmm0" );
109   asm __volatile__("movups (%0), %%xmm1" : : "r"(&vec1[0]) : "xmm1" );
110   asm __volatile__("xorps  %xmm2, %xmm2");
111   asm __volatile__("movaps %xmm0, %xmm3");
112   asm __volatile__("movaps %xmm1, %xmm4");
113   asm __volatile__("movaps %xmm2, %xmm5");
114   asm __volatile__("movaps %xmm0, %xmm6");
115   asm __volatile__("movaps %xmm1, %xmm7");
116   asm __volatile__("movaps %xmm1, %xmm8");
117   asm __volatile__("movaps %xmm2, %xmm9");
118   asm __volatile__("movaps %xmm0, %xmm10");
119   asm __volatile__("movaps %xmm1, %xmm11");
120   asm __volatile__("movaps %xmm1, %xmm12");
121   asm __volatile__("movaps %xmm2, %xmm13");
122   asm __volatile__("movaps %xmm0, %xmm14");
123   asm __volatile__("movaps %xmm1, %xmm15");
124   do_fxsave(p, rexw);
125}
126
/* Return 1 if I is one of the byte offsets 32, 48, ..., 144 or the byte
   immediately after one of them, and 0 otherwise.  show() uses this to
   pick the bytes it masks out as "xx" (the least-significant 16 bits of
   each FP number, per the message printed by main). */
int isFPLsbs ( int i )
{
   enum { FIRST_SLOT = 32, LAST_SLOT = 144, SLOT_STRIDE = 16 };
   int slot;

   for (slot = FIRST_SLOT; slot <= LAST_SLOT; slot += SLOT_STRIDE) {
      if (i == slot || i == slot + 1)
         return 1;
   }
   return 0;
}
140
/* Print the 512 bytes at BUF as 32 rows of 16 two-digit hex values, each
   row prefixed with its starting offset.  If XX is non-zero, bytes for
   which isFPLsbs() returns true are printed as "xx" instead of their
   value. */
void show ( unsigned char* buf, int xx )
{
   int row, col;

   for (row = 0; row < 512; row += 16) {
      printf("%3d   ", row);
      for (col = 0; col < 16; col++) {
         int off = row + col;
         if (xx && isFPLsbs(off))
            printf("xx ");
         else
            printf("%02x ", buf[off]);
      }
      printf("\n");
   }
}
155
156
/* Run one set-up / zero / restore cycle with the given REX.W setting and
   print the three resulting FXSAVE images.

   before:   receives the image taken right after the state is set up.
   zeroed:   receives the image taken after do_zeroise().
   restored: receives the image taken after reloading BEFORE.
   rexw:     passed to the save/restore helpers.
   xx:       passed to show(). */
static void fxsave_roundtrip ( unsigned char* before,
                               unsigned char* zeroed,
                               unsigned char* restored,
                               int rexw, int xx )
{
   /* Fill with a recognisable pattern so bytes the instruction does not
      write stay visible in the dump. */
   memset(before,   0x55, 512);
   memset(zeroed,   0x55, 512);
   memset(restored, 0x55, 512);

   /* Load up x87/xmm state and dump it. */
   do_setup_then_fxsave(before, rexw);
   printf("\nBEFORE\n");
   show(before, xx);

   /* Zeroise x87/xmm state and dump it, to show that the
      regs have been cleared out. */
   do_zeroise();
   do_fxsave(zeroed, rexw);
   printf("\nZEROED\n");
   show(zeroed, xx);

   /* Reload x87/xmm state from the first image and dump it again. */
   do_fxrstor(before, rexw);
   do_fxsave(restored, rexw);
   printf("\nRESTORED\n");
   show(restored, xx);
}

/* Exercise FXSAVE/FXRSTOR first with REX.W == 0 and then with
   REX.W == 1.  Passing any command-line argument masks the bytes
   selected by isFPLsbs() in the dumps. */
int main ( int argc, char** argv )
{
   unsigned char* buf1 = memalign16(512);
   unsigned char* buf2 = memalign16(512);
   unsigned char* buf3 = memalign16(512);
   int xx = argc > 1;

   printf("Re-run with any arg to suppress least-significant\n"
          "   16 bits of FP numbers\n");

   printf("\n-------- FXSAVE non-64 (REX.W == 0) --------\n");
   fxsave_roundtrip(buf1, buf2, buf3, 0, xx);

   printf("\n-------- FXSAVE 64 (REX.W == 1) --------\n\n");
   fxsave_roundtrip(buf1, buf2, buf3, 1, xx);

   free(buf1);
   free(buf2);
   free(buf3);

   return 0;
}
219