1
2#include <stdio.h>
3#include <stdlib.h>
4#include "tests/asm.h"
5#include "tests/malloc.h"
6#include <string.h>
7
/* 128-bit pattern that do_setup_then_fxsave loads into %xmm0. */
const unsigned int vec0[4] = {
   0x12345678,
   0x11223344,
   0x55667788,
   0x87654321
};

/* 128-bit pattern that do_setup_then_fxsave loads into %xmm1. */
const unsigned int vec1[4] = {
   0xABCDEF01,
   0xAABBCCDD,
   0xEEFF0011,
   0x10FEDCBA
};

/* All-zero 128-bit value.  do_zeroise refers to this object by symbol
   name from inside its inline assembly, so it is kept as an ordinary
   externally visible definition. */
const unsigned int vecZ[4] = { 0 };
16
/* Dump the current x87/SSE state into the save area at P using FXSAVE.

   p     Destination save area.  main passes 512-byte buffers obtained
         from memalign16; FXSAVE itself requires a 16-byte aligned
         operand.
   rexw  Non-zero selects the variant assembled with a REX.W prefix
         ("rex64/fxsave"); zero selects the plain "fxsave" form.

   The function is marked noinline so the instruction is always reached
   through a real call.  The "memory" clobber tells the compiler that
   the buffer is written behind its back. */
__attribute__((noinline))
void do_fxsave ( void* p, int rexw ) {
   /* Use __asm__ rather than asm: the plain keyword is a GNU extension
      that disappears under strict -std=c99/-std=c11, whereas the
      double-underscore spelling is accepted in every mode. */
   if (rexw) {
      __asm__ __volatile__("rex64/fxsave (%0)" : : "r" (p) : "memory" );
   } else {
      __asm__ __volatile__("fxsave (%0)" : : "r" (p) : "memory" );
   }
}
25
/* Reload the x87/SSE state from the save area at P using FXRSTOR.

   p     Source save area, previously filled in by FXSAVE.  FXRSTOR
         requires a 16-byte aligned operand.
   rexw  Non-zero selects the variant assembled with a REX.W prefix
         ("rex64/fxrstor"); zero selects the plain "fxrstor" form.

   The function is marked noinline so the instruction is always reached
   through a real call. */
__attribute__((noinline))
void do_fxrstor ( void* p, int rexw ) {
   /* Use __asm__ rather than asm: the plain keyword is a GNU extension
      that disappears under strict -std=c99/-std=c11, whereas the
      double-underscore spelling is accepted in every mode. */
   if (rexw) {
      __asm__ __volatile__("rex64/fxrstor (%0)" : : "r" (p) : "memory" );
   } else {
      __asm__ __volatile__("fxrstor (%0)" : : "r" (p) : "memory" );
   }
}
34
35void do_zeroise ( void )
36{
37   asm __volatile__("finit");
38   asm __volatile__(
39    "fldz\n\t"
40    "fldz\n\t"
41    "fldz\n\t"
42    "fldz\n\t"
43    "fldz\n\t"
44    "fldz\n\t"
45    "fldz\n\t"
46    "fldz\n\t"
47    "finit\n");
48#ifndef VGP_amd64_darwin
49   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm0");
50   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm1");
51   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm2");
52   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm3");
53   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm4");
54   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm5");
55   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm6");
56   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm7");
57   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm8");
58   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm9");
59   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm10");
60   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm11");
61   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm12");
62   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm13");
63   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm14");
64   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm15");
65#else
66   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm0");
67   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm1");
68   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm2");
69   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm3");
70   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm4");
71   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm5");
72   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm6");
73   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm7");
74   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm8");
75   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm9");
76   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm10");
77   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm11");
78   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm12");
79   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm13");
80   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm14");
81   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm15");
82#endif
83   asm __volatile__(
84      "pushq $0\n\t"
85      "ldmxcsr 0(%rsp)\n\t"
86      "addq $8,%rsp\n");
87}
88
89/* set up the FP and SSE state, and then dump it. */
90void do_setup_then_fxsave ( void* p, int rexw )
91{
92   asm __volatile__("finit");
93   asm __volatile__("fldpi");
94   asm __volatile__("fld1");
95   asm __volatile__("fldln2");
96   asm __volatile__("fldlg2");
97   asm __volatile__("fld %st(3)");
98   asm __volatile__("fld %st(3)");
99   asm __volatile__("fld1");
100   asm __volatile__("movups (%0), %%xmm0" : : "r"(&vec0[0]) : "xmm0" );
101   asm __volatile__("movups (%0), %%xmm1" : : "r"(&vec1[0]) : "xmm1" );
102   asm __volatile__("xorps  %xmm2, %xmm2");
103   asm __volatile__("movaps %xmm0, %xmm3");
104   asm __volatile__("movaps %xmm1, %xmm4");
105   asm __volatile__("movaps %xmm2, %xmm5");
106   asm __volatile__("movaps %xmm0, %xmm6");
107   asm __volatile__("movaps %xmm1, %xmm7");
108   asm __volatile__("movaps %xmm1, %xmm8");
109   asm __volatile__("movaps %xmm2, %xmm9");
110   asm __volatile__("movaps %xmm0, %xmm10");
111   asm __volatile__("movaps %xmm1, %xmm11");
112   asm __volatile__("movaps %xmm1, %xmm12");
113   asm __volatile__("movaps %xmm2, %xmm13");
114   asm __volatile__("movaps %xmm0, %xmm14");
115   asm __volatile__("movaps %xmm1, %xmm15");
116   do_fxsave(p, rexw);
117}
118
/* Return 1 if byte offset I of a dumped save area is one that show()
   should mask out, 0 otherwise.

   The masked offsets are the first two bytes of every 16-byte slot
   starting at offsets 32, 48, ..., 144.  main describes these as the
   least-significant 16 bits of the FP numbers. */
int isFPLsbs ( int i )
{
   /* Only the eight slots covering offsets 32 .. 145 are of interest. */
   if (i < 32 || i > 145)
      return 0;

   /* Within that range, the first two bytes of each 16-byte slot. */
   return (i % 16) < 2;
}
132
/* Print the 512 bytes at BUF as a hex dump: 32 rows of 16 bytes, each
   row prefixed with the decimal offset of its first byte.

   buf  Buffer to dump; 512 bytes are read.
   xx   When non-zero, bytes selected by isFPLsbs are printed as "xx"
        instead of their value. */
void show ( unsigned char* buf, int xx )
{
   int row, col;

   for (row = 0; row < 512; row += 16) {
      printf("%3d   ", row);
      for (col = 0; col < 16; col++) {
         int off = row + col;
         if (xx && isFPLsbs(off))
            printf("xx ");
         else
            printf("%02x ", buf[off]);
      }
      printf("\n");
   }
}
147
148
/* Test driver.  Runs the same save / zero / restore sequence twice,
   first with the plain FXSAVE/FXRSTOR forms (rexw == 0) and then with
   the REX.W-prefixed forms (rexw == 1), dumping the save area after
   each step.

   Passing any command-line argument makes show() mask the bytes
   selected by isFPLsbs.

   Returns 0 on success, or 1 if a buffer could not be allocated. */
int main ( int argc, char** argv )
{
   /* Section banners, indexed by rexw.  The second banner carries an
      extra trailing newline; the output is reproduced exactly. */
   static const char* const banner[2] = {
      "\n-------- FXSAVE non-64 (REX.W == 0) --------\n",
      "\n-------- FXSAVE 64 (REX.W == 1) --------\n\n"
   };
   unsigned char* buf1 = memalign16(512);
   unsigned char* buf2 = memalign16(512);
   unsigned char* buf3 = memalign16(512);
   int xx = argc > 1;
   int rexw;

   /* memalign16 is declared in tests/malloc.h; guard against a NULL
      result here rather than handing it to memset and the asm. */
   if (buf1 == NULL || buf2 == NULL || buf3 == NULL) {
      fprintf(stderr, "fxsave test: out of memory\n");
      free(buf1); free(buf2); free(buf3);
      return 1;
   }

   printf("Re-run with any arg to suppress least-significant\n"
          "   16 bits of FP numbers\n");

   for (rexw = 0; rexw <= 1; rexw++) {
      fputs(banner[rexw], stdout);

      /* Fill with a recognisable pattern so that bytes the instruction
         does not write remain visible in the dump. */
      memset(buf1, 0x55, 512);
      memset(buf2, 0x55, 512);
      memset(buf3, 0x55, 512);

      /* Load up x87/xmm state and dump it. */
      do_setup_then_fxsave(buf1, rexw);
      printf("\nBEFORE\n");
      show(buf1, xx);

      /* Zeroise x87/xmm state and dump it, to show that the
         regs have been cleared out. */
      do_zeroise();
      do_fxsave(buf2, rexw);
      printf("\nZEROED\n");
      show(buf2, xx);

      /* Reload x87/xmm state from buf1 and dump it in buf3. */
      do_fxrstor(buf1, rexw);
      do_fxsave(buf3, rexw);
      printf("\nRESTORED\n");
      show(buf3, xx);
   }

   free(buf1); free(buf2); free(buf3);

   return 0;
}
211