#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>
#include "tests/asm.h"
#include "tests/malloc.h"

#define XSAVE_AREA_SIZE 832
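
/* 832 bytes covers the state this test touches, in the standard
   (non-compacted) layout: the 512-byte legacy FXSAVE region, the
   64-byte XSAVE header at offset 512 (XSTATE_BV in its first 8 bytes),
   and the 256-byte AVX region (upper halves of YMM0..15) at offset
   576. */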

typedef  unsigned char           UChar;
typedef  unsigned int            UInt;
typedef  unsigned long long int  ULong;

typedef  unsigned long int       UWord;

typedef  unsigned char  Bool;
#define  True   ((Bool)1)
#define  False  ((Bool)0)

const unsigned int vec0[8]
   = { 0x12345678, 0x11223344, 0x55667788, 0x87654321,
       0x15263748, 0x91929394, 0x19293949, 0x48372615 };

const unsigned int vec1[8]
   = { 0xABCDEF01, 0xAABBCCDD, 0xEEFF0011, 0x10FEDCBA,
       0xBADCFE10, 0xFFEE9988, 0x11667722, 0x01EFCDAB };

const unsigned int vecZ[8]
   = { 0, 0, 0, 0, 0, 0, 0, 0 };

/* A version of memset that doesn't use XMM or YMM registers. */
static __attribute__((noinline))
void* my_memset(void* s, int c, size_t n)
{
   size_t i;
   for (i = 0; i < n; i++) {
      ((unsigned char*)s)[i] = (unsigned char)(unsigned int)c;
      /* Defeat any attempt at autovectorisation */
      __asm__ __volatile__("" ::: "cc","memory");
   }
   return s;
}

/* Ditto for memcpy */
static __attribute__((noinline))
void* my_memcpy(void *dest, const void *src, size_t n)
{
   size_t i;
   for (i = 0; i < n; i++) {
      ((unsigned char*)dest)[i] = ((const unsigned char*)src)[i];
      __asm__ __volatile__("" ::: "cc","memory");
   }
   return dest;
}

static void* memalign_zeroed64(size_t size)
{
   char* p = memalign64(size);
   if (p && size > 0) {
      my_memset(p, 0, size);
   }
   return p;
}

__attribute__((noinline))
static void do_xsave ( void* p, UInt rfbm )
{
   assert(rfbm <= 7);
   __asm__ __volatile__(
      "movq %0, %%rax;  xorq %%rdx, %%rdx;  xsave (%1)"
         : /*OUT*/ : /*IN*/ "r"((ULong)rfbm), "r"(p)
         : /*TRASH*/ "memory", "rax", "rdx"
   );
}
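
/* XSAVE semantics: EDX:EAX forms the requested-feature bitmap (RFBM),
   which the CPU ANDs with XCR0; bits 0/1/2 select the x87, SSE and AVX
   components respectively.  Only the selected components are written
   (plus MXCSR/MXCSR_MASK when the SSE or AVX bit is set, and the
   selected XSTATE_BV bits in the header), so a caller's pre-fill
   survives everywhere else in the buffer. */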

__attribute__((noinline))
static void do_xrstor ( void* p, UInt rfbm )
{
   assert(rfbm <= 7);
   __asm__ __volatile__(
      "movq %0, %%rax;  xorq %%rdx, %%rdx;  xrstor (%1)"
         : /*OUT*/ : /*IN*/ "r"((ULong)rfbm), "r"(p)
         : /*TRASH*/ "rax", "rdx" /* FIXME plus all X87,SSE,AVX regs */
   );
}
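
/* XRSTOR's behaviour is three-way.  For each component i selected by
   RFBM: if XSTATE_BV[i] is set in the image's header, the component is
   loaded from memory; if it is clear, the component is reset to its
   initial configuration; components not selected by RFBM are left
   unchanged.  test_xrstor() below enumerates all of these cases. */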

/* Set up the FP, SSE and AVX state, and then dump it. */
static void do_setup_then_xsave ( void* p, UInt rfbm )
{
   __asm__ __volatile__("finit");
   __asm__ __volatile__("fldpi");
   __asm__ __volatile__("fld1");
   __asm__ __volatile__("fldln2");
   __asm__ __volatile__("fldlg2");
   __asm__ __volatile__("fld %st(3)");
   __asm__ __volatile__("fld %st(3)");
   __asm__ __volatile__("fld1");
   __asm__ __volatile__("vmovups (%0), %%ymm0" : : "r"(&vec0[0]) : "xmm0" );
   __asm__ __volatile__("vmovups (%0), %%ymm1" : : "r"(&vec1[0]) : "xmm1" );
   __asm__ __volatile__("vxorps  %ymm2, %ymm2, %ymm2");
   __asm__ __volatile__("vmovaps %ymm0, %ymm3");
   __asm__ __volatile__("vmovaps %ymm1, %ymm4");
   __asm__ __volatile__("vmovaps %ymm2, %ymm5");
   __asm__ __volatile__("vmovaps %ymm0, %ymm6");
   __asm__ __volatile__("vmovaps %ymm1, %ymm7");
   __asm__ __volatile__("vmovaps %ymm1, %ymm8");
   __asm__ __volatile__("vmovaps %ymm2, %ymm9");
   __asm__ __volatile__("vmovaps %ymm0, %ymm10");
   __asm__ __volatile__("vmovaps %ymm1, %ymm11");
   __asm__ __volatile__("vmovaps %ymm1, %ymm12");
   __asm__ __volatile__("vmovaps %ymm2, %ymm13");
   __asm__ __volatile__("vmovaps %ymm0, %ymm14");
   __asm__ __volatile__("vmovaps %ymm1, %ymm15");
   do_xsave(p, rfbm);
}

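/* The 8 x87 registers are dumped 16 bytes apart, starting at offset 32
   in the XSAVE image, least-significant byte first.  This predicate
   identifies the first two bytes of each slot -- the 16 low significand
   bits of each 80-bit value -- which are lost when a value round-trips
   through a 64-bit double, and which hideBits64to79 suppresses. */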
static int isFPLsbs ( int i )
{
   int q;
   q = 32;  if (i == q || i == q+1) return 1;
   q = 48;  if (i == q || i == q+1) return 1;
   q = 64;  if (i == q || i == q+1) return 1;
   q = 80;  if (i == q || i == q+1) return 1;
   q = 96;  if (i == q || i == q+1) return 1;
   q = 112; if (i == q || i == q+1) return 1;
   q = 128; if (i == q || i == q+1) return 1;
   q = 144; if (i == q || i == q+1) return 1;
   return 0;
}

static void show ( unsigned char* buf, Bool hideBits64to79 )
{
   int i;
   for (i = 0; i < XSAVE_AREA_SIZE; i++) {
      if ((i % 16) == 0)
         fprintf(stderr, "%3d   ", i);
      if (hideBits64to79 && isFPLsbs(i))
         fprintf(stderr, "xx ");
      else
         fprintf(stderr, "%02x ", buf[i]);
      if (i > 0 && ((i % 16) == 15))
         fprintf(stderr, "\n");
   }
}

static void cpuid ( UInt* eax, UInt* ebx, UInt* ecx, UInt* edx,
                    UInt index, UInt ecx_in )
{
   UInt a,b,c,d;
   asm volatile ("cpuid"
                 : "=a" (a), "=b" (b), "=c" (c), "=d" (d)
                 : "0" (index), "2" (ecx_in) );
   *eax = a; *ebx = b; *ecx = c; *edx = d;
   //fprintf(stderr, "%08x %08x -> %08x %08x %08x %08x\n",
   //        index,ecx_in, a,b,c,d );
}

static void xgetbv ( UInt* eax, UInt* edx, UInt ecx_in )
{
   UInt a,d;
   asm volatile ("xgetbv"
                 : "=a" (a), "=d" (d)
                 : "c" (ecx_in) );
   *eax = a; *edx = d;
}
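
/* XGETBV with ECX == 0 reads XCR0.  XCR0 bits 0/1/2 indicate that the
   OS has enabled x87, SSE and AVX state management respectively, hence
   the check below that xgetbv(0) returns exactly 7. */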

static void check_for_xsave ( void )
{
   UInt eax, ebx, ecx, edx;
   Bool ok = True;

   eax = ebx = ecx = edx = 0;
   cpuid(&eax, &ebx, &ecx, &edx, 1,0);
   //fprintf(stderr, "cpuid(1).ecx[26=xsave]   = %u\n", (ecx >> 26) & 1);
   ok = ok && (((ecx >> 26) & 1) == 1);

   eax = ebx = ecx = edx = 0;
   cpuid(&eax, &ebx, &ecx, &edx, 1,0);
   //fprintf(stderr, "cpuid(1).ecx[27=osxsave] = %u\n", (ecx >> 27) & 1);
   ok = ok && (((ecx >> 27) & 1) == 1);

   eax = ebx = ecx = edx = 0;
   xgetbv(&eax, &edx, 0);
   //fprintf(stderr, "xgetbv(0) = %u:%u\n", edx, eax);
   ok = ok && (edx == 0) && (eax == 7);

   if (ok) return;

   fprintf(stderr,
           "This program must be run on a CPU that supports AVX and XSAVE.\n");
   exit(1);
}
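
/* A minimal sketch, not used by the test: rather than hardcoding
   XSAVE_AREA_SIZE, the size could be queried at runtime.  CPUID leaf
   0xD, sub-leaf 0, returns in EBX the size in bytes of the XSAVE area
   needed for the components currently enabled in XCR0. */
__attribute__((unused))
static UInt query_xsave_area_size ( void )
{
   UInt eax, ebx, ecx, edx;
   eax = ebx = ecx = edx = 0;
   cpuid(&eax, &ebx, &ecx, &edx, 0xD, 0);
   return ebx;
}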


void test_xsave ( Bool hideBits64to79 )
{
   /* Testing XSAVE:

      For RFBM in 0 .. 7 (that is, all combinations of the x87, SSE and
      AVX bits): set the x87, SSE and AVX registers to known values, do
      XSAVE to dump them, and print the resulting buffer. */

   UInt rfbm;
   for (rfbm = 0; rfbm <= 7; rfbm++) {
      UChar* saved_img = memalign_zeroed64(XSAVE_AREA_SIZE);

      my_memset(saved_img, 0xAA, XSAVE_AREA_SIZE);
      saved_img[512] = 0;
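      /* Zeroing the low byte of XSTATE_BV lets the bits that XSAVE sets
         there be read off directly afterwards: XSAVE updates only the
         XSTATE_BV bits selected by RFBM and leaves the rest of the
         header as-is (here, the 0xAA fill). */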
      do_setup_then_xsave(saved_img, rfbm);

      fprintf(stderr,
              "------------------ XSAVE, rfbm = %u ------------------\n", rfbm);
      show(saved_img, hideBits64to79);
      fprintf(stderr, "\n");

      free(saved_img);
   }
}


void test_xrstor ( Bool hideBits64to79 )
{
   /* Testing XRSTOR is more complex than testing XSAVE, because the
      loaded value(s) depend not only on which bits are requested (by
      RFBM) but also on which bits are actually present in the image
      (defined by XSTATE_BV).  So we have to test all 64 (8 x 8)
      combinations.

      The approach is to fill a memory buffer with data, do XRSTOR
      from the buffer, then dump all components with XSAVE into a new
      buffer, and print the result.  This is complicated by the fact
      that we need to be able to see which parts of the state (in
      registers) are neither overwritten nor zeroed by the restore.
      Hence the registers must be pre-filled with values which are
      neither zero nor the data to be loaded.  We choose to use 0x55
      where possible. */

   UChar* fives = memalign_zeroed64(XSAVE_AREA_SIZE);
   my_memset(fives, 0x55, XSAVE_AREA_SIZE);
   /* Set MXCSR so that the insn doesn't fault */
   fives[24] = 0x80;
   fives[25] = 0x1f;
   fives[26] = 0;
   fives[27] = 0;
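   /* Bytes 24..27 of the legacy area hold MXCSR; 0x00001F80 is its
      reset value, with all exceptions masked.  Leaving the 0x55 fill
      there would set reserved MXCSR bits and make XRSTOR fault. */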
   /* Ditto for the XSAVE header area.  Also set XSTATE_BV. */
   fives[512] = 7;
   UInt i;
   for (i = 1; i <= 23; i++) fives[512+i] = 0;
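   /* XSTATE_BV = 7 declares that the x87, SSE and AVX components are
      all present in the image; bytes 8..23 of the header (XCOMP_BV and
      the bytes just after it) must be zero or XRSTOR will fault. */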
   /* Fill the x87 register values with something that VEX's
      80-vs-64-bit kludging won't mess up -- an 80 bit number which is
      also representable as 64 bits: 123456789.0123 */
   for (i = 0; i <= 7; i++) {
      UChar* p = &fives[32 + 16 * i];
      p[0]=0x00; p[1]=0xf8; p[2]=0xc2; p[3]=0x64; p[4]=0xa0;
      p[5]=0xa2; p[6]=0x79; p[7]=0xeb; p[8]=0x19; p[9]=0x40;
   }
   /* And mark the tags for all 8 dumped regs as "valid". */
   fives[4/*FTW*/] = 0xFF;
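   /* Byte 4 of the legacy area is the abridged FTW: one bit per x87
      register, with 1 meaning "valid" (non-empty). */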

   /* (1) (see the comment in the loop below) */
   UChar* standard_test_data = memalign_zeroed64(XSAVE_AREA_SIZE);
   do_setup_then_xsave(standard_test_data, 7);

   UInt xstate_bv, rfbm;
   for (xstate_bv = 0; xstate_bv <= 7; xstate_bv++) {
      for (rfbm = 0; rfbm <= 7; rfbm++) {
         /* To debug a single combination, replace the two loop headers
            above with e.g. "{ xstate_bv = 7;" and "{ rfbm = 6;". */
         /* 1.  Copy the "standard test data" into registers, and dump
                it with XSAVE.  This gives us an image we can try
                restoring from.

            2.  Set the register state to all-0x55s (as far as is
                possible), so we can see which parts get overwritten
                and which parts get zeroed on the test restore.

            3.  Do the restore from the image prepared in (1).

            4.  Dump the state with XSAVE and print it.
         */

         /* (3a).  We can't use |standard_test_data| directly, since we
            need to put in the required |xstate_bv| value.  So make a
            copy and modify that instead. */
         UChar* img_to_restore_from = memalign_zeroed64(XSAVE_AREA_SIZE);
         my_memcpy(img_to_restore_from, standard_test_data, XSAVE_AREA_SIZE);
         img_to_restore_from[512] = xstate_bv;

         /* (4a) */
         UChar* saved_img = memalign_zeroed64(XSAVE_AREA_SIZE);
         my_memset(saved_img, 0xAA, XSAVE_AREA_SIZE);
         saved_img[512] = 0;

         /* (2) */
         do_xrstor(fives, 7);

         // X87, SSE, AVX state LIVE

         /* (3b) */
         /* and this is what we're actually trying to test */
         do_xrstor(img_to_restore_from, rfbm);

         // X87, SSE, AVX state LIVE

         /* (4b) */
         do_xsave(saved_img, 7);

         fprintf(stderr,
                 "---------- XRSTOR, xstate_bv = %u, rfbm = %u ---------\n",
                 xstate_bv, rfbm);
         show(saved_img, hideBits64to79);
         fprintf(stderr, "\n");

         free(saved_img);
         free(img_to_restore_from);
      }
   }

   free(fives);
   free(standard_test_data);
}


int main ( int argc, char** argv )
{
   Bool hideBits64to79 = argc > 1;
   fprintf(stderr, "Re-run with any arg to suppress least-significant\n"
                   "   16 bits of 80-bit FP numbers\n");

   check_for_xsave();

   test_xsave(hideBits64to79);

   test_xrstor(hideBits64to79);

   return 0;
}