141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj#include <stdio.h>
341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj#include <stdlib.h>
441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj#include <assert.h>
541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj#include "tests/asm.h"
641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj#include "tests/malloc.h"
741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj#include <string.h>
841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
/* Size, in bytes, of every XSAVE image buffer this test allocates
   and dumps. */
#define XSAVE_AREA_SIZE 832

/* Short aliases for the unsigned integer types used below. */
typedef unsigned char       UChar;
typedef unsigned int        UInt;
typedef unsigned long long  ULong;
typedef unsigned long       UWord;

/* Minimal boolean type; True and False are its only intended values. */
typedef unsigned char Bool;
#define True   ((Bool)1)
#define False  ((Bool)0)
2041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
/* Two distinct 8 x 32-bit (256-bit) test patterns.  They are loaded
   into %ymm0 and %ymm1 by do_setup_then_xsave. */
const unsigned int vec0[8] = {
   0x12345678, 0x11223344, 0x55667788, 0x87654321,
   0x15263748, 0x91929394, 0x19293949, 0x48372615
};

const unsigned int vec1[8] = {
   0xABCDEF01, 0xAABBCCDD, 0xEEFF0011, 0x10FEDCBA,
   0xBADCFE10, 0xFFEE9988, 0x11667722, 0x01EFCDAB
};

/* An all-zero pattern of the same shape. */
const unsigned int vecZ[8] = {
   0, 0, 0, 0,
   0, 0, 0, 0
};
3141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
/* memset replacement that stores one byte at a time, so that filling a
   buffer is not meant to involve XMM or YMM registers.  Sets the first
   |n| bytes at |s| to the low 8 bits of |c| and returns |s|. */
static __attribute__((noinline))
void* my_memset(void* s, int c, size_t n)
{
   unsigned char*      out  = (unsigned char*)s;
   const unsigned char fill = (unsigned char)(unsigned int)c;
   size_t              left;
   for (left = n; left > 0; left--) {
      *out++ = fill;
      /* Empty asm with a memory clobber after every store: defeats any
         attempt at autovectorisation of this loop. */
      __asm__ __volatile__("" ::: "cc","memory");
   }
   return s;
}
4441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
/* memcpy counterpart of my_memset: copies |n| bytes from |src| to
   |dest| one byte at a time, lowest address first, and returns |dest|. */
static __attribute__((noinline))
void* my_memcpy(void *dest, const void *src, size_t n)
{
   unsigned char*       out = (unsigned char*)dest;
   const unsigned char* in  = (const unsigned char*)src;
   size_t               k   = 0;
   while (k < n) {
      out[k] = in[k];
      /* Same per-byte barrier as in my_memset. */
      __asm__ __volatile__("" ::: "cc","memory");
      k++;
   }
   return dest;
}
5641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
/* Allocate |size| bytes with memalign64 (from tests/malloc.h;
   presumably 64-byte aligned -- confirm there) and clear them with
   my_memset.  Returns whatever memalign64 returned; a NULL result or a
   zero |size| is passed back without being written to. */
static void* memalign_zeroed64(size_t size)
{
   void* block = memalign64(size);
   if (block == NULL || size == 0)
      return block;
   my_memset(block, 0, size);
   return block;
}
6541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
6641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj__attribute__((noinline))
6741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardjstatic void do_xsave ( void* p, UInt rfbm )
6841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj{
6941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   assert(rfbm <= 7);
7041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   __asm__ __volatile__(
7141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj      "movq %0, %%rax;  xorq %%rdx, %%rdx;  xsave (%1)"
7241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj         : /*OUT*/ : /*IN*/ "r"((ULong)rfbm), "r"(p)
7341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj         : /*TRASH*/ "memory", "rax", "rdx"
7441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   );
7541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj}
7641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
7741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj__attribute__((noinline))
7841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardjstatic void do_xrstor ( void* p, UInt rfbm )
7941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj{
8041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   assert(rfbm <= 7);
8141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   __asm__ __volatile__(
8241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj      "movq %0, %%rax;  xorq %%rdx, %%rdx;  xrstor (%1)"
8341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj         : /*OUT*/ : /*IN*/ "r"((ULong)rfbm), "r"(p)
8441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj         : /*TRASH*/ "rax", "rdx" /* FIXME plus all X87,SSE,AVX regs */
8541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   );
8641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj}
8741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
8841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj/* set up the FP, SSE and AVX state, and then dump it. */
8941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardjstatic void do_setup_then_xsave ( void* p, UInt rfbm )
9041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj{
9141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   __asm__ __volatile__("finit");
9241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   __asm__ __volatile__("fldpi");
9341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   __asm__ __volatile__("fld1");
9441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   __asm__ __volatile__("fldln2");
9541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   __asm__ __volatile__("fldlg2");
9641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   __asm__ __volatile__("fld %st(3)");
9741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   __asm__ __volatile__("fld %st(3)");
9841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   __asm__ __volatile__("fld1");
9940fc6b218cc8f60e44d1858965e69c1dec58000cflorian   __asm__ __volatile__("vmovups (%0), %%ymm0" : : "r"(&vec0[0]) : "xmm0" );
10040fc6b218cc8f60e44d1858965e69c1dec58000cflorian   __asm__ __volatile__("vmovups (%0), %%ymm1" : : "r"(&vec1[0]) : "xmm1" );
10141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   __asm__ __volatile__("vxorps  %ymm2, %ymm2, %ymm2");
10241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   __asm__ __volatile__("vmovaps %ymm0, %ymm3");
10341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   __asm__ __volatile__("vmovaps %ymm1, %ymm4");
10441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   __asm__ __volatile__("vmovaps %ymm2, %ymm5");
10541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   __asm__ __volatile__("vmovaps %ymm0, %ymm6");
10641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   __asm__ __volatile__("vmovaps %ymm1, %ymm7");
10741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   __asm__ __volatile__("vmovaps %ymm1, %ymm8");
10841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   __asm__ __volatile__("vmovaps %ymm2, %ymm9");
10941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   __asm__ __volatile__("vmovaps %ymm0, %ymm10");
11041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   __asm__ __volatile__("vmovaps %ymm1, %ymm11");
11141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   __asm__ __volatile__("vmovaps %ymm1, %ymm12");
11241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   __asm__ __volatile__("vmovaps %ymm2, %ymm13");
11341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   __asm__ __volatile__("vmovaps %ymm0, %ymm14");
11441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   __asm__ __volatile__("vmovaps %ymm1, %ymm15");
11541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   do_xsave(p, rfbm);
11641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj}
11741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
/* Returns 1 if |i| is the first or second byte of one of the eight
   16-byte slots that start at offsets 32, 48, .. 144, and 0 otherwise.
   Those are the two lowest-addressed bytes of each slot, i.e. the
   least-significant 16 bits of the 80-bit FP value stored there. */
static int isFPLsbs ( int i )
{
   const int first = 32;        /* offset of the first slot */
   const int last  = 144 + 1;   /* second byte of the eighth slot */
   if (i < first || i > last)
      return 0;
   return ((i - first) % 16) < 2 ? 1 : 0;
}
13141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
13241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardjstatic void show ( unsigned char* buf, Bool hideBits64to79 )
13341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj{
13441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   int i;
13541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   for (i = 0; i < XSAVE_AREA_SIZE; i++) {
13641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj      if ((i % 16) == 0)
13741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj         fprintf(stderr, "%3d   ", i);
13841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj      if (hideBits64to79 && isFPLsbs(i))
13941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj	 fprintf(stderr, "xx ");
14041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj      else
14141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj         fprintf(stderr, "%02x ", buf[i]);
14241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj      if (i > 0 && ((i % 16) == 15))
14341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj         fprintf(stderr, "\n");
14441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   }
14541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj}
14641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
14741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardjstatic void cpuid ( UInt* eax, UInt* ebx, UInt* ecx, UInt* edx,
14841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj                    UInt index, UInt ecx_in )
14941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj{
15041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   UInt a,b,c,d;
15141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   asm volatile ("cpuid"
15241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj                 : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
15341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj                 : "0" (index), "2"(ecx_in) );
15441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   *eax = a; *ebx = b; *ecx = c; *edx = d;
15541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   //fprintf(stderr, "%08x %08x -> %08x %08x %08x %08x\n",
15641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   //        index,ecx_in, a,b,c,d );
15741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj}
15841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
15941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardjstatic void xgetbv ( UInt* eax, UInt* edx, UInt ecx_in )
16041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj{
16141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   UInt a,d;
16241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   asm volatile ("xgetbv"
16341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj                 : "=a" (a), "=d" (d) \
16441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj                 : "c"(ecx_in) );
16541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   *eax = a; *edx = d;
16641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj}
16741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
16841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardjstatic void check_for_xsave ( void )
16941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj{
17041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   UInt eax, ebx, ecx, edx;
17141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   Bool ok = True;
17241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
17341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   eax = ebx = ecx = edx = 0;
17441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   cpuid(&eax, &ebx, &ecx, &edx, 1,0);
17541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   //fprintf(stderr, "cpuid(1).ecx[26=xsave]   = %u\n", (ecx >> 26) & 1);
17641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   ok = ok && (((ecx >> 26) & 1) == 1);
17741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
17841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   eax = ebx = ecx = edx = 0;
17941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   cpuid(&eax, &ebx, &ecx, &edx, 1,0);
18041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   //fprintf(stderr, "cpuid(1).ecx[27=osxsave] = %u\n", (ecx >> 27) & 1);
18141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   ok = ok && (((ecx >> 27) & 1) == 1);
18241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
18341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   eax = ebx = ecx = edx = 0;
18441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   xgetbv(&eax, &edx, 0);
18541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   //fprintf(stderr, "xgetbv(0) = %u:%u\n", edx, eax);
18641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   ok = ok && (edx == 0) && (eax == 7);
18741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
18841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   if (ok) return;
18941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
19041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   fprintf(stderr,
19141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj           "This program must be run on a CPU that supports AVX and XSAVE.\n");
19241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   exit(1);
19341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj}
19441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
19541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
19641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardjvoid test_xsave ( Bool hideBits64to79 )
19741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj{
19841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   /* Testing XSAVE:
19941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
20041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj      For RBFM in 0 .. 7 (that is, all combinations): set the x87, SSE
20141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj      and AVX registers with some values, do XSAVE to dump it, and
20241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj      print the resulting buffer. */
20341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
20441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   UInt rfbm;
20541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   for (rfbm = 0; rfbm <= 7; rfbm++) {
20607d0c9e54b7fed470ab3363bdf3ba0a8ffec9cb7rhyskidd      UChar* saved_img = memalign_zeroed64(XSAVE_AREA_SIZE);
20741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
20841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj      my_memset(saved_img, 0xAA, XSAVE_AREA_SIZE);
20941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj      saved_img[512] = 0;
21041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj      do_setup_then_xsave(saved_img, rfbm);
21141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
21241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj      fprintf(stderr,
21341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj              "------------------ XSAVE, rfbm = %u ------------------\n", rfbm);
21441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj      show(saved_img, hideBits64to79);
21541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj      fprintf(stderr, "\n");
21641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
21741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj      free(saved_img);
21841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   }
21941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj}
22041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
22141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
22241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardjvoid test_xrstor ( Bool hideBits64to79 )
22341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj{
22441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   /* Testing XRSTOR is more complex than testing XSAVE, because the
22541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj      loaded value(s) depend not only on what bits are requested (by
22641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj      RBFM) but also on what bits are actually present in the image
22741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj      (defined by XSTATE_BV).  So we have to test all 64 (8 x 8)
22841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj      combinations.
22941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
23041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj      The approach is to fill a memory buffer with data, do XRSTOR
23141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj      from the buffer, them dump all components with XSAVE in a new
23241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj      buffer, and print the result.  This is complicated by the fact
23341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj      that we need to be able to see which parts of the state (in
23441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj      registers) are neither overwritten nor zeroed by the restore.
23541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj      Hence the registers must be pre-filled with values which are
23641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj      neither zero nor the data to be loaded.  We choose to use 0x55
23741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj      where possible. */
23841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
23907d0c9e54b7fed470ab3363bdf3ba0a8ffec9cb7rhyskidd   UChar* fives = memalign_zeroed64(XSAVE_AREA_SIZE);
24041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   my_memset(fives, 0x55, XSAVE_AREA_SIZE);
24141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   /* Set MXCSR so that the insn doesn't fault */
24241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   fives[24] = 0x80;
24341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   fives[25] = 0x1f;
24441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   fives[26] = 0;
24541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   fives[27] = 0;
24641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   /* Ditto for the XSAVE header area.  Also set XSTATE_BV. */
24741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   fives[512] = 7;
24841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   UInt i;
24941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   for (i = 1; i <= 23; i++) fives[512+i] = 0;
25041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   /* Fill the x87 register values with something that VEX's
25141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj      80-vs-64-bit kludging won't mess up -- an 80 bit number which is
25241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj      representable also as 64 bit: 123456789.0123 */
25341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   for (i = 0; i <= 7; i++) {
25441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj      UChar* p = &fives[32 + 16 * i];
25541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj      p[0]=0x00; p[1]=0xf8; p[2]=0xc2; p[3]=0x64; p[4]=0xa0;
25641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj      p[5]=0xa2; p[6]=0x79; p[7]=0xeb; p[8]=0x19; p[9]=0x40;
25741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   }
25841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   /* And mark the tags for all 8 dumped regs as "valid". */
25941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   fives[4/*FTW*/] = 0xFF;
26041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
26141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   /* (1) (see comment in loop below) */
26207d0c9e54b7fed470ab3363bdf3ba0a8ffec9cb7rhyskidd   UChar* standard_test_data = memalign_zeroed64(XSAVE_AREA_SIZE);
26341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   do_setup_then_xsave(standard_test_data, 7);
26441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
26541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   UInt xstate_bv, rfbm;
26641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   for (xstate_bv = 0; xstate_bv <= 7; xstate_bv++) {
26741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj      for (rfbm = 0; rfbm <= 7; rfbm++) {
26841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   //{ xstate_bv = 7;
26941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   //      { rfbm = 6;
27041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj         /* 1.  Copy the "standard test data" into registers, and dump
27141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj                it with XSAVE.  This gives us an image we can try
27241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj                restoring from.
27341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
27441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj            2.  Set the register state to all-0x55s (as far as is
27541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj                possible), so we can see which parts get overwritten
27641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj                and which parts get zeroed on the test restore.
27741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
27841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj            3.  Do the restore from the image prepared in (1).
27941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
28041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj            4.  Dump the state with XSAVE and print it.
28141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj         */
28241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
28341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj         /* (3a).  We can't use |standard_test_data| directly, since we
28441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj            need to put in the required |xstate_bv| value.  So make a
28541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj            copy and modify that instead. */
28607d0c9e54b7fed470ab3363bdf3ba0a8ffec9cb7rhyskidd         UChar* img_to_restore_from = memalign_zeroed64(XSAVE_AREA_SIZE);
28741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj         my_memcpy(img_to_restore_from, standard_test_data, XSAVE_AREA_SIZE);
28841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj         img_to_restore_from[512] = xstate_bv;
28941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
29041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj         /* (4a) */
29107d0c9e54b7fed470ab3363bdf3ba0a8ffec9cb7rhyskidd         UChar* saved_img = memalign_zeroed64(XSAVE_AREA_SIZE);
29241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj         my_memset(saved_img, 0xAA, XSAVE_AREA_SIZE);
29341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj         saved_img[512] = 0;
29441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
29541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj         /* (2) */
29641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj         do_xrstor(fives, 7);
29741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
29841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj         // X87, SSE, AVX state LIVE
29941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
30041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj         /* (3b) */
30141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj         /* and this is what we're actually trying to test */
30241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj         do_xrstor(img_to_restore_from, rfbm);
30341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
30441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj         // X87, SSE, AVX state LIVE
30541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
30641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj         /* (4b) */
30741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj         do_xsave(saved_img, 7);
30841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
30941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj         fprintf(stderr,
31041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj                 "---------- XRSTOR, xstate_bv = %u, rfbm = %u ---------\n",
31141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj                xstate_bv, rfbm);
31241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj         show(saved_img, hideBits64to79);
31341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj         fprintf(stderr, "\n");
31441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
31541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj         free(saved_img);
31641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj         free(img_to_restore_from);
31741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj      }
31841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   }
31941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj}
32041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
32141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
32241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardjint main ( int argc, char** argv )
32341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj{
32441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   Bool hideBits64to79 = argc > 1;
32541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   fprintf(stderr, "Re-run with any arg to suppress least-significant\n"
32641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj                   "   16 bits of 80-bit FP numbers\n");
32741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
32841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   check_for_xsave();
32941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
33041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   if (1)
33141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   test_xsave(hideBits64to79);
33241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
33341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   if (1)
33441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   test_xrstor(hideBits64to79);
33541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj
33641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj   return 0;
33741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj}
338