141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj#include <stdio.h> 341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj#include <stdlib.h> 441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj#include <assert.h> 541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj#include "tests/asm.h" 641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj#include "tests/malloc.h" 741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj#include <string.h> 841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj#define XSAVE_AREA_SIZE 832 1041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 1141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardjtypedef unsigned char UChar; 1241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardjtypedef unsigned int UInt; 1341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardjtypedef unsigned long long int ULong; 1441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 1541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardjtypedef unsigned long int UWord; 1641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 1741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardjtypedef unsigned char Bool; 1841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj#define True ((Bool)1) 1941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj#define False ((Bool)0) 2041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 2141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardjconst unsigned int vec0[8] 2241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj = { 0x12345678, 0x11223344, 0x55667788, 0x87654321, 2341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 0x15263748, 0x91929394, 0x19293949, 0x48372615 }; 2441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 2541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardjconst unsigned int vec1[8] 2641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj = { 0xABCDEF01, 0xAABBCCDD, 0xEEFF0011, 0x10FEDCBA, 2741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 0xBADCFE10, 0xFFEE9988, 0x11667722, 0x01EFCDAB }; 2841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 
/* All-zero 256-bit pattern (eight 32-bit words). */
const unsigned int vecZ[8]
   = { 0, 0, 0, 0, 0, 0, 0, 0 };

/* A version of memset that doesn't use XMM or YMM registers.

   Stores the low 8 bits of |c| into each of the first |n| bytes at |s|,
   one byte at a time, and returns |s|.  |n| == 0 writes nothing. */
static __attribute__((noinline))
void* my_memset(void* s, int c, size_t n)
{
   unsigned char* bytes = s;
   const unsigned char fill = (unsigned char)(unsigned int)c;
   size_t i;
   for (i = 0; i < n; i++) {
      bytes[i] = fill;
      /* Defeat any attempt at autovectorisation */
      __asm__ __volatile__("" ::: "cc","memory");
   }
   return s;
}

/* Ditto for memcpy: copies |n| bytes from |src| to |dest| in ascending
   address order, one byte at a time, and returns |dest|.  |src| is only
   read, so it is accessed through a const-qualified pointer. */
static __attribute__((noinline))
void* my_memcpy(void *dest, const void *src, size_t n)
{
   unsigned char* out = dest;
   const unsigned char* in = src;
   size_t i;
   for (i = 0; i < n; i++) {
      out[i] = in[i];
      /* Defeat any attempt at autovectorisation */
      __asm__ __volatile__("" ::: "cc","memory");
   }
   return dest;
}
5641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 5707d0c9e54b7fed470ab3363bdf3ba0a8ffec9cb7rhyskiddstatic void* memalign_zeroed64(size_t size) 5841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj{ 5907d0c9e54b7fed470ab3363bdf3ba0a8ffec9cb7rhyskidd char* p = memalign64(size); 6041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj if (p && size > 0) { 6141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj my_memset(p, 0, size); 6241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj } 6341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj return p; 6441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj} 6541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 6641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj__attribute__((noinline)) 6741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardjstatic void do_xsave ( void* p, UInt rfbm ) 6841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj{ 6941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj assert(rfbm <= 7); 7041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj __asm__ __volatile__( 7141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj "movq %0, %%rax; xorq %%rdx, %%rdx; xsave (%1)" 7241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj : /*OUT*/ : /*IN*/ "r"((ULong)rfbm), "r"(p) 7341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj : /*TRASH*/ "memory", "rax", "rdx" 7441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj ); 7541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj} 7641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 7741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj__attribute__((noinline)) 7841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardjstatic void do_xrstor ( void* p, UInt rfbm ) 7941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj{ 8041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj assert(rfbm <= 7); 8141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj __asm__ __volatile__( 8241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj "movq %0, %%rax; xorq %%rdx, %%rdx; xrstor (%1)" 8341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj : /*OUT*/ : /*IN*/ "r"((ULong)rfbm), "r"(p) 
8441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj : /*TRASH*/ "rax", "rdx" /* FIXME plus all X87,SSE,AVX regs */ 8541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj ); 8641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj} 8741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 8841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj/* set up the FP, SSE and AVX state, and then dump it. */ 8941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardjstatic void do_setup_then_xsave ( void* p, UInt rfbm ) 9041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj{ 9141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj __asm__ __volatile__("finit"); 9241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj __asm__ __volatile__("fldpi"); 9341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj __asm__ __volatile__("fld1"); 9441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj __asm__ __volatile__("fldln2"); 9541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj __asm__ __volatile__("fldlg2"); 9641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj __asm__ __volatile__("fld %st(3)"); 9741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj __asm__ __volatile__("fld %st(3)"); 9841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj __asm__ __volatile__("fld1"); 9940fc6b218cc8f60e44d1858965e69c1dec58000cflorian __asm__ __volatile__("vmovups (%0), %%ymm0" : : "r"(&vec0[0]) : "xmm0" ); 10040fc6b218cc8f60e44d1858965e69c1dec58000cflorian __asm__ __volatile__("vmovups (%0), %%ymm1" : : "r"(&vec1[0]) : "xmm1" ); 10141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj __asm__ __volatile__("vxorps %ymm2, %ymm2, %ymm2"); 10241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj __asm__ __volatile__("vmovaps %ymm0, %ymm3"); 10341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj __asm__ __volatile__("vmovaps %ymm1, %ymm4"); 10441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj __asm__ __volatile__("vmovaps %ymm2, %ymm5"); 10541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj __asm__ __volatile__("vmovaps %ymm0, %ymm6"); 10641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj __asm__ 
__volatile__("vmovaps %ymm1, %ymm7"); 10741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj __asm__ __volatile__("vmovaps %ymm1, %ymm8"); 10841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj __asm__ __volatile__("vmovaps %ymm2, %ymm9"); 10941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj __asm__ __volatile__("vmovaps %ymm0, %ymm10"); 11041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj __asm__ __volatile__("vmovaps %ymm1, %ymm11"); 11141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj __asm__ __volatile__("vmovaps %ymm1, %ymm12"); 11241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj __asm__ __volatile__("vmovaps %ymm2, %ymm13"); 11341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj __asm__ __volatile__("vmovaps %ymm0, %ymm14"); 11441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj __asm__ __volatile__("vmovaps %ymm1, %ymm15"); 11541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj do_xsave(p, rfbm); 11641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj} 11741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 11841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardjstatic int isFPLsbs ( int i ) 11941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj{ 12041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj int q; 12141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj q = 32; if (i == q || i == q+1) return 1; 12241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj q = 48; if (i == q || i == q+1) return 1; 12341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj q = 64; if (i == q || i == q+1) return 1; 12441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj q = 80; if (i == q || i == q+1) return 1; 12541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj q = 96; if (i == q || i == q+1) return 1; 12641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj q = 112; if (i == q || i == q+1) return 1; 12741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj q = 128; if (i == q || i == q+1) return 1; 12841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj q = 144; if (i == q || i == q+1) return 1; 12941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj return 0; 
13041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj} 13141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 13241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardjstatic void show ( unsigned char* buf, Bool hideBits64to79 ) 13341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj{ 13441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj int i; 13541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj for (i = 0; i < XSAVE_AREA_SIZE; i++) { 13641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj if ((i % 16) == 0) 13741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj fprintf(stderr, "%3d ", i); 13841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj if (hideBits64to79 && isFPLsbs(i)) 13941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj fprintf(stderr, "xx "); 14041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj else 14141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj fprintf(stderr, "%02x ", buf[i]); 14241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj if (i > 0 && ((i % 16) == 15)) 14341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj fprintf(stderr, "\n"); 14441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj } 14541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj} 14641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 14741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardjstatic void cpuid ( UInt* eax, UInt* ebx, UInt* ecx, UInt* edx, 14841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj UInt index, UInt ecx_in ) 14941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj{ 15041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj UInt a,b,c,d; 15141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj asm volatile ("cpuid" 15241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \ 15341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj : "0" (index), "2"(ecx_in) ); 15441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj *eax = a; *ebx = b; *ecx = c; *edx = d; 15541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj //fprintf(stderr, "%08x %08x -> %08x %08x %08x %08x\n", 15641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj // index,ecx_in, 
a,b,c,d ); 15741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj} 15841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 15941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardjstatic void xgetbv ( UInt* eax, UInt* edx, UInt ecx_in ) 16041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj{ 16141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj UInt a,d; 16241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj asm volatile ("xgetbv" 16341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj : "=a" (a), "=d" (d) \ 16441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj : "c"(ecx_in) ); 16541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj *eax = a; *edx = d; 16641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj} 16741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 16841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardjstatic void check_for_xsave ( void ) 16941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj{ 17041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj UInt eax, ebx, ecx, edx; 17141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj Bool ok = True; 17241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 17341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj eax = ebx = ecx = edx = 0; 17441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj cpuid(&eax, &ebx, &ecx, &edx, 1,0); 17541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj //fprintf(stderr, "cpuid(1).ecx[26=xsave] = %u\n", (ecx >> 26) & 1); 17641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj ok = ok && (((ecx >> 26) & 1) == 1); 17741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 17841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj eax = ebx = ecx = edx = 0; 17941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj cpuid(&eax, &ebx, &ecx, &edx, 1,0); 18041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj //fprintf(stderr, "cpuid(1).ecx[27=osxsave] = %u\n", (ecx >> 27) & 1); 18141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj ok = ok && (((ecx >> 27) & 1) == 1); 18241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 18341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj eax = ebx = ecx = edx = 0; 
18441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj xgetbv(&eax, &edx, 0); 18541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj //fprintf(stderr, "xgetbv(0) = %u:%u\n", edx, eax); 18641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj ok = ok && (edx == 0) && (eax == 7); 18741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 18841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj if (ok) return; 18941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 19041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj fprintf(stderr, 19141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj "This program must be run on a CPU that supports AVX and XSAVE.\n"); 19241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj exit(1); 19341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj} 19441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 19541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 19641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardjvoid test_xsave ( Bool hideBits64to79 ) 19741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj{ 19841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj /* Testing XSAVE: 19941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 20041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj For RBFM in 0 .. 7 (that is, all combinations): set the x87, SSE 20141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj and AVX registers with some values, do XSAVE to dump it, and 20241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj print the resulting buffer. 
*/ 20341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 20441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj UInt rfbm; 20541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj for (rfbm = 0; rfbm <= 7; rfbm++) { 20607d0c9e54b7fed470ab3363bdf3ba0a8ffec9cb7rhyskidd UChar* saved_img = memalign_zeroed64(XSAVE_AREA_SIZE); 20741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 20841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj my_memset(saved_img, 0xAA, XSAVE_AREA_SIZE); 20941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj saved_img[512] = 0; 21041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj do_setup_then_xsave(saved_img, rfbm); 21141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 21241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj fprintf(stderr, 21341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj "------------------ XSAVE, rfbm = %u ------------------\n", rfbm); 21441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj show(saved_img, hideBits64to79); 21541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj fprintf(stderr, "\n"); 21641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 21741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj free(saved_img); 21841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj } 21941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj} 22041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 22141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 22241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardjvoid test_xrstor ( Bool hideBits64to79 ) 22341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj{ 22441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj /* Testing XRSTOR is more complex than testing XSAVE, because the 22541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj loaded value(s) depend not only on what bits are requested (by 22641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj RBFM) but also on what bits are actually present in the image 22741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj (defined by XSTATE_BV). So we have to test all 64 (8 x 8) 22841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj combinations. 
22941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 23041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj The approach is to fill a memory buffer with data, do XRSTOR 23141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj from the buffer, them dump all components with XSAVE in a new 23241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj buffer, and print the result. This is complicated by the fact 23341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj that we need to be able to see which parts of the state (in 23441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj registers) are neither overwritten nor zeroed by the restore. 23541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj Hence the registers must be pre-filled with values which are 23641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj neither zero nor the data to be loaded. We choose to use 0x55 23741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj where possible. */ 23841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 23907d0c9e54b7fed470ab3363bdf3ba0a8ffec9cb7rhyskidd UChar* fives = memalign_zeroed64(XSAVE_AREA_SIZE); 24041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj my_memset(fives, 0x55, XSAVE_AREA_SIZE); 24141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj /* Set MXCSR so that the insn doesn't fault */ 24241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj fives[24] = 0x80; 24341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj fives[25] = 0x1f; 24441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj fives[26] = 0; 24541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj fives[27] = 0; 24641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj /* Ditto for the XSAVE header area. Also set XSTATE_BV. 
*/ 24741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj fives[512] = 7; 24841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj UInt i; 24941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj for (i = 1; i <= 23; i++) fives[512+i] = 0; 25041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj /* Fill the x87 register values with something that VEX's 25141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 80-vs-64-bit kludging won't mess up -- an 80 bit number which is 25241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj representable also as 64 bit: 123456789.0123 */ 25341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj for (i = 0; i <= 7; i++) { 25441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj UChar* p = &fives[32 + 16 * i]; 25541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj p[0]=0x00; p[1]=0xf8; p[2]=0xc2; p[3]=0x64; p[4]=0xa0; 25641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj p[5]=0xa2; p[6]=0x79; p[7]=0xeb; p[8]=0x19; p[9]=0x40; 25741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj } 25841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj /* And mark the tags for all 8 dumped regs as "valid". 
*/ 25941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj fives[4/*FTW*/] = 0xFF; 26041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 26141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj /* (1) (see comment in loop below) */ 26207d0c9e54b7fed470ab3363bdf3ba0a8ffec9cb7rhyskidd UChar* standard_test_data = memalign_zeroed64(XSAVE_AREA_SIZE); 26341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj do_setup_then_xsave(standard_test_data, 7); 26441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 26541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj UInt xstate_bv, rfbm; 26641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj for (xstate_bv = 0; xstate_bv <= 7; xstate_bv++) { 26741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj for (rfbm = 0; rfbm <= 7; rfbm++) { 26841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj //{ xstate_bv = 7; 26941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj // { rfbm = 6; 27041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj /* 1. Copy the "standard test data" into registers, and dump 27141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj it with XSAVE. This gives us an image we can try 27241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj restoring from. 27341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 27441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 2. Set the register state to all-0x55s (as far as is 27541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj possible), so we can see which parts get overwritten 27641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj and which parts get zeroed on the test restore. 27741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 27841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 3. Do the restore from the image prepared in (1). 27941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 28041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 4. Dump the state with XSAVE and print it. 28141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj */ 28241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 28341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj /* (3a). 
We can't use |standard_test_data| directly, since we 28441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj need to put in the required |xstate_bv| value. So make a 28541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj copy and modify that instead. */ 28607d0c9e54b7fed470ab3363bdf3ba0a8ffec9cb7rhyskidd UChar* img_to_restore_from = memalign_zeroed64(XSAVE_AREA_SIZE); 28741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj my_memcpy(img_to_restore_from, standard_test_data, XSAVE_AREA_SIZE); 28841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj img_to_restore_from[512] = xstate_bv; 28941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 29041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj /* (4a) */ 29107d0c9e54b7fed470ab3363bdf3ba0a8ffec9cb7rhyskidd UChar* saved_img = memalign_zeroed64(XSAVE_AREA_SIZE); 29241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj my_memset(saved_img, 0xAA, XSAVE_AREA_SIZE); 29341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj saved_img[512] = 0; 29441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 29541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj /* (2) */ 29641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj do_xrstor(fives, 7); 29741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 29841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj // X87, SSE, AVX state LIVE 29941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 30041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj /* (3b) */ 30141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj /* and this is what we're actually trying to test */ 30241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj do_xrstor(img_to_restore_from, rfbm); 30341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 30441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj // X87, SSE, AVX state LIVE 30541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 30641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj /* (4b) */ 30741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj do_xsave(saved_img, 7); 30841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 30941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 
fprintf(stderr, 31041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj "---------- XRSTOR, xstate_bv = %u, rfbm = %u ---------\n", 31141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj xstate_bv, rfbm); 31241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj show(saved_img, hideBits64to79); 31341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj fprintf(stderr, "\n"); 31441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 31541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj free(saved_img); 31641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj free(img_to_restore_from); 31741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj } 31841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj } 31941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj} 32041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 32141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 32241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardjint main ( int argc, char** argv ) 32341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj{ 32441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj Bool hideBits64to79 = argc > 1; 32541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj fprintf(stderr, "Re-run with any arg to suppress least-significant\n" 32641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj " 16 bits of 80-bit FP numbers\n"); 32741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 32841863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj check_for_xsave(); 32941863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 33041863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj if (1) 33141863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj test_xsave(hideBits64to79); 33241863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 33341863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj if (1) 33441863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj test_xrstor(hideBits64to79); 33541863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj 33641863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj return 0; 33741863f8a8d11cf78f5b5a08f6caf171dc249abe5sewardj} 338