1#include <config.h> 2#include <stdio.h> 3#include <stdlib.h> 4#include "tests/asm.h" 5#include "tests/malloc.h" 6#include <string.h> 7 8const unsigned int vec0[4] 9 = { 0x12345678, 0x11223344, 0x55667788, 0x87654321 }; 10 11const unsigned int vec1[4] 12 = { 0xABCDEF01, 0xAABBCCDD, 0xEEFF0011, 0x10FEDCBA }; 13 14const unsigned int vecZ[4] 15 = { 0, 0, 0, 0 }; 16 17__attribute__((noinline)) 18void do_fxsave ( void* p, int rexw ) { 19 if (rexw) { 20#ifdef HAVE_AS_AMD64_FXSAVE64 21 asm __volatile__("fxsave64 (%0)" : : "r" (p) : "memory" ); 22#else 23 asm __volatile__("rex64/fxsave (%0)" : : "r" (p) : "memory" ); 24#endif 25 } else { 26 asm __volatile__("fxsave (%0)" : : "r" (p) : "memory" ); 27 } 28} 29 30__attribute__((noinline)) 31void do_fxrstor ( void* p, int rexw ) { 32 if (rexw) { 33#ifdef HAVE_AS_AMD64_FXSAVE64 34 asm __volatile__("fxrstor64 (%0)" : : "r" (p) : "memory" ); 35#else 36 asm __volatile__("rex64/fxrstor (%0)" : : "r" (p) : "memory" ); 37#endif 38 } else { 39 asm __volatile__("fxrstor (%0)" : : "r" (p) : "memory" ); 40 } 41} 42 43void do_zeroise ( void ) 44{ 45 asm __volatile__("finit"); 46 asm __volatile__( 47 "fldz\n\t" 48 "fldz\n\t" 49 "fldz\n\t" 50 "fldz\n\t" 51 "fldz\n\t" 52 "fldz\n\t" 53 "fldz\n\t" 54 "fldz\n\t" 55 "finit\n"); 56#ifndef VGP_amd64_darwin 57 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm0"); 58 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm1"); 59 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm2"); 60 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm3"); 61 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm4"); 62 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm5"); 63 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm6"); 64 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm7"); 65 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm8"); 66 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm9"); 67 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm10"); 68 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm11"); 69 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm12"); 70 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm13"); 71 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm14"); 72 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm15"); 73#else 74 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm0"); 75 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm1"); 76 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm2"); 77 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm3"); 78 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm4"); 79 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm5"); 80 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm6"); 81 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm7"); 82 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm8"); 83 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm9"); 84 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm10"); 85 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm11"); 86 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm12"); 87 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm13"); 88 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm14"); 89 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm15"); 90#endif 91 asm __volatile__( 92 "pushq $0\n\t" 93 "ldmxcsr 0(%rsp)\n\t" 94 "addq $8,%rsp\n"); 95} 96 97/* set up the FP and SSE state, and then dump it. */ 98void do_setup_then_fxsave ( void* p, int rexw ) 99{ 100 asm __volatile__("finit"); 101 asm __volatile__("fldpi"); 102 asm __volatile__("fld1"); 103 asm __volatile__("fldln2"); 104 asm __volatile__("fldlg2"); 105 asm __volatile__("fld %st(3)"); 106 asm __volatile__("fld %st(3)"); 107 asm __volatile__("fld1"); 108 asm __volatile__("movups (%0), %%xmm0" : : "r"(&vec0[0]) : "xmm0" ); 109 asm __volatile__("movups (%0), %%xmm1" : : "r"(&vec1[0]) : "xmm1" ); 110 asm __volatile__("xorps %xmm2, %xmm2"); 111 asm __volatile__("movaps %xmm0, %xmm3"); 112 asm __volatile__("movaps %xmm1, %xmm4"); 113 asm __volatile__("movaps %xmm2, %xmm5"); 114 asm __volatile__("movaps %xmm0, %xmm6"); 115 asm __volatile__("movaps %xmm1, %xmm7"); 116 asm __volatile__("movaps %xmm1, %xmm8"); 117 asm __volatile__("movaps %xmm2, %xmm9"); 118 asm __volatile__("movaps %xmm0, %xmm10"); 119 asm __volatile__("movaps %xmm1, %xmm11"); 120 asm __volatile__("movaps %xmm1, %xmm12"); 121 asm __volatile__("movaps %xmm2, %xmm13"); 122 asm __volatile__("movaps %xmm0, %xmm14"); 123 asm __volatile__("movaps %xmm1, %xmm15"); 124 do_fxsave(p, rexw); 125} 126 127int isFPLsbs ( int i ) 128{ 129 int q; 130 q = 32; if (i == q || i == q+1) return 1; 131 q = 48; if (i == q || i == q+1) return 1; 132 q = 64; if (i == q || i == q+1) return 1; 133 q = 80; if (i == q || i == q+1) return 1; 134 q = 96; if (i == q || i == q+1) return 1; 135 q = 112; if (i == q || i == q+1) return 1; 136 q = 128; if (i == q || i == q+1) return 1; 137 q = 144; if (i == q || i == q+1) return 1; 138 return 0; 139} 140 141void show ( unsigned char* buf, int xx ) 142{ 143 int i; 144 for (i = 0; i < 512; i++) { 145 if ((i % 16) == 0) 146 printf("%3d ", i); 147 if (xx && isFPLsbs(i)) 148 printf("xx "); 149 else 150 printf("%02x ", buf[i]); 151 if (i > 0 && ((i % 16) == 15)) 152 printf("\n"); 153 } 154} 155 156 157int main ( int argc, char** argv ) 158{ 159 unsigned char* buf1 = memalign16(512); 160 unsigned char* buf2 = memalign16(512); 161 unsigned char* buf3 = memalign16(512); 162 int xx = argc > 1; 163 printf("Re-run with any arg to suppress least-significant\n" 164 " 16 bits of FP numbers\n"); 165 166 printf("\n-------- FXSAVE non-64 (REX.W == 0) --------\n"); 167 168 memset(buf1, 0x55, 512); 169 memset(buf2, 0x55, 512); 170 memset(buf3, 0x55, 512); 171 172 /* Load up x87/xmm state and dump it. */ 173 do_setup_then_fxsave(buf1, 0); 174 printf("\nBEFORE\n"); 175 show(buf1, xx); 176 177 /* Zeroise x87/xmm state and dump it, to show that the 178 regs have been cleared out. */ 179 do_zeroise(); 180 do_fxsave(buf2, 0); 181 printf("\nZEROED\n"); 182 show(buf2, xx); 183 184 /* Reload x87/xmm state from buf1 and dump it in buf3. */ 185 do_fxrstor(buf1, 0); 186 do_fxsave(buf3, 0); 187 printf("\nRESTORED\n"); 188 show(buf3, xx); 189 190 printf("\n-------- FXSAVE 64 (REX.W == 1) --------\n\n"); 191 192 memset(buf1, 0x55, 512); 193 memset(buf2, 0x55, 512); 194 memset(buf3, 0x55, 512); 195 196 /* Load up x87/xmm state and dump it. */ 197 do_setup_then_fxsave(buf1, 1); 198 printf("\nBEFORE\n"); 199 show(buf1, xx); 200 201 /* Zeroise x87/xmm state and dump it, to show that the 202 regs have been cleared out. */ 203 do_zeroise(); 204 do_fxsave(buf2, 1); 205 printf("\nZEROED\n"); 206 show(buf2, xx); 207 208 /* Reload x87/xmm state from buf1 and dump it in buf3. */ 209 do_fxrstor(buf1, 1); 210 do_fxsave(buf3, 1); 211 printf("\nRESTORED\n"); 212 show(buf3, xx); 213 214 215 free(buf1); free(buf2); free(buf3); 216 217 return 0; 218} 219