/* fxsave-amd64.c revision 8f943afc22a6a683b78271836c8ddc462b4824a9 */
1 2#include <stdio.h> 3#include <stdlib.h> 4#include "tests/asm.h" 5#include "tests/malloc.h" 6#include <string.h> 7 8const unsigned int vec0[4] 9 = { 0x12345678, 0x11223344, 0x55667788, 0x87654321 }; 10 11const unsigned int vec1[4] 12 = { 0xABCDEF01, 0xAABBCCDD, 0xEEFF0011, 0x10FEDCBA }; 13 14const unsigned int vecZ[4] 15 = { 0, 0, 0, 0 }; 16 17__attribute__((noinline)) 18void do_fxsave ( void* p, int rexw ) { 19 if (rexw) { 20 asm __volatile__("rex64/fxsave (%0)" : : "r" (p) : "memory" ); 21 } else { 22 asm __volatile__("fxsave (%0)" : : "r" (p) : "memory" ); 23 } 24} 25 26__attribute__((noinline)) 27void do_fxrstor ( void* p, int rexw ) { 28 if (rexw) { 29 asm __volatile__("rex64/fxrstor (%0)" : : "r" (p) : "memory" ); 30 } else { 31 asm __volatile__("fxrstor (%0)" : : "r" (p) : "memory" ); 32 } 33} 34 35void do_zeroise ( void ) 36{ 37 asm __volatile__("finit"); 38 asm __volatile__( 39 "fldz\n\t" 40 "fldz\n\t" 41 "fldz\n\t" 42 "fldz\n\t" 43 "fldz\n\t" 44 "fldz\n\t" 45 "fldz\n\t" 46 "fldz\n\t" 47 "finit\n"); 48#ifndef VGP_amd64_darwin 49 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm0"); 50 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm1"); 51 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm2"); 52 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm3"); 53 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm4"); 54 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm5"); 55 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm6"); 56 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm7"); 57 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm8"); 58 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm9"); 59 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm10"); 60 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm11"); 61 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm12"); 62 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm13"); 63 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm14"); 64 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm15"); 65#else 66 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), 
%xmm0"); 67 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm1"); 68 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm2"); 69 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm3"); 70 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm4"); 71 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm5"); 72 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm6"); 73 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm7"); 74 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm8"); 75 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm9"); 76 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm10"); 77 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm11"); 78 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm12"); 79 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm13"); 80 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm14"); 81 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm15"); 82#endif 83 asm __volatile__( 84 "pushq $0\n\t" 85 "ldmxcsr 0(%rsp)\n\t" 86 "addq $8,%rsp\n"); 87} 88 89/* set up the FP and SSE state, and then dump it. 
*/ 90void do_setup_then_fxsave ( void* p, int rexw ) 91{ 92 asm __volatile__("finit"); 93 asm __volatile__("fldpi"); 94 asm __volatile__("fld1"); 95 asm __volatile__("fldln2"); 96 asm __volatile__("fldlg2"); 97 asm __volatile__("fld %st(3)"); 98 asm __volatile__("fld %st(3)"); 99 asm __volatile__("fld1"); 100 asm __volatile__("movups (%0), %%xmm0" : : "r"(&vec0[0]) : "xmm0" ); 101 asm __volatile__("movups (%0), %%xmm1" : : "r"(&vec1[0]) : "xmm1" ); 102 asm __volatile__("xorps %xmm2, %xmm2"); 103 asm __volatile__("movaps %xmm0, %xmm3"); 104 asm __volatile__("movaps %xmm1, %xmm4"); 105 asm __volatile__("movaps %xmm2, %xmm5"); 106 asm __volatile__("movaps %xmm0, %xmm6"); 107 asm __volatile__("movaps %xmm1, %xmm7"); 108 asm __volatile__("movaps %xmm1, %xmm8"); 109 asm __volatile__("movaps %xmm2, %xmm9"); 110 asm __volatile__("movaps %xmm0, %xmm10"); 111 asm __volatile__("movaps %xmm1, %xmm11"); 112 asm __volatile__("movaps %xmm1, %xmm12"); 113 asm __volatile__("movaps %xmm2, %xmm13"); 114 asm __volatile__("movaps %xmm0, %xmm14"); 115 asm __volatile__("movaps %xmm1, %xmm15"); 116 do_fxsave(p, rexw); 117} 118 119int isFPLsbs ( int i ) 120{ 121 int q; 122 q = 32; if (i == q || i == q+1) return 1; 123 q = 48; if (i == q || i == q+1) return 1; 124 q = 64; if (i == q || i == q+1) return 1; 125 q = 80; if (i == q || i == q+1) return 1; 126 q = 96; if (i == q || i == q+1) return 1; 127 q = 112; if (i == q || i == q+1) return 1; 128 q = 128; if (i == q || i == q+1) return 1; 129 q = 144; if (i == q || i == q+1) return 1; 130 return 0; 131} 132 133void show ( unsigned char* buf, int xx ) 134{ 135 int i; 136 for (i = 0; i < 512; i++) { 137 if ((i % 16) == 0) 138 printf("%3d ", i); 139 if (xx && isFPLsbs(i)) 140 printf("xx "); 141 else 142 printf("%02x ", buf[i]); 143 if (i > 0 && ((i % 16) == 15)) 144 printf("\n"); 145 } 146} 147 148 149int main ( int argc, char** argv ) 150{ 151 unsigned char* buf1 = memalign16(512); 152 unsigned char* buf2 = memalign16(512); 153 unsigned char* 
buf3 = memalign16(512); 154 int xx = argc > 1; 155 printf("Re-run with any arg to suppress least-significant\n" 156 " 16 bits of FP numbers\n"); 157 158 printf("\n-------- FXSAVE non-64 (REX.W == 0) --------\n"); 159 160 memset(buf1, 0x55, 512); 161 memset(buf2, 0x55, 512); 162 memset(buf3, 0x55, 512); 163 164 /* Load up x87/xmm state and dump it. */ 165 do_setup_then_fxsave(buf1, 0); 166 printf("\nBEFORE\n"); 167 show(buf1, xx); 168 169 /* Zeroise x87/xmm state and dump it, to show that the 170 regs have been cleared out. */ 171 do_zeroise(); 172 do_fxsave(buf2, 0); 173 printf("\nZEROED\n"); 174 show(buf2, xx); 175 176 /* Reload x87/xmm state from buf1 and dump it in buf3. */ 177 do_fxrstor(buf1, 0); 178 do_fxsave(buf3, 0); 179 printf("\nRESTORED\n"); 180 show(buf3, xx); 181 182 printf("\n-------- FXSAVE 64 (REX.W == 1) --------\n\n"); 183 184 memset(buf1, 0x55, 512); 185 memset(buf2, 0x55, 512); 186 memset(buf3, 0x55, 512); 187 188 /* Load up x87/xmm state and dump it. */ 189 do_setup_then_fxsave(buf1, 1); 190 printf("\nBEFORE\n"); 191 show(buf1, xx); 192 193 /* Zeroise x87/xmm state and dump it, to show that the 194 regs have been cleared out. */ 195 do_zeroise(); 196 do_fxsave(buf2, 1); 197 printf("\nZEROED\n"); 198 show(buf2, xx); 199 200 /* Reload x87/xmm state from buf1 and dump it in buf3. */ 201 do_fxrstor(buf1, 1); 202 do_fxsave(buf3, 1); 203 printf("\nRESTORED\n"); 204 show(buf3, xx); 205 206 207 free(buf1); free(buf2); free(buf3); 208 209 return 0; 210} 211