1#include <stdio.h> 2#include <string.h> 3 4#define N 64 5struct float_test { 6 float x[N], y[N], z[N], expected[N], res[N]; 7} ft __attribute__((aligned (32))); 8 9struct double_test { 10 double x[N], y[N], z[N], expected[N], res[N]; 11} dt __attribute__((aligned (32))); 12 13float plus_zero, plus_infty, minus_infty, nan_value; 14 15static int testf( float x, float y ) 16{ 17 unsigned int a, b; 18 memcpy( &a, &x, sizeof (a) ); 19 memcpy( &b, &y, sizeof (b) ); 20 if ((a & 0x7fc00000U) == 0x7fc00000U) 21 return (b & 0x7fc00000U) != 0x7fc00000U; 22 return memcmp( &a, &b, sizeof (a) ) != 0; 23} 24 25static int test_fmaf( void ) 26{ 27 int res = 0, i, j; 28 float w; 29 for (i = 0; i < N; i++) { 30 int thisres = 0; 31 __asm __volatile__ ("vfmadd132ss %2, %3, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "x" (ft.z[i])); 32 thisres |= testf( w, ft.expected[i] ); 33 __asm __volatile__ ("vfmadd132ss %2, %3, %0" : "=x" (w) : "0" (ft.x[i]), "m" (ft.y[i]), "x" (ft.z[i])); 34 thisres |= testf( w, ft.expected[i] ); 35 __asm __volatile__ ("vfmadd213ss %3, %2, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "x" (ft.z[i])); 36 thisres |= testf( w, ft.expected[i] ); 37 __asm __volatile__ ("vfmadd213ss %3, %2, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "m" (ft.z[i])); 38 thisres |= testf( w, ft.expected[i] ); 39 __asm __volatile__ ("vfmadd231ss %2, %1, %0" : "=x" (w) : "x" (ft.x[i]), "x" (ft.y[i]), "0" (ft.z[i])); 40 thisres |= testf( w, ft.expected[i] ); 41 __asm __volatile__ ("vfmadd231ss %2, %1, %0" : "=x" (w) : "x" (ft.x[i]), "m" (ft.y[i]), "0" (ft.z[i])); 42 thisres |= testf( w, ft.expected[i] ); 43 if (thisres) 44 printf( "Failure 1 %d %a %a\n", i, w, ft.expected[i] ); 45 res |= thisres; 46 thisres = 0; 47 __asm __volatile__ ("vfnmsub132ss %2, %3, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "x" (ft.z[i])); 48 thisres |= testf( -w, ft.expected[i] ); 49 __asm __volatile__ ("vfnmsub132ss %2, %3, %0" : "=x" (w) : "0" (ft.x[i]), "m" (ft.y[i]), "x" (ft.z[i])); 50 thisres |= testf( -w, ft.expected[i] ); 51 __asm __volatile__ ("vfnmsub213ss %3, %2, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "x" (ft.z[i])); 52 thisres |= testf( -w, ft.expected[i] ); 53 __asm __volatile__ ("vfnmsub213ss %3, %2, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "m" (ft.z[i])); 54 thisres |= testf( -w, ft.expected[i] ); 55 __asm __volatile__ ("vfnmsub231ss %2, %1, %0" : "=x" (w) : "x" (ft.x[i]), "x" (ft.y[i]), "0" (ft.z[i])); 56 thisres |= testf( -w, ft.expected[i] ); 57 __asm __volatile__ ("vfnmsub231ss %2, %1, %0" : "=x" (w) : "x" (ft.x[i]), "m" (ft.y[i]), "0" (ft.z[i])); 58 thisres |= testf( -w, ft.expected[i] ); 59 if (thisres) 60 printf( "Failure 2 %d %a %a\n", i, w, ft.expected[i] ); 61 res |= thisres; 62 } 63 for (i = 0; i < N; i++) 64 ft.z[i] = -ft.z[i]; 65 for (i = 0; i < N; i++) { 66 int thisres = 0; 67 __asm __volatile__ ("vfmsub132ss %2, %3, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "x" (ft.z[i])); 68 thisres |= testf( w, ft.expected[i] ); 69 __asm __volatile__ ("vfmsub132ss %2, %3, %0" : "=x" (w) : "0" (ft.x[i]), "m" (ft.y[i]), "x" (ft.z[i])); 70 thisres |= testf( w, ft.expected[i] ); 71 __asm __volatile__ ("vfmsub213ss %3, %2, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "x" (ft.z[i])); 72 thisres |= testf( w, ft.expected[i] ); 73 __asm __volatile__ ("vfmsub213ss %3, %2, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "m" (ft.z[i])); 74 thisres |= testf( w, ft.expected[i] ); 75 __asm __volatile__ ("vfmsub231ss %2, %1, %0" : "=x" (w) : "x" (ft.x[i]), "x" (ft.y[i]), "0" (ft.z[i])); 76 thisres |= testf( w, ft.expected[i] ); 77 __asm __volatile__ ("vfmsub231ss %2, %1, %0" : "=x" (w) : "x" (ft.x[i]), "m" (ft.y[i]), "0" (ft.z[i])); 78 thisres |= testf( w, ft.expected[i] ); 79 if (thisres) 80 printf( "Failure 3 %d %a %a\n", i, w, ft.expected[i] ); 81 res |= thisres; 82 thisres = 0; 83 __asm __volatile__ ("vfnmadd132ss %2, %3, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "x" (ft.z[i])); 84 thisres |= testf( -w, ft.expected[i] ); 85 __asm __volatile__ ("vfnmadd132ss %2, %3, %0" : "=x" (w) : "0" (ft.x[i]), "m" (ft.y[i]), "x" (ft.z[i])); 86 thisres |= testf( -w, ft.expected[i] ); 87 __asm __volatile__ ("vfnmadd213ss %3, %2, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "x" (ft.z[i])); 88 thisres |= testf( -w, ft.expected[i] ); 89 __asm __volatile__ ("vfnmadd213ss %3, %2, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "m" (ft.z[i])); 90 thisres |= testf( -w, ft.expected[i] ); 91 __asm __volatile__ ("vfnmadd231ss %2, %1, %0" : "=x" (w) : "x" (ft.x[i]), "x" (ft.y[i]), "0" (ft.z[i])); 92 thisres |= testf( -w, ft.expected[i] ); 93 __asm __volatile__ ("vfnmadd231ss %2, %1, %0" : "=x" (w) : "x" (ft.x[i]), "m" (ft.y[i]), "0" (ft.z[i])); 94 thisres |= testf( -w, ft.expected[i] ); 95 if (thisres) 96 printf( "Failure 4 %d %a %a\n", i, w, ft.expected[i] ); 97 res |= thisres; 98 } 99 for (i = 0; i < N; i++) 100 ft.z[i] = -ft.z[i]; 101 for (i = 0; i < N; i += 4) { 102 int thisres = 0; 103 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%3), %%xmm8;" 104 "vfmadd132ps %%xmm7, %%xmm8, %%xmm9;" 105 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 106 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 107 for (j = 0; j < 4; j++) 108 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 109 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm8;" 110 "vfmadd132ps (%2), %%xmm8, %%xmm9;" 111 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 112 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 113 for (j = 0; j < 4; j++) 114 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 115 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm7; vmovaps (%2), %%xmm8;" 116 "vfmadd213ps %%xmm7, %%xmm8, %%xmm9;" 117 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 118 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 119 for (j = 0; j < 4; j++) 120 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 121 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm8;" 122 "vfmadd213ps (%3), %%xmm8, %%xmm9;" 123 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 124 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 125 for (j = 0; j < 4; j++) 126 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 127 __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%1), %%xmm8;" 128 "vfmadd231ps %%xmm7, %%xmm8, %%xmm9;" 129 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 130 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 131 for (j = 0; j < 4; j++) 132 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 133 __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%1), %%xmm8;" 134 "vfmadd231ps (%2), %%xmm8, %%xmm9;" 135 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 136 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 137 for (j = 0; j < 4; j++) 138 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 139 if (thisres) { 140 printf( "Failure 5 %d", i ); 141 for (j = 0; j < 4; j++) 142 printf( " %a %a", ft.res[i+j], ft.expected[i+j] ); 143 printf( "\n" ); 144 } 145 res |= thisres; 146 thisres = 0; 147 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%3), %%xmm8;" 148 "vfnmsub132ps %%xmm7, %%xmm8, %%xmm9;" 149 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 150 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 151 for (j = 0; j < 4; j++) 152 thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 153 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm8;" 154 "vfnmsub132ps (%2), %%xmm8, %%xmm9;" 155 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 156 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 157 for (j = 0; j < 4; j++) 158 thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 159 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm7; vmovaps (%2), %%xmm8;" 160 "vfnmsub213ps %%xmm7, %%xmm8, %%xmm9;" 161 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 162 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 163 for (j = 0; j < 4; j++) 164 thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 165 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm8;" 166 "vfnmsub213ps (%3), %%xmm8, %%xmm9;" 167 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 168 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 169 for (j = 0; j < 4; j++) 170 thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 171 __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%1), %%xmm8;" 172 "vfnmsub231ps %%xmm7, %%xmm8, %%xmm9;" 173 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 174 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 175 for (j = 0; j < 4; j++) 176 thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 177 __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%1), %%xmm8;" 178 "vfnmsub231ps (%2), %%xmm8, %%xmm9;" 179 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 180 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 181 for (j = 0; j < 4; j++) 182 thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 183 if (thisres) { 184 printf( "Failure 6 %d", i ); 185 for (j = 0; j < 4; j++) 186 printf( " %a %a", ft.res[i+j], ft.expected[i+j] ); 187 printf( "\n" ); 188 } 189 res |= thisres; 190 } 191 for (i = 0; i < N; i++) 192 ft.z[i] = -ft.z[i]; 193 for (i = 0; i < N; i += 4) { 194 int thisres = 0; 195 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%3), %%xmm8;" 196 "vfmsub132ps %%xmm7, %%xmm8, %%xmm9;" 197 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 198 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 199 for (j = 0; j < 4; j++) 200 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 201 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm8;" 202 "vfmsub132ps (%2), %%xmm8, %%xmm9;" 203 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 204 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 205 for (j = 0; j < 4; j++) 206 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 207 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm7; vmovaps (%2), %%xmm8;" 208 "vfmsub213ps %%xmm7, %%xmm8, %%xmm9;" 209 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 210 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 211 for (j = 0; j < 4; j++) 212 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 213 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm8;" 214 "vfmsub213ps (%3), %%xmm8, %%xmm9;" 215 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 216 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 217 for (j = 0; j < 4; j++) 218 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 219 __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%1), %%xmm8;" 220 "vfmsub231ps %%xmm7, %%xmm8, %%xmm9;" 221 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 222 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 223 for (j = 0; j < 4; j++) 224 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 225 __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%1), %%xmm8;" 226 "vfmsub231ps (%2), %%xmm8, %%xmm9;" 227 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 228 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 229 for (j = 0; j < 4; j++) 230 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 231 if (thisres) { 232 printf( "Failure 7 %d", i ); 233 for (j = 0; j < 4; j++) 234 printf( " %a %a", ft.res[i+j], ft.expected[i+j] ); 235 printf( "\n" ); 236 } 237 res |= thisres; 238 thisres = 0; 239 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%3), %%xmm8;" 240 "vfnmadd132ps %%xmm7, %%xmm8, %%xmm9;" 241 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 242 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 243 for (j = 0; j < 4; j++) 244 thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 245 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm8;" 246 "vfnmadd132ps (%2), %%xmm8, %%xmm9;" 247 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 248 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 249 for (j = 0; j < 4; j++) 250 thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 251 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm7; vmovaps (%2), %%xmm8;" 252 "vfnmadd213ps %%xmm7, %%xmm8, %%xmm9;" 253 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 254 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 255 for (j = 0; j < 4; j++) 256 thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 257 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm8;" 258 "vfnmadd213ps (%3), %%xmm8, %%xmm9;" 259 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 260 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 261 for (j = 0; j < 4; j++) 262 thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 263 __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%1), %%xmm8;" 264 "vfnmadd231ps %%xmm7, %%xmm8, %%xmm9;" 265 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 266 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 267 for (j = 0; j < 4; j++) 268 thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 269 __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%1), %%xmm8;" 270 "vfnmadd231ps (%2), %%xmm8, %%xmm9;" 271 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 272 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 273 for (j = 0; j < 4; j++) 274 thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 275 if (thisres) { 276 printf( "Failure 8 %d", i ); 277 for (j = 0; j < 4; j++) 278 printf( " %a %a", ft.res[i+j], ft.expected[i+j] ); 279 printf( "\n" ); 280 } 281 res |= thisres; 282 } 283 for (i = 1; i < N; i += 2) 284 ft.z[i] = -ft.z[i]; 285 for (i = 0; i < N; i += 4) { 286 int thisres = 0; 287 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%3), %%xmm8;" 288 "vfmaddsub132ps %%xmm7, %%xmm8, %%xmm9;" 289 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 290 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 291 for (j = 0; j < 4; j++) 292 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 293 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm8;" 294 "vfmaddsub132ps (%2), %%xmm8, %%xmm9;" 295 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 296 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 297 for (j = 0; j < 4; j++) 298 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 299 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm7; vmovaps (%2), %%xmm8;" 300 "vfmaddsub213ps %%xmm7, %%xmm8, %%xmm9;" 301 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 302 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 303 for (j = 0; j < 4; j++) 304 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 305 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm8;" 306 "vfmaddsub213ps (%3), %%xmm8, %%xmm9;" 307 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 308 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 309 for (j = 0; j < 4; j++) 310 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 311 __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%1), %%xmm8;" 312 "vfmaddsub231ps %%xmm7, %%xmm8, %%xmm9;" 313 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 314 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 315 for (j = 0; j < 4; j++) 316 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 317 __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%1), %%xmm8;" 318 "vfmaddsub231ps (%2), %%xmm8, %%xmm9;" 319 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 320 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 321 for (j = 0; j < 4; j++) 322 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 323 if (thisres) { 324 printf( "Failure 9 %d", i ); 325 for (j = 0; j < 4; j++) 326 printf( " %a %a", ft.res[i+j], ft.expected[i+j] ); 327 printf( "\n" ); 328 } 329 res |= thisres; 330 } 331 for (i = 0; i < N; i++) 332 ft.z[i] = -ft.z[i]; 333 for (i = 0; i < N; i += 4) { 334 int thisres = 0; 335 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%3), %%xmm8;" 336 "vfmsubadd132ps %%xmm7, %%xmm8, %%xmm9;" 337 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 338 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 339 for (j = 0; j < 4; j++) 340 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 341 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm8;" 342 "vfmsubadd132ps (%2), %%xmm8, %%xmm9;" 343 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 344 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 345 for (j = 0; j < 4; j++) 346 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 347 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm7; vmovaps (%2), %%xmm8;" 348 "vfmsubadd213ps %%xmm7, %%xmm8, %%xmm9;" 349 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 350 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 351 for (j = 0; j < 4; j++) 352 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 353 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm8;" 354 "vfmsubadd213ps (%3), %%xmm8, %%xmm9;" 355 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 356 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 357 for (j = 0; j < 4; j++) 358 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 359 __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%1), %%xmm8;" 360 "vfmsubadd231ps %%xmm7, %%xmm8, %%xmm9;" 361 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 362 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 363 for (j = 0; j < 4; j++) 364 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 365 __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%1), %%xmm8;" 366 "vfmsubadd231ps (%2), %%xmm8, %%xmm9;" 367 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 368 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 369 for (j = 0; j < 4; j++) 370 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 371 if (thisres) { 372 printf( "Failure 10 %d", i ); 373 for (j = 0; j < 4; j++) 374 printf( " %a %a", ft.res[i+j], ft.expected[i+j] ); 375 printf( "\n" ); 376 } 377 res |= thisres; 378 } 379 for (i = 1; i < N; i += 2) 380 ft.z[i] = -ft.z[i]; 381 for (i = 0; i < N; i += 8) { 382 int thisres = 0; 383 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%3), %%ymm8;" 384 "vfmadd132ps %%ymm7, %%ymm8, %%ymm9;" 385 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 386 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 387 for (j = 0; j < 8; j++) 388 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 389 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm8;" 390 "vfmadd132ps (%2), %%ymm8, %%ymm9;" 391 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 392 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 393 for (j = 0; j < 8; j++) 394 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 395 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm7; vmovaps (%2), %%ymm8;" 396 "vfmadd213ps %%ymm7, %%ymm8, %%ymm9;" 397 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 398 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 399 for (j = 0; j < 8; j++) 400 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 401 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm8;" 402 "vfmadd213ps (%3), %%ymm8, %%ymm9;" 403 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 404 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 405 for (j = 0; j < 8; j++) 406 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 407 __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%1), %%ymm8;" 408 "vfmadd231ps %%ymm7, %%ymm8, %%ymm9;" 409 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 410 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 411 for (j = 0; j < 8; j++) 412 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 413 __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%1), %%ymm8;" 414 "vfmadd231ps (%2), %%ymm8, %%ymm9;" 415 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 416 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 417 for (j = 0; j < 8; j++) 418 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 419 if (thisres) { 420 printf( "Failure 11 %d", i ); 421 for (j = 0; j < 8; j++) 422 printf( " %a %a", ft.res[i+j], ft.expected[i+j] ); 423 printf( "\n" ); 424 } 425 res |= thisres; 426 thisres = 0; 427 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%3), %%ymm8;" 428 "vfnmsub132ps %%ymm7, %%ymm8, %%ymm9;" 429 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 430 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 431 for (j = 0; j < 8; j++) 432 thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 433 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm8;" 434 "vfnmsub132ps (%2), %%ymm8, %%ymm9;" 435 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 436 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 437 for (j = 0; j < 8; j++) 438 thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 439 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm7; vmovaps (%2), %%ymm8;" 440 "vfnmsub213ps %%ymm7, %%ymm8, %%ymm9;" 441 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 442 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 443 for (j = 0; j < 8; j++) 444 thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 445 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm8;" 446 "vfnmsub213ps (%3), %%ymm8, %%ymm9;" 447 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 448 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 449 for (j = 0; j < 8; j++) 450 thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 451 __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%1), %%ymm8;" 452 "vfnmsub231ps %%ymm7, %%ymm8, %%ymm9;" 453 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 454 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 455 for (j = 0; j < 8; j++) 456 thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 457 __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%1), %%ymm8;" 458 "vfnmsub231ps (%2), %%ymm8, %%ymm9;" 459 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 460 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 461 for (j = 0; j < 8; j++) 462 thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 463 if (thisres) { 464 printf( "Failure 12 %d", i ); 465 for (j = 0; j < 8; j++) 466 printf( " %a %a", ft.res[i+j], ft.expected[i+j] ); 467 printf( "\n" ); 468 } 469 res |= thisres; 470 } 471 for (i = 0; i < N; i++) 472 ft.z[i] = -ft.z[i]; 473 for (i = 0; i < N; i += 8) { 474 int thisres = 0; 475 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%3), %%ymm8;" 476 "vfmsub132ps %%ymm7, %%ymm8, %%ymm9;" 477 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 478 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 479 for (j = 0; j < 8; j++) 480 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 481 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm8;" 482 "vfmsub132ps (%2), %%ymm8, %%ymm9;" 483 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 484 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 485 for (j = 0; j < 8; j++) 486 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 487 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm7; vmovaps (%2), %%ymm8;" 488 "vfmsub213ps %%ymm7, %%ymm8, %%ymm9;" 489 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 490 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 491 for (j = 0; j < 8; j++) 492 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 493 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm8;" 494 "vfmsub213ps (%3), %%ymm8, %%ymm9;" 495 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 496 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 497 for (j = 0; j < 8; j++) 498 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 499 __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%1), %%ymm8;" 500 "vfmsub231ps %%ymm7, %%ymm8, %%ymm9;" 501 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 502 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 503 for (j = 0; j < 8; j++) 504 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 505 __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%1), %%ymm8;" 506 "vfmsub231ps (%2), %%ymm8, %%ymm9;" 507 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 508 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 509 for (j = 0; j < 8; j++) 510 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 511 if (thisres) { 512 printf( "Failure 13 %d", i ); 513 for (j = 0; j < 8; j++) 514 printf( " %a %a", ft.res[i+j], ft.expected[i+j] ); 515 printf( "\n" ); 516 } 517 res |= thisres; 518 thisres = 0; 519 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%3), %%ymm8;" 520 "vfnmadd132ps %%ymm7, %%ymm8, %%ymm9;" 521 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 522 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 523 for (j = 0; j < 8; j++) 524 thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 525 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm8;" 526 "vfnmadd132ps (%2), %%ymm8, %%ymm9;" 527 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 528 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 529 for (j = 0; j < 8; j++) 530 thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 531 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm7; vmovaps (%2), %%ymm8;" 532 "vfnmadd213ps %%ymm7, %%ymm8, %%ymm9;" 533 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 534 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 535 for (j = 0; j < 8; j++) 536 thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 537 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm8;" 538 "vfnmadd213ps (%3), %%ymm8, %%ymm9;" 539 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 540 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 541 for (j = 0; j < 8; j++) 542 thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 543 __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%1), %%ymm8;" 544 "vfnmadd231ps %%ymm7, %%ymm8, %%ymm9;" 545 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 546 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 547 for (j = 0; j < 8; j++) 548 thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 549 __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%1), %%ymm8;" 550 "vfnmadd231ps (%2), %%ymm8, %%ymm9;" 551 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 552 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 553 for (j = 0; j < 8; j++) 554 thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 555 if (thisres) { 556 printf( "Failure 14 %d", i ); 557 for (j = 0; j < 8; j++) 558 printf( " %a %a", ft.res[i+j], ft.expected[i+j] ); 559 printf( "\n" ); 560 } 561 res |= thisres; 562 } 563 for (i = 1; i < N; i += 2) 564 ft.z[i] = -ft.z[i]; 565 for (i = 0; i < N; i += 8) { 566 int thisres = 0; 567 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%3), %%ymm8;" 568 "vfmaddsub132ps %%ymm7, %%ymm8, %%ymm9;" 569 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 570 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 571 for (j = 0; j < 8; j++) 572 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 573 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm8;" 574 "vfmaddsub132ps (%2), %%ymm8, %%ymm9;" 575 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 576 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 577 for (j = 0; j < 8; j++) 578 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 579 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm7; vmovaps (%2), %%ymm8;" 580 "vfmaddsub213ps %%ymm7, %%ymm8, %%ymm9;" 581 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 582 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 583 for (j = 0; j < 8; j++) 584 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 585 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm8;" 586 "vfmaddsub213ps (%3), %%ymm8, %%ymm9;" 587 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 588 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 589 for (j = 0; j < 8; j++) 590 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 591 __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%1), %%ymm8;" 592 "vfmaddsub231ps %%ymm7, %%ymm8, %%ymm9;" 593 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 594 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 595 for (j = 0; j < 8; j++) 596 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 597 __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%1), %%ymm8;" 598 "vfmaddsub231ps (%2), %%ymm8, %%ymm9;" 599 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 600 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 601 for (j = 0; j < 8; j++) 602 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 603 if (thisres) { 604 printf( "Failure 15 %d", i ); 605 for (j = 0; j < 8; j++) 606 printf( " %a %a", ft.res[i+j], ft.expected[i+j] ); 607 printf( "\n" ); 608 } 609 res |= thisres; 610 } 611 for (i = 0; i < N; i++) 612 ft.z[i] = -ft.z[i]; 613 for (i = 0; i < N; i += 8) { 614 int thisres = 0; 615 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%3), %%ymm8;" 616 "vfmsubadd132ps %%ymm7, %%ymm8, %%ymm9;" 617 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 618 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 619 for (j = 0; j < 8; j++) 620 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 621 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm8;" 622 "vfmsubadd132ps (%2), %%ymm8, %%ymm9;" 623 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 624 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 625 for (j = 0; j < 8; j++) 626 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 627 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm7; vmovaps (%2), %%ymm8;" 628 "vfmsubadd213ps %%ymm7, %%ymm8, %%ymm9;" 629 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 630 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 631 for (j = 0; j < 8; j++) 632 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 633 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm8;" 634 "vfmsubadd213ps (%3), %%ymm8, %%ymm9;" 635 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 636 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 637 for (j = 0; j < 8; j++) 638 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 639 __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%1), %%ymm8;" 640 "vfmsubadd231ps %%ymm7, %%ymm8, %%ymm9;" 641 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 642 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 643 for (j = 0; j < 8; j++) 644 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 645 __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%1), %%ymm8;" 646 "vfmsubadd231ps (%2), %%ymm8, %%ymm9;" 647 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 648 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 649 for (j = 0; j < 8; j++) 650 thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 651 if (thisres) { 652 printf( "Failure 16 %d", i ); 653 for (j = 0; j < 8; j++) 654 printf( " %a %a", ft.res[i+j], ft.expected[i+j] ); 655 printf( "\n" ); 656 } 657 res |= thisres; 658 } 659 for (i = 1; i < N; i += 2) 660 ft.z[i] = -ft.z[i]; 661 return res; 662} 663 664static int test( double x, double y ) 665{ 666 unsigned long long a, b; 667 memcpy( &a, &x, sizeof (a) ); 668 memcpy( &b, &y, sizeof (b) ); 669 if ((a & 0x7ff8000000000000ULL) == 0x7ff8000000000000ULL) 670 return (b & 0x7ff8000000000000ULL) != 0x7ff8000000000000ULL; 671 return memcmp( &a, &b, sizeof (a) ) != 0; 672} 673 674static int test_fma( void ) 675{ 676 int res = 0, i, j; 677 double w; 678 for (i = 0; i < N; i++) { 679 int thisres = 0; 680 __asm __volatile__ ("vfmadd132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i])); 681 thisres |= test( w, dt.expected[i] ); 682 __asm __volatile__ ("vfmadd132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "m" (dt.y[i]), "x" (dt.z[i])); 683 thisres |= test( w, dt.expected[i] ); 684 __asm __volatile__ ("vfmadd213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i])); 685 thisres |= test( w, dt.expected[i] ); 686 __asm __volatile__ ("vfmadd213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "m" (dt.z[i])); 687 thisres |= test( w, dt.expected[i] ); 688 __asm __volatile__ ("vfmadd231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "x" (dt.y[i]), "0" (dt.z[i])); 689 thisres |= test( w, dt.expected[i] ); 690 __asm __volatile__ ("vfmadd231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "m" (dt.y[i]), "0" (dt.z[i])); 691 thisres |= test( w, dt.expected[i] ); 692 if (thisres) 693 printf( "Failure 1 %d %a %a\n", i, w, dt.expected[i] ); 694 res |= thisres; 695 thisres = 0; 696 __asm __volatile__ ("vfnmsub132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i])); 697 thisres |= test( -w, dt.expected[i] ); 698 __asm __volatile__ ("vfnmsub132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "m" (dt.y[i]), "x" (dt.z[i])); 699 thisres |= test( -w, dt.expected[i] ); 700 __asm __volatile__ ("vfnmsub213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i])); 701 thisres |= test( -w, dt.expected[i] ); 702 __asm __volatile__ ("vfnmsub213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "m" (dt.z[i])); 703 thisres |= test( -w, dt.expected[i] ); 704 __asm __volatile__ ("vfnmsub231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "x" (dt.y[i]), "0" (dt.z[i])); 705 thisres |= test( -w, dt.expected[i] ); 706 __asm __volatile__ ("vfnmsub231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "m" (dt.y[i]), "0" (dt.z[i])); 707 thisres |= test( -w, dt.expected[i] ); 708 if (thisres) 709 printf( "Failure 2 %d %a %a\n", i, w, dt.expected[i] ); 710 res |= thisres; 711 } 712 for (i = 0; i < N; i++) 713 dt.z[i] = -dt.z[i]; 714 for (i = 0; i < N; i++) { 715 int thisres = 0; 716 __asm __volatile__ ("vfmsub132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i])); 717 thisres |= test( w, dt.expected[i] ); 718 __asm __volatile__ ("vfmsub132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "m" (dt.y[i]), "x" (dt.z[i])); 719 thisres |= test( w, dt.expected[i] ); 720 __asm __volatile__ ("vfmsub213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i])); 721 thisres |= test( w, dt.expected[i] ); 722 __asm __volatile__ ("vfmsub213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "m" (dt.z[i])); 723 thisres |= test( w, dt.expected[i] ); 724 __asm __volatile__ ("vfmsub231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "x" (dt.y[i]), "0" (dt.z[i])); 725 thisres |= test( w, dt.expected[i] ); 726 __asm __volatile__ ("vfmsub231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "m" (dt.y[i]), "0" (dt.z[i])); 727 thisres |= test( w, dt.expected[i] ); 728 if (thisres) 729 printf( "Failure 3 %d %a %a\n", i, w, dt.expected[i] ); 730 res |= thisres; 731 thisres = 0; 732 __asm __volatile__ ("vfnmadd132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i])); 733 thisres |= test( -w, dt.expected[i] ); 734 __asm __volatile__ ("vfnmadd132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "m" (dt.y[i]), "x" (dt.z[i])); 735 thisres |= test( -w, dt.expected[i] ); 736 __asm __volatile__ ("vfnmadd213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i])); 737 thisres |= test( -w, dt.expected[i] ); 738 __asm __volatile__ ("vfnmadd213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "m" (dt.z[i])); 739 thisres |= test( -w, dt.expected[i] ); 740 __asm __volatile__ ("vfnmadd231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "x" (dt.y[i]), "0" (dt.z[i])); 741 thisres |= test( -w, dt.expected[i] ); 742 __asm __volatile__ ("vfnmadd231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "m" (dt.y[i]), "0" (dt.z[i])); 743 thisres |= test( -w, dt.expected[i] ); 744 if (thisres) 745 printf( "Failure 4 %d %a %a\n", i, w, dt.expected[i] ); 746 res |= thisres; 747 } 748 for (i = 0; i < N; i++) 749 dt.z[i] = -dt.z[i]; 750 for (i = 0; i < N; i += 2) { 751 int thisres = 0; 752 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;" 753 "vfmadd132pd %%xmm7, %%xmm8, %%xmm9;" 754 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 755 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 756 for (j = 0; j < 2; j++) 757 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 758 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;" 759 "vfmadd132pd (%2), %%xmm8, %%xmm9;" 760 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 761 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 762 for (j = 0; j < 2; j++) 763 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 764 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;" 765 "vfmadd213pd %%xmm7, %%xmm8, %%xmm9;" 766 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 767 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 768 for (j = 0; j < 2; j++) 769 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 770 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;" 771 "vfmadd213pd (%3), %%xmm8, %%xmm9;" 772 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 773 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 774 for (j = 0; j < 2; j++) 775 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 776 __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;" 777 "vfmadd231pd %%xmm7, %%xmm8, %%xmm9;" 778 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 779 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 780 for (j = 0; j < 2; j++) 781 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 782 __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;" 783 "vfmadd231pd (%2), %%xmm8, %%xmm9;" 784 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 785 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 786 for (j = 0; j < 2; j++) 787 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 788 if (thisres) { 789 printf( "Failure 5 %d", i ); 790 for (j = 0; j < 2; j++) 791 printf( " %a %a", dt.res[i+j], dt.expected[i+j] ); 792 printf( "\n" ); 793 } 794 res |= thisres; 795 thisres = 0; 796 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;" 797 "vfnmsub132pd %%xmm7, %%xmm8, %%xmm9;" 798 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 799 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 800 for (j = 0; j < 2; j++) 801 thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 802 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;" 803 "vfnmsub132pd (%2), %%xmm8, %%xmm9;" 804 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 805 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 806 for (j = 0; j < 2; j++) 807 thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 808 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;" 809 "vfnmsub213pd %%xmm7, %%xmm8, %%xmm9;" 810 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 811 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 812 for (j = 0; j < 2; j++) 813 thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 814 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;" 815 "vfnmsub213pd (%3), %%xmm8, %%xmm9;" 816 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 817 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 818 for (j = 0; j < 2; j++) 819 thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 820 __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;" 821 "vfnmsub231pd %%xmm7, %%xmm8, %%xmm9;" 822 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 823 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 824 for (j = 0; j < 2; j++) 825 thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 826 __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;" 827 "vfnmsub231pd (%2), %%xmm8, %%xmm9;" 828 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 829 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 830 for (j = 0; j < 2; j++) 831 thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 832 if (thisres) { 833 printf( "Failure 6 %d", i ); 834 for (j = 0; j < 2; j++) 835 printf( " %a %a", dt.res[i+j], dt.expected[i+j] ); 836 printf( "\n" ); 837 } 838 res |= thisres; 839 } 840 for (i = 0; i < N; i++) 841 dt.z[i] = -dt.z[i]; 842 for (i = 0; i < N; i += 2) { 843 int thisres = 0; 844 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;" 845 "vfmsub132pd %%xmm7, %%xmm8, %%xmm9;" 846 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 847 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 848 for (j = 0; j < 2; j++) 849 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 850 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;" 851 "vfmsub132pd (%2), %%xmm8, %%xmm9;" 852 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 853 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 854 for (j = 0; j < 2; j++) 855 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 856 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;" 857 "vfmsub213pd %%xmm7, %%xmm8, %%xmm9;" 858 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 859 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 860 for (j = 0; j < 2; j++) 861 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 862 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;" 863 "vfmsub213pd (%3), %%xmm8, %%xmm9;" 864 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 865 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 866 for (j = 0; j < 2; j++) 867 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 868 __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;" 869 "vfmsub231pd %%xmm7, %%xmm8, %%xmm9;" 870 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 871 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 872 for (j = 0; j < 2; j++) 873 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 874 __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;" 875 "vfmsub231pd (%2), %%xmm8, %%xmm9;" 876 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 877 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 878 for (j = 0; j < 2; j++) 879 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 880 if (thisres) { 881 printf( "Failure 7 %d", i ); 882 for (j = 0; j < 2; j++) 883 printf( " %a %a", dt.res[i+j], dt.expected[i+j] ); 884 printf( "\n" ); 885 } 886 res |= thisres; 887 thisres = 0; 888 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;" 889 "vfnmadd132pd %%xmm7, %%xmm8, %%xmm9;" 890 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 891 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 892 for (j = 0; j < 2; j++) 893 thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 894 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;" 895 "vfnmadd132pd (%2), %%xmm8, %%xmm9;" 896 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 897 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 898 for (j = 0; j < 2; j++) 899 thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 900 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;" 901 "vfnmadd213pd %%xmm7, %%xmm8, %%xmm9;" 902 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 903 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 904 for (j = 0; j < 2; j++) 905 thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 906 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;" 907 "vfnmadd213pd (%3), %%xmm8, %%xmm9;" 908 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 909 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 910 for (j = 0; j < 2; j++) 911 thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 912 __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;" 913 "vfnmadd231pd %%xmm7, %%xmm8, %%xmm9;" 914 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 915 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 916 for (j = 0; j < 2; j++) 917 thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 918 __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;" 919 "vfnmadd231pd (%2), %%xmm8, %%xmm9;" 920 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 921 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 922 for (j = 0; j < 2; j++) 923 thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 924 if (thisres) { 925 printf( "Failure 8 %d", i ); 926 for (j = 0; j < 2; j++) 927 printf( " %a %a", dt.res[i+j], dt.expected[i+j] ); 928 printf( "\n" ); 929 } 930 res |= thisres; 931 } 932 for (i = 1; i < N; i += 2) 933 dt.z[i] = -dt.z[i]; 934 for (i = 0; i < N; i += 2) { 935 int thisres = 0; 936 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;" 937 "vfmaddsub132pd %%xmm7, %%xmm8, %%xmm9;" 938 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 939 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 940 for (j = 0; j < 2; j++) 941 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 942 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;" 943 "vfmaddsub132pd (%2), %%xmm8, %%xmm9;" 944 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 945 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 946 for (j = 0; j < 2; j++) 947 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 948 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;" 949 "vfmaddsub213pd %%xmm7, %%xmm8, %%xmm9;" 950 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 951 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 952 for (j = 0; j < 2; j++) 953 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 954 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;" 955 "vfmaddsub213pd (%3), %%xmm8, %%xmm9;" 956 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 957 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 958 for (j = 0; j < 2; j++) 959 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 960 __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;" 961 "vfmaddsub231pd %%xmm7, %%xmm8, %%xmm9;" 962 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 963 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 964 for (j = 0; j < 2; j++) 965 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 966 __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;" 967 "vfmaddsub231pd (%2), %%xmm8, %%xmm9;" 968 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 969 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 970 for (j = 0; j < 2; j++) 971 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 972 if (thisres) { 973 printf( "Failure 9 %d", i ); 974 for (j = 0; j < 2; j++) 975 printf( " %a %a", dt.res[i+j], dt.expected[i+j] ); 976 printf( "\n" ); 977 } 978 res |= thisres; 979 } 980 for (i = 0; i < N; i++) 981 dt.z[i] = -dt.z[i]; 982 for (i = 0; i < N; i += 2) { 983 int thisres = 0; 984 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;" 985 "vfmsubadd132pd %%xmm7, %%xmm8, %%xmm9;" 986 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 987 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 988 for (j = 0; j < 2; j++) 989 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 990 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;" 991 "vfmsubadd132pd (%2), %%xmm8, %%xmm9;" 992 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 993 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 994 for (j = 0; j < 2; j++) 995 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 996 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;" 997 "vfmsubadd213pd %%xmm7, %%xmm8, %%xmm9;" 998 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 999 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1000 for (j = 0; j < 2; j++) 1001 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1002 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;" 1003 "vfmsubadd213pd (%3), %%xmm8, %%xmm9;" 1004 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1005 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1006 for (j = 0; j < 2; j++) 1007 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1008 __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;" 1009 "vfmsubadd231pd %%xmm7, %%xmm8, %%xmm9;" 1010 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1011 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1012 for (j = 0; j < 2; j++) 1013 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1014 __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;" 1015 "vfmsubadd231pd (%2), %%xmm8, %%xmm9;" 1016 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1017 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1018 for (j = 0; j < 2; j++) 1019 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1020 if (thisres) { 1021 printf( "Failure 10 %d", i ); 1022 for (j = 0; j < 2; j++) 1023 printf( " %a %a", dt.res[i+j], dt.expected[i+j] ); 1024 printf( "\n" ); 1025 } 1026 res |= thisres; 1027 } 1028 for (i = 1; i < N; i += 2) 1029 dt.z[i] = -dt.z[i]; 1030 for (i = 0; i < N; i += 4) { 1031 int thisres = 0; 1032 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;" 1033 "vfmadd132pd %%ymm7, %%ymm8, %%ymm9;" 1034 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1035 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1036 for (j = 0; j < 4; j++) 1037 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1038 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;" 1039 "vfmadd132pd (%2), %%ymm8, %%ymm9;" 1040 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1041 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1042 for (j = 0; j < 4; j++) 1043 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1044 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;" 1045 "vfmadd213pd %%ymm7, %%ymm8, %%ymm9;" 1046 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1047 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1048 for (j = 0; j < 4; j++) 1049 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1050 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;" 1051 "vfmadd213pd (%3), %%ymm8, %%ymm9;" 1052 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1053 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1054 for (j = 0; j < 4; j++) 1055 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1056 __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;" 1057 "vfmadd231pd %%ymm7, %%ymm8, %%ymm9;" 1058 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1059 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1060 for (j = 0; j < 4; j++) 1061 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1062 __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;" 1063 "vfmadd231pd (%2), %%ymm8, %%ymm9;" 1064 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1065 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1066 for (j = 0; j < 4; j++) 1067 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1068 if (thisres) { 1069 printf( "Failure 11 %d", i ); 1070 for (j = 0; j < 4; j++) 1071 printf( " %a %a", dt.res[i+j], dt.expected[i+j] ); 1072 printf( "\n" ); 1073 } 1074 res |= thisres; 1075 thisres = 0; 1076 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;" 1077 "vfnmsub132pd %%ymm7, %%ymm8, %%ymm9;" 1078 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1079 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1080 for (j = 0; j < 4; j++) 1081 thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 1082 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;" 1083 "vfnmsub132pd (%2), %%ymm8, %%ymm9;" 1084 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1085 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1086 for (j = 0; j < 4; j++) 1087 thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 1088 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;" 1089 "vfnmsub213pd %%ymm7, %%ymm8, %%ymm9;" 1090 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1091 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1092 for (j = 0; j < 4; j++) 1093 thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 1094 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;" 1095 "vfnmsub213pd (%3), %%ymm8, %%ymm9;" 1096 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1097 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1098 for (j = 0; j < 4; j++) 1099 thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 1100 __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;" 1101 "vfnmsub231pd %%ymm7, %%ymm8, %%ymm9;" 1102 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1103 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1104 for (j = 0; j < 4; j++) 1105 thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 1106 __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;" 1107 "vfnmsub231pd (%2), %%ymm8, %%ymm9;" 1108 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1109 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1110 for (j = 0; j < 4; j++) 1111 thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 1112 if (thisres) { 1113 printf( "Failure 12 %d", i ); 1114 for (j = 0; j < 4; j++) 1115 printf( " %a %a", dt.res[i+j], dt.expected[i+j] ); 1116 printf( "\n" ); 1117 } 1118 res |= thisres; 1119 } 1120 for (i = 0; i < N; i++) 1121 dt.z[i] = -dt.z[i]; 1122 for (i = 0; i < N; i += 4) { 1123 int thisres = 0; 1124 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;" 1125 "vfmsub132pd %%ymm7, %%ymm8, %%ymm9;" 1126 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1127 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1128 for (j = 0; j < 4; j++) 1129 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1130 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;" 1131 "vfmsub132pd (%2), %%ymm8, %%ymm9;" 1132 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1133 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1134 for (j = 0; j < 4; j++) 1135 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1136 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;" 1137 "vfmsub213pd %%ymm7, %%ymm8, %%ymm9;" 1138 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1139 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1140 for (j = 0; j < 4; j++) 1141 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1142 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;" 1143 "vfmsub213pd (%3), %%ymm8, %%ymm9;" 1144 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1145 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1146 for (j = 0; j < 4; j++) 1147 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1148 __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;" 1149 "vfmsub231pd %%ymm7, %%ymm8, %%ymm9;" 1150 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1151 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1152 for (j = 0; j < 4; j++) 1153 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1154 __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;" 1155 "vfmsub231pd (%2), %%ymm8, %%ymm9;" 1156 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1157 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1158 for (j = 0; j < 4; j++) 1159 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1160 if (thisres) { 1161 printf( "Failure 13 %d", i ); 1162 for (j = 0; j < 4; j++) 1163 printf( " %a %a", dt.res[i+j], dt.expected[i+j] ); 1164 printf( "\n" ); 1165 } 1166 res |= thisres; 1167 thisres = 0; 1168 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;" 1169 "vfnmadd132pd %%ymm7, %%ymm8, %%ymm9;" 1170 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1171 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1172 for (j = 0; j < 4; j++) 1173 thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 1174 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;" 1175 "vfnmadd132pd (%2), %%ymm8, %%ymm9;" 1176 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1177 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1178 for (j = 0; j < 4; j++) 1179 thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 1180 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;" 1181 "vfnmadd213pd %%ymm7, %%ymm8, %%ymm9;" 1182 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1183 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1184 for (j = 0; j < 4; j++) 1185 thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 1186 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;" 1187 "vfnmadd213pd (%3), %%ymm8, %%ymm9;" 1188 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1189 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1190 for (j = 0; j < 4; j++) 1191 thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 1192 __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;" 1193 "vfnmadd231pd %%ymm7, %%ymm8, %%ymm9;" 1194 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1195 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1196 for (j = 0; j < 4; j++) 1197 thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 1198 __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;" 1199 "vfnmadd231pd (%2), %%ymm8, %%ymm9;" 1200 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1201 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1202 for (j = 0; j < 4; j++) 1203 thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 1204 if (thisres) { 1205 printf( "Failure 14 %d", i ); 1206 for (j = 0; j < 4; j++) 1207 printf( " %a %a", dt.res[i+j], dt.expected[i+j] ); 1208 printf( "\n" ); 1209 } 1210 res |= thisres; 1211 } 1212 for (i = 1; i < N; i += 2) 1213 dt.z[i] = -dt.z[i]; 1214 for (i = 0; i < N; i += 4) { 1215 int thisres = 0; 1216 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;" 1217 "vfmaddsub132pd %%ymm7, %%ymm8, %%ymm9;" 1218 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1219 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1220 for (j = 0; j < 4; j++) 1221 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1222 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;" 1223 "vfmaddsub132pd (%2), %%ymm8, %%ymm9;" 1224 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1225 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1226 for (j = 0; j < 4; j++) 1227 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1228 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;" 1229 "vfmaddsub213pd %%ymm7, %%ymm8, %%ymm9;" 1230 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1231 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1232 for (j = 0; j < 4; j++) 1233 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1234 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;" 1235 "vfmaddsub213pd (%3), %%ymm8, %%ymm9;" 1236 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1237 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1238 for (j = 0; j < 4; j++) 1239 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1240 __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;" 1241 "vfmaddsub231pd %%ymm7, %%ymm8, %%ymm9;" 1242 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1243 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1244 for (j = 0; j < 4; j++) 1245 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1246 __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;" 1247 "vfmaddsub231pd (%2), %%ymm8, %%ymm9;" 1248 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1249 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1250 for (j = 0; j < 4; j++) 1251 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1252 if (thisres) { 1253 printf( "Failure 15 %d", i ); 1254 for (j = 0; j < 4; j++) 1255 printf( " %a %a", dt.res[i+j], dt.expected[i+j] ); 1256 printf( "\n" ); 1257 } 1258 res |= thisres; 1259 } 1260 for (i = 0; i < N; i++) 1261 dt.z[i] = -dt.z[i]; 1262 for (i = 0; i < N; i += 4) { 1263 int thisres = 0; 1264 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;" 1265 "vfmsubadd132pd %%ymm7, %%ymm8, %%ymm9;" 1266 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1267 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1268 for (j = 0; j < 4; j++) 1269 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1270 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;" 1271 "vfmsubadd132pd (%2), %%ymm8, %%ymm9;" 1272 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1273 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1274 for (j = 0; j < 4; j++) 1275 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1276 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;" 1277 "vfmsubadd213pd %%ymm7, %%ymm8, %%ymm9;" 1278 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1279 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1280 for (j = 0; j < 4; j++) 1281 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1282 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;" 1283 "vfmsubadd213pd (%3), %%ymm8, %%ymm9;" 1284 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1285 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1286 for (j = 0; j < 4; j++) 1287 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1288 __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;" 1289 "vfmsubadd231pd %%ymm7, %%ymm8, %%ymm9;" 1290 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1291 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1292 for (j = 0; j < 4; j++) 1293 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1294 __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;" 1295 "vfmsubadd231pd (%2), %%ymm8, %%ymm9;" 1296 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1297 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1298 for (j = 0; j < 4; j++) 1299 thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1300 if (thisres) { 1301 printf( "Failure 16 %d", i ); 1302 for (j = 0; j < 4; j++) 1303 printf( " %a %a", dt.res[i+j], dt.expected[i+j] ); 1304 printf( "\n" ); 1305 } 1306 res |= thisres; 1307 } 1308 for (i = 1; i < N; i += 2) 1309 dt.z[i] = -dt.z[i]; 1310 return res; 1311} 1312 1313int main( ) 1314{ 1315 int res = 0; 1316 int i = 0; 1317 plus_zero = 0.0; 1318 __asm __volatile__ ("" : : "r" (&plus_zero) : "memory"); 1319 nan_value = plus_zero / plus_zero; 1320 plus_infty = 3.40282346638528859812e+38F * 16.0F; 1321 minus_infty = -plus_infty; 1322#define TEST_F( a, b, c, d ) \ 1323 do { \ 1324 ft.x[i] = a; \ 1325 ft.y[i] = b; \ 1326 ft.z[i] = c; \ 1327 ft.expected[i] = d; \ 1328 i++; \ 1329 } while (0) 1330 TEST_F( 1.0, 2.0, 3.0, 5.0 ); 1331 TEST_F( nan_value, 2.0, 3.0, nan_value ); 1332 TEST_F( 1.0, nan_value, 3.0, nan_value ); 1333 TEST_F( 1.0, 2.0, nan_value, nan_value ); 1334 TEST_F( plus_infty, 0.0, nan_value, nan_value ); 1335 TEST_F( minus_infty, 0.0, nan_value, nan_value ); 1336 TEST_F( 0.0, plus_infty, nan_value, nan_value ); 1337 TEST_F( 0.0, minus_infty, nan_value, nan_value ); 1338 TEST_F( plus_infty, 0.0, 1.0, nan_value ); 1339 TEST_F( minus_infty, 0.0, 1.0, nan_value ); 1340 TEST_F( 0.0, plus_infty, 1.0, nan_value ); 1341 TEST_F( 0.0, minus_infty, 1.0, nan_value ); 1342 TEST_F( plus_infty, plus_infty, minus_infty, nan_value ); 1343 TEST_F( minus_infty, plus_infty, plus_infty, nan_value ); 1344 TEST_F( plus_infty, minus_infty, plus_infty, nan_value ); 1345 TEST_F( minus_infty, minus_infty, minus_infty, nan_value ); 1346 TEST_F( plus_infty, 3.5L, minus_infty, nan_value ); 1347 TEST_F( minus_infty, -7.5L, minus_infty, nan_value ); 1348 TEST_F( -13.5L, plus_infty, plus_infty, nan_value ); 1349 TEST_F( minus_infty, 7.5L, plus_infty, nan_value ); 1350 TEST_F( 1.25L, 0.75L, 0.0625L, 1.0L ); 1351 TEST_F( -3.40282346638528859812e+38F, -3.40282346638528859812e+38F, minus_infty, minus_infty ); 1352 TEST_F( 3.40282346638528859812e+38F / 2, 3.40282346638528859812e+38F / 2, minus_infty, minus_infty ); 1353 TEST_F( -3.40282346638528859812e+38F, 3.40282346638528859812e+38F, plus_infty, plus_infty ); 1354 TEST_F( 3.40282346638528859812e+38F / 2, -3.40282346638528859812e+38F / 4, plus_infty, plus_infty ); 1355 TEST_F( plus_infty, 4, plus_infty, plus_infty ); 1356 TEST_F( 2, minus_infty, minus_infty, minus_infty ); 1357 TEST_F( minus_infty, minus_infty, plus_infty, plus_infty ); 1358 TEST_F( plus_infty, minus_infty, minus_infty, minus_infty ); 1359 TEST_F( 0x1.7ff8p+13, 0x1.000002p+0, 0x1.ffffp-24, 0x1.7ff802p+13 ); 1360 TEST_F( 0x1.fffp+0, 0x1.00001p+0, -0x1.fffp+0, 0x1.fffp-20 ); 1361 TEST_F( 0x1.9abcdep+127, 0x0.9abcdep-126, -0x1.f08948p+0, 0x1.bb421p-25 ); 1362 TEST_F( 0x1.9abcdep+100, 0x0.9abcdep-126, -0x1.f08948p-27, 0x1.bb421p-52 ); 1363 TEST_F( 0x1.fffffep+127, 0x1.001p+0, -0x1.fffffep+127, 0x1.fffffep+115 ); 1364 TEST_F( -0x1.fffffep+127, 0x1.fffffep+0, 0x1.fffffep+127, -0x1.fffffap+127 ); 1365 TEST_F( 0x1.fffffep+127, 2.0, -0x1.fffffep+127, 0x1.fffffep+127 ); 1366 1367 res |= test_fmaf( ); 1368 i = 0; 1369#define TEST( a, b, c, d ) \ 1370 do { \ 1371 dt.x[i] = a; \ 1372 dt.y[i] = b; \ 1373 dt.z[i] = c; \ 1374 dt.expected[i] = d; \ 1375 i++; \ 1376 } while (0) 1377 TEST( 1.0, 2.0, 3.0, 5.0 ); 1378 TEST( nan_value, 2.0, 3.0, nan_value ); 1379 TEST( 1.0, nan_value, 3.0, nan_value ); 1380 TEST( 1.0, 2.0, nan_value, nan_value ); 1381 TEST( plus_infty, 0.0, nan_value, nan_value ); 1382 TEST( minus_infty, 0.0, nan_value, nan_value ); 1383 TEST( 0.0, plus_infty, nan_value, nan_value ); 1384 TEST( 0.0, minus_infty, nan_value, nan_value ); 1385 TEST( plus_infty, 0.0, 1.0, nan_value ); 1386 TEST( minus_infty, 0.0, 1.0, nan_value ); 1387 TEST( 0.0, plus_infty, 1.0, nan_value ); 1388 TEST( 0.0, minus_infty, 1.0, nan_value ); 1389 TEST( plus_infty, plus_infty, minus_infty, nan_value ); 1390 TEST( minus_infty, plus_infty, plus_infty, nan_value ); 1391 TEST( plus_infty, minus_infty, plus_infty, nan_value ); 1392 TEST( minus_infty, minus_infty, minus_infty, nan_value ); 1393 TEST( plus_infty, 3.5L, minus_infty, nan_value ); 1394 TEST( minus_infty, -7.5L, minus_infty, nan_value ); 1395 TEST( -13.5L, plus_infty, plus_infty, nan_value ); 1396 TEST( minus_infty, 7.5L, plus_infty, nan_value ); 1397 TEST( 1.25L, 0.75L, 0.0625L, 1.0L ); 1398 TEST( -1.79769313486231570815e+308L, -1.79769313486231570815e+308L, minus_infty, minus_infty ); 1399 TEST( 1.79769313486231570815e+308L / 2, 1.79769313486231570815e+308L / 2, minus_infty, minus_infty ); 1400 TEST( -1.79769313486231570815e+308L, 1.79769313486231570815e+308L, plus_infty, plus_infty ); 1401 TEST( 1.79769313486231570815e+308L / 2, -1.79769313486231570815e+308L / 4, plus_infty, plus_infty ); 1402 TEST( plus_infty, 4, plus_infty, plus_infty ); 1403 TEST( 2, minus_infty, minus_infty, minus_infty ); 1404 TEST( minus_infty, minus_infty, plus_infty, plus_infty ); 1405 TEST( plus_infty, minus_infty, minus_infty, minus_infty ); 1406 TEST( 0x1.7fp+13, 0x1.0000000000001p+0, 0x1.ffep-48, 0x1.7f00000000001p+13 ); 1407 TEST( 0x1.fffp+0, 0x1.0000000000001p+0, -0x1.fffp+0, 0x1.fffp-52 ); 1408 TEST( 0x1.0000002p+0, 0x1.ffffffcp-1, 0x1p-300, 1.0 ); 1409 TEST( 0x1.0000002p+0, 0x1.ffffffcp-1, -0x1p-300, 0x1.fffffffffffffp-1 ); 1410 TEST( 0x1.deadbeef2feedp+1023, 0x0.deadbeef2feedp-1022, -0x1.a05f8c01a4bfbp+1, 0x1.0989687bc9da4p-53 ); 1411 TEST( 0x1.deadbeef2feedp+900, 0x0.deadbeef2feedp-1022, -0x1.a05f8c01a4bfbp-122, 0x1.0989687bc9da4p-176 ); 1412 TEST( 0x1.fffffffffffffp+1023, 0x1.001p+0, -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1011 ); 1413 TEST( -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+0, 0x1.fffffffffffffp+1023, -0x1.ffffffffffffdp+1023 ); 1414 TEST( 0x1.fffffffffffffp+1023, 2.0, -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1023 ); 1415 TEST( 0x1.6a09e667f3bccp-538, 0x1.6a09e667f3bccp-538, 0.0, 0.0 ); 1416 TEST( 0x1.deadbeef2feedp-495, 0x1.deadbeef2feedp-495, -0x1.bf86a5786a574p-989, 0x0.0000042625a1fp-1022 ); 1417 TEST( 0x1.deadbeef2feedp-503, 0x1.deadbeef2feedp-503, -0x1.bf86a5786a574p-1005, 0x0.0000000004262p-1022 ); 1418 TEST( 0x1p-537, 0x1p-538, 0x1p-1074, 0x0.0000000000002p-1022 ); 1419 TEST( 0x1.7fffff8p-968, 0x1p-106, 0x0.000001p-1022, 0x0.0000010000001p-1022 ); 1420 TEST( 0x1.4000004p-967, 0x1p-106, 0x0.000001p-1022, 0x0.0000010000003p-1022 ); 1421 TEST( 0x1.4p-967, -0x1p-106, -0x0.000001p-1022, -0x0.0000010000002p-1022 ); 1422 TEST( -0x1.19cab66d73e17p-959, 0x1.c7108a8c5ff51p-107, -0x0.80b0ad65d9b64p-1022, -0x0.80b0ad65d9d59p-1022 ); 1423 TEST( -0x1.d2eaed6e8e9d3p-979, -0x1.4e066c62ac9ddp-63, -0x0.9245e6b003454p-1022, -0x0.9245c09c5fb5dp-1022 ); 1424 TEST( 0x1.153d650bb9f06p-907, 0x1.2d01230d48407p-125, -0x0.b278d5acfc3cp-1022, -0x0.b22757123bbe9p-1022 ); 1425 TEST( -0x1.fffffffffffffp-711, 0x1.fffffffffffffp-275, 0x1.fffffe00007ffp-983, 0x1.7ffffe00007ffp-983 ); 1426 1427 res |= test_fma( ); 1428 if (res == 0) 1429 printf( "Testing successful\n"); 1430 return 0; 1431} 1432