/*
 * NOTE(review): every source line below carries a fused
 * "<line number><40-hex-digit id><author name>" prefix.  This looks like
 * residue from a blame-view extraction rather than C source -- confirm and
 * strip it before attempting to compile.
 *
 * N  - number of elements in each test vector (64).
 *
 * ft - single-precision test data, 32-byte aligned.  The code that follows
 *      reads x[], y[] and z[] as operands of the fused multiply-add
 *      instructions, stores packed results into res[], and compares
 *      results against expected[].
 * dt - same layout with double elements; not referenced in the part of
 *      the file visible here.
 *
 * plus_zero, plus_infty, minus_infty, nan_value - float globals; they are
 *      neither initialised nor referenced in the part of the file visible
 *      here.
 *
 * testf( x, y ) - compare a computed float x against a reference float y
 *      by bit pattern.  Both values are copied into unsigned ints with
 *      memcpy, so no floating-point comparison is performed.
 *
 *      - If x has every bit of 0x7fc00000 set (exponent all ones plus the
 *        top mantissa bit; a quiet NaN assuming IEEE-754 binary32 floats),
 *        y only has to have those same bits set.  The sign bit and the
 *        remaining mantissa bits are ignored in that case.
 *      - Otherwise the two 32-bit patterns must be identical, which means
 *        values that differ only in sign (e.g. +0 and -0) do not match.
 *
 *      Returns 0 when the values match under these rules and 1 when they
 *      do not.  Callers OR the result into a failure flag.
 *      NOTE(review): relies on sizeof(unsigned int) == sizeof(float);
 *      nothing here checks that.
 */
1436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#include <stdio.h> 2436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#include <string.h> 3436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 4436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#define N 64 5436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanovstruct float_test { 6436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov float x[N], y[N], z[N], expected[N], res[N]; 7436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov} ft __attribute__((aligned (32))); 8436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 9436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanovstruct double_test { 10436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov double x[N], y[N], z[N], expected[N], res[N]; 11436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov} dt __attribute__((aligned (32))); 12436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 13436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanovfloat plus_zero, plus_infty, minus_infty, nan_value; 14436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 15436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanovstatic int testf( float x, float y ) 16436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov{ 17436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov unsigned int a, b; 18436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov memcpy( &a, &x, sizeof (a) ); 19436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov memcpy( &b, &y, sizeof (b) ); 20436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if ((a & 0x7fc00000U) == 0x7fc00000U) 21436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov return (b & 0x7fc00000U) != 0x7fc00000U; 22436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov return memcmp( &a, &b, sizeof (a) ) != 0; 23436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov} 24436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 25436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanovstatic int test_fmaf( void ) 
26436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov{ 27436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov int res = 0, i, j; 28436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov float w; 29436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (i = 0; i < N; i++) { 30436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov int thisres = 0; 31436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfmadd132ss %2, %3, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "x" (ft.z[i])); 32436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( w, ft.expected[i] ); 33436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfmadd132ss %2, %3, %0" : "=x" (w) : "0" (ft.x[i]), "m" (ft.y[i]), "x" (ft.z[i])); 34436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( w, ft.expected[i] ); 35436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfmadd213ss %3, %2, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "x" (ft.z[i])); 36436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( w, ft.expected[i] ); 37436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfmadd213ss %3, %2, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "m" (ft.z[i])); 38436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( w, ft.expected[i] ); 39436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfmadd231ss %2, %1, %0" : "=x" (w) : "x" (ft.x[i]), "x" (ft.y[i]), "0" (ft.z[i])); 40436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( w, ft.expected[i] ); 41436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfmadd231ss %2, %1, %0" : "=x" (w) : "x" (ft.x[i]), "m" (ft.y[i]), "0" (ft.z[i])); 42436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( w, ft.expected[i] ); 43436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (thisres) 
44436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "Failure 1 %d %a %a\n", i, w, ft.expected[i] ); 45436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov res |= thisres; 46436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres = 0; 47436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfnmsub132ss %2, %3, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "x" (ft.z[i])); 48436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( -w, ft.expected[i] ); 49436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfnmsub132ss %2, %3, %0" : "=x" (w) : "0" (ft.x[i]), "m" (ft.y[i]), "x" (ft.z[i])); 50436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( -w, ft.expected[i] ); 51436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfnmsub213ss %3, %2, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "x" (ft.z[i])); 52436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( -w, ft.expected[i] ); 53436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfnmsub213ss %3, %2, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "m" (ft.z[i])); 54436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( -w, ft.expected[i] ); 55436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfnmsub231ss %2, %1, %0" : "=x" (w) : "x" (ft.x[i]), "x" (ft.y[i]), "0" (ft.z[i])); 56436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( -w, ft.expected[i] ); 57436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfnmsub231ss %2, %1, %0" : "=x" (w) : "x" (ft.x[i]), "m" (ft.y[i]), "0" (ft.z[i])); 58436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( -w, ft.expected[i] ); 59436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (thisres) 60436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "Failure 2 %d %a %a\n", i, w, ft.expected[i] ); 
61436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov res |= thisres; 62436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 63436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (i = 0; i < N; i++) 64436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov ft.z[i] = -ft.z[i]; 65436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (i = 0; i < N; i++) { 66436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov int thisres = 0; 67436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfmsub132ss %2, %3, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "x" (ft.z[i])); 68436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( w, ft.expected[i] ); 69436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfmsub132ss %2, %3, %0" : "=x" (w) : "0" (ft.x[i]), "m" (ft.y[i]), "x" (ft.z[i])); 70436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( w, ft.expected[i] ); 71436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfmsub213ss %3, %2, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "x" (ft.z[i])); 72436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( w, ft.expected[i] ); 73436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfmsub213ss %3, %2, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "m" (ft.z[i])); 74436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( w, ft.expected[i] ); 75436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfmsub231ss %2, %1, %0" : "=x" (w) : "x" (ft.x[i]), "x" (ft.y[i]), "0" (ft.z[i])); 76436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( w, ft.expected[i] ); 77436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfmsub231ss %2, %1, %0" : "=x" (w) : "x" (ft.x[i]), "m" (ft.y[i]), "0" (ft.z[i])); 78436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( w, ft.expected[i] ); 
79436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (thisres) 80436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "Failure 3 %d %a %a\n", i, w, ft.expected[i] ); 81436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov res |= thisres; 82436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres = 0; 83436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfnmadd132ss %2, %3, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "x" (ft.z[i])); 84436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( -w, ft.expected[i] ); 85436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfnmadd132ss %2, %3, %0" : "=x" (w) : "0" (ft.x[i]), "m" (ft.y[i]), "x" (ft.z[i])); 86436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( -w, ft.expected[i] ); 87436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfnmadd213ss %3, %2, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "x" (ft.z[i])); 88436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( -w, ft.expected[i] ); 89436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfnmadd213ss %3, %2, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "m" (ft.z[i])); 90436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( -w, ft.expected[i] ); 91436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfnmadd231ss %2, %1, %0" : "=x" (w) : "x" (ft.x[i]), "x" (ft.y[i]), "0" (ft.z[i])); 92436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( -w, ft.expected[i] ); 93436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfnmadd231ss %2, %1, %0" : "=x" (w) : "x" (ft.x[i]), "m" (ft.y[i]), "0" (ft.z[i])); 94436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( -w, ft.expected[i] ); 95436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (thisres) 96436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy 
Ivanov printf( "Failure 4 %d %a %a\n", i, w, ft.expected[i] ); 97436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov res |= thisres; 98436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 99436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (i = 0; i < N; i++) 100436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov ft.z[i] = -ft.z[i]; 101436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (i = 0; i < N; i += 4) { 102436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov int thisres = 0; 103436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%3), %%xmm8;" 104436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmadd132ps %%xmm7, %%xmm8, %%xmm9;" 105436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 106436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 107436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 108436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 109436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm8;" 110436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmadd132ps (%2), %%xmm8, %%xmm9;" 111436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 112436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 113436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 114436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 115436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm7; vmovaps (%2), %%xmm8;" 
116436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmadd213ps %%xmm7, %%xmm8, %%xmm9;" 117436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 118436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 119436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 120436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 121436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm8;" 122436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmadd213ps (%3), %%xmm8, %%xmm9;" 123436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 124436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 125436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 126436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 127436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%1), %%xmm8;" 128436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmadd231ps %%xmm7, %%xmm8, %%xmm9;" 129436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 130436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 131436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 132436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 133436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%1), %%xmm8;" 
134436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmadd231ps (%2), %%xmm8, %%xmm9;" 135436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 136436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 137436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 138436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 139436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (thisres) { 140436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "Failure 5 %d", i ); 141436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 142436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( " %a %a", ft.res[i+j], ft.expected[i+j] ); 143436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "\n" ); 144436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 145436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov res |= thisres; 146436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres = 0; 147436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%3), %%xmm8;" 148436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmsub132ps %%xmm7, %%xmm8, %%xmm9;" 149436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 150436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 151436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 152436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 153436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm8;" 154436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy 
Ivanov "vfnmsub132ps (%2), %%xmm8, %%xmm9;" 155436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 156436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 157436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 158436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 159436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm7; vmovaps (%2), %%xmm8;" 160436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmsub213ps %%xmm7, %%xmm8, %%xmm9;" 161436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 162436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 163436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 164436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 165436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm8;" 166436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmsub213ps (%3), %%xmm8, %%xmm9;" 167436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 168436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 169436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 170436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 171436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%1), %%xmm8;" 172436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 
"vfnmsub231ps %%xmm7, %%xmm8, %%xmm9;" 173436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 174436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 175436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 176436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 177436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%1), %%xmm8;" 178436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmsub231ps (%2), %%xmm8, %%xmm9;" 179436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 180436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 181436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 182436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 183436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (thisres) { 184436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "Failure 6 %d", i ); 185436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 186436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( " %a %a", ft.res[i+j], ft.expected[i+j] ); 187436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "\n" ); 188436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 189436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov res |= thisres; 190436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 191436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (i = 0; i < N; i++) 192436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov ft.z[i] = -ft.z[i]; 193436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (i = 0; i < N; i += 4) { 
194436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov int thisres = 0; 195436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%3), %%xmm8;" 196436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsub132ps %%xmm7, %%xmm8, %%xmm9;" 197436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 198436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 199436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 200436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 201436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm8;" 202436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsub132ps (%2), %%xmm8, %%xmm9;" 203436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 204436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 205436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 206436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 207436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm7; vmovaps (%2), %%xmm8;" 208436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsub213ps %%xmm7, %%xmm8, %%xmm9;" 209436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 210436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 211436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 212436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy 
Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 213436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm8;" 214436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsub213ps (%3), %%xmm8, %%xmm9;" 215436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 216436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 217436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 218436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 219436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%1), %%xmm8;" 220436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsub231ps %%xmm7, %%xmm8, %%xmm9;" 221436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 222436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 223436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 224436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 225436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%1), %%xmm8;" 226436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsub231ps (%2), %%xmm8, %%xmm9;" 227436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 228436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 229436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 230436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], 
ft.expected[i+j] ); 231436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (thisres) { 232436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "Failure 7 %d", i ); 233436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 234436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( " %a %a", ft.res[i+j], ft.expected[i+j] ); 235436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "\n" ); 236436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 237436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov res |= thisres; 238436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres = 0; 239436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%3), %%xmm8;" 240436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmadd132ps %%xmm7, %%xmm8, %%xmm9;" 241436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 242436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 243436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 244436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 245436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm8;" 246436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmadd132ps (%2), %%xmm8, %%xmm9;" 247436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 248436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 249436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 250436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 
251436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm7; vmovaps (%2), %%xmm8;" 252436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmadd213ps %%xmm7, %%xmm8, %%xmm9;" 253436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 254436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 255436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 256436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 257436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm8;" 258436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmadd213ps (%3), %%xmm8, %%xmm9;" 259436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 260436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 261436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 262436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 263436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%1), %%xmm8;" 264436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmadd231ps %%xmm7, %%xmm8, %%xmm9;" 265436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 266436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 267436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 268436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 
269436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%1), %%xmm8;" 270436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmadd231ps (%2), %%xmm8, %%xmm9;" 271436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 272436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 273436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 274436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 275436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (thisres) { 276436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "Failure 8 %d", i ); 277436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 278436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( " %a %a", ft.res[i+j], ft.expected[i+j] ); 279436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "\n" ); 280436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 281436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov res |= thisres; 282436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 283436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (i = 1; i < N; i += 2) 284436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov ft.z[i] = -ft.z[i]; 285436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (i = 0; i < N; i += 4) { 286436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov int thisres = 0; 287436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%3), %%xmm8;" 288436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmaddsub132ps %%xmm7, %%xmm8, %%xmm9;" 289436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 
290436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 291436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 292436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 293436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm8;" 294436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmaddsub132ps (%2), %%xmm8, %%xmm9;" 295436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 296436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 297436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 298436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 299436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm7; vmovaps (%2), %%xmm8;" 300436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmaddsub213ps %%xmm7, %%xmm8, %%xmm9;" 301436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 302436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 303436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 304436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 305436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm8;" 306436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmaddsub213ps (%3), %%xmm8, %%xmm9;" 307436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 
308436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 309436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 310436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 311436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%1), %%xmm8;" 312436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmaddsub231ps %%xmm7, %%xmm8, %%xmm9;" 313436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 314436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 315436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 316436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 317436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%1), %%xmm8;" 318436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmaddsub231ps (%2), %%xmm8, %%xmm9;" 319436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 320436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 321436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 322436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 323436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (thisres) { 324436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "Failure 9 %d", i ); 325436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 326436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( " %a %a", ft.res[i+j], ft.expected[i+j] ); 
327436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "\n" ); 328436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 329436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov res |= thisres; 330436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 331436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (i = 0; i < N; i++) 332436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov ft.z[i] = -ft.z[i]; 333436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (i = 0; i < N; i += 4) { 334436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov int thisres = 0; 335436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%3), %%xmm8;" 336436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsubadd132ps %%xmm7, %%xmm8, %%xmm9;" 337436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 338436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 339436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 340436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 341436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm8;" 342436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsubadd132ps (%2), %%xmm8, %%xmm9;" 343436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 344436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 345436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 346436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 347436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps 
(%1), %%xmm9; vmovaps (%3), %%xmm7; vmovaps (%2), %%xmm8;" 348436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsubadd213ps %%xmm7, %%xmm8, %%xmm9;" 349436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 350436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 351436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 352436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 353436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm8;" 354436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsubadd213ps (%3), %%xmm8, %%xmm9;" 355436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 356436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 357436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 358436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 359436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%1), %%xmm8;" 360436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsubadd231ps %%xmm7, %%xmm8, %%xmm9;" 361436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 362436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 363436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 364436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 365436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%3), 
%%xmm9; vmovaps (%1), %%xmm8;" 366436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsubadd231ps (%2), %%xmm8, %%xmm9;" 367436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 368436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 369436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 370436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 371436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (thisres) { 372436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "Failure 10 %d", i ); 373436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 374436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( " %a %a", ft.res[i+j], ft.expected[i+j] ); 375436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "\n" ); 376436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 377436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov res |= thisres; 378436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 379436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (i = 1; i < N; i += 2) 380436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov ft.z[i] = -ft.z[i]; 381436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (i = 0; i < N; i += 8) { 382436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov int thisres = 0; 383436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%3), %%ymm8;" 384436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmadd132ps %%ymm7, %%ymm8, %%ymm9;" 385436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 386436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 
387436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 388436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 389436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm8;" 390436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmadd132ps (%2), %%ymm8, %%ymm9;" 391436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 392436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 393436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 394436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 395436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm7; vmovaps (%2), %%ymm8;" 396436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmadd213ps %%ymm7, %%ymm8, %%ymm9;" 397436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 398436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 399436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 400436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 401436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm8;" 402436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmadd213ps (%3), %%ymm8, %%ymm9;" 403436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 404436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 
405436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 406436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 407436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%1), %%ymm8;" 408436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmadd231ps %%ymm7, %%ymm8, %%ymm9;" 409436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 410436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 411436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 412436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 413436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%1), %%ymm8;" 414436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmadd231ps (%2), %%ymm8, %%ymm9;" 415436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 416436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 417436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 418436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 419436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (thisres) { 420436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "Failure 11 %d", i ); 421436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 422436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( " %a %a", ft.res[i+j], ft.expected[i+j] ); 423436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "\n" ); 424436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 
425436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov res |= thisres; 426436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres = 0; 427436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%3), %%ymm8;" 428436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmsub132ps %%ymm7, %%ymm8, %%ymm9;" 429436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 430436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 431436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 432436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 433436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm8;" 434436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmsub132ps (%2), %%ymm8, %%ymm9;" 435436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 436436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 437436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 438436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 439436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm7; vmovaps (%2), %%ymm8;" 440436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmsub213ps %%ymm7, %%ymm8, %%ymm9;" 441436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 442436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 443436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 
for (j = 0; j < 8; j++) 444436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 445436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm8;" 446436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmsub213ps (%3), %%ymm8, %%ymm9;" 447436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 448436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 449436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 450436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 451436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%1), %%ymm8;" 452436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmsub231ps %%ymm7, %%ymm8, %%ymm9;" 453436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 454436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 455436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 456436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 457436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%1), %%ymm8;" 458436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmsub231ps (%2), %%ymm8, %%ymm9;" 459436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 460436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 461436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 
462436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 463436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (thisres) { 464436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "Failure 12 %d", i ); 465436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 466436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( " %a %a", ft.res[i+j], ft.expected[i+j] ); 467436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "\n" ); 468436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 469436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov res |= thisres; 470436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 471436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (i = 0; i < N; i++) 472436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov ft.z[i] = -ft.z[i]; 473436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (i = 0; i < N; i += 8) { 474436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov int thisres = 0; 475436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%3), %%ymm8;" 476436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsub132ps %%ymm7, %%ymm8, %%ymm9;" 477436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 478436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 479436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 480436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 481436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm8;" 482436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsub132ps (%2), %%ymm8, %%ymm9;" 483436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 
"vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 484436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 485436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 486436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 487436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm7; vmovaps (%2), %%ymm8;" 488436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsub213ps %%ymm7, %%ymm8, %%ymm9;" 489436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 490436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 491436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 492436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 493436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm8;" 494436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsub213ps (%3), %%ymm8, %%ymm9;" 495436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 496436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 497436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 498436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 499436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%1), %%ymm8;" 500436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsub231ps %%ymm7, %%ymm8, %%ymm9;" 501436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%ymm9, 
(%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 502436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 503436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 504436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 505436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%1), %%ymm8;" 506436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsub231ps (%2), %%ymm8, %%ymm9;" 507436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 508436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 509436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 510436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 511436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (thisres) { 512436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "Failure 13 %d", i ); 513436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 514436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( " %a %a", ft.res[i+j], ft.expected[i+j] ); 515436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "\n" ); 516436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 517436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov res |= thisres; 518436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres = 0; 519436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%3), %%ymm8;" 520436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmadd132ps %%ymm7, %%ymm8, %%ymm9;" 521436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 
522436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 523436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 524436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 525436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm8;" 526436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmadd132ps (%2), %%ymm8, %%ymm9;" 527436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 528436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 529436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 530436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 531436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm7; vmovaps (%2), %%ymm8;" 532436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmadd213ps %%ymm7, %%ymm8, %%ymm9;" 533436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 534436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 535436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 536436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 537436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm8;" 538436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmadd213ps (%3), %%ymm8, %%ymm9;" 539436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 
540436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 541436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 542436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 543436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%1), %%ymm8;" 544436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmadd231ps %%ymm7, %%ymm8, %%ymm9;" 545436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 546436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 547436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 548436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 549436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%1), %%ymm8;" 550436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmadd231ps (%2), %%ymm8, %%ymm9;" 551436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 552436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 553436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 554436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); 555436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (thisres) { 556436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "Failure 14 %d", i ); 557436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 558436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( " %a %a", ft.res[i+j], ft.expected[i+j] ); 
559436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "\n" ); 560436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 561436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov res |= thisres; 562436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 563436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (i = 1; i < N; i += 2) 564436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov ft.z[i] = -ft.z[i]; 565436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (i = 0; i < N; i += 8) { 566436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov int thisres = 0; 567436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%3), %%ymm8;" 568436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmaddsub132ps %%ymm7, %%ymm8, %%ymm9;" 569436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 570436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 571436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 572436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 573436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm8;" 574436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmaddsub132ps (%2), %%ymm8, %%ymm9;" 575436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 576436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 577436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 578436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 579436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps 
(%1), %%ymm9; vmovaps (%3), %%ymm7; vmovaps (%2), %%ymm8;" 580436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmaddsub213ps %%ymm7, %%ymm8, %%ymm9;" 581436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 582436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 583436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 584436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 585436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm8;" 586436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmaddsub213ps (%3), %%ymm8, %%ymm9;" 587436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 588436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 589436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 590436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 591436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%1), %%ymm8;" 592436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmaddsub231ps %%ymm7, %%ymm8, %%ymm9;" 593436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 594436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 595436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 596436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 597436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%3), 
%%ymm9; vmovaps (%1), %%ymm8;" 598436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmaddsub231ps (%2), %%ymm8, %%ymm9;" 599436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 600436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 601436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 602436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 603436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (thisres) { 604436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "Failure 15 %d", i ); 605436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 606436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( " %a %a", ft.res[i+j], ft.expected[i+j] ); 607436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "\n" ); 608436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 609436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov res |= thisres; 610436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 611436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (i = 0; i < N; i++) 612436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov ft.z[i] = -ft.z[i]; 613436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (i = 0; i < N; i += 8) { 614436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov int thisres = 0; 615436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%3), %%ymm8;" 616436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsubadd132ps %%ymm7, %%ymm8, %%ymm9;" 617436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 618436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 
619436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 620436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 621436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm8;" 622436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsubadd132ps (%2), %%ymm8, %%ymm9;" 623436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 624436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 625436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 626436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 627436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm7; vmovaps (%2), %%ymm8;" 628436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsubadd213ps %%ymm7, %%ymm8, %%ymm9;" 629436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 630436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 631436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 632436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 633436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm8;" 634436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsubadd213ps (%3), %%ymm8, %%ymm9;" 635436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 636436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 
637436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 638436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 639436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%1), %%ymm8;" 640436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsubadd231ps %%ymm7, %%ymm8, %%ymm9;" 641436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 642436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 643436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 644436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 645436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%1), %%ymm8;" 646436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsubadd231ps (%2), %%ymm8, %%ymm9;" 647436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), 648436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); 649436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 650436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= testf( ft.res[i+j], ft.expected[i+j] ); 651436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (thisres) { 652436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "Failure 16 %d", i ); 653436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 8; j++) 654436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( " %a %a", ft.res[i+j], ft.expected[i+j] ); 655436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "\n" ); 656436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 
657436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov res |= thisres; 658436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 659436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (i = 1; i < N; i += 2) 660436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov ft.z[i] = -ft.z[i]; 661436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov return res; 662436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov} 663436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov /* test(): mismatch predicate for two doubles, compared by raw bit pattern rather than with ==. Returns 0 when x and y are accepted as equal and 1 otherwise; callers OR the result into a per-iteration failure flag. x is the computed value, y the expected one. */ 664436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanovstatic int test( double x, double y ) 665436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov{ 666436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov unsigned long long a, b; /* Copy the object representations of x and y into integers via memcpy (no pointer-cast type punning). Assumes sizeof(double) == sizeof(unsigned long long) - TODO confirm for the target. */ 667436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov memcpy( &a, &x, sizeof (a) ); 668436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov memcpy( &b, &y, sizeof (b) ); /* If every bit of the mask 0x7ff8000000000000 is set in x, only require the same mask bits to be set in y: the sign bit and the remaining low bits are not compared in this case. NOTE(review): under IEEE-754 binary64 this mask is the all-ones exponent plus the top fraction bit, i.e. the quiet-NaN pattern - confirm that is the intent. */ 669436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if ((a & 0x7ff8000000000000ULL) == 0x7ff8000000000000ULL) 670436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov return (b & 0x7ff8000000000000ULL) != 0x7ff8000000000000ULL; /* Otherwise the two bit patterns must be identical; any differing bit, including the sign bit, is reported as a mismatch. */ 671436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov return memcmp( &a, &b, sizeof (a) ) != 0; 672436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov} 673436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 674436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanovstatic int test_fma( void ) 675436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov{ 676436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov int res = 0, i, j; 677436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov double w; 678436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (i = 0; i < N; i++) { 679436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov int thisres = 0; 680436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfmadd132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i])); 
681436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( w, dt.expected[i] ); 682436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfmadd132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "m" (dt.y[i]), "x" (dt.z[i])); 683436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( w, dt.expected[i] ); 684436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfmadd213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i])); 685436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( w, dt.expected[i] ); 686436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfmadd213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "m" (dt.z[i])); 687436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( w, dt.expected[i] ); 688436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfmadd231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "x" (dt.y[i]), "0" (dt.z[i])); 689436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( w, dt.expected[i] ); 690436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfmadd231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "m" (dt.y[i]), "0" (dt.z[i])); 691436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( w, dt.expected[i] ); 692436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (thisres) 693436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "Failure 1 %d %a %a\n", i, w, dt.expected[i] ); 694436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov res |= thisres; 695436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres = 0; 696436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfnmsub132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i])); 697436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( -w, dt.expected[i] ); 
698436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfnmsub132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "m" (dt.y[i]), "x" (dt.z[i])); 699436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( -w, dt.expected[i] ); 700436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfnmsub213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i])); 701436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( -w, dt.expected[i] ); 702436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfnmsub213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "m" (dt.z[i])); 703436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( -w, dt.expected[i] ); 704436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfnmsub231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "x" (dt.y[i]), "0" (dt.z[i])); 705436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( -w, dt.expected[i] ); 706436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfnmsub231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "m" (dt.y[i]), "0" (dt.z[i])); 707436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( -w, dt.expected[i] ); 708436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (thisres) 709436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "Failure 2 %d %a %a\n", i, w, dt.expected[i] ); 710436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov res |= thisres; 711436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 712436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (i = 0; i < N; i++) 713436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov dt.z[i] = -dt.z[i]; 714436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (i = 0; i < N; i++) { 715436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov int thisres = 0; 716436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 
__asm __volatile__ ("vfmsub132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i])); 717436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( w, dt.expected[i] ); 718436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfmsub132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "m" (dt.y[i]), "x" (dt.z[i])); 719436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( w, dt.expected[i] ); 720436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfmsub213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i])); 721436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( w, dt.expected[i] ); 722436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfmsub213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "m" (dt.z[i])); 723436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( w, dt.expected[i] ); 724436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfmsub231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "x" (dt.y[i]), "0" (dt.z[i])); 725436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( w, dt.expected[i] ); 726436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfmsub231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "m" (dt.y[i]), "0" (dt.z[i])); 727436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( w, dt.expected[i] ); 728436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (thisres) 729436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "Failure 3 %d %a %a\n", i, w, dt.expected[i] ); 730436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov res |= thisres; 731436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres = 0; 732436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfnmadd132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i])); 
733436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( -w, dt.expected[i] ); 734436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfnmadd132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "m" (dt.y[i]), "x" (dt.z[i])); 735436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( -w, dt.expected[i] ); 736436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfnmadd213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i])); 737436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( -w, dt.expected[i] ); 738436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfnmadd213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "m" (dt.z[i])); 739436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( -w, dt.expected[i] ); 740436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfnmadd231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "x" (dt.y[i]), "0" (dt.z[i])); 741436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( -w, dt.expected[i] ); 742436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vfnmadd231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "m" (dt.y[i]), "0" (dt.z[i])); 743436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( -w, dt.expected[i] ); 744436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (thisres) 745436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "Failure 4 %d %a %a\n", i, w, dt.expected[i] ); 746436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov res |= thisres; 747436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 748436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (i = 0; i < N; i++) 749436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov dt.z[i] = -dt.z[i]; 750436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (i = 0; i < N; i += 2) { 
751436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov int thisres = 0; 752436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;" 753436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmadd132pd %%xmm7, %%xmm8, %%xmm9;" 754436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 755436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 756436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 757436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 758436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;" 759436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmadd132pd (%2), %%xmm8, %%xmm9;" 760436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 761436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 762436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 763436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 764436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;" 765436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmadd213pd %%xmm7, %%xmm8, %%xmm9;" 766436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 767436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 768436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 769436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy 
Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 770436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;" 771436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmadd213pd (%3), %%xmm8, %%xmm9;" 772436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 773436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 774436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 775436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 776436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;" 777436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmadd231pd %%xmm7, %%xmm8, %%xmm9;" 778436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 779436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 780436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 781436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 782436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;" 783436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmadd231pd (%2), %%xmm8, %%xmm9;" 784436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 785436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 786436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 787436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], 
dt.expected[i+j] ); 788436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (thisres) { 789436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "Failure 5 %d", i ); 790436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 791436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( " %a %a", dt.res[i+j], dt.expected[i+j] ); 792436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "\n" ); 793436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 794436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov res |= thisres; 795436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres = 0; 796436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;" 797436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmsub132pd %%xmm7, %%xmm8, %%xmm9;" 798436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 799436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 800436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 801436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 802436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;" 803436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmsub132pd (%2), %%xmm8, %%xmm9;" 804436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 805436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 806436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 807436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 
808436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;" 809436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmsub213pd %%xmm7, %%xmm8, %%xmm9;" 810436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 811436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 812436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 813436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 814436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;" 815436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmsub213pd (%3), %%xmm8, %%xmm9;" 816436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 817436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 818436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 819436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 820436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;" 821436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmsub231pd %%xmm7, %%xmm8, %%xmm9;" 822436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 823436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 824436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 825436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 
826436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;" 827436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmsub231pd (%2), %%xmm8, %%xmm9;" 828436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 829436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 830436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 831436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 832436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (thisres) { 833436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "Failure 6 %d", i ); 834436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 835436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( " %a %a", dt.res[i+j], dt.expected[i+j] ); 836436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "\n" ); 837436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 838436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov res |= thisres; 839436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 840436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (i = 0; i < N; i++) 841436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov dt.z[i] = -dt.z[i]; 842436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (i = 0; i < N; i += 2) { 843436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov int thisres = 0; 844436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;" 845436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsub132pd %%xmm7, %%xmm8, %%xmm9;" 846436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 847436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy 
Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 848436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 849436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 850436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;" 851436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsub132pd (%2), %%xmm8, %%xmm9;" 852436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 853436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 854436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 855436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 856436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;" 857436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsub213pd %%xmm7, %%xmm8, %%xmm9;" 858436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 859436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 860436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 861436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 862436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;" 863436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsub213pd (%3), %%xmm8, %%xmm9;" 864436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 865436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : 
"xmm7", "xmm8", "xmm9"); 866436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 867436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 868436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;" 869436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsub231pd %%xmm7, %%xmm8, %%xmm9;" 870436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 871436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 872436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 873436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 874436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;" 875436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsub231pd (%2), %%xmm8, %%xmm9;" 876436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 877436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 878436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 879436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 880436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (thisres) { 881436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "Failure 7 %d", i ); 882436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 883436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( " %a %a", dt.res[i+j], dt.expected[i+j] ); 884436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "\n" ); 
885436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 886436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov res |= thisres; 887436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres = 0; 888436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;" 889436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmadd132pd %%xmm7, %%xmm8, %%xmm9;" 890436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 891436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 892436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 893436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 894436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;" 895436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmadd132pd (%2), %%xmm8, %%xmm9;" 896436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 897436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 898436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 899436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 900436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;" 901436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmadd213pd %%xmm7, %%xmm8, %%xmm9;" 902436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 903436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 
904436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 905436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 906436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;" 907436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmadd213pd (%3), %%xmm8, %%xmm9;" 908436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 909436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 910436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 911436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 912436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;" 913436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmadd231pd %%xmm7, %%xmm8, %%xmm9;" 914436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 915436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 916436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 917436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 918436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;" 919436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmadd231pd (%2), %%xmm8, %%xmm9;" 920436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 921436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 
922436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 923436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 924436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (thisres) { 925436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "Failure 8 %d", i ); 926436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 927436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( " %a %a", dt.res[i+j], dt.expected[i+j] ); 928436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "\n" ); 929436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 930436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov res |= thisres; 931436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 932436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (i = 1; i < N; i += 2) 933436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov dt.z[i] = -dt.z[i]; 934436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (i = 0; i < N; i += 2) { 935436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov int thisres = 0; 936436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;" 937436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmaddsub132pd %%xmm7, %%xmm8, %%xmm9;" 938436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 939436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 940436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 941436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 942436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;" 943436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 
"vfmaddsub132pd (%2), %%xmm8, %%xmm9;" 944436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 945436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 946436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 947436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 948436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;" 949436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmaddsub213pd %%xmm7, %%xmm8, %%xmm9;" 950436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 951436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 952436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 953436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 954436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;" 955436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmaddsub213pd (%3), %%xmm8, %%xmm9;" 956436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 957436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 958436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 959436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 960436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;" 961436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmaddsub231pd 
%%xmm7, %%xmm8, %%xmm9;" 962436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 963436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 964436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 965436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 966436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;" 967436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmaddsub231pd (%2), %%xmm8, %%xmm9;" 968436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 969436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 970436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 971436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 972436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (thisres) { 973436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "Failure 9 %d", i ); 974436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 975436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( " %a %a", dt.res[i+j], dt.expected[i+j] ); 976436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "\n" ); 977436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 978436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov res |= thisres; 979436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 980436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (i = 0; i < N; i++) 981436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov dt.z[i] = -dt.z[i]; 982436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (i = 0; i < N; i += 2) { 
983436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov int thisres = 0; 984436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;" 985436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsubadd132pd %%xmm7, %%xmm8, %%xmm9;" 986436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 987436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 988436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 989436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 990436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;" 991436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsubadd132pd (%2), %%xmm8, %%xmm9;" 992436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 993436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 994436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 995436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 996436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;" 997436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsubadd213pd %%xmm7, %%xmm8, %%xmm9;" 998436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 999436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1000436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 
1001436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1002436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;" 1003436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsubadd213pd (%3), %%xmm8, %%xmm9;" 1004436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1005436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1006436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 1007436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1008436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;" 1009436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsubadd231pd %%xmm7, %%xmm8, %%xmm9;" 1010436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1011436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1012436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 1013436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1014436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;" 1015436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsubadd231pd (%2), %%xmm8, %%xmm9;" 1016436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1017436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1018436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 
1019436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1020436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (thisres) { 1021436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "Failure 10 %d", i ); 1022436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 2; j++) 1023436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( " %a %a", dt.res[i+j], dt.expected[i+j] ); 1024436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "\n" ); 1025436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 1026436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov res |= thisres; 1027436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 1028436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (i = 1; i < N; i += 2) 1029436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov dt.z[i] = -dt.z[i]; 1030436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (i = 0; i < N; i += 4) { 1031436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov int thisres = 0; 1032436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;" 1033436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmadd132pd %%ymm7, %%ymm8, %%ymm9;" 1034436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1035436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1036436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1037436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1038436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;" 1039436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmadd132pd (%2), %%ymm8, %%ymm9;" 
1040436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1041436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1042436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1043436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1044436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;" 1045436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmadd213pd %%ymm7, %%ymm8, %%ymm9;" 1046436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1047436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1048436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1049436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1050436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;" 1051436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmadd213pd (%3), %%ymm8, %%ymm9;" 1052436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1053436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1054436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1055436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1056436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;" 1057436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmadd231pd %%ymm7, %%ymm8, %%ymm9;" 
1058436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1059436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1060436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1061436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1062436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;" 1063436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmadd231pd (%2), %%ymm8, %%ymm9;" 1064436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1065436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1066436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1067436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1068436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (thisres) { 1069436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "Failure 11 %d", i ); 1070436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1071436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( " %a %a", dt.res[i+j], dt.expected[i+j] ); 1072436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "\n" ); 1073436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 1074436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov res |= thisres; 1075436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres = 0; 1076436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;" 1077436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmsub132pd %%ymm7, %%ymm8, %%ymm9;" 
1078436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1079436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1080436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1081436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 1082436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;" 1083436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmsub132pd (%2), %%ymm8, %%ymm9;" 1084436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1085436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1086436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1087436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 1088436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;" 1089436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmsub213pd %%ymm7, %%ymm8, %%ymm9;" 1090436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1091436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1092436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1093436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 1094436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;" 1095436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmsub213pd (%3), %%ymm8, %%ymm9;" 
1096436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1097436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1098436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1099436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 1100436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;" 1101436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmsub231pd %%ymm7, %%ymm8, %%ymm9;" 1102436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1103436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1104436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1105436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 1106436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;" 1107436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmsub231pd (%2), %%ymm8, %%ymm9;" 1108436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1109436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1110436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1111436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 1112436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (thisres) { 1113436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "Failure 12 %d", i ); 1114436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; 
j++) 1115436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( " %a %a", dt.res[i+j], dt.expected[i+j] ); 1116436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "\n" ); 1117436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 1118436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov res |= thisres; 1119436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 1120436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (i = 0; i < N; i++) 1121436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov dt.z[i] = -dt.z[i]; 1122436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (i = 0; i < N; i += 4) { 1123436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov int thisres = 0; 1124436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;" 1125436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsub132pd %%ymm7, %%ymm8, %%ymm9;" 1126436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1127436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1128436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1129436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1130436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;" 1131436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsub132pd (%2), %%ymm8, %%ymm9;" 1132436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1133436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1134436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1135436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= 
test( dt.res[i+j], dt.expected[i+j] ); 1136436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;" 1137436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsub213pd %%ymm7, %%ymm8, %%ymm9;" 1138436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1139436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1140436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1141436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1142436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;" 1143436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsub213pd (%3), %%ymm8, %%ymm9;" 1144436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1145436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1146436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1147436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1148436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;" 1149436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsub231pd %%ymm7, %%ymm8, %%ymm9;" 1150436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1151436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1152436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1153436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= 
test( dt.res[i+j], dt.expected[i+j] ); 1154436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;" 1155436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsub231pd (%2), %%ymm8, %%ymm9;" 1156436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1157436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1158436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1159436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1160436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (thisres) { 1161436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "Failure 13 %d", i ); 1162436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1163436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( " %a %a", dt.res[i+j], dt.expected[i+j] ); 1164436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "\n" ); 1165436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 1166436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov res |= thisres; 1167436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres = 0; 1168436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;" 1169436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmadd132pd %%ymm7, %%ymm8, %%ymm9;" 1170436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1171436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1172436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1173436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( -dt.res[i+j], dt.expected[i+j] 
); 1174436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;" 1175436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmadd132pd (%2), %%ymm8, %%ymm9;" 1176436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1177436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1178436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1179436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 1180436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;" 1181436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmadd213pd %%ymm7, %%ymm8, %%ymm9;" 1182436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1183436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1184436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1185436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 1186436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;" 1187436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmadd213pd (%3), %%ymm8, %%ymm9;" 1188436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1189436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1190436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1191436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 
1192436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;" 1193436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmadd231pd %%ymm7, %%ymm8, %%ymm9;" 1194436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1195436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1196436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1197436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 1198436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;" 1199436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfnmadd231pd (%2), %%ymm8, %%ymm9;" 1200436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1201436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1202436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1203436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( -dt.res[i+j], dt.expected[i+j] ); 1204436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (thisres) { 1205436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "Failure 14 %d", i ); 1206436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1207436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( " %a %a", dt.res[i+j], dt.expected[i+j] ); 1208436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "\n" ); 1209436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 1210436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov res |= thisres; 1211436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 
1212436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (i = 1; i < N; i += 2) 1213436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov dt.z[i] = -dt.z[i]; 1214436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (i = 0; i < N; i += 4) { 1215436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov int thisres = 0; 1216436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;" 1217436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmaddsub132pd %%ymm7, %%ymm8, %%ymm9;" 1218436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1219436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1220436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1221436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1222436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;" 1223436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmaddsub132pd (%2), %%ymm8, %%ymm9;" 1224436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1225436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1226436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1227436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1228436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;" 1229436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmaddsub213pd %%ymm7, %%ymm8, %%ymm9;" 1230436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%ymm9, (%0)" : : "r" 
(&dt.res[i]), "r" (&dt.x[i]), 1231436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1232436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1233436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1234436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;" 1235436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmaddsub213pd (%3), %%ymm8, %%ymm9;" 1236436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1237436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1238436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1239436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1240436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;" 1241436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmaddsub231pd %%ymm7, %%ymm8, %%ymm9;" 1242436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1243436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1244436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1245436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1246436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;" 1247436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmaddsub231pd (%2), %%ymm8, %%ymm9;" 1248436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" 
(&dt.x[i]), 1249436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1250436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1251436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1252436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (thisres) { 1253436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "Failure 15 %d", i ); 1254436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1255436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( " %a %a", dt.res[i+j], dt.expected[i+j] ); 1256436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "\n" ); 1257436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 1258436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov res |= thisres; 1259436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 1260436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (i = 0; i < N; i++) 1261436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov dt.z[i] = -dt.z[i]; 1262436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (i = 0; i < N; i += 4) { 1263436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov int thisres = 0; 1264436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;" 1265436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsubadd132pd %%ymm7, %%ymm8, %%ymm9;" 1266436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1267436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1268436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1269436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 
1270436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;" 1271436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsubadd132pd (%2), %%ymm8, %%ymm9;" 1272436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1273436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1274436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1275436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1276436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;" 1277436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsubadd213pd %%ymm7, %%ymm8, %%ymm9;" 1278436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1279436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1280436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1281436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1282436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;" 1283436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsubadd213pd (%3), %%ymm8, %%ymm9;" 1284436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1285436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1286436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1287436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 
1288436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;" 1289436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsubadd231pd %%ymm7, %%ymm8, %%ymm9;" 1290436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1291436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1292436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1293436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1294436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;" 1295436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vfmsubadd231pd (%2), %%ymm8, %%ymm9;" 1296436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), 1297436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); 1298436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1299436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov thisres |= test( dt.res[i+j], dt.expected[i+j] ); 1300436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (thisres) { 1301436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "Failure 16 %d", i ); 1302436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (j = 0; j < 4; j++) 1303436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( " %a %a", dt.res[i+j], dt.expected[i+j] ); 1304436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "\n" ); 1305436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 1306436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov res |= thisres; 1307436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 
1308436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for (i = 1; i < N; i += 2) 1309436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov dt.z[i] = -dt.z[i]; 1310436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov return res; 1311436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov} 1312436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 1313436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanovint main( ) 1314436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov{ 1315436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov int res = 0; 1316436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov int i = 0; 1317436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov plus_zero = 0.0; 1318436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("" : : "r" (&plus_zero) : "memory"); 1319436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov nan_value = plus_zero / plus_zero; 1320436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov plus_infty = 3.40282346638528859812e+38F * 16.0F; 1321436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov minus_infty = -plus_infty; 1322436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#define TEST_F( a, b, c, d ) \ 1323436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov do { \ 1324436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov ft.x[i] = a; \ 1325436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov ft.y[i] = b; \ 1326436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov ft.z[i] = c; \ 1327436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov ft.expected[i] = d; \ 1328436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov i++; \ 1329436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } while (0) 1330436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST_F( 1.0, 2.0, 3.0, 5.0 ); 1331436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST_F( nan_value, 2.0, 3.0, nan_value ); 1332436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST_F( 1.0, nan_value, 3.0, nan_value ); 
1333436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST_F( 1.0, 2.0, nan_value, nan_value ); 1334436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST_F( plus_infty, 0.0, nan_value, nan_value ); 1335436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST_F( minus_infty, 0.0, nan_value, nan_value ); 1336436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST_F( 0.0, plus_infty, nan_value, nan_value ); 1337436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST_F( 0.0, minus_infty, nan_value, nan_value ); 1338436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST_F( plus_infty, 0.0, 1.0, nan_value ); 1339436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST_F( minus_infty, 0.0, 1.0, nan_value ); 1340436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST_F( 0.0, plus_infty, 1.0, nan_value ); 1341436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST_F( 0.0, minus_infty, 1.0, nan_value ); 1342436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST_F( plus_infty, plus_infty, minus_infty, nan_value ); 1343436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST_F( minus_infty, plus_infty, plus_infty, nan_value ); 1344436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST_F( plus_infty, minus_infty, plus_infty, nan_value ); 1345436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST_F( minus_infty, minus_infty, minus_infty, nan_value ); 1346436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST_F( plus_infty, 3.5L, minus_infty, nan_value ); 1347436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST_F( minus_infty, -7.5L, minus_infty, nan_value ); 1348436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST_F( -13.5L, plus_infty, plus_infty, nan_value ); 1349436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST_F( minus_infty, 7.5L, plus_infty, nan_value ); 1350436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST_F( 1.25L, 0.75L, 0.0625L, 1.0L ); 
1351436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST_F( -3.40282346638528859812e+38F, -3.40282346638528859812e+38F, minus_infty, minus_infty ); 1352436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST_F( 3.40282346638528859812e+38F / 2, 3.40282346638528859812e+38F / 2, minus_infty, minus_infty ); 1353436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST_F( -3.40282346638528859812e+38F, 3.40282346638528859812e+38F, plus_infty, plus_infty ); 1354436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST_F( 3.40282346638528859812e+38F / 2, -3.40282346638528859812e+38F / 4, plus_infty, plus_infty ); 1355436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST_F( plus_infty, 4, plus_infty, plus_infty ); 1356436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST_F( 2, minus_infty, minus_infty, minus_infty ); 1357436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST_F( minus_infty, minus_infty, plus_infty, plus_infty ); 1358436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST_F( plus_infty, minus_infty, minus_infty, minus_infty ); 1359436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST_F( 0x1.7ff8p+13, 0x1.000002p+0, 0x1.ffffp-24, 0x1.7ff802p+13 ); 1360436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST_F( 0x1.fffp+0, 0x1.00001p+0, -0x1.fffp+0, 0x1.fffp-20 ); 1361436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST_F( 0x1.9abcdep+127, 0x0.9abcdep-126, -0x1.f08948p+0, 0x1.bb421p-25 ); 1362436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST_F( 0x1.9abcdep+100, 0x0.9abcdep-126, -0x1.f08948p-27, 0x1.bb421p-52 ); 1363436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST_F( 0x1.fffffep+127, 0x1.001p+0, -0x1.fffffep+127, 0x1.fffffep+115 ); 1364436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST_F( -0x1.fffffep+127, 0x1.fffffep+0, 0x1.fffffep+127, -0x1.fffffap+127 ); 1365436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST_F( 0x1.fffffep+127, 2.0, -0x1.fffffep+127, 0x1.fffffep+127 ); 
1366436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 1367436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov res |= test_fmaf( ); 1368436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov i = 0; 1369436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#define TEST( a, b, c, d ) \ 1370436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov do { \ 1371436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov dt.x[i] = a; \ 1372436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov dt.y[i] = b; \ 1373436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov dt.z[i] = c; \ 1374436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov dt.expected[i] = d; \ 1375436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov i++; \ 1376436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } while (0) 1377436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( 1.0, 2.0, 3.0, 5.0 ); 1378436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( nan_value, 2.0, 3.0, nan_value ); 1379436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( 1.0, nan_value, 3.0, nan_value ); 1380436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( 1.0, 2.0, nan_value, nan_value ); 1381436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( plus_infty, 0.0, nan_value, nan_value ); 1382436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( minus_infty, 0.0, nan_value, nan_value ); 1383436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( 0.0, plus_infty, nan_value, nan_value ); 1384436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( 0.0, minus_infty, nan_value, nan_value ); 1385436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( plus_infty, 0.0, 1.0, nan_value ); 1386436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( minus_infty, 0.0, 1.0, nan_value ); 1387436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( 0.0, plus_infty, 1.0, nan_value ); 1388436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( 0.0, minus_infty, 1.0, 
nan_value ); 1389436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( plus_infty, plus_infty, minus_infty, nan_value ); 1390436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( minus_infty, plus_infty, plus_infty, nan_value ); 1391436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( plus_infty, minus_infty, plus_infty, nan_value ); 1392436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( minus_infty, minus_infty, minus_infty, nan_value ); 1393436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( plus_infty, 3.5L, minus_infty, nan_value ); 1394436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( minus_infty, -7.5L, minus_infty, nan_value ); 1395436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( -13.5L, plus_infty, plus_infty, nan_value ); 1396436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( minus_infty, 7.5L, plus_infty, nan_value ); 1397436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( 1.25L, 0.75L, 0.0625L, 1.0L ); 1398436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( -1.79769313486231570815e+308L, -1.79769313486231570815e+308L, minus_infty, minus_infty ); 1399436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( 1.79769313486231570815e+308L / 2, 1.79769313486231570815e+308L / 2, minus_infty, minus_infty ); 1400436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( -1.79769313486231570815e+308L, 1.79769313486231570815e+308L, plus_infty, plus_infty ); 1401436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( 1.79769313486231570815e+308L / 2, -1.79769313486231570815e+308L / 4, plus_infty, plus_infty ); 1402436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( plus_infty, 4, plus_infty, plus_infty ); 1403436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( 2, minus_infty, minus_infty, minus_infty ); 1404436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( minus_infty, minus_infty, plus_infty, plus_infty ); 
1405436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( plus_infty, minus_infty, minus_infty, minus_infty ); 1406436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( 0x1.7fp+13, 0x1.0000000000001p+0, 0x1.ffep-48, 0x1.7f00000000001p+13 ); 1407436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( 0x1.fffp+0, 0x1.0000000000001p+0, -0x1.fffp+0, 0x1.fffp-52 ); 1408436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( 0x1.0000002p+0, 0x1.ffffffcp-1, 0x1p-300, 1.0 ); 1409436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( 0x1.0000002p+0, 0x1.ffffffcp-1, -0x1p-300, 0x1.fffffffffffffp-1 ); 1410436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( 0x1.deadbeef2feedp+1023, 0x0.deadbeef2feedp-1022, -0x1.a05f8c01a4bfbp+1, 0x1.0989687bc9da4p-53 ); 1411436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( 0x1.deadbeef2feedp+900, 0x0.deadbeef2feedp-1022, -0x1.a05f8c01a4bfbp-122, 0x1.0989687bc9da4p-176 ); 1412436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( 0x1.fffffffffffffp+1023, 0x1.001p+0, -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1011 ); 1413436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+0, 0x1.fffffffffffffp+1023, -0x1.ffffffffffffdp+1023 ); 1414436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( 0x1.fffffffffffffp+1023, 2.0, -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1023 ); 1415436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( 0x1.6a09e667f3bccp-538, 0x1.6a09e667f3bccp-538, 0.0, 0.0 ); 1416436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( 0x1.deadbeef2feedp-495, 0x1.deadbeef2feedp-495, -0x1.bf86a5786a574p-989, 0x0.0000042625a1fp-1022 ); 1417436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( 0x1.deadbeef2feedp-503, 0x1.deadbeef2feedp-503, -0x1.bf86a5786a574p-1005, 0x0.0000000004262p-1022 ); 1418436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( 0x1p-537, 0x1p-538, 0x1p-1074, 
0x0.0000000000002p-1022 ); 1419436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( 0x1.7fffff8p-968, 0x1p-106, 0x0.000001p-1022, 0x0.0000010000001p-1022 ); 1420436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( 0x1.4000004p-967, 0x1p-106, 0x0.000001p-1022, 0x0.0000010000003p-1022 ); 1421436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( 0x1.4p-967, -0x1p-106, -0x0.000001p-1022, -0x0.0000010000002p-1022 ); 1422436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( -0x1.19cab66d73e17p-959, 0x1.c7108a8c5ff51p-107, -0x0.80b0ad65d9b64p-1022, -0x0.80b0ad65d9d59p-1022 ); 1423436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( -0x1.d2eaed6e8e9d3p-979, -0x1.4e066c62ac9ddp-63, -0x0.9245e6b003454p-1022, -0x0.9245c09c5fb5dp-1022 ); 1424436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( 0x1.153d650bb9f06p-907, 0x1.2d01230d48407p-125, -0x0.b278d5acfc3cp-1022, -0x0.b22757123bbe9p-1022 ); 1425436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov TEST( -0x1.fffffffffffffp-711, 0x1.fffffffffffffp-275, 0x1.fffffe00007ffp-983, 0x1.7ffffe00007ffp-983 ); 1426436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 1427436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov res |= test_fma( ); 1428436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (res == 0) 1429436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov printf( "Testing successful\n"); 1430436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov return 0; 1431436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov} 1432