1436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#include <stdio.h>
2436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#include <string.h>
3436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov
/* Number of elements in each test array; also used as the bound of the
   per-element test loops. */
#define N 64
5436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanovstruct float_test {
6436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   float x[N], y[N], z[N], expected[N], res[N];
7436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov} ft __attribute__((aligned (32)));
8436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov
9436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanovstruct double_test {
10436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   double x[N], y[N], z[N], expected[N], res[N];
11436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov} dt __attribute__((aligned (32)));
12436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov
/* Single-precision globals named after special values.  They are only
   declared here (so they start out zero-initialised); presumably they
   are assigned their real values elsewhere in the file -- TODO confirm. */
float plus_zero;
float plus_infty;
float minus_infty;
float nan_value;
14436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov
/*
 * Compare two floats by their bit patterns.
 *
 * Returns 0 when x and y are considered equal and 1 otherwise.
 *
 * If x has all exponent bits and the top mantissa bit set (mask
 * 0x7fc00000, i.e. a quiet-NaN pattern), y merely has to match that same
 * mask: sign and the remaining payload bits are ignored.  In every other
 * case the two bit patterns must be identical, so e.g. +0.0 and -0.0 are
 * reported as different.
 */
static int testf( float x, float y )
{
   const unsigned int qnan_mask = 0x7fc00000U;
   unsigned int xbits, ybits;

   /* memcpy is the aliasing-safe way to read the raw float bits. */
   memcpy( &xbits, &x, sizeof (xbits) );
   memcpy( &ybits, &y, sizeof (ybits) );

   /* Ordinary value: require an exact bitwise match. */
   if ((xbits & qnan_mask) != qnan_mask)
      return xbits != ybits;

   /* x is a quiet NaN: y must be one as well. */
   return (ybits & qnan_mask) != qnan_mask;
}
24436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov
25436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanovstatic int test_fmaf( void )
26436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov{
27436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   int res = 0, i, j;
28436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   float w;
29436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   for (i = 0; i < N; i++) {
30436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      int thisres = 0;
31436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfmadd132ss %2, %3, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "x" (ft.z[i]));
32436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= testf( w, ft.expected[i] );
33436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfmadd132ss %2, %3, %0" : "=x" (w) : "0" (ft.x[i]), "m" (ft.y[i]), "x" (ft.z[i]));
34436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= testf( w, ft.expected[i] );
35436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfmadd213ss %3, %2, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "x" (ft.z[i]));
36436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= testf( w, ft.expected[i] );
37436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfmadd213ss %3, %2, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "m" (ft.z[i]));
38436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= testf( w, ft.expected[i] );
39436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfmadd231ss %2, %1, %0" : "=x" (w) : "x" (ft.x[i]), "x" (ft.y[i]), "0" (ft.z[i]));
40436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= testf( w, ft.expected[i] );
41436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfmadd231ss %2, %1, %0" : "=x" (w) : "x" (ft.x[i]), "m" (ft.y[i]), "0" (ft.z[i]));
42436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= testf( w, ft.expected[i] );
43436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      if (thisres)
44436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "Failure 1 %d %a %a\n", i, w, ft.expected[i] );
45436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      res |= thisres;
46436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres = 0;
47436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfnmsub132ss %2, %3, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "x" (ft.z[i]));
48436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= testf( -w, ft.expected[i] );
49436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfnmsub132ss %2, %3, %0" : "=x" (w) : "0" (ft.x[i]), "m" (ft.y[i]), "x" (ft.z[i]));
50436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= testf( -w, ft.expected[i] );
51436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfnmsub213ss %3, %2, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "x" (ft.z[i]));
52436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= testf( -w, ft.expected[i] );
53436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfnmsub213ss %3, %2, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "m" (ft.z[i]));
54436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= testf( -w, ft.expected[i] );
55436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfnmsub231ss %2, %1, %0" : "=x" (w) : "x" (ft.x[i]), "x" (ft.y[i]), "0" (ft.z[i]));
56436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= testf( -w, ft.expected[i] );
57436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfnmsub231ss %2, %1, %0" : "=x" (w) : "x" (ft.x[i]), "m" (ft.y[i]), "0" (ft.z[i]));
58436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= testf( -w, ft.expected[i] );
59436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      if (thisres)
60436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "Failure 2 %d %a %a\n", i, w, ft.expected[i] );
61436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      res |= thisres;
62436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   }
63436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   for (i = 0; i < N; i++)
64436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      ft.z[i] = -ft.z[i];
65436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   for (i = 0; i < N; i++) {
66436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      int thisres = 0;
67436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfmsub132ss %2, %3, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "x" (ft.z[i]));
68436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= testf( w, ft.expected[i] );
69436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfmsub132ss %2, %3, %0" : "=x" (w) : "0" (ft.x[i]), "m" (ft.y[i]), "x" (ft.z[i]));
70436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= testf( w, ft.expected[i] );
71436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfmsub213ss %3, %2, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "x" (ft.z[i]));
72436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= testf( w, ft.expected[i] );
73436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfmsub213ss %3, %2, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "m" (ft.z[i]));
74436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= testf( w, ft.expected[i] );
75436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfmsub231ss %2, %1, %0" : "=x" (w) : "x" (ft.x[i]), "x" (ft.y[i]), "0" (ft.z[i]));
76436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= testf( w, ft.expected[i] );
77436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfmsub231ss %2, %1, %0" : "=x" (w) : "x" (ft.x[i]), "m" (ft.y[i]), "0" (ft.z[i]));
78436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= testf( w, ft.expected[i] );
79436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      if (thisres)
80436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "Failure 3 %d %a %a\n", i, w, ft.expected[i] );
81436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      res |= thisres;
82436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres = 0;
83436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfnmadd132ss %2, %3, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "x" (ft.z[i]));
84436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= testf( -w, ft.expected[i] );
85436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfnmadd132ss %2, %3, %0" : "=x" (w) : "0" (ft.x[i]), "m" (ft.y[i]), "x" (ft.z[i]));
86436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= testf( -w, ft.expected[i] );
87436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfnmadd213ss %3, %2, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "x" (ft.z[i]));
88436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= testf( -w, ft.expected[i] );
89436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfnmadd213ss %3, %2, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "m" (ft.z[i]));
90436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= testf( -w, ft.expected[i] );
91436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfnmadd231ss %2, %1, %0" : "=x" (w) : "x" (ft.x[i]), "x" (ft.y[i]), "0" (ft.z[i]));
92436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= testf( -w, ft.expected[i] );
93436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfnmadd231ss %2, %1, %0" : "=x" (w) : "x" (ft.x[i]), "m" (ft.y[i]), "0" (ft.z[i]));
94436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= testf( -w, ft.expected[i] );
95436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      if (thisres)
96436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "Failure 4 %d %a %a\n", i, w, ft.expected[i] );
97436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      res |= thisres;
98436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   }
99436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   for (i = 0; i < N; i++)
100436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      ft.z[i] = -ft.z[i];
101436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   for (i = 0; i < N; i += 4) {
102436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      int thisres = 0;
103436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%3), %%xmm8;"
104436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmadd132ps %%xmm7, %%xmm8, %%xmm9;"
105436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
106436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
107436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
108436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
109436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm8;"
110436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmadd132ps (%2), %%xmm8, %%xmm9;"
111436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
112436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
113436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
114436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
115436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm7; vmovaps (%2), %%xmm8;"
116436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmadd213ps %%xmm7, %%xmm8, %%xmm9;"
117436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
118436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
119436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
120436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
121436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm8;"
122436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmadd213ps (%3), %%xmm8, %%xmm9;"
123436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
124436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
125436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
126436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
127436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%1), %%xmm8;"
128436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmadd231ps %%xmm7, %%xmm8, %%xmm9;"
129436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
130436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
131436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
132436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
133436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%1), %%xmm8;"
134436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmadd231ps (%2), %%xmm8, %%xmm9;"
135436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
136436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
137436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
138436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
139436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      if (thisres) {
140436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "Failure 5 %d", i );
141436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         for (j = 0; j < 4; j++)
142436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov            printf( " %a %a", ft.res[i+j], ft.expected[i+j] );
143436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "\n" );
144436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      }
145436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      res |= thisres;
146436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres = 0;
147436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%3), %%xmm8;"
148436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmsub132ps %%xmm7, %%xmm8, %%xmm9;"
149436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
150436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
151436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
152436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
153436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm8;"
154436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmsub132ps (%2), %%xmm8, %%xmm9;"
155436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
156436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
157436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
158436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
159436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm7; vmovaps (%2), %%xmm8;"
160436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmsub213ps %%xmm7, %%xmm8, %%xmm9;"
161436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
162436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
163436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
164436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
165436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm8;"
166436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmsub213ps (%3), %%xmm8, %%xmm9;"
167436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
168436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
169436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
170436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
171436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%1), %%xmm8;"
172436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmsub231ps %%xmm7, %%xmm8, %%xmm9;"
173436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
174436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
175436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
176436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
177436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%1), %%xmm8;"
178436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmsub231ps (%2), %%xmm8, %%xmm9;"
179436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
180436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
181436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
182436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
183436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      if (thisres) {
184436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "Failure 6 %d", i );
185436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         for (j = 0; j < 4; j++)
186436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov            printf( " %a %a", ft.res[i+j], ft.expected[i+j] );
187436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "\n" );
188436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      }
189436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      res |= thisres;
190436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   }
191436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   for (i = 0; i < N; i++)
192436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      ft.z[i] = -ft.z[i];
193436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   for (i = 0; i < N; i += 4) {
194436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      int thisres = 0;
195436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%3), %%xmm8;"
196436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsub132ps %%xmm7, %%xmm8, %%xmm9;"
197436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
198436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
199436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
200436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
201436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm8;"
202436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsub132ps (%2), %%xmm8, %%xmm9;"
203436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
204436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
205436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
206436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
207436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm7; vmovaps (%2), %%xmm8;"
208436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsub213ps %%xmm7, %%xmm8, %%xmm9;"
209436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
210436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
211436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
212436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
213436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm8;"
214436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsub213ps (%3), %%xmm8, %%xmm9;"
215436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
216436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
217436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
218436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
219436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%1), %%xmm8;"
220436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsub231ps %%xmm7, %%xmm8, %%xmm9;"
221436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
222436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
223436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
224436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
225436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%1), %%xmm8;"
226436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsub231ps (%2), %%xmm8, %%xmm9;"
227436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
228436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
229436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
230436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
231436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      if (thisres) {
232436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "Failure 7 %d", i );
233436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         for (j = 0; j < 4; j++)
234436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov            printf( " %a %a", ft.res[i+j], ft.expected[i+j] );
235436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "\n" );
236436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      }
237436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      res |= thisres;
238436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres = 0;
239436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%3), %%xmm8;"
240436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmadd132ps %%xmm7, %%xmm8, %%xmm9;"
241436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
242436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
243436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
244436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
245436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm8;"
246436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmadd132ps (%2), %%xmm8, %%xmm9;"
247436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
248436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
249436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
250436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
251436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm7; vmovaps (%2), %%xmm8;"
252436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmadd213ps %%xmm7, %%xmm8, %%xmm9;"
253436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
254436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
255436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
256436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
257436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm8;"
258436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmadd213ps (%3), %%xmm8, %%xmm9;"
259436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
260436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
261436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
262436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
263436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%1), %%xmm8;"
264436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmadd231ps %%xmm7, %%xmm8, %%xmm9;"
265436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
266436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
267436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
268436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
269436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%1), %%xmm8;"
270436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmadd231ps (%2), %%xmm8, %%xmm9;"
271436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
272436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
273436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
274436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
275436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      if (thisres) {
276436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "Failure 8 %d", i );
277436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         for (j = 0; j < 4; j++)
278436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov            printf( " %a %a", ft.res[i+j], ft.expected[i+j] );
279436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "\n" );
280436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      }
281436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      res |= thisres;
282436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   }
283436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   for (i = 1; i < N; i += 2)
284436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      ft.z[i] = -ft.z[i];
285436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   for (i = 0; i < N; i += 4) {
286436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      int thisres = 0;
287436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%3), %%xmm8;"
288436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmaddsub132ps %%xmm7, %%xmm8, %%xmm9;"
289436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
290436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
291436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
292436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
293436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm8;"
294436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmaddsub132ps (%2), %%xmm8, %%xmm9;"
295436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
296436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
297436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
298436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
299436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm7; vmovaps (%2), %%xmm8;"
300436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmaddsub213ps %%xmm7, %%xmm8, %%xmm9;"
301436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
302436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
303436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
304436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
305436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm8;"
306436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmaddsub213ps (%3), %%xmm8, %%xmm9;"
307436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
308436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
309436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
310436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
311436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%1), %%xmm8;"
312436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmaddsub231ps %%xmm7, %%xmm8, %%xmm9;"
313436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
314436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
315436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
316436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
317436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%1), %%xmm8;"
318436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmaddsub231ps (%2), %%xmm8, %%xmm9;"
319436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
320436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
321436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
322436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
323436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      if (thisres) {
324436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "Failure 9 %d", i );
325436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         for (j = 0; j < 4; j++)
326436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov            printf( " %a %a", ft.res[i+j], ft.expected[i+j] );
327436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "\n" );
328436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      }
329436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      res |= thisres;
330436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   }
331436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   for (i = 0; i < N; i++)
332436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      ft.z[i] = -ft.z[i];
333436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   for (i = 0; i < N; i += 4) {
334436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      int thisres = 0;
335436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%3), %%xmm8;"
336436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsubadd132ps %%xmm7, %%xmm8, %%xmm9;"
337436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
338436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
339436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
340436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
341436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm8;"
342436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsubadd132ps (%2), %%xmm8, %%xmm9;"
343436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
344436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
345436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
346436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
347436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm7; vmovaps (%2), %%xmm8;"
348436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsubadd213ps %%xmm7, %%xmm8, %%xmm9;"
349436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
350436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
351436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
352436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
353436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm8;"
354436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsubadd213ps (%3), %%xmm8, %%xmm9;"
355436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
356436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
357436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
358436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
359436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%1), %%xmm8;"
360436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsubadd231ps %%xmm7, %%xmm8, %%xmm9;"
361436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
362436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
363436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
364436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
365436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%1), %%xmm8;"
366436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsubadd231ps (%2), %%xmm8, %%xmm9;"
367436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
368436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
369436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
370436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
371436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      if (thisres) {
372436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "Failure 10 %d", i );
373436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         for (j = 0; j < 4; j++)
374436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov            printf( " %a %a", ft.res[i+j], ft.expected[i+j] );
375436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "\n" );
376436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      }
377436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      res |= thisres;
378436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   }
379436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   for (i = 1; i < N; i += 2)
380436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      ft.z[i] = -ft.z[i];
381436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   for (i = 0; i < N; i += 8) {
382436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      int thisres = 0;
383436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%3), %%ymm8;"
384436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmadd132ps %%ymm7, %%ymm8, %%ymm9;"
385436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
386436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
387436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 8; j++)
388436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
389436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm8;"
390436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmadd132ps (%2), %%ymm8, %%ymm9;"
391436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
392436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
393436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 8; j++)
394436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
395436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm7; vmovaps (%2), %%ymm8;"
396436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmadd213ps %%ymm7, %%ymm8, %%ymm9;"
397436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
398436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
399436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 8; j++)
400436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
401436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm8;"
402436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmadd213ps (%3), %%ymm8, %%ymm9;"
403436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
404436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
405436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 8; j++)
406436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
407436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%1), %%ymm8;"
408436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmadd231ps %%ymm7, %%ymm8, %%ymm9;"
409436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
410436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
411436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 8; j++)
412436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
413436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%1), %%ymm8;"
414436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmadd231ps (%2), %%ymm8, %%ymm9;"
415436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
416436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
417436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 8; j++)
418436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
419436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      if (thisres) {
420436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "Failure 11 %d", i );
421436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         for (j = 0; j < 8; j++)
422436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov            printf( " %a %a", ft.res[i+j], ft.expected[i+j] );
423436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "\n" );
424436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      }
425436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      res |= thisres;
426436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres = 0;
427436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%3), %%ymm8;"
428436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmsub132ps %%ymm7, %%ymm8, %%ymm9;"
429436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
430436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
431436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 8; j++)
432436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
433436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm8;"
434436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmsub132ps (%2), %%ymm8, %%ymm9;"
435436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
436436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
437436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 8; j++)
438436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
439436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm7; vmovaps (%2), %%ymm8;"
440436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmsub213ps %%ymm7, %%ymm8, %%ymm9;"
441436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
442436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
443436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 8; j++)
444436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
445436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm8;"
446436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmsub213ps (%3), %%ymm8, %%ymm9;"
447436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
448436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
449436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 8; j++)
450436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
451436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%1), %%ymm8;"
452436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmsub231ps %%ymm7, %%ymm8, %%ymm9;"
453436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
454436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
455436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 8; j++)
456436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
457436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%1), %%ymm8;"
458436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmsub231ps (%2), %%ymm8, %%ymm9;"
459436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
460436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
461436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 8; j++)
462436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
463436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      if (thisres) {
464436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "Failure 12 %d", i );
465436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         for (j = 0; j < 8; j++)
466436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov            printf( " %a %a", ft.res[i+j], ft.expected[i+j] );
467436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "\n" );
468436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      }
469436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      res |= thisres;
470436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   }
471436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   for (i = 0; i < N; i++)
472436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      ft.z[i] = -ft.z[i];
473436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   for (i = 0; i < N; i += 8) {
474436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      int thisres = 0;
475436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%3), %%ymm8;"
476436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsub132ps %%ymm7, %%ymm8, %%ymm9;"
477436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
478436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
479436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 8; j++)
480436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
481436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm8;"
482436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsub132ps (%2), %%ymm8, %%ymm9;"
483436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
484436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
485436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 8; j++)
486436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
487436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm7; vmovaps (%2), %%ymm8;"
488436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsub213ps %%ymm7, %%ymm8, %%ymm9;"
489436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
490436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
491436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 8; j++)
492436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
493436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm8;"
494436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsub213ps (%3), %%ymm8, %%ymm9;"
495436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
496436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
497436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 8; j++)
498436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
499436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%1), %%ymm8;"
500436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsub231ps %%ymm7, %%ymm8, %%ymm9;"
501436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
502436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
503436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 8; j++)
504436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
505436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%1), %%ymm8;"
506436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsub231ps (%2), %%ymm8, %%ymm9;"
507436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
508436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
509436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 8; j++)
510436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
511436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      if (thisres) {
512436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "Failure 13 %d", i );
513436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         for (j = 0; j < 8; j++)
514436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov            printf( " %a %a", ft.res[i+j], ft.expected[i+j] );
515436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "\n" );
516436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      }
517436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      res |= thisres;
518436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres = 0;
519436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%3), %%ymm8;"
520436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmadd132ps %%ymm7, %%ymm8, %%ymm9;"
521436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
522436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
523436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 8; j++)
524436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
525436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm8;"
526436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmadd132ps (%2), %%ymm8, %%ymm9;"
527436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
528436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
529436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 8; j++)
530436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
531436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm7; vmovaps (%2), %%ymm8;"
532436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmadd213ps %%ymm7, %%ymm8, %%ymm9;"
533436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
534436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
535436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 8; j++)
536436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
537436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm8;"
538436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmadd213ps (%3), %%ymm8, %%ymm9;"
539436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
540436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
541436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 8; j++)
542436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
543436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%1), %%ymm8;"
544436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmadd231ps %%ymm7, %%ymm8, %%ymm9;"
545436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
546436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
547436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 8; j++)
548436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
549436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%1), %%ymm8;"
550436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmadd231ps (%2), %%ymm8, %%ymm9;"
551436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
552436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
553436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 8; j++)
554436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
555436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      if (thisres) {
556436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "Failure 14 %d", i );
557436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         for (j = 0; j < 8; j++)
558436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov            printf( " %a %a", ft.res[i+j], ft.expected[i+j] );
559436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "\n" );
560436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      }
561436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      res |= thisres;
562436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   }
563436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   for (i = 1; i < N; i += 2)
564436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      ft.z[i] = -ft.z[i];
565436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   for (i = 0; i < N; i += 8) {
566436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      int thisres = 0;
567436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%3), %%ymm8;"
568436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmaddsub132ps %%ymm7, %%ymm8, %%ymm9;"
569436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
570436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
571436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 8; j++)
572436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
573436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm8;"
574436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmaddsub132ps (%2), %%ymm8, %%ymm9;"
575436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
576436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
577436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 8; j++)
578436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
579436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm7; vmovaps (%2), %%ymm8;"
580436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmaddsub213ps %%ymm7, %%ymm8, %%ymm9;"
581436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
582436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
583436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 8; j++)
584436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
585436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm8;"
586436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmaddsub213ps (%3), %%ymm8, %%ymm9;"
587436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
588436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
589436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 8; j++)
590436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
591436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%1), %%ymm8;"
592436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmaddsub231ps %%ymm7, %%ymm8, %%ymm9;"
593436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
594436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
595436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 8; j++)
596436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
597436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%1), %%ymm8;"
598436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmaddsub231ps (%2), %%ymm8, %%ymm9;"
599436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
600436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
601436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 8; j++)
602436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
603436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      if (thisres) {
604436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "Failure 15 %d", i );
605436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         for (j = 0; j < 8; j++)
606436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov            printf( " %a %a", ft.res[i+j], ft.expected[i+j] );
607436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "\n" );
608436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      }
609436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      res |= thisres;
610436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   }
611436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   for (i = 0; i < N; i++)
612436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      ft.z[i] = -ft.z[i];
613436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   for (i = 0; i < N; i += 8) {
614436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      int thisres = 0;
615436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%3), %%ymm8;"
616436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsubadd132ps %%ymm7, %%ymm8, %%ymm9;"
617436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
618436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
619436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 8; j++)
620436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
621436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm8;"
622436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsubadd132ps (%2), %%ymm8, %%ymm9;"
623436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
624436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
625436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 8; j++)
626436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
627436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm7; vmovaps (%2), %%ymm8;"
628436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsubadd213ps %%ymm7, %%ymm8, %%ymm9;"
629436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
630436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
631436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 8; j++)
632436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
633436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm8;"
634436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsubadd213ps (%3), %%ymm8, %%ymm9;"
635436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
636436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
637436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 8; j++)
638436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
639436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%1), %%ymm8;"
640436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsubadd231ps %%ymm7, %%ymm8, %%ymm9;"
641436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
642436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
643436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 8; j++)
644436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
645436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%1), %%ymm8;"
646436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsubadd231ps (%2), %%ymm8, %%ymm9;"
647436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
648436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
649436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 8; j++)
650436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= testf( ft.res[i+j], ft.expected[i+j] );
651436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      if (thisres) {
652436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "Failure 16 %d", i );
653436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         for (j = 0; j < 8; j++)
654436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov            printf( " %a %a", ft.res[i+j], ft.expected[i+j] );
655436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "\n" );
656436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      }
657436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      res |= thisres;
658436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   }
659436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   for (i = 1; i < N; i += 2)
660436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      ft.z[i] = -ft.z[i];
661436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   return res;
662436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov}
663436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov
/* Compare two doubles by their raw bit patterns.
   Returns 0 when they are considered equal and 1 when they differ.
   When x has all exponent bits and the top mantissa bit set (a quiet NaN
   encoding), y is accepted as long as it has those same bits set; the sign
   and the remaining mantissa bits are ignored in that case.  Otherwise
   every bit must match, so +0.0 and -0.0 are reported as different. */
static int test( double x, double y )
{
   const unsigned long long qnan_mask = 0x7ff8000000000000ULL;
   unsigned long long xbits, ybits;

   /* Reinterpret the values as integers without violating aliasing rules. */
   memcpy( &xbits, &x, sizeof (xbits) );
   memcpy( &ybits, &y, sizeof (ybits) );

   /* Ordinary case: x is not a quiet NaN, so demand an exact bit match. */
   if ((xbits & qnan_mask) != qnan_mask)
      return xbits != ybits;

   /* x is a quiet NaN: y only needs to be a quiet NaN as well. */
   return (ybits & qnan_mask) == qnan_mask ? 0 : 1;
}
673436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov
674436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanovstatic int test_fma( void )
675436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov{
676436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   int res = 0, i, j;
677436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   double w;
678436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   for (i = 0; i < N; i++) {
679436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      int thisres = 0;
680436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfmadd132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i]));
681436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= test( w, dt.expected[i] );
682436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfmadd132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "m" (dt.y[i]), "x" (dt.z[i]));
683436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= test( w, dt.expected[i] );
684436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfmadd213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i]));
685436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= test( w, dt.expected[i] );
686436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfmadd213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "m" (dt.z[i]));
687436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= test( w, dt.expected[i] );
688436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfmadd231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "x" (dt.y[i]), "0" (dt.z[i]));
689436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= test( w, dt.expected[i] );
690436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfmadd231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "m" (dt.y[i]), "0" (dt.z[i]));
691436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= test( w, dt.expected[i] );
692436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      if (thisres)
693436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "Failure 1 %d %a %a\n", i, w, dt.expected[i] );
694436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      res |= thisres;
695436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres = 0;
696436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfnmsub132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i]));
697436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= test( -w, dt.expected[i] );
698436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfnmsub132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "m" (dt.y[i]), "x" (dt.z[i]));
699436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= test( -w, dt.expected[i] );
700436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfnmsub213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i]));
701436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= test( -w, dt.expected[i] );
702436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfnmsub213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "m" (dt.z[i]));
703436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= test( -w, dt.expected[i] );
704436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfnmsub231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "x" (dt.y[i]), "0" (dt.z[i]));
705436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= test( -w, dt.expected[i] );
706436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfnmsub231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "m" (dt.y[i]), "0" (dt.z[i]));
707436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= test( -w, dt.expected[i] );
708436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      if (thisres)
709436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "Failure 2 %d %a %a\n", i, w, dt.expected[i] );
710436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      res |= thisres;
711436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   }
712436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   for (i = 0; i < N; i++)
713436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      dt.z[i] = -dt.z[i];
714436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   for (i = 0; i < N; i++) {
715436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      int thisres = 0;
716436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfmsub132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i]));
717436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= test( w, dt.expected[i] );
718436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfmsub132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "m" (dt.y[i]), "x" (dt.z[i]));
719436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= test( w, dt.expected[i] );
720436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfmsub213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i]));
721436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= test( w, dt.expected[i] );
722436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfmsub213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "m" (dt.z[i]));
723436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= test( w, dt.expected[i] );
724436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfmsub231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "x" (dt.y[i]), "0" (dt.z[i]));
725436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= test( w, dt.expected[i] );
726436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfmsub231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "m" (dt.y[i]), "0" (dt.z[i]));
727436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= test( w, dt.expected[i] );
728436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      if (thisres)
729436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "Failure 3 %d %a %a\n", i, w, dt.expected[i] );
730436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      res |= thisres;
731436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres = 0;
732436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfnmadd132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i]));
733436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= test( -w, dt.expected[i] );
734436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfnmadd132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "m" (dt.y[i]), "x" (dt.z[i]));
735436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= test( -w, dt.expected[i] );
736436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfnmadd213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i]));
737436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= test( -w, dt.expected[i] );
738436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfnmadd213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "m" (dt.z[i]));
739436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= test( -w, dt.expected[i] );
740436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfnmadd231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "x" (dt.y[i]), "0" (dt.z[i]));
741436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= test( -w, dt.expected[i] );
742436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vfnmadd231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "m" (dt.y[i]), "0" (dt.z[i]));
743436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres |= test( -w, dt.expected[i] );
744436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      if (thisres)
745436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "Failure 4 %d %a %a\n", i, w, dt.expected[i] );
746436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      res |= thisres;
747436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   }
748436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   for (i = 0; i < N; i++)
749436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      dt.z[i] = -dt.z[i];
750436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   for (i = 0; i < N; i += 2) {
751436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      int thisres = 0;
752436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;"
753436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmadd132pd %%xmm7, %%xmm8, %%xmm9;"
754436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
755436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
756436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 2; j++)
757436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
758436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;"
759436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmadd132pd (%2), %%xmm8, %%xmm9;"
760436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
761436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
762436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 2; j++)
763436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
764436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;"
765436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmadd213pd %%xmm7, %%xmm8, %%xmm9;"
766436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
767436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
768436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 2; j++)
769436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
770436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;"
771436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmadd213pd (%3), %%xmm8, %%xmm9;"
772436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
773436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
774436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 2; j++)
775436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
776436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;"
777436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmadd231pd %%xmm7, %%xmm8, %%xmm9;"
778436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
779436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
780436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 2; j++)
781436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
782436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;"
783436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmadd231pd (%2), %%xmm8, %%xmm9;"
784436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
785436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
786436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 2; j++)
787436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
788436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      if (thisres) {
789436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "Failure 5 %d", i );
790436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         for (j = 0; j < 2; j++)
791436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov            printf( " %a %a", dt.res[i+j], dt.expected[i+j] );
792436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "\n" );
793436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      }
794436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      res |= thisres;
795436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres = 0;
796436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;"
797436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmsub132pd %%xmm7, %%xmm8, %%xmm9;"
798436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
799436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
800436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 2; j++)
801436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( -dt.res[i+j], dt.expected[i+j] );
802436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;"
803436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmsub132pd (%2), %%xmm8, %%xmm9;"
804436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
805436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
806436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 2; j++)
807436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( -dt.res[i+j], dt.expected[i+j] );
808436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;"
809436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmsub213pd %%xmm7, %%xmm8, %%xmm9;"
810436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
811436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
812436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 2; j++)
813436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( -dt.res[i+j], dt.expected[i+j] );
814436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;"
815436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmsub213pd (%3), %%xmm8, %%xmm9;"
816436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
817436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
818436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 2; j++)
819436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( -dt.res[i+j], dt.expected[i+j] );
820436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;"
821436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmsub231pd %%xmm7, %%xmm8, %%xmm9;"
822436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
823436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
824436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 2; j++)
825436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( -dt.res[i+j], dt.expected[i+j] );
826436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;"
827436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmsub231pd (%2), %%xmm8, %%xmm9;"
828436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
829436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
830436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 2; j++)
831436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( -dt.res[i+j], dt.expected[i+j] );
832436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      if (thisres) {
833436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "Failure 6 %d", i );
834436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         for (j = 0; j < 2; j++)
835436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov            printf( " %a %a", dt.res[i+j], dt.expected[i+j] );
836436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "\n" );
837436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      }
838436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      res |= thisres;
839436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   }
840436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   for (i = 0; i < N; i++)
841436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      dt.z[i] = -dt.z[i];
842436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   for (i = 0; i < N; i += 2) {
843436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      int thisres = 0;
844436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;"
845436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsub132pd %%xmm7, %%xmm8, %%xmm9;"
846436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
847436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
848436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 2; j++)
849436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
850436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;"
851436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsub132pd (%2), %%xmm8, %%xmm9;"
852436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
853436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
854436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 2; j++)
855436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
856436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;"
857436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsub213pd %%xmm7, %%xmm8, %%xmm9;"
858436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
859436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
860436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 2; j++)
861436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
862436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;"
863436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsub213pd (%3), %%xmm8, %%xmm9;"
864436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
865436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
866436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 2; j++)
867436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
868436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;"
869436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsub231pd %%xmm7, %%xmm8, %%xmm9;"
870436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
871436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
872436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 2; j++)
873436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
874436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;"
875436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsub231pd (%2), %%xmm8, %%xmm9;"
876436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
877436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
878436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 2; j++)
879436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
880436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      if (thisres) {
881436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "Failure 7 %d", i );
882436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         for (j = 0; j < 2; j++)
883436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov            printf( " %a %a", dt.res[i+j], dt.expected[i+j] );
884436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "\n" );
885436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      }
886436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      res |= thisres;
887436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres = 0;
888436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;"
889436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmadd132pd %%xmm7, %%xmm8, %%xmm9;"
890436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
891436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
892436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 2; j++)
893436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( -dt.res[i+j], dt.expected[i+j] );
894436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;"
895436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmadd132pd (%2), %%xmm8, %%xmm9;"
896436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
897436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
898436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 2; j++)
899436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( -dt.res[i+j], dt.expected[i+j] );
900436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;"
901436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmadd213pd %%xmm7, %%xmm8, %%xmm9;"
902436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
903436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
904436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 2; j++)
905436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( -dt.res[i+j], dt.expected[i+j] );
906436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;"
907436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmadd213pd (%3), %%xmm8, %%xmm9;"
908436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
909436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
910436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 2; j++)
911436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( -dt.res[i+j], dt.expected[i+j] );
912436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;"
913436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmadd231pd %%xmm7, %%xmm8, %%xmm9;"
914436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
915436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
916436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 2; j++)
917436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( -dt.res[i+j], dt.expected[i+j] );
918436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;"
919436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmadd231pd (%2), %%xmm8, %%xmm9;"
920436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
921436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
922436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 2; j++)
923436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( -dt.res[i+j], dt.expected[i+j] );
924436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      if (thisres) {
925436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "Failure 8 %d", i );
926436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         for (j = 0; j < 2; j++)
927436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov            printf( " %a %a", dt.res[i+j], dt.expected[i+j] );
928436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "\n" );
929436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      }
930436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      res |= thisres;
931436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   }
932436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   for (i = 1; i < N; i += 2)
933436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      dt.z[i] = -dt.z[i];
934436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   for (i = 0; i < N; i += 2) {
935436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      int thisres = 0;
936436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;"
937436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmaddsub132pd %%xmm7, %%xmm8, %%xmm9;"
938436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
939436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
940436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 2; j++)
941436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
942436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;"
943436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmaddsub132pd (%2), %%xmm8, %%xmm9;"
944436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
945436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
946436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 2; j++)
947436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
948436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;"
949436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmaddsub213pd %%xmm7, %%xmm8, %%xmm9;"
950436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
951436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
952436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 2; j++)
953436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
954436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;"
955436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmaddsub213pd (%3), %%xmm8, %%xmm9;"
956436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
957436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
958436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 2; j++)
959436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
960436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;"
961436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmaddsub231pd %%xmm7, %%xmm8, %%xmm9;"
962436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
963436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
964436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 2; j++)
965436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
966436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;"
967436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmaddsub231pd (%2), %%xmm8, %%xmm9;"
968436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
969436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
970436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 2; j++)
971436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
972436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      if (thisres) {
973436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "Failure 9 %d", i );
974436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         for (j = 0; j < 2; j++)
975436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov            printf( " %a %a", dt.res[i+j], dt.expected[i+j] );
976436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "\n" );
977436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      }
978436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      res |= thisres;
979436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   }
980436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   for (i = 0; i < N; i++)
981436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      dt.z[i] = -dt.z[i];
982436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   for (i = 0; i < N; i += 2) {
983436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      int thisres = 0;
984436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;"
985436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsubadd132pd %%xmm7, %%xmm8, %%xmm9;"
986436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
987436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
988436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 2; j++)
989436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
990436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;"
991436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsubadd132pd (%2), %%xmm8, %%xmm9;"
992436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
993436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
994436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 2; j++)
995436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
996436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;"
997436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsubadd213pd %%xmm7, %%xmm8, %%xmm9;"
998436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
999436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1000436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 2; j++)
1001436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
1002436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;"
1003436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsubadd213pd (%3), %%xmm8, %%xmm9;"
1004436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1005436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1006436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 2; j++)
1007436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
1008436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;"
1009436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsubadd231pd %%xmm7, %%xmm8, %%xmm9;"
1010436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1011436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1012436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 2; j++)
1013436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
1014436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;"
1015436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsubadd231pd (%2), %%xmm8, %%xmm9;"
1016436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1017436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1018436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 2; j++)
1019436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
1020436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      if (thisres) {
1021436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "Failure 10 %d", i );
1022436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         for (j = 0; j < 2; j++)
1023436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov            printf( " %a %a", dt.res[i+j], dt.expected[i+j] );
1024436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "\n" );
1025436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      }
1026436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      res |= thisres;
1027436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   }
1028436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   for (i = 1; i < N; i += 2)
1029436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      dt.z[i] = -dt.z[i];
1030436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   for (i = 0; i < N; i += 4) {
1031436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      int thisres = 0;
1032436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;"
1033436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmadd132pd %%ymm7, %%ymm8, %%ymm9;"
1034436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1035436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1036436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
1037436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
1038436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;"
1039436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmadd132pd (%2), %%ymm8, %%ymm9;"
1040436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1041436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1042436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
1043436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
1044436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;"
1045436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmadd213pd %%ymm7, %%ymm8, %%ymm9;"
1046436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1047436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1048436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
1049436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
1050436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;"
1051436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmadd213pd (%3), %%ymm8, %%ymm9;"
1052436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1053436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1054436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
1055436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
1056436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;"
1057436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmadd231pd %%ymm7, %%ymm8, %%ymm9;"
1058436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1059436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1060436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
1061436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
1062436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;"
1063436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmadd231pd (%2), %%ymm8, %%ymm9;"
1064436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1065436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1066436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
1067436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
1068436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      if (thisres) {
1069436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "Failure 11 %d", i );
1070436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         for (j = 0; j < 4; j++)
1071436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov            printf( " %a %a", dt.res[i+j], dt.expected[i+j] );
1072436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "\n" );
1073436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      }
1074436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      res |= thisres;
1075436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres = 0;
1076436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;"
1077436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmsub132pd %%ymm7, %%ymm8, %%ymm9;"
1078436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1079436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1080436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
1081436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( -dt.res[i+j], dt.expected[i+j] );
1082436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;"
1083436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmsub132pd (%2), %%ymm8, %%ymm9;"
1084436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1085436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1086436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
1087436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( -dt.res[i+j], dt.expected[i+j] );
1088436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;"
1089436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmsub213pd %%ymm7, %%ymm8, %%ymm9;"
1090436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1091436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1092436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
1093436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( -dt.res[i+j], dt.expected[i+j] );
1094436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;"
1095436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmsub213pd (%3), %%ymm8, %%ymm9;"
1096436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1097436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1098436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
1099436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( -dt.res[i+j], dt.expected[i+j] );
1100436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;"
1101436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmsub231pd %%ymm7, %%ymm8, %%ymm9;"
1102436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1103436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1104436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
1105436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( -dt.res[i+j], dt.expected[i+j] );
1106436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;"
1107436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmsub231pd (%2), %%ymm8, %%ymm9;"
1108436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1109436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1110436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
1111436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( -dt.res[i+j], dt.expected[i+j] );
1112436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      if (thisres) {
1113436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "Failure 12 %d", i );
1114436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         for (j = 0; j < 4; j++)
1115436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov            printf( " %a %a", dt.res[i+j], dt.expected[i+j] );
1116436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "\n" );
1117436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      }
1118436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      res |= thisres;
1119436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   }
1120436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   for (i = 0; i < N; i++)
1121436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      dt.z[i] = -dt.z[i];
1122436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   for (i = 0; i < N; i += 4) {
1123436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      int thisres = 0;
1124436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;"
1125436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsub132pd %%ymm7, %%ymm8, %%ymm9;"
1126436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1127436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1128436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
1129436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
1130436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;"
1131436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsub132pd (%2), %%ymm8, %%ymm9;"
1132436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1133436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1134436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
1135436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
1136436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;"
1137436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsub213pd %%ymm7, %%ymm8, %%ymm9;"
1138436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1139436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1140436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
1141436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
1142436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;"
1143436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsub213pd (%3), %%ymm8, %%ymm9;"
1144436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1145436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1146436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
1147436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
1148436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;"
1149436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsub231pd %%ymm7, %%ymm8, %%ymm9;"
1150436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1151436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1152436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
1153436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
1154436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;"
1155436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsub231pd (%2), %%ymm8, %%ymm9;"
1156436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1157436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1158436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
1159436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
1160436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      if (thisres) {
1161436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "Failure 13 %d", i );
1162436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         for (j = 0; j < 4; j++)
1163436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov            printf( " %a %a", dt.res[i+j], dt.expected[i+j] );
1164436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "\n" );
1165436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      }
1166436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      res |= thisres;
1167436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      thisres = 0;
1168436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;"
1169436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmadd132pd %%ymm7, %%ymm8, %%ymm9;"
1170436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1171436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1172436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
1173436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( -dt.res[i+j], dt.expected[i+j] );
1174436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;"
1175436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmadd132pd (%2), %%ymm8, %%ymm9;"
1176436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1177436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1178436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
1179436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( -dt.res[i+j], dt.expected[i+j] );
1180436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;"
1181436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmadd213pd %%ymm7, %%ymm8, %%ymm9;"
1182436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1183436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1184436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
1185436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( -dt.res[i+j], dt.expected[i+j] );
1186436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;"
1187436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmadd213pd (%3), %%ymm8, %%ymm9;"
1188436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1189436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1190436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
1191436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( -dt.res[i+j], dt.expected[i+j] );
1192436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;"
1193436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmadd231pd %%ymm7, %%ymm8, %%ymm9;"
1194436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1195436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1196436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
1197436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( -dt.res[i+j], dt.expected[i+j] );
1198436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;"
1199436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfnmadd231pd (%2), %%ymm8, %%ymm9;"
1200436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1201436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1202436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
1203436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( -dt.res[i+j], dt.expected[i+j] );
1204436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      if (thisres) {
1205436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "Failure 14 %d", i );
1206436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         for (j = 0; j < 4; j++)
1207436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov            printf( " %a %a", dt.res[i+j], dt.expected[i+j] );
1208436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "\n" );
1209436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      }
1210436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      res |= thisres;
1211436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   }
1212436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   for (i = 1; i < N; i += 2)
1213436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      dt.z[i] = -dt.z[i];
1214436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   for (i = 0; i < N; i += 4) {
1215436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      int thisres = 0;
1216436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;"
1217436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmaddsub132pd %%ymm7, %%ymm8, %%ymm9;"
1218436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1219436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1220436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
1221436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
1222436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;"
1223436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmaddsub132pd (%2), %%ymm8, %%ymm9;"
1224436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1225436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1226436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
1227436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
1228436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;"
1229436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmaddsub213pd %%ymm7, %%ymm8, %%ymm9;"
1230436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1231436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1232436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
1233436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
1234436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;"
1235436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmaddsub213pd (%3), %%ymm8, %%ymm9;"
1236436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1237436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1238436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
1239436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
1240436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;"
1241436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmaddsub231pd %%ymm7, %%ymm8, %%ymm9;"
1242436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1243436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1244436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
1245436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
1246436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;"
1247436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmaddsub231pd (%2), %%ymm8, %%ymm9;"
1248436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1249436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1250436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
1251436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
1252436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      if (thisres) {
1253436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "Failure 15 %d", i );
1254436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         for (j = 0; j < 4; j++)
1255436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov            printf( " %a %a", dt.res[i+j], dt.expected[i+j] );
1256436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "\n" );
1257436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      }
1258436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      res |= thisres;
1259436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   }
1260436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   for (i = 0; i < N; i++)
1261436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      dt.z[i] = -dt.z[i];
1262436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   for (i = 0; i < N; i += 4) {
1263436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      int thisres = 0;
1264436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;"
1265436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsubadd132pd %%ymm7, %%ymm8, %%ymm9;"
1266436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1267436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1268436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
1269436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
1270436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;"
1271436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsubadd132pd (%2), %%ymm8, %%ymm9;"
1272436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1273436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1274436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
1275436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
1276436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;"
1277436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsubadd213pd %%ymm7, %%ymm8, %%ymm9;"
1278436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1279436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1280436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
1281436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
1282436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;"
1283436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsubadd213pd (%3), %%ymm8, %%ymm9;"
1284436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1285436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1286436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
1287436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
1288436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;"
1289436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsubadd231pd %%ymm7, %%ymm8, %%ymm9;"
1290436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1291436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1292436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
1293436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
1294436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;"
1295436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vfmsubadd231pd (%2), %%ymm8, %%ymm9;"
1296436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                          "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1297436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov                                                     "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1298436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      for (j = 0; j < 4; j++)
1299436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         thisres |= test( dt.res[i+j], dt.expected[i+j] );
1300436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      if (thisres) {
1301436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "Failure 16 %d", i );
1302436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         for (j = 0; j < 4; j++)
1303436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov            printf( " %a %a", dt.res[i+j], dt.expected[i+j] );
1304436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         printf( "\n" );
1305436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      }
1306436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      res |= thisres;
1307436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   }
1308436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   for (i = 1; i < N; i += 2)
1309436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      dt.z[i] = -dt.z[i];
1310436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   return res;
1311436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov}
1312436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov
1313436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanovint main( )
1314436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov{
1315436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   int res = 0;
1316436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   int i = 0;
1317436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   plus_zero = 0.0;
1318436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   __asm __volatile__ ("" : : "r" (&plus_zero) : "memory");
1319436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   nan_value = plus_zero / plus_zero;
1320436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   plus_infty = 3.40282346638528859812e+38F * 16.0F;
1321436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   minus_infty = -plus_infty;
/*
 * TEST_F( a, b, c, d ): record one single-precision test vector.
 *
 * Stores a, b and c into ft.x[i], ft.y[i] and ft.z[i], stores d into
 * ft.expected[i], then increments i.  The macro relies on an integer
 * lvalue named `i` and on the global `ft` being visible where it is
 * expanded.  No bounds check against the array size is performed.
 *
 * Callers pass double and long double constants as well as floats, so
 * the narrowing to float is spelled out with a cast; each argument is
 * parenthesized and evaluated exactly once.
 */
#define TEST_F( a, b, c, d ) \
   do {                                \
      ft.x[i] = (float) (a);           \
      ft.y[i] = (float) (b);           \
      ft.z[i] = (float) (c);           \
      ft.expected[i] = (float) (d);    \
      i++;                             \
   } while (0)
1330436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST_F( 1.0, 2.0, 3.0, 5.0 );
1331436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST_F( nan_value, 2.0, 3.0, nan_value );
1332436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST_F( 1.0, nan_value, 3.0, nan_value );
1333436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST_F( 1.0, 2.0, nan_value, nan_value );
1334436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST_F( plus_infty, 0.0, nan_value, nan_value );
1335436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST_F( minus_infty, 0.0, nan_value, nan_value );
1336436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST_F( 0.0, plus_infty, nan_value, nan_value );
1337436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST_F( 0.0, minus_infty, nan_value, nan_value );
1338436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST_F( plus_infty, 0.0, 1.0, nan_value );
1339436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST_F( minus_infty, 0.0, 1.0, nan_value );
1340436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST_F( 0.0, plus_infty, 1.0, nan_value );
1341436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST_F( 0.0, minus_infty, 1.0, nan_value );
1342436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST_F( plus_infty, plus_infty, minus_infty, nan_value );
1343436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST_F( minus_infty, plus_infty, plus_infty, nan_value );
1344436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST_F( plus_infty, minus_infty, plus_infty, nan_value );
1345436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST_F( minus_infty, minus_infty, minus_infty, nan_value );
1346436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST_F( plus_infty, 3.5L, minus_infty, nan_value );
1347436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST_F( minus_infty, -7.5L, minus_infty, nan_value );
1348436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST_F( -13.5L, plus_infty, plus_infty, nan_value );
1349436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST_F( minus_infty, 7.5L, plus_infty, nan_value );
1350436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST_F( 1.25L, 0.75L, 0.0625L, 1.0L );
1351436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST_F( -3.40282346638528859812e+38F, -3.40282346638528859812e+38F, minus_infty, minus_infty );
1352436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST_F( 3.40282346638528859812e+38F / 2, 3.40282346638528859812e+38F / 2, minus_infty, minus_infty );
1353436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST_F( -3.40282346638528859812e+38F, 3.40282346638528859812e+38F, plus_infty, plus_infty );
1354436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST_F( 3.40282346638528859812e+38F / 2, -3.40282346638528859812e+38F / 4, plus_infty, plus_infty );
1355436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST_F( plus_infty, 4, plus_infty, plus_infty );
1356436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST_F( 2, minus_infty, minus_infty, minus_infty );
1357436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST_F( minus_infty, minus_infty, plus_infty, plus_infty );
1358436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST_F( plus_infty, minus_infty, minus_infty, minus_infty );
1359436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST_F( 0x1.7ff8p+13, 0x1.000002p+0, 0x1.ffffp-24, 0x1.7ff802p+13 );
1360436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST_F( 0x1.fffp+0, 0x1.00001p+0, -0x1.fffp+0, 0x1.fffp-20 );
1361436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST_F( 0x1.9abcdep+127, 0x0.9abcdep-126, -0x1.f08948p+0, 0x1.bb421p-25 );
1362436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST_F( 0x1.9abcdep+100, 0x0.9abcdep-126, -0x1.f08948p-27, 0x1.bb421p-52 );
1363436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST_F( 0x1.fffffep+127, 0x1.001p+0, -0x1.fffffep+127, 0x1.fffffep+115 );
1364436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST_F( -0x1.fffffep+127, 0x1.fffffep+0, 0x1.fffffep+127, -0x1.fffffap+127 );
1365436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST_F( 0x1.fffffep+127, 2.0, -0x1.fffffep+127, 0x1.fffffep+127 );
1366436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov
1367436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   res |= test_fmaf( );
1368436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   i = 0;
/* TEST( a, b, c, d ): record one double-precision test case in the global
 * table `dt`.  Stores a, b and c into dt.x[i], dt.y[i] and dt.z[i], stores d
 * into dt.expected[i], then increments `i`.
 *
 * Uses the variable `i` from the enclosing scope as the next free slot.  No
 * bounds check is performed: each dt array holds N (64) elements, so the
 * caller must keep the number of TEST() invocations at or below N.
 *
 * NOTE(review): d is presumably the value that fma( a, b, c ) should produce
 * and that test_fma() checks against -- confirm against test_fma().
 *
 * Wrapped in do { ... } while (0) so that an invocation followed by ';'
 * behaves as a single statement. */
1369436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#define TEST( a, b, c, d ) \
1370436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   do {				\
1371436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      dt.x[i] = a;		\
1372436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      dt.y[i] = b;		\
1373436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      dt.z[i] = c;		\
1374436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      dt.expected[i] = d;	\
1375436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      i++;			\
1376436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   } while (0)
1377436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( 1.0, 2.0, 3.0, 5.0 );
1378436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( nan_value, 2.0, 3.0, nan_value );
1379436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( 1.0, nan_value, 3.0, nan_value );
1380436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( 1.0, 2.0, nan_value, nan_value );
1381436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( plus_infty, 0.0, nan_value, nan_value );
1382436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( minus_infty, 0.0, nan_value, nan_value );
1383436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( 0.0, plus_infty, nan_value, nan_value );
1384436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( 0.0, minus_infty, nan_value, nan_value );
1385436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( plus_infty, 0.0, 1.0, nan_value );
1386436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( minus_infty, 0.0, 1.0, nan_value );
1387436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( 0.0, plus_infty, 1.0, nan_value );
1388436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( 0.0, minus_infty, 1.0, nan_value );
1389436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( plus_infty, plus_infty, minus_infty, nan_value );
1390436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( minus_infty, plus_infty, plus_infty, nan_value );
1391436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( plus_infty, minus_infty, plus_infty, nan_value );
1392436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( minus_infty, minus_infty, minus_infty, nan_value );
1393436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( plus_infty, 3.5L, minus_infty, nan_value );
1394436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( minus_infty, -7.5L, minus_infty, nan_value );
1395436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( -13.5L, plus_infty, plus_infty, nan_value );
1396436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( minus_infty, 7.5L, plus_infty, nan_value );
1397436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( 1.25L, 0.75L, 0.0625L, 1.0L );
1398436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( -1.79769313486231570815e+308L, -1.79769313486231570815e+308L, minus_infty, minus_infty );
1399436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( 1.79769313486231570815e+308L / 2, 1.79769313486231570815e+308L / 2, minus_infty, minus_infty );
1400436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( -1.79769313486231570815e+308L, 1.79769313486231570815e+308L, plus_infty, plus_infty );
1401436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( 1.79769313486231570815e+308L / 2, -1.79769313486231570815e+308L / 4, plus_infty, plus_infty );
1402436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( plus_infty, 4, plus_infty, plus_infty );
1403436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( 2, minus_infty, minus_infty, minus_infty );
1404436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( minus_infty, minus_infty, plus_infty, plus_infty );
1405436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( plus_infty, minus_infty, minus_infty, minus_infty );
1406436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( 0x1.7fp+13, 0x1.0000000000001p+0, 0x1.ffep-48, 0x1.7f00000000001p+13 );
1407436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( 0x1.fffp+0, 0x1.0000000000001p+0, -0x1.fffp+0, 0x1.fffp-52 );
1408436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( 0x1.0000002p+0, 0x1.ffffffcp-1, 0x1p-300, 1.0 );
1409436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( 0x1.0000002p+0, 0x1.ffffffcp-1, -0x1p-300, 0x1.fffffffffffffp-1 );
1410436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( 0x1.deadbeef2feedp+1023, 0x0.deadbeef2feedp-1022, -0x1.a05f8c01a4bfbp+1, 0x1.0989687bc9da4p-53 );
1411436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( 0x1.deadbeef2feedp+900, 0x0.deadbeef2feedp-1022, -0x1.a05f8c01a4bfbp-122, 0x1.0989687bc9da4p-176 );
1412436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( 0x1.fffffffffffffp+1023, 0x1.001p+0, -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1011 );
1413436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+0, 0x1.fffffffffffffp+1023, -0x1.ffffffffffffdp+1023 );
1414436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( 0x1.fffffffffffffp+1023, 2.0, -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1023 );
1415436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( 0x1.6a09e667f3bccp-538, 0x1.6a09e667f3bccp-538, 0.0, 0.0 );
1416436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( 0x1.deadbeef2feedp-495, 0x1.deadbeef2feedp-495, -0x1.bf86a5786a574p-989, 0x0.0000042625a1fp-1022 );
1417436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( 0x1.deadbeef2feedp-503, 0x1.deadbeef2feedp-503, -0x1.bf86a5786a574p-1005, 0x0.0000000004262p-1022 );
1418436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( 0x1p-537, 0x1p-538, 0x1p-1074, 0x0.0000000000002p-1022 );
1419436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( 0x1.7fffff8p-968, 0x1p-106, 0x0.000001p-1022, 0x0.0000010000001p-1022 );
1420436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( 0x1.4000004p-967, 0x1p-106, 0x0.000001p-1022, 0x0.0000010000003p-1022 );
1421436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( 0x1.4p-967, -0x1p-106, -0x0.000001p-1022, -0x0.0000010000002p-1022 );
1422436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( -0x1.19cab66d73e17p-959, 0x1.c7108a8c5ff51p-107, -0x0.80b0ad65d9b64p-1022, -0x0.80b0ad65d9d59p-1022 );
1423436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( -0x1.d2eaed6e8e9d3p-979, -0x1.4e066c62ac9ddp-63, -0x0.9245e6b003454p-1022, -0x0.9245c09c5fb5dp-1022 );
1424436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( 0x1.153d650bb9f06p-907, 0x1.2d01230d48407p-125, -0x0.b278d5acfc3cp-1022, -0x0.b22757123bbe9p-1022 );
1425436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   TEST( -0x1.fffffffffffffp-711, 0x1.fffffffffffffp-275, 0x1.fffffe00007ffp-983, 0x1.7ffffe00007ffp-983 );
1426436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov
1427436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   res |= test_fma( );
1428436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   if (res == 0)
1429436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      printf( "Testing successful\n");
1430436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   return 0;
1431436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov}
1432