1fc9b17107d04d22f59c82a379d8fa059680166cfsewardj
/* Tests e-vs-i or i-vs-m aspects for pcmp{e,i}str{i,m}.  Does not
   check the core arithmetic in any detail.  */
4fc9b17107d04d22f59c82a379d8fa059680166cfsewardj
5fc9b17107d04d22f59c82a379d8fa059680166cfsewardj#include <string.h>
6fc9b17107d04d22f59c82a379d8fa059680166cfsewardj#include <stdio.h>
7fc9b17107d04d22f59c82a379d8fa059680166cfsewardj#include <assert.h>
8fc9b17107d04d22f59c82a379d8fa059680166cfsewardj
/* Fixed-purpose scalar aliases used throughout this test. */
typedef  unsigned char           UChar;
typedef  unsigned int            UInt;
typedef  signed int              Int;
typedef  unsigned long long int  ULong;

/* A vector operand viewed as 16 raw bytes. */
typedef  unsigned char           V128[16];

/* Minimal boolean type and its two values. */
typedef  UChar                   Bool;
#define True  ((Bool)1)
#define False ((Bool)0)
17fc9b17107d04d22f59c82a379d8fa059680166cfsewardj
18fc9b17107d04d22f59c82a379d8fa059680166cfsewardjvoid show_V128 ( V128* vec )
19fc9b17107d04d22f59c82a379d8fa059680166cfsewardj{
20fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   Int i;
21fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   for (i = 15; i >= 0; i--)
22fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      printf("%02x", (UInt)( (*vec)[i] ));
23fc9b17107d04d22f59c82a379d8fa059680166cfsewardj}
24fc9b17107d04d22f59c82a379d8fa059680166cfsewardj
25fc9b17107d04d22f59c82a379d8fa059680166cfsewardjvoid expand ( V128* dst, char* summary )
26fc9b17107d04d22f59c82a379d8fa059680166cfsewardj{
27fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   Int i;
28fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   assert( strlen(summary) == 16 );
29fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   for (i = 0; i < 16; i++) {
30fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      UChar xx = 0;
31fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      UChar x = summary[15-i];
32fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      if      (x >= '0' && x <= '9') { xx = x - '0'; }
33fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      else if (x >= 'A' && x <= 'F') { xx = x - 'A' + 10; }
34fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      else if (x >= 'a' && x <= 'f') { xx = x - 'a' + 10; }
35fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      else assert(0);
36fc9b17107d04d22f59c82a379d8fa059680166cfsewardj
37fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      assert(xx < 16);
38fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      xx = (xx << 4) | xx;
39fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      assert(xx < 256);
40fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      (*dst)[i] = xx;
41fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   }
42fc9b17107d04d22f59c82a379d8fa059680166cfsewardj}
43fc9b17107d04d22f59c82a379d8fa059680166cfsewardj
44fc9b17107d04d22f59c82a379d8fa059680166cfsewardjvoid one_test ( char* summL, ULong rdxIN, char* summR, ULong raxIN )
45fc9b17107d04d22f59c82a379d8fa059680166cfsewardj{
46fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   V128 argL, argR;
47fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   expand( &argL, summL );
48fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   expand( &argR, summR );
49fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   printf("\n");
50fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   printf("rdx %016llx  argL ", rdxIN);
51fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   show_V128(&argL);
52fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   printf("  rax %016llx  argR ", raxIN);
53fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   show_V128(&argR);
54fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   printf("\n");
55fc9b17107d04d22f59c82a379d8fa059680166cfsewardj
56fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   ULong block[ 2/*in:argL*/          // 0  0
57fc9b17107d04d22f59c82a379d8fa059680166cfsewardj                + 2/*in:argR*/        // 2  16
58fc9b17107d04d22f59c82a379d8fa059680166cfsewardj                + 1/*in:rdx*/         // 4  32
59fc9b17107d04d22f59c82a379d8fa059680166cfsewardj                + 1/*in:rax*/         // 5  40
60fc9b17107d04d22f59c82a379d8fa059680166cfsewardj                + 2/*inout:xmm0*/     // 6  48
61fc9b17107d04d22f59c82a379d8fa059680166cfsewardj                + 1/*inout:rcx*/      // 8  64
62fc9b17107d04d22f59c82a379d8fa059680166cfsewardj                + 1/*out:rflags*/ ];  // 9  72
63fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   assert(sizeof(block) == 80);
64fc9b17107d04d22f59c82a379d8fa059680166cfsewardj
65fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   UChar* blockC = (UChar*)&block[0];
66fc9b17107d04d22f59c82a379d8fa059680166cfsewardj
67fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   /* ---------------- ISTRI_4A ---------------- */
68fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memset(blockC, 0x55, 80);
69fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memcpy(blockC + 0,  &argL,  16);
70fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memcpy(blockC + 16, &argR,  16);
71fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memcpy(blockC + 24, &rdxIN, 8);
72fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memcpy(blockC + 32, &raxIN, 8);
73fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memcpy(blockC + 40, &rdxIN, 8);
74fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   __asm__ __volatile__(
75fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movupd    0(%0), %%xmm2"           "\n\t"
76fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movupd    16(%0), %%xmm13"         "\n\t"
77fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movq      32(%0), %%rdx"           "\n\t"
78fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movq      40(%0), %%rax"           "\n\t"
79fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movupd    48(%0), %%xmm0"          "\n\t"
80b1f90e040439f3eec6ae82d71dc0aabb725c6b30florian      "movw      64(%0), %%cx"            "\n\t"
81fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "pcmpistri $0x4A, %%xmm2, %%xmm13"  "\n\t"
82fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movupd    %%xmm0, 48(%0)"          "\n\t"
83b1f90e040439f3eec6ae82d71dc0aabb725c6b30florian      "movw      %%cx, 64(%0)"            "\n\t"
84fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "pushfq"                            "\n\t"
85fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "popq      %%r15"                   "\n\t"
86fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movq      %%r15, 72(%0)"           "\n\t"
87fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      : /*out*/
88fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      : /*in*/"r"(blockC)
89fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
90fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   );
91fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   printf("  istri $0x4A:  ");
92fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   printf("    xmm0 ");
93fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   show_V128( (V128*)(blockC+48) );
94fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
95fc9b17107d04d22f59c82a379d8fa059680166cfsewardj
96fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   /* ---------------- ISTRI_0A ---------------- */
97fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memset(blockC, 0x55, 80);
98fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memcpy(blockC + 0,  &argL,  16);
99fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memcpy(blockC + 16, &argR,  16);
100fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memcpy(blockC + 24, &rdxIN, 8);
101fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memcpy(blockC + 32, &raxIN, 8);
102fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memcpy(blockC + 40, &rdxIN, 8);
103fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   __asm__ __volatile__(
104fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movupd    0(%0), %%xmm2"           "\n\t"
105fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movupd    16(%0), %%xmm13"         "\n\t"
106fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movq      32(%0), %%rdx"           "\n\t"
107fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movq      40(%0), %%rax"           "\n\t"
108fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movupd    48(%0), %%xmm0"          "\n\t"
109b1f90e040439f3eec6ae82d71dc0aabb725c6b30florian      "movw      64(%0), %%cx"            "\n\t"
110fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "pcmpistri $0x0A, %%xmm2, %%xmm13"  "\n\t"
111fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movupd    %%xmm0, 48(%0)"          "\n\t"
112b1f90e040439f3eec6ae82d71dc0aabb725c6b30florian      "movw      %%cx, 64(%0)"            "\n\t"
113fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "pushfq"                            "\n\t"
114fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "popq      %%r15"                   "\n\t"
115fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movq      %%r15, 72(%0)"           "\n\t"
116fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      : /*out*/
117fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      : /*in*/"r"(blockC)
118fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
119fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   );
120fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   printf("  istri $0x0A:  ");
121fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   printf("    xmm0 ");
122fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   show_V128( (V128*)(blockC+48) );
123fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
124fc9b17107d04d22f59c82a379d8fa059680166cfsewardj
125fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   /* ---------------- ISTRM_4A ---------------- */
126fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memset(blockC, 0x55, 80);
127fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memcpy(blockC + 0,  &argL,  16);
128fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memcpy(blockC + 16, &argR,  16);
129fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memcpy(blockC + 24, &rdxIN, 8);
130fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memcpy(blockC + 32, &raxIN, 8);
131fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memcpy(blockC + 40, &rdxIN, 8);
132fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   __asm__ __volatile__(
133fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movupd    0(%0), %%xmm2"           "\n\t"
134fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movupd    16(%0), %%xmm13"         "\n\t"
135fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movq      32(%0), %%rdx"           "\n\t"
136fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movq      40(%0), %%rax"           "\n\t"
137fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movupd    48(%0), %%xmm0"          "\n\t"
138b1f90e040439f3eec6ae82d71dc0aabb725c6b30florian      "movw      64(%0), %%cx"            "\n\t"
139fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "pcmpistrm $0x4A, %%xmm2, %%xmm13"  "\n\t"
140fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movupd    %%xmm0, 48(%0)"          "\n\t"
141b1f90e040439f3eec6ae82d71dc0aabb725c6b30florian      "movw      %%cx, 64(%0)"            "\n\t"
142fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "pushfq"                            "\n\t"
143fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "popq      %%r15"                   "\n\t"
144fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movq      %%r15, 72(%0)"           "\n\t"
145fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      : /*out*/
146fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      : /*in*/"r"(blockC)
147fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
148fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   );
149fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   printf("  istrm $0x4A:  ");
150fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   printf("    xmm0 ");
151fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   show_V128( (V128*)(blockC+48) );
152fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
153fc9b17107d04d22f59c82a379d8fa059680166cfsewardj
154fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   /* ---------------- ISTRM_0A ---------------- */
155fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memset(blockC, 0x55, 80);
156fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memcpy(blockC + 0,  &argL,  16);
157fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memcpy(blockC + 16, &argR,  16);
158fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memcpy(blockC + 24, &rdxIN, 8);
159fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memcpy(blockC + 32, &raxIN, 8);
160fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memcpy(blockC + 40, &rdxIN, 8);
161fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   __asm__ __volatile__(
162fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movupd    0(%0), %%xmm2"           "\n\t"
163fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movupd    16(%0), %%xmm13"         "\n\t"
164fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movq      32(%0), %%rdx"           "\n\t"
165fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movq      40(%0), %%rax"           "\n\t"
166fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movupd    48(%0), %%xmm0"          "\n\t"
167b1f90e040439f3eec6ae82d71dc0aabb725c6b30florian      "movw      64(%0), %%cx"            "\n\t"
168fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "pcmpistrm $0x0A, %%xmm2, %%xmm13"  "\n\t"
169fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movupd    %%xmm0, 48(%0)"          "\n\t"
170b1f90e040439f3eec6ae82d71dc0aabb725c6b30florian      "movw      %%cx, 64(%0)"            "\n\t"
171fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "pushfq"                            "\n\t"
172fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "popq      %%r15"                   "\n\t"
173fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movq      %%r15, 72(%0)"           "\n\t"
174fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      : /*out*/
175fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      : /*in*/"r"(blockC)
176fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
177fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   );
178fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   printf("  istrm $0x0A:  ");
179fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   printf("    xmm0 ");
180fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   show_V128( (V128*)(blockC+48) );
181fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
182fc9b17107d04d22f59c82a379d8fa059680166cfsewardj
183fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   /* ---------------- ESTRI_4A ---------------- */
184fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memset(blockC, 0x55, 80);
185fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memcpy(blockC + 0,  &argL,  16);
186fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memcpy(blockC + 16, &argR,  16);
187fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memcpy(blockC + 24, &rdxIN, 8);
188fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memcpy(blockC + 32, &raxIN, 8);
189fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memcpy(blockC + 40, &rdxIN, 8);
190fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   __asm__ __volatile__(
191fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movupd    0(%0), %%xmm2"           "\n\t"
192fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movupd    16(%0), %%xmm13"         "\n\t"
193fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movq      32(%0), %%rdx"           "\n\t"
194fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movq      40(%0), %%rax"           "\n\t"
195fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movupd    48(%0), %%xmm0"          "\n\t"
196b1f90e040439f3eec6ae82d71dc0aabb725c6b30florian      "movw      64(%0), %%cx"            "\n\t"
197fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "pcmpestri $0x4A, %%xmm2, %%xmm13"  "\n\t"
198fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movupd    %%xmm0, 48(%0)"          "\n\t"
199b1f90e040439f3eec6ae82d71dc0aabb725c6b30florian      "movw      %%cx, 64(%0)"            "\n\t"
200fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "pushfq"                            "\n\t"
201fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "popq      %%r15"                   "\n\t"
202fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movq      %%r15, 72(%0)"           "\n\t"
203fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      : /*out*/
204fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      : /*in*/"r"(blockC)
205fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
206fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   );
207fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   printf("  estri $0x4A:  ");
208fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   printf("    xmm0 ");
209fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   show_V128( (V128*)(blockC+48) );
210fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
211fc9b17107d04d22f59c82a379d8fa059680166cfsewardj
212fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   /* ---------------- ESTRI_0A ---------------- */
213fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memset(blockC, 0x55, 80);
214fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memcpy(blockC + 0,  &argL,  16);
215fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memcpy(blockC + 16, &argR,  16);
216fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memcpy(blockC + 24, &rdxIN, 8);
217fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memcpy(blockC + 32, &raxIN, 8);
218fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memcpy(blockC + 40, &rdxIN, 8);
219fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   __asm__ __volatile__(
220fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movupd    0(%0), %%xmm2"           "\n\t"
221fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movupd    16(%0), %%xmm13"         "\n\t"
222fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movq      32(%0), %%rdx"           "\n\t"
223fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movq      40(%0), %%rax"           "\n\t"
224fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movupd    48(%0), %%xmm0"          "\n\t"
225b1f90e040439f3eec6ae82d71dc0aabb725c6b30florian      "movw      64(%0), %%cx"            "\n\t"
226fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "pcmpestri $0x0A, %%xmm2, %%xmm13"  "\n\t"
227fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movupd    %%xmm0, 48(%0)"          "\n\t"
228b1f90e040439f3eec6ae82d71dc0aabb725c6b30florian      "movw      %%cx, 64(%0)"            "\n\t"
229fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "pushfq"                            "\n\t"
230fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "popq      %%r15"                   "\n\t"
231fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movq      %%r15, 72(%0)"           "\n\t"
232fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      : /*out*/
233fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      : /*in*/"r"(blockC)
234fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
235fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   );
236fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   printf("  estri $0x0A:  ");
237fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   printf("    xmm0 ");
238fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   show_V128( (V128*)(blockC+48) );
239fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
240fc9b17107d04d22f59c82a379d8fa059680166cfsewardj
241fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   /* ---------------- ESTRM_4A ---------------- */
242fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memset(blockC, 0x55, 80);
243fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memcpy(blockC + 0,  &argL,  16);
244fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memcpy(blockC + 16, &argR,  16);
245fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memcpy(blockC + 24, &rdxIN, 8);
246fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memcpy(blockC + 32, &raxIN, 8);
247fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memcpy(blockC + 40, &rdxIN, 8);
248fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   __asm__ __volatile__(
249fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movupd    0(%0), %%xmm2"           "\n\t"
250fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movupd    16(%0), %%xmm13"         "\n\t"
251fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movq      32(%0), %%rdx"           "\n\t"
252fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movq      40(%0), %%rax"           "\n\t"
253fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movupd    48(%0), %%xmm0"          "\n\t"
254b1f90e040439f3eec6ae82d71dc0aabb725c6b30florian      "movw      64(%0), %%cx"            "\n\t"
255fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "pcmpestrm $0x4A, %%xmm2, %%xmm13"  "\n\t"
256fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movupd    %%xmm0, 48(%0)"          "\n\t"
257b1f90e040439f3eec6ae82d71dc0aabb725c6b30florian      "movw      %%cx, 64(%0)"            "\n\t"
258fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "pushfq"                            "\n\t"
259fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "popq      %%r15"                   "\n\t"
260fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movq      %%r15, 72(%0)"           "\n\t"
261fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      : /*out*/
262fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      : /*in*/"r"(blockC)
263fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
264fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   );
265fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   printf("  estrm $0x4A:  ");
266fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   printf("    xmm0 ");
267fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   show_V128( (V128*)(blockC+48) );
268fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
269fc9b17107d04d22f59c82a379d8fa059680166cfsewardj
270fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   /* ---------------- ESTRM_0A ---------------- */
271fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memset(blockC, 0x55, 80);
272fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memcpy(blockC + 0,  &argL,  16);
273fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memcpy(blockC + 16, &argR,  16);
274fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memcpy(blockC + 24, &rdxIN, 8);
275fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memcpy(blockC + 32, &raxIN, 8);
276fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   memcpy(blockC + 40, &rdxIN, 8);
277fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   __asm__ __volatile__(
278fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movupd    0(%0), %%xmm2"           "\n\t"
279fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movupd    16(%0), %%xmm13"         "\n\t"
280fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movq      32(%0), %%rdx"           "\n\t"
281fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movq      40(%0), %%rax"           "\n\t"
282fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movupd    48(%0), %%xmm0"          "\n\t"
283b1f90e040439f3eec6ae82d71dc0aabb725c6b30florian      "movw      64(%0), %%cx"            "\n\t"
284fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "pcmpestrm $0x0A, %%xmm2, %%xmm13"  "\n\t"
285fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movupd    %%xmm0, 48(%0)"          "\n\t"
286b1f90e040439f3eec6ae82d71dc0aabb725c6b30florian      "movw      %%cx, 64(%0)"            "\n\t"
287fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "pushfq"                            "\n\t"
288fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "popq      %%r15"                   "\n\t"
289fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      "movq      %%r15, 72(%0)"           "\n\t"
290fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      : /*out*/
291fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      : /*in*/"r"(blockC)
292fc9b17107d04d22f59c82a379d8fa059680166cfsewardj      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
293fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   );
294fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   printf("  estrm $0x0A:  ");
295fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   printf("    xmm0 ");
296fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   show_V128( (V128*)(blockC+48) );
297fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
298fc9b17107d04d22f59c82a379d8fa059680166cfsewardj
299fc9b17107d04d22f59c82a379d8fa059680166cfsewardj
300fc9b17107d04d22f59c82a379d8fa059680166cfsewardj
301fc9b17107d04d22f59c82a379d8fa059680166cfsewardj
302fc9b17107d04d22f59c82a379d8fa059680166cfsewardj}
303fc9b17107d04d22f59c82a379d8fa059680166cfsewardj
304fc9b17107d04d22f59c82a379d8fa059680166cfsewardjint main ( void )
305fc9b17107d04d22f59c82a379d8fa059680166cfsewardj{
306fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaa0aaaaaaa", 0 );
307fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   one_test("0000000000000000", 0, "aaaaaaaa0aaaaaaa", 0 );
308fc9b17107d04d22f59c82a379d8fa059680166cfsewardj
309fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 0 );
310fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 0 );
311fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 6 );
312fc9b17107d04d22f59c82a379d8fa059680166cfsewardj
313fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 6 );
314fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 15 );
315fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 16 );
316fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 17 );
317fc9b17107d04d22f59c82a379d8fa059680166cfsewardj
318fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -6 );
319fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -15 );
320fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -16 );
321fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -17 );
322fc9b17107d04d22f59c82a379d8fa059680166cfsewardj
323fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   one_test("aaaaaaaaaaaaaaaa", 5,  "aaaaaaaaaaaaaaaa", 6 );
324fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   one_test("aaaaaaaaaaaaaaaa", 15, "aaaaaaaaaaaaaaaa", 6 );
325fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   one_test("aaaaaaaaaaaaaaaa", 16, "aaaaaaaaaaaaaaaa", 6 );
326fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   one_test("aaaaaaaaaaaaaaaa", 17, "aaaaaaaaaaaaaaaa", 6 );
327fc9b17107d04d22f59c82a379d8fa059680166cfsewardj
328fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   one_test("aaaaaaaaaaaaaaaa", -5,  "aaaaaaaaaaaaaaaa", 6 );
329fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   one_test("aaaaaaaaaaaaaaaa", -15, "aaaaaaaaaaaaaaaa", 6 );
330fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   one_test("aaaaaaaaaaaaaaaa", -16, "aaaaaaaaaaaaaaaa", 6 );
331fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   one_test("aaaaaaaaaaaaaaaa", -17, "aaaaaaaaaaaaaaaa", 6 );
332fc9b17107d04d22f59c82a379d8fa059680166cfsewardj
333fc9b17107d04d22f59c82a379d8fa059680166cfsewardj   return 0;
334fc9b17107d04d22f59c82a379d8fa059680166cfsewardj}
335