1
2/* Tests e-vs-i or i-vs-m aspects for pcmp{e,i}str{i,m}.  Does not
3   check the core arithmetic in any detail. This file checks the 16-bit
4   character versions (w is for wide) */
5
6#include <string.h>
7#include <stdio.h>
8#include <assert.h>
9
/* Short aliases for the primitive integer types used by this test. */
typedef  unsigned char           UChar;
typedef  signed int              Int;
typedef  unsigned int            UInt;
typedef  unsigned long long int  ULong;

/* A 128-bit vector handled as 16 individual bytes. */
typedef  UChar  V128[16];

/* Minimal boolean type and its two values. */
typedef  UChar  Bool;
#define False ((Bool)0)
#define True  ((Bool)1)
18
19void show_V128 ( V128* vec )
20{
21   Int i;
22   for (i = 15; i >= 0; i--)
23      printf("%02x", (UInt)( (*vec)[i] ));
24}
25
26void expand ( V128* dst, char* summary )
27{
28   Int i;
29   assert( strlen(summary) == 16 );
30   for (i = 0; i < 16; i++) {
31      UChar xx = 0;
32      UChar x = summary[15-i];
33      if      (x >= '0' && x <= '9') { xx = x - '0'; }
34      else if (x >= 'A' && x <= 'F') { xx = x - 'A' + 10; }
35      else if (x >= 'a' && x <= 'f') { xx = x - 'a' + 10; }
36      else assert(0);
37
38      assert(xx < 16);
39      xx = (xx << 4) | xx;
40      assert(xx < 256);
41      (*dst)[i] = xx;
42   }
43}
44
45void one_test ( char* summL, ULong rdxIN, char* summR, ULong raxIN )
46{
47   V128 argL, argR;
48   expand( &argL, summL );
49   expand( &argR, summR );
50   printf("\n");
51   printf("rdx %016llx  argL ", rdxIN);
52   show_V128(&argL);
53   printf("  rax %016llx  argR ", raxIN);
54   show_V128(&argR);
55   printf("\n");
56
57   ULong block[ 2/*in:argL*/          // 0  0
58                + 2/*in:argR*/        // 2  16
59                + 1/*in:rdx*/         // 4  32
60                + 1/*in:rax*/         // 5  40
61                + 2/*inout:xmm0*/     // 6  48
62                + 1/*inout:rcx*/      // 8  64
63                + 1/*out:rflags*/ ];  // 9  72
64   assert(sizeof(block) == 80);
65
66   UChar* blockC = (UChar*)&block[0];
67
68   /* ---------------- ISTRI_4B ---------------- */
69   memset(blockC, 0x55, 80);
70   memcpy(blockC + 0,  &argL,  16);
71   memcpy(blockC + 16, &argR,  16);
72   memcpy(blockC + 24, &rdxIN, 8);
73   memcpy(blockC + 32, &raxIN, 8);
74   memcpy(blockC + 40, &rdxIN, 8);
75   __asm__ __volatile__(
76      "movupd    0(%0), %%xmm2"           "\n\t"
77      "movupd    16(%0), %%xmm13"         "\n\t"
78      "movq      32(%0), %%rdx"           "\n\t"
79      "movq      40(%0), %%rax"           "\n\t"
80      "movupd    48(%0), %%xmm0"          "\n\t"
81      "movw      64(%0), %%cx"            "\n\t"
82      "pcmpistri $0x4B, %%xmm2, %%xmm13"  "\n\t"
83      "movupd    %%xmm0, 48(%0)"          "\n\t"
84      "movw      %%cx, 64(%0)"            "\n\t"
85      "pushfq"                            "\n\t"
86      "popq      %%r15"                   "\n\t"
87      "movq      %%r15, 72(%0)"           "\n\t"
88      : /*out*/
89      : /*in*/"r"(blockC)
90      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
91   );
92   printf("  istri $0x4B:  ");
93   printf("    xmm0 ");
94   show_V128( (V128*)(blockC+48) );
95   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
96
97   /* ---------------- ISTRI_0B ---------------- */
98   memset(blockC, 0x55, 80);
99   memcpy(blockC + 0,  &argL,  16);
100   memcpy(blockC + 16, &argR,  16);
101   memcpy(blockC + 24, &rdxIN, 8);
102   memcpy(blockC + 32, &raxIN, 8);
103   memcpy(blockC + 40, &rdxIN, 8);
104   __asm__ __volatile__(
105      "movupd    0(%0), %%xmm2"           "\n\t"
106      "movupd    16(%0), %%xmm13"         "\n\t"
107      "movq      32(%0), %%rdx"           "\n\t"
108      "movq      40(%0), %%rax"           "\n\t"
109      "movupd    48(%0), %%xmm0"          "\n\t"
110      "movw      64(%0), %%cx"            "\n\t"
111      "pcmpistri $0x0B, %%xmm2, %%xmm13"  "\n\t"
112      "movupd    %%xmm0, 48(%0)"          "\n\t"
113      "movw      %%cx, 64(%0)"            "\n\t"
114      "pushfq"                            "\n\t"
115      "popq      %%r15"                   "\n\t"
116      "movq      %%r15, 72(%0)"           "\n\t"
117      : /*out*/
118      : /*in*/"r"(blockC)
119      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
120   );
121   printf("  istri $0x0B:  ");
122   printf("    xmm0 ");
123   show_V128( (V128*)(blockC+48) );
124   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
125
126   /* ---------------- ISTRM_4B ---------------- */
127   memset(blockC, 0x55, 80);
128   memcpy(blockC + 0,  &argL,  16);
129   memcpy(blockC + 16, &argR,  16);
130   memcpy(blockC + 24, &rdxIN, 8);
131   memcpy(blockC + 32, &raxIN, 8);
132   memcpy(blockC + 40, &rdxIN, 8);
133   __asm__ __volatile__(
134      "movupd    0(%0), %%xmm2"           "\n\t"
135      "movupd    16(%0), %%xmm13"         "\n\t"
136      "movq      32(%0), %%rdx"           "\n\t"
137      "movq      40(%0), %%rax"           "\n\t"
138      "movupd    48(%0), %%xmm0"          "\n\t"
139      "movw      64(%0), %%cx"            "\n\t"
140      "pcmpistrm $0x4B, %%xmm2, %%xmm13"  "\n\t"
141      "movupd    %%xmm0, 48(%0)"          "\n\t"
142      "movw      %%cx, 64(%0)"            "\n\t"
143      "pushfq"                            "\n\t"
144      "popq      %%r15"                   "\n\t"
145      "movq      %%r15, 72(%0)"           "\n\t"
146      : /*out*/
147      : /*in*/"r"(blockC)
148      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
149   );
150   printf("  istrm $0x4B:  ");
151   printf("    xmm0 ");
152   show_V128( (V128*)(blockC+48) );
153   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
154
155   /* ---------------- ISTRM_0B ---------------- */
156   memset(blockC, 0x55, 80);
157   memcpy(blockC + 0,  &argL,  16);
158   memcpy(blockC + 16, &argR,  16);
159   memcpy(blockC + 24, &rdxIN, 8);
160   memcpy(blockC + 32, &raxIN, 8);
161   memcpy(blockC + 40, &rdxIN, 8);
162   __asm__ __volatile__(
163      "movupd    0(%0), %%xmm2"           "\n\t"
164      "movupd    16(%0), %%xmm13"         "\n\t"
165      "movq      32(%0), %%rdx"           "\n\t"
166      "movq      40(%0), %%rax"           "\n\t"
167      "movupd    48(%0), %%xmm0"          "\n\t"
168      "movw      64(%0), %%cx"            "\n\t"
169      "pcmpistrm $0x0B, %%xmm2, %%xmm13"  "\n\t"
170      "movupd    %%xmm0, 48(%0)"          "\n\t"
171      "movw      %%cx, 64(%0)"            "\n\t"
172      "pushfq"                            "\n\t"
173      "popq      %%r15"                   "\n\t"
174      "movq      %%r15, 72(%0)"           "\n\t"
175      : /*out*/
176      : /*in*/"r"(blockC)
177      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
178   );
179   printf("  istrm $0x0B:  ");
180   printf("    xmm0 ");
181   show_V128( (V128*)(blockC+48) );
182   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
183
184   /* ---------------- ESTRI_4B ---------------- */
185   memset(blockC, 0x55, 80);
186   memcpy(blockC + 0,  &argL,  16);
187   memcpy(blockC + 16, &argR,  16);
188   memcpy(blockC + 24, &rdxIN, 8);
189   memcpy(blockC + 32, &raxIN, 8);
190   memcpy(blockC + 40, &rdxIN, 8);
191   __asm__ __volatile__(
192      "movupd    0(%0), %%xmm2"           "\n\t"
193      "movupd    16(%0), %%xmm13"         "\n\t"
194      "movq      32(%0), %%rdx"           "\n\t"
195      "movq      40(%0), %%rax"           "\n\t"
196      "movupd    48(%0), %%xmm0"          "\n\t"
197      "movw      64(%0), %%cx"            "\n\t"
198      "pcmpestri $0x4B, %%xmm2, %%xmm13"  "\n\t"
199      "movupd    %%xmm0, 48(%0)"          "\n\t"
200      "movw      %%cx, 64(%0)"            "\n\t"
201      "pushfq"                            "\n\t"
202      "popq      %%r15"                   "\n\t"
203      "movq      %%r15, 72(%0)"           "\n\t"
204      : /*out*/
205      : /*in*/"r"(blockC)
206      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
207   );
208   printf("  estri $0x4B:  ");
209   printf("    xmm0 ");
210   show_V128( (V128*)(blockC+48) );
211   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
212
213   /* ---------------- ESTRI_0B ---------------- */
214   memset(blockC, 0x55, 80);
215   memcpy(blockC + 0,  &argL,  16);
216   memcpy(blockC + 16, &argR,  16);
217   memcpy(blockC + 24, &rdxIN, 8);
218   memcpy(blockC + 32, &raxIN, 8);
219   memcpy(blockC + 40, &rdxIN, 8);
220   __asm__ __volatile__(
221      "movupd    0(%0), %%xmm2"           "\n\t"
222      "movupd    16(%0), %%xmm13"         "\n\t"
223      "movq      32(%0), %%rdx"           "\n\t"
224      "movq      40(%0), %%rax"           "\n\t"
225      "movupd    48(%0), %%xmm0"          "\n\t"
226      "movw      64(%0), %%cx"            "\n\t"
227      "pcmpestri $0x0B, %%xmm2, %%xmm13"  "\n\t"
228      "movupd    %%xmm0, 48(%0)"          "\n\t"
229      "movw      %%cx, 64(%0)"            "\n\t"
230      "pushfq"                            "\n\t"
231      "popq      %%r15"                   "\n\t"
232      "movq      %%r15, 72(%0)"           "\n\t"
233      : /*out*/
234      : /*in*/"r"(blockC)
235      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
236   );
237   printf("  estri $0x0B:  ");
238   printf("    xmm0 ");
239   show_V128( (V128*)(blockC+48) );
240   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
241
242   /* ---------------- ESTRM_4B ---------------- */
243   memset(blockC, 0x55, 80);
244   memcpy(blockC + 0,  &argL,  16);
245   memcpy(blockC + 16, &argR,  16);
246   memcpy(blockC + 24, &rdxIN, 8);
247   memcpy(blockC + 32, &raxIN, 8);
248   memcpy(blockC + 40, &rdxIN, 8);
249   __asm__ __volatile__(
250      "movupd    0(%0), %%xmm2"           "\n\t"
251      "movupd    16(%0), %%xmm13"         "\n\t"
252      "movq      32(%0), %%rdx"           "\n\t"
253      "movq      40(%0), %%rax"           "\n\t"
254      "movupd    48(%0), %%xmm0"          "\n\t"
255      "movw      64(%0), %%cx"            "\n\t"
256      "pcmpestrm $0x4B, %%xmm2, %%xmm13"  "\n\t"
257      "movupd    %%xmm0, 48(%0)"          "\n\t"
258      "movw      %%cx, 64(%0)"            "\n\t"
259      "pushfq"                            "\n\t"
260      "popq      %%r15"                   "\n\t"
261      "movq      %%r15, 72(%0)"           "\n\t"
262      : /*out*/
263      : /*in*/"r"(blockC)
264      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
265   );
266   printf("  estrm $0x4B:  ");
267   printf("    xmm0 ");
268   show_V128( (V128*)(blockC+48) );
269   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
270
271   /* ---------------- ESTRM_0B ---------------- */
272   memset(blockC, 0x55, 80);
273   memcpy(blockC + 0,  &argL,  16);
274   memcpy(blockC + 16, &argR,  16);
275   memcpy(blockC + 24, &rdxIN, 8);
276   memcpy(blockC + 32, &raxIN, 8);
277   memcpy(blockC + 40, &rdxIN, 8);
278   __asm__ __volatile__(
279      "movupd    0(%0), %%xmm2"           "\n\t"
280      "movupd    16(%0), %%xmm13"         "\n\t"
281      "movq      32(%0), %%rdx"           "\n\t"
282      "movq      40(%0), %%rax"           "\n\t"
283      "movupd    48(%0), %%xmm0"          "\n\t"
284      "movw      64(%0), %%cx"            "\n\t"
285      "pcmpestrm $0x0B, %%xmm2, %%xmm13"  "\n\t"
286      "movupd    %%xmm0, 48(%0)"          "\n\t"
287      "movw      %%cx, 64(%0)"            "\n\t"
288      "pushfq"                            "\n\t"
289      "popq      %%r15"                   "\n\t"
290      "movq      %%r15, 72(%0)"           "\n\t"
291      : /*out*/
292      : /*in*/"r"(blockC)
293      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
294   );
295   printf("  estrm $0x0B:  ");
296   printf("    xmm0 ");
297   show_V128( (V128*)(blockC+48) );
298   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
299
300
301
302
303}
304
305int main ( void )
306{
307   one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaa00aaaaaa", 0 );
308   one_test("0000000000000000", 0, "aaaaaaaa00aaaaaa", 0 );
309
310   one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 0 );
311   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 0 );
312   one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 6 );
313
314   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 6 );
315   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 15 );
316   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 16 );
317   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 17 );
318
319   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -6 );
320   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -15 );
321   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -16 );
322   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -17 );
323
324   one_test("aaaaaaaaaaaaaaaa", 5,  "aaaaaaaaaaaaaaaa", 6 );
325   one_test("aaaaaaaaaaaaaaaa", 15, "aaaaaaaaaaaaaaaa", 6 );
326   one_test("aaaaaaaaaaaaaaaa", 16, "aaaaaaaaaaaaaaaa", 6 );
327   one_test("aaaaaaaaaaaaaaaa", 17, "aaaaaaaaaaaaaaaa", 6 );
328
329   one_test("aaaaaaaaaaaaaaaa", -5,  "aaaaaaaaaaaaaaaa", 6 );
330   one_test("aaaaaaaaaaaaaaaa", -15, "aaaaaaaaaaaaaaaa", 6 );
331   one_test("aaaaaaaaaaaaaaaa", -16, "aaaaaaaaaaaaaaaa", 6 );
332   one_test("aaaaaaaaaaaaaaaa", -17, "aaaaaaaaaaaaaaaa", 6 );
333
334   return 0;
335}
336