/* pcmpxstrx64.c revision 8f943afc22a6a683b78271836c8ddc462b4824a9 */

/* Tests the explicit-vs-implicit-length (e vs i) and index-vs-mask
   (i vs m) aspects of pcmp{e,i}str{i,m}.  Does not check the core
   arithmetic in any detail.  */

#include <assert.h>
#include <stdio.h>
#include <string.h>

/* Short type names used throughout this test. */
typedef  unsigned char  V128[16];          /* one 128-bit vector, as 16 bytes */
typedef  unsigned int   UInt;
typedef  signed int     Int;
typedef  unsigned char  UChar;
typedef  unsigned long long int ULong;     /* printed with %llx below */
typedef  UChar          Bool;
#define False ((Bool)0)
#define True  ((Bool)1)

18void show_V128 ( V128* vec )
19{
20   Int i;
21   for (i = 15; i >= 0; i--)
22      printf("%02x", (UInt)( (*vec)[i] ));
23}
24
25void expand ( V128* dst, char* summary )
26{
27   Int i;
28   assert( strlen(summary) == 16 );
29   for (i = 0; i < 16; i++) {
30      UChar xx = 0;
31      UChar x = summary[15-i];
32      if      (x >= '0' && x <= '9') { xx = x - '0'; }
33      else if (x >= 'A' && x <= 'F') { xx = x - 'A' + 10; }
34      else if (x >= 'a' && x <= 'f') { xx = x - 'a' + 10; }
35      else assert(0);
36
37      assert(xx < 16);
38      xx = (xx << 4) | xx;
39      assert(xx < 256);
40      (*dst)[i] = xx;
41   }
42}
43
44void one_test ( char* summL, ULong rdxIN, char* summR, ULong raxIN )
45{
46   V128 argL, argR;
47   expand( &argL, summL );
48   expand( &argR, summR );
49   printf("\n");
50   printf("rdx %016llx  argL ", rdxIN);
51   show_V128(&argL);
52   printf("  rax %016llx  argR ", raxIN);
53   show_V128(&argR);
54   printf("\n");
55
56   ULong block[ 2/*in:argL*/          // 0  0
57                + 2/*in:argR*/        // 2  16
58                + 1/*in:rdx*/         // 4  32
59                + 1/*in:rax*/         // 5  40
60                + 2/*inout:xmm0*/     // 6  48
61                + 1/*inout:rcx*/      // 8  64
62                + 1/*out:rflags*/ ];  // 9  72
63   assert(sizeof(block) == 80);
64
65   UChar* blockC = (UChar*)&block[0];
66
67   /* ---------------- ISTRI_4A ---------------- */
68   memset(blockC, 0x55, 80);
69   memcpy(blockC + 0,  &argL,  16);
70   memcpy(blockC + 16, &argR,  16);
71   memcpy(blockC + 24, &rdxIN, 8);
72   memcpy(blockC + 32, &raxIN, 8);
73   memcpy(blockC + 40, &rdxIN, 8);
74   __asm__ __volatile__(
75      "movupd    0(%0), %%xmm2"           "\n\t"
76      "movupd    16(%0), %%xmm13"         "\n\t"
77      "movq      32(%0), %%rdx"           "\n\t"
78      "movq      40(%0), %%rax"           "\n\t"
79      "movupd    48(%0), %%xmm0"          "\n\t"
80      "movw      64(%0), %%rcx"           "\n\t"
81      "pcmpistri $0x4A, %%xmm2, %%xmm13"  "\n\t"
82      "movupd    %%xmm0, 48(%0)"          "\n\t"
83      "movw      %%rcx, 64(%0)"           "\n\t"
84      "pushfq"                            "\n\t"
85      "popq      %%r15"                   "\n\t"
86      "movq      %%r15, 72(%0)"           "\n\t"
87      : /*out*/
88      : /*in*/"r"(blockC)
89      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
90   );
91   printf("  istri $0x4A:  ");
92   printf("    xmm0 ");
93   show_V128( (V128*)(blockC+48) );
94   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
95
96   /* ---------------- ISTRI_0A ---------------- */
97   memset(blockC, 0x55, 80);
98   memcpy(blockC + 0,  &argL,  16);
99   memcpy(blockC + 16, &argR,  16);
100   memcpy(blockC + 24, &rdxIN, 8);
101   memcpy(blockC + 32, &raxIN, 8);
102   memcpy(blockC + 40, &rdxIN, 8);
103   __asm__ __volatile__(
104      "movupd    0(%0), %%xmm2"           "\n\t"
105      "movupd    16(%0), %%xmm13"         "\n\t"
106      "movq      32(%0), %%rdx"           "\n\t"
107      "movq      40(%0), %%rax"           "\n\t"
108      "movupd    48(%0), %%xmm0"          "\n\t"
109      "movw      64(%0), %%rcx"           "\n\t"
110      "pcmpistri $0x0A, %%xmm2, %%xmm13"  "\n\t"
111      "movupd    %%xmm0, 48(%0)"          "\n\t"
112      "movw      %%rcx, 64(%0)"           "\n\t"
113      "pushfq"                            "\n\t"
114      "popq      %%r15"                   "\n\t"
115      "movq      %%r15, 72(%0)"           "\n\t"
116      : /*out*/
117      : /*in*/"r"(blockC)
118      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
119   );
120   printf("  istri $0x0A:  ");
121   printf("    xmm0 ");
122   show_V128( (V128*)(blockC+48) );
123   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
124
125   /* ---------------- ISTRM_4A ---------------- */
126   memset(blockC, 0x55, 80);
127   memcpy(blockC + 0,  &argL,  16);
128   memcpy(blockC + 16, &argR,  16);
129   memcpy(blockC + 24, &rdxIN, 8);
130   memcpy(blockC + 32, &raxIN, 8);
131   memcpy(blockC + 40, &rdxIN, 8);
132   __asm__ __volatile__(
133      "movupd    0(%0), %%xmm2"           "\n\t"
134      "movupd    16(%0), %%xmm13"         "\n\t"
135      "movq      32(%0), %%rdx"           "\n\t"
136      "movq      40(%0), %%rax"           "\n\t"
137      "movupd    48(%0), %%xmm0"          "\n\t"
138      "movw      64(%0), %%rcx"           "\n\t"
139      "pcmpistrm $0x4A, %%xmm2, %%xmm13"  "\n\t"
140      "movupd    %%xmm0, 48(%0)"          "\n\t"
141      "movw      %%rcx, 64(%0)"           "\n\t"
142      "pushfq"                            "\n\t"
143      "popq      %%r15"                   "\n\t"
144      "movq      %%r15, 72(%0)"           "\n\t"
145      : /*out*/
146      : /*in*/"r"(blockC)
147      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
148   );
149   printf("  istrm $0x4A:  ");
150   printf("    xmm0 ");
151   show_V128( (V128*)(blockC+48) );
152   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
153
154   /* ---------------- ISTRM_0A ---------------- */
155   memset(blockC, 0x55, 80);
156   memcpy(blockC + 0,  &argL,  16);
157   memcpy(blockC + 16, &argR,  16);
158   memcpy(blockC + 24, &rdxIN, 8);
159   memcpy(blockC + 32, &raxIN, 8);
160   memcpy(blockC + 40, &rdxIN, 8);
161   __asm__ __volatile__(
162      "movupd    0(%0), %%xmm2"           "\n\t"
163      "movupd    16(%0), %%xmm13"         "\n\t"
164      "movq      32(%0), %%rdx"           "\n\t"
165      "movq      40(%0), %%rax"           "\n\t"
166      "movupd    48(%0), %%xmm0"          "\n\t"
167      "movw      64(%0), %%rcx"           "\n\t"
168      "pcmpistrm $0x0A, %%xmm2, %%xmm13"  "\n\t"
169      "movupd    %%xmm0, 48(%0)"          "\n\t"
170      "movw      %%rcx, 64(%0)"           "\n\t"
171      "pushfq"                            "\n\t"
172      "popq      %%r15"                   "\n\t"
173      "movq      %%r15, 72(%0)"           "\n\t"
174      : /*out*/
175      : /*in*/"r"(blockC)
176      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
177   );
178   printf("  istrm $0x0A:  ");
179   printf("    xmm0 ");
180   show_V128( (V128*)(blockC+48) );
181   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
182
183   /* ---------------- ESTRI_4A ---------------- */
184   memset(blockC, 0x55, 80);
185   memcpy(blockC + 0,  &argL,  16);
186   memcpy(blockC + 16, &argR,  16);
187   memcpy(blockC + 24, &rdxIN, 8);
188   memcpy(blockC + 32, &raxIN, 8);
189   memcpy(blockC + 40, &rdxIN, 8);
190   __asm__ __volatile__(
191      "movupd    0(%0), %%xmm2"           "\n\t"
192      "movupd    16(%0), %%xmm13"         "\n\t"
193      "movq      32(%0), %%rdx"           "\n\t"
194      "movq      40(%0), %%rax"           "\n\t"
195      "movupd    48(%0), %%xmm0"          "\n\t"
196      "movw      64(%0), %%rcx"           "\n\t"
197      "pcmpestri $0x4A, %%xmm2, %%xmm13"  "\n\t"
198      "movupd    %%xmm0, 48(%0)"          "\n\t"
199      "movw      %%rcx, 64(%0)"           "\n\t"
200      "pushfq"                            "\n\t"
201      "popq      %%r15"                   "\n\t"
202      "movq      %%r15, 72(%0)"           "\n\t"
203      : /*out*/
204      : /*in*/"r"(blockC)
205      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
206   );
207   printf("  estri $0x4A:  ");
208   printf("    xmm0 ");
209   show_V128( (V128*)(blockC+48) );
210   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
211
212   /* ---------------- ESTRI_0A ---------------- */
213   memset(blockC, 0x55, 80);
214   memcpy(blockC + 0,  &argL,  16);
215   memcpy(blockC + 16, &argR,  16);
216   memcpy(blockC + 24, &rdxIN, 8);
217   memcpy(blockC + 32, &raxIN, 8);
218   memcpy(blockC + 40, &rdxIN, 8);
219   __asm__ __volatile__(
220      "movupd    0(%0), %%xmm2"           "\n\t"
221      "movupd    16(%0), %%xmm13"         "\n\t"
222      "movq      32(%0), %%rdx"           "\n\t"
223      "movq      40(%0), %%rax"           "\n\t"
224      "movupd    48(%0), %%xmm0"          "\n\t"
225      "movw      64(%0), %%rcx"           "\n\t"
226      "pcmpestri $0x0A, %%xmm2, %%xmm13"  "\n\t"
227      "movupd    %%xmm0, 48(%0)"          "\n\t"
228      "movw      %%rcx, 64(%0)"           "\n\t"
229      "pushfq"                            "\n\t"
230      "popq      %%r15"                   "\n\t"
231      "movq      %%r15, 72(%0)"           "\n\t"
232      : /*out*/
233      : /*in*/"r"(blockC)
234      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
235   );
236   printf("  estri $0x0A:  ");
237   printf("    xmm0 ");
238   show_V128( (V128*)(blockC+48) );
239   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
240
241   /* ---------------- ESTRM_4A ---------------- */
242   memset(blockC, 0x55, 80);
243   memcpy(blockC + 0,  &argL,  16);
244   memcpy(blockC + 16, &argR,  16);
245   memcpy(blockC + 24, &rdxIN, 8);
246   memcpy(blockC + 32, &raxIN, 8);
247   memcpy(blockC + 40, &rdxIN, 8);
248   __asm__ __volatile__(
249      "movupd    0(%0), %%xmm2"           "\n\t"
250      "movupd    16(%0), %%xmm13"         "\n\t"
251      "movq      32(%0), %%rdx"           "\n\t"
252      "movq      40(%0), %%rax"           "\n\t"
253      "movupd    48(%0), %%xmm0"          "\n\t"
254      "movw      64(%0), %%rcx"           "\n\t"
255      "pcmpestrm $0x4A, %%xmm2, %%xmm13"  "\n\t"
256      "movupd    %%xmm0, 48(%0)"          "\n\t"
257      "movw      %%rcx, 64(%0)"           "\n\t"
258      "pushfq"                            "\n\t"
259      "popq      %%r15"                   "\n\t"
260      "movq      %%r15, 72(%0)"           "\n\t"
261      : /*out*/
262      : /*in*/"r"(blockC)
263      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
264   );
265   printf("  estrm $0x4A:  ");
266   printf("    xmm0 ");
267   show_V128( (V128*)(blockC+48) );
268   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
269
270   /* ---------------- ESTRM_0A ---------------- */
271   memset(blockC, 0x55, 80);
272   memcpy(blockC + 0,  &argL,  16);
273   memcpy(blockC + 16, &argR,  16);
274   memcpy(blockC + 24, &rdxIN, 8);
275   memcpy(blockC + 32, &raxIN, 8);
276   memcpy(blockC + 40, &rdxIN, 8);
277   __asm__ __volatile__(
278      "movupd    0(%0), %%xmm2"           "\n\t"
279      "movupd    16(%0), %%xmm13"         "\n\t"
280      "movq      32(%0), %%rdx"           "\n\t"
281      "movq      40(%0), %%rax"           "\n\t"
282      "movupd    48(%0), %%xmm0"          "\n\t"
283      "movw      64(%0), %%rcx"           "\n\t"
284      "pcmpestrm $0x0A, %%xmm2, %%xmm13"  "\n\t"
285      "movupd    %%xmm0, 48(%0)"          "\n\t"
286      "movw      %%rcx, 64(%0)"           "\n\t"
287      "pushfq"                            "\n\t"
288      "popq      %%r15"                   "\n\t"
289      "movq      %%r15, 72(%0)"           "\n\t"
290      : /*out*/
291      : /*in*/"r"(blockC)
292      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
293   );
294   printf("  estrm $0x0A:  ");
295   printf("    xmm0 ");
296   show_V128( (V128*)(blockC+48) );
297   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
298
299
300
301
302}
303
/* Drive one_test() over a fixed list of operand summaries and rdx/rax
   values: zero, small positive, 15/16/17, and the negatives of those.
   Negative literals are converted to ULong at the call. */
int main ( void )
{
   one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaa0aaaaaaa", 0 );
   one_test("0000000000000000", 0, "aaaaaaaa0aaaaaaa", 0 );

   one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 0 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 0 );
   one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 6 );

   /* rdx fixed at 5, rax varied. */
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 15 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 16 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 17 );

   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -6 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -15 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -16 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -17 );

   /* rax fixed at 6, rdx varied. */
   one_test("aaaaaaaaaaaaaaaa", 5,  "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", 15, "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", 16, "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", 17, "aaaaaaaaaaaaaaaa", 6 );

   one_test("aaaaaaaaaaaaaaaa", -5,  "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", -15, "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", -16, "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", -17, "aaaaaaaaaaaaaaaa", 6 );

   return 0;
}

/* Tests the explicit-vs-implicit-length (e vs i) and index-vs-mask
   (i vs m) aspects of pcmp{e,i}str{i,m}.  Does not check the core
   arithmetic in any detail.  */
338
339#include <string.h>
340#include <stdio.h>
341#include <assert.h>
342
/* Scalar shorthands used throughout this test. */
typedef unsigned char          UChar;
typedef unsigned int           UInt;
typedef signed int             Int;
typedef unsigned long long int ULong;

/* A boolean is stored in a single unsigned byte. */
typedef UChar Bool;
#define True  ((Bool)1)
#define False ((Bool)0)

/* One 128-bit vector, viewed as 16 unsigned bytes. */
typedef unsigned char V128[16];
351
352void show_V128 ( V128* vec )
353{
354   Int i;
355   for (i = 15; i >= 0; i--)
356      printf("%02x", (UInt)( (*vec)[i] ));
357}
358
359void expand ( V128* dst, char* summary )
360{
361   Int i;
362   assert( strlen(summary) == 16 );
363   for (i = 0; i < 16; i++) {
364      UChar xx = 0;
365      UChar x = summary[15-i];
366      if      (x >= '0' && x <= '9') { xx = x - '0'; }
367      else if (x >= 'A' && x <= 'F') { xx = x - 'A' + 10; }
368      else if (x >= 'a' && x <= 'f') { xx = x - 'a' + 10; }
369      else assert(0);
370
371      assert(xx < 16);
372      xx = (xx << 4) | xx;
373      assert(xx < 256);
374      (*dst)[i] = xx;
375   }
376}
377
378void one_test ( char* summL, ULong rdxIN, char* summR, ULong raxIN )
379{
380   V128 argL, argR;
381   expand( &argL, summL );
382   expand( &argR, summR );
383   printf("\n");
384   printf("rdx %016llx  argL ", rdxIN);
385   show_V128(&argL);
386   printf("  rax %016llx  argR ", raxIN);
387   show_V128(&argR);
388   printf("\n");
389
390   ULong block[ 2/*in:argL*/          // 0  0
391                + 2/*in:argR*/        // 2  16
392                + 1/*in:rdx*/         // 4  32
393                + 1/*in:rax*/         // 5  40
394                + 2/*inout:xmm0*/     // 6  48
395                + 1/*inout:rcx*/      // 8  64
396                + 1/*out:rflags*/ ];  // 9  72
397   assert(sizeof(block) == 80);
398
399   UChar* blockC = (UChar*)&block[0];
400
401   /* ---------------- ISTRI_4A ---------------- */
402   memset(blockC, 0x55, 80);
403   memcpy(blockC + 0,  &argL,  16);
404   memcpy(blockC + 16, &argR,  16);
405   memcpy(blockC + 24, &rdxIN, 8);
406   memcpy(blockC + 32, &raxIN, 8);
407   memcpy(blockC + 40, &rdxIN, 8);
408   __asm__ __volatile__(
409      "movupd    0(%0), %%xmm2"           "\n\t"
410      "movupd    16(%0), %%xmm13"         "\n\t"
411      "movq      32(%0), %%rdx"           "\n\t"
412      "movq      40(%0), %%rax"           "\n\t"
413      "movupd    48(%0), %%xmm0"          "\n\t"
414      "movw      64(%0), %%rcx"           "\n\t"
415      "pcmpistri $0x4A, %%xmm2, %%xmm13"  "\n\t"
416      "movupd    %%xmm0, 48(%0)"          "\n\t"
417      "movw      %%rcx, 64(%0)"           "\n\t"
418      "pushfq"                            "\n\t"
419      "popq      %%r15"                   "\n\t"
420      "movq      %%r15, 72(%0)"           "\n\t"
421      : /*out*/
422      : /*in*/"r"(blockC)
423      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
424   );
425   printf("  istri $0x4A:  ");
426   printf("    xmm0 ");
427   show_V128( (V128*)(blockC+48) );
428   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
429
430   /* ---------------- ISTRI_0A ---------------- */
431   memset(blockC, 0x55, 80);
432   memcpy(blockC + 0,  &argL,  16);
433   memcpy(blockC + 16, &argR,  16);
434   memcpy(blockC + 24, &rdxIN, 8);
435   memcpy(blockC + 32, &raxIN, 8);
436   memcpy(blockC + 40, &rdxIN, 8);
437   __asm__ __volatile__(
438      "movupd    0(%0), %%xmm2"           "\n\t"
439      "movupd    16(%0), %%xmm13"         "\n\t"
440      "movq      32(%0), %%rdx"           "\n\t"
441      "movq      40(%0), %%rax"           "\n\t"
442      "movupd    48(%0), %%xmm0"          "\n\t"
443      "movw      64(%0), %%rcx"           "\n\t"
444      "pcmpistri $0x0A, %%xmm2, %%xmm13"  "\n\t"
445      "movupd    %%xmm0, 48(%0)"          "\n\t"
446      "movw      %%rcx, 64(%0)"           "\n\t"
447      "pushfq"                            "\n\t"
448      "popq      %%r15"                   "\n\t"
449      "movq      %%r15, 72(%0)"           "\n\t"
450      : /*out*/
451      : /*in*/"r"(blockC)
452      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
453   );
454   printf("  istri $0x0A:  ");
455   printf("    xmm0 ");
456   show_V128( (V128*)(blockC+48) );
457   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
458
459   /* ---------------- ISTRM_4A ---------------- */
460   memset(blockC, 0x55, 80);
461   memcpy(blockC + 0,  &argL,  16);
462   memcpy(blockC + 16, &argR,  16);
463   memcpy(blockC + 24, &rdxIN, 8);
464   memcpy(blockC + 32, &raxIN, 8);
465   memcpy(blockC + 40, &rdxIN, 8);
466   __asm__ __volatile__(
467      "movupd    0(%0), %%xmm2"           "\n\t"
468      "movupd    16(%0), %%xmm13"         "\n\t"
469      "movq      32(%0), %%rdx"           "\n\t"
470      "movq      40(%0), %%rax"           "\n\t"
471      "movupd    48(%0), %%xmm0"          "\n\t"
472      "movw      64(%0), %%rcx"           "\n\t"
473      "pcmpistrm $0x4A, %%xmm2, %%xmm13"  "\n\t"
474      "movupd    %%xmm0, 48(%0)"          "\n\t"
475      "movw      %%rcx, 64(%0)"           "\n\t"
476      "pushfq"                            "\n\t"
477      "popq      %%r15"                   "\n\t"
478      "movq      %%r15, 72(%0)"           "\n\t"
479      : /*out*/
480      : /*in*/"r"(blockC)
481      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
482   );
483   printf("  istrm $0x4A:  ");
484   printf("    xmm0 ");
485   show_V128( (V128*)(blockC+48) );
486   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
487
488   /* ---------------- ISTRM_0A ---------------- */
489   memset(blockC, 0x55, 80);
490   memcpy(blockC + 0,  &argL,  16);
491   memcpy(blockC + 16, &argR,  16);
492   memcpy(blockC + 24, &rdxIN, 8);
493   memcpy(blockC + 32, &raxIN, 8);
494   memcpy(blockC + 40, &rdxIN, 8);
495   __asm__ __volatile__(
496      "movupd    0(%0), %%xmm2"           "\n\t"
497      "movupd    16(%0), %%xmm13"         "\n\t"
498      "movq      32(%0), %%rdx"           "\n\t"
499      "movq      40(%0), %%rax"           "\n\t"
500      "movupd    48(%0), %%xmm0"          "\n\t"
501      "movw      64(%0), %%rcx"           "\n\t"
502      "pcmpistrm $0x0A, %%xmm2, %%xmm13"  "\n\t"
503      "movupd    %%xmm0, 48(%0)"          "\n\t"
504      "movw      %%rcx, 64(%0)"           "\n\t"
505      "pushfq"                            "\n\t"
506      "popq      %%r15"                   "\n\t"
507      "movq      %%r15, 72(%0)"           "\n\t"
508      : /*out*/
509      : /*in*/"r"(blockC)
510      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
511   );
512   printf("  istrm $0x0A:  ");
513   printf("    xmm0 ");
514   show_V128( (V128*)(blockC+48) );
515   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
516
517   /* ---------------- ESTRI_4A ---------------- */
518   memset(blockC, 0x55, 80);
519   memcpy(blockC + 0,  &argL,  16);
520   memcpy(blockC + 16, &argR,  16);
521   memcpy(blockC + 24, &rdxIN, 8);
522   memcpy(blockC + 32, &raxIN, 8);
523   memcpy(blockC + 40, &rdxIN, 8);
524   __asm__ __volatile__(
525      "movupd    0(%0), %%xmm2"           "\n\t"
526      "movupd    16(%0), %%xmm13"         "\n\t"
527      "movq      32(%0), %%rdx"           "\n\t"
528      "movq      40(%0), %%rax"           "\n\t"
529      "movupd    48(%0), %%xmm0"          "\n\t"
530      "movw      64(%0), %%rcx"           "\n\t"
531      "pcmpestri $0x4A, %%xmm2, %%xmm13"  "\n\t"
532      "movupd    %%xmm0, 48(%0)"          "\n\t"
533      "movw      %%rcx, 64(%0)"           "\n\t"
534      "pushfq"                            "\n\t"
535      "popq      %%r15"                   "\n\t"
536      "movq      %%r15, 72(%0)"           "\n\t"
537      : /*out*/
538      : /*in*/"r"(blockC)
539      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
540   );
541   printf("  estri $0x4A:  ");
542   printf("    xmm0 ");
543   show_V128( (V128*)(blockC+48) );
544   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
545
546   /* ---------------- ESTRI_0A ---------------- */
547   memset(blockC, 0x55, 80);
548   memcpy(blockC + 0,  &argL,  16);
549   memcpy(blockC + 16, &argR,  16);
550   memcpy(blockC + 24, &rdxIN, 8);
551   memcpy(blockC + 32, &raxIN, 8);
552   memcpy(blockC + 40, &rdxIN, 8);
553   __asm__ __volatile__(
554      "movupd    0(%0), %%xmm2"           "\n\t"
555      "movupd    16(%0), %%xmm13"         "\n\t"
556      "movq      32(%0), %%rdx"           "\n\t"
557      "movq      40(%0), %%rax"           "\n\t"
558      "movupd    48(%0), %%xmm0"          "\n\t"
559      "movw      64(%0), %%rcx"           "\n\t"
560      "pcmpestri $0x0A, %%xmm2, %%xmm13"  "\n\t"
561      "movupd    %%xmm0, 48(%0)"          "\n\t"
562      "movw      %%rcx, 64(%0)"           "\n\t"
563      "pushfq"                            "\n\t"
564      "popq      %%r15"                   "\n\t"
565      "movq      %%r15, 72(%0)"           "\n\t"
566      : /*out*/
567      : /*in*/"r"(blockC)
568      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
569   );
570   printf("  estri $0x0A:  ");
571   printf("    xmm0 ");
572   show_V128( (V128*)(blockC+48) );
573   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
574
575   /* ---------------- ESTRM_4A ---------------- */
576   memset(blockC, 0x55, 80);
577   memcpy(blockC + 0,  &argL,  16);
578   memcpy(blockC + 16, &argR,  16);
579   memcpy(blockC + 24, &rdxIN, 8);
580   memcpy(blockC + 32, &raxIN, 8);
581   memcpy(blockC + 40, &rdxIN, 8);
582   __asm__ __volatile__(
583      "movupd    0(%0), %%xmm2"           "\n\t"
584      "movupd    16(%0), %%xmm13"         "\n\t"
585      "movq      32(%0), %%rdx"           "\n\t"
586      "movq      40(%0), %%rax"           "\n\t"
587      "movupd    48(%0), %%xmm0"          "\n\t"
588      "movw      64(%0), %%rcx"           "\n\t"
589      "pcmpestrm $0x4A, %%xmm2, %%xmm13"  "\n\t"
590      "movupd    %%xmm0, 48(%0)"          "\n\t"
591      "movw      %%rcx, 64(%0)"           "\n\t"
592      "pushfq"                            "\n\t"
593      "popq      %%r15"                   "\n\t"
594      "movq      %%r15, 72(%0)"           "\n\t"
595      : /*out*/
596      : /*in*/"r"(blockC)
597      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
598   );
599   printf("  estrm $0x4A:  ");
600   printf("    xmm0 ");
601   show_V128( (V128*)(blockC+48) );
602   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
603
604   /* ---------------- ESTRM_0A ---------------- */
605   memset(blockC, 0x55, 80);
606   memcpy(blockC + 0,  &argL,  16);
607   memcpy(blockC + 16, &argR,  16);
608   memcpy(blockC + 24, &rdxIN, 8);
609   memcpy(blockC + 32, &raxIN, 8);
610   memcpy(blockC + 40, &rdxIN, 8);
611   __asm__ __volatile__(
612      "movupd    0(%0), %%xmm2"           "\n\t"
613      "movupd    16(%0), %%xmm13"         "\n\t"
614      "movq      32(%0), %%rdx"           "\n\t"
615      "movq      40(%0), %%rax"           "\n\t"
616      "movupd    48(%0), %%xmm0"          "\n\t"
617      "movw      64(%0), %%rcx"           "\n\t"
618      "pcmpestrm $0x0A, %%xmm2, %%xmm13"  "\n\t"
619      "movupd    %%xmm0, 48(%0)"          "\n\t"
620      "movw      %%rcx, 64(%0)"           "\n\t"
621      "pushfq"                            "\n\t"
622      "popq      %%r15"                   "\n\t"
623      "movq      %%r15, 72(%0)"           "\n\t"
624      : /*out*/
625      : /*in*/"r"(blockC)
626      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
627   );
628   printf("  estrm $0x0A:  ");
629   printf("    xmm0 ");
630   show_V128( (V128*)(blockC+48) );
631   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
632
633
634
635
636}
637
638int main ( void )
639{
640   one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaa0aaaaaaa", 0 );
641   one_test("0000000000000000", 0, "aaaaaaaa0aaaaaaa", 0 );
642
643   one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 0 );
644   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 0 );
645   one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 6 );
646
647   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 6 );
648   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 15 );
649   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 16 );
650   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 17 );
651
652   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -6 );
653   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -15 );
654   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -16 );
655   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -17 );
656
657   one_test("aaaaaaaaaaaaaaaa", 5,  "aaaaaaaaaaaaaaaa", 6 );
658   one_test("aaaaaaaaaaaaaaaa", 15, "aaaaaaaaaaaaaaaa", 6 );
659   one_test("aaaaaaaaaaaaaaaa", 16, "aaaaaaaaaaaaaaaa", 6 );
660   one_test("aaaaaaaaaaaaaaaa", 17, "aaaaaaaaaaaaaaaa", 6 );
661
662   one_test("aaaaaaaaaaaaaaaa", -5,  "aaaaaaaaaaaaaaaa", 6 );
663   one_test("aaaaaaaaaaaaaaaa", -15, "aaaaaaaaaaaaaaaa", 6 );
664   one_test("aaaaaaaaaaaaaaaa", -16, "aaaaaaaaaaaaaaaa", 6 );
665   one_test("aaaaaaaaaaaaaaaa", -17, "aaaaaaaaaaaaaaaa", 6 );
666
667   return 0;
668}
669