19db268cc7344d8751213307737285e7cdac4fbecsewardj
/* Tests e-vs-i or i-vs-m aspects for pcmp{e,i}str{i,m}.  Does not
   check the core arithmetic in any detail.  This file checks the
   16-bit character versions (w is for wide). */
59db268cc7344d8751213307737285e7cdac4fbecsewardj
69db268cc7344d8751213307737285e7cdac4fbecsewardj#include <string.h>
79db268cc7344d8751213307737285e7cdac4fbecsewardj#include <stdio.h>
89db268cc7344d8751213307737285e7cdac4fbecsewardj#include <assert.h>
99db268cc7344d8751213307737285e7cdac4fbecsewardj
/* Short names for the primitive types used throughout this test. */
typedef  unsigned char           UChar;
typedef  unsigned int            UInt;
typedef  signed int              Int;
typedef  unsigned long long int  ULong;

/* A boolean is stored in a single byte. */
typedef  UChar                   Bool;

/* A 128-bit vector, viewed as 16 individual bytes. */
typedef  UChar                   V128[16];

#define False ((Bool)0)
#define True  ((Bool)1)
189db268cc7344d8751213307737285e7cdac4fbecsewardj
199db268cc7344d8751213307737285e7cdac4fbecsewardjvoid show_V128 ( V128* vec )
209db268cc7344d8751213307737285e7cdac4fbecsewardj{
219db268cc7344d8751213307737285e7cdac4fbecsewardj   Int i;
229db268cc7344d8751213307737285e7cdac4fbecsewardj   for (i = 15; i >= 0; i--)
239db268cc7344d8751213307737285e7cdac4fbecsewardj      printf("%02x", (UInt)( (*vec)[i] ));
249db268cc7344d8751213307737285e7cdac4fbecsewardj}
259db268cc7344d8751213307737285e7cdac4fbecsewardj
269db268cc7344d8751213307737285e7cdac4fbecsewardjvoid expand ( V128* dst, char* summary )
279db268cc7344d8751213307737285e7cdac4fbecsewardj{
289db268cc7344d8751213307737285e7cdac4fbecsewardj   Int i;
299db268cc7344d8751213307737285e7cdac4fbecsewardj   assert( strlen(summary) == 16 );
309db268cc7344d8751213307737285e7cdac4fbecsewardj   for (i = 0; i < 16; i++) {
319db268cc7344d8751213307737285e7cdac4fbecsewardj      UChar xx = 0;
329db268cc7344d8751213307737285e7cdac4fbecsewardj      UChar x = summary[15-i];
339db268cc7344d8751213307737285e7cdac4fbecsewardj      if      (x >= '0' && x <= '9') { xx = x - '0'; }
349db268cc7344d8751213307737285e7cdac4fbecsewardj      else if (x >= 'A' && x <= 'F') { xx = x - 'A' + 10; }
359db268cc7344d8751213307737285e7cdac4fbecsewardj      else if (x >= 'a' && x <= 'f') { xx = x - 'a' + 10; }
369db268cc7344d8751213307737285e7cdac4fbecsewardj      else assert(0);
379db268cc7344d8751213307737285e7cdac4fbecsewardj
389db268cc7344d8751213307737285e7cdac4fbecsewardj      assert(xx < 16);
399db268cc7344d8751213307737285e7cdac4fbecsewardj      xx = (xx << 4) | xx;
409db268cc7344d8751213307737285e7cdac4fbecsewardj      assert(xx < 256);
419db268cc7344d8751213307737285e7cdac4fbecsewardj      (*dst)[i] = xx;
429db268cc7344d8751213307737285e7cdac4fbecsewardj   }
439db268cc7344d8751213307737285e7cdac4fbecsewardj}
449db268cc7344d8751213307737285e7cdac4fbecsewardj
459db268cc7344d8751213307737285e7cdac4fbecsewardjvoid one_test ( char* summL, ULong rdxIN, char* summR, ULong raxIN )
469db268cc7344d8751213307737285e7cdac4fbecsewardj{
479db268cc7344d8751213307737285e7cdac4fbecsewardj   V128 argL, argR;
489db268cc7344d8751213307737285e7cdac4fbecsewardj   expand( &argL, summL );
499db268cc7344d8751213307737285e7cdac4fbecsewardj   expand( &argR, summR );
509db268cc7344d8751213307737285e7cdac4fbecsewardj   printf("\n");
519db268cc7344d8751213307737285e7cdac4fbecsewardj   printf("rdx %016llx  argL ", rdxIN);
529db268cc7344d8751213307737285e7cdac4fbecsewardj   show_V128(&argL);
539db268cc7344d8751213307737285e7cdac4fbecsewardj   printf("  rax %016llx  argR ", raxIN);
549db268cc7344d8751213307737285e7cdac4fbecsewardj   show_V128(&argR);
559db268cc7344d8751213307737285e7cdac4fbecsewardj   printf("\n");
569db268cc7344d8751213307737285e7cdac4fbecsewardj
579db268cc7344d8751213307737285e7cdac4fbecsewardj   ULong block[ 2/*in:argL*/          // 0  0
589db268cc7344d8751213307737285e7cdac4fbecsewardj                + 2/*in:argR*/        // 2  16
599db268cc7344d8751213307737285e7cdac4fbecsewardj                + 1/*in:rdx*/         // 4  32
609db268cc7344d8751213307737285e7cdac4fbecsewardj                + 1/*in:rax*/         // 5  40
619db268cc7344d8751213307737285e7cdac4fbecsewardj                + 2/*inout:xmm0*/     // 6  48
629db268cc7344d8751213307737285e7cdac4fbecsewardj                + 1/*inout:rcx*/      // 8  64
639db268cc7344d8751213307737285e7cdac4fbecsewardj                + 1/*out:rflags*/ ];  // 9  72
649db268cc7344d8751213307737285e7cdac4fbecsewardj   assert(sizeof(block) == 80);
659db268cc7344d8751213307737285e7cdac4fbecsewardj
669db268cc7344d8751213307737285e7cdac4fbecsewardj   UChar* blockC = (UChar*)&block[0];
679db268cc7344d8751213307737285e7cdac4fbecsewardj
689db268cc7344d8751213307737285e7cdac4fbecsewardj   /* ---------------- ISTRI_4B ---------------- */
699db268cc7344d8751213307737285e7cdac4fbecsewardj   memset(blockC, 0x55, 80);
709db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(blockC + 0,  &argL,  16);
719db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(blockC + 16, &argR,  16);
729db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(blockC + 24, &rdxIN, 8);
739db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(blockC + 32, &raxIN, 8);
749db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(blockC + 40, &rdxIN, 8);
759db268cc7344d8751213307737285e7cdac4fbecsewardj   __asm__ __volatile__(
769db268cc7344d8751213307737285e7cdac4fbecsewardj      "movupd    0(%0), %%xmm2"           "\n\t"
779db268cc7344d8751213307737285e7cdac4fbecsewardj      "movupd    16(%0), %%xmm13"         "\n\t"
789db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      32(%0), %%rdx"           "\n\t"
799db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      40(%0), %%rax"           "\n\t"
809db268cc7344d8751213307737285e7cdac4fbecsewardj      "movupd    48(%0), %%xmm0"          "\n\t"
81b1f90e040439f3eec6ae82d71dc0aabb725c6b30florian      "movw      64(%0), %%cx"            "\n\t"
829db268cc7344d8751213307737285e7cdac4fbecsewardj      "pcmpistri $0x4B, %%xmm2, %%xmm13"  "\n\t"
839db268cc7344d8751213307737285e7cdac4fbecsewardj      "movupd    %%xmm0, 48(%0)"          "\n\t"
84b1f90e040439f3eec6ae82d71dc0aabb725c6b30florian      "movw      %%cx, 64(%0)"            "\n\t"
859db268cc7344d8751213307737285e7cdac4fbecsewardj      "pushfq"                            "\n\t"
869db268cc7344d8751213307737285e7cdac4fbecsewardj      "popq      %%r15"                   "\n\t"
879db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      %%r15, 72(%0)"           "\n\t"
889db268cc7344d8751213307737285e7cdac4fbecsewardj      : /*out*/
899db268cc7344d8751213307737285e7cdac4fbecsewardj      : /*in*/"r"(blockC)
909db268cc7344d8751213307737285e7cdac4fbecsewardj      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
919db268cc7344d8751213307737285e7cdac4fbecsewardj   );
929db268cc7344d8751213307737285e7cdac4fbecsewardj   printf("  istri $0x4B:  ");
939db268cc7344d8751213307737285e7cdac4fbecsewardj   printf("    xmm0 ");
949db268cc7344d8751213307737285e7cdac4fbecsewardj   show_V128( (V128*)(blockC+48) );
959db268cc7344d8751213307737285e7cdac4fbecsewardj   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
969db268cc7344d8751213307737285e7cdac4fbecsewardj
979db268cc7344d8751213307737285e7cdac4fbecsewardj   /* ---------------- ISTRI_0B ---------------- */
989db268cc7344d8751213307737285e7cdac4fbecsewardj   memset(blockC, 0x55, 80);
999db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(blockC + 0,  &argL,  16);
1009db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(blockC + 16, &argR,  16);
1019db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(blockC + 24, &rdxIN, 8);
1029db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(blockC + 32, &raxIN, 8);
1039db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(blockC + 40, &rdxIN, 8);
1049db268cc7344d8751213307737285e7cdac4fbecsewardj   __asm__ __volatile__(
1059db268cc7344d8751213307737285e7cdac4fbecsewardj      "movupd    0(%0), %%xmm2"           "\n\t"
1069db268cc7344d8751213307737285e7cdac4fbecsewardj      "movupd    16(%0), %%xmm13"         "\n\t"
1079db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      32(%0), %%rdx"           "\n\t"
1089db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      40(%0), %%rax"           "\n\t"
1099db268cc7344d8751213307737285e7cdac4fbecsewardj      "movupd    48(%0), %%xmm0"          "\n\t"
110b1f90e040439f3eec6ae82d71dc0aabb725c6b30florian      "movw      64(%0), %%cx"            "\n\t"
1119db268cc7344d8751213307737285e7cdac4fbecsewardj      "pcmpistri $0x0B, %%xmm2, %%xmm13"  "\n\t"
1129db268cc7344d8751213307737285e7cdac4fbecsewardj      "movupd    %%xmm0, 48(%0)"          "\n\t"
113b1f90e040439f3eec6ae82d71dc0aabb725c6b30florian      "movw      %%cx, 64(%0)"            "\n\t"
1149db268cc7344d8751213307737285e7cdac4fbecsewardj      "pushfq"                            "\n\t"
1159db268cc7344d8751213307737285e7cdac4fbecsewardj      "popq      %%r15"                   "\n\t"
1169db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      %%r15, 72(%0)"           "\n\t"
1179db268cc7344d8751213307737285e7cdac4fbecsewardj      : /*out*/
1189db268cc7344d8751213307737285e7cdac4fbecsewardj      : /*in*/"r"(blockC)
1199db268cc7344d8751213307737285e7cdac4fbecsewardj      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
1209db268cc7344d8751213307737285e7cdac4fbecsewardj   );
1219db268cc7344d8751213307737285e7cdac4fbecsewardj   printf("  istri $0x0B:  ");
1229db268cc7344d8751213307737285e7cdac4fbecsewardj   printf("    xmm0 ");
1239db268cc7344d8751213307737285e7cdac4fbecsewardj   show_V128( (V128*)(blockC+48) );
1249db268cc7344d8751213307737285e7cdac4fbecsewardj   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
1259db268cc7344d8751213307737285e7cdac4fbecsewardj
1269db268cc7344d8751213307737285e7cdac4fbecsewardj   /* ---------------- ISTRM_4B ---------------- */
1279db268cc7344d8751213307737285e7cdac4fbecsewardj   memset(blockC, 0x55, 80);
1289db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(blockC + 0,  &argL,  16);
1299db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(blockC + 16, &argR,  16);
1309db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(blockC + 24, &rdxIN, 8);
1319db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(blockC + 32, &raxIN, 8);
1329db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(blockC + 40, &rdxIN, 8);
1339db268cc7344d8751213307737285e7cdac4fbecsewardj   __asm__ __volatile__(
1349db268cc7344d8751213307737285e7cdac4fbecsewardj      "movupd    0(%0), %%xmm2"           "\n\t"
1359db268cc7344d8751213307737285e7cdac4fbecsewardj      "movupd    16(%0), %%xmm13"         "\n\t"
1369db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      32(%0), %%rdx"           "\n\t"
1379db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      40(%0), %%rax"           "\n\t"
1389db268cc7344d8751213307737285e7cdac4fbecsewardj      "movupd    48(%0), %%xmm0"          "\n\t"
139b1f90e040439f3eec6ae82d71dc0aabb725c6b30florian      "movw      64(%0), %%cx"            "\n\t"
1409db268cc7344d8751213307737285e7cdac4fbecsewardj      "pcmpistrm $0x4B, %%xmm2, %%xmm13"  "\n\t"
1419db268cc7344d8751213307737285e7cdac4fbecsewardj      "movupd    %%xmm0, 48(%0)"          "\n\t"
142b1f90e040439f3eec6ae82d71dc0aabb725c6b30florian      "movw      %%cx, 64(%0)"            "\n\t"
1439db268cc7344d8751213307737285e7cdac4fbecsewardj      "pushfq"                            "\n\t"
1449db268cc7344d8751213307737285e7cdac4fbecsewardj      "popq      %%r15"                   "\n\t"
1459db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      %%r15, 72(%0)"           "\n\t"
1469db268cc7344d8751213307737285e7cdac4fbecsewardj      : /*out*/
1479db268cc7344d8751213307737285e7cdac4fbecsewardj      : /*in*/"r"(blockC)
1489db268cc7344d8751213307737285e7cdac4fbecsewardj      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
1499db268cc7344d8751213307737285e7cdac4fbecsewardj   );
1509db268cc7344d8751213307737285e7cdac4fbecsewardj   printf("  istrm $0x4B:  ");
1519db268cc7344d8751213307737285e7cdac4fbecsewardj   printf("    xmm0 ");
1529db268cc7344d8751213307737285e7cdac4fbecsewardj   show_V128( (V128*)(blockC+48) );
1539db268cc7344d8751213307737285e7cdac4fbecsewardj   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
1549db268cc7344d8751213307737285e7cdac4fbecsewardj
1559db268cc7344d8751213307737285e7cdac4fbecsewardj   /* ---------------- ISTRM_0B ---------------- */
1569db268cc7344d8751213307737285e7cdac4fbecsewardj   memset(blockC, 0x55, 80);
1579db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(blockC + 0,  &argL,  16);
1589db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(blockC + 16, &argR,  16);
1599db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(blockC + 24, &rdxIN, 8);
1609db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(blockC + 32, &raxIN, 8);
1619db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(blockC + 40, &rdxIN, 8);
1629db268cc7344d8751213307737285e7cdac4fbecsewardj   __asm__ __volatile__(
1639db268cc7344d8751213307737285e7cdac4fbecsewardj      "movupd    0(%0), %%xmm2"           "\n\t"
1649db268cc7344d8751213307737285e7cdac4fbecsewardj      "movupd    16(%0), %%xmm13"         "\n\t"
1659db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      32(%0), %%rdx"           "\n\t"
1669db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      40(%0), %%rax"           "\n\t"
1679db268cc7344d8751213307737285e7cdac4fbecsewardj      "movupd    48(%0), %%xmm0"          "\n\t"
168b1f90e040439f3eec6ae82d71dc0aabb725c6b30florian      "movw      64(%0), %%cx"            "\n\t"
1699db268cc7344d8751213307737285e7cdac4fbecsewardj      "pcmpistrm $0x0B, %%xmm2, %%xmm13"  "\n\t"
1709db268cc7344d8751213307737285e7cdac4fbecsewardj      "movupd    %%xmm0, 48(%0)"          "\n\t"
171b1f90e040439f3eec6ae82d71dc0aabb725c6b30florian      "movw      %%cx, 64(%0)"            "\n\t"
1729db268cc7344d8751213307737285e7cdac4fbecsewardj      "pushfq"                            "\n\t"
1739db268cc7344d8751213307737285e7cdac4fbecsewardj      "popq      %%r15"                   "\n\t"
1749db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      %%r15, 72(%0)"           "\n\t"
1759db268cc7344d8751213307737285e7cdac4fbecsewardj      : /*out*/
1769db268cc7344d8751213307737285e7cdac4fbecsewardj      : /*in*/"r"(blockC)
1779db268cc7344d8751213307737285e7cdac4fbecsewardj      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
1789db268cc7344d8751213307737285e7cdac4fbecsewardj   );
1799db268cc7344d8751213307737285e7cdac4fbecsewardj   printf("  istrm $0x0B:  ");
1809db268cc7344d8751213307737285e7cdac4fbecsewardj   printf("    xmm0 ");
1819db268cc7344d8751213307737285e7cdac4fbecsewardj   show_V128( (V128*)(blockC+48) );
1829db268cc7344d8751213307737285e7cdac4fbecsewardj   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
1839db268cc7344d8751213307737285e7cdac4fbecsewardj
1849db268cc7344d8751213307737285e7cdac4fbecsewardj   /* ---------------- ESTRI_4B ---------------- */
1859db268cc7344d8751213307737285e7cdac4fbecsewardj   memset(blockC, 0x55, 80);
1869db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(blockC + 0,  &argL,  16);
1879db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(blockC + 16, &argR,  16);
1889db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(blockC + 24, &rdxIN, 8);
1899db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(blockC + 32, &raxIN, 8);
1909db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(blockC + 40, &rdxIN, 8);
1919db268cc7344d8751213307737285e7cdac4fbecsewardj   __asm__ __volatile__(
1929db268cc7344d8751213307737285e7cdac4fbecsewardj      "movupd    0(%0), %%xmm2"           "\n\t"
1939db268cc7344d8751213307737285e7cdac4fbecsewardj      "movupd    16(%0), %%xmm13"         "\n\t"
1949db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      32(%0), %%rdx"           "\n\t"
1959db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      40(%0), %%rax"           "\n\t"
1969db268cc7344d8751213307737285e7cdac4fbecsewardj      "movupd    48(%0), %%xmm0"          "\n\t"
197b1f90e040439f3eec6ae82d71dc0aabb725c6b30florian      "movw      64(%0), %%cx"            "\n\t"
1989db268cc7344d8751213307737285e7cdac4fbecsewardj      "pcmpestri $0x4B, %%xmm2, %%xmm13"  "\n\t"
1999db268cc7344d8751213307737285e7cdac4fbecsewardj      "movupd    %%xmm0, 48(%0)"          "\n\t"
200b1f90e040439f3eec6ae82d71dc0aabb725c6b30florian      "movw      %%cx, 64(%0)"            "\n\t"
2019db268cc7344d8751213307737285e7cdac4fbecsewardj      "pushfq"                            "\n\t"
2029db268cc7344d8751213307737285e7cdac4fbecsewardj      "popq      %%r15"                   "\n\t"
2039db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      %%r15, 72(%0)"           "\n\t"
2049db268cc7344d8751213307737285e7cdac4fbecsewardj      : /*out*/
2059db268cc7344d8751213307737285e7cdac4fbecsewardj      : /*in*/"r"(blockC)
2069db268cc7344d8751213307737285e7cdac4fbecsewardj      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
2079db268cc7344d8751213307737285e7cdac4fbecsewardj   );
2089db268cc7344d8751213307737285e7cdac4fbecsewardj   printf("  estri $0x4B:  ");
2099db268cc7344d8751213307737285e7cdac4fbecsewardj   printf("    xmm0 ");
2109db268cc7344d8751213307737285e7cdac4fbecsewardj   show_V128( (V128*)(blockC+48) );
2119db268cc7344d8751213307737285e7cdac4fbecsewardj   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
2129db268cc7344d8751213307737285e7cdac4fbecsewardj
2139db268cc7344d8751213307737285e7cdac4fbecsewardj   /* ---------------- ESTRI_0B ---------------- */
2149db268cc7344d8751213307737285e7cdac4fbecsewardj   memset(blockC, 0x55, 80);
2159db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(blockC + 0,  &argL,  16);
2169db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(blockC + 16, &argR,  16);
2179db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(blockC + 24, &rdxIN, 8);
2189db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(blockC + 32, &raxIN, 8);
2199db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(blockC + 40, &rdxIN, 8);
2209db268cc7344d8751213307737285e7cdac4fbecsewardj   __asm__ __volatile__(
2219db268cc7344d8751213307737285e7cdac4fbecsewardj      "movupd    0(%0), %%xmm2"           "\n\t"
2229db268cc7344d8751213307737285e7cdac4fbecsewardj      "movupd    16(%0), %%xmm13"         "\n\t"
2239db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      32(%0), %%rdx"           "\n\t"
2249db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      40(%0), %%rax"           "\n\t"
2259db268cc7344d8751213307737285e7cdac4fbecsewardj      "movupd    48(%0), %%xmm0"          "\n\t"
226b1f90e040439f3eec6ae82d71dc0aabb725c6b30florian      "movw      64(%0), %%cx"            "\n\t"
2279db268cc7344d8751213307737285e7cdac4fbecsewardj      "pcmpestri $0x0B, %%xmm2, %%xmm13"  "\n\t"
2289db268cc7344d8751213307737285e7cdac4fbecsewardj      "movupd    %%xmm0, 48(%0)"          "\n\t"
229b1f90e040439f3eec6ae82d71dc0aabb725c6b30florian      "movw      %%cx, 64(%0)"            "\n\t"
2309db268cc7344d8751213307737285e7cdac4fbecsewardj      "pushfq"                            "\n\t"
2319db268cc7344d8751213307737285e7cdac4fbecsewardj      "popq      %%r15"                   "\n\t"
2329db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      %%r15, 72(%0)"           "\n\t"
2339db268cc7344d8751213307737285e7cdac4fbecsewardj      : /*out*/
2349db268cc7344d8751213307737285e7cdac4fbecsewardj      : /*in*/"r"(blockC)
2359db268cc7344d8751213307737285e7cdac4fbecsewardj      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
2369db268cc7344d8751213307737285e7cdac4fbecsewardj   );
2379db268cc7344d8751213307737285e7cdac4fbecsewardj   printf("  estri $0x0B:  ");
2389db268cc7344d8751213307737285e7cdac4fbecsewardj   printf("    xmm0 ");
2399db268cc7344d8751213307737285e7cdac4fbecsewardj   show_V128( (V128*)(blockC+48) );
2409db268cc7344d8751213307737285e7cdac4fbecsewardj   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
2419db268cc7344d8751213307737285e7cdac4fbecsewardj
2429db268cc7344d8751213307737285e7cdac4fbecsewardj   /* ---------------- ESTRM_4B ---------------- */
2439db268cc7344d8751213307737285e7cdac4fbecsewardj   memset(blockC, 0x55, 80);
2449db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(blockC + 0,  &argL,  16);
2459db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(blockC + 16, &argR,  16);
2469db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(blockC + 24, &rdxIN, 8);
2479db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(blockC + 32, &raxIN, 8);
2489db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(blockC + 40, &rdxIN, 8);
2499db268cc7344d8751213307737285e7cdac4fbecsewardj   __asm__ __volatile__(
2509db268cc7344d8751213307737285e7cdac4fbecsewardj      "movupd    0(%0), %%xmm2"           "\n\t"
2519db268cc7344d8751213307737285e7cdac4fbecsewardj      "movupd    16(%0), %%xmm13"         "\n\t"
2529db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      32(%0), %%rdx"           "\n\t"
2539db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      40(%0), %%rax"           "\n\t"
2549db268cc7344d8751213307737285e7cdac4fbecsewardj      "movupd    48(%0), %%xmm0"          "\n\t"
255b1f90e040439f3eec6ae82d71dc0aabb725c6b30florian      "movw      64(%0), %%cx"            "\n\t"
2569db268cc7344d8751213307737285e7cdac4fbecsewardj      "pcmpestrm $0x4B, %%xmm2, %%xmm13"  "\n\t"
2579db268cc7344d8751213307737285e7cdac4fbecsewardj      "movupd    %%xmm0, 48(%0)"          "\n\t"
258b1f90e040439f3eec6ae82d71dc0aabb725c6b30florian      "movw      %%cx, 64(%0)"            "\n\t"
2599db268cc7344d8751213307737285e7cdac4fbecsewardj      "pushfq"                            "\n\t"
2609db268cc7344d8751213307737285e7cdac4fbecsewardj      "popq      %%r15"                   "\n\t"
2619db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      %%r15, 72(%0)"           "\n\t"
2629db268cc7344d8751213307737285e7cdac4fbecsewardj      : /*out*/
2639db268cc7344d8751213307737285e7cdac4fbecsewardj      : /*in*/"r"(blockC)
2649db268cc7344d8751213307737285e7cdac4fbecsewardj      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
2659db268cc7344d8751213307737285e7cdac4fbecsewardj   );
2669db268cc7344d8751213307737285e7cdac4fbecsewardj   printf("  estrm $0x4B:  ");
2679db268cc7344d8751213307737285e7cdac4fbecsewardj   printf("    xmm0 ");
2689db268cc7344d8751213307737285e7cdac4fbecsewardj   show_V128( (V128*)(blockC+48) );
2699db268cc7344d8751213307737285e7cdac4fbecsewardj   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
2709db268cc7344d8751213307737285e7cdac4fbecsewardj
2719db268cc7344d8751213307737285e7cdac4fbecsewardj   /* ---------------- ESTRM_0B ---------------- */
2729db268cc7344d8751213307737285e7cdac4fbecsewardj   memset(blockC, 0x55, 80);
2739db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(blockC + 0,  &argL,  16);
2749db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(blockC + 16, &argR,  16);
2759db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(blockC + 24, &rdxIN, 8);
2769db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(blockC + 32, &raxIN, 8);
2779db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(blockC + 40, &rdxIN, 8);
2789db268cc7344d8751213307737285e7cdac4fbecsewardj   __asm__ __volatile__(
2799db268cc7344d8751213307737285e7cdac4fbecsewardj      "movupd    0(%0), %%xmm2"           "\n\t"
2809db268cc7344d8751213307737285e7cdac4fbecsewardj      "movupd    16(%0), %%xmm13"         "\n\t"
2819db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      32(%0), %%rdx"           "\n\t"
2829db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      40(%0), %%rax"           "\n\t"
2839db268cc7344d8751213307737285e7cdac4fbecsewardj      "movupd    48(%0), %%xmm0"          "\n\t"
284b1f90e040439f3eec6ae82d71dc0aabb725c6b30florian      "movw      64(%0), %%cx"            "\n\t"
2859db268cc7344d8751213307737285e7cdac4fbecsewardj      "pcmpestrm $0x0B, %%xmm2, %%xmm13"  "\n\t"
2869db268cc7344d8751213307737285e7cdac4fbecsewardj      "movupd    %%xmm0, 48(%0)"          "\n\t"
287b1f90e040439f3eec6ae82d71dc0aabb725c6b30florian      "movw      %%cx, 64(%0)"            "\n\t"
2889db268cc7344d8751213307737285e7cdac4fbecsewardj      "pushfq"                            "\n\t"
2899db268cc7344d8751213307737285e7cdac4fbecsewardj      "popq      %%r15"                   "\n\t"
2909db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      %%r15, 72(%0)"           "\n\t"
2919db268cc7344d8751213307737285e7cdac4fbecsewardj      : /*out*/
2929db268cc7344d8751213307737285e7cdac4fbecsewardj      : /*in*/"r"(blockC)
2939db268cc7344d8751213307737285e7cdac4fbecsewardj      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
2949db268cc7344d8751213307737285e7cdac4fbecsewardj   );
2959db268cc7344d8751213307737285e7cdac4fbecsewardj   printf("  estrm $0x0B:  ");
2969db268cc7344d8751213307737285e7cdac4fbecsewardj   printf("    xmm0 ");
2979db268cc7344d8751213307737285e7cdac4fbecsewardj   show_V128( (V128*)(blockC+48) );
2989db268cc7344d8751213307737285e7cdac4fbecsewardj   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
2999db268cc7344d8751213307737285e7cdac4fbecsewardj
3009db268cc7344d8751213307737285e7cdac4fbecsewardj
3019db268cc7344d8751213307737285e7cdac4fbecsewardj
3029db268cc7344d8751213307737285e7cdac4fbecsewardj
3039db268cc7344d8751213307737285e7cdac4fbecsewardj}
3049db268cc7344d8751213307737285e7cdac4fbecsewardj
3059db268cc7344d8751213307737285e7cdac4fbecsewardjint main ( void )
3069db268cc7344d8751213307737285e7cdac4fbecsewardj{
3079db268cc7344d8751213307737285e7cdac4fbecsewardj   one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaa00aaaaaa", 0 );
3089db268cc7344d8751213307737285e7cdac4fbecsewardj   one_test("0000000000000000", 0, "aaaaaaaa00aaaaaa", 0 );
3099db268cc7344d8751213307737285e7cdac4fbecsewardj
3109db268cc7344d8751213307737285e7cdac4fbecsewardj   one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 0 );
3119db268cc7344d8751213307737285e7cdac4fbecsewardj   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 0 );
3129db268cc7344d8751213307737285e7cdac4fbecsewardj   one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 6 );
3139db268cc7344d8751213307737285e7cdac4fbecsewardj
3149db268cc7344d8751213307737285e7cdac4fbecsewardj   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 6 );
3159db268cc7344d8751213307737285e7cdac4fbecsewardj   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 15 );
3169db268cc7344d8751213307737285e7cdac4fbecsewardj   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 16 );
3179db268cc7344d8751213307737285e7cdac4fbecsewardj   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 17 );
3189db268cc7344d8751213307737285e7cdac4fbecsewardj
3199db268cc7344d8751213307737285e7cdac4fbecsewardj   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -6 );
3209db268cc7344d8751213307737285e7cdac4fbecsewardj   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -15 );
3219db268cc7344d8751213307737285e7cdac4fbecsewardj   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -16 );
3229db268cc7344d8751213307737285e7cdac4fbecsewardj   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -17 );
3239db268cc7344d8751213307737285e7cdac4fbecsewardj
3249db268cc7344d8751213307737285e7cdac4fbecsewardj   one_test("aaaaaaaaaaaaaaaa", 5,  "aaaaaaaaaaaaaaaa", 6 );
3259db268cc7344d8751213307737285e7cdac4fbecsewardj   one_test("aaaaaaaaaaaaaaaa", 15, "aaaaaaaaaaaaaaaa", 6 );
3269db268cc7344d8751213307737285e7cdac4fbecsewardj   one_test("aaaaaaaaaaaaaaaa", 16, "aaaaaaaaaaaaaaaa", 6 );
3279db268cc7344d8751213307737285e7cdac4fbecsewardj   one_test("aaaaaaaaaaaaaaaa", 17, "aaaaaaaaaaaaaaaa", 6 );
3289db268cc7344d8751213307737285e7cdac4fbecsewardj
3299db268cc7344d8751213307737285e7cdac4fbecsewardj   one_test("aaaaaaaaaaaaaaaa", -5,  "aaaaaaaaaaaaaaaa", 6 );
3309db268cc7344d8751213307737285e7cdac4fbecsewardj   one_test("aaaaaaaaaaaaaaaa", -15, "aaaaaaaaaaaaaaaa", 6 );
3319db268cc7344d8751213307737285e7cdac4fbecsewardj   one_test("aaaaaaaaaaaaaaaa", -16, "aaaaaaaaaaaaaaaa", 6 );
3329db268cc7344d8751213307737285e7cdac4fbecsewardj   one_test("aaaaaaaaaaaaaaaa", -17, "aaaaaaaaaaaaaaaa", 6 );
3339db268cc7344d8751213307737285e7cdac4fbecsewardj
3349db268cc7344d8751213307737285e7cdac4fbecsewardj   return 0;
3359db268cc7344d8751213307737285e7cdac4fbecsewardj}
336