10a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
/* Tests in detail the core arithmetic for pcmp{e,i}str{i,m} using
   pcmpistri to drive it.  Does not check the e-vs-i or i-vs-m
   aspect. */
57f3019bfbbbbc5356c351c5cd319c36fe731b806sewardj
60a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj#include <string.h>
70a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj#include <stdio.h>
80a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj#include <assert.h>
90a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
/* Short scalar aliases used throughout this test. */
typedef  unsigned int            UInt;
typedef  signed int              Int;
typedef  unsigned char           UChar;
typedef  signed char             Char;
typedef  unsigned long long int  ULong;

/* Minimal boolean type: one byte holding either False or True. */
typedef  UChar  Bool;
#define False ((Bool)0)
#define True  ((Bool)1)
180a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
/* A 128-bit vector value.  The same 16 bytes of storage can be viewed
   either byte-wise (uChar) or as four 32-bit words (uInt). */
typedef
   union {
      UChar uChar[16];
      UInt  uInt[4];
   }
   V128;
267f3019bfbbbbc5356c351c5cd319c36fe731b806sewardj
/* Bit positions of the six condition flags inside an OSZACP flags
   word.  The positions are those of the corresponding bits in the
   x86 flags register. */
#define SHIFT_O   11   /* overflow  */
#define SHIFT_S   7    /* sign      */
#define SHIFT_Z   6    /* zero      */
#define SHIFT_A   4    /* auxiliary */
#define SHIFT_C   0    /* carry     */
#define SHIFT_P   2    /* parity    */

/* Single-bit masks for the same flags.  These have type unsigned
   long long. */
#define MASK_O    (1ULL << SHIFT_O)
#define MASK_S    (1ULL << SHIFT_S)
#define MASK_Z    (1ULL << SHIFT_Z)
#define MASK_A    (1ULL << SHIFT_A)
#define MASK_C    (1ULL << SHIFT_C)
#define MASK_P    (1ULL << SHIFT_P)
400a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
410a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
/* Count the leading zero bits of a 32-bit value.

   Returns the number of zero bits above the most significant set bit
   of x (0 .. 31), or 32 when x is zero.

   This is a binary search done entirely in unsigned arithmetic, so
   the result does not depend on how the compiler converts large
   unsigned values to signed int or right-shifts negative numbers. */
UInt clz32 ( UInt x )
{
   UInt n = 0;

   if (x == 0)
      return 32;

   /* At each step: if the top half of the remaining window is empty,
      count those bits and slide the lower half up into its place. */
   if ((x & 0xFFFF0000u) == 0) { n += 16; x <<= 16; }
   if ((x & 0xFF000000u) == 0) { n +=  8; x <<=  8; }
   if ((x & 0xF0000000u) == 0) { n +=  4; x <<=  4; }
   if ((x & 0xC0000000u) == 0) { n +=  2; x <<=  2; }
   if ((x & 0x80000000u) == 0) { n +=  1; }

   return n;
}
650a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
/* Count the trailing zero bits of a 32-bit value.

   Returns the index of the least significant set bit of x (0 .. 31),
   or 32 when x is zero. */
UInt ctz32 ( UInt x )
{
   /* (~x) & (x-1) has a 1 in exactly the positions that lie below the
      lowest set bit of x (every position when x is zero).  Those 1
      bits form a contiguous run starting at bit 0, so its length is
      the answer. */
   UInt below = (~x) & (x - 1);
   UInt n = 0;

   while (below != 0) {
      n++;
      below >>= 1;
   }
   return n;
}
700a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
/* Expand a 16-character hex summary string into a 128-bit vector.

   Each character of summary must be a hex digit (0-9, a-f or A-F).
   The digit's value is copied into both nibbles of one byte, so 'a'
   becomes 0xAA and '0' becomes 0x00.  The string is read back to
   front: summary[15] supplies dst->uChar[0] and summary[0] supplies
   dst->uChar[15].

   Asserts if summary is not exactly 16 characters long or contains a
   character that is not a hex digit. */
void expand ( V128* dst, char* summary )
{
   Int i;
   assert( strlen(summary) == 16 );
   for (i = 0; i < 16; i++) {
      UChar c      = summary[15-i];
      UChar nibble = 0;

      if      (c >= '0' && c <= '9') { nibble = c - '0'; }
      else if (c >= 'A' && c <= 'F') { nibble = c - 'A' + 10; }
      else if (c >= 'a' && c <= 'f') { nibble = c - 'a' + 10; }
      else { assert(0); }

      assert(nibble < 16);
      // replicate the digit into the high and low halves of the byte
      dst->uChar[i] = (UChar)((nibble << 4) | nibble);
   }
}
890a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
900a87e8d2d445c36fbd78787c3a553ea46bfe50desewardjvoid try_istri ( char* which,
910a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj                 UInt(*h_fn)(V128*,V128*),
920a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj                 UInt(*s_fn)(V128*,V128*),
930a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj                 char* summL, char* summR )
940a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj{
950a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   assert(strlen(which) == 2);
960a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   V128 argL, argR;
970a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   expand(&argL, summL);
980a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   expand(&argR, summR);
990a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   UInt h_res = h_fn(&argL, &argR);
1000a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   UInt s_res = s_fn(&argL, &argR);
1010a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   printf("istri %s  %s %s -> %08x %08x %s\n",
1020a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj          which, summL, summR, h_res, s_res, h_res == s_res ? "" : "!!!!");
1030a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj}
1040a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
/* Build the zero-byte mask of a vector: bit i of the result (for i in
   0 .. 15) is set iff byte i of *arg is zero.  Bits 31:16 of the
   result are always clear. */
UInt zmask_from_V128 ( V128* arg )
{
   UInt i, res = 0;
   for (i = 0; i < 16; i++) {
      if (arg->uChar[i] == 0)
         res |= (1u << i);
   }
   return res;
}
1135ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
//////////////////////////////////////////////////////////
//                                                      //
//                       GENERAL                        //
//                                                      //
//////////////////////////////////////////////////////////
1195ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
1205ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
1215ac99069b0538adcb2f18b04b078ea27b00b4185sewardj/* Given partial results from a pcmpXstrX operation (intRes1,
1225ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   basically), generate an I format (index value for ECX) output, and
1235ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   also the new OSZACP flags.
1245ac99069b0538adcb2f18b04b078ea27b00b4185sewardj*/
1255ac99069b0538adcb2f18b04b078ea27b00b4185sewardjstatic
1265ac99069b0538adcb2f18b04b078ea27b00b4185sewardjvoid pcmpXstrX_WRK_gen_output_fmt_I(/*OUT*/V128* resV,
1275ac99069b0538adcb2f18b04b078ea27b00b4185sewardj                                    /*OUT*/UInt* resOSZACP,
1285ac99069b0538adcb2f18b04b078ea27b00b4185sewardj                                    UInt intRes1,
1295ac99069b0538adcb2f18b04b078ea27b00b4185sewardj                                    UInt zmaskL, UInt zmaskR,
1305ac99069b0538adcb2f18b04b078ea27b00b4185sewardj                                    UInt validL,
1315ac99069b0538adcb2f18b04b078ea27b00b4185sewardj                                    UInt pol, UInt idx )
1325ac99069b0538adcb2f18b04b078ea27b00b4185sewardj{
1335ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   assert((pol >> 2) == 0);
1345ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   assert((idx >> 1) == 0);
1355ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
1365ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   UInt intRes2 = 0;
1375ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   switch (pol) {
1385ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      case 0: intRes2 = intRes1;          break; // pol +
1395ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      case 1: intRes2 = ~intRes1;         break; // pol -
1405ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      case 2: intRes2 = intRes1;          break; // pol m+
1415ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      case 3: intRes2 = intRes1 ^ validL; break; // pol m-
1425ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   }
1435ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   intRes2 &= 0xFFFF;
1445ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
1455ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   // generate ecx value
1465ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   UInt newECX = 0;
1475ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   if (idx) {
1485ac99069b0538adcb2f18b04b078ea27b00b4185sewardj     // index of ms-1-bit
1495ac99069b0538adcb2f18b04b078ea27b00b4185sewardj     newECX = intRes2 == 0 ? 16 : (31 - clz32(intRes2));
1505ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   } else {
1515ac99069b0538adcb2f18b04b078ea27b00b4185sewardj     // index of ls-1-bit
1525ac99069b0538adcb2f18b04b078ea27b00b4185sewardj     newECX = intRes2 == 0 ? 16 : ctz32(intRes2);
1535ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   }
1545ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
1555ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   *(UInt*)(&resV[0]) = newECX;
1565ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
1575ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   // generate new flags, common to all ISTRI and ISTRM cases
1585ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   *resOSZACP    // A, P are zero
1595ac99069b0538adcb2f18b04b078ea27b00b4185sewardj     = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0
1605ac99069b0538adcb2f18b04b078ea27b00b4185sewardj     | ((zmaskL == 0)  ? 0 : MASK_Z) // Z == 1 iff any in argL is 0
1615ac99069b0538adcb2f18b04b078ea27b00b4185sewardj     | ((zmaskR == 0)  ? 0 : MASK_S) // S == 1 iff any in argR is 0
1625ac99069b0538adcb2f18b04b078ea27b00b4185sewardj     | ((intRes2 & 1) << SHIFT_O);   // O == IntRes2[0]
1635ac99069b0538adcb2f18b04b078ea27b00b4185sewardj}
1645ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
1655ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
1665ac99069b0538adcb2f18b04b078ea27b00b4185sewardj/* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M}
1675ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   variants.
1685ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
1695ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   For xSTRI variants, the new ECX value is placed in the 32 bits
1705ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   pointed to by *resV.  For xSTRM variants, the result is a 128 bit
1715ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   value and is placed at *resV in the obvious way.
1725ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
1735ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   For all variants, the new OSZACP value is placed at *resOSZACP.
1745ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
1755ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   argLV and argRV are the vector args.  The caller must prepare a
1765ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   16-bit mask for each, zmaskL and zmaskR.  For ISTRx variants this
1775ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   must be 1 for each zero byte of of the respective arg.  For ESTRx
1785ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   variants this is derived from the explicit length indication, and
1795ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   must be 0 in all places except at the bit index corresponding to
1805ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   the valid length (0 .. 16).  If the valid length is 16 then the
1815ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   mask must be all zeroes.  In all cases, bits 31:16 must be zero.
1825ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
1835ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   imm8 is the original immediate from the instruction.  isSTRM
1845ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   indicates whether this is a xSTRM or xSTRI variant, which controls
1855ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   how much of *res is written.
1865ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
1875ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   If the given imm8 case can be handled, the return value is True.
1885ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   If not, False is returned, and neither *res not *resOSZACP are
1895ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   altered.
1905ac99069b0538adcb2f18b04b078ea27b00b4185sewardj*/
1915ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
1925ac99069b0538adcb2f18b04b078ea27b00b4185sewardjBool pcmpXstrX_WRK ( /*OUT*/V128* resV,
1935ac99069b0538adcb2f18b04b078ea27b00b4185sewardj                     /*OUT*/UInt* resOSZACP,
1945ac99069b0538adcb2f18b04b078ea27b00b4185sewardj                     V128* argLV,  V128* argRV,
1955ac99069b0538adcb2f18b04b078ea27b00b4185sewardj                     UInt zmaskL, UInt zmaskR,
1965ac99069b0538adcb2f18b04b078ea27b00b4185sewardj                     UInt imm8,   Bool isSTRM )
1975ac99069b0538adcb2f18b04b078ea27b00b4185sewardj{
1985ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   assert(imm8 < 0x80);
1995ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   assert((zmaskL >> 16) == 0);
2005ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   assert((zmaskR >> 16) == 0);
2015ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
2027f3019bfbbbbc5356c351c5cd319c36fe731b806sewardj   /* Explicitly reject any imm8 values that haven't been validated,
2037f3019bfbbbbc5356c351c5cd319c36fe731b806sewardj      even if they would probably work.  Life is too short to have
2047f3019bfbbbbc5356c351c5cd319c36fe731b806sewardj      unvalidated cases in the code base. */
2057f3019bfbbbbc5356c351c5cd319c36fe731b806sewardj   switch (imm8) {
206a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      case 0x00: case 0x02:
207a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      case 0x08: case 0x0A: case 0x0C: case 0x0E:
208ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes      case 0x10: case 0x12: case 0x14:
209a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      case 0x18: case 0x1A:
210a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      case 0x30:            case 0x34:
211a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      case 0x38: case 0x3A:
212a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      case 0x40: case 0x42: case 0x44: case 0x46:
213a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes                 case 0x4A:
214a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes                 case 0x62:
215a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      case 0x70: case 0x72:
2167f3019bfbbbbc5356c351c5cd319c36fe731b806sewardj         break;
2177f3019bfbbbbc5356c351c5cd319c36fe731b806sewardj      default:
2187f3019bfbbbbc5356c351c5cd319c36fe731b806sewardj         return False;
2197f3019bfbbbbc5356c351c5cd319c36fe731b806sewardj   }
2207f3019bfbbbbc5356c351c5cd319c36fe731b806sewardj
2215ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   UInt fmt = (imm8 >> 0) & 3; // imm8[1:0]  data format
2225ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   UInt agg = (imm8 >> 2) & 3; // imm8[3:2]  aggregation fn
2235ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   UInt pol = (imm8 >> 4) & 3; // imm8[5:4]  polarity
2245ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   UInt idx = (imm8 >> 6) & 1; // imm8[6]    1==msb/bytemask
2255ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
2265ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   /*----------------------------------------*/
2275ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   /*-- strcmp on byte data                --*/
2285ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   /*----------------------------------------*/
2295ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
2305ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   if (agg == 2/*equal each, aka strcmp*/
2315ac99069b0538adcb2f18b04b078ea27b00b4185sewardj       && (fmt == 0/*ub*/ || fmt == 2/*sb*/)
2325ac99069b0538adcb2f18b04b078ea27b00b4185sewardj       && !isSTRM) {
2335ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      Int    i;
2345ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      UChar* argL = (UChar*)argLV;
2355ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      UChar* argR = (UChar*)argRV;
2365ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      UInt boolResII = 0;
2375ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      for (i = 15; i >= 0; i--) {
2385ac99069b0538adcb2f18b04b078ea27b00b4185sewardj         UChar cL  = argL[i];
2395ac99069b0538adcb2f18b04b078ea27b00b4185sewardj         UChar cR  = argR[i];
2405ac99069b0538adcb2f18b04b078ea27b00b4185sewardj         boolResII = (boolResII << 1) | (cL == cR ? 1 : 0);
2415ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      }
2425ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      UInt validL = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
2435ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      UInt validR = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
2445ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
2455ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      // do invalidation, common to all equal-each cases
2465ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      UInt intRes1
2475ac99069b0538adcb2f18b04b078ea27b00b4185sewardj         = (boolResII & validL & validR)  // if both valid, use cmpres
2485ac99069b0538adcb2f18b04b078ea27b00b4185sewardj           | (~ (validL | validR));       // if both invalid, force 1
2495ac99069b0538adcb2f18b04b078ea27b00b4185sewardj                                          // else force 0
2505ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      intRes1 &= 0xFFFF;
2515ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
2525ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      // generate I-format output
2535ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      pcmpXstrX_WRK_gen_output_fmt_I(
2545ac99069b0538adcb2f18b04b078ea27b00b4185sewardj         resV, resOSZACP,
2555ac99069b0538adcb2f18b04b078ea27b00b4185sewardj         intRes1, zmaskL, zmaskR, validL, pol, idx
2565ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      );
2575ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
2585ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      return True;
2595ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   }
2605ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
2615ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   /*----------------------------------------*/
2625ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   /*-- set membership on byte data        --*/
2635ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   /*----------------------------------------*/
2645ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
2655ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   if (agg == 0/*equal any, aka find chars in a set*/
2665ac99069b0538adcb2f18b04b078ea27b00b4185sewardj       && (fmt == 0/*ub*/ || fmt == 2/*sb*/)
2675ac99069b0538adcb2f18b04b078ea27b00b4185sewardj       && !isSTRM) {
2685ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      /* argL: the string,  argR: charset */
2695ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      UInt   si, ci;
2705ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      UChar* argL    = (UChar*)argLV;
2715ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      UChar* argR    = (UChar*)argRV;
2725ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      UInt   boolRes = 0;
2735ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
2745ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
2755ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
2765ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      for (si = 0; si < 16; si++) {
2775ac99069b0538adcb2f18b04b078ea27b00b4185sewardj         if ((validL & (1 << si)) == 0)
2785ac99069b0538adcb2f18b04b078ea27b00b4185sewardj            // run off the end of the string.
2795ac99069b0538adcb2f18b04b078ea27b00b4185sewardj            break;
2805ac99069b0538adcb2f18b04b078ea27b00b4185sewardj         UInt m = 0;
2815ac99069b0538adcb2f18b04b078ea27b00b4185sewardj         for (ci = 0; ci < 16; ci++) {
2825ac99069b0538adcb2f18b04b078ea27b00b4185sewardj            if ((validR & (1 << ci)) == 0) break;
2835ac99069b0538adcb2f18b04b078ea27b00b4185sewardj            if (argR[ci] == argL[si]) { m = 1; break; }
2845ac99069b0538adcb2f18b04b078ea27b00b4185sewardj         }
2855ac99069b0538adcb2f18b04b078ea27b00b4185sewardj         boolRes |= (m << si);
2865ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      }
2875ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
2885ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      // boolRes is "pre-invalidated"
2895ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      UInt intRes1 = boolRes & 0xFFFF;
2905ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
2915ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      // generate I-format output
2925ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      pcmpXstrX_WRK_gen_output_fmt_I(
2935ac99069b0538adcb2f18b04b078ea27b00b4185sewardj         resV, resOSZACP,
2945ac99069b0538adcb2f18b04b078ea27b00b4185sewardj         intRes1, zmaskL, zmaskR, validL, pol, idx
2955ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      );
2965ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
2975ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      return True;
2985ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   }
2995ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
3005ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   /*----------------------------------------*/
3015ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   /*-- substring search on byte data      --*/
3025ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   /*----------------------------------------*/
3035ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
3045ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   if (agg == 3/*equal ordered, aka substring search*/
3055ac99069b0538adcb2f18b04b078ea27b00b4185sewardj       && (fmt == 0/*ub*/ || fmt == 2/*sb*/)
3065ac99069b0538adcb2f18b04b078ea27b00b4185sewardj       && !isSTRM) {
3075ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
3085ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      /* argL: haystack,  argR: needle */
3095ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      UInt   ni, hi;
3105ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      UChar* argL    = (UChar*)argLV;
3115ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      UChar* argR    = (UChar*)argRV;
3125ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      UInt   boolRes = 0;
3135ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
3145ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
3155ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      for (hi = 0; hi < 16; hi++) {
3165ac99069b0538adcb2f18b04b078ea27b00b4185sewardj         UInt m = 1;
3175ac99069b0538adcb2f18b04b078ea27b00b4185sewardj         for (ni = 0; ni < 16; ni++) {
3185ac99069b0538adcb2f18b04b078ea27b00b4185sewardj            if ((validR & (1 << ni)) == 0) break;
3195ac99069b0538adcb2f18b04b078ea27b00b4185sewardj            UInt i = ni + hi;
3205ac99069b0538adcb2f18b04b078ea27b00b4185sewardj            if (i >= 16) break;
3215ac99069b0538adcb2f18b04b078ea27b00b4185sewardj            if (argL[i] != argR[ni]) { m = 0; break; }
3225ac99069b0538adcb2f18b04b078ea27b00b4185sewardj         }
3235ac99069b0538adcb2f18b04b078ea27b00b4185sewardj         boolRes |= (m << hi);
324c5274ae844ae01cde66e35f1873ed37726dccd45weidendo         if ((validL & (1 << hi)) == 0)
325c5274ae844ae01cde66e35f1873ed37726dccd45weidendo            // run off the end of the haystack
326c5274ae844ae01cde66e35f1873ed37726dccd45weidendo            break;
3275ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      }
3285ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
3295ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      // boolRes is "pre-invalidated"
3305ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      UInt intRes1 = boolRes & 0xFFFF;
3315ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
3325ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      // generate I-format output
3335ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      pcmpXstrX_WRK_gen_output_fmt_I(
3345ac99069b0538adcb2f18b04b078ea27b00b4185sewardj         resV, resOSZACP,
3355ac99069b0538adcb2f18b04b078ea27b00b4185sewardj         intRes1, zmaskL, zmaskR, validL, pol, idx
3365ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      );
3375ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
3385ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      return True;
3395ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   }
3405ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
3415ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   /*----------------------------------------*/
3425ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   /*-- ranges, unsigned byte data         --*/
3435ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   /*----------------------------------------*/
3445ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
3455ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   if (agg == 1/*ranges*/
3465ac99069b0538adcb2f18b04b078ea27b00b4185sewardj       && fmt == 0/*ub*/
3475ac99069b0538adcb2f18b04b078ea27b00b4185sewardj       && !isSTRM) {
3485ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
3495ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      /* argL: string,  argR: range-pairs */
3505ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      UInt   ri, si;
3515ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      UChar* argL    = (UChar*)argLV;
3525ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      UChar* argR    = (UChar*)argRV;
3535ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      UInt   boolRes = 0;
3545ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
3555ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
3565ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      for (si = 0; si < 16; si++) {
3575ac99069b0538adcb2f18b04b078ea27b00b4185sewardj         if ((validL & (1 << si)) == 0)
3585ac99069b0538adcb2f18b04b078ea27b00b4185sewardj            // run off the end of the string
3595ac99069b0538adcb2f18b04b078ea27b00b4185sewardj            break;
3605ac99069b0538adcb2f18b04b078ea27b00b4185sewardj         UInt m = 0;
3615ac99069b0538adcb2f18b04b078ea27b00b4185sewardj         for (ri = 0; ri < 16; ri += 2) {
3625ac99069b0538adcb2f18b04b078ea27b00b4185sewardj            if ((validR & (3 << ri)) != (3 << ri)) break;
3635ac99069b0538adcb2f18b04b078ea27b00b4185sewardj            if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
3645ac99069b0538adcb2f18b04b078ea27b00b4185sewardj               m = 1; break;
3655ac99069b0538adcb2f18b04b078ea27b00b4185sewardj            }
3665ac99069b0538adcb2f18b04b078ea27b00b4185sewardj         }
3675ac99069b0538adcb2f18b04b078ea27b00b4185sewardj         boolRes |= (m << si);
3685ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      }
3695ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
3705ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      // boolRes is "pre-invalidated"
3715ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      UInt intRes1 = boolRes & 0xFFFF;
3725ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
3735ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      // generate I-format output
3745ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      pcmpXstrX_WRK_gen_output_fmt_I(
3755ac99069b0538adcb2f18b04b078ea27b00b4185sewardj         resV, resOSZACP,
3765ac99069b0538adcb2f18b04b078ea27b00b4185sewardj         intRes1, zmaskL, zmaskR, validL, pol, idx
3775ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      );
3785ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
3795ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      return True;
3805ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   }
3815ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
38215df336557eb012a5f3b2f1482a0411857039496sewardj   /*----------------------------------------*/
38315df336557eb012a5f3b2f1482a0411857039496sewardj   /*-- ranges, signed byte data           --*/
38415df336557eb012a5f3b2f1482a0411857039496sewardj   /*----------------------------------------*/
38515df336557eb012a5f3b2f1482a0411857039496sewardj
38615df336557eb012a5f3b2f1482a0411857039496sewardj   if (agg == 1/*ranges*/
38715df336557eb012a5f3b2f1482a0411857039496sewardj       && fmt == 2/*sb*/
38815df336557eb012a5f3b2f1482a0411857039496sewardj       && !isSTRM) {
38915df336557eb012a5f3b2f1482a0411857039496sewardj
39015df336557eb012a5f3b2f1482a0411857039496sewardj      /* argL: string,  argR: range-pairs */
39115df336557eb012a5f3b2f1482a0411857039496sewardj      UInt   ri, si;
39215df336557eb012a5f3b2f1482a0411857039496sewardj      Char*  argL    = (Char*)argLV;
39315df336557eb012a5f3b2f1482a0411857039496sewardj      Char*  argR    = (Char*)argRV;
39415df336557eb012a5f3b2f1482a0411857039496sewardj      UInt   boolRes = 0;
39515df336557eb012a5f3b2f1482a0411857039496sewardj      UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
39615df336557eb012a5f3b2f1482a0411857039496sewardj      UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
39715df336557eb012a5f3b2f1482a0411857039496sewardj      for (si = 0; si < 16; si++) {
39815df336557eb012a5f3b2f1482a0411857039496sewardj         if ((validL & (1 << si)) == 0)
39915df336557eb012a5f3b2f1482a0411857039496sewardj            // run off the end of the string
40015df336557eb012a5f3b2f1482a0411857039496sewardj            break;
40115df336557eb012a5f3b2f1482a0411857039496sewardj         UInt m = 0;
40215df336557eb012a5f3b2f1482a0411857039496sewardj         for (ri = 0; ri < 16; ri += 2) {
40315df336557eb012a5f3b2f1482a0411857039496sewardj            if ((validR & (3 << ri)) != (3 << ri)) break;
40415df336557eb012a5f3b2f1482a0411857039496sewardj            if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
40515df336557eb012a5f3b2f1482a0411857039496sewardj               m = 1; break;
40615df336557eb012a5f3b2f1482a0411857039496sewardj            }
40715df336557eb012a5f3b2f1482a0411857039496sewardj         }
40815df336557eb012a5f3b2f1482a0411857039496sewardj         boolRes |= (m << si);
40915df336557eb012a5f3b2f1482a0411857039496sewardj      }
41015df336557eb012a5f3b2f1482a0411857039496sewardj
41115df336557eb012a5f3b2f1482a0411857039496sewardj      // boolRes is "pre-invalidated"
41215df336557eb012a5f3b2f1482a0411857039496sewardj      UInt intRes1 = boolRes & 0xFFFF;
41315df336557eb012a5f3b2f1482a0411857039496sewardj
41415df336557eb012a5f3b2f1482a0411857039496sewardj      // generate I-format output
41515df336557eb012a5f3b2f1482a0411857039496sewardj      pcmpXstrX_WRK_gen_output_fmt_I(
41615df336557eb012a5f3b2f1482a0411857039496sewardj         resV, resOSZACP,
41715df336557eb012a5f3b2f1482a0411857039496sewardj         intRes1, zmaskL, zmaskR, validL, pol, idx
41815df336557eb012a5f3b2f1482a0411857039496sewardj      );
41915df336557eb012a5f3b2f1482a0411857039496sewardj
42015df336557eb012a5f3b2f1482a0411857039496sewardj      return True;
42115df336557eb012a5f3b2f1482a0411857039496sewardj   }
42215df336557eb012a5f3b2f1482a0411857039496sewardj
4235ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   return False;
4245ac99069b0538adcb2f18b04b078ea27b00b4185sewardj}
4255ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
4265ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
4270a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj//////////////////////////////////////////////////////////
4280a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj//                                                      //
4290a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj//                       ISTRI_4A                       //
4300a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj//                                                      //
4310a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj//////////////////////////////////////////////////////////
4320a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
4330a87e8d2d445c36fbd78787c3a553ea46bfe50desewardjUInt h_pcmpistri_4A ( V128* argL, V128* argR )
4340a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj{
4350a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   V128 block[2];
4360a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   memcpy(&block[0], argL, sizeof(V128));
4370a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   memcpy(&block[1], argR, sizeof(V128));
4380a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   ULong res, flags;
4390a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   __asm__ __volatile__(
4400a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      "subq      $1024,  %%rsp"             "\n\t"
4410a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      "movdqu    0(%2),  %%xmm2"            "\n\t"
4420a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      "movdqu    16(%2), %%xmm11"           "\n\t"
4430a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      "pcmpistri $0x4A,  %%xmm2, %%xmm11"   "\n\t"
4440a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      "pushfq"                              "\n\t"
4450a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      "popq      %%rdx"                     "\n\t"
4460a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      "movq      %%rcx,  %0"                "\n\t"
4470a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      "movq      %%rdx,  %1"                "\n\t"
4480a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      "addq      $1024,  %%rsp"             "\n\t"
4490a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
4500a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
4510a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   );
4520a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
4530a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj}
4540a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
4550a87e8d2d445c36fbd78787c3a553ea46bfe50desewardjUInt s_pcmpistri_4A ( V128* argLU, V128* argRU )
4560a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj{
4575ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   V128 resV;
4585ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   UInt resOSZACP, resECX;
4595ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   Bool ok
4605ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
4615ac99069b0538adcb2f18b04b078ea27b00b4185sewardj                       zmask_from_V128(argLU),
4625ac99069b0538adcb2f18b04b078ea27b00b4185sewardj                       zmask_from_V128(argRU),
4635ac99069b0538adcb2f18b04b078ea27b00b4185sewardj                       0x4A, False/*!isSTRM*/
4645ac99069b0538adcb2f18b04b078ea27b00b4185sewardj        );
4655ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   assert(ok);
4667f3019bfbbbbc5356c351c5cd319c36fe731b806sewardj   resECX = resV.uInt[0];
4675ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   return (resOSZACP << 16) | resECX;
4680a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj}
4690a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
4700a87e8d2d445c36fbd78787c3a553ea46bfe50desewardjvoid istri_4A ( void )
4710a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj{
4720a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   char* wot = "4A";
4730a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   UInt(*h)(V128*,V128*) = h_pcmpistri_4A;
4740a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   UInt(*s)(V128*,V128*) = s_pcmpistri_4A;
4750a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
4760a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
4770a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
4780a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
4790a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
4800a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
4810a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
4820a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
4830a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
4840a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
4850a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
4860a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
4870a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
4880a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
4890a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
4900a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
4910a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
4920a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
4930a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
4940a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
4950a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
4960a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
4970a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
4980a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
4990a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
5000a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
5010a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
5020a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
5030a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
5040a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
5050a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
5060a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
5070a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
5080a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
5090a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
5100a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
5110a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
5120a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
5130a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
5140a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
5150a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
5160a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj}
5170a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
5180a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj//////////////////////////////////////////////////////////
5190a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj//                                                      //
5200a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj//                       ISTRI_3A                       //
5210a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj//                                                      //
5220a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj//////////////////////////////////////////////////////////
5230a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
5240a87e8d2d445c36fbd78787c3a553ea46bfe50desewardjUInt h_pcmpistri_3A ( V128* argL, V128* argR )
5250a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj{
5260a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   V128 block[2];
5270a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   memcpy(&block[0], argL, sizeof(V128));
5280a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   memcpy(&block[1], argR, sizeof(V128));
5290a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   ULong res, flags;
5300a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   __asm__ __volatile__(
5310a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      "subq      $1024,  %%rsp"             "\n\t"
5320a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      "movdqu    0(%2),  %%xmm2"            "\n\t"
5330a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      "movdqu    16(%2), %%xmm11"           "\n\t"
5340a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      "pcmpistri $0x3A,  %%xmm2, %%xmm11"   "\n\t"
5350a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      "pushfq"                              "\n\t"
5360a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      "popq      %%rdx"                     "\n\t"
5370a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      "movq      %%rcx,  %0"                "\n\t"
5380a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      "movq      %%rdx,  %1"                "\n\t"
5390a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      "addq      $1024,  %%rsp"             "\n\t"
5400a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
5410a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
5420a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   );
5430a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
5440a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj}
5450a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
5460a87e8d2d445c36fbd78787c3a553ea46bfe50desewardjUInt s_pcmpistri_3A ( V128* argLU, V128* argRU )
5470a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj{
5485ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   V128 resV;
5495ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   UInt resOSZACP, resECX;
5505ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   Bool ok
5515ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
5525ac99069b0538adcb2f18b04b078ea27b00b4185sewardj                       zmask_from_V128(argLU),
5535ac99069b0538adcb2f18b04b078ea27b00b4185sewardj                       zmask_from_V128(argRU),
5545ac99069b0538adcb2f18b04b078ea27b00b4185sewardj                       0x3A, False/*!isSTRM*/
5555ac99069b0538adcb2f18b04b078ea27b00b4185sewardj        );
5565ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   assert(ok);
5577f3019bfbbbbc5356c351c5cd319c36fe731b806sewardj   resECX = resV.uInt[0];
5585ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   return (resOSZACP << 16) | resECX;
5590a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj}
5600a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
5610a87e8d2d445c36fbd78787c3a553ea46bfe50desewardjvoid istri_3A ( void )
5620a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj{
5630a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   char* wot = "3A";
5640a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   UInt(*h)(V128*,V128*) = h_pcmpistri_3A;
5650a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   UInt(*s)(V128*,V128*) = s_pcmpistri_3A;
5660a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
5670a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
5680a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
5690a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
5700a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
5710a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
5720a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
5730a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
5740a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
5750a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
5760a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
5770a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
5780a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
5790a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
5800a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
5810a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
5820a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
5830a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
5840a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
5850a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
5860a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
5870a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
5880a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
5890a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
5900a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
5910a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
5920a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
5930a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
5940a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
5950a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
5960a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
5970a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
5980a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
5990a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
6000a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
6010a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
6020a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
6030a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
6040a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
6050a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
6060a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
6070a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj}
6080a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
6090a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
6100a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
6110a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj//////////////////////////////////////////////////////////
6120a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj//                                                      //
6130a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj//                       ISTRI_0C                       //
6140a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj//                                                      //
6150a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj//////////////////////////////////////////////////////////
6160a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
6170a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj__attribute__((noinline))
6180a87e8d2d445c36fbd78787c3a553ea46bfe50desewardjUInt h_pcmpistri_0C ( V128* argL, V128* argR )
6190a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj{
6200a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   V128 block[2];
6210a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   memcpy(&block[0], argL, sizeof(V128));
6220a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   memcpy(&block[1], argR, sizeof(V128));
6230a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   ULong res = 0, flags = 0;
6240a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   __asm__ __volatile__(
625c5274ae844ae01cde66e35f1873ed37726dccd45weidendo      "movdqu    0(%2),  %%xmm2"            "\n\t"
626c5274ae844ae01cde66e35f1873ed37726dccd45weidendo      "movdqu    16(%2), %%xmm11"           "\n\t"
6270a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      "pcmpistri $0x0C,  %%xmm2, %%xmm11"   "\n\t"
6280a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      //"pcmpistrm $0x0C,  %%xmm2, %%xmm11"   "\n\t"
6290a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      //"movd %%xmm0, %%ecx" "\n\t"
6300a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      "pushfq"                              "\n\t"
6310a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      "popq      %%rdx"                     "\n\t"
6320a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      "movq      %%rcx,  %0"                "\n\t"
6330a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      "movq      %%rdx,  %1"                "\n\t"
6347f3019bfbbbbc5356c351c5cd319c36fe731b806sewardj      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
6350a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
6360a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   );
6370a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
6380a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj}
6390a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
6400a87e8d2d445c36fbd78787c3a553ea46bfe50desewardjUInt s_pcmpistri_0C ( V128* argLU, V128* argRU )
6410a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj{
6425ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   V128 resV;
6435ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   UInt resOSZACP, resECX;
6445ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   Bool ok
6455ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
6465ac99069b0538adcb2f18b04b078ea27b00b4185sewardj                       zmask_from_V128(argLU),
6475ac99069b0538adcb2f18b04b078ea27b00b4185sewardj                       zmask_from_V128(argRU),
6485ac99069b0538adcb2f18b04b078ea27b00b4185sewardj                       0x0C, False/*!isSTRM*/
6495ac99069b0538adcb2f18b04b078ea27b00b4185sewardj        );
6505ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   assert(ok);
6517f3019bfbbbbc5356c351c5cd319c36fe731b806sewardj   resECX = resV.uInt[0];
6525ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   return (resOSZACP << 16) | resECX;
6530a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj}
6540a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
6550a87e8d2d445c36fbd78787c3a553ea46bfe50desewardjvoid istri_0C ( void )
6560a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj{
6570a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   char* wot = "0C";
6580a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   UInt(*h)(V128*,V128*) = h_pcmpistri_0C;
6590a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   UInt(*s)(V128*,V128*) = s_pcmpistri_0C;
6600a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
6610a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "111111111abcde11", "00000000000abcde");
6620a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
6630a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "111111111abcde11", "0000abcde00abcde");
6640a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
6650a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "1111111111abcde1", "00000000000abcde");
6660a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "11111111111abcde", "00000000000abcde");
6670a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "111111111111abcd", "00000000000abcde");
6680a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
6690a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "111abcde1abcde11", "00000000000abcde");
6700a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
6710a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "11abcde11abcde11", "00000000000abcde");
6720a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "1abcde111abcde11", "00000000000abcde");
6730a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "abcde1111abcde11", "00000000000abcde");
6740a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "bcde11111abcde11", "00000000000abcde");
6750a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "cde111111abcde11", "00000000000abcde");
6760a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
6770a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "01abcde11abcde11", "00000000000abcde");
6780a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "00abcde11abcde11", "00000000000abcde");
6790a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "000bcde11abcde11", "00000000000abcde");
6800a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
6810a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "00abcde10abcde11", "00000000000abcde");
6820a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "00abcde100bcde11", "00000000000abcde");
6830a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
6840a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "1111111111111234", "0000000000000000");
6850a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "1111111111111234", "0000000000000001");
6860a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "1111111111111234", "0000000000000011");
6870a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
6880a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "1111111111111234", "1111111111111234");
6890a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "a111111111111111", "000000000000000a");
6900a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "b111111111111111", "000000000000000a");
691c5274ae844ae01cde66e35f1873ed37726dccd45weidendo
692c5274ae844ae01cde66e35f1873ed37726dccd45weidendo   try_istri(wot,h,s, "b111111111111111", "0000000000000000");
693c5274ae844ae01cde66e35f1873ed37726dccd45weidendo   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
694c5274ae844ae01cde66e35f1873ed37726dccd45weidendo   try_istri(wot,h,s, "123456789abcdef1", "0000000000000000");
695c5274ae844ae01cde66e35f1873ed37726dccd45weidendo   try_istri(wot,h,s, "0000000000000000", "123456789abcdef1");
6960a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj}
6970a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
6980a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
6990a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj//////////////////////////////////////////////////////////
7000a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj//                                                      //
7010a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj//                       ISTRI_08                       //
7020a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj//                                                      //
7030a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj//////////////////////////////////////////////////////////
7040a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
7050a87e8d2d445c36fbd78787c3a553ea46bfe50desewardjUInt h_pcmpistri_08 ( V128* argL, V128* argR )
7060a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj{
7070a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   V128 block[2];
7080a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   memcpy(&block[0], argL, sizeof(V128));
7090a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   memcpy(&block[1], argR, sizeof(V128));
7100a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   ULong res, flags;
7110a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   __asm__ __volatile__(
7120a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      "subq      $1024,  %%rsp"             "\n\t"
7130a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      "movdqu    0(%2),  %%xmm2"            "\n\t"
7140a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      "movdqu    16(%2), %%xmm11"           "\n\t"
7150a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      "pcmpistri $0x08,  %%xmm2, %%xmm11"   "\n\t"
7160a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      "pushfq"                              "\n\t"
7170a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      "popq      %%rdx"                     "\n\t"
7180a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      "movq      %%rcx,  %0"                "\n\t"
7190a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      "movq      %%rdx,  %1"                "\n\t"
7200a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      "addq      $1024,  %%rsp"             "\n\t"
7210a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
7220a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
7230a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   );
7240a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
7250a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj}
7260a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
7270a87e8d2d445c36fbd78787c3a553ea46bfe50desewardjUInt s_pcmpistri_08 ( V128* argLU, V128* argRU )
7280a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj{
7295ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   V128 resV;
7305ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   UInt resOSZACP, resECX;
7315ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   Bool ok
7325ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
7335ac99069b0538adcb2f18b04b078ea27b00b4185sewardj                       zmask_from_V128(argLU),
7345ac99069b0538adcb2f18b04b078ea27b00b4185sewardj                       zmask_from_V128(argRU),
7355ac99069b0538adcb2f18b04b078ea27b00b4185sewardj                       0x08, False/*!isSTRM*/
7365ac99069b0538adcb2f18b04b078ea27b00b4185sewardj        );
7375ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   assert(ok);
7387f3019bfbbbbc5356c351c5cd319c36fe731b806sewardj   resECX = resV.uInt[0];
7395ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   return (resOSZACP << 16) | resECX;
7405ac99069b0538adcb2f18b04b078ea27b00b4185sewardj}
7410a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
7425ac99069b0538adcb2f18b04b078ea27b00b4185sewardjvoid istri_08 ( void )
7435ac99069b0538adcb2f18b04b078ea27b00b4185sewardj{
7445ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   char* wot = "08";
7455ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   UInt(*h)(V128*,V128*) = h_pcmpistri_08;
7465ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   UInt(*s)(V128*,V128*) = s_pcmpistri_08;
7470a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
7485ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
7490a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
7505ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
7515ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
7525ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
7535ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
7540a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
7555ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
7565ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
7575ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
7580a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
7595ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
7605ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
7615ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
7625ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
7635ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
7645ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
7655ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
7665ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
7675ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
7685ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
7695ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
7705ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
7715ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
7725ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
7735ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
7745ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
7755ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
7765ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
7775ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
7785ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
7795ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
7805ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
7815ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
7825ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
7835ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
7845ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
7855ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
7865ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
7875ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
7880a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj}
7890a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
7905ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
7915ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
7925ac99069b0538adcb2f18b04b078ea27b00b4185sewardj//////////////////////////////////////////////////////////
7935ac99069b0538adcb2f18b04b078ea27b00b4185sewardj//                                                      //
794053f436448ea3f8733f5205226d2989d4de31b66sewardj//                       ISTRI_18                       //
795053f436448ea3f8733f5205226d2989d4de31b66sewardj//                                                      //
796053f436448ea3f8733f5205226d2989d4de31b66sewardj//////////////////////////////////////////////////////////
797053f436448ea3f8733f5205226d2989d4de31b66sewardj
798053f436448ea3f8733f5205226d2989d4de31b66sewardjUInt h_pcmpistri_18 ( V128* argL, V128* argR )
799053f436448ea3f8733f5205226d2989d4de31b66sewardj{
800053f436448ea3f8733f5205226d2989d4de31b66sewardj   V128 block[2];
801053f436448ea3f8733f5205226d2989d4de31b66sewardj   memcpy(&block[0], argL, sizeof(V128));
802053f436448ea3f8733f5205226d2989d4de31b66sewardj   memcpy(&block[1], argR, sizeof(V128));
803053f436448ea3f8733f5205226d2989d4de31b66sewardj   ULong res, flags;
804053f436448ea3f8733f5205226d2989d4de31b66sewardj   __asm__ __volatile__(
805053f436448ea3f8733f5205226d2989d4de31b66sewardj      "subq      $1024,  %%rsp"             "\n\t"
806053f436448ea3f8733f5205226d2989d4de31b66sewardj      "movdqu    0(%2),  %%xmm2"            "\n\t"
807053f436448ea3f8733f5205226d2989d4de31b66sewardj      "movdqu    16(%2), %%xmm11"           "\n\t"
808053f436448ea3f8733f5205226d2989d4de31b66sewardj      "pcmpistri $0x18,  %%xmm2, %%xmm11"   "\n\t"
809053f436448ea3f8733f5205226d2989d4de31b66sewardj      "pushfq"                              "\n\t"
810053f436448ea3f8733f5205226d2989d4de31b66sewardj      "popq      %%rdx"                     "\n\t"
811053f436448ea3f8733f5205226d2989d4de31b66sewardj      "movq      %%rcx,  %0"                "\n\t"
812053f436448ea3f8733f5205226d2989d4de31b66sewardj      "movq      %%rdx,  %1"                "\n\t"
813053f436448ea3f8733f5205226d2989d4de31b66sewardj      "addq      $1024,  %%rsp"             "\n\t"
814053f436448ea3f8733f5205226d2989d4de31b66sewardj      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
815053f436448ea3f8733f5205226d2989d4de31b66sewardj      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
816053f436448ea3f8733f5205226d2989d4de31b66sewardj   );
817053f436448ea3f8733f5205226d2989d4de31b66sewardj   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
818053f436448ea3f8733f5205226d2989d4de31b66sewardj}
819053f436448ea3f8733f5205226d2989d4de31b66sewardj
820053f436448ea3f8733f5205226d2989d4de31b66sewardjUInt s_pcmpistri_18 ( V128* argLU, V128* argRU )
821053f436448ea3f8733f5205226d2989d4de31b66sewardj{
822053f436448ea3f8733f5205226d2989d4de31b66sewardj   V128 resV;
823053f436448ea3f8733f5205226d2989d4de31b66sewardj   UInt resOSZACP, resECX;
824053f436448ea3f8733f5205226d2989d4de31b66sewardj   Bool ok
825053f436448ea3f8733f5205226d2989d4de31b66sewardj      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
826053f436448ea3f8733f5205226d2989d4de31b66sewardj                       zmask_from_V128(argLU),
827053f436448ea3f8733f5205226d2989d4de31b66sewardj                       zmask_from_V128(argRU),
828053f436448ea3f8733f5205226d2989d4de31b66sewardj                       0x18, False/*!isSTRM*/
829053f436448ea3f8733f5205226d2989d4de31b66sewardj        );
830053f436448ea3f8733f5205226d2989d4de31b66sewardj   assert(ok);
831053f436448ea3f8733f5205226d2989d4de31b66sewardj   resECX = resV.uInt[0];
832053f436448ea3f8733f5205226d2989d4de31b66sewardj   return (resOSZACP << 16) | resECX;
833053f436448ea3f8733f5205226d2989d4de31b66sewardj}
834053f436448ea3f8733f5205226d2989d4de31b66sewardj
835053f436448ea3f8733f5205226d2989d4de31b66sewardjvoid istri_18 ( void )
836053f436448ea3f8733f5205226d2989d4de31b66sewardj{
837053f436448ea3f8733f5205226d2989d4de31b66sewardj   char* wot = "18";
838053f436448ea3f8733f5205226d2989d4de31b66sewardj   UInt(*h)(V128*,V128*) = h_pcmpistri_18;
839053f436448ea3f8733f5205226d2989d4de31b66sewardj   UInt(*s)(V128*,V128*) = s_pcmpistri_18;
840053f436448ea3f8733f5205226d2989d4de31b66sewardj
841053f436448ea3f8733f5205226d2989d4de31b66sewardj   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
842053f436448ea3f8733f5205226d2989d4de31b66sewardj
843053f436448ea3f8733f5205226d2989d4de31b66sewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
844053f436448ea3f8733f5205226d2989d4de31b66sewardj   try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
845053f436448ea3f8733f5205226d2989d4de31b66sewardj   try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
846053f436448ea3f8733f5205226d2989d4de31b66sewardj   try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
847053f436448ea3f8733f5205226d2989d4de31b66sewardj
848053f436448ea3f8733f5205226d2989d4de31b66sewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
849053f436448ea3f8733f5205226d2989d4de31b66sewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
850053f436448ea3f8733f5205226d2989d4de31b66sewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
851053f436448ea3f8733f5205226d2989d4de31b66sewardj
852053f436448ea3f8733f5205226d2989d4de31b66sewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
853053f436448ea3f8733f5205226d2989d4de31b66sewardj   try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
854053f436448ea3f8733f5205226d2989d4de31b66sewardj   try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
855053f436448ea3f8733f5205226d2989d4de31b66sewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
856053f436448ea3f8733f5205226d2989d4de31b66sewardj
857053f436448ea3f8733f5205226d2989d4de31b66sewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
858053f436448ea3f8733f5205226d2989d4de31b66sewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
859053f436448ea3f8733f5205226d2989d4de31b66sewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
860053f436448ea3f8733f5205226d2989d4de31b66sewardj
861053f436448ea3f8733f5205226d2989d4de31b66sewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
862053f436448ea3f8733f5205226d2989d4de31b66sewardj
863053f436448ea3f8733f5205226d2989d4de31b66sewardj   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
864053f436448ea3f8733f5205226d2989d4de31b66sewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
865053f436448ea3f8733f5205226d2989d4de31b66sewardj   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
866053f436448ea3f8733f5205226d2989d4de31b66sewardj
867053f436448ea3f8733f5205226d2989d4de31b66sewardj   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
868053f436448ea3f8733f5205226d2989d4de31b66sewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
869053f436448ea3f8733f5205226d2989d4de31b66sewardj   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
870053f436448ea3f8733f5205226d2989d4de31b66sewardj
871053f436448ea3f8733f5205226d2989d4de31b66sewardj   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
872053f436448ea3f8733f5205226d2989d4de31b66sewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
873053f436448ea3f8733f5205226d2989d4de31b66sewardj   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
874053f436448ea3f8733f5205226d2989d4de31b66sewardj
875053f436448ea3f8733f5205226d2989d4de31b66sewardj   try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
876053f436448ea3f8733f5205226d2989d4de31b66sewardj   try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
877053f436448ea3f8733f5205226d2989d4de31b66sewardj   try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
878053f436448ea3f8733f5205226d2989d4de31b66sewardj
879053f436448ea3f8733f5205226d2989d4de31b66sewardj   try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
880053f436448ea3f8733f5205226d2989d4de31b66sewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
881053f436448ea3f8733f5205226d2989d4de31b66sewardj}
882053f436448ea3f8733f5205226d2989d4de31b66sewardj
883053f436448ea3f8733f5205226d2989d4de31b66sewardj
884053f436448ea3f8733f5205226d2989d4de31b66sewardj
885053f436448ea3f8733f5205226d2989d4de31b66sewardj//////////////////////////////////////////////////////////
886053f436448ea3f8733f5205226d2989d4de31b66sewardj//                                                      //
8875ac99069b0538adcb2f18b04b078ea27b00b4185sewardj//                       ISTRI_1A                       //
8885ac99069b0538adcb2f18b04b078ea27b00b4185sewardj//                                                      //
8895ac99069b0538adcb2f18b04b078ea27b00b4185sewardj//////////////////////////////////////////////////////////
8905ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
8915ac99069b0538adcb2f18b04b078ea27b00b4185sewardjUInt h_pcmpistri_1A ( V128* argL, V128* argR )
8920a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj{
8935ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   V128 block[2];
8945ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   memcpy(&block[0], argL, sizeof(V128));
8955ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   memcpy(&block[1], argR, sizeof(V128));
8965ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   ULong res, flags;
8975ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   __asm__ __volatile__(
8985ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      "subq      $1024,  %%rsp"             "\n\t"
8995ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      "movdqu    0(%2),  %%xmm2"            "\n\t"
9005ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      "movdqu    16(%2), %%xmm11"           "\n\t"
9015ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      "pcmpistri $0x1A,  %%xmm2, %%xmm11"   "\n\t"
9025ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      "pushfq"                              "\n\t"
9035ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      "popq      %%rdx"                     "\n\t"
9045ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      "movq      %%rcx,  %0"                "\n\t"
9055ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      "movq      %%rdx,  %1"                "\n\t"
9065ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      "addq      $1024,  %%rsp"             "\n\t"
9075ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
9085ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
9095ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   );
9105ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
9115ac99069b0538adcb2f18b04b078ea27b00b4185sewardj}
9125ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
9135ac99069b0538adcb2f18b04b078ea27b00b4185sewardjUInt s_pcmpistri_1A ( V128* argLU, V128* argRU )
9145ac99069b0538adcb2f18b04b078ea27b00b4185sewardj{
9155ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   V128 resV;
9165ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   UInt resOSZACP, resECX;
9175ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   Bool ok
9185ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
9195ac99069b0538adcb2f18b04b078ea27b00b4185sewardj                       zmask_from_V128(argLU),
9205ac99069b0538adcb2f18b04b078ea27b00b4185sewardj                       zmask_from_V128(argRU),
9215ac99069b0538adcb2f18b04b078ea27b00b4185sewardj                       0x1A, False/*!isSTRM*/
9225ac99069b0538adcb2f18b04b078ea27b00b4185sewardj        );
9235ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   assert(ok);
9247f3019bfbbbbc5356c351c5cd319c36fe731b806sewardj   resECX = resV.uInt[0];
9255ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   return (resOSZACP << 16) | resECX;
9265ac99069b0538adcb2f18b04b078ea27b00b4185sewardj}
9275ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
9285ac99069b0538adcb2f18b04b078ea27b00b4185sewardjvoid istri_1A ( void )
9295ac99069b0538adcb2f18b04b078ea27b00b4185sewardj{
9305ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   char* wot = "1A";
9315ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   UInt(*h)(V128*,V128*) = h_pcmpistri_1A;
9325ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   UInt(*s)(V128*,V128*) = s_pcmpistri_1A;
9330a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
9340a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
9350a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
9360a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
9370a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
9380a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
9390a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
9400a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
9410a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
9420a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
9430a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
9440a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
9450a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
9460a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
9470a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
9480a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
9490a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
9500a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
9510a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
9520a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
9530a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
9540a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
9550a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
9560a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
9570a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
9580a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
9590a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
9600a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
9610a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
9620a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
9630a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
9640a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
9650a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
9660a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
9670a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
9680a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
9690a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
9700a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
9710a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
9720a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
9730a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
9740a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj}
9750a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
9760a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
9770a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
9785ac99069b0538adcb2f18b04b078ea27b00b4185sewardj//////////////////////////////////////////////////////////
9795ac99069b0538adcb2f18b04b078ea27b00b4185sewardj//                                                      //
9805ac99069b0538adcb2f18b04b078ea27b00b4185sewardj//                       ISTRI_02                       //
9815ac99069b0538adcb2f18b04b078ea27b00b4185sewardj//                                                      //
9825ac99069b0538adcb2f18b04b078ea27b00b4185sewardj//////////////////////////////////////////////////////////
9835ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
9845ac99069b0538adcb2f18b04b078ea27b00b4185sewardjUInt h_pcmpistri_02 ( V128* argL, V128* argR )
9855ac99069b0538adcb2f18b04b078ea27b00b4185sewardj{
9865ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   V128 block[2];
9875ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   memcpy(&block[0], argL, sizeof(V128));
9885ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   memcpy(&block[1], argR, sizeof(V128));
9895ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   ULong res, flags;
9905ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   __asm__ __volatile__(
9915ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      "subq      $1024,  %%rsp"             "\n\t"
9925ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      "movdqu    0(%2),  %%xmm2"            "\n\t"
9935ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      "movdqu    16(%2), %%xmm11"           "\n\t"
9945ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      "pcmpistri $0x02,  %%xmm2, %%xmm11"   "\n\t"
9955ac99069b0538adcb2f18b04b078ea27b00b4185sewardj//"pcmpistrm $0x02, %%xmm2, %%xmm11"   "\n\t"
9965ac99069b0538adcb2f18b04b078ea27b00b4185sewardj//"movd %%xmm0, %%ecx" "\n\t"
9975ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      "pushfq"                              "\n\t"
9985ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      "popq      %%rdx"                     "\n\t"
9995ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      "movq      %%rcx,  %0"                "\n\t"
10005ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      "movq      %%rdx,  %1"                "\n\t"
10015ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      "addq      $1024,  %%rsp"             "\n\t"
10025ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
10035ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
10045ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   );
10055ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
10065ac99069b0538adcb2f18b04b078ea27b00b4185sewardj}
10075ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
10085ac99069b0538adcb2f18b04b078ea27b00b4185sewardjUInt s_pcmpistri_02 ( V128* argLU, V128* argRU )
10095ac99069b0538adcb2f18b04b078ea27b00b4185sewardj{
10105ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   V128 resV;
10115ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   UInt resOSZACP, resECX;
10125ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   Bool ok
10135ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
10145ac99069b0538adcb2f18b04b078ea27b00b4185sewardj                       zmask_from_V128(argLU),
10155ac99069b0538adcb2f18b04b078ea27b00b4185sewardj                       zmask_from_V128(argRU),
10165ac99069b0538adcb2f18b04b078ea27b00b4185sewardj                       0x02, False/*!isSTRM*/
10175ac99069b0538adcb2f18b04b078ea27b00b4185sewardj        );
10185ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   assert(ok);
10197f3019bfbbbbc5356c351c5cd319c36fe731b806sewardj   resECX = resV.uInt[0];
10205ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   return (resOSZACP << 16) | resECX;
10215ac99069b0538adcb2f18b04b078ea27b00b4185sewardj}
10225ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
10235ac99069b0538adcb2f18b04b078ea27b00b4185sewardjvoid istri_02 ( void )
10245ac99069b0538adcb2f18b04b078ea27b00b4185sewardj{
10255ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   char* wot = "02";
10265ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   UInt(*h)(V128*,V128*) = h_pcmpistri_02;
10275ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   UInt(*s)(V128*,V128*) = s_pcmpistri_02;
10285ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
10295ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
10305ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
10315ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
10325ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
10335ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
10345ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
10355ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
10365ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
10375ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
10385ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
10395ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
10405ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
10415ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
10425ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
10435ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
10445ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
10455ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
10465ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
10475ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
10485ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
10495ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
10505ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
10515ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
10525ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
10535ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
10545ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
10555ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
10565ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
10575ac99069b0538adcb2f18b04b078ea27b00b4185sewardj}
10585ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
10595ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
10605ac99069b0538adcb2f18b04b078ea27b00b4185sewardj//////////////////////////////////////////////////////////
10615ac99069b0538adcb2f18b04b078ea27b00b4185sewardj//                                                      //
10625ac99069b0538adcb2f18b04b078ea27b00b4185sewardj//                       ISTRI_12                       //
10635ac99069b0538adcb2f18b04b078ea27b00b4185sewardj//                                                      //
10645ac99069b0538adcb2f18b04b078ea27b00b4185sewardj//////////////////////////////////////////////////////////
10655ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
10665ac99069b0538adcb2f18b04b078ea27b00b4185sewardjUInt h_pcmpistri_12 ( V128* argL, V128* argR )
10675ac99069b0538adcb2f18b04b078ea27b00b4185sewardj{
10685ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   V128 block[2];
10695ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   memcpy(&block[0], argL, sizeof(V128));
10705ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   memcpy(&block[1], argR, sizeof(V128));
10715ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   ULong res, flags;
10725ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   __asm__ __volatile__(
10735ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      "subq      $1024,  %%rsp"             "\n\t"
10745ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      "movdqu    0(%2),  %%xmm2"            "\n\t"
10755ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      "movdqu    16(%2), %%xmm11"           "\n\t"
10765ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      "pcmpistri $0x12,  %%xmm2, %%xmm11"   "\n\t"
10775ac99069b0538adcb2f18b04b078ea27b00b4185sewardj//"pcmpistrm $0x12, %%xmm2, %%xmm11"   "\n\t"
10785ac99069b0538adcb2f18b04b078ea27b00b4185sewardj//"movd %%xmm0, %%ecx" "\n\t"
10795ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      "pushfq"                              "\n\t"
10805ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      "popq      %%rdx"                     "\n\t"
10815ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      "movq      %%rcx,  %0"                "\n\t"
10825ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      "movq      %%rdx,  %1"                "\n\t"
10835ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      "addq      $1024,  %%rsp"             "\n\t"
10845ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
10855ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
10865ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   );
10875ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
10885ac99069b0538adcb2f18b04b078ea27b00b4185sewardj}
10895ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
10905ac99069b0538adcb2f18b04b078ea27b00b4185sewardjUInt s_pcmpistri_12 ( V128* argLU, V128* argRU )
10915ac99069b0538adcb2f18b04b078ea27b00b4185sewardj{
10925ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   V128 resV;
10935ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   UInt resOSZACP, resECX;
10945ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   Bool ok
10955ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
10965ac99069b0538adcb2f18b04b078ea27b00b4185sewardj                       zmask_from_V128(argLU),
10975ac99069b0538adcb2f18b04b078ea27b00b4185sewardj                       zmask_from_V128(argRU),
10985ac99069b0538adcb2f18b04b078ea27b00b4185sewardj                       0x12, False/*!isSTRM*/
10995ac99069b0538adcb2f18b04b078ea27b00b4185sewardj        );
11005ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   assert(ok);
11017f3019bfbbbbc5356c351c5cd319c36fe731b806sewardj   resECX = resV.uInt[0];
11025ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   return (resOSZACP << 16) | resECX;
11035ac99069b0538adcb2f18b04b078ea27b00b4185sewardj}
11045ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
11055ac99069b0538adcb2f18b04b078ea27b00b4185sewardjvoid istri_12 ( void )
11065ac99069b0538adcb2f18b04b078ea27b00b4185sewardj{
11075ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   char* wot = "12";
11085ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   UInt(*h)(V128*,V128*) = h_pcmpistri_12;
11095ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   UInt(*s)(V128*,V128*) = s_pcmpistri_12;
11105ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
11115ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
11125ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
11135ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
11145ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
11155ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
11165ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
11175ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
11185ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
11195ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
11205ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
11215ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
11225ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
11235ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
11245ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
11255ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
11265ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
11275ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
11285ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
11295ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
11305ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
11315ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
11325ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
11335ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
11345ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
11355ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
11365ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
11375ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
11385ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
11395ac99069b0538adcb2f18b04b078ea27b00b4185sewardj}
11405ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
11415ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
11425ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
11435ac99069b0538adcb2f18b04b078ea27b00b4185sewardj//////////////////////////////////////////////////////////
11445ac99069b0538adcb2f18b04b078ea27b00b4185sewardj//                                                      //
11455ac99069b0538adcb2f18b04b078ea27b00b4185sewardj//                       ISTRI_44                       //
11465ac99069b0538adcb2f18b04b078ea27b00b4185sewardj//                                                      //
11475ac99069b0538adcb2f18b04b078ea27b00b4185sewardj//////////////////////////////////////////////////////////
11485ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
11495ac99069b0538adcb2f18b04b078ea27b00b4185sewardjUInt h_pcmpistri_44 ( V128* argL, V128* argR )
11505ac99069b0538adcb2f18b04b078ea27b00b4185sewardj{
11515ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   V128 block[2];
11525ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   memcpy(&block[0], argL, sizeof(V128));
11535ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   memcpy(&block[1], argR, sizeof(V128));
11545ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   ULong res, flags;
11555ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   __asm__ __volatile__(
11565ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      "subq      $1024,  %%rsp"             "\n\t"
11575ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      "movdqu    0(%2),  %%xmm2"            "\n\t"
11585ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      "movdqu    16(%2), %%xmm11"           "\n\t"
11595ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      "pcmpistri $0x44,  %%xmm2, %%xmm11"   "\n\t"
11605ac99069b0538adcb2f18b04b078ea27b00b4185sewardj//"pcmpistrm $0x04, %%xmm2, %%xmm11"   "\n\t"
11615ac99069b0538adcb2f18b04b078ea27b00b4185sewardj//"movd %%xmm0, %%ecx" "\n\t"
11625ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      "pushfq"                              "\n\t"
11635ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      "popq      %%rdx"                     "\n\t"
11645ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      "movq      %%rcx,  %0"                "\n\t"
11655ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      "movq      %%rdx,  %1"                "\n\t"
11665ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      "addq      $1024,  %%rsp"             "\n\t"
11675ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
11685ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
11695ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   );
11705ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
11715ac99069b0538adcb2f18b04b078ea27b00b4185sewardj}
11725ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
11735ac99069b0538adcb2f18b04b078ea27b00b4185sewardjUInt s_pcmpistri_44 ( V128* argLU, V128* argRU )
11745ac99069b0538adcb2f18b04b078ea27b00b4185sewardj{
11755ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   V128 resV;
11765ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   UInt resOSZACP, resECX;
11775ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   Bool ok
11785ac99069b0538adcb2f18b04b078ea27b00b4185sewardj      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
11795ac99069b0538adcb2f18b04b078ea27b00b4185sewardj                       zmask_from_V128(argLU),
11805ac99069b0538adcb2f18b04b078ea27b00b4185sewardj                       zmask_from_V128(argRU),
11815ac99069b0538adcb2f18b04b078ea27b00b4185sewardj                       0x44, False/*!isSTRM*/
11825ac99069b0538adcb2f18b04b078ea27b00b4185sewardj        );
11835ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   assert(ok);
11847f3019bfbbbbc5356c351c5cd319c36fe731b806sewardj   resECX = resV.uInt[0];
11855ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   return (resOSZACP << 16) | resECX;
11865ac99069b0538adcb2f18b04b078ea27b00b4185sewardj}
11875ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
11885ac99069b0538adcb2f18b04b078ea27b00b4185sewardjvoid istri_44 ( void )
11895ac99069b0538adcb2f18b04b078ea27b00b4185sewardj{
11905ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   char* wot = "44";
11915ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   UInt(*h)(V128*,V128*) = h_pcmpistri_44;
11925ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   UInt(*s)(V128*,V128*) = s_pcmpistri_44;
11935ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
11945ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc");
11955ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb");
11965ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb");
11975ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb");
11985ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
11995ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
12005ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb");
12015ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb");
12025ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb");
12035ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "0000000000000000", "00000000000000cb");
12045ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
12055ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
12065ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
12075ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
12085ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b");
12095ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb");
12105ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
12115ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb");
12125ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb");
12135ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
12145ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
12155ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
12165ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
12175ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
12185ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
12195ac99069b0538adcb2f18b04b078ea27b00b4185sewardj   try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
122015df336557eb012a5f3b2f1482a0411857039496sewardj
122115df336557eb012a5f3b2f1482a0411857039496sewardj   try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a");
122215df336557eb012a5f3b2f1482a0411857039496sewardj   try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
122315df336557eb012a5f3b2f1482a0411857039496sewardj   try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54");
122415df336557eb012a5f3b2f1482a0411857039496sewardj   try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
12255ac99069b0538adcb2f18b04b078ea27b00b4185sewardj}
12265ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
12275ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
12283b20b17c01834d95e1ce9785a0a366057320fe5csewardj//////////////////////////////////////////////////////////
12293b20b17c01834d95e1ce9785a0a366057320fe5csewardj//                                                      //
12303b20b17c01834d95e1ce9785a0a366057320fe5csewardj//                       ISTRI_00                       //
12313b20b17c01834d95e1ce9785a0a366057320fe5csewardj//                                                      //
12323b20b17c01834d95e1ce9785a0a366057320fe5csewardj//////////////////////////////////////////////////////////
12333b20b17c01834d95e1ce9785a0a366057320fe5csewardj
12343b20b17c01834d95e1ce9785a0a366057320fe5csewardjUInt h_pcmpistri_00 ( V128* argL, V128* argR )
12353b20b17c01834d95e1ce9785a0a366057320fe5csewardj{
12363b20b17c01834d95e1ce9785a0a366057320fe5csewardj   V128 block[2];
12373b20b17c01834d95e1ce9785a0a366057320fe5csewardj   memcpy(&block[0], argL, sizeof(V128));
12383b20b17c01834d95e1ce9785a0a366057320fe5csewardj   memcpy(&block[1], argR, sizeof(V128));
12393b20b17c01834d95e1ce9785a0a366057320fe5csewardj   ULong res, flags;
12403b20b17c01834d95e1ce9785a0a366057320fe5csewardj   __asm__ __volatile__(
12413b20b17c01834d95e1ce9785a0a366057320fe5csewardj      "subq      $1024,  %%rsp"             "\n\t"
12423b20b17c01834d95e1ce9785a0a366057320fe5csewardj      "movdqu    0(%2),  %%xmm2"            "\n\t"
12433b20b17c01834d95e1ce9785a0a366057320fe5csewardj      "movdqu    16(%2), %%xmm11"           "\n\t"
12443b20b17c01834d95e1ce9785a0a366057320fe5csewardj      "pcmpistri $0x00,  %%xmm2, %%xmm11"   "\n\t"
12453b20b17c01834d95e1ce9785a0a366057320fe5csewardj//"pcmpistrm $0x00, %%xmm2, %%xmm11"   "\n\t"
12463b20b17c01834d95e1ce9785a0a366057320fe5csewardj//"movd %%xmm0, %%ecx" "\n\t"
12473b20b17c01834d95e1ce9785a0a366057320fe5csewardj      "pushfq"                              "\n\t"
12483b20b17c01834d95e1ce9785a0a366057320fe5csewardj      "popq      %%rdx"                     "\n\t"
12493b20b17c01834d95e1ce9785a0a366057320fe5csewardj      "movq      %%rcx,  %0"                "\n\t"
12503b20b17c01834d95e1ce9785a0a366057320fe5csewardj      "movq      %%rdx,  %1"                "\n\t"
12513b20b17c01834d95e1ce9785a0a366057320fe5csewardj      "addq      $1024,  %%rsp"             "\n\t"
12523b20b17c01834d95e1ce9785a0a366057320fe5csewardj      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
12533b20b17c01834d95e1ce9785a0a366057320fe5csewardj      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
12543b20b17c01834d95e1ce9785a0a366057320fe5csewardj   );
12553b20b17c01834d95e1ce9785a0a366057320fe5csewardj   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
12563b20b17c01834d95e1ce9785a0a366057320fe5csewardj}
12573b20b17c01834d95e1ce9785a0a366057320fe5csewardj
12583b20b17c01834d95e1ce9785a0a366057320fe5csewardjUInt s_pcmpistri_00 ( V128* argLU, V128* argRU )
12593b20b17c01834d95e1ce9785a0a366057320fe5csewardj{
12603b20b17c01834d95e1ce9785a0a366057320fe5csewardj   V128 resV;
12613b20b17c01834d95e1ce9785a0a366057320fe5csewardj   UInt resOSZACP, resECX;
12623b20b17c01834d95e1ce9785a0a366057320fe5csewardj   Bool ok
12633b20b17c01834d95e1ce9785a0a366057320fe5csewardj      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
12643b20b17c01834d95e1ce9785a0a366057320fe5csewardj                       zmask_from_V128(argLU),
12653b20b17c01834d95e1ce9785a0a366057320fe5csewardj                       zmask_from_V128(argRU),
12663b20b17c01834d95e1ce9785a0a366057320fe5csewardj                       0x00, False/*!isSTRM*/
12673b20b17c01834d95e1ce9785a0a366057320fe5csewardj        );
12683b20b17c01834d95e1ce9785a0a366057320fe5csewardj   assert(ok);
12693b20b17c01834d95e1ce9785a0a366057320fe5csewardj   resECX = resV.uInt[0];
12703b20b17c01834d95e1ce9785a0a366057320fe5csewardj   return (resOSZACP << 16) | resECX;
12713b20b17c01834d95e1ce9785a0a366057320fe5csewardj}
12723b20b17c01834d95e1ce9785a0a366057320fe5csewardj
12733b20b17c01834d95e1ce9785a0a366057320fe5csewardjvoid istri_00 ( void )
12743b20b17c01834d95e1ce9785a0a366057320fe5csewardj{
12753b20b17c01834d95e1ce9785a0a366057320fe5csewardj   char* wot = "00";
12763b20b17c01834d95e1ce9785a0a366057320fe5csewardj   UInt(*h)(V128*,V128*) = h_pcmpistri_00;
12773b20b17c01834d95e1ce9785a0a366057320fe5csewardj   UInt(*s)(V128*,V128*) = s_pcmpistri_00;
12783b20b17c01834d95e1ce9785a0a366057320fe5csewardj
12793b20b17c01834d95e1ce9785a0a366057320fe5csewardj   try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
12803b20b17c01834d95e1ce9785a0a366057320fe5csewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
12813b20b17c01834d95e1ce9785a0a366057320fe5csewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
12823b20b17c01834d95e1ce9785a0a366057320fe5csewardj   try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
12833b20b17c01834d95e1ce9785a0a366057320fe5csewardj
12843b20b17c01834d95e1ce9785a0a366057320fe5csewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
12853b20b17c01834d95e1ce9785a0a366057320fe5csewardj   try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
12863b20b17c01834d95e1ce9785a0a366057320fe5csewardj   try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
12873b20b17c01834d95e1ce9785a0a366057320fe5csewardj   try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
12883b20b17c01834d95e1ce9785a0a366057320fe5csewardj   try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
12893b20b17c01834d95e1ce9785a0a366057320fe5csewardj
12903b20b17c01834d95e1ce9785a0a366057320fe5csewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
12913b20b17c01834d95e1ce9785a0a366057320fe5csewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
12923b20b17c01834d95e1ce9785a0a366057320fe5csewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
12933b20b17c01834d95e1ce9785a0a366057320fe5csewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
12943b20b17c01834d95e1ce9785a0a366057320fe5csewardj
12953b20b17c01834d95e1ce9785a0a366057320fe5csewardj   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
12963b20b17c01834d95e1ce9785a0a366057320fe5csewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
12973b20b17c01834d95e1ce9785a0a366057320fe5csewardj
12983b20b17c01834d95e1ce9785a0a366057320fe5csewardj   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
12993b20b17c01834d95e1ce9785a0a366057320fe5csewardj   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
13003b20b17c01834d95e1ce9785a0a366057320fe5csewardj   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
13013b20b17c01834d95e1ce9785a0a366057320fe5csewardj   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
13023b20b17c01834d95e1ce9785a0a366057320fe5csewardj
13033b20b17c01834d95e1ce9785a0a366057320fe5csewardj   try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
13043b20b17c01834d95e1ce9785a0a366057320fe5csewardj
13053b20b17c01834d95e1ce9785a0a366057320fe5csewardj   try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
13063b20b17c01834d95e1ce9785a0a366057320fe5csewardj   try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
13073b20b17c01834d95e1ce9785a0a366057320fe5csewardj}
13085ac99069b0538adcb2f18b04b078ea27b00b4185sewardj
13090a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
1310e801ed2c0d58802634b06de65a730364df4c08b2sewardj//////////////////////////////////////////////////////////
1311e801ed2c0d58802634b06de65a730364df4c08b2sewardj//                                                      //
1312e801ed2c0d58802634b06de65a730364df4c08b2sewardj//                       ISTRI_38                       //
1313e801ed2c0d58802634b06de65a730364df4c08b2sewardj//                                                      //
1314e801ed2c0d58802634b06de65a730364df4c08b2sewardj//////////////////////////////////////////////////////////
1315e801ed2c0d58802634b06de65a730364df4c08b2sewardj
1316e801ed2c0d58802634b06de65a730364df4c08b2sewardjUInt h_pcmpistri_38 ( V128* argL, V128* argR )
1317e801ed2c0d58802634b06de65a730364df4c08b2sewardj{
1318e801ed2c0d58802634b06de65a730364df4c08b2sewardj   V128 block[2];
1319e801ed2c0d58802634b06de65a730364df4c08b2sewardj   memcpy(&block[0], argL, sizeof(V128));
1320e801ed2c0d58802634b06de65a730364df4c08b2sewardj   memcpy(&block[1], argR, sizeof(V128));
1321e801ed2c0d58802634b06de65a730364df4c08b2sewardj   ULong res, flags;
1322e801ed2c0d58802634b06de65a730364df4c08b2sewardj   __asm__ __volatile__(
1323e801ed2c0d58802634b06de65a730364df4c08b2sewardj      "subq      $1024,  %%rsp"             "\n\t"
1324e801ed2c0d58802634b06de65a730364df4c08b2sewardj      "movdqu    0(%2),  %%xmm2"            "\n\t"
1325e801ed2c0d58802634b06de65a730364df4c08b2sewardj      "movdqu    16(%2), %%xmm11"           "\n\t"
1326e801ed2c0d58802634b06de65a730364df4c08b2sewardj      "pcmpistri $0x38,  %%xmm2, %%xmm11"   "\n\t"
1327e801ed2c0d58802634b06de65a730364df4c08b2sewardj      "pushfq"                              "\n\t"
1328e801ed2c0d58802634b06de65a730364df4c08b2sewardj      "popq      %%rdx"                     "\n\t"
1329e801ed2c0d58802634b06de65a730364df4c08b2sewardj      "movq      %%rcx,  %0"                "\n\t"
1330e801ed2c0d58802634b06de65a730364df4c08b2sewardj      "movq      %%rdx,  %1"                "\n\t"
1331e801ed2c0d58802634b06de65a730364df4c08b2sewardj      "addq      $1024,  %%rsp"             "\n\t"
1332e801ed2c0d58802634b06de65a730364df4c08b2sewardj      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1333e801ed2c0d58802634b06de65a730364df4c08b2sewardj      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1334e801ed2c0d58802634b06de65a730364df4c08b2sewardj   );
1335e801ed2c0d58802634b06de65a730364df4c08b2sewardj   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1336e801ed2c0d58802634b06de65a730364df4c08b2sewardj}
1337e801ed2c0d58802634b06de65a730364df4c08b2sewardj
1338e801ed2c0d58802634b06de65a730364df4c08b2sewardjUInt s_pcmpistri_38 ( V128* argLU, V128* argRU )
1339e801ed2c0d58802634b06de65a730364df4c08b2sewardj{
1340e801ed2c0d58802634b06de65a730364df4c08b2sewardj   V128 resV;
1341e801ed2c0d58802634b06de65a730364df4c08b2sewardj   UInt resOSZACP, resECX;
1342e801ed2c0d58802634b06de65a730364df4c08b2sewardj   Bool ok
1343e801ed2c0d58802634b06de65a730364df4c08b2sewardj      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1344e801ed2c0d58802634b06de65a730364df4c08b2sewardj                       zmask_from_V128(argLU),
1345e801ed2c0d58802634b06de65a730364df4c08b2sewardj                       zmask_from_V128(argRU),
1346e801ed2c0d58802634b06de65a730364df4c08b2sewardj                       0x38, False/*!isSTRM*/
1347e801ed2c0d58802634b06de65a730364df4c08b2sewardj        );
1348e801ed2c0d58802634b06de65a730364df4c08b2sewardj   assert(ok);
1349e801ed2c0d58802634b06de65a730364df4c08b2sewardj   resECX = resV.uInt[0];
1350e801ed2c0d58802634b06de65a730364df4c08b2sewardj   return (resOSZACP << 16) | resECX;
1351e801ed2c0d58802634b06de65a730364df4c08b2sewardj}
1352e801ed2c0d58802634b06de65a730364df4c08b2sewardj
1353e801ed2c0d58802634b06de65a730364df4c08b2sewardjvoid istri_38 ( void )
1354e801ed2c0d58802634b06de65a730364df4c08b2sewardj{
1355e801ed2c0d58802634b06de65a730364df4c08b2sewardj   char* wot = "38";
1356e801ed2c0d58802634b06de65a730364df4c08b2sewardj   UInt(*h)(V128*,V128*) = h_pcmpistri_38;
1357e801ed2c0d58802634b06de65a730364df4c08b2sewardj   UInt(*s)(V128*,V128*) = s_pcmpistri_38;
1358e801ed2c0d58802634b06de65a730364df4c08b2sewardj
1359e801ed2c0d58802634b06de65a730364df4c08b2sewardj   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1360e801ed2c0d58802634b06de65a730364df4c08b2sewardj
1361e801ed2c0d58802634b06de65a730364df4c08b2sewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1362e801ed2c0d58802634b06de65a730364df4c08b2sewardj   try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1363e801ed2c0d58802634b06de65a730364df4c08b2sewardj   try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
1364e801ed2c0d58802634b06de65a730364df4c08b2sewardj   try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
1365e801ed2c0d58802634b06de65a730364df4c08b2sewardj
1366e801ed2c0d58802634b06de65a730364df4c08b2sewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
1367e801ed2c0d58802634b06de65a730364df4c08b2sewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
1368e801ed2c0d58802634b06de65a730364df4c08b2sewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
1369e801ed2c0d58802634b06de65a730364df4c08b2sewardj
1370e801ed2c0d58802634b06de65a730364df4c08b2sewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1371e801ed2c0d58802634b06de65a730364df4c08b2sewardj   try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1372e801ed2c0d58802634b06de65a730364df4c08b2sewardj   try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1373e801ed2c0d58802634b06de65a730364df4c08b2sewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1374e801ed2c0d58802634b06de65a730364df4c08b2sewardj
1375e801ed2c0d58802634b06de65a730364df4c08b2sewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1376e801ed2c0d58802634b06de65a730364df4c08b2sewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
1377e801ed2c0d58802634b06de65a730364df4c08b2sewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
1378e801ed2c0d58802634b06de65a730364df4c08b2sewardj
1379e801ed2c0d58802634b06de65a730364df4c08b2sewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1380e801ed2c0d58802634b06de65a730364df4c08b2sewardj
1381e801ed2c0d58802634b06de65a730364df4c08b2sewardj   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
1382e801ed2c0d58802634b06de65a730364df4c08b2sewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
1383e801ed2c0d58802634b06de65a730364df4c08b2sewardj   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
1384e801ed2c0d58802634b06de65a730364df4c08b2sewardj
1385e801ed2c0d58802634b06de65a730364df4c08b2sewardj   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
1386e801ed2c0d58802634b06de65a730364df4c08b2sewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
1387e801ed2c0d58802634b06de65a730364df4c08b2sewardj   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
1388e801ed2c0d58802634b06de65a730364df4c08b2sewardj
1389e801ed2c0d58802634b06de65a730364df4c08b2sewardj   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
1390e801ed2c0d58802634b06de65a730364df4c08b2sewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
1391e801ed2c0d58802634b06de65a730364df4c08b2sewardj   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
1392e801ed2c0d58802634b06de65a730364df4c08b2sewardj
1393e801ed2c0d58802634b06de65a730364df4c08b2sewardj   try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
1394e801ed2c0d58802634b06de65a730364df4c08b2sewardj   try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
1395e801ed2c0d58802634b06de65a730364df4c08b2sewardj   try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
1396e801ed2c0d58802634b06de65a730364df4c08b2sewardj
1397e801ed2c0d58802634b06de65a730364df4c08b2sewardj   try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
1398e801ed2c0d58802634b06de65a730364df4c08b2sewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
1399e801ed2c0d58802634b06de65a730364df4c08b2sewardj}
1400e801ed2c0d58802634b06de65a730364df4c08b2sewardj
1401e801ed2c0d58802634b06de65a730364df4c08b2sewardj
14020a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
14030a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj//////////////////////////////////////////////////////////
14040a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj//                                                      //
140515df336557eb012a5f3b2f1482a0411857039496sewardj//                       ISTRI_46                       //
140615df336557eb012a5f3b2f1482a0411857039496sewardj//                                                      //
140715df336557eb012a5f3b2f1482a0411857039496sewardj//////////////////////////////////////////////////////////
140815df336557eb012a5f3b2f1482a0411857039496sewardj
140915df336557eb012a5f3b2f1482a0411857039496sewardjUInt h_pcmpistri_46 ( V128* argL, V128* argR )
141015df336557eb012a5f3b2f1482a0411857039496sewardj{
141115df336557eb012a5f3b2f1482a0411857039496sewardj   V128 block[2];
141215df336557eb012a5f3b2f1482a0411857039496sewardj   memcpy(&block[0], argL, sizeof(V128));
141315df336557eb012a5f3b2f1482a0411857039496sewardj   memcpy(&block[1], argR, sizeof(V128));
141415df336557eb012a5f3b2f1482a0411857039496sewardj   ULong res, flags;
141515df336557eb012a5f3b2f1482a0411857039496sewardj   __asm__ __volatile__(
141615df336557eb012a5f3b2f1482a0411857039496sewardj      "subq      $1024,  %%rsp"             "\n\t"
141715df336557eb012a5f3b2f1482a0411857039496sewardj      "movdqu    0(%2),  %%xmm2"            "\n\t"
141815df336557eb012a5f3b2f1482a0411857039496sewardj      "movdqu    16(%2), %%xmm11"           "\n\t"
141915df336557eb012a5f3b2f1482a0411857039496sewardj      "pcmpistri $0x46,  %%xmm2, %%xmm11"   "\n\t"
142015df336557eb012a5f3b2f1482a0411857039496sewardj      "pushfq"                              "\n\t"
142115df336557eb012a5f3b2f1482a0411857039496sewardj      "popq      %%rdx"                     "\n\t"
142215df336557eb012a5f3b2f1482a0411857039496sewardj      "movq      %%rcx,  %0"                "\n\t"
142315df336557eb012a5f3b2f1482a0411857039496sewardj      "movq      %%rdx,  %1"                "\n\t"
142415df336557eb012a5f3b2f1482a0411857039496sewardj      "addq      $1024,  %%rsp"             "\n\t"
142515df336557eb012a5f3b2f1482a0411857039496sewardj      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
142615df336557eb012a5f3b2f1482a0411857039496sewardj      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
142715df336557eb012a5f3b2f1482a0411857039496sewardj   );
142815df336557eb012a5f3b2f1482a0411857039496sewardj   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
142915df336557eb012a5f3b2f1482a0411857039496sewardj}
143015df336557eb012a5f3b2f1482a0411857039496sewardj
143115df336557eb012a5f3b2f1482a0411857039496sewardjUInt s_pcmpistri_46 ( V128* argLU, V128* argRU )
143215df336557eb012a5f3b2f1482a0411857039496sewardj{
143315df336557eb012a5f3b2f1482a0411857039496sewardj   V128 resV;
143415df336557eb012a5f3b2f1482a0411857039496sewardj   UInt resOSZACP, resECX;
143515df336557eb012a5f3b2f1482a0411857039496sewardj   Bool ok
143615df336557eb012a5f3b2f1482a0411857039496sewardj      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
143715df336557eb012a5f3b2f1482a0411857039496sewardj                       zmask_from_V128(argLU),
143815df336557eb012a5f3b2f1482a0411857039496sewardj                       zmask_from_V128(argRU),
143915df336557eb012a5f3b2f1482a0411857039496sewardj                       0x46, False/*!isSTRM*/
144015df336557eb012a5f3b2f1482a0411857039496sewardj        );
144115df336557eb012a5f3b2f1482a0411857039496sewardj   assert(ok);
144215df336557eb012a5f3b2f1482a0411857039496sewardj   resECX = resV.uInt[0];
144315df336557eb012a5f3b2f1482a0411857039496sewardj   return (resOSZACP << 16) | resECX;
144415df336557eb012a5f3b2f1482a0411857039496sewardj}
144515df336557eb012a5f3b2f1482a0411857039496sewardj
144615df336557eb012a5f3b2f1482a0411857039496sewardjvoid istri_46 ( void )
144715df336557eb012a5f3b2f1482a0411857039496sewardj{
144815df336557eb012a5f3b2f1482a0411857039496sewardj   char* wot = "46";
144915df336557eb012a5f3b2f1482a0411857039496sewardj   UInt(*h)(V128*,V128*) = h_pcmpistri_46;
145015df336557eb012a5f3b2f1482a0411857039496sewardj   UInt(*s)(V128*,V128*) = s_pcmpistri_46;
145115df336557eb012a5f3b2f1482a0411857039496sewardj
145215df336557eb012a5f3b2f1482a0411857039496sewardj   try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc");
145315df336557eb012a5f3b2f1482a0411857039496sewardj   try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb");
145415df336557eb012a5f3b2f1482a0411857039496sewardj   try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb");
145515df336557eb012a5f3b2f1482a0411857039496sewardj   try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb");
145615df336557eb012a5f3b2f1482a0411857039496sewardj
145715df336557eb012a5f3b2f1482a0411857039496sewardj   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
145815df336557eb012a5f3b2f1482a0411857039496sewardj   try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb");
145915df336557eb012a5f3b2f1482a0411857039496sewardj   try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb");
146015df336557eb012a5f3b2f1482a0411857039496sewardj   try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb");
146115df336557eb012a5f3b2f1482a0411857039496sewardj   try_istri(wot,h,s, "0000000000000000", "00000000000000cb");
146215df336557eb012a5f3b2f1482a0411857039496sewardj
146315df336557eb012a5f3b2f1482a0411857039496sewardj   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
146415df336557eb012a5f3b2f1482a0411857039496sewardj
146515df336557eb012a5f3b2f1482a0411857039496sewardj   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
146615df336557eb012a5f3b2f1482a0411857039496sewardj   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b");
146715df336557eb012a5f3b2f1482a0411857039496sewardj   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb");
146815df336557eb012a5f3b2f1482a0411857039496sewardj
146915df336557eb012a5f3b2f1482a0411857039496sewardj   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb");
147015df336557eb012a5f3b2f1482a0411857039496sewardj   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb");
147115df336557eb012a5f3b2f1482a0411857039496sewardj   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
147215df336557eb012a5f3b2f1482a0411857039496sewardj
147315df336557eb012a5f3b2f1482a0411857039496sewardj   try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
147415df336557eb012a5f3b2f1482a0411857039496sewardj   try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
147515df336557eb012a5f3b2f1482a0411857039496sewardj
147615df336557eb012a5f3b2f1482a0411857039496sewardj   try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
147715df336557eb012a5f3b2f1482a0411857039496sewardj   try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
147815df336557eb012a5f3b2f1482a0411857039496sewardj
147915df336557eb012a5f3b2f1482a0411857039496sewardj   try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a");
148015df336557eb012a5f3b2f1482a0411857039496sewardj   try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
148115df336557eb012a5f3b2f1482a0411857039496sewardj   try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54");
148215df336557eb012a5f3b2f1482a0411857039496sewardj   try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
148315df336557eb012a5f3b2f1482a0411857039496sewardj}
148415df336557eb012a5f3b2f1482a0411857039496sewardj
148515df336557eb012a5f3b2f1482a0411857039496sewardj
148615df336557eb012a5f3b2f1482a0411857039496sewardj//////////////////////////////////////////////////////////
148715df336557eb012a5f3b2f1482a0411857039496sewardj//                                                      //
1488473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj//                       ISTRI_30                       //
1489473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj//                                                      //
1490473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj//////////////////////////////////////////////////////////
1491473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj
1492473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardjUInt h_pcmpistri_30 ( V128* argL, V128* argR )
1493473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj{
1494473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   V128 block[2];
1495473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   memcpy(&block[0], argL, sizeof(V128));
1496473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   memcpy(&block[1], argR, sizeof(V128));
1497473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   ULong res, flags;
1498473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   __asm__ __volatile__(
1499473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj      "subq      $1024,  %%rsp"             "\n\t"
1500473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj      "movdqu    0(%2),  %%xmm2"            "\n\t"
1501473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj      "movdqu    16(%2), %%xmm11"           "\n\t"
1502473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj      "pcmpistri $0x30,  %%xmm2, %%xmm11"   "\n\t"
1503473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj      "pushfq"                              "\n\t"
1504473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj      "popq      %%rdx"                     "\n\t"
1505473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj      "movq      %%rcx,  %0"                "\n\t"
1506473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj      "movq      %%rdx,  %1"                "\n\t"
1507473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj      "addq      $1024,  %%rsp"             "\n\t"
1508473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1509473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1510473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   );
1511473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1512473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj}
1513473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj
1514473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardjUInt s_pcmpistri_30 ( V128* argLU, V128* argRU )
1515473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj{
1516473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   V128 resV;
1517473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   UInt resOSZACP, resECX;
1518473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   Bool ok
1519473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1520473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj                       zmask_from_V128(argLU),
1521473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj                       zmask_from_V128(argRU),
1522473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj                       0x30, False/*!isSTRM*/
1523473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj        );
1524473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   assert(ok);
1525473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   resECX = resV.uInt[0];
1526473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   return (resOSZACP << 16) | resECX;
1527473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj}
1528473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj
1529473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardjvoid istri_30 ( void )
1530473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj{
1531473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   char* wot = "30";
1532473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   UInt(*h)(V128*,V128*) = h_pcmpistri_30;
1533473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   UInt(*s)(V128*,V128*) = s_pcmpistri_30;
1534473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj
1535473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1536473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1537473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1538473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1539473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj
1540473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1541473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1542473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1543473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1544473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1545473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj
1546473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1547473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1548473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1549473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1550473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj
1551473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1552473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1553473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj
1554473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1555473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1556473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1557473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1558473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj
1559473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1560473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj
1561473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1562473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1563473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj}
1564473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj
1565473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj
1566473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj//////////////////////////////////////////////////////////
1567473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj//                                                      //
1568473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj//                       ISTRI_40                       //
1569473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj//                                                      //
1570473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj//////////////////////////////////////////////////////////
1571473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj
1572473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardjUInt h_pcmpistri_40 ( V128* argL, V128* argR )
1573473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj{
1574473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   V128 block[2];
1575473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   memcpy(&block[0], argL, sizeof(V128));
1576473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   memcpy(&block[1], argR, sizeof(V128));
1577473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   ULong res, flags;
1578473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   __asm__ __volatile__(
1579473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj      "subq      $1024,  %%rsp"             "\n\t"
1580473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj      "movdqu    0(%2),  %%xmm2"            "\n\t"
1581473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj      "movdqu    16(%2), %%xmm11"           "\n\t"
1582473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj      "pcmpistri $0x40,  %%xmm2, %%xmm11"   "\n\t"
1583473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj      "pushfq"                              "\n\t"
1584473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj      "popq      %%rdx"                     "\n\t"
1585473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj      "movq      %%rcx,  %0"                "\n\t"
1586473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj      "movq      %%rdx,  %1"                "\n\t"
1587473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj      "addq      $1024,  %%rsp"             "\n\t"
1588473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1589473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1590473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   );
1591473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1592473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj}
1593473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj
1594473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardjUInt s_pcmpistri_40 ( V128* argLU, V128* argRU )
1595473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj{
1596473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   V128 resV;
1597473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   UInt resOSZACP, resECX;
1598473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   Bool ok
1599473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1600473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj                       zmask_from_V128(argLU),
1601473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj                       zmask_from_V128(argRU),
1602473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj                       0x40, False/*!isSTRM*/
1603473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj        );
1604473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   assert(ok);
1605473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   resECX = resV.uInt[0];
1606473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   return (resOSZACP << 16) | resECX;
1607473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj}
1608473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj
1609473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardjvoid istri_40 ( void )
1610473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj{
1611473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   char* wot = "40";
1612473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   UInt(*h)(V128*,V128*) = h_pcmpistri_40;
1613473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   UInt(*s)(V128*,V128*) = s_pcmpistri_40;
1614473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj
1615473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1616473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1617473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1618473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1619473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj
1620473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1621473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1622473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1623473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1624473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1625473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj
1626473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1627473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1628473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1629473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1630473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj
1631473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1632473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1633473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj
1634473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1635473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1636473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1637473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1638473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj
1639473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1640473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj
1641473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1642473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj   try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1643473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj}
1644473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj
1645473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj
1646473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj//////////////////////////////////////////////////////////
1647473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj//                                                      //
16487f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj//                       ISTRI_42                       //
16497f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj//                                                      //
16507f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj//////////////////////////////////////////////////////////
16517f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj
16527f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardjUInt h_pcmpistri_42 ( V128* argL, V128* argR )
16537f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj{
16547f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj   V128 block[2];
16557f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj   memcpy(&block[0], argL, sizeof(V128));
16567f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj   memcpy(&block[1], argR, sizeof(V128));
16577f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj   ULong res, flags;
16587f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj   __asm__ __volatile__(
16597f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj      "subq      $1024,  %%rsp"             "\n\t"
16607f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj      "movdqu    0(%2),  %%xmm2"            "\n\t"
16617f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj      "movdqu    16(%2), %%xmm11"           "\n\t"
16627f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj      "pcmpistri $0x42,  %%xmm2, %%xmm11"   "\n\t"
16637f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj      "pushfq"                              "\n\t"
16647f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj      "popq      %%rdx"                     "\n\t"
16657f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj      "movq      %%rcx,  %0"                "\n\t"
16667f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj      "movq      %%rdx,  %1"                "\n\t"
16677f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj      "addq      $1024,  %%rsp"             "\n\t"
16687f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
16697f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
16707f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj   );
16717f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
16727f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj}
16737f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj
16747f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardjUInt s_pcmpistri_42 ( V128* argLU, V128* argRU )
16757f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj{
16767f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj   V128 resV;
16777f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj   UInt resOSZACP, resECX;
16787f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj   Bool ok
16797f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
16807f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj                       zmask_from_V128(argLU),
16817f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj                       zmask_from_V128(argRU),
16827f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj                       0x42, False/*!isSTRM*/
16837f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj        );
16847f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj   assert(ok);
16857f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj   resECX = resV.uInt[0];
16867f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj   return (resOSZACP << 16) | resECX;
16877f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj}
16887f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj
16897f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardjvoid istri_42 ( void )
16907f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj{
16917f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj   char* wot = "42";
16927f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj   UInt(*h)(V128*,V128*) = h_pcmpistri_42;
16937f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj   UInt(*s)(V128*,V128*) = s_pcmpistri_42;
16947f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj
16957f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj   try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
16967f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
16977f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
16987f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj   try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
16997f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj
17007f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
17017f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj   try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
17027f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj   try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
17037f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj   try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
17047f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj   try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
17057f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj
17067f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
17077f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
17087f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
17097f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
17107f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj
17117f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
17127f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
17137f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj
17147f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
17157f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
17167f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
17177f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
17187f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj
17197f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj   try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
17207f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj
17217f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj   try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
17227f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj   try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
17237f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj}
17247f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj
17257f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj
17267f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj//////////////////////////////////////////////////////////
17277f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj//                                                      //
1728a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj//                       ISTRI_0E                       //
1729a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj//                                                      //
1730a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj//////////////////////////////////////////////////////////
1731a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1732a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj__attribute__((noinline))
1733a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardjUInt h_pcmpistri_0E ( V128* argL, V128* argR )
1734a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj{
1735a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   V128 block[2];
1736a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   memcpy(&block[0], argL, sizeof(V128));
1737a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   memcpy(&block[1], argR, sizeof(V128));
1738a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   ULong res = 0, flags = 0;
1739a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   __asm__ __volatile__(
1740a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj      "movdqu    0(%2),  %%xmm2"            "\n\t"
1741a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj      "movdqu    16(%2), %%xmm11"           "\n\t"
1742a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj      "pcmpistri $0x0E,  %%xmm2, %%xmm11"   "\n\t"
1743a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj      "pushfq"                              "\n\t"
1744a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj      "popq      %%rdx"                     "\n\t"
1745a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj      "movq      %%rcx,  %0"                "\n\t"
1746a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj      "movq      %%rdx,  %1"                "\n\t"
1747a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1748a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1749a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   );
1750a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1751a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj}
1752a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1753a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardjUInt s_pcmpistri_0E ( V128* argLU, V128* argRU )
1754a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj{
1755a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   V128 resV;
1756a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   UInt resOSZACP, resECX;
1757a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   Bool ok
1758a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1759a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj                       zmask_from_V128(argLU),
1760a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj                       zmask_from_V128(argRU),
1761a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj                       0x0E, False/*!isSTRM*/
1762a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj        );
1763a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   assert(ok);
1764a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   resECX = resV.uInt[0];
1765a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   return (resOSZACP << 16) | resECX;
1766a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj}
1767a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1768a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardjvoid istri_0E ( void )
1769a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj{
1770a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   char* wot = "0E";
1771a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   UInt(*h)(V128*,V128*) = h_pcmpistri_0E;
1772a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   UInt(*s)(V128*,V128*) = s_pcmpistri_0E;
1773a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1774a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "111111111abcde11", "00000000000abcde");
1775a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1776a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "111111111abcde11", "0000abcde00abcde");
1777a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1778a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "1111111111abcde1", "00000000000abcde");
1779a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "11111111111abcde", "00000000000abcde");
1780a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "111111111111abcd", "00000000000abcde");
1781a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1782a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "111abcde1abcde11", "00000000000abcde");
1783a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1784a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "11abcde11abcde11", "00000000000abcde");
1785a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "1abcde111abcde11", "00000000000abcde");
1786a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "abcde1111abcde11", "00000000000abcde");
1787a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "bcde11111abcde11", "00000000000abcde");
1788a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "cde111111abcde11", "00000000000abcde");
1789a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1790a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "01abcde11abcde11", "00000000000abcde");
1791a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "00abcde11abcde11", "00000000000abcde");
1792a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "000bcde11abcde11", "00000000000abcde");
1793a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1794a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "00abcde10abcde11", "00000000000abcde");
1795a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "00abcde100bcde11", "00000000000abcde");
1796a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1797a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "1111111111111234", "0000000000000000");
1798a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "1111111111111234", "0000000000000001");
1799a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "1111111111111234", "0000000000000011");
1800a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1801a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "1111111111111234", "1111111111111234");
1802a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "a111111111111111", "000000000000000a");
1803a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "b111111111111111", "000000000000000a");
1804a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1805a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "b111111111111111", "0000000000000000");
1806a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1807a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "123456789abcdef1", "0000000000000000");
1808a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "0000000000000000", "123456789abcdef1");
1809a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj}
1810a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1811a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1812a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj//////////////////////////////////////////////////////////
1813a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj//                                                      //
1814a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj//                       ISTRI_34                       //
1815a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj//                                                      //
1816a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj//////////////////////////////////////////////////////////
1817a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1818a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardjUInt h_pcmpistri_34 ( V128* argL, V128* argR )
1819a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj{
1820a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   V128 block[2];
1821a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   memcpy(&block[0], argL, sizeof(V128));
1822a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   memcpy(&block[1], argR, sizeof(V128));
1823a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   ULong res, flags;
1824a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   __asm__ __volatile__(
1825a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj      "subq      $1024,  %%rsp"             "\n\t"
1826a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj      "movdqu    0(%2),  %%xmm2"            "\n\t"
1827a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj      "movdqu    16(%2), %%xmm11"           "\n\t"
1828a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj      "pcmpistri $0x34,  %%xmm2, %%xmm11"   "\n\t"
1829a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj      "pushfq"                              "\n\t"
1830a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj      "popq      %%rdx"                     "\n\t"
1831a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj      "movq      %%rcx,  %0"                "\n\t"
1832a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj      "movq      %%rdx,  %1"                "\n\t"
1833a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj      "addq      $1024,  %%rsp"             "\n\t"
1834a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1835a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1836a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   );
1837a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1838a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj}
1839a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1840a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardjUInt s_pcmpistri_34 ( V128* argLU, V128* argRU )
1841a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj{
1842a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   V128 resV;
1843a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   UInt resOSZACP, resECX;
1844a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   Bool ok
1845a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1846a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj                       zmask_from_V128(argLU),
1847a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj                       zmask_from_V128(argRU),
1848a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj                       0x34, False/*!isSTRM*/
1849a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj        );
1850a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   assert(ok);
1851a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   resECX = resV.uInt[0];
1852a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   return (resOSZACP << 16) | resECX;
1853a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj}
1854a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1855a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardjvoid istri_34 ( void )
1856a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj{
1857a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   char* wot = "34";
1858a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   UInt(*h)(V128*,V128*) = h_pcmpistri_34;
1859a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   UInt(*s)(V128*,V128*) = s_pcmpistri_34;
1860a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1861a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc");
1862a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb");
1863a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb");
1864a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb");
1865a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1866a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1867a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb");
1868a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb");
1869a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb");
1870a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "0000000000000000", "00000000000000cb");
1871a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1872a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1873a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1874a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1875a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b");
1876a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb");
1877a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1878a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb");
1879a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb");
1880a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
1881a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1882a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
1883a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
1884a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1885a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
1886a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
1887a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1888a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a");
1889a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
1890a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54");
1891a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
1892a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj}
1893a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1894a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1895a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj//////////////////////////////////////////////////////////
1896a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj//                                                      //
1897a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj//                       ISTRI_14                       //
1898a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj//                                                      //
1899a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj//////////////////////////////////////////////////////////
1900a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1901a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardjUInt h_pcmpistri_14 ( V128* argL, V128* argR )
1902a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj{
1903a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   V128 block[2];
1904a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   memcpy(&block[0], argL, sizeof(V128));
1905a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   memcpy(&block[1], argR, sizeof(V128));
1906a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   ULong res, flags;
1907a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   __asm__ __volatile__(
1908a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj      "subq      $1024,  %%rsp"             "\n\t"
1909a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj      "movdqu    0(%2),  %%xmm2"            "\n\t"
1910a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj      "movdqu    16(%2), %%xmm11"           "\n\t"
1911a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj      "pcmpistri $0x14,  %%xmm2, %%xmm11"   "\n\t"
1912a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj      "pushfq"                              "\n\t"
1913a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj      "popq      %%rdx"                     "\n\t"
1914a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj      "movq      %%rcx,  %0"                "\n\t"
1915a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj      "movq      %%rdx,  %1"                "\n\t"
1916a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj      "addq      $1024,  %%rsp"             "\n\t"
1917a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1918a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1919a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   );
1920a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1921a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj}
1922a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1923a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardjUInt s_pcmpistri_14 ( V128* argLU, V128* argRU )
1924a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj{
1925a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   V128 resV;
1926a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   UInt resOSZACP, resECX;
1927a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   Bool ok
1928a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1929a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj                       zmask_from_V128(argLU),
1930a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj                       zmask_from_V128(argRU),
1931a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj                       0x14, False/*!isSTRM*/
1932a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj        );
1933a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   assert(ok);
1934a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   resECX = resV.uInt[0];
1935a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   return (resOSZACP << 16) | resECX;
1936a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj}
1937a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1938a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardjvoid istri_14 ( void )
1939a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj{
1940a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   char* wot = "14";
1941a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   UInt(*h)(V128*,V128*) = h_pcmpistri_14;
1942a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   UInt(*s)(V128*,V128*) = s_pcmpistri_14;
1943a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1944a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc");
1945a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb");
1946a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb");
1947a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb");
1948a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1949a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1950a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb");
1951a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb");
1952a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb");
1953a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "0000000000000000", "00000000000000cb");
1954a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1955a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1956a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1957a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1958a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b");
1959a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb");
1960a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1961a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb");
1962a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb");
1963a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
1964a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1965a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
1966a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
1967a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1968a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
1969a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
1970a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1971a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a");
1972a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
1973a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54");
1974a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj   try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
1975a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj}
1976a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1977a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj
1978a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj//////////////////////////////////////////////////////////
1979a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj//                                                      //
1980a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes//                       ISTRI_70                       //
1981a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes//                                                      //
1982a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes//////////////////////////////////////////////////////////
1983a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes
1984a0664b9ca67b594bd6f570a61d3301167a24750cElliott HughesUInt h_pcmpistri_70 ( V128* argL, V128* argR )
1985a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes{
1986a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   V128 block[2];
1987a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   memcpy(&block[0], argL, sizeof(V128));
1988a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   memcpy(&block[1], argR, sizeof(V128));
1989a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   ULong res, flags;
1990a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   __asm__ __volatile__(
1991a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      "subq      $1024,  %%rsp"             "\n\t"
1992a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      "movdqu    0(%2),  %%xmm2"            "\n\t"
1993a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      "movdqu    16(%2), %%xmm11"           "\n\t"
1994a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      "pcmpistri $0x70,  %%xmm2, %%xmm11"   "\n\t"
1995a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      "pushfq"                              "\n\t"
1996a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      "popq      %%rdx"                     "\n\t"
1997a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      "movq      %%rcx,  %0"                "\n\t"
1998a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      "movq      %%rdx,  %1"                "\n\t"
1999a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      "addq      $1024,  %%rsp"             "\n\t"
2000a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
2001a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
2002a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   );
2003a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
2004a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes}
2005a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes
2006a0664b9ca67b594bd6f570a61d3301167a24750cElliott HughesUInt s_pcmpistri_70 ( V128* argLU, V128* argRU )
2007a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes{
2008a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   V128 resV;
2009a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   UInt resOSZACP, resECX;
2010a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   Bool ok
2011a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
2012a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes                       zmask_from_V128(argLU),
2013a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes                       zmask_from_V128(argRU),
2014a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes                       0x70, False/*!isSTRM*/
2015a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes        );
2016a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   assert(ok);
2017a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   resECX = resV.uInt[0];
2018a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   return (resOSZACP << 16) | resECX;
2019a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes}
2020a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes
2021a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughesvoid istri_70 ( void )
2022a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes{
2023a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   char* wot = "70";
2024a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   UInt(*h)(V128*,V128*) = h_pcmpistri_70;
2025a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   UInt(*s)(V128*,V128*) = s_pcmpistri_70;
2026a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes
2027a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
2028a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
2029a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
2030a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
2031a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes
2032a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
2033a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
2034a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
2035a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
2036a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
2037a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes
2038a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
2039a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
2040a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
2041a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
2042a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes
2043a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
2044a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
2045a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes
2046a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
2047a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
2048a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
2049a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
2050a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes
2051a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
2052a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes
2053a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
2054a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
2055a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes}
2056a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes
2057a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes
2058a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes//////////////////////////////////////////////////////////
2059a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes//                                                      //
2060a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes//                       ISTRI_62                       //
2061a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes//                                                      //
2062a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes//////////////////////////////////////////////////////////
2063a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes
2064a0664b9ca67b594bd6f570a61d3301167a24750cElliott HughesUInt h_pcmpistri_62 ( V128* argL, V128* argR )
2065a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes{
2066a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   V128 block[2];
2067a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   memcpy(&block[0], argL, sizeof(V128));
2068a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   memcpy(&block[1], argR, sizeof(V128));
2069a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   ULong res, flags;
2070a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   __asm__ __volatile__(
2071a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      "subq      $1024,  %%rsp"             "\n\t"
2072a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      "movdqu    0(%2),  %%xmm2"            "\n\t"
2073a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      "movdqu    16(%2), %%xmm11"           "\n\t"
2074a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      "pcmpistri $0x62,  %%xmm2, %%xmm11"   "\n\t"
2075a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      "pushfq"                              "\n\t"
2076a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      "popq      %%rdx"                     "\n\t"
2077a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      "movq      %%rcx,  %0"                "\n\t"
2078a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      "movq      %%rdx,  %1"                "\n\t"
2079a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      "addq      $1024,  %%rsp"             "\n\t"
2080a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
2081a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
2082a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   );
2083a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
2084a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes}
2085a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes
2086a0664b9ca67b594bd6f570a61d3301167a24750cElliott HughesUInt s_pcmpistri_62 ( V128* argLU, V128* argRU )
2087a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes{
2088a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   V128 resV;
2089a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   UInt resOSZACP, resECX;
2090a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   Bool ok
2091a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
2092a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes                       zmask_from_V128(argLU),
2093a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes                       zmask_from_V128(argRU),
2094a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes                       0x62, False/*!isSTRM*/
2095a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes        );
2096a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   assert(ok);
2097a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   resECX = resV.uInt[0];
2098a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   return (resOSZACP << 16) | resECX;
2099a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes}
2100a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes
2101a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughesvoid istri_62 ( void )
2102a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes{
2103a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   char* wot = "62";
2104a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   UInt(*h)(V128*,V128*) = h_pcmpistri_62;
2105a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   UInt(*s)(V128*,V128*) = s_pcmpistri_62;
2106a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes
2107a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
2108a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
2109a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
2110a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
2111a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes
2112a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
2113a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
2114a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
2115a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
2116a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
2117a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes
2118a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
2119a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
2120a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
2121a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
2122a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes
2123a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
2124a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
2125a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes
2126a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
2127a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
2128a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
2129a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
2130a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes
2131a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
2132a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes
2133a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
2134a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
2135a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes}
2136a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes
2137a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes
2138a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes//////////////////////////////////////////////////////////
2139a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes//                                                      //
2140a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes//                       ISTRI_72                       //
2141a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes//                                                      //
2142a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes//////////////////////////////////////////////////////////
2143a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes
2144a0664b9ca67b594bd6f570a61d3301167a24750cElliott HughesUInt h_pcmpistri_72 ( V128* argL, V128* argR )
2145a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes{
2146a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   V128 block[2];
2147a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   memcpy(&block[0], argL, sizeof(V128));
2148a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   memcpy(&block[1], argR, sizeof(V128));
2149a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   ULong res, flags;
2150a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   __asm__ __volatile__(
2151a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      "subq      $1024,  %%rsp"             "\n\t"
2152a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      "movdqu    0(%2),  %%xmm2"            "\n\t"
2153a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      "movdqu    16(%2), %%xmm11"           "\n\t"
2154a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      "pcmpistri $0x72,  %%xmm2, %%xmm11"   "\n\t"
2155a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      "pushfq"                              "\n\t"
2156a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      "popq      %%rdx"                     "\n\t"
2157a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      "movq      %%rcx,  %0"                "\n\t"
2158a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      "movq      %%rdx,  %1"                "\n\t"
2159a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      "addq      $1024,  %%rsp"             "\n\t"
2160a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
2161a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
2162a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   );
2163a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
2164a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes}
2165a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes
2166a0664b9ca67b594bd6f570a61d3301167a24750cElliott HughesUInt s_pcmpistri_72 ( V128* argLU, V128* argRU )
2167a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes{
2168a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   V128 resV;
2169a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   UInt resOSZACP, resECX;
2170a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   Bool ok
2171a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
2172a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes                       zmask_from_V128(argLU),
2173a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes                       zmask_from_V128(argRU),
2174a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes                       0x72, False/*!isSTRM*/
2175a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes        );
2176a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   assert(ok);
2177a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   resECX = resV.uInt[0];
2178a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   return (resOSZACP << 16) | resECX;
2179a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes}
2180a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes
2181a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughesvoid istri_72 ( void )
2182a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes{
2183a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   char* wot = "72";
2184a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   UInt(*h)(V128*,V128*) = h_pcmpistri_72;
2185a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   UInt(*s)(V128*,V128*) = s_pcmpistri_72;
2186a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes
2187a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
2188a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
2189a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
2190a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
2191a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes
2192a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
2193a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
2194a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
2195a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
2196a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
2197a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes
2198a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
2199a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
2200a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
2201a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
2202a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes
2203a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
2204a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
2205a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes
2206a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
2207a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
2208a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
2209a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
2210a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes
2211a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
2212a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes
2213a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
2214a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes   try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
2215a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes}
2216a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes
2217a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes
2218a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes//////////////////////////////////////////////////////////
2219a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes//                                                      //
2220ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes//                       ISTRI_10                       //
2221ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes//                                                      //
2222ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes//////////////////////////////////////////////////////////
2223ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes
2224ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott HughesUInt h_pcmpistri_10 ( V128* argL, V128* argR )
2225ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes{
2226ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes   V128 block[2];
2227ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes   memcpy(&block[0], argL, sizeof(V128));
2228ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes   memcpy(&block[1], argR, sizeof(V128));
2229ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes   ULong res, flags;
2230ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes   __asm__ __volatile__(
2231ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes      "subq      $1024,  %%rsp"             "\n\t"
2232ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes      "movdqu    0(%2),  %%xmm2"            "\n\t"
2233ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes      "movdqu    16(%2), %%xmm11"           "\n\t"
2234ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes      "pcmpistri $0x10,  %%xmm2, %%xmm11"   "\n\t"
2235ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes//"pcmpistrm $0x10, %%xmm2, %%xmm11"   "\n\t"
2236ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes//"movd %%xmm0, %%ecx" "\n\t"
2237ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes      "pushfq"                              "\n\t"
2238ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes      "popq      %%rdx"                     "\n\t"
2239ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes      "movq      %%rcx,  %0"                "\n\t"
2240ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes      "movq      %%rdx,  %1"                "\n\t"
2241ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes      "addq      $1024,  %%rsp"             "\n\t"
2242ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
2243ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
2244ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes   );
2245ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
2246ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes}
2247ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes
2248ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott HughesUInt s_pcmpistri_10 ( V128* argLU, V128* argRU )
2249ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes{
2250ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes   V128 resV;
2251ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes   UInt resOSZACP, resECX;
2252ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes   Bool ok
2253ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
2254ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes                       zmask_from_V128(argLU),
2255ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes                       zmask_from_V128(argRU),
2256ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes                       0x10, False/*!isSTRM*/
2257ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes        );
2258ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes   assert(ok);
2259ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes   resECX = resV.uInt[0];
2260ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes   return (resOSZACP << 16) | resECX;
2261ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes}
2262ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes
2263ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughesvoid istri_10 ( void )
2264ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes{
2265ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes   char* wot = "10";
2266ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes   UInt(*h)(V128*,V128*) = h_pcmpistri_10;
2267ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes   UInt(*s)(V128*,V128*) = s_pcmpistri_10;
2268ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes
2269ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes   try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
2270ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
2271ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes   try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
2272ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes   try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
2273ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes
2274ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
2275ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes   try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
2276ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes   try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
2277ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes   try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
2278ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes   try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
2279ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes
2280ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
2281ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
2282ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
2283ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
2284ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes
2285ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
2286ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
2287ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes
2288ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
2289ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
2290ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
2291ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
2292ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes
2293ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes   try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
2294ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes
2295ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes   try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
2296ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes   try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
2297ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes}
2298ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes
2299ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes
2300ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes//////////////////////////////////////////////////////////
2301ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes//                                                      //
23020a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj//                         main                         //
23030a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj//                                                      //
23040a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj//////////////////////////////////////////////////////////
23050a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj
/* Test driver: calls every per-imm8 test group exactly once and then
   returns 0.  The call order is kept exactly as it was; presumably any
   recorded reference output depends on it -- confirm before reordering. */
int main ( void )
{
   istri_4A();   istri_3A();   istri_08();   istri_18();
   istri_1A();   istri_02();   istri_0C();   istri_12();
   istri_44();   istri_00();   istri_38();   istri_46();
   istri_30();   istri_40();   istri_42();   istri_0E();
   istri_14();   istri_34();   istri_70();   istri_62();
   istri_72();   istri_10();

   return 0;
}
2332