19db268cc7344d8751213307737285e7cdac4fbecsewardj
29db268cc7344d8751213307737285e7cdac4fbecsewardj/* Tests in detail the core arithmetic for pcmp{e,i}str{i,m} using
39db268cc7344d8751213307737285e7cdac4fbecsewardj   pcmpistri to drive it.  Does not check the e-vs-i or i-vs-m
49db268cc7344d8751213307737285e7cdac4fbecsewardj   aspect. */
59db268cc7344d8751213307737285e7cdac4fbecsewardj
69db268cc7344d8751213307737285e7cdac4fbecsewardj#include <string.h>
79db268cc7344d8751213307737285e7cdac4fbecsewardj#include <stdio.h>
89db268cc7344d8751213307737285e7cdac4fbecsewardj#include <assert.h>
99db268cc7344d8751213307737285e7cdac4fbecsewardj
/* Short names for the basic integer types used throughout this test. */
typedef unsigned int       UInt;
typedef int                Int;
typedef unsigned char      UChar;
typedef unsigned short     UShort;
typedef unsigned long long ULong;

/* Boolean stored in a single unsigned byte. */
typedef UChar Bool;
#define False ((Bool)0)
#define True  ((Bool)1)
189db268cc7344d8751213307737285e7cdac4fbecsewardj
/* A 128-bit vector, viewable as 16 bytes, 8 16-bit words or 4 32-bit
   words.  uInt and w32 are two names for the same 32-bit view. */
typedef union {
   UChar  uChar[16];
   UShort uShort[8];
   UInt   uInt[4];
   UInt   w32[4];
} V128;
289db268cc7344d8751213307737285e7cdac4fbecsewardj
/* Bit position of each of the O, S, Z, A, C and P condition flags,
   followed by the matching single-bit mask. */
#define SHIFT_C   0
#define SHIFT_P   2
#define SHIFT_A   4
#define SHIFT_Z   6
#define SHIFT_S   7
#define SHIFT_O   11

#define MASK_C    (1ULL << SHIFT_C)
#define MASK_P    (1ULL << SHIFT_P)
#define MASK_A    (1ULL << SHIFT_A)
#define MASK_Z    (1ULL << SHIFT_Z)
#define MASK_S    (1ULL << SHIFT_S)
#define MASK_O    (1ULL << SHIFT_O)
429db268cc7344d8751213307737285e7cdac4fbecsewardj
439db268cc7344d8751213307737285e7cdac4fbecsewardj
/* Count the leading (most significant) zero bits of the 32-bit value x.
   Returns 32 when x is zero.

   Implemented as a binary search using only unsigned arithmetic, so the
   result does not depend on how the compiler shifts or converts negative
   signed values. */
UInt clz32 ( UInt x )
{
   UInt n = 0;

   if (x == 0)
      return 32;

   /* At each step: if the top half of the remaining window is empty,
      account for those zeroes and slide the lower half up. */
   if ((x & 0xFFFF0000u) == 0) { n += 16; x <<= 16; }
   if ((x & 0xFF000000u) == 0) { n +=  8; x <<=  8; }
   if ((x & 0xF0000000u) == 0) { n +=  4; x <<=  4; }
   if ((x & 0xC0000000u) == 0) { n +=  2; x <<=  2; }
   if ((x & 0x80000000u) == 0) { n +=  1; }

   return n;
}
679db268cc7344d8751213307737285e7cdac4fbecsewardj
/* Count the trailing (least significant) zero bits of the 32-bit value
   x.  Returns 32 when x is zero. */
UInt ctz32 ( UInt x )
{
   /* Keeps exactly the bits that lie below the lowest set bit of x, so
      the number of bits kept equals the number of trailing zeroes. */
   UInt below_lowest = ~x & (x - 1);
   return 32 - clz32(below_lowest);
}
729db268cc7344d8751213307737285e7cdac4fbecsewardj
/* Fill *dst from a 16-character hexadecimal summary string.  Each hex
   digit d becomes one byte with value 0xdd.  The last character of the
   string goes to dst->uChar[0] and the first to dst->uChar[15].
   Asserts that the string has exactly 16 characters and that every
   character is a hex digit (either case). */
void expand ( V128* dst, char* summary )
{
   Int lane;
   assert( strlen(summary) == 16 );
   for (lane = 0; lane < 16; lane++) {
      UChar digit  = summary[15 - lane];
      UChar nibble = 0;

      if (digit >= '0' && digit <= '9')
         nibble = digit - '0';
      else if (digit >= 'A' && digit <= 'F')
         nibble = digit - 'A' + 10;
      else if (digit >= 'a' && digit <= 'f')
         nibble = digit - 'a' + 10;
      else
         assert(0);

      assert(nibble < 16);
      /* duplicate the nibble into both halves of the byte */
      dst->uChar[lane] = (nibble << 4) | nibble;
   }
}
919db268cc7344d8751213307737285e7cdac4fbecsewardj
/* Run one comparison case and print a line with both results.

   which        two-character label printed for the case
   h_fn, s_fn   the two implementations to compare; both are called with
                the same pair of expanded vectors (presumably h_ is the
                real instruction and s_ the C model -- confirm against
                the callers)
   summL/summR  16-digit hex summaries, expanded with expand()

   The line ends in "!!!!" when the two results differ. */
void try_istri ( char* which,
                 UInt(*h_fn)(V128*,V128*),
                 UInt(*s_fn)(V128*,V128*),
                 char* summL, char* summR )
{
   V128 left, right;
   UInt h_out, s_out;
   const char* verdict;

   assert(strlen(which) == 2);

   expand(&left,  summL);
   expand(&right, summR);

   h_out = h_fn(&left, &right);
   s_out = s_fn(&left, &right);

   verdict = (h_out == s_out) ? "" : "!!!!";
   printf("istri %s  %s %s -> %08x %08x %s\n",
          which, summL, summR, h_out, s_out, verdict);
}
1069db268cc7344d8751213307737285e7cdac4fbecsewardj
/* Build the 8-bit "zero mask" of a vector of 16-bit elements: bit i of
   the result is 1 iff arg->uShort[i] is zero. */
UInt zmask_from_V128 ( V128* arg )
{
   UInt mask = 0;
   UInt lane;
   for (lane = 0; lane < 8; lane++) {
      if (arg->uShort[lane] == 0)
         mask |= (1u << lane);
   }
   return mask;
}
1159db268cc7344d8751213307737285e7cdac4fbecsewardj
1169db268cc7344d8751213307737285e7cdac4fbecsewardj//////////////////////////////////////////////////////////
1179db268cc7344d8751213307737285e7cdac4fbecsewardj//                                                      //
1189db268cc7344d8751213307737285e7cdac4fbecsewardj//                       GENERAL                        //
1199db268cc7344d8751213307737285e7cdac4fbecsewardj//                                                      //
1209db268cc7344d8751213307737285e7cdac4fbecsewardj//////////////////////////////////////////////////////////
1219db268cc7344d8751213307737285e7cdac4fbecsewardj
1229db268cc7344d8751213307737285e7cdac4fbecsewardj
1239db268cc7344d8751213307737285e7cdac4fbecsewardj/* Given partial results from a 16-bit pcmpXstrX operation (intRes1,
1249db268cc7344d8751213307737285e7cdac4fbecsewardj   basically), generate an I- or M-format output value, also the new
1259db268cc7344d8751213307737285e7cdac4fbecsewardj   OSZACP flags.  */
1269db268cc7344d8751213307737285e7cdac4fbecsewardjstatic
1279db268cc7344d8751213307737285e7cdac4fbecsewardjvoid PCMPxSTRx_WRK_gen_output_fmt_I_wide ( /*OUT*/V128* resV,
1289db268cc7344d8751213307737285e7cdac4fbecsewardj					   /*OUT*/UInt* resOSZACP,
1299db268cc7344d8751213307737285e7cdac4fbecsewardj					   UInt intRes1,
1309db268cc7344d8751213307737285e7cdac4fbecsewardj					   UInt zmaskL, UInt zmaskR,
1319db268cc7344d8751213307737285e7cdac4fbecsewardj					   UInt validL,
1329db268cc7344d8751213307737285e7cdac4fbecsewardj					   UInt pol, UInt idx )
1339db268cc7344d8751213307737285e7cdac4fbecsewardj{
1349db268cc7344d8751213307737285e7cdac4fbecsewardj   assert((pol >> 2) == 0);
1359db268cc7344d8751213307737285e7cdac4fbecsewardj   assert((idx >> 1) == 0);
1369db268cc7344d8751213307737285e7cdac4fbecsewardj
1379db268cc7344d8751213307737285e7cdac4fbecsewardj   UInt intRes2 = 0;
1389db268cc7344d8751213307737285e7cdac4fbecsewardj   switch (pol) {
1399db268cc7344d8751213307737285e7cdac4fbecsewardj      case 0: intRes2 = intRes1;          break; // pol +
1409db268cc7344d8751213307737285e7cdac4fbecsewardj      case 1: intRes2 = ~intRes1;         break; // pol -
1419db268cc7344d8751213307737285e7cdac4fbecsewardj      case 2: intRes2 = intRes1;          break; // pol m+
1429db268cc7344d8751213307737285e7cdac4fbecsewardj      case 3: intRes2 = intRes1 ^ validL; break; // pol m-
1439db268cc7344d8751213307737285e7cdac4fbecsewardj   }
1449db268cc7344d8751213307737285e7cdac4fbecsewardj   intRes2 &= 0xFF;
1459db268cc7344d8751213307737285e7cdac4fbecsewardj
1469db268cc7344d8751213307737285e7cdac4fbecsewardj   // generate I-format output (an index in ECX)
1479db268cc7344d8751213307737285e7cdac4fbecsewardj   // generate ecx value
1489db268cc7344d8751213307737285e7cdac4fbecsewardj   UInt newECX = 0;
1499db268cc7344d8751213307737285e7cdac4fbecsewardj   if (idx) {
1509db268cc7344d8751213307737285e7cdac4fbecsewardj     // index of ms-1-bit
1519db268cc7344d8751213307737285e7cdac4fbecsewardj     newECX = intRes2 == 0 ? 8 : (31 - clz32(intRes2));
1529db268cc7344d8751213307737285e7cdac4fbecsewardj   } else {
1539db268cc7344d8751213307737285e7cdac4fbecsewardj     // index of ls-1-bit
1549db268cc7344d8751213307737285e7cdac4fbecsewardj     newECX = intRes2 == 0 ? 8 : ctz32(intRes2);
1559db268cc7344d8751213307737285e7cdac4fbecsewardj   }
1569db268cc7344d8751213307737285e7cdac4fbecsewardj
1579db268cc7344d8751213307737285e7cdac4fbecsewardj   resV->w32[0] = newECX;
1589db268cc7344d8751213307737285e7cdac4fbecsewardj   resV->w32[1] = 0;
1599db268cc7344d8751213307737285e7cdac4fbecsewardj   resV->w32[2] = 0;
1609db268cc7344d8751213307737285e7cdac4fbecsewardj   resV->w32[3] = 0;
1619db268cc7344d8751213307737285e7cdac4fbecsewardj
1629db268cc7344d8751213307737285e7cdac4fbecsewardj   // generate new flags, common to all ISTRI and ISTRM cases
1639db268cc7344d8751213307737285e7cdac4fbecsewardj   *resOSZACP    // A, P are zero
1649db268cc7344d8751213307737285e7cdac4fbecsewardj     = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0
1659db268cc7344d8751213307737285e7cdac4fbecsewardj     | ((zmaskL == 0)  ? 0 : MASK_Z) // Z == 1 iff any in argL is 0
1669db268cc7344d8751213307737285e7cdac4fbecsewardj     | ((zmaskR == 0)  ? 0 : MASK_S) // S == 1 iff any in argR is 0
1679db268cc7344d8751213307737285e7cdac4fbecsewardj     | ((intRes2 & 1) << SHIFT_O);   // O == IntRes2[0]
1689db268cc7344d8751213307737285e7cdac4fbecsewardj}
1699db268cc7344d8751213307737285e7cdac4fbecsewardj
/* Equal-each ("strcmp") aggregation on 16-bit elements.  Bit i of the
   result is the comparison argL[i] == argR[i] when both elements are
   valid, 1 when both are invalid, and 0 when exactly one is valid. */
static UInt pcmpXstrX_wide_equal_each ( const UShort* argL,
                                        const UShort* argR,
                                        UInt validL, UInt validR )
{
   UInt pos, same = 0;
   for (pos = 0; pos < 8; pos++) {
      if (argL[pos] == argR[pos])
         same |= (1u << pos);
   }
   return ((same & validL & validR) | ~(validL | validR)) & 0xFF;
}

/* Equal-any (set membership) aggregation on 16-bit elements.
   argL is the string, argR the character set.  Bit si of the result is
   set when valid element argL[si] equals some valid element of argR.
   Scanning of either operand stops at its first invalid element. */
static UInt pcmpXstrX_wide_equal_any ( const UShort* argL,
                                       const UShort* argR,
                                       UInt validL, UInt validR )
{
   UInt si, ci, hits = 0;
   for (si = 0; si < 8 && (validL & (1u << si)) != 0; si++) {
      for (ci = 0; ci < 8 && (validR & (1u << ci)) != 0; ci++) {
         if (argR[ci] == argL[si]) {
            hits |= (1u << si);
            break;
         }
      }
   }
   return hits & 0xFF;
}

/* Equal-ordered (substring search) aggregation on 16-bit elements.
   argL is the haystack, argR the needle.  Bit hi of the result is set
   when the valid prefix of the needle matches the haystack starting at
   hi, comparing only as far as element 7 of the haystack.  The first
   invalid haystack position still gets its bit computed; the scan stops
   after it. */
static UInt pcmpXstrX_wide_equal_ordered ( const UShort* argL,
                                           const UShort* argR,
                                           UInt validL, UInt validR )
{
   UInt hi, ni, found = 0;
   for (hi = 0; hi < 8; hi++) {
      Bool matches = True;
      for (ni = 0; ni + hi < 8 && (validR & (1u << ni)) != 0; ni++) {
         if (argL[hi + ni] != argR[ni]) {
            matches = False;
            break;
         }
      }
      if (matches)
         found |= (1u << hi);
      if ((validL & (1u << hi)) == 0)
         break;   // ran off the end of the haystack
   }
   return found & 0xFF;
}

/* Ranges aggregation on unsigned 16-bit elements.
   argL is the string, argR holds (low, high) pairs.  Bit si of the
   result is set when valid element argL[si] lies inside some pair whose
   two elements are both valid.  Scanning stops at the first invalid
   string element, and at the first pair that is not fully valid. */
static UInt pcmpXstrX_wide_ranges ( const UShort* argL,
                                    const UShort* argR,
                                    UInt validL, UInt validR )
{
   UInt si, ri, hits = 0;
   for (si = 0; si < 8 && (validL & (1u << si)) != 0; si++) {
      for (ri = 0; ri < 8; ri += 2) {
         const UInt pair = 3u << ri;
         if ((validR & pair) != pair)
            break;
         if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
            hits |= (1u << si);
            break;
         }
      }
   }
   return hits & 0xFF;
}

/* Compute the result and new OSZACP flags for PCMP{E,I}STR{I,M}
   variants operating on 16-bit characters.

   On success the new ECX value is written to the low 32 bits of *resV,
   the top 96 bits are zeroed, and the new OSZACP value is written to
   *resOSZACP.

   NOTE(review): every path below produces the index (xSTRI) form of the
   result.  isxSTRM is accepted but never consulted, so no 128-bit mask
   (xSTRM) result is generated here.

   argLV and argRV are the vector args.  The caller must prepare an
   8-bit mask for each, zmaskL and zmaskR.  For ISTRx variants this must
   be 1 for each zero 16-bit element of the respective arg.  For ESTRx
   variants this is derived from the explicit length indication, and
   must be 0 in all places except at the bit index corresponding to the
   valid length (0 .. 8).  If the valid length is 8 then the mask must
   be all zeroes.  In all cases, bits 31:8 must be zero.

   imm8 is the original immediate from the instruction.  Only an
   explicit list of validated imm8 values is accepted.

   If the given imm8 case can be handled, the return value is True.
   If not, False is returned, and neither *resV nor *resOSZACP is
   altered.
*/
Bool pcmpXstrX_WRK_wide ( /*OUT*/V128* resV,
                          /*OUT*/UInt* resOSZACP,
                          V128* argLV,  V128* argRV,
                          UInt zmaskL, UInt zmaskR,
                          UInt imm8,   Bool isxSTRM )
{
   assert(imm8 < 0x80);
   assert((zmaskL >> 8) == 0);
   assert((zmaskR >> 8) == 0);

   /* Explicitly reject any imm8 values that haven't been validated,
      even if they would probably work. */
   switch (imm8) {
      case 0x01: case 0x03: case 0x09: case 0x0B: case 0x0D:
      case 0x13:            case 0x1B:
                            case 0x39: case 0x3B:
                 case 0x45:            case 0x4B:
         break;
      default:
         return False;
   }

   /* Decode the immediate. */
   const UInt fmt = (imm8 >> 0) & 3; // imm8[1:0]  data format
   const UInt agg = (imm8 >> 2) & 3; // imm8[3:2]  aggregation fn
   const UInt pol = (imm8 >> 4) & 3; // imm8[5:4]  polarity
   const UInt idx = (imm8 >> 6) & 1; // imm8[6]    1==msb/bytemask

   const Bool fmtIsWide = (fmt == 1/*uw*/ || fmt == 3/*sw*/);

   /* not(left(zmask)): the bits strictly below the lowest set bit of
      the zero mask, or all ones if the mask is empty. */
   const UInt validL = ~(zmaskL | -zmaskL);
   const UInt validR = ~(zmaskR | -zmaskR);

   const UShort* argL = (const UShort*)argLV;
   const UShort* argR = (const UShort*)argRV;

   UInt intRes1;
   if (agg == 2/*equal each, aka strcmp*/ && fmtIsWide) {
      intRes1 = pcmpXstrX_wide_equal_each(argL, argR, validL, validR);
   }
   else if (agg == 0/*equal any, aka find chars in a set*/ && fmtIsWide) {
      intRes1 = pcmpXstrX_wide_equal_any(argL, argR, validL, validR);
   }
   else if (agg == 3/*equal ordered, aka substring search*/ && fmtIsWide) {
      intRes1 = pcmpXstrX_wide_equal_ordered(argL, argR, validL, validR);
   }
   else if (agg == 1/*ranges*/ && fmt == 1/*uw*/) {
      intRes1 = pcmpXstrX_wide_ranges(argL, argR, validL, validR);
   }
   else {
      return False;
   }

   // generate I-format output
   PCMPxSTRx_WRK_gen_output_fmt_I_wide(
      resV, resOSZACP,
      intRes1, zmaskL, zmaskR, validL, pol, idx
   );

   return True;
}
3799db268cc7344d8751213307737285e7cdac4fbecsewardj
3809db268cc7344d8751213307737285e7cdac4fbecsewardj//////////////////////////////////////////////////////////
3819db268cc7344d8751213307737285e7cdac4fbecsewardj//                                                      //
3829db268cc7344d8751213307737285e7cdac4fbecsewardj//                       ISTRI_4B                       //
3839db268cc7344d8751213307737285e7cdac4fbecsewardj//                                                      //
3849db268cc7344d8751213307737285e7cdac4fbecsewardj//////////////////////////////////////////////////////////
3859db268cc7344d8751213307737285e7cdac4fbecsewardj
/* Execute a real "pcmpistri $0x4B" on the two operands and return the
   outcome packed into one word: the flags (restricted to the O, S, Z,
   A, P and C bits, i.e. mask 0x8D5) in bits 16 and up, and the low 16
   bits of the resulting %rcx in bits 15:0.

   *argL is loaded into %xmm2 and *argR into %xmm11. */
UInt h_pcmpistri_4B ( V128* argL, V128* argR )
{
   /* Both operands are copied into one array so that the asm needs only
      a single base register to reach them. */
   V128  block[2];
   ULong res, flags;

   memcpy(block,     argL, sizeof *argL);
   memcpy(block + 1, argR, sizeof *argR);

   /* The stack pointer is dropped by 1024 around the pushfq/popq pair,
      presumably so the push cannot overwrite data the compiler keeps
      just below %rsp -- confirm. */
   __asm__ __volatile__(
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x4B,  %%xmm2, %%xmm11"   "\n\t"
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );

   UInt oszacp = (UInt)(flags & 0x8D5);
   UInt ecx16  = (UInt)(res & 0xFFFF);
   return (oszacp << 16) | ecx16;
}
4079db268cc7344d8751213307737285e7cdac4fbecsewardj
4089db268cc7344d8751213307737285e7cdac4fbecsewardjUInt s_pcmpistri_4B ( V128* argLU, V128* argRU )
4099db268cc7344d8751213307737285e7cdac4fbecsewardj{
4109db268cc7344d8751213307737285e7cdac4fbecsewardj   V128 resV;
4119db268cc7344d8751213307737285e7cdac4fbecsewardj   UInt resOSZACP, resECX;
4129db268cc7344d8751213307737285e7cdac4fbecsewardj   Bool ok
4139db268cc7344d8751213307737285e7cdac4fbecsewardj      = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU,
4149db268cc7344d8751213307737285e7cdac4fbecsewardj			    zmask_from_V128(argLU),
4159db268cc7344d8751213307737285e7cdac4fbecsewardj			    zmask_from_V128(argRU),
4169db268cc7344d8751213307737285e7cdac4fbecsewardj			    0x4B, False/*!isSTRM*/
4179db268cc7344d8751213307737285e7cdac4fbecsewardj        );
4189db268cc7344d8751213307737285e7cdac4fbecsewardj   assert(ok);
4199db268cc7344d8751213307737285e7cdac4fbecsewardj   resECX = resV.uInt[0];
4209db268cc7344d8751213307737285e7cdac4fbecsewardj   return (resOSZACP << 16) | resECX;
4219db268cc7344d8751213307737285e7cdac4fbecsewardj}
4229db268cc7344d8751213307737285e7cdac4fbecsewardj
4239db268cc7344d8751213307737285e7cdac4fbecsewardjvoid istri_4B ( void )
4249db268cc7344d8751213307737285e7cdac4fbecsewardj{
4259db268cc7344d8751213307737285e7cdac4fbecsewardj   char* wot = "4B";
4269db268cc7344d8751213307737285e7cdac4fbecsewardj   UInt(*h)(V128*,V128*) = h_pcmpistri_4B;
4279db268cc7344d8751213307737285e7cdac4fbecsewardj   UInt(*s)(V128*,V128*) = s_pcmpistri_4B;
4289db268cc7344d8751213307737285e7cdac4fbecsewardj
4299db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
4309db268cc7344d8751213307737285e7cdac4fbecsewardj
4319db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
4329db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
4339db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
4349db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
4359db268cc7344d8751213307737285e7cdac4fbecsewardj
4369db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
4379db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
4389db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
4399db268cc7344d8751213307737285e7cdac4fbecsewardj
4409db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
4419db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
4429db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
4439db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
4449db268cc7344d8751213307737285e7cdac4fbecsewardj
4459db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
4469db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
4479db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
4489db268cc7344d8751213307737285e7cdac4fbecsewardj
4499db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
4509db268cc7344d8751213307737285e7cdac4fbecsewardj
4519db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa");
4529db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa");
4539db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaa00aa");
4549db268cc7344d8751213307737285e7cdac4fbecsewardj
4559db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaaaaaa");
4569db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa");
4579db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaa00aa");
4589db268cc7344d8751213307737285e7cdac4fbecsewardj
4599db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa");
4609db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa00aaaaaa");
4619db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaa00aaaaaa");
4629db268cc7344d8751213307737285e7cdac4fbecsewardj
4639db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000000000000000", "aaaaaaaa00aaaaaa");
4649db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "8000000000000000", "aaaaaaaa00aaaaaa");
4659db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000000000000001", "aaaaaaaa00aaaaaa");
4669db268cc7344d8751213307737285e7cdac4fbecsewardj
4679db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
4689db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
4699db268cc7344d8751213307737285e7cdac4fbecsewardj}
4709db268cc7344d8751213307737285e7cdac4fbecsewardj
4719db268cc7344d8751213307737285e7cdac4fbecsewardj//////////////////////////////////////////////////////////
4729db268cc7344d8751213307737285e7cdac4fbecsewardj//                                                      //
4739db268cc7344d8751213307737285e7cdac4fbecsewardj//                       ISTRI_3B                       //
4749db268cc7344d8751213307737285e7cdac4fbecsewardj//                                                      //
4759db268cc7344d8751213307737285e7cdac4fbecsewardj//////////////////////////////////////////////////////////
4769db268cc7344d8751213307737285e7cdac4fbecsewardj
4779db268cc7344d8751213307737285e7cdac4fbecsewardjUInt h_pcmpistri_3B ( V128* argL, V128* argR )
4789db268cc7344d8751213307737285e7cdac4fbecsewardj{
4799db268cc7344d8751213307737285e7cdac4fbecsewardj   V128 block[2];
4809db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(&block[0], argL, sizeof(V128));
4819db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(&block[1], argR, sizeof(V128));
4829db268cc7344d8751213307737285e7cdac4fbecsewardj   ULong res, flags;
4839db268cc7344d8751213307737285e7cdac4fbecsewardj   __asm__ __volatile__(
4849db268cc7344d8751213307737285e7cdac4fbecsewardj      "subq      $1024,  %%rsp"             "\n\t"
4859db268cc7344d8751213307737285e7cdac4fbecsewardj      "movdqu    0(%2),  %%xmm2"            "\n\t"
4869db268cc7344d8751213307737285e7cdac4fbecsewardj      "movdqu    16(%2), %%xmm11"           "\n\t"
4879db268cc7344d8751213307737285e7cdac4fbecsewardj      "pcmpistri $0x3B,  %%xmm2, %%xmm11"   "\n\t"
4889db268cc7344d8751213307737285e7cdac4fbecsewardj      "pushfq"                              "\n\t"
4899db268cc7344d8751213307737285e7cdac4fbecsewardj      "popq      %%rdx"                     "\n\t"
4909db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      %%rcx,  %0"                "\n\t"
4919db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      %%rdx,  %1"                "\n\t"
4929db268cc7344d8751213307737285e7cdac4fbecsewardj      "addq      $1024,  %%rsp"             "\n\t"
4939db268cc7344d8751213307737285e7cdac4fbecsewardj      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
4949db268cc7344d8751213307737285e7cdac4fbecsewardj      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
4959db268cc7344d8751213307737285e7cdac4fbecsewardj   );
4969db268cc7344d8751213307737285e7cdac4fbecsewardj   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
4979db268cc7344d8751213307737285e7cdac4fbecsewardj}
4989db268cc7344d8751213307737285e7cdac4fbecsewardj
4999db268cc7344d8751213307737285e7cdac4fbecsewardjUInt s_pcmpistri_3B ( V128* argLU, V128* argRU )
5009db268cc7344d8751213307737285e7cdac4fbecsewardj{
5019db268cc7344d8751213307737285e7cdac4fbecsewardj   V128 resV;
5029db268cc7344d8751213307737285e7cdac4fbecsewardj   UInt resOSZACP, resECX;
5039db268cc7344d8751213307737285e7cdac4fbecsewardj   Bool ok
5049db268cc7344d8751213307737285e7cdac4fbecsewardj      = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU,
5059db268cc7344d8751213307737285e7cdac4fbecsewardj			    zmask_from_V128(argLU),
5069db268cc7344d8751213307737285e7cdac4fbecsewardj			    zmask_from_V128(argRU),
5079db268cc7344d8751213307737285e7cdac4fbecsewardj			    0x3B, False/*!isSTRM*/
5089db268cc7344d8751213307737285e7cdac4fbecsewardj        );
5099db268cc7344d8751213307737285e7cdac4fbecsewardj   assert(ok);
5109db268cc7344d8751213307737285e7cdac4fbecsewardj   resECX = resV.uInt[0];
5119db268cc7344d8751213307737285e7cdac4fbecsewardj   return (resOSZACP << 16) | resECX;
5129db268cc7344d8751213307737285e7cdac4fbecsewardj}
5139db268cc7344d8751213307737285e7cdac4fbecsewardj
5149db268cc7344d8751213307737285e7cdac4fbecsewardjvoid istri_3B ( void )
5159db268cc7344d8751213307737285e7cdac4fbecsewardj{
5169db268cc7344d8751213307737285e7cdac4fbecsewardj   char* wot = "3B";
5179db268cc7344d8751213307737285e7cdac4fbecsewardj   UInt(*h)(V128*,V128*) = h_pcmpistri_3B;
5189db268cc7344d8751213307737285e7cdac4fbecsewardj   UInt(*s)(V128*,V128*) = s_pcmpistri_3B;
5199db268cc7344d8751213307737285e7cdac4fbecsewardj
5209db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
5219db268cc7344d8751213307737285e7cdac4fbecsewardj
5229db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
5239db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
5249db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
5259db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
5269db268cc7344d8751213307737285e7cdac4fbecsewardj
5279db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
5289db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
5299db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
5309db268cc7344d8751213307737285e7cdac4fbecsewardj
5319db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
5329db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
5339db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
5349db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
5359db268cc7344d8751213307737285e7cdac4fbecsewardj
5369db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
5379db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
5389db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
5399db268cc7344d8751213307737285e7cdac4fbecsewardj
5409db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
5419db268cc7344d8751213307737285e7cdac4fbecsewardj
5429db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa");
5439db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa");
5449db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaa00aa");
5459db268cc7344d8751213307737285e7cdac4fbecsewardj
5469db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaaaaaa");
5479db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa");
5489db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaa00aa");
5499db268cc7344d8751213307737285e7cdac4fbecsewardj
5509db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa");
5519db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa00aaaaaa");
5529db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaa00aaaaaa");
5539db268cc7344d8751213307737285e7cdac4fbecsewardj
5549db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000000000000000", "aaaaaaaa00aaaaaa");
5559db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "8000000000000000", "aaaaaaaa00aaaaaa");
5569db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000000000000001", "aaaaaaaa00aaaaaa");
5579db268cc7344d8751213307737285e7cdac4fbecsewardj
5589db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
5599db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
5609db268cc7344d8751213307737285e7cdac4fbecsewardj}
5619db268cc7344d8751213307737285e7cdac4fbecsewardj
5629db268cc7344d8751213307737285e7cdac4fbecsewardj
5639db268cc7344d8751213307737285e7cdac4fbecsewardj
5649db268cc7344d8751213307737285e7cdac4fbecsewardj//////////////////////////////////////////////////////////
5659db268cc7344d8751213307737285e7cdac4fbecsewardj//                                                      //
5669db268cc7344d8751213307737285e7cdac4fbecsewardj//                       ISTRI_0D                       //
5679db268cc7344d8751213307737285e7cdac4fbecsewardj//                                                      //
5689db268cc7344d8751213307737285e7cdac4fbecsewardj//////////////////////////////////////////////////////////
5699db268cc7344d8751213307737285e7cdac4fbecsewardj
5709db268cc7344d8751213307737285e7cdac4fbecsewardj__attribute__((noinline))
5719db268cc7344d8751213307737285e7cdac4fbecsewardjUInt h_pcmpistri_0D ( V128* argL, V128* argR )
5729db268cc7344d8751213307737285e7cdac4fbecsewardj{
5739db268cc7344d8751213307737285e7cdac4fbecsewardj   V128 block[2];
5749db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(&block[0], argL, sizeof(V128));
5759db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(&block[1], argR, sizeof(V128));
5769db268cc7344d8751213307737285e7cdac4fbecsewardj   ULong res = 0, flags = 0;
5779db268cc7344d8751213307737285e7cdac4fbecsewardj   __asm__ __volatile__(
578c5274ae844ae01cde66e35f1873ed37726dccd45weidendo      "movdqu    0(%2),  %%xmm2"            "\n\t"
579c5274ae844ae01cde66e35f1873ed37726dccd45weidendo      "movdqu    16(%2), %%xmm11"           "\n\t"
5809db268cc7344d8751213307737285e7cdac4fbecsewardj      "pcmpistri $0x0D,  %%xmm2, %%xmm11"   "\n\t"
5819db268cc7344d8751213307737285e7cdac4fbecsewardj      //"pcmpistrm $0x0D,  %%xmm2, %%xmm11"   "\n\t"
5829db268cc7344d8751213307737285e7cdac4fbecsewardj      //"movd %%xmm0, %%ecx" "\n\t"
5839db268cc7344d8751213307737285e7cdac4fbecsewardj      "pushfq"                              "\n\t"
5849db268cc7344d8751213307737285e7cdac4fbecsewardj      "popq      %%rdx"                     "\n\t"
5859db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      %%rcx,  %0"                "\n\t"
5869db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      %%rdx,  %1"                "\n\t"
5879db268cc7344d8751213307737285e7cdac4fbecsewardj      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
5889db268cc7344d8751213307737285e7cdac4fbecsewardj      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
5899db268cc7344d8751213307737285e7cdac4fbecsewardj   );
5909db268cc7344d8751213307737285e7cdac4fbecsewardj   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
5919db268cc7344d8751213307737285e7cdac4fbecsewardj}
5929db268cc7344d8751213307737285e7cdac4fbecsewardj
5939db268cc7344d8751213307737285e7cdac4fbecsewardjUInt s_pcmpistri_0D ( V128* argLU, V128* argRU )
5949db268cc7344d8751213307737285e7cdac4fbecsewardj{
5959db268cc7344d8751213307737285e7cdac4fbecsewardj   V128 resV;
5969db268cc7344d8751213307737285e7cdac4fbecsewardj   UInt resOSZACP, resECX;
5979db268cc7344d8751213307737285e7cdac4fbecsewardj   Bool ok
5989db268cc7344d8751213307737285e7cdac4fbecsewardj      = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU,
5999db268cc7344d8751213307737285e7cdac4fbecsewardj			    zmask_from_V128(argLU),
6009db268cc7344d8751213307737285e7cdac4fbecsewardj			    zmask_from_V128(argRU),
6019db268cc7344d8751213307737285e7cdac4fbecsewardj			    0x0D, False/*!isSTRM*/
6029db268cc7344d8751213307737285e7cdac4fbecsewardj        );
6039db268cc7344d8751213307737285e7cdac4fbecsewardj   assert(ok);
6049db268cc7344d8751213307737285e7cdac4fbecsewardj   resECX = resV.uInt[0];
6059db268cc7344d8751213307737285e7cdac4fbecsewardj   return (resOSZACP << 16) | resECX;
6069db268cc7344d8751213307737285e7cdac4fbecsewardj}
6079db268cc7344d8751213307737285e7cdac4fbecsewardj
6089db268cc7344d8751213307737285e7cdac4fbecsewardjvoid istri_0D ( void )
6099db268cc7344d8751213307737285e7cdac4fbecsewardj{
6109db268cc7344d8751213307737285e7cdac4fbecsewardj   char* wot = "0D";
6119db268cc7344d8751213307737285e7cdac4fbecsewardj   UInt(*h)(V128*,V128*) = h_pcmpistri_0D;
6129db268cc7344d8751213307737285e7cdac4fbecsewardj   UInt(*s)(V128*,V128*) = s_pcmpistri_0D;
6139db268cc7344d8751213307737285e7cdac4fbecsewardj
6149db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "11111111abcdef11", "0000000000abcdef");
6159db268cc7344d8751213307737285e7cdac4fbecsewardj
6169db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "11111111abcdef11", "00abcdef00abcdef");
6179db268cc7344d8751213307737285e7cdac4fbecsewardj
6189db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "11111111abcdef11", "0000000000abcdef");
6199db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "1111111111abcdef", "0000000000abcdef");
6209db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "111111111111abcd", "0000000000abcdef");
6219db268cc7344d8751213307737285e7cdac4fbecsewardj
6229db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "1111abcd11abcd11", "000000000000abcd");
6239db268cc7344d8751213307737285e7cdac4fbecsewardj
6249db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "11abcd1111abcd11", "000000000000abcd");
6259db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "abcd111111abcd11", "000000000000abcd");
6269db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "cd11111111abcd11", "000000000000abcd");
6279db268cc7344d8751213307737285e7cdac4fbecsewardj
6289db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "01abcd11abcd1111", "000000000000abcd");
6299db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "00abcd11abcd1111", "000000000000abcd");
6309db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000cd11abcd1111", "000000000000abcd");
6319db268cc7344d8751213307737285e7cdac4fbecsewardj
6329db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "00abcd1100abcd11", "000000000000abcd");
6339db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "00abcd110000cd11", "000000000000abcd");
6349db268cc7344d8751213307737285e7cdac4fbecsewardj
6359db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "1111111111111234", "0000000000000000");
6369db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "1111111111111234", "0000000000000011");
6379db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "1111111111111234", "0000000000001111");
6389db268cc7344d8751213307737285e7cdac4fbecsewardj
6399db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "1111111111111234", "1111111111111234");
6409db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0a11111111111111", "000000000000000a");
6419db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0b11111111111111", "000000000000000a");
642c5274ae844ae01cde66e35f1873ed37726dccd45weidendo
643c5274ae844ae01cde66e35f1873ed37726dccd45weidendo   try_istri(wot,h,s, "b111111111111111", "0000000000000000");
644c5274ae844ae01cde66e35f1873ed37726dccd45weidendo   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
645c5274ae844ae01cde66e35f1873ed37726dccd45weidendo   try_istri(wot,h,s, "123456789abcdef1", "0000000000000000");
646c5274ae844ae01cde66e35f1873ed37726dccd45weidendo   try_istri(wot,h,s, "0000000000000000", "123456789abcdef1");
6479db268cc7344d8751213307737285e7cdac4fbecsewardj}
6489db268cc7344d8751213307737285e7cdac4fbecsewardj
6499db268cc7344d8751213307737285e7cdac4fbecsewardj
6509db268cc7344d8751213307737285e7cdac4fbecsewardj//////////////////////////////////////////////////////////
6519db268cc7344d8751213307737285e7cdac4fbecsewardj//                                                      //
6529db268cc7344d8751213307737285e7cdac4fbecsewardj//                       ISTRI_09                       //
6539db268cc7344d8751213307737285e7cdac4fbecsewardj//                                                      //
6549db268cc7344d8751213307737285e7cdac4fbecsewardj//////////////////////////////////////////////////////////
6559db268cc7344d8751213307737285e7cdac4fbecsewardj
6569db268cc7344d8751213307737285e7cdac4fbecsewardjUInt h_pcmpistri_09 ( V128* argL, V128* argR )
6579db268cc7344d8751213307737285e7cdac4fbecsewardj{
6589db268cc7344d8751213307737285e7cdac4fbecsewardj   V128 block[2];
6599db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(&block[0], argL, sizeof(V128));
6609db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(&block[1], argR, sizeof(V128));
6619db268cc7344d8751213307737285e7cdac4fbecsewardj   ULong res, flags;
6629db268cc7344d8751213307737285e7cdac4fbecsewardj   __asm__ __volatile__(
6639db268cc7344d8751213307737285e7cdac4fbecsewardj      "subq      $1024,  %%rsp"             "\n\t"
6649db268cc7344d8751213307737285e7cdac4fbecsewardj      "movdqu    0(%2),  %%xmm2"            "\n\t"
6659db268cc7344d8751213307737285e7cdac4fbecsewardj      "movdqu    16(%2), %%xmm11"           "\n\t"
6669db268cc7344d8751213307737285e7cdac4fbecsewardj      "pcmpistri $0x09,  %%xmm2, %%xmm11"   "\n\t"
6679db268cc7344d8751213307737285e7cdac4fbecsewardj      "pushfq"                              "\n\t"
6689db268cc7344d8751213307737285e7cdac4fbecsewardj      "popq      %%rdx"                     "\n\t"
6699db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      %%rcx,  %0"                "\n\t"
6709db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      %%rdx,  %1"                "\n\t"
6719db268cc7344d8751213307737285e7cdac4fbecsewardj      "addq      $1024,  %%rsp"             "\n\t"
6729db268cc7344d8751213307737285e7cdac4fbecsewardj      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
6739db268cc7344d8751213307737285e7cdac4fbecsewardj      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
6749db268cc7344d8751213307737285e7cdac4fbecsewardj   );
6759db268cc7344d8751213307737285e7cdac4fbecsewardj   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
6769db268cc7344d8751213307737285e7cdac4fbecsewardj}
6779db268cc7344d8751213307737285e7cdac4fbecsewardj
6789db268cc7344d8751213307737285e7cdac4fbecsewardjUInt s_pcmpistri_09 ( V128* argLU, V128* argRU )
6799db268cc7344d8751213307737285e7cdac4fbecsewardj{
6809db268cc7344d8751213307737285e7cdac4fbecsewardj   V128 resV;
6819db268cc7344d8751213307737285e7cdac4fbecsewardj   UInt resOSZACP, resECX;
6829db268cc7344d8751213307737285e7cdac4fbecsewardj   Bool ok
6839db268cc7344d8751213307737285e7cdac4fbecsewardj      = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU,
6849db268cc7344d8751213307737285e7cdac4fbecsewardj			    zmask_from_V128(argLU),
6859db268cc7344d8751213307737285e7cdac4fbecsewardj			    zmask_from_V128(argRU),
6869db268cc7344d8751213307737285e7cdac4fbecsewardj			    0x09, False/*!isSTRM*/
6879db268cc7344d8751213307737285e7cdac4fbecsewardj        );
6889db268cc7344d8751213307737285e7cdac4fbecsewardj   assert(ok);
6899db268cc7344d8751213307737285e7cdac4fbecsewardj   resECX = resV.uInt[0];
6909db268cc7344d8751213307737285e7cdac4fbecsewardj   return (resOSZACP << 16) | resECX;
6919db268cc7344d8751213307737285e7cdac4fbecsewardj}
6929db268cc7344d8751213307737285e7cdac4fbecsewardj
6939db268cc7344d8751213307737285e7cdac4fbecsewardjvoid istri_09 ( void )
6949db268cc7344d8751213307737285e7cdac4fbecsewardj{
6959db268cc7344d8751213307737285e7cdac4fbecsewardj   char* wot = "09";
6969db268cc7344d8751213307737285e7cdac4fbecsewardj   UInt(*h)(V128*,V128*) = h_pcmpistri_09;
6979db268cc7344d8751213307737285e7cdac4fbecsewardj   UInt(*s)(V128*,V128*) = s_pcmpistri_09;
6989db268cc7344d8751213307737285e7cdac4fbecsewardj
6999db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
7009db268cc7344d8751213307737285e7cdac4fbecsewardj
7019db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
7029db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
7039db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
7049db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
7059db268cc7344d8751213307737285e7cdac4fbecsewardj
7069db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
7079db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
7089db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
7099db268cc7344d8751213307737285e7cdac4fbecsewardj
7109db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
7119db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
7129db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
7139db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
7149db268cc7344d8751213307737285e7cdac4fbecsewardj
7159db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
7169db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
7179db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
7189db268cc7344d8751213307737285e7cdac4fbecsewardj
7199db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
7209db268cc7344d8751213307737285e7cdac4fbecsewardj
7219db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa");
7229db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa");
7239db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaa00aa");
7249db268cc7344d8751213307737285e7cdac4fbecsewardj
7259db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaaaaaa");
7269db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa");
7279db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaa00aa");
7289db268cc7344d8751213307737285e7cdac4fbecsewardj
7299db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa");
7309db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa00aaaaaa");
7319db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaa00aaaaaa");
7329db268cc7344d8751213307737285e7cdac4fbecsewardj
7339db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000000000000000", "aaaaaaaa00aaaaaa");
7349db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "8000000000000000", "aaaaaaaa00aaaaaa");
7359db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000000000000001", "aaaaaaaa00aaaaaa");
7369db268cc7344d8751213307737285e7cdac4fbecsewardj
7379db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
7389db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
7399db268cc7344d8751213307737285e7cdac4fbecsewardj}
7409db268cc7344d8751213307737285e7cdac4fbecsewardj
7419db268cc7344d8751213307737285e7cdac4fbecsewardj
7429db268cc7344d8751213307737285e7cdac4fbecsewardj
7439db268cc7344d8751213307737285e7cdac4fbecsewardj//////////////////////////////////////////////////////////
7449db268cc7344d8751213307737285e7cdac4fbecsewardj//                                                      //
7459db268cc7344d8751213307737285e7cdac4fbecsewardj//                       ISTRI_1B                       //
7469db268cc7344d8751213307737285e7cdac4fbecsewardj//                                                      //
7479db268cc7344d8751213307737285e7cdac4fbecsewardj//////////////////////////////////////////////////////////
7489db268cc7344d8751213307737285e7cdac4fbecsewardj
7499db268cc7344d8751213307737285e7cdac4fbecsewardjUInt h_pcmpistri_1B ( V128* argL, V128* argR )
7509db268cc7344d8751213307737285e7cdac4fbecsewardj{
7519db268cc7344d8751213307737285e7cdac4fbecsewardj   V128 block[2];
7529db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(&block[0], argL, sizeof(V128));
7539db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(&block[1], argR, sizeof(V128));
7549db268cc7344d8751213307737285e7cdac4fbecsewardj   ULong res, flags;
7559db268cc7344d8751213307737285e7cdac4fbecsewardj   __asm__ __volatile__(
7569db268cc7344d8751213307737285e7cdac4fbecsewardj      "subq      $1024,  %%rsp"             "\n\t"
7579db268cc7344d8751213307737285e7cdac4fbecsewardj      "movdqu    0(%2),  %%xmm2"            "\n\t"
7589db268cc7344d8751213307737285e7cdac4fbecsewardj      "movdqu    16(%2), %%xmm11"           "\n\t"
7599db268cc7344d8751213307737285e7cdac4fbecsewardj      "pcmpistri $0x1B,  %%xmm2, %%xmm11"   "\n\t"
7609db268cc7344d8751213307737285e7cdac4fbecsewardj      "pushfq"                              "\n\t"
7619db268cc7344d8751213307737285e7cdac4fbecsewardj      "popq      %%rdx"                     "\n\t"
7629db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      %%rcx,  %0"                "\n\t"
7639db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      %%rdx,  %1"                "\n\t"
7649db268cc7344d8751213307737285e7cdac4fbecsewardj      "addq      $1024,  %%rsp"             "\n\t"
7659db268cc7344d8751213307737285e7cdac4fbecsewardj      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
7669db268cc7344d8751213307737285e7cdac4fbecsewardj      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
7679db268cc7344d8751213307737285e7cdac4fbecsewardj   );
7689db268cc7344d8751213307737285e7cdac4fbecsewardj   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
7699db268cc7344d8751213307737285e7cdac4fbecsewardj}
7709db268cc7344d8751213307737285e7cdac4fbecsewardj
7719db268cc7344d8751213307737285e7cdac4fbecsewardjUInt s_pcmpistri_1B ( V128* argLU, V128* argRU )
7729db268cc7344d8751213307737285e7cdac4fbecsewardj{
7739db268cc7344d8751213307737285e7cdac4fbecsewardj   V128 resV;
7749db268cc7344d8751213307737285e7cdac4fbecsewardj   UInt resOSZACP, resECX;
7759db268cc7344d8751213307737285e7cdac4fbecsewardj   Bool ok
7769db268cc7344d8751213307737285e7cdac4fbecsewardj      = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU,
7779db268cc7344d8751213307737285e7cdac4fbecsewardj			    zmask_from_V128(argLU),
7789db268cc7344d8751213307737285e7cdac4fbecsewardj			    zmask_from_V128(argRU),
7799db268cc7344d8751213307737285e7cdac4fbecsewardj			    0x1B, False/*!isSTRM*/
7809db268cc7344d8751213307737285e7cdac4fbecsewardj        );
7819db268cc7344d8751213307737285e7cdac4fbecsewardj   assert(ok);
7829db268cc7344d8751213307737285e7cdac4fbecsewardj   resECX = resV.uInt[0];
7839db268cc7344d8751213307737285e7cdac4fbecsewardj   return (resOSZACP << 16) | resECX;
7849db268cc7344d8751213307737285e7cdac4fbecsewardj}
7859db268cc7344d8751213307737285e7cdac4fbecsewardj
7869db268cc7344d8751213307737285e7cdac4fbecsewardjvoid istri_1B ( void )
7879db268cc7344d8751213307737285e7cdac4fbecsewardj{
7889db268cc7344d8751213307737285e7cdac4fbecsewardj   char* wot = "1B";
7899db268cc7344d8751213307737285e7cdac4fbecsewardj   UInt(*h)(V128*,V128*) = h_pcmpistri_1B;
7909db268cc7344d8751213307737285e7cdac4fbecsewardj   UInt(*s)(V128*,V128*) = s_pcmpistri_1B;
7919db268cc7344d8751213307737285e7cdac4fbecsewardj
7929db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
7939db268cc7344d8751213307737285e7cdac4fbecsewardj
7949db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
7959db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
7969db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
7979db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
7989db268cc7344d8751213307737285e7cdac4fbecsewardj
7999db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
8009db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
8019db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
8029db268cc7344d8751213307737285e7cdac4fbecsewardj
8039db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
8049db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
8059db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
8069db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
8079db268cc7344d8751213307737285e7cdac4fbecsewardj
8089db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
8099db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
8109db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
8119db268cc7344d8751213307737285e7cdac4fbecsewardj
8129db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
8139db268cc7344d8751213307737285e7cdac4fbecsewardj
8149db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa");
8159db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa");
8169db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaa00aa");
8179db268cc7344d8751213307737285e7cdac4fbecsewardj
8189db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaaaaaa");
8199db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa");
8209db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaa00aa");
8219db268cc7344d8751213307737285e7cdac4fbecsewardj
8229db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa");
8239db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa00aaaaaa");
8249db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaa00aaaaaa");
8259db268cc7344d8751213307737285e7cdac4fbecsewardj
8269db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000000000000000", "aaaaaaaa00aaaaaa");
8279db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "8000000000000000", "aaaaaaaa00aaaaaa");
8289db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000000000000001", "aaaaaaaa00aaaaaa");
8299db268cc7344d8751213307737285e7cdac4fbecsewardj
8309db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
8319db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
8329db268cc7344d8751213307737285e7cdac4fbecsewardj}
8339db268cc7344d8751213307737285e7cdac4fbecsewardj
8349db268cc7344d8751213307737285e7cdac4fbecsewardj
8359db268cc7344d8751213307737285e7cdac4fbecsewardj
8369db268cc7344d8751213307737285e7cdac4fbecsewardj//////////////////////////////////////////////////////////
8379db268cc7344d8751213307737285e7cdac4fbecsewardj//                                                      //
8389db268cc7344d8751213307737285e7cdac4fbecsewardj//                       ISTRI_03                       //
8399db268cc7344d8751213307737285e7cdac4fbecsewardj//                                                      //
8409db268cc7344d8751213307737285e7cdac4fbecsewardj//////////////////////////////////////////////////////////
8419db268cc7344d8751213307737285e7cdac4fbecsewardj
8429db268cc7344d8751213307737285e7cdac4fbecsewardjUInt h_pcmpistri_03 ( V128* argL, V128* argR )
8439db268cc7344d8751213307737285e7cdac4fbecsewardj{
8449db268cc7344d8751213307737285e7cdac4fbecsewardj   V128 block[2];
8459db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(&block[0], argL, sizeof(V128));
8469db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(&block[1], argR, sizeof(V128));
8479db268cc7344d8751213307737285e7cdac4fbecsewardj   ULong res, flags;
8489db268cc7344d8751213307737285e7cdac4fbecsewardj   __asm__ __volatile__(
8499db268cc7344d8751213307737285e7cdac4fbecsewardj      "subq      $1024,  %%rsp"             "\n\t"
8509db268cc7344d8751213307737285e7cdac4fbecsewardj      "movdqu    0(%2),  %%xmm2"            "\n\t"
8519db268cc7344d8751213307737285e7cdac4fbecsewardj      "movdqu    16(%2), %%xmm11"           "\n\t"
8529db268cc7344d8751213307737285e7cdac4fbecsewardj      "pcmpistri $0x03,  %%xmm2, %%xmm11"   "\n\t"
8539db268cc7344d8751213307737285e7cdac4fbecsewardj//"pcmpistrm $0x03, %%xmm2, %%xmm11"   "\n\t"
8549db268cc7344d8751213307737285e7cdac4fbecsewardj//"movd %%xmm0, %%ecx" "\n\t"
8559db268cc7344d8751213307737285e7cdac4fbecsewardj      "pushfq"                              "\n\t"
8569db268cc7344d8751213307737285e7cdac4fbecsewardj      "popq      %%rdx"                     "\n\t"
8579db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      %%rcx,  %0"                "\n\t"
8589db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      %%rdx,  %1"                "\n\t"
8599db268cc7344d8751213307737285e7cdac4fbecsewardj      "addq      $1024,  %%rsp"             "\n\t"
8609db268cc7344d8751213307737285e7cdac4fbecsewardj      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
8619db268cc7344d8751213307737285e7cdac4fbecsewardj      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
8629db268cc7344d8751213307737285e7cdac4fbecsewardj   );
8639db268cc7344d8751213307737285e7cdac4fbecsewardj   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
8649db268cc7344d8751213307737285e7cdac4fbecsewardj}
8659db268cc7344d8751213307737285e7cdac4fbecsewardj
8669db268cc7344d8751213307737285e7cdac4fbecsewardjUInt s_pcmpistri_03 ( V128* argLU, V128* argRU )
8679db268cc7344d8751213307737285e7cdac4fbecsewardj{
8689db268cc7344d8751213307737285e7cdac4fbecsewardj   V128 resV;
8699db268cc7344d8751213307737285e7cdac4fbecsewardj   UInt resOSZACP, resECX;
8709db268cc7344d8751213307737285e7cdac4fbecsewardj   Bool ok
8719db268cc7344d8751213307737285e7cdac4fbecsewardj      = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU,
8729db268cc7344d8751213307737285e7cdac4fbecsewardj			    zmask_from_V128(argLU),
8739db268cc7344d8751213307737285e7cdac4fbecsewardj			    zmask_from_V128(argRU),
8749db268cc7344d8751213307737285e7cdac4fbecsewardj			    0x03, False/*!isSTRM*/
8759db268cc7344d8751213307737285e7cdac4fbecsewardj        );
8769db268cc7344d8751213307737285e7cdac4fbecsewardj   assert(ok);
8779db268cc7344d8751213307737285e7cdac4fbecsewardj   resECX = resV.uInt[0];
8789db268cc7344d8751213307737285e7cdac4fbecsewardj   return (resOSZACP << 16) | resECX;
8799db268cc7344d8751213307737285e7cdac4fbecsewardj}
8809db268cc7344d8751213307737285e7cdac4fbecsewardj
8819db268cc7344d8751213307737285e7cdac4fbecsewardjvoid istri_03 ( void )
8829db268cc7344d8751213307737285e7cdac4fbecsewardj{
8839db268cc7344d8751213307737285e7cdac4fbecsewardj   char* wot = "03";
8849db268cc7344d8751213307737285e7cdac4fbecsewardj   UInt(*h)(V128*,V128*) = h_pcmpistri_03;
8859db268cc7344d8751213307737285e7cdac4fbecsewardj   UInt(*s)(V128*,V128*) = s_pcmpistri_03;
8869db268cc7344d8751213307737285e7cdac4fbecsewardj
8879db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aacdacbdaacdaacd", "00000000000000aa");
8889db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aabbaabbaabbaabb", "00000000000000bb");
8899db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aabbccddaabbccdd", "000000000000aabb");
8909db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
8919db268cc7344d8751213307737285e7cdac4fbecsewardj
8929db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbccdd");
8939db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "00bbccddaabbccdd", "00000000aabbccdd");
8949db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aabbccddaa00ccdd", "00000000aabbccdd");
8959db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aabbccddaabb00dd", "00000000aabbccdd");
8969db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aabbccddaabbcc00", "00000000aabbccdd");
8979db268cc7344d8751213307737285e7cdac4fbecsewardj
8989db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbccdd");
8999db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aa00ccdd");
9009db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabb00dd");
9019db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbcc00");
9029db268cc7344d8751213307737285e7cdac4fbecsewardj
9039db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
9049db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
9059db268cc7344d8751213307737285e7cdac4fbecsewardj
9069db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
9079db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
9089db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000aabbaabbaabb", "000000000000bbbb");
9099db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000ccddaabbccdd", "00000000bbaabbaa");
9109db268cc7344d8751213307737285e7cdac4fbecsewardj
9119db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000ccddaabbccdd", "000000bbaabbaa00");
9129db268cc7344d8751213307737285e7cdac4fbecsewardj
9139db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
9149db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
9159db268cc7344d8751213307737285e7cdac4fbecsewardj}
9169db268cc7344d8751213307737285e7cdac4fbecsewardj
9179db268cc7344d8751213307737285e7cdac4fbecsewardj
9189db268cc7344d8751213307737285e7cdac4fbecsewardj//////////////////////////////////////////////////////////
9199db268cc7344d8751213307737285e7cdac4fbecsewardj//                                                      //
9209db268cc7344d8751213307737285e7cdac4fbecsewardj//                       ISTRI_13                       //
9219db268cc7344d8751213307737285e7cdac4fbecsewardj//                                                      //
9229db268cc7344d8751213307737285e7cdac4fbecsewardj//////////////////////////////////////////////////////////
9239db268cc7344d8751213307737285e7cdac4fbecsewardj
9249db268cc7344d8751213307737285e7cdac4fbecsewardjUInt h_pcmpistri_13 ( V128* argL, V128* argR )
9259db268cc7344d8751213307737285e7cdac4fbecsewardj{
9269db268cc7344d8751213307737285e7cdac4fbecsewardj   V128 block[2];
9279db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(&block[0], argL, sizeof(V128));
9289db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(&block[1], argR, sizeof(V128));
9299db268cc7344d8751213307737285e7cdac4fbecsewardj   ULong res, flags;
9309db268cc7344d8751213307737285e7cdac4fbecsewardj   __asm__ __volatile__(
9319db268cc7344d8751213307737285e7cdac4fbecsewardj      "subq      $1024,  %%rsp"             "\n\t"
9329db268cc7344d8751213307737285e7cdac4fbecsewardj      "movdqu    0(%2),  %%xmm2"            "\n\t"
9339db268cc7344d8751213307737285e7cdac4fbecsewardj      "movdqu    16(%2), %%xmm11"           "\n\t"
9349db268cc7344d8751213307737285e7cdac4fbecsewardj      "pcmpistri $0x13,  %%xmm2, %%xmm11"   "\n\t"
9359db268cc7344d8751213307737285e7cdac4fbecsewardj//"pcmpistrm $0x13, %%xmm2, %%xmm11"   "\n\t"
9369db268cc7344d8751213307737285e7cdac4fbecsewardj//"movd %%xmm0, %%ecx" "\n\t"
9379db268cc7344d8751213307737285e7cdac4fbecsewardj      "pushfq"                              "\n\t"
9389db268cc7344d8751213307737285e7cdac4fbecsewardj      "popq      %%rdx"                     "\n\t"
9399db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      %%rcx,  %0"                "\n\t"
9409db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      %%rdx,  %1"                "\n\t"
9419db268cc7344d8751213307737285e7cdac4fbecsewardj      "addq      $1024,  %%rsp"             "\n\t"
9429db268cc7344d8751213307737285e7cdac4fbecsewardj      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
9439db268cc7344d8751213307737285e7cdac4fbecsewardj      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
9449db268cc7344d8751213307737285e7cdac4fbecsewardj   );
9459db268cc7344d8751213307737285e7cdac4fbecsewardj   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
9469db268cc7344d8751213307737285e7cdac4fbecsewardj}
9479db268cc7344d8751213307737285e7cdac4fbecsewardj
9489db268cc7344d8751213307737285e7cdac4fbecsewardjUInt s_pcmpistri_13 ( V128* argLU, V128* argRU )
9499db268cc7344d8751213307737285e7cdac4fbecsewardj{
9509db268cc7344d8751213307737285e7cdac4fbecsewardj   V128 resV;
9519db268cc7344d8751213307737285e7cdac4fbecsewardj   UInt resOSZACP, resECX;
9529db268cc7344d8751213307737285e7cdac4fbecsewardj   Bool ok
9539db268cc7344d8751213307737285e7cdac4fbecsewardj      = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU,
9549db268cc7344d8751213307737285e7cdac4fbecsewardj			    zmask_from_V128(argLU),
9559db268cc7344d8751213307737285e7cdac4fbecsewardj			    zmask_from_V128(argRU),
9569db268cc7344d8751213307737285e7cdac4fbecsewardj			    0x13, False/*!isSTRM*/
9579db268cc7344d8751213307737285e7cdac4fbecsewardj        );
9589db268cc7344d8751213307737285e7cdac4fbecsewardj   assert(ok);
9599db268cc7344d8751213307737285e7cdac4fbecsewardj   resECX = resV.uInt[0];
9609db268cc7344d8751213307737285e7cdac4fbecsewardj   return (resOSZACP << 16) | resECX;
9619db268cc7344d8751213307737285e7cdac4fbecsewardj}
9629db268cc7344d8751213307737285e7cdac4fbecsewardj
9639db268cc7344d8751213307737285e7cdac4fbecsewardjvoid istri_13 ( void )
9649db268cc7344d8751213307737285e7cdac4fbecsewardj{
9659db268cc7344d8751213307737285e7cdac4fbecsewardj   char* wot = "13";
9669db268cc7344d8751213307737285e7cdac4fbecsewardj   UInt(*h)(V128*,V128*) = h_pcmpistri_13;
9679db268cc7344d8751213307737285e7cdac4fbecsewardj   UInt(*s)(V128*,V128*) = s_pcmpistri_13;
9689db268cc7344d8751213307737285e7cdac4fbecsewardj
9699db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aacdacbdaacdaacd", "00000000000000aa");
9709db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aabbaabbaabbaabb", "00000000000000bb");
9719db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aabbccddaabbccdd", "000000000000aabb");
9729db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
9739db268cc7344d8751213307737285e7cdac4fbecsewardj
9749db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbccdd");
9759db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "00bbccddaabbccdd", "00000000aabbccdd");
9769db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aabbccddaa00ccdd", "00000000aabbccdd");
9779db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aabbccddaabb00dd", "00000000aabbccdd");
9789db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aabbccddaabbcc00", "00000000aabbccdd");
9799db268cc7344d8751213307737285e7cdac4fbecsewardj
9809db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbccdd");
9819db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aa00ccdd");
9829db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabb00dd");
9839db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbcc00");
9849db268cc7344d8751213307737285e7cdac4fbecsewardj
9859db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
9869db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
9879db268cc7344d8751213307737285e7cdac4fbecsewardj
9889db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
9899db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
9909db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000aabbaabbaabb", "000000000000bbbb");
9919db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000ccddaabbccdd", "00000000bbaabbaa");
9929db268cc7344d8751213307737285e7cdac4fbecsewardj
9939db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000ccddaabbccdd", "000000bbaabbaa00");
9949db268cc7344d8751213307737285e7cdac4fbecsewardj
9959db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
9969db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
9979db268cc7344d8751213307737285e7cdac4fbecsewardj}
9989db268cc7344d8751213307737285e7cdac4fbecsewardj
9999db268cc7344d8751213307737285e7cdac4fbecsewardj
10009db268cc7344d8751213307737285e7cdac4fbecsewardj
10019db268cc7344d8751213307737285e7cdac4fbecsewardj//////////////////////////////////////////////////////////
10029db268cc7344d8751213307737285e7cdac4fbecsewardj//                                                      //
10039db268cc7344d8751213307737285e7cdac4fbecsewardj//                       ISTRI_45                       //
10049db268cc7344d8751213307737285e7cdac4fbecsewardj//                                                      //
10059db268cc7344d8751213307737285e7cdac4fbecsewardj//////////////////////////////////////////////////////////
10069db268cc7344d8751213307737285e7cdac4fbecsewardj
10079db268cc7344d8751213307737285e7cdac4fbecsewardjUInt h_pcmpistri_45 ( V128* argL, V128* argR )
10089db268cc7344d8751213307737285e7cdac4fbecsewardj{
10099db268cc7344d8751213307737285e7cdac4fbecsewardj   V128 block[2];
10109db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(&block[0], argL, sizeof(V128));
10119db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(&block[1], argR, sizeof(V128));
10129db268cc7344d8751213307737285e7cdac4fbecsewardj   ULong res, flags;
10139db268cc7344d8751213307737285e7cdac4fbecsewardj   __asm__ __volatile__(
10149db268cc7344d8751213307737285e7cdac4fbecsewardj      "subq      $1024,  %%rsp"             "\n\t"
10159db268cc7344d8751213307737285e7cdac4fbecsewardj      "movdqu    0(%2),  %%xmm2"            "\n\t"
10169db268cc7344d8751213307737285e7cdac4fbecsewardj      "movdqu    16(%2), %%xmm11"           "\n\t"
10179db268cc7344d8751213307737285e7cdac4fbecsewardj      "pcmpistri $0x45,  %%xmm2, %%xmm11"   "\n\t"
10189db268cc7344d8751213307737285e7cdac4fbecsewardj//"pcmpistrm $0x04, %%xmm2, %%xmm11"   "\n\t"
10199db268cc7344d8751213307737285e7cdac4fbecsewardj//"movd %%xmm0, %%ecx" "\n\t"
10209db268cc7344d8751213307737285e7cdac4fbecsewardj      "pushfq"                              "\n\t"
10219db268cc7344d8751213307737285e7cdac4fbecsewardj      "popq      %%rdx"                     "\n\t"
10229db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      %%rcx,  %0"                "\n\t"
10239db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      %%rdx,  %1"                "\n\t"
10249db268cc7344d8751213307737285e7cdac4fbecsewardj      "addq      $1024,  %%rsp"             "\n\t"
10259db268cc7344d8751213307737285e7cdac4fbecsewardj      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
10269db268cc7344d8751213307737285e7cdac4fbecsewardj      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
10279db268cc7344d8751213307737285e7cdac4fbecsewardj   );
10289db268cc7344d8751213307737285e7cdac4fbecsewardj   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
10299db268cc7344d8751213307737285e7cdac4fbecsewardj}
10309db268cc7344d8751213307737285e7cdac4fbecsewardj
10319db268cc7344d8751213307737285e7cdac4fbecsewardjUInt s_pcmpistri_45 ( V128* argLU, V128* argRU )
10329db268cc7344d8751213307737285e7cdac4fbecsewardj{
10339db268cc7344d8751213307737285e7cdac4fbecsewardj   V128 resV;
10349db268cc7344d8751213307737285e7cdac4fbecsewardj   UInt resOSZACP, resECX;
10359db268cc7344d8751213307737285e7cdac4fbecsewardj   Bool ok
10369db268cc7344d8751213307737285e7cdac4fbecsewardj      = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU,
10379db268cc7344d8751213307737285e7cdac4fbecsewardj			    zmask_from_V128(argLU),
10389db268cc7344d8751213307737285e7cdac4fbecsewardj			    zmask_from_V128(argRU),
10399db268cc7344d8751213307737285e7cdac4fbecsewardj			    0x45, False/*!isSTRM*/
10409db268cc7344d8751213307737285e7cdac4fbecsewardj        );
10419db268cc7344d8751213307737285e7cdac4fbecsewardj   assert(ok);
10429db268cc7344d8751213307737285e7cdac4fbecsewardj   resECX = resV.uInt[0];
10439db268cc7344d8751213307737285e7cdac4fbecsewardj   return (resOSZACP << 16) | resECX;
10449db268cc7344d8751213307737285e7cdac4fbecsewardj}
10459db268cc7344d8751213307737285e7cdac4fbecsewardj
10469db268cc7344d8751213307737285e7cdac4fbecsewardjvoid istri_45 ( void )
10479db268cc7344d8751213307737285e7cdac4fbecsewardj{
10489db268cc7344d8751213307737285e7cdac4fbecsewardj   char* wot = "45";
10499db268cc7344d8751213307737285e7cdac4fbecsewardj   UInt(*h)(V128*,V128*) = h_pcmpistri_45;
10509db268cc7344d8751213307737285e7cdac4fbecsewardj   UInt(*s)(V128*,V128*) = s_pcmpistri_45;
10519db268cc7344d8751213307737285e7cdac4fbecsewardj
10529db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaabbbbccccdddd", "000000000000bbcc");
10539db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaabbbbccccdddd", "000000000000ccbb");
10549db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "baaabbbbccccdddd", "000000000000ccbb");
10559db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "baaabbbbccccdddc", "000000000000ccbb");
10569db268cc7344d8751213307737285e7cdac4fbecsewardj
10579db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000ccbb");
10589db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "bbbbbbbb00bbbbbb", "000000000000ccbb");
10599db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "bbbbbbbbbbbb00bb", "000000000000ccbb");
10609db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "bbbbbbbbbbbbbb00", "000000000000ccbb");
10619db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000000000000000", "000000000000ccbb");
10629db268cc7344d8751213307737285e7cdac4fbecsewardj
10639db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
10649db268cc7344d8751213307737285e7cdac4fbecsewardj
10659db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000ccbb");
10669db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000bb");
10679db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "bb44bb44bb44bb44", "000000006622ccbb");
10689db268cc7344d8751213307737285e7cdac4fbecsewardj
10699db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "bb44bb44bb44bb44", "000000000022ccbb");
10709db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "bb44bb44bb44bb44", "000000000000ccbb");
10719db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "bb44bb44bb44bb44", "00000000000000bb");
10729db268cc7344d8751213307737285e7cdac4fbecsewardj
10739db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0011223344556677", "0000997755442211");
10749db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "1122334455667711", "0000997755442211");
10759db268cc7344d8751213307737285e7cdac4fbecsewardj
10769db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0011223344556677", "0000aa8866553322");
10779db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "1122334455667711", "0000aa8866553322");
10789db268cc7344d8751213307737285e7cdac4fbecsewardj}
10799db268cc7344d8751213307737285e7cdac4fbecsewardj
10809db268cc7344d8751213307737285e7cdac4fbecsewardj
10819db268cc7344d8751213307737285e7cdac4fbecsewardj//////////////////////////////////////////////////////////
10829db268cc7344d8751213307737285e7cdac4fbecsewardj//                                                      //
10839db268cc7344d8751213307737285e7cdac4fbecsewardj//                       ISTRI_01                       //
10849db268cc7344d8751213307737285e7cdac4fbecsewardj//                                                      //
10859db268cc7344d8751213307737285e7cdac4fbecsewardj//////////////////////////////////////////////////////////
10869db268cc7344d8751213307737285e7cdac4fbecsewardj
10879db268cc7344d8751213307737285e7cdac4fbecsewardjUInt h_pcmpistri_01 ( V128* argL, V128* argR )
10889db268cc7344d8751213307737285e7cdac4fbecsewardj{
10899db268cc7344d8751213307737285e7cdac4fbecsewardj   V128 block[2];
10909db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(&block[0], argL, sizeof(V128));
10919db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(&block[1], argR, sizeof(V128));
10929db268cc7344d8751213307737285e7cdac4fbecsewardj   ULong res, flags;
10939db268cc7344d8751213307737285e7cdac4fbecsewardj   __asm__ __volatile__(
10949db268cc7344d8751213307737285e7cdac4fbecsewardj      "subq      $1024,  %%rsp"             "\n\t"
10959db268cc7344d8751213307737285e7cdac4fbecsewardj      "movdqu    0(%2),  %%xmm2"            "\n\t"
10969db268cc7344d8751213307737285e7cdac4fbecsewardj      "movdqu    16(%2), %%xmm11"           "\n\t"
10979db268cc7344d8751213307737285e7cdac4fbecsewardj      "pcmpistri $0x01,  %%xmm2, %%xmm11"   "\n\t"
10989db268cc7344d8751213307737285e7cdac4fbecsewardj//"pcmpistrm $0x01, %%xmm2, %%xmm11"   "\n\t"
10999db268cc7344d8751213307737285e7cdac4fbecsewardj//"movd %%xmm0, %%ecx" "\n\t"
11009db268cc7344d8751213307737285e7cdac4fbecsewardj      "pushfq"                              "\n\t"
11019db268cc7344d8751213307737285e7cdac4fbecsewardj      "popq      %%rdx"                     "\n\t"
11029db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      %%rcx,  %0"                "\n\t"
11039db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      %%rdx,  %1"                "\n\t"
11049db268cc7344d8751213307737285e7cdac4fbecsewardj      "addq      $1024,  %%rsp"             "\n\t"
11059db268cc7344d8751213307737285e7cdac4fbecsewardj      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
11069db268cc7344d8751213307737285e7cdac4fbecsewardj      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
11079db268cc7344d8751213307737285e7cdac4fbecsewardj   );
11089db268cc7344d8751213307737285e7cdac4fbecsewardj   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
11099db268cc7344d8751213307737285e7cdac4fbecsewardj}
11109db268cc7344d8751213307737285e7cdac4fbecsewardj
11119db268cc7344d8751213307737285e7cdac4fbecsewardjUInt s_pcmpistri_01 ( V128* argLU, V128* argRU )
11129db268cc7344d8751213307737285e7cdac4fbecsewardj{
11139db268cc7344d8751213307737285e7cdac4fbecsewardj   V128 resV;
11149db268cc7344d8751213307737285e7cdac4fbecsewardj   UInt resOSZACP, resECX;
11159db268cc7344d8751213307737285e7cdac4fbecsewardj   Bool ok
11169db268cc7344d8751213307737285e7cdac4fbecsewardj      = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU,
11179db268cc7344d8751213307737285e7cdac4fbecsewardj			    zmask_from_V128(argLU),
11189db268cc7344d8751213307737285e7cdac4fbecsewardj			    zmask_from_V128(argRU),
11199db268cc7344d8751213307737285e7cdac4fbecsewardj			    0x01, False/*!isSTRM*/
11209db268cc7344d8751213307737285e7cdac4fbecsewardj        );
11219db268cc7344d8751213307737285e7cdac4fbecsewardj   assert(ok);
11229db268cc7344d8751213307737285e7cdac4fbecsewardj   resECX = resV.uInt[0];
11239db268cc7344d8751213307737285e7cdac4fbecsewardj   return (resOSZACP << 16) | resECX;
11249db268cc7344d8751213307737285e7cdac4fbecsewardj}
11259db268cc7344d8751213307737285e7cdac4fbecsewardj
11269db268cc7344d8751213307737285e7cdac4fbecsewardjvoid istri_01 ( void )
11279db268cc7344d8751213307737285e7cdac4fbecsewardj{
11289db268cc7344d8751213307737285e7cdac4fbecsewardj   char* wot = "01";
11299db268cc7344d8751213307737285e7cdac4fbecsewardj   UInt(*h)(V128*,V128*) = h_pcmpistri_01;
11309db268cc7344d8751213307737285e7cdac4fbecsewardj   UInt(*s)(V128*,V128*) = s_pcmpistri_01;
11319db268cc7344d8751213307737285e7cdac4fbecsewardj
11329db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aacdacbdaacdaacd", "00000000000000aa");
11339db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aabbaabbaabbaabb", "00000000000000bb");
11349db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aabbccddaabbccdd", "000000000000aabb");
11359db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
11369db268cc7344d8751213307737285e7cdac4fbecsewardj
11379db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbccdd");
11389db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "00bbccddaabbccdd", "00000000aabbccdd");
11399db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aabbccddaa00ccdd", "00000000aabbccdd");
11409db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aabbccddaabb00dd", "00000000aabbccdd");
11419db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aabbccddaabbcc00", "00000000aabbccdd");
11429db268cc7344d8751213307737285e7cdac4fbecsewardj
11439db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbccdd");
11449db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aa00ccdd");
11459db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabb00dd");
11469db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbcc00");
11479db268cc7344d8751213307737285e7cdac4fbecsewardj
11489db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
11499db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
11509db268cc7344d8751213307737285e7cdac4fbecsewardj
11519db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
11529db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
11539db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000aabbaabbaabb", "000000000000bbbb");
11549db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000ccddaabbccdd", "00000000bbaabbaa");
11559db268cc7344d8751213307737285e7cdac4fbecsewardj
11569db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000ccddaabbccdd", "000000bbaabbaa00");
11579db268cc7344d8751213307737285e7cdac4fbecsewardj
11589db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
11599db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
11609db268cc7344d8751213307737285e7cdac4fbecsewardj}
11619db268cc7344d8751213307737285e7cdac4fbecsewardj
11629db268cc7344d8751213307737285e7cdac4fbecsewardj
//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_39                       //
//                                                      //
//////////////////////////////////////////////////////////
11689db268cc7344d8751213307737285e7cdac4fbecsewardj
11699db268cc7344d8751213307737285e7cdac4fbecsewardjUInt h_pcmpistri_39 ( V128* argL, V128* argR )
11709db268cc7344d8751213307737285e7cdac4fbecsewardj{
11719db268cc7344d8751213307737285e7cdac4fbecsewardj   V128 block[2];
11729db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(&block[0], argL, sizeof(V128));
11739db268cc7344d8751213307737285e7cdac4fbecsewardj   memcpy(&block[1], argR, sizeof(V128));
11749db268cc7344d8751213307737285e7cdac4fbecsewardj   ULong res, flags;
11759db268cc7344d8751213307737285e7cdac4fbecsewardj   __asm__ __volatile__(
11769db268cc7344d8751213307737285e7cdac4fbecsewardj      "subq      $1024,  %%rsp"             "\n\t"
11779db268cc7344d8751213307737285e7cdac4fbecsewardj      "movdqu    0(%2),  %%xmm2"            "\n\t"
11789db268cc7344d8751213307737285e7cdac4fbecsewardj      "movdqu    16(%2), %%xmm11"           "\n\t"
11799db268cc7344d8751213307737285e7cdac4fbecsewardj      "pcmpistri $0x39,  %%xmm2, %%xmm11"   "\n\t"
11809db268cc7344d8751213307737285e7cdac4fbecsewardj      "pushfq"                              "\n\t"
11819db268cc7344d8751213307737285e7cdac4fbecsewardj      "popq      %%rdx"                     "\n\t"
11829db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      %%rcx,  %0"                "\n\t"
11839db268cc7344d8751213307737285e7cdac4fbecsewardj      "movq      %%rdx,  %1"                "\n\t"
11849db268cc7344d8751213307737285e7cdac4fbecsewardj      "addq      $1024,  %%rsp"             "\n\t"
11859db268cc7344d8751213307737285e7cdac4fbecsewardj      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
11869db268cc7344d8751213307737285e7cdac4fbecsewardj      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
11879db268cc7344d8751213307737285e7cdac4fbecsewardj   );
11889db268cc7344d8751213307737285e7cdac4fbecsewardj   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
11899db268cc7344d8751213307737285e7cdac4fbecsewardj}
11909db268cc7344d8751213307737285e7cdac4fbecsewardj
11919db268cc7344d8751213307737285e7cdac4fbecsewardjUInt s_pcmpistri_39 ( V128* argLU, V128* argRU )
11929db268cc7344d8751213307737285e7cdac4fbecsewardj{
11939db268cc7344d8751213307737285e7cdac4fbecsewardj   V128 resV;
11949db268cc7344d8751213307737285e7cdac4fbecsewardj   UInt resOSZACP, resECX;
11959db268cc7344d8751213307737285e7cdac4fbecsewardj   Bool ok
11969db268cc7344d8751213307737285e7cdac4fbecsewardj      = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU,
11979db268cc7344d8751213307737285e7cdac4fbecsewardj			    zmask_from_V128(argLU),
11989db268cc7344d8751213307737285e7cdac4fbecsewardj			    zmask_from_V128(argRU),
11999db268cc7344d8751213307737285e7cdac4fbecsewardj			    0x39, False/*!isSTRM*/
12009db268cc7344d8751213307737285e7cdac4fbecsewardj        );
12019db268cc7344d8751213307737285e7cdac4fbecsewardj   assert(ok);
12029db268cc7344d8751213307737285e7cdac4fbecsewardj   resECX = resV.uInt[0];
12039db268cc7344d8751213307737285e7cdac4fbecsewardj   return (resOSZACP << 16) | resECX;
12049db268cc7344d8751213307737285e7cdac4fbecsewardj}
12059db268cc7344d8751213307737285e7cdac4fbecsewardj
12069db268cc7344d8751213307737285e7cdac4fbecsewardjvoid istri_39 ( void )
12079db268cc7344d8751213307737285e7cdac4fbecsewardj{
12089db268cc7344d8751213307737285e7cdac4fbecsewardj   char* wot = "39";
12099db268cc7344d8751213307737285e7cdac4fbecsewardj   UInt(*h)(V128*,V128*) = h_pcmpistri_39;
12109db268cc7344d8751213307737285e7cdac4fbecsewardj   UInt(*s)(V128*,V128*) = s_pcmpistri_39;
12119db268cc7344d8751213307737285e7cdac4fbecsewardj
12129db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
12139db268cc7344d8751213307737285e7cdac4fbecsewardj
12149db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
12159db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
12169db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
12179db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
12189db268cc7344d8751213307737285e7cdac4fbecsewardj
12199db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
12209db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
12219db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
12229db268cc7344d8751213307737285e7cdac4fbecsewardj
12239db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
12249db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
12259db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
12269db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
12279db268cc7344d8751213307737285e7cdac4fbecsewardj
12289db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
12299db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
12309db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
12319db268cc7344d8751213307737285e7cdac4fbecsewardj
12329db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
12339db268cc7344d8751213307737285e7cdac4fbecsewardj
12349db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa");
12359db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa");
12369db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaa00aa");
12379db268cc7344d8751213307737285e7cdac4fbecsewardj
12389db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaaaaaa");
12399db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa");
12409db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaa00aa");
12419db268cc7344d8751213307737285e7cdac4fbecsewardj
12429db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa");
12439db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa00aaaaaa");
12449db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaa00aaaaaa");
12459db268cc7344d8751213307737285e7cdac4fbecsewardj
12469db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000000000000000", "aaaaaaaa00aaaaaa");
12479db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "8000000000000000", "aaaaaaaa00aaaaaa");
12489db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000000000000001", "aaaaaaaa00aaaaaa");
12499db268cc7344d8751213307737285e7cdac4fbecsewardj
12509db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
12519db268cc7344d8751213307737285e7cdac4fbecsewardj   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
12529db268cc7344d8751213307737285e7cdac4fbecsewardj}
12539db268cc7344d8751213307737285e7cdac4fbecsewardj
12549db268cc7344d8751213307737285e7cdac4fbecsewardj
12559db268cc7344d8751213307737285e7cdac4fbecsewardj
//////////////////////////////////////////////////////////
//                                                      //
//                         main                         //
//                                                      //
//////////////////////////////////////////////////////////
12619db268cc7344d8751213307737285e7cdac4fbecsewardj
/* Program entry point: invokes each istri_XX driver once, in the
   fixed order listed below, then exits with status 0.  The order is
   kept as-is because it is the order in which the drivers execute. */
int main ( void )
{
   istri_4B();  istri_3B();  istri_09();  istri_1B();  istri_03();
   istri_0D();  istri_13();  istri_45();  istri_01();  istri_39();

   return 0;
}
1276