1
2/* Tests in detail the core arithmetic for pcmp{e,i}str{i,m} using
3   pcmpistri to drive it.  Does not check the e-vs-i or i-vs-m
4   aspect. */
5
6#include <string.h>
7#include <stdio.h>
8#include <assert.h>
9
/* Short aliases for the basic integer types used by this test. */
typedef  unsigned int            UInt;
typedef  signed int              Int;
typedef  unsigned char           UChar;
typedef  signed char             Char;
typedef  unsigned long long int  ULong;

/* Boolean held in a single unsigned byte, with its two constants. */
typedef  UChar  Bool;
#define True  ((Bool)1)
#define False ((Bool)0)

/* A vector operand: the same storage viewed either as 16 bytes or
   as four UInt words. */
typedef union {
   UChar uChar[16];
   UInt  uInt[4];
} V128;
26
/* Bit positions of the O, S, Z, A, P and C flags, listed from the
   least significant upwards. */
#define SHIFT_C   0
#define SHIFT_P   2
#define SHIFT_A   4
#define SHIFT_Z   6
#define SHIFT_S   7
#define SHIFT_O   11

/* Single-bit masks (unsigned long long) for the same flags. */
#define MASK_C    (1ULL << SHIFT_C)
#define MASK_P    (1ULL << SHIFT_P)
#define MASK_A    (1ULL << SHIFT_A)
#define MASK_Z    (1ULL << SHIFT_Z)
#define MASK_S    (1ULL << SHIFT_S)
#define MASK_O    (1ULL << SHIFT_O)
40
41
42UInt clz32 ( UInt x )
43{
44   Int y, m, n;
45   y = -(x >> 16);
46   m = (y >> 16) & 16;
47   n = 16 - m;
48   x = x >> m;
49   y = x - 0x100;
50   m = (y >> 16) & 8;
51   n = n + m;
52   x = x << m;
53   y = x - 0x1000;
54   m = (y >> 16) & 4;
55   n = n + m;
56   x = x << m;
57   y = x - 0x4000;
58   m = (y >> 16) & 2;
59   n = n + m;
60   x = x << m;
61   y = x >> 14;
62   m = y & ~(y >> 1);
63   return n + 2 - m;
64}
65
66UInt ctz32 ( UInt x )
67{
68   return 32 - clz32((~x) & (x-1));
69}
70
71void expand ( V128* dst, char* summary )
72{
73   Int i;
74   assert( strlen(summary) == 16 );
75   for (i = 0; i < 16; i++) {
76      UChar xx = 0;
77      UChar x = summary[15-i];
78      if      (x >= '0' && x <= '9') { xx = x - '0'; }
79      else if (x >= 'A' && x <= 'F') { xx = x - 'A' + 10; }
80      else if (x >= 'a' && x <= 'f') { xx = x - 'a' + 10; }
81      else assert(0);
82
83      assert(xx < 16);
84      xx = (xx << 4) | xx;
85      assert(xx < 256);
86      dst->uChar[i] = xx;
87   }
88}
89
90void try_istri ( char* which,
91                 UInt(*h_fn)(V128*,V128*),
92                 UInt(*s_fn)(V128*,V128*),
93                 char* summL, char* summR )
94{
95   assert(strlen(which) == 2);
96   V128 argL, argR;
97   expand(&argL, summL);
98   expand(&argR, summR);
99   UInt h_res = h_fn(&argL, &argR);
100   UInt s_res = s_fn(&argL, &argR);
101   printf("istri %s  %s %s -> %08x %08x %s\n",
102          which, summL, summR, h_res, s_res, h_res == s_res ? "" : "!!!!");
103}
104
105UInt zmask_from_V128 ( V128* arg )
106{
107   UInt i, res = 0;
108   for (i = 0; i < 16; i++) {
109      res |=  ((arg->uChar[i] == 0) ? 1 : 0) << i;
110   }
111   return res;
112}
113
114//////////////////////////////////////////////////////////
115//                                                      //
116//                       GENERAL                        //
117//                                                      //
118//////////////////////////////////////////////////////////
119
120
121/* Given partial results from a pcmpXstrX operation (intRes1,
122   basically), generate an I format (index value for ECX) output, and
123   also the new OSZACP flags.
124*/
125static
126void pcmpXstrX_WRK_gen_output_fmt_I(/*OUT*/V128* resV,
127                                    /*OUT*/UInt* resOSZACP,
128                                    UInt intRes1,
129                                    UInt zmaskL, UInt zmaskR,
130                                    UInt validL,
131                                    UInt pol, UInt idx )
132{
133   assert((pol >> 2) == 0);
134   assert((idx >> 1) == 0);
135
136   UInt intRes2 = 0;
137   switch (pol) {
138      case 0: intRes2 = intRes1;          break; // pol +
139      case 1: intRes2 = ~intRes1;         break; // pol -
140      case 2: intRes2 = intRes1;          break; // pol m+
141      case 3: intRes2 = intRes1 ^ validL; break; // pol m-
142   }
143   intRes2 &= 0xFFFF;
144
145   // generate ecx value
146   UInt newECX = 0;
147   if (idx) {
148     // index of ms-1-bit
149     newECX = intRes2 == 0 ? 16 : (31 - clz32(intRes2));
150   } else {
151     // index of ls-1-bit
152     newECX = intRes2 == 0 ? 16 : ctz32(intRes2);
153   }
154
155   *(UInt*)(&resV[0]) = newECX;
156
157   // generate new flags, common to all ISTRI and ISTRM cases
158   *resOSZACP    // A, P are zero
159     = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0
160     | ((zmaskL == 0)  ? 0 : MASK_Z) // Z == 1 iff any in argL is 0
161     | ((zmaskR == 0)  ? 0 : MASK_S) // S == 1 iff any in argR is 0
162     | ((intRes2 & 1) << SHIFT_O);   // O == IntRes2[0]
163}
164
165
166/* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M}
167   variants.
168
169   For xSTRI variants, the new ECX value is placed in the 32 bits
170   pointed to by *resV.  For xSTRM variants, the result is a 128 bit
171   value and is placed at *resV in the obvious way.
172
173   For all variants, the new OSZACP value is placed at *resOSZACP.
174
175   argLV and argRV are the vector args.  The caller must prepare a
176   16-bit mask for each, zmaskL and zmaskR.  For ISTRx variants this
   must be 1 for each zero byte of the respective arg.  For ESTRx
178   variants this is derived from the explicit length indication, and
179   must be 0 in all places except at the bit index corresponding to
180   the valid length (0 .. 16).  If the valid length is 16 then the
181   mask must be all zeroes.  In all cases, bits 31:16 must be zero.
182
183   imm8 is the original immediate from the instruction.  isSTRM
184   indicates whether this is a xSTRM or xSTRI variant, which controls
185   how much of *res is written.
186
187   If the given imm8 case can be handled, the return value is True.
   If not, False is returned, and neither *res nor *resOSZACP are
189   altered.
190*/
191
192Bool pcmpXstrX_WRK ( /*OUT*/V128* resV,
193                     /*OUT*/UInt* resOSZACP,
194                     V128* argLV,  V128* argRV,
195                     UInt zmaskL, UInt zmaskR,
196                     UInt imm8,   Bool isSTRM )
197{
198   assert(imm8 < 0x80);
199   assert((zmaskL >> 16) == 0);
200   assert((zmaskR >> 16) == 0);
201
202   /* Explicitly reject any imm8 values that haven't been validated,
203      even if they would probably work.  Life is too short to have
204      unvalidated cases in the code base. */
205   switch (imm8) {
206      case 0x00: case 0x02:
207      case 0x08: case 0x0A: case 0x0C: case 0x0E:
208      case 0x10: case 0x12: case 0x14:
209      case 0x18: case 0x1A:
210      case 0x30:            case 0x34:
211      case 0x38: case 0x3A:
212      case 0x40: case 0x42: case 0x44: case 0x46:
213                 case 0x4A:
214                 case 0x62:
215      case 0x70: case 0x72:
216         break;
217      default:
218         return False;
219   }
220
221   UInt fmt = (imm8 >> 0) & 3; // imm8[1:0]  data format
222   UInt agg = (imm8 >> 2) & 3; // imm8[3:2]  aggregation fn
223   UInt pol = (imm8 >> 4) & 3; // imm8[5:4]  polarity
224   UInt idx = (imm8 >> 6) & 1; // imm8[6]    1==msb/bytemask
225
226   /*----------------------------------------*/
227   /*-- strcmp on byte data                --*/
228   /*----------------------------------------*/
229
230   if (agg == 2/*equal each, aka strcmp*/
231       && (fmt == 0/*ub*/ || fmt == 2/*sb*/)
232       && !isSTRM) {
233      Int    i;
234      UChar* argL = (UChar*)argLV;
235      UChar* argR = (UChar*)argRV;
236      UInt boolResII = 0;
237      for (i = 15; i >= 0; i--) {
238         UChar cL  = argL[i];
239         UChar cR  = argR[i];
240         boolResII = (boolResII << 1) | (cL == cR ? 1 : 0);
241      }
242      UInt validL = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
243      UInt validR = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
244
245      // do invalidation, common to all equal-each cases
246      UInt intRes1
247         = (boolResII & validL & validR)  // if both valid, use cmpres
248           | (~ (validL | validR));       // if both invalid, force 1
249                                          // else force 0
250      intRes1 &= 0xFFFF;
251
252      // generate I-format output
253      pcmpXstrX_WRK_gen_output_fmt_I(
254         resV, resOSZACP,
255         intRes1, zmaskL, zmaskR, validL, pol, idx
256      );
257
258      return True;
259   }
260
261   /*----------------------------------------*/
262   /*-- set membership on byte data        --*/
263   /*----------------------------------------*/
264
265   if (agg == 0/*equal any, aka find chars in a set*/
266       && (fmt == 0/*ub*/ || fmt == 2/*sb*/)
267       && !isSTRM) {
268      /* argL: the string,  argR: charset */
269      UInt   si, ci;
270      UChar* argL    = (UChar*)argLV;
271      UChar* argR    = (UChar*)argRV;
272      UInt   boolRes = 0;
273      UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
274      UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
275
276      for (si = 0; si < 16; si++) {
277         if ((validL & (1 << si)) == 0)
278            // run off the end of the string.
279            break;
280         UInt m = 0;
281         for (ci = 0; ci < 16; ci++) {
282            if ((validR & (1 << ci)) == 0) break;
283            if (argR[ci] == argL[si]) { m = 1; break; }
284         }
285         boolRes |= (m << si);
286      }
287
288      // boolRes is "pre-invalidated"
289      UInt intRes1 = boolRes & 0xFFFF;
290
291      // generate I-format output
292      pcmpXstrX_WRK_gen_output_fmt_I(
293         resV, resOSZACP,
294         intRes1, zmaskL, zmaskR, validL, pol, idx
295      );
296
297      return True;
298   }
299
300   /*----------------------------------------*/
301   /*-- substring search on byte data      --*/
302   /*----------------------------------------*/
303
304   if (agg == 3/*equal ordered, aka substring search*/
305       && (fmt == 0/*ub*/ || fmt == 2/*sb*/)
306       && !isSTRM) {
307
308      /* argL: haystack,  argR: needle */
309      UInt   ni, hi;
310      UChar* argL    = (UChar*)argLV;
311      UChar* argR    = (UChar*)argRV;
312      UInt   boolRes = 0;
313      UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
314      UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
315      for (hi = 0; hi < 16; hi++) {
316         UInt m = 1;
317         for (ni = 0; ni < 16; ni++) {
318            if ((validR & (1 << ni)) == 0) break;
319            UInt i = ni + hi;
320            if (i >= 16) break;
321            if (argL[i] != argR[ni]) { m = 0; break; }
322         }
323         boolRes |= (m << hi);
324         if ((validL & (1 << hi)) == 0)
325            // run off the end of the haystack
326            break;
327      }
328
329      // boolRes is "pre-invalidated"
330      UInt intRes1 = boolRes & 0xFFFF;
331
332      // generate I-format output
333      pcmpXstrX_WRK_gen_output_fmt_I(
334         resV, resOSZACP,
335         intRes1, zmaskL, zmaskR, validL, pol, idx
336      );
337
338      return True;
339   }
340
341   /*----------------------------------------*/
342   /*-- ranges, unsigned byte data         --*/
343   /*----------------------------------------*/
344
345   if (agg == 1/*ranges*/
346       && fmt == 0/*ub*/
347       && !isSTRM) {
348
349      /* argL: string,  argR: range-pairs */
350      UInt   ri, si;
351      UChar* argL    = (UChar*)argLV;
352      UChar* argR    = (UChar*)argRV;
353      UInt   boolRes = 0;
354      UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
355      UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
356      for (si = 0; si < 16; si++) {
357         if ((validL & (1 << si)) == 0)
358            // run off the end of the string
359            break;
360         UInt m = 0;
361         for (ri = 0; ri < 16; ri += 2) {
362            if ((validR & (3 << ri)) != (3 << ri)) break;
363            if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
364               m = 1; break;
365            }
366         }
367         boolRes |= (m << si);
368      }
369
370      // boolRes is "pre-invalidated"
371      UInt intRes1 = boolRes & 0xFFFF;
372
373      // generate I-format output
374      pcmpXstrX_WRK_gen_output_fmt_I(
375         resV, resOSZACP,
376         intRes1, zmaskL, zmaskR, validL, pol, idx
377      );
378
379      return True;
380   }
381
382   /*----------------------------------------*/
383   /*-- ranges, signed byte data           --*/
384   /*----------------------------------------*/
385
386   if (agg == 1/*ranges*/
387       && fmt == 2/*sb*/
388       && !isSTRM) {
389
390      /* argL: string,  argR: range-pairs */
391      UInt   ri, si;
392      Char*  argL    = (Char*)argLV;
393      Char*  argR    = (Char*)argRV;
394      UInt   boolRes = 0;
395      UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
396      UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
397      for (si = 0; si < 16; si++) {
398         if ((validL & (1 << si)) == 0)
399            // run off the end of the string
400            break;
401         UInt m = 0;
402         for (ri = 0; ri < 16; ri += 2) {
403            if ((validR & (3 << ri)) != (3 << ri)) break;
404            if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
405               m = 1; break;
406            }
407         }
408         boolRes |= (m << si);
409      }
410
411      // boolRes is "pre-invalidated"
412      UInt intRes1 = boolRes & 0xFFFF;
413
414      // generate I-format output
415      pcmpXstrX_WRK_gen_output_fmt_I(
416         resV, resOSZACP,
417         intRes1, zmaskL, zmaskR, validL, pol, idx
418      );
419
420      return True;
421   }
422
423   return False;
424}
425
426
427//////////////////////////////////////////////////////////
428//                                                      //
429//                       ISTRI_4A                       //
430//                                                      //
431//////////////////////////////////////////////////////////
432
/* Execute a real pcmpistri with immediate 0x4A on the two operands.
   Returns the captured flags register masked with 0x8D5 (the bits
   at positions 11, 7, 6, 4, 2 and 0) shifted left by 16, ORed with
   the low 16 bits of RCX as left by the instruction. */
UInt h_pcmpistri_4A ( V128* argL, V128* argR )
{
   /* Copy both operands into one contiguous block so the asm needs
      a single base register: argL at offset 0, argR at offset 16. */
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   /* %rsp is moved down by 1024 around the sequence and restored at
      the end.  NOTE(review): presumably this keeps pushfq from
      writing over compiler-owned data just below %rsp (the x86-64
      red zone) -- confirm.
      argL is loaded into %xmm2 and argR into %xmm11; after the
      compare, the flags are read via pushfq/popq into %rdx and the
      result is read from %rcx. */
   __asm__ __volatile__(
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x4A,  %%xmm2, %%xmm11"   "\n\t"
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   /* Pack as (flags << 16) | index, the same layout the s_ variant
      in this file returns. */
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
454
455UInt s_pcmpistri_4A ( V128* argLU, V128* argRU )
456{
457   V128 resV;
458   UInt resOSZACP, resECX;
459   Bool ok
460      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
461                       zmask_from_V128(argLU),
462                       zmask_from_V128(argRU),
463                       0x4A, False/*!isSTRM*/
464        );
465   assert(ok);
466   resECX = resV.uInt[0];
467   return (resOSZACP << 16) | resECX;
468}
469
470void istri_4A ( void )
471{
472   char* wot = "4A";
473   UInt(*h)(V128*,V128*) = h_pcmpistri_4A;
474   UInt(*s)(V128*,V128*) = s_pcmpistri_4A;
475
476   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
477
478   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
479   try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
480   try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
481   try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
482
483   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
484   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
485   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
486
487   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
488   try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
489   try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
490   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
491
492   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
493   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
494   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
495
496   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
497
498   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
499   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
500   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
501
502   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
503   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
504   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
505
506   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
507   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
508   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
509
510   try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
511   try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
512   try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
513
514   try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
515   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
516}
517
518//////////////////////////////////////////////////////////
519//                                                      //
520//                       ISTRI_3A                       //
521//                                                      //
522//////////////////////////////////////////////////////////
523
/* Execute a real pcmpistri with immediate 0x3A on the two operands.
   Returns the captured flags register masked with 0x8D5 (the bits
   at positions 11, 7, 6, 4, 2 and 0) shifted left by 16, ORed with
   the low 16 bits of RCX as left by the instruction. */
UInt h_pcmpistri_3A ( V128* argL, V128* argR )
{
   /* Copy both operands into one contiguous block so the asm needs
      a single base register: argL at offset 0, argR at offset 16. */
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   /* %rsp is moved down by 1024 around the sequence and restored at
      the end.  NOTE(review): presumably this keeps pushfq from
      writing over compiler-owned data just below %rsp (the x86-64
      red zone) -- confirm.
      argL is loaded into %xmm2 and argR into %xmm11; after the
      compare, the flags are read via pushfq/popq into %rdx and the
      result is read from %rcx. */
   __asm__ __volatile__(
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x3A,  %%xmm2, %%xmm11"   "\n\t"
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   /* Pack as (flags << 16) | index, the same layout the s_ variant
      in this file returns. */
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
545
546UInt s_pcmpistri_3A ( V128* argLU, V128* argRU )
547{
548   V128 resV;
549   UInt resOSZACP, resECX;
550   Bool ok
551      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
552                       zmask_from_V128(argLU),
553                       zmask_from_V128(argRU),
554                       0x3A, False/*!isSTRM*/
555        );
556   assert(ok);
557   resECX = resV.uInt[0];
558   return (resOSZACP << 16) | resECX;
559}
560
561void istri_3A ( void )
562{
563   char* wot = "3A";
564   UInt(*h)(V128*,V128*) = h_pcmpistri_3A;
565   UInt(*s)(V128*,V128*) = s_pcmpistri_3A;
566
567   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
568
569   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
570   try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
571   try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
572   try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
573
574   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
575   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
576   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
577
578   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
579   try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
580   try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
581   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
582
583   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
584   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
585   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
586
587   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
588
589   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
590   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
591   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
592
593   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
594   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
595   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
596
597   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
598   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
599   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
600
601   try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
602   try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
603   try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
604
605   try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
606   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
607}
608
609
610
611//////////////////////////////////////////////////////////
612//                                                      //
613//                       ISTRI_0C                       //
614//                                                      //
615//////////////////////////////////////////////////////////
616
617__attribute__((noinline))
618UInt h_pcmpistri_0C ( V128* argL, V128* argR )
619{
620   V128 block[2];
621   memcpy(&block[0], argL, sizeof(V128));
622   memcpy(&block[1], argR, sizeof(V128));
623   ULong res = 0, flags = 0;
624   __asm__ __volatile__(
625      "movdqu    0(%2),  %%xmm2"            "\n\t"
626      "movdqu    16(%2), %%xmm11"           "\n\t"
627      "pcmpistri $0x0C,  %%xmm2, %%xmm11"   "\n\t"
628      //"pcmpistrm $0x0C,  %%xmm2, %%xmm11"   "\n\t"
629      //"movd %%xmm0, %%ecx" "\n\t"
630      "pushfq"                              "\n\t"
631      "popq      %%rdx"                     "\n\t"
632      "movq      %%rcx,  %0"                "\n\t"
633      "movq      %%rdx,  %1"                "\n\t"
634      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
635      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
636   );
637   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
638}
639
640UInt s_pcmpistri_0C ( V128* argLU, V128* argRU )
641{
642   V128 resV;
643   UInt resOSZACP, resECX;
644   Bool ok
645      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
646                       zmask_from_V128(argLU),
647                       zmask_from_V128(argRU),
648                       0x0C, False/*!isSTRM*/
649        );
650   assert(ok);
651   resECX = resV.uInt[0];
652   return (resOSZACP << 16) | resECX;
653}
654
655void istri_0C ( void )
656{
657   char* wot = "0C";
658   UInt(*h)(V128*,V128*) = h_pcmpistri_0C;
659   UInt(*s)(V128*,V128*) = s_pcmpistri_0C;
660
661   try_istri(wot,h,s, "111111111abcde11", "00000000000abcde");
662
663   try_istri(wot,h,s, "111111111abcde11", "0000abcde00abcde");
664
665   try_istri(wot,h,s, "1111111111abcde1", "00000000000abcde");
666   try_istri(wot,h,s, "11111111111abcde", "00000000000abcde");
667   try_istri(wot,h,s, "111111111111abcd", "00000000000abcde");
668
669   try_istri(wot,h,s, "111abcde1abcde11", "00000000000abcde");
670
671   try_istri(wot,h,s, "11abcde11abcde11", "00000000000abcde");
672   try_istri(wot,h,s, "1abcde111abcde11", "00000000000abcde");
673   try_istri(wot,h,s, "abcde1111abcde11", "00000000000abcde");
674   try_istri(wot,h,s, "bcde11111abcde11", "00000000000abcde");
675   try_istri(wot,h,s, "cde111111abcde11", "00000000000abcde");
676
677   try_istri(wot,h,s, "01abcde11abcde11", "00000000000abcde");
678   try_istri(wot,h,s, "00abcde11abcde11", "00000000000abcde");
679   try_istri(wot,h,s, "000bcde11abcde11", "00000000000abcde");
680
681   try_istri(wot,h,s, "00abcde10abcde11", "00000000000abcde");
682   try_istri(wot,h,s, "00abcde100bcde11", "00000000000abcde");
683
684   try_istri(wot,h,s, "1111111111111234", "0000000000000000");
685   try_istri(wot,h,s, "1111111111111234", "0000000000000001");
686   try_istri(wot,h,s, "1111111111111234", "0000000000000011");
687
688   try_istri(wot,h,s, "1111111111111234", "1111111111111234");
689   try_istri(wot,h,s, "a111111111111111", "000000000000000a");
690   try_istri(wot,h,s, "b111111111111111", "000000000000000a");
691
692   try_istri(wot,h,s, "b111111111111111", "0000000000000000");
693   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
694   try_istri(wot,h,s, "123456789abcdef1", "0000000000000000");
695   try_istri(wot,h,s, "0000000000000000", "123456789abcdef1");
696}
697
698
699//////////////////////////////////////////////////////////
700//                                                      //
701//                       ISTRI_08                       //
702//                                                      //
703//////////////////////////////////////////////////////////
704
/* Execute a real pcmpistri with immediate 0x08 on the two operands.
   Returns the captured flags register masked with 0x8D5 (the bits
   at positions 11, 7, 6, 4, 2 and 0) shifted left by 16, ORed with
   the low 16 bits of RCX as left by the instruction. */
UInt h_pcmpistri_08 ( V128* argL, V128* argR )
{
   /* Copy both operands into one contiguous block so the asm needs
      a single base register: argL at offset 0, argR at offset 16. */
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   /* %rsp is moved down by 1024 around the sequence and restored at
      the end.  NOTE(review): presumably this keeps pushfq from
      writing over compiler-owned data just below %rsp (the x86-64
      red zone) -- confirm.
      argL is loaded into %xmm2 and argR into %xmm11; after the
      compare, the flags are read via pushfq/popq into %rdx and the
      result is read from %rcx. */
   __asm__ __volatile__(
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x08,  %%xmm2, %%xmm11"   "\n\t"
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   /* Pack as (flags << 16) | index, the same layout the s_ variant
      in this file returns. */
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
726
727UInt s_pcmpistri_08 ( V128* argLU, V128* argRU )
728{
729   V128 resV;
730   UInt resOSZACP, resECX;
731   Bool ok
732      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
733                       zmask_from_V128(argLU),
734                       zmask_from_V128(argRU),
735                       0x08, False/*!isSTRM*/
736        );
737   assert(ok);
738   resECX = resV.uInt[0];
739   return (resOSZACP << 16) | resECX;
740}
741
742void istri_08 ( void )
743{
744   char* wot = "08";
745   UInt(*h)(V128*,V128*) = h_pcmpistri_08;
746   UInt(*s)(V128*,V128*) = s_pcmpistri_08;
747
748   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
749
750   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
751   try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
752   try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
753   try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
754
755   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
756   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
757   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
758
759   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
760   try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
761   try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
762   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
763
764   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
765   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
766   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
767
768   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
769
770   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
771   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
772   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
773
774   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
775   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
776   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
777
778   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
779   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
780   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
781
782   try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
783   try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
784   try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
785
786   try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
787   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
788}
789
790
791
792//////////////////////////////////////////////////////////
793//                                                      //
794//                       ISTRI_18                       //
795//                                                      //
796//////////////////////////////////////////////////////////
797
/* Execute a real pcmpistri with immediate 0x18 on the two operands.
   Returns the captured flags register masked with 0x8D5 (the bits
   at positions 11, 7, 6, 4, 2 and 0) shifted left by 16, ORed with
   the low 16 bits of RCX as left by the instruction. */
UInt h_pcmpistri_18 ( V128* argL, V128* argR )
{
   /* Copy both operands into one contiguous block so the asm needs
      a single base register: argL at offset 0, argR at offset 16. */
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   /* %rsp is moved down by 1024 around the sequence and restored at
      the end.  NOTE(review): presumably this keeps pushfq from
      writing over compiler-owned data just below %rsp (the x86-64
      red zone) -- confirm.
      argL is loaded into %xmm2 and argR into %xmm11; after the
      compare, the flags are read via pushfq/popq into %rdx and the
      result is read from %rcx. */
   __asm__ __volatile__(
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x18,  %%xmm2, %%xmm11"   "\n\t"
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   /* Pack as (flags << 16) | index, the same layout the s_ variant
      in this file returns. */
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
819
820UInt s_pcmpistri_18 ( V128* argLU, V128* argRU )
821{
822   V128 resV;
823   UInt resOSZACP, resECX;
824   Bool ok
825      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
826                       zmask_from_V128(argLU),
827                       zmask_from_V128(argRU),
828                       0x18, False/*!isSTRM*/
829        );
830   assert(ok);
831   resECX = resV.uInt[0];
832   return (resOSZACP << 16) | resECX;
833}
834
835void istri_18 ( void )
836{
837   char* wot = "18";
838   UInt(*h)(V128*,V128*) = h_pcmpistri_18;
839   UInt(*s)(V128*,V128*) = s_pcmpistri_18;
840
841   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
842
843   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
844   try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
845   try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
846   try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
847
848   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
849   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
850   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
851
852   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
853   try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
854   try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
855   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
856
857   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
858   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
859   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
860
861   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
862
863   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
864   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
865   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
866
867   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
868   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
869   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
870
871   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
872   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
873   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
874
875   try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
876   try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
877   try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
878
879   try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
880   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
881}
882
883
884
885//////////////////////////////////////////////////////////
886//                                                      //
887//                       ISTRI_1A                       //
888//                                                      //
889//////////////////////////////////////////////////////////
890
891UInt h_pcmpistri_1A ( V128* argL, V128* argR )
892{
893   V128 block[2];
894   memcpy(&block[0], argL, sizeof(V128));
895   memcpy(&block[1], argR, sizeof(V128));
896   ULong res, flags;
897   __asm__ __volatile__(
898      "subq      $1024,  %%rsp"             "\n\t"
899      "movdqu    0(%2),  %%xmm2"            "\n\t"
900      "movdqu    16(%2), %%xmm11"           "\n\t"
901      "pcmpistri $0x1A,  %%xmm2, %%xmm11"   "\n\t"
902      "pushfq"                              "\n\t"
903      "popq      %%rdx"                     "\n\t"
904      "movq      %%rcx,  %0"                "\n\t"
905      "movq      %%rdx,  %1"                "\n\t"
906      "addq      $1024,  %%rsp"             "\n\t"
907      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
908      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
909   );
910   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
911}
912
913UInt s_pcmpistri_1A ( V128* argLU, V128* argRU )
914{
915   V128 resV;
916   UInt resOSZACP, resECX;
917   Bool ok
918      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
919                       zmask_from_V128(argLU),
920                       zmask_from_V128(argRU),
921                       0x1A, False/*!isSTRM*/
922        );
923   assert(ok);
924   resECX = resV.uInt[0];
925   return (resOSZACP << 16) | resECX;
926}
927
928void istri_1A ( void )
929{
930   char* wot = "1A";
931   UInt(*h)(V128*,V128*) = h_pcmpistri_1A;
932   UInt(*s)(V128*,V128*) = s_pcmpistri_1A;
933
934   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
935
936   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
937   try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
938   try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
939   try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
940
941   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
942   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
943   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
944
945   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
946   try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
947   try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
948   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
949
950   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
951   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
952   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
953
954   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
955
956   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
957   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
958   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
959
960   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
961   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
962   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
963
964   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
965   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
966   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
967
968   try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
969   try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
970   try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
971
972   try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
973   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
974}
975
976
977
978//////////////////////////////////////////////////////////
979//                                                      //
980//                       ISTRI_02                       //
981//                                                      //
982//////////////////////////////////////////////////////////
983
984UInt h_pcmpistri_02 ( V128* argL, V128* argR )
985{
986   V128 block[2];
987   memcpy(&block[0], argL, sizeof(V128));
988   memcpy(&block[1], argR, sizeof(V128));
989   ULong res, flags;
990   __asm__ __volatile__(
991      "subq      $1024,  %%rsp"             "\n\t"
992      "movdqu    0(%2),  %%xmm2"            "\n\t"
993      "movdqu    16(%2), %%xmm11"           "\n\t"
994      "pcmpistri $0x02,  %%xmm2, %%xmm11"   "\n\t"
995//"pcmpistrm $0x02, %%xmm2, %%xmm11"   "\n\t"
996//"movd %%xmm0, %%ecx" "\n\t"
997      "pushfq"                              "\n\t"
998      "popq      %%rdx"                     "\n\t"
999      "movq      %%rcx,  %0"                "\n\t"
1000      "movq      %%rdx,  %1"                "\n\t"
1001      "addq      $1024,  %%rsp"             "\n\t"
1002      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1003      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1004   );
1005   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1006}
1007
1008UInt s_pcmpistri_02 ( V128* argLU, V128* argRU )
1009{
1010   V128 resV;
1011   UInt resOSZACP, resECX;
1012   Bool ok
1013      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1014                       zmask_from_V128(argLU),
1015                       zmask_from_V128(argRU),
1016                       0x02, False/*!isSTRM*/
1017        );
1018   assert(ok);
1019   resECX = resV.uInt[0];
1020   return (resOSZACP << 16) | resECX;
1021}
1022
1023void istri_02 ( void )
1024{
1025   char* wot = "02";
1026   UInt(*h)(V128*,V128*) = h_pcmpistri_02;
1027   UInt(*s)(V128*,V128*) = s_pcmpistri_02;
1028
1029   try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1030   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1031   try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1032   try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1033
1034   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1035   try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1036   try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1037   try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1038   try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1039
1040   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1041   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1042   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1043   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1044
1045   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1046   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1047
1048   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1049   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1050   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1051   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1052
1053   try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1054
1055   try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1056   try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1057}
1058
1059
1060//////////////////////////////////////////////////////////
1061//                                                      //
1062//                       ISTRI_12                       //
1063//                                                      //
1064//////////////////////////////////////////////////////////
1065
1066UInt h_pcmpistri_12 ( V128* argL, V128* argR )
1067{
1068   V128 block[2];
1069   memcpy(&block[0], argL, sizeof(V128));
1070   memcpy(&block[1], argR, sizeof(V128));
1071   ULong res, flags;
1072   __asm__ __volatile__(
1073      "subq      $1024,  %%rsp"             "\n\t"
1074      "movdqu    0(%2),  %%xmm2"            "\n\t"
1075      "movdqu    16(%2), %%xmm11"           "\n\t"
1076      "pcmpistri $0x12,  %%xmm2, %%xmm11"   "\n\t"
1077//"pcmpistrm $0x12, %%xmm2, %%xmm11"   "\n\t"
1078//"movd %%xmm0, %%ecx" "\n\t"
1079      "pushfq"                              "\n\t"
1080      "popq      %%rdx"                     "\n\t"
1081      "movq      %%rcx,  %0"                "\n\t"
1082      "movq      %%rdx,  %1"                "\n\t"
1083      "addq      $1024,  %%rsp"             "\n\t"
1084      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1085      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1086   );
1087   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1088}
1089
1090UInt s_pcmpistri_12 ( V128* argLU, V128* argRU )
1091{
1092   V128 resV;
1093   UInt resOSZACP, resECX;
1094   Bool ok
1095      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1096                       zmask_from_V128(argLU),
1097                       zmask_from_V128(argRU),
1098                       0x12, False/*!isSTRM*/
1099        );
1100   assert(ok);
1101   resECX = resV.uInt[0];
1102   return (resOSZACP << 16) | resECX;
1103}
1104
1105void istri_12 ( void )
1106{
1107   char* wot = "12";
1108   UInt(*h)(V128*,V128*) = h_pcmpistri_12;
1109   UInt(*s)(V128*,V128*) = s_pcmpistri_12;
1110
1111   try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1112   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1113   try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1114   try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1115
1116   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1117   try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1118   try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1119   try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1120   try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1121
1122   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1123   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1124   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1125   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1126
1127   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1128   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1129
1130   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1131   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1132   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1133   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1134
1135   try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1136
1137   try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1138   try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1139}
1140
1141
1142
1143//////////////////////////////////////////////////////////
1144//                                                      //
1145//                       ISTRI_44                       //
1146//                                                      //
1147//////////////////////////////////////////////////////////
1148
1149UInt h_pcmpistri_44 ( V128* argL, V128* argR )
1150{
1151   V128 block[2];
1152   memcpy(&block[0], argL, sizeof(V128));
1153   memcpy(&block[1], argR, sizeof(V128));
1154   ULong res, flags;
1155   __asm__ __volatile__(
1156      "subq      $1024,  %%rsp"             "\n\t"
1157      "movdqu    0(%2),  %%xmm2"            "\n\t"
1158      "movdqu    16(%2), %%xmm11"           "\n\t"
1159      "pcmpistri $0x44,  %%xmm2, %%xmm11"   "\n\t"
1160//"pcmpistrm $0x04, %%xmm2, %%xmm11"   "\n\t"
1161//"movd %%xmm0, %%ecx" "\n\t"
1162      "pushfq"                              "\n\t"
1163      "popq      %%rdx"                     "\n\t"
1164      "movq      %%rcx,  %0"                "\n\t"
1165      "movq      %%rdx,  %1"                "\n\t"
1166      "addq      $1024,  %%rsp"             "\n\t"
1167      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1168      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1169   );
1170   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1171}
1172
1173UInt s_pcmpistri_44 ( V128* argLU, V128* argRU )
1174{
1175   V128 resV;
1176   UInt resOSZACP, resECX;
1177   Bool ok
1178      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1179                       zmask_from_V128(argLU),
1180                       zmask_from_V128(argRU),
1181                       0x44, False/*!isSTRM*/
1182        );
1183   assert(ok);
1184   resECX = resV.uInt[0];
1185   return (resOSZACP << 16) | resECX;
1186}
1187
1188void istri_44 ( void )
1189{
1190   char* wot = "44";
1191   UInt(*h)(V128*,V128*) = h_pcmpistri_44;
1192   UInt(*s)(V128*,V128*) = s_pcmpistri_44;
1193
1194   try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc");
1195   try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb");
1196   try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb");
1197   try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb");
1198
1199   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1200   try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb");
1201   try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb");
1202   try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb");
1203   try_istri(wot,h,s, "0000000000000000", "00000000000000cb");
1204
1205   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1206
1207   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1208   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b");
1209   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb");
1210
1211   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb");
1212   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb");
1213   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
1214
1215   try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
1216   try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
1217
1218   try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
1219   try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
1220
1221   try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a");
1222   try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
1223   try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54");
1224   try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
1225}
1226
1227
1228//////////////////////////////////////////////////////////
1229//                                                      //
1230//                       ISTRI_00                       //
1231//                                                      //
1232//////////////////////////////////////////////////////////
1233
1234UInt h_pcmpistri_00 ( V128* argL, V128* argR )
1235{
1236   V128 block[2];
1237   memcpy(&block[0], argL, sizeof(V128));
1238   memcpy(&block[1], argR, sizeof(V128));
1239   ULong res, flags;
1240   __asm__ __volatile__(
1241      "subq      $1024,  %%rsp"             "\n\t"
1242      "movdqu    0(%2),  %%xmm2"            "\n\t"
1243      "movdqu    16(%2), %%xmm11"           "\n\t"
1244      "pcmpistri $0x00,  %%xmm2, %%xmm11"   "\n\t"
1245//"pcmpistrm $0x00, %%xmm2, %%xmm11"   "\n\t"
1246//"movd %%xmm0, %%ecx" "\n\t"
1247      "pushfq"                              "\n\t"
1248      "popq      %%rdx"                     "\n\t"
1249      "movq      %%rcx,  %0"                "\n\t"
1250      "movq      %%rdx,  %1"                "\n\t"
1251      "addq      $1024,  %%rsp"             "\n\t"
1252      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1253      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1254   );
1255   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1256}
1257
1258UInt s_pcmpistri_00 ( V128* argLU, V128* argRU )
1259{
1260   V128 resV;
1261   UInt resOSZACP, resECX;
1262   Bool ok
1263      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1264                       zmask_from_V128(argLU),
1265                       zmask_from_V128(argRU),
1266                       0x00, False/*!isSTRM*/
1267        );
1268   assert(ok);
1269   resECX = resV.uInt[0];
1270   return (resOSZACP << 16) | resECX;
1271}
1272
1273void istri_00 ( void )
1274{
1275   char* wot = "00";
1276   UInt(*h)(V128*,V128*) = h_pcmpistri_00;
1277   UInt(*s)(V128*,V128*) = s_pcmpistri_00;
1278
1279   try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1280   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1281   try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1282   try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1283
1284   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1285   try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1286   try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1287   try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1288   try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1289
1290   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1291   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1292   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1293   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1294
1295   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1296   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1297
1298   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1299   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1300   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1301   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1302
1303   try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1304
1305   try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1306   try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1307}
1308
1309
1310//////////////////////////////////////////////////////////
1311//                                                      //
1312//                       ISTRI_38                       //
1313//                                                      //
1314//////////////////////////////////////////////////////////
1315
1316UInt h_pcmpistri_38 ( V128* argL, V128* argR )
1317{
1318   V128 block[2];
1319   memcpy(&block[0], argL, sizeof(V128));
1320   memcpy(&block[1], argR, sizeof(V128));
1321   ULong res, flags;
1322   __asm__ __volatile__(
1323      "subq      $1024,  %%rsp"             "\n\t"
1324      "movdqu    0(%2),  %%xmm2"            "\n\t"
1325      "movdqu    16(%2), %%xmm11"           "\n\t"
1326      "pcmpistri $0x38,  %%xmm2, %%xmm11"   "\n\t"
1327      "pushfq"                              "\n\t"
1328      "popq      %%rdx"                     "\n\t"
1329      "movq      %%rcx,  %0"                "\n\t"
1330      "movq      %%rdx,  %1"                "\n\t"
1331      "addq      $1024,  %%rsp"             "\n\t"
1332      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1333      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1334   );
1335   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1336}
1337
1338UInt s_pcmpistri_38 ( V128* argLU, V128* argRU )
1339{
1340   V128 resV;
1341   UInt resOSZACP, resECX;
1342   Bool ok
1343      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1344                       zmask_from_V128(argLU),
1345                       zmask_from_V128(argRU),
1346                       0x38, False/*!isSTRM*/
1347        );
1348   assert(ok);
1349   resECX = resV.uInt[0];
1350   return (resOSZACP << 16) | resECX;
1351}
1352
1353void istri_38 ( void )
1354{
1355   char* wot = "38";
1356   UInt(*h)(V128*,V128*) = h_pcmpistri_38;
1357   UInt(*s)(V128*,V128*) = s_pcmpistri_38;
1358
1359   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1360
1361   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1362   try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1363   try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
1364   try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
1365
1366   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
1367   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
1368   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
1369
1370   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1371   try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1372   try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1373   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1374
1375   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1376   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
1377   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
1378
1379   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1380
1381   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
1382   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
1383   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
1384
1385   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
1386   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
1387   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
1388
1389   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
1390   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
1391   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
1392
1393   try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
1394   try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
1395   try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
1396
1397   try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
1398   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
1399}
1400
1401
1402
1403//////////////////////////////////////////////////////////
1404//                                                      //
1405//                       ISTRI_46                       //
1406//                                                      //
1407//////////////////////////////////////////////////////////
1408
1409UInt h_pcmpistri_46 ( V128* argL, V128* argR )
1410{
1411   V128 block[2];
1412   memcpy(&block[0], argL, sizeof(V128));
1413   memcpy(&block[1], argR, sizeof(V128));
1414   ULong res, flags;
1415   __asm__ __volatile__(
1416      "subq      $1024,  %%rsp"             "\n\t"
1417      "movdqu    0(%2),  %%xmm2"            "\n\t"
1418      "movdqu    16(%2), %%xmm11"           "\n\t"
1419      "pcmpistri $0x46,  %%xmm2, %%xmm11"   "\n\t"
1420      "pushfq"                              "\n\t"
1421      "popq      %%rdx"                     "\n\t"
1422      "movq      %%rcx,  %0"                "\n\t"
1423      "movq      %%rdx,  %1"                "\n\t"
1424      "addq      $1024,  %%rsp"             "\n\t"
1425      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1426      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1427   );
1428   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1429}
1430
1431UInt s_pcmpistri_46 ( V128* argLU, V128* argRU )
1432{
1433   V128 resV;
1434   UInt resOSZACP, resECX;
1435   Bool ok
1436      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1437                       zmask_from_V128(argLU),
1438                       zmask_from_V128(argRU),
1439                       0x46, False/*!isSTRM*/
1440        );
1441   assert(ok);
1442   resECX = resV.uInt[0];
1443   return (resOSZACP << 16) | resECX;
1444}
1445
1446void istri_46 ( void )
1447{
1448   char* wot = "46";
1449   UInt(*h)(V128*,V128*) = h_pcmpistri_46;
1450   UInt(*s)(V128*,V128*) = s_pcmpistri_46;
1451
1452   try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc");
1453   try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb");
1454   try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb");
1455   try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb");
1456
1457   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1458   try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb");
1459   try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb");
1460   try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb");
1461   try_istri(wot,h,s, "0000000000000000", "00000000000000cb");
1462
1463   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1464
1465   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1466   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b");
1467   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb");
1468
1469   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb");
1470   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb");
1471   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
1472
1473   try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
1474   try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
1475
1476   try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
1477   try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
1478
1479   try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a");
1480   try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
1481   try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54");
1482   try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
1483}
1484
1485
1486//////////////////////////////////////////////////////////
1487//                                                      //
1488//                       ISTRI_30                       //
1489//                                                      //
1490//////////////////////////////////////////////////////////
1491
1492UInt h_pcmpistri_30 ( V128* argL, V128* argR )
1493{
1494   V128 block[2];
1495   memcpy(&block[0], argL, sizeof(V128));
1496   memcpy(&block[1], argR, sizeof(V128));
1497   ULong res, flags;
1498   __asm__ __volatile__(
1499      "subq      $1024,  %%rsp"             "\n\t"
1500      "movdqu    0(%2),  %%xmm2"            "\n\t"
1501      "movdqu    16(%2), %%xmm11"           "\n\t"
1502      "pcmpistri $0x30,  %%xmm2, %%xmm11"   "\n\t"
1503      "pushfq"                              "\n\t"
1504      "popq      %%rdx"                     "\n\t"
1505      "movq      %%rcx,  %0"                "\n\t"
1506      "movq      %%rdx,  %1"                "\n\t"
1507      "addq      $1024,  %%rsp"             "\n\t"
1508      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1509      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1510   );
1511   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1512}
1513
1514UInt s_pcmpistri_30 ( V128* argLU, V128* argRU )
1515{
1516   V128 resV;
1517   UInt resOSZACP, resECX;
1518   Bool ok
1519      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1520                       zmask_from_V128(argLU),
1521                       zmask_from_V128(argRU),
1522                       0x30, False/*!isSTRM*/
1523        );
1524   assert(ok);
1525   resECX = resV.uInt[0];
1526   return (resOSZACP << 16) | resECX;
1527}
1528
1529void istri_30 ( void )
1530{
1531   char* wot = "30";
1532   UInt(*h)(V128*,V128*) = h_pcmpistri_30;
1533   UInt(*s)(V128*,V128*) = s_pcmpistri_30;
1534
1535   try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1536   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1537   try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1538   try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1539
1540   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1541   try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1542   try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1543   try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1544   try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1545
1546   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1547   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1548   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1549   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1550
1551   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1552   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1553
1554   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1555   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1556   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1557   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1558
1559   try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1560
1561   try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1562   try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1563}
1564
1565
1566//////////////////////////////////////////////////////////
1567//                                                      //
1568//                       ISTRI_40                       //
1569//                                                      //
1570//////////////////////////////////////////////////////////
1571
1572UInt h_pcmpistri_40 ( V128* argL, V128* argR )
1573{
1574   V128 block[2];
1575   memcpy(&block[0], argL, sizeof(V128));
1576   memcpy(&block[1], argR, sizeof(V128));
1577   ULong res, flags;
1578   __asm__ __volatile__(
1579      "subq      $1024,  %%rsp"             "\n\t"
1580      "movdqu    0(%2),  %%xmm2"            "\n\t"
1581      "movdqu    16(%2), %%xmm11"           "\n\t"
1582      "pcmpistri $0x40,  %%xmm2, %%xmm11"   "\n\t"
1583      "pushfq"                              "\n\t"
1584      "popq      %%rdx"                     "\n\t"
1585      "movq      %%rcx,  %0"                "\n\t"
1586      "movq      %%rdx,  %1"                "\n\t"
1587      "addq      $1024,  %%rsp"             "\n\t"
1588      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1589      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1590   );
1591   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1592}
1593
1594UInt s_pcmpistri_40 ( V128* argLU, V128* argRU )
1595{
1596   V128 resV;
1597   UInt resOSZACP, resECX;
1598   Bool ok
1599      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1600                       zmask_from_V128(argLU),
1601                       zmask_from_V128(argRU),
1602                       0x40, False/*!isSTRM*/
1603        );
1604   assert(ok);
1605   resECX = resV.uInt[0];
1606   return (resOSZACP << 16) | resECX;
1607}
1608
1609void istri_40 ( void )
1610{
1611   char* wot = "40";
1612   UInt(*h)(V128*,V128*) = h_pcmpistri_40;
1613   UInt(*s)(V128*,V128*) = s_pcmpistri_40;
1614
1615   try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1616   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1617   try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1618   try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1619
1620   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1621   try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1622   try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1623   try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1624   try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1625
1626   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1627   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1628   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1629   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1630
1631   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1632   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1633
1634   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1635   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1636   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1637   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1638
1639   try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1640
1641   try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1642   try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1643}
1644
1645
1646//////////////////////////////////////////////////////////
1647//                                                      //
1648//                       ISTRI_42                       //
1649//                                                      //
1650//////////////////////////////////////////////////////////
1651
1652UInt h_pcmpistri_42 ( V128* argL, V128* argR )
1653{
1654   V128 block[2];
1655   memcpy(&block[0], argL, sizeof(V128));
1656   memcpy(&block[1], argR, sizeof(V128));
1657   ULong res, flags;
1658   __asm__ __volatile__(
1659      "subq      $1024,  %%rsp"             "\n\t"
1660      "movdqu    0(%2),  %%xmm2"            "\n\t"
1661      "movdqu    16(%2), %%xmm11"           "\n\t"
1662      "pcmpistri $0x42,  %%xmm2, %%xmm11"   "\n\t"
1663      "pushfq"                              "\n\t"
1664      "popq      %%rdx"                     "\n\t"
1665      "movq      %%rcx,  %0"                "\n\t"
1666      "movq      %%rdx,  %1"                "\n\t"
1667      "addq      $1024,  %%rsp"             "\n\t"
1668      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1669      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1670   );
1671   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1672}
1673
1674UInt s_pcmpistri_42 ( V128* argLU, V128* argRU )
1675{
1676   V128 resV;
1677   UInt resOSZACP, resECX;
1678   Bool ok
1679      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1680                       zmask_from_V128(argLU),
1681                       zmask_from_V128(argRU),
1682                       0x42, False/*!isSTRM*/
1683        );
1684   assert(ok);
1685   resECX = resV.uInt[0];
1686   return (resOSZACP << 16) | resECX;
1687}
1688
1689void istri_42 ( void )
1690{
1691   char* wot = "42";
1692   UInt(*h)(V128*,V128*) = h_pcmpistri_42;
1693   UInt(*s)(V128*,V128*) = s_pcmpistri_42;
1694
1695   try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1696   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1697   try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1698   try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1699
1700   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1701   try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1702   try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1703   try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1704   try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1705
1706   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1707   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1708   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1709   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1710
1711   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1712   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1713
1714   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1715   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1716   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1717   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1718
1719   try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1720
1721   try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1722   try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1723}
1724
1725
1726//////////////////////////////////////////////////////////
1727//                                                      //
1728//                       ISTRI_0E                       //
1729//                                                      //
1730//////////////////////////////////////////////////////////
1731
1732__attribute__((noinline))
1733UInt h_pcmpistri_0E ( V128* argL, V128* argR )
1734{
1735   V128 block[2];
1736   memcpy(&block[0], argL, sizeof(V128));
1737   memcpy(&block[1], argR, sizeof(V128));
1738   ULong res = 0, flags = 0;
1739   __asm__ __volatile__(
1740      "movdqu    0(%2),  %%xmm2"            "\n\t"
1741      "movdqu    16(%2), %%xmm11"           "\n\t"
1742      "pcmpistri $0x0E,  %%xmm2, %%xmm11"   "\n\t"
1743      "pushfq"                              "\n\t"
1744      "popq      %%rdx"                     "\n\t"
1745      "movq      %%rcx,  %0"                "\n\t"
1746      "movq      %%rdx,  %1"                "\n\t"
1747      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1748      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1749   );
1750   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1751}
1752
1753UInt s_pcmpistri_0E ( V128* argLU, V128* argRU )
1754{
1755   V128 resV;
1756   UInt resOSZACP, resECX;
1757   Bool ok
1758      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1759                       zmask_from_V128(argLU),
1760                       zmask_from_V128(argRU),
1761                       0x0E, False/*!isSTRM*/
1762        );
1763   assert(ok);
1764   resECX = resV.uInt[0];
1765   return (resOSZACP << 16) | resECX;
1766}
1767
1768void istri_0E ( void )
1769{
1770   char* wot = "0E";
1771   UInt(*h)(V128*,V128*) = h_pcmpistri_0E;
1772   UInt(*s)(V128*,V128*) = s_pcmpistri_0E;
1773
1774   try_istri(wot,h,s, "111111111abcde11", "00000000000abcde");
1775
1776   try_istri(wot,h,s, "111111111abcde11", "0000abcde00abcde");
1777
1778   try_istri(wot,h,s, "1111111111abcde1", "00000000000abcde");
1779   try_istri(wot,h,s, "11111111111abcde", "00000000000abcde");
1780   try_istri(wot,h,s, "111111111111abcd", "00000000000abcde");
1781
1782   try_istri(wot,h,s, "111abcde1abcde11", "00000000000abcde");
1783
1784   try_istri(wot,h,s, "11abcde11abcde11", "00000000000abcde");
1785   try_istri(wot,h,s, "1abcde111abcde11", "00000000000abcde");
1786   try_istri(wot,h,s, "abcde1111abcde11", "00000000000abcde");
1787   try_istri(wot,h,s, "bcde11111abcde11", "00000000000abcde");
1788   try_istri(wot,h,s, "cde111111abcde11", "00000000000abcde");
1789
1790   try_istri(wot,h,s, "01abcde11abcde11", "00000000000abcde");
1791   try_istri(wot,h,s, "00abcde11abcde11", "00000000000abcde");
1792   try_istri(wot,h,s, "000bcde11abcde11", "00000000000abcde");
1793
1794   try_istri(wot,h,s, "00abcde10abcde11", "00000000000abcde");
1795   try_istri(wot,h,s, "00abcde100bcde11", "00000000000abcde");
1796
1797   try_istri(wot,h,s, "1111111111111234", "0000000000000000");
1798   try_istri(wot,h,s, "1111111111111234", "0000000000000001");
1799   try_istri(wot,h,s, "1111111111111234", "0000000000000011");
1800
1801   try_istri(wot,h,s, "1111111111111234", "1111111111111234");
1802   try_istri(wot,h,s, "a111111111111111", "000000000000000a");
1803   try_istri(wot,h,s, "b111111111111111", "000000000000000a");
1804
1805   try_istri(wot,h,s, "b111111111111111", "0000000000000000");
1806   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1807   try_istri(wot,h,s, "123456789abcdef1", "0000000000000000");
1808   try_istri(wot,h,s, "0000000000000000", "123456789abcdef1");
1809}
1810
1811
1812//////////////////////////////////////////////////////////
1813//                                                      //
1814//                       ISTRI_34                       //
1815//                                                      //
1816//////////////////////////////////////////////////////////
1817
1818UInt h_pcmpistri_34 ( V128* argL, V128* argR )
1819{
1820   V128 block[2];
1821   memcpy(&block[0], argL, sizeof(V128));
1822   memcpy(&block[1], argR, sizeof(V128));
1823   ULong res, flags;
1824   __asm__ __volatile__(
1825      "subq      $1024,  %%rsp"             "\n\t"
1826      "movdqu    0(%2),  %%xmm2"            "\n\t"
1827      "movdqu    16(%2), %%xmm11"           "\n\t"
1828      "pcmpistri $0x34,  %%xmm2, %%xmm11"   "\n\t"
1829      "pushfq"                              "\n\t"
1830      "popq      %%rdx"                     "\n\t"
1831      "movq      %%rcx,  %0"                "\n\t"
1832      "movq      %%rdx,  %1"                "\n\t"
1833      "addq      $1024,  %%rsp"             "\n\t"
1834      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1835      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1836   );
1837   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1838}
1839
1840UInt s_pcmpistri_34 ( V128* argLU, V128* argRU )
1841{
1842   V128 resV;
1843   UInt resOSZACP, resECX;
1844   Bool ok
1845      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1846                       zmask_from_V128(argLU),
1847                       zmask_from_V128(argRU),
1848                       0x34, False/*!isSTRM*/
1849        );
1850   assert(ok);
1851   resECX = resV.uInt[0];
1852   return (resOSZACP << 16) | resECX;
1853}
1854
1855void istri_34 ( void )
1856{
1857   char* wot = "34";
1858   UInt(*h)(V128*,V128*) = h_pcmpistri_34;
1859   UInt(*s)(V128*,V128*) = s_pcmpistri_34;
1860
1861   try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc");
1862   try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb");
1863   try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb");
1864   try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb");
1865
1866   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1867   try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb");
1868   try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb");
1869   try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb");
1870   try_istri(wot,h,s, "0000000000000000", "00000000000000cb");
1871
1872   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1873
1874   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1875   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b");
1876   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb");
1877
1878   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb");
1879   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb");
1880   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
1881
1882   try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
1883   try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
1884
1885   try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
1886   try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
1887
1888   try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a");
1889   try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
1890   try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54");
1891   try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
1892}
1893
1894
1895//////////////////////////////////////////////////////////
1896//                                                      //
1897//                       ISTRI_14                       //
1898//                                                      //
1899//////////////////////////////////////////////////////////
1900
1901UInt h_pcmpistri_14 ( V128* argL, V128* argR )
1902{
1903   V128 block[2];
1904   memcpy(&block[0], argL, sizeof(V128));
1905   memcpy(&block[1], argR, sizeof(V128));
1906   ULong res, flags;
1907   __asm__ __volatile__(
1908      "subq      $1024,  %%rsp"             "\n\t"
1909      "movdqu    0(%2),  %%xmm2"            "\n\t"
1910      "movdqu    16(%2), %%xmm11"           "\n\t"
1911      "pcmpistri $0x14,  %%xmm2, %%xmm11"   "\n\t"
1912      "pushfq"                              "\n\t"
1913      "popq      %%rdx"                     "\n\t"
1914      "movq      %%rcx,  %0"                "\n\t"
1915      "movq      %%rdx,  %1"                "\n\t"
1916      "addq      $1024,  %%rsp"             "\n\t"
1917      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1918      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1919   );
1920   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1921}
1922
1923UInt s_pcmpistri_14 ( V128* argLU, V128* argRU )
1924{
1925   V128 resV;
1926   UInt resOSZACP, resECX;
1927   Bool ok
1928      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1929                       zmask_from_V128(argLU),
1930                       zmask_from_V128(argRU),
1931                       0x14, False/*!isSTRM*/
1932        );
1933   assert(ok);
1934   resECX = resV.uInt[0];
1935   return (resOSZACP << 16) | resECX;
1936}
1937
1938void istri_14 ( void )
1939{
1940   char* wot = "14";
1941   UInt(*h)(V128*,V128*) = h_pcmpistri_14;
1942   UInt(*s)(V128*,V128*) = s_pcmpistri_14;
1943
1944   try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc");
1945   try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb");
1946   try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb");
1947   try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb");
1948
1949   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1950   try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb");
1951   try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb");
1952   try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb");
1953   try_istri(wot,h,s, "0000000000000000", "00000000000000cb");
1954
1955   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1956
1957   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1958   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b");
1959   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb");
1960
1961   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb");
1962   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb");
1963   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
1964
1965   try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
1966   try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
1967
1968   try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
1969   try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
1970
1971   try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a");
1972   try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
1973   try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54");
1974   try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
1975}
1976
1977
1978//////////////////////////////////////////////////////////
1979//                                                      //
1980//                       ISTRI_70                       //
1981//                                                      //
1982//////////////////////////////////////////////////////////
1983
1984UInt h_pcmpistri_70 ( V128* argL, V128* argR )
1985{
1986   V128 block[2];
1987   memcpy(&block[0], argL, sizeof(V128));
1988   memcpy(&block[1], argR, sizeof(V128));
1989   ULong res, flags;
1990   __asm__ __volatile__(
1991      "subq      $1024,  %%rsp"             "\n\t"
1992      "movdqu    0(%2),  %%xmm2"            "\n\t"
1993      "movdqu    16(%2), %%xmm11"           "\n\t"
1994      "pcmpistri $0x70,  %%xmm2, %%xmm11"   "\n\t"
1995      "pushfq"                              "\n\t"
1996      "popq      %%rdx"                     "\n\t"
1997      "movq      %%rcx,  %0"                "\n\t"
1998      "movq      %%rdx,  %1"                "\n\t"
1999      "addq      $1024,  %%rsp"             "\n\t"
2000      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
2001      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
2002   );
2003   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
2004}
2005
2006UInt s_pcmpistri_70 ( V128* argLU, V128* argRU )
2007{
2008   V128 resV;
2009   UInt resOSZACP, resECX;
2010   Bool ok
2011      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
2012                       zmask_from_V128(argLU),
2013                       zmask_from_V128(argRU),
2014                       0x70, False/*!isSTRM*/
2015        );
2016   assert(ok);
2017   resECX = resV.uInt[0];
2018   return (resOSZACP << 16) | resECX;
2019}
2020
2021void istri_70 ( void )
2022{
2023   char* wot = "70";
2024   UInt(*h)(V128*,V128*) = h_pcmpistri_70;
2025   UInt(*s)(V128*,V128*) = s_pcmpistri_70;
2026
2027   try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
2028   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
2029   try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
2030   try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
2031
2032   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
2033   try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
2034   try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
2035   try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
2036   try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
2037
2038   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
2039   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
2040   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
2041   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
2042
2043   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
2044   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
2045
2046   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
2047   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
2048   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
2049   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
2050
2051   try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
2052
2053   try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
2054   try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
2055}
2056
2057
2058//////////////////////////////////////////////////////////
2059//                                                      //
2060//                       ISTRI_62                       //
2061//                                                      //
2062//////////////////////////////////////////////////////////
2063
2064UInt h_pcmpistri_62 ( V128* argL, V128* argR )
2065{
2066   V128 block[2];
2067   memcpy(&block[0], argL, sizeof(V128));
2068   memcpy(&block[1], argR, sizeof(V128));
2069   ULong res, flags;
2070   __asm__ __volatile__(
2071      "subq      $1024,  %%rsp"             "\n\t"
2072      "movdqu    0(%2),  %%xmm2"            "\n\t"
2073      "movdqu    16(%2), %%xmm11"           "\n\t"
2074      "pcmpistri $0x62,  %%xmm2, %%xmm11"   "\n\t"
2075      "pushfq"                              "\n\t"
2076      "popq      %%rdx"                     "\n\t"
2077      "movq      %%rcx,  %0"                "\n\t"
2078      "movq      %%rdx,  %1"                "\n\t"
2079      "addq      $1024,  %%rsp"             "\n\t"
2080      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
2081      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
2082   );
2083   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
2084}
2085
2086UInt s_pcmpistri_62 ( V128* argLU, V128* argRU )
2087{
2088   V128 resV;
2089   UInt resOSZACP, resECX;
2090   Bool ok
2091      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
2092                       zmask_from_V128(argLU),
2093                       zmask_from_V128(argRU),
2094                       0x62, False/*!isSTRM*/
2095        );
2096   assert(ok);
2097   resECX = resV.uInt[0];
2098   return (resOSZACP << 16) | resECX;
2099}
2100
2101void istri_62 ( void )
2102{
2103   char* wot = "62";
2104   UInt(*h)(V128*,V128*) = h_pcmpistri_62;
2105   UInt(*s)(V128*,V128*) = s_pcmpistri_62;
2106
2107   try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
2108   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
2109   try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
2110   try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
2111
2112   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
2113   try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
2114   try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
2115   try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
2116   try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
2117
2118   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
2119   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
2120   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
2121   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
2122
2123   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
2124   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
2125
2126   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
2127   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
2128   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
2129   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
2130
2131   try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
2132
2133   try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
2134   try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
2135}
2136
2137
2138//////////////////////////////////////////////////////////
2139//                                                      //
2140//                       ISTRI_72                       //
2141//                                                      //
2142//////////////////////////////////////////////////////////
2143
2144UInt h_pcmpistri_72 ( V128* argL, V128* argR )
2145{
2146   V128 block[2];
2147   memcpy(&block[0], argL, sizeof(V128));
2148   memcpy(&block[1], argR, sizeof(V128));
2149   ULong res, flags;
2150   __asm__ __volatile__(
2151      "subq      $1024,  %%rsp"             "\n\t"
2152      "movdqu    0(%2),  %%xmm2"            "\n\t"
2153      "movdqu    16(%2), %%xmm11"           "\n\t"
2154      "pcmpistri $0x72,  %%xmm2, %%xmm11"   "\n\t"
2155      "pushfq"                              "\n\t"
2156      "popq      %%rdx"                     "\n\t"
2157      "movq      %%rcx,  %0"                "\n\t"
2158      "movq      %%rdx,  %1"                "\n\t"
2159      "addq      $1024,  %%rsp"             "\n\t"
2160      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
2161      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
2162   );
2163   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
2164}
2165
2166UInt s_pcmpistri_72 ( V128* argLU, V128* argRU )
2167{
2168   V128 resV;
2169   UInt resOSZACP, resECX;
2170   Bool ok
2171      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
2172                       zmask_from_V128(argLU),
2173                       zmask_from_V128(argRU),
2174                       0x72, False/*!isSTRM*/
2175        );
2176   assert(ok);
2177   resECX = resV.uInt[0];
2178   return (resOSZACP << 16) | resECX;
2179}
2180
2181void istri_72 ( void )
2182{
2183   char* wot = "72";
2184   UInt(*h)(V128*,V128*) = h_pcmpistri_72;
2185   UInt(*s)(V128*,V128*) = s_pcmpistri_72;
2186
2187   try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
2188   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
2189   try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
2190   try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
2191
2192   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
2193   try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
2194   try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
2195   try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
2196   try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
2197
2198   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
2199   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
2200   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
2201   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
2202
2203   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
2204   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
2205
2206   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
2207   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
2208   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
2209   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
2210
2211   try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
2212
2213   try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
2214   try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
2215}
2216
2217
2218//////////////////////////////////////////////////////////
2219//                                                      //
2220//                       ISTRI_10                       //
2221//                                                      //
2222//////////////////////////////////////////////////////////
2223
2224UInt h_pcmpistri_10 ( V128* argL, V128* argR )
2225{
2226   V128 block[2];
2227   memcpy(&block[0], argL, sizeof(V128));
2228   memcpy(&block[1], argR, sizeof(V128));
2229   ULong res, flags;
2230   __asm__ __volatile__(
2231      "subq      $1024,  %%rsp"             "\n\t"
2232      "movdqu    0(%2),  %%xmm2"            "\n\t"
2233      "movdqu    16(%2), %%xmm11"           "\n\t"
2234      "pcmpistri $0x10,  %%xmm2, %%xmm11"   "\n\t"
2235//"pcmpistrm $0x10, %%xmm2, %%xmm11"   "\n\t"
2236//"movd %%xmm0, %%ecx" "\n\t"
2237      "pushfq"                              "\n\t"
2238      "popq      %%rdx"                     "\n\t"
2239      "movq      %%rcx,  %0"                "\n\t"
2240      "movq      %%rdx,  %1"                "\n\t"
2241      "addq      $1024,  %%rsp"             "\n\t"
2242      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
2243      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
2244   );
2245   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
2246}
2247
2248UInt s_pcmpistri_10 ( V128* argLU, V128* argRU )
2249{
2250   V128 resV;
2251   UInt resOSZACP, resECX;
2252   Bool ok
2253      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
2254                       zmask_from_V128(argLU),
2255                       zmask_from_V128(argRU),
2256                       0x10, False/*!isSTRM*/
2257        );
2258   assert(ok);
2259   resECX = resV.uInt[0];
2260   return (resOSZACP << 16) | resECX;
2261}
2262
2263void istri_10 ( void )
2264{
2265   char* wot = "10";
2266   UInt(*h)(V128*,V128*) = h_pcmpistri_10;
2267   UInt(*s)(V128*,V128*) = s_pcmpistri_10;
2268
2269   try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
2270   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
2271   try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
2272   try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
2273
2274   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
2275   try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
2276   try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
2277   try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
2278   try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
2279
2280   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
2281   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
2282   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
2283   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
2284
2285   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
2286   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
2287
2288   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
2289   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
2290   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
2291   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
2292
2293   try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
2294
2295   try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
2296   try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
2297}
2298
2299
2300//////////////////////////////////////////////////////////
2301//                                                      //
2302//                         main                         //
2303//                                                      //
2304//////////////////////////////////////////////////////////
2305
2306int main ( void )
2307{
2308   istri_4A();
2309   istri_3A();
2310   istri_08();
2311   istri_18();
2312   istri_1A();
2313   istri_02();
2314   istri_0C();
2315   istri_12();
2316   istri_44();
2317   istri_00();
2318   istri_38();
2319   istri_46();
2320   istri_30();
2321   istri_40();
2322   istri_42();
2323   istri_0E();
2324   istri_14();
2325   istri_34();
2326   istri_70();
2327   istri_62();
2328   istri_72();
2329   istri_10();
2330   return 0;
2331}
2332