1
2/* Tests in detail the core arithmetic for pcmp{e,i}str{i,m} using
3   pcmpistri to drive it.  Does not check the e-vs-i or i-vs-m
4   aspect. */
5
6#include <string.h>
7#include <stdio.h>
8#include <assert.h>
9
/* Short integer type names used throughout this file.  The bit
   manipulation below (clz32, the 16-bit masks, the "<< 16" packing of
   results) presumes UInt is 32 bits wide -- NOTE(review): true on the
   amd64 targets this test is built for, but not enforced here. */
typedef  unsigned int   UInt;
typedef  signed int     Int;
typedef  unsigned char  UChar;
typedef  signed char    Char;
typedef  unsigned long long int ULong;
/* Boolean stored in one byte; use the False/True constants below. */
typedef  UChar          Bool;
#define False ((Bool)0)
#define True  ((Bool)1)
18
/* A 128-bit vector value, viewable either as 16 bytes or as four
   32-bit words.  The byte view is used for the string operands; the
   word view is used to pass the 32-bit ECX result back in uInt[0]. */
typedef
   union {
      UChar uChar[16];
      UInt  uInt[4];
   }
   V128;
26
/* Bit positions of the O, S, Z, A, C and P condition flags within the
   flags word.  These are the positions used both for the simulated
   result (resOSZACP) and for the word captured with pushfq in the
   h_pcmpistri_* functions; the 0x8D5 mask used there is exactly the OR
   of the six MASK_ values below. */
#define SHIFT_O   11
#define SHIFT_S   7
#define SHIFT_Z   6
#define SHIFT_A   4
#define SHIFT_C   0
#define SHIFT_P   2

/* Single-bit masks for the same flags.  Note these have type
   unsigned long long, so expressions built from them are 64-bit
   until narrowed. */
#define MASK_O    (1ULL << SHIFT_O)
#define MASK_S    (1ULL << SHIFT_S)
#define MASK_Z    (1ULL << SHIFT_Z)
#define MASK_A    (1ULL << SHIFT_A)
#define MASK_C    (1ULL << SHIFT_C)
#define MASK_P    (1ULL << SHIFT_P)
40
41
42UInt clz32 ( UInt x )
43{
44   Int y, m, n;
45   y = -(x >> 16);
46   m = (y >> 16) & 16;
47   n = 16 - m;
48   x = x >> m;
49   y = x - 0x100;
50   m = (y >> 16) & 8;
51   n = n + m;
52   x = x << m;
53   y = x - 0x1000;
54   m = (y >> 16) & 4;
55   n = n + m;
56   x = x << m;
57   y = x - 0x4000;
58   m = (y >> 16) & 2;
59   n = n + m;
60   x = x << m;
61   y = x >> 14;
62   m = y & ~(y >> 1);
63   return n + 2 - m;
64}
65
66UInt ctz32 ( UInt x )
67{
68   return 32 - clz32((~x) & (x-1));
69}
70
71void expand ( V128* dst, char* summary )
72{
73   Int i;
74   assert( strlen(summary) == 16 );
75   for (i = 0; i < 16; i++) {
76      UChar xx = 0;
77      UChar x = summary[15-i];
78      if      (x >= '0' && x <= '9') { xx = x - '0'; }
79      else if (x >= 'A' && x <= 'F') { xx = x - 'A' + 10; }
80      else if (x >= 'a' && x <= 'f') { xx = x - 'a' + 10; }
81      else assert(0);
82
83      assert(xx < 16);
84      xx = (xx << 4) | xx;
85      assert(xx < 256);
86      dst->uChar[i] = xx;
87   }
88}
89
90void try_istri ( char* which,
91                 UInt(*h_fn)(V128*,V128*),
92                 UInt(*s_fn)(V128*,V128*),
93                 char* summL, char* summR )
94{
95   assert(strlen(which) == 2);
96   V128 argL, argR;
97   expand(&argL, summL);
98   expand(&argR, summR);
99   UInt h_res = h_fn(&argL, &argR);
100   UInt s_res = s_fn(&argL, &argR);
101   printf("istri %s  %s %s -> %08x %08x %s\n",
102          which, summL, summR, h_res, s_res, h_res == s_res ? "" : "!!!!");
103}
104
105UInt zmask_from_V128 ( V128* arg )
106{
107   UInt i, res = 0;
108   for (i = 0; i < 16; i++) {
109      res |=  ((arg->uChar[i] == 0) ? 1 : 0) << i;
110   }
111   return res;
112}
113
114//////////////////////////////////////////////////////////
115//                                                      //
116//                       GENERAL                        //
117//                                                      //
118//////////////////////////////////////////////////////////
119
120
121/* Given partial results from a pcmpXstrX operation (intRes1,
122   basically), generate an I format (index value for ECX) output, and
123   also the new OSZACP flags.
124*/
125static
126void pcmpXstrX_WRK_gen_output_fmt_I(/*OUT*/V128* resV,
127                                    /*OUT*/UInt* resOSZACP,
128                                    UInt intRes1,
129                                    UInt zmaskL, UInt zmaskR,
130                                    UInt validL,
131                                    UInt pol, UInt idx )
132{
133   assert((pol >> 2) == 0);
134   assert((idx >> 1) == 0);
135
136   UInt intRes2 = 0;
137   switch (pol) {
138      case 0: intRes2 = intRes1;          break; // pol +
139      case 1: intRes2 = ~intRes1;         break; // pol -
140      case 2: intRes2 = intRes1;          break; // pol m+
141      case 3: intRes2 = intRes1 ^ validL; break; // pol m-
142   }
143   intRes2 &= 0xFFFF;
144
145   // generate ecx value
146   UInt newECX = 0;
147   if (idx) {
148     // index of ms-1-bit
149     newECX = intRes2 == 0 ? 16 : (31 - clz32(intRes2));
150   } else {
151     // index of ls-1-bit
152     newECX = intRes2 == 0 ? 16 : ctz32(intRes2);
153   }
154
155   *(UInt*)(&resV[0]) = newECX;
156
157   // generate new flags, common to all ISTRI and ISTRM cases
158   *resOSZACP    // A, P are zero
159     = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0
160     | ((zmaskL == 0)  ? 0 : MASK_Z) // Z == 1 iff any in argL is 0
161     | ((zmaskR == 0)  ? 0 : MASK_S) // S == 1 iff any in argR is 0
162     | ((intRes2 & 1) << SHIFT_O);   // O == IntRes2[0]
163}
164
165
166/* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M}
167   variants.
168
169   For xSTRI variants, the new ECX value is placed in the 32 bits
170   pointed to by *resV.  For xSTRM variants, the result is a 128 bit
171   value and is placed at *resV in the obvious way.
172
173   For all variants, the new OSZACP value is placed at *resOSZACP.
174
175   argLV and argRV are the vector args.  The caller must prepare a
176   16-bit mask for each, zmaskL and zmaskR.  For ISTRx variants this
177   must be 1 for each zero byte of of the respective arg.  For ESTRx
178   variants this is derived from the explicit length indication, and
179   must be 0 in all places except at the bit index corresponding to
180   the valid length (0 .. 16).  If the valid length is 16 then the
181   mask must be all zeroes.  In all cases, bits 31:16 must be zero.
182
183   imm8 is the original immediate from the instruction.  isSTRM
184   indicates whether this is a xSTRM or xSTRI variant, which controls
185   how much of *res is written.
186
187   If the given imm8 case can be handled, the return value is True.
188   If not, False is returned, and neither *res not *resOSZACP are
189   altered.
190*/
191
192Bool pcmpXstrX_WRK ( /*OUT*/V128* resV,
193                     /*OUT*/UInt* resOSZACP,
194                     V128* argLV,  V128* argRV,
195                     UInt zmaskL, UInt zmaskR,
196                     UInt imm8,   Bool isSTRM )
197{
198   assert(imm8 < 0x80);
199   assert((zmaskL >> 16) == 0);
200   assert((zmaskR >> 16) == 0);
201
202   /* Explicitly reject any imm8 values that haven't been validated,
203      even if they would probably work.  Life is too short to have
204      unvalidated cases in the code base. */
205   switch (imm8) {
206      case 0x00: case 0x02: case 0x08: case 0x0A: case 0x0C: case 0x0E:
207      case 0x12: case 0x14: case 0x18: case 0x1A:
208      case 0x30: case 0x34: case 0x38: case 0x3A:
209      case 0x40: case 0x42: case 0x44: case 0x46: case 0x4A:
210         break;
211      default:
212         return False;
213   }
214
215   UInt fmt = (imm8 >> 0) & 3; // imm8[1:0]  data format
216   UInt agg = (imm8 >> 2) & 3; // imm8[3:2]  aggregation fn
217   UInt pol = (imm8 >> 4) & 3; // imm8[5:4]  polarity
218   UInt idx = (imm8 >> 6) & 1; // imm8[6]    1==msb/bytemask
219
220   /*----------------------------------------*/
221   /*-- strcmp on byte data                --*/
222   /*----------------------------------------*/
223
224   if (agg == 2/*equal each, aka strcmp*/
225       && (fmt == 0/*ub*/ || fmt == 2/*sb*/)
226       && !isSTRM) {
227      Int    i;
228      UChar* argL = (UChar*)argLV;
229      UChar* argR = (UChar*)argRV;
230      UInt boolResII = 0;
231      for (i = 15; i >= 0; i--) {
232         UChar cL  = argL[i];
233         UChar cR  = argR[i];
234         boolResII = (boolResII << 1) | (cL == cR ? 1 : 0);
235      }
236      UInt validL = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
237      UInt validR = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
238
239      // do invalidation, common to all equal-each cases
240      UInt intRes1
241         = (boolResII & validL & validR)  // if both valid, use cmpres
242           | (~ (validL | validR));       // if both invalid, force 1
243                                          // else force 0
244      intRes1 &= 0xFFFF;
245
246      // generate I-format output
247      pcmpXstrX_WRK_gen_output_fmt_I(
248         resV, resOSZACP,
249         intRes1, zmaskL, zmaskR, validL, pol, idx
250      );
251
252      return True;
253   }
254
255   /*----------------------------------------*/
256   /*-- set membership on byte data        --*/
257   /*----------------------------------------*/
258
259   if (agg == 0/*equal any, aka find chars in a set*/
260       && (fmt == 0/*ub*/ || fmt == 2/*sb*/)
261       && !isSTRM) {
262      /* argL: the string,  argR: charset */
263      UInt   si, ci;
264      UChar* argL    = (UChar*)argLV;
265      UChar* argR    = (UChar*)argRV;
266      UInt   boolRes = 0;
267      UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
268      UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
269
270      for (si = 0; si < 16; si++) {
271         if ((validL & (1 << si)) == 0)
272            // run off the end of the string.
273            break;
274         UInt m = 0;
275         for (ci = 0; ci < 16; ci++) {
276            if ((validR & (1 << ci)) == 0) break;
277            if (argR[ci] == argL[si]) { m = 1; break; }
278         }
279         boolRes |= (m << si);
280      }
281
282      // boolRes is "pre-invalidated"
283      UInt intRes1 = boolRes & 0xFFFF;
284
285      // generate I-format output
286      pcmpXstrX_WRK_gen_output_fmt_I(
287         resV, resOSZACP,
288         intRes1, zmaskL, zmaskR, validL, pol, idx
289      );
290
291      return True;
292   }
293
294   /*----------------------------------------*/
295   /*-- substring search on byte data      --*/
296   /*----------------------------------------*/
297
298   if (agg == 3/*equal ordered, aka substring search*/
299       && (fmt == 0/*ub*/ || fmt == 2/*sb*/)
300       && !isSTRM) {
301
302      /* argL: haystack,  argR: needle */
303      UInt   ni, hi;
304      UChar* argL    = (UChar*)argLV;
305      UChar* argR    = (UChar*)argRV;
306      UInt   boolRes = 0;
307      UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
308      UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
309      for (hi = 0; hi < 16; hi++) {
310         UInt m = 1;
311         for (ni = 0; ni < 16; ni++) {
312            if ((validR & (1 << ni)) == 0) break;
313            UInt i = ni + hi;
314            if (i >= 16) break;
315            if (argL[i] != argR[ni]) { m = 0; break; }
316         }
317         boolRes |= (m << hi);
318         if ((validL & (1 << hi)) == 0)
319            // run off the end of the haystack
320            break;
321      }
322
323      // boolRes is "pre-invalidated"
324      UInt intRes1 = boolRes & 0xFFFF;
325
326      // generate I-format output
327      pcmpXstrX_WRK_gen_output_fmt_I(
328         resV, resOSZACP,
329         intRes1, zmaskL, zmaskR, validL, pol, idx
330      );
331
332      return True;
333   }
334
335   /*----------------------------------------*/
336   /*-- ranges, unsigned byte data         --*/
337   /*----------------------------------------*/
338
339   if (agg == 1/*ranges*/
340       && fmt == 0/*ub*/
341       && !isSTRM) {
342
343      /* argL: string,  argR: range-pairs */
344      UInt   ri, si;
345      UChar* argL    = (UChar*)argLV;
346      UChar* argR    = (UChar*)argRV;
347      UInt   boolRes = 0;
348      UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
349      UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
350      for (si = 0; si < 16; si++) {
351         if ((validL & (1 << si)) == 0)
352            // run off the end of the string
353            break;
354         UInt m = 0;
355         for (ri = 0; ri < 16; ri += 2) {
356            if ((validR & (3 << ri)) != (3 << ri)) break;
357            if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
358               m = 1; break;
359            }
360         }
361         boolRes |= (m << si);
362      }
363
364      // boolRes is "pre-invalidated"
365      UInt intRes1 = boolRes & 0xFFFF;
366
367      // generate I-format output
368      pcmpXstrX_WRK_gen_output_fmt_I(
369         resV, resOSZACP,
370         intRes1, zmaskL, zmaskR, validL, pol, idx
371      );
372
373      return True;
374   }
375
376   /*----------------------------------------*/
377   /*-- ranges, signed byte data           --*/
378   /*----------------------------------------*/
379
380   if (agg == 1/*ranges*/
381       && fmt == 2/*sb*/
382       && !isSTRM) {
383
384      /* argL: string,  argR: range-pairs */
385      UInt   ri, si;
386      Char*  argL    = (Char*)argLV;
387      Char*  argR    = (Char*)argRV;
388      UInt   boolRes = 0;
389      UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
390      UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
391      for (si = 0; si < 16; si++) {
392         if ((validL & (1 << si)) == 0)
393            // run off the end of the string
394            break;
395         UInt m = 0;
396         for (ri = 0; ri < 16; ri += 2) {
397            if ((validR & (3 << ri)) != (3 << ri)) break;
398            if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
399               m = 1; break;
400            }
401         }
402         boolRes |= (m << si);
403      }
404
405      // boolRes is "pre-invalidated"
406      UInt intRes1 = boolRes & 0xFFFF;
407
408      // generate I-format output
409      pcmpXstrX_WRK_gen_output_fmt_I(
410         resV, resOSZACP,
411         intRes1, zmaskL, zmaskR, validL, pol, idx
412      );
413
414      return True;
415   }
416
417   return False;
418}
419
420
421//////////////////////////////////////////////////////////
422//                                                      //
423//                       ISTRI_4A                       //
424//                                                      //
425//////////////////////////////////////////////////////////
426
427UInt h_pcmpistri_4A ( V128* argL, V128* argR )
428{
429   V128 block[2];
430   memcpy(&block[0], argL, sizeof(V128));
431   memcpy(&block[1], argR, sizeof(V128));
432   ULong res, flags;
433   __asm__ __volatile__(
434      "subq      $1024,  %%rsp"             "\n\t"
435      "movdqu    0(%2),  %%xmm2"            "\n\t"
436      "movdqu    16(%2), %%xmm11"           "\n\t"
437      "pcmpistri $0x4A,  %%xmm2, %%xmm11"   "\n\t"
438      "pushfq"                              "\n\t"
439      "popq      %%rdx"                     "\n\t"
440      "movq      %%rcx,  %0"                "\n\t"
441      "movq      %%rdx,  %1"                "\n\t"
442      "addq      $1024,  %%rsp"             "\n\t"
443      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
444      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
445   );
446   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
447}
448
449UInt s_pcmpistri_4A ( V128* argLU, V128* argRU )
450{
451   V128 resV;
452   UInt resOSZACP, resECX;
453   Bool ok
454      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
455                       zmask_from_V128(argLU),
456                       zmask_from_V128(argRU),
457                       0x4A, False/*!isSTRM*/
458        );
459   assert(ok);
460   resECX = resV.uInt[0];
461   return (resOSZACP << 16) | resECX;
462}
463
464void istri_4A ( void )
465{
466   char* wot = "4A";
467   UInt(*h)(V128*,V128*) = h_pcmpistri_4A;
468   UInt(*s)(V128*,V128*) = s_pcmpistri_4A;
469
470   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
471
472   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
473   try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
474   try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
475   try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
476
477   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
478   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
479   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
480
481   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
482   try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
483   try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
484   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
485
486   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
487   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
488   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
489
490   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
491
492   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
493   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
494   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
495
496   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
497   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
498   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
499
500   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
501   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
502   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
503
504   try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
505   try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
506   try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
507
508   try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
509   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
510}
511
512//////////////////////////////////////////////////////////
513//                                                      //
514//                       ISTRI_3A                       //
515//                                                      //
516//////////////////////////////////////////////////////////
517
518UInt h_pcmpistri_3A ( V128* argL, V128* argR )
519{
520   V128 block[2];
521   memcpy(&block[0], argL, sizeof(V128));
522   memcpy(&block[1], argR, sizeof(V128));
523   ULong res, flags;
524   __asm__ __volatile__(
525      "subq      $1024,  %%rsp"             "\n\t"
526      "movdqu    0(%2),  %%xmm2"            "\n\t"
527      "movdqu    16(%2), %%xmm11"           "\n\t"
528      "pcmpistri $0x3A,  %%xmm2, %%xmm11"   "\n\t"
529      "pushfq"                              "\n\t"
530      "popq      %%rdx"                     "\n\t"
531      "movq      %%rcx,  %0"                "\n\t"
532      "movq      %%rdx,  %1"                "\n\t"
533      "addq      $1024,  %%rsp"             "\n\t"
534      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
535      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
536   );
537   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
538}
539
540UInt s_pcmpistri_3A ( V128* argLU, V128* argRU )
541{
542   V128 resV;
543   UInt resOSZACP, resECX;
544   Bool ok
545      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
546                       zmask_from_V128(argLU),
547                       zmask_from_V128(argRU),
548                       0x3A, False/*!isSTRM*/
549        );
550   assert(ok);
551   resECX = resV.uInt[0];
552   return (resOSZACP << 16) | resECX;
553}
554
555void istri_3A ( void )
556{
557   char* wot = "3A";
558   UInt(*h)(V128*,V128*) = h_pcmpistri_3A;
559   UInt(*s)(V128*,V128*) = s_pcmpistri_3A;
560
561   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
562
563   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
564   try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
565   try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
566   try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
567
568   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
569   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
570   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
571
572   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
573   try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
574   try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
575   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
576
577   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
578   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
579   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
580
581   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
582
583   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
584   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
585   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
586
587   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
588   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
589   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
590
591   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
592   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
593   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
594
595   try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
596   try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
597   try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
598
599   try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
600   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
601}
602
603
604
605//////////////////////////////////////////////////////////
606//                                                      //
607//                       ISTRI_0C                       //
608//                                                      //
609//////////////////////////////////////////////////////////
610
611__attribute__((noinline))
612UInt h_pcmpistri_0C ( V128* argL, V128* argR )
613{
614   V128 block[2];
615   memcpy(&block[0], argL, sizeof(V128));
616   memcpy(&block[1], argR, sizeof(V128));
617   ULong res = 0, flags = 0;
618   __asm__ __volatile__(
619      "movdqu    0(%2),  %%xmm2"            "\n\t"
620      "movdqu    16(%2), %%xmm11"           "\n\t"
621      "pcmpistri $0x0C,  %%xmm2, %%xmm11"   "\n\t"
622      //"pcmpistrm $0x0C,  %%xmm2, %%xmm11"   "\n\t"
623      //"movd %%xmm0, %%ecx" "\n\t"
624      "pushfq"                              "\n\t"
625      "popq      %%rdx"                     "\n\t"
626      "movq      %%rcx,  %0"                "\n\t"
627      "movq      %%rdx,  %1"                "\n\t"
628      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
629      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
630   );
631   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
632}
633
634UInt s_pcmpistri_0C ( V128* argLU, V128* argRU )
635{
636   V128 resV;
637   UInt resOSZACP, resECX;
638   Bool ok
639      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
640                       zmask_from_V128(argLU),
641                       zmask_from_V128(argRU),
642                       0x0C, False/*!isSTRM*/
643        );
644   assert(ok);
645   resECX = resV.uInt[0];
646   return (resOSZACP << 16) | resECX;
647}
648
649void istri_0C ( void )
650{
651   char* wot = "0C";
652   UInt(*h)(V128*,V128*) = h_pcmpistri_0C;
653   UInt(*s)(V128*,V128*) = s_pcmpistri_0C;
654
655   try_istri(wot,h,s, "111111111abcde11", "00000000000abcde");
656
657   try_istri(wot,h,s, "111111111abcde11", "0000abcde00abcde");
658
659   try_istri(wot,h,s, "1111111111abcde1", "00000000000abcde");
660   try_istri(wot,h,s, "11111111111abcde", "00000000000abcde");
661   try_istri(wot,h,s, "111111111111abcd", "00000000000abcde");
662
663   try_istri(wot,h,s, "111abcde1abcde11", "00000000000abcde");
664
665   try_istri(wot,h,s, "11abcde11abcde11", "00000000000abcde");
666   try_istri(wot,h,s, "1abcde111abcde11", "00000000000abcde");
667   try_istri(wot,h,s, "abcde1111abcde11", "00000000000abcde");
668   try_istri(wot,h,s, "bcde11111abcde11", "00000000000abcde");
669   try_istri(wot,h,s, "cde111111abcde11", "00000000000abcde");
670
671   try_istri(wot,h,s, "01abcde11abcde11", "00000000000abcde");
672   try_istri(wot,h,s, "00abcde11abcde11", "00000000000abcde");
673   try_istri(wot,h,s, "000bcde11abcde11", "00000000000abcde");
674
675   try_istri(wot,h,s, "00abcde10abcde11", "00000000000abcde");
676   try_istri(wot,h,s, "00abcde100bcde11", "00000000000abcde");
677
678   try_istri(wot,h,s, "1111111111111234", "0000000000000000");
679   try_istri(wot,h,s, "1111111111111234", "0000000000000001");
680   try_istri(wot,h,s, "1111111111111234", "0000000000000011");
681
682   try_istri(wot,h,s, "1111111111111234", "1111111111111234");
683   try_istri(wot,h,s, "a111111111111111", "000000000000000a");
684   try_istri(wot,h,s, "b111111111111111", "000000000000000a");
685
686   try_istri(wot,h,s, "b111111111111111", "0000000000000000");
687   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
688   try_istri(wot,h,s, "123456789abcdef1", "0000000000000000");
689   try_istri(wot,h,s, "0000000000000000", "123456789abcdef1");
690}
691
692
693//////////////////////////////////////////////////////////
694//                                                      //
695//                       ISTRI_08                       //
696//                                                      //
697//////////////////////////////////////////////////////////
698
699UInt h_pcmpistri_08 ( V128* argL, V128* argR )
700{
701   V128 block[2];
702   memcpy(&block[0], argL, sizeof(V128));
703   memcpy(&block[1], argR, sizeof(V128));
704   ULong res, flags;
705   __asm__ __volatile__(
706      "subq      $1024,  %%rsp"             "\n\t"
707      "movdqu    0(%2),  %%xmm2"            "\n\t"
708      "movdqu    16(%2), %%xmm11"           "\n\t"
709      "pcmpistri $0x08,  %%xmm2, %%xmm11"   "\n\t"
710      "pushfq"                              "\n\t"
711      "popq      %%rdx"                     "\n\t"
712      "movq      %%rcx,  %0"                "\n\t"
713      "movq      %%rdx,  %1"                "\n\t"
714      "addq      $1024,  %%rsp"             "\n\t"
715      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
716      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
717   );
718   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
719}
720
721UInt s_pcmpistri_08 ( V128* argLU, V128* argRU )
722{
723   V128 resV;
724   UInt resOSZACP, resECX;
725   Bool ok
726      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
727                       zmask_from_V128(argLU),
728                       zmask_from_V128(argRU),
729                       0x08, False/*!isSTRM*/
730        );
731   assert(ok);
732   resECX = resV.uInt[0];
733   return (resOSZACP << 16) | resECX;
734}
735
736void istri_08 ( void )
737{
738   char* wot = "08";
739   UInt(*h)(V128*,V128*) = h_pcmpistri_08;
740   UInt(*s)(V128*,V128*) = s_pcmpistri_08;
741
742   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
743
744   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
745   try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
746   try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
747   try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
748
749   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
750   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
751   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
752
753   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
754   try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
755   try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
756   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
757
758   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
759   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
760   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
761
762   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
763
764   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
765   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
766   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
767
768   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
769   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
770   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
771
772   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
773   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
774   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
775
776   try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
777   try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
778   try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
779
780   try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
781   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
782}
783
784
785
786//////////////////////////////////////////////////////////
787//                                                      //
788//                       ISTRI_18                       //
789//                                                      //
790//////////////////////////////////////////////////////////
791
792UInt h_pcmpistri_18 ( V128* argL, V128* argR )
793{
794   V128 block[2];
795   memcpy(&block[0], argL, sizeof(V128));
796   memcpy(&block[1], argR, sizeof(V128));
797   ULong res, flags;
798   __asm__ __volatile__(
799      "subq      $1024,  %%rsp"             "\n\t"
800      "movdqu    0(%2),  %%xmm2"            "\n\t"
801      "movdqu    16(%2), %%xmm11"           "\n\t"
802      "pcmpistri $0x18,  %%xmm2, %%xmm11"   "\n\t"
803      "pushfq"                              "\n\t"
804      "popq      %%rdx"                     "\n\t"
805      "movq      %%rcx,  %0"                "\n\t"
806      "movq      %%rdx,  %1"                "\n\t"
807      "addq      $1024,  %%rsp"             "\n\t"
808      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
809      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
810   );
811   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
812}
813
814UInt s_pcmpistri_18 ( V128* argLU, V128* argRU )
815{
816   V128 resV;
817   UInt resOSZACP, resECX;
818   Bool ok
819      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
820                       zmask_from_V128(argLU),
821                       zmask_from_V128(argRU),
822                       0x18, False/*!isSTRM*/
823        );
824   assert(ok);
825   resECX = resV.uInt[0];
826   return (resOSZACP << 16) | resECX;
827}
828
829void istri_18 ( void )
830{
831   char* wot = "18";
832   UInt(*h)(V128*,V128*) = h_pcmpistri_18;
833   UInt(*s)(V128*,V128*) = s_pcmpistri_18;
834
835   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
836
837   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
838   try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
839   try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
840   try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
841
842   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
843   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
844   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
845
846   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
847   try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
848   try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
849   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
850
851   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
852   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
853   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
854
855   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
856
857   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
858   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
859   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
860
861   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
862   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
863   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
864
865   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
866   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
867   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
868
869   try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
870   try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
871   try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
872
873   try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
874   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
875}
876
877
878
879//////////////////////////////////////////////////////////
880//                                                      //
881//                       ISTRI_1A                       //
882//                                                      //
883//////////////////////////////////////////////////////////
884
885UInt h_pcmpistri_1A ( V128* argL, V128* argR )
886{
887   V128 block[2];
888   memcpy(&block[0], argL, sizeof(V128));
889   memcpy(&block[1], argR, sizeof(V128));
890   ULong res, flags;
891   __asm__ __volatile__(
892      "subq      $1024,  %%rsp"             "\n\t"
893      "movdqu    0(%2),  %%xmm2"            "\n\t"
894      "movdqu    16(%2), %%xmm11"           "\n\t"
895      "pcmpistri $0x1A,  %%xmm2, %%xmm11"   "\n\t"
896      "pushfq"                              "\n\t"
897      "popq      %%rdx"                     "\n\t"
898      "movq      %%rcx,  %0"                "\n\t"
899      "movq      %%rdx,  %1"                "\n\t"
900      "addq      $1024,  %%rsp"             "\n\t"
901      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
902      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
903   );
904   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
905}
906
907UInt s_pcmpistri_1A ( V128* argLU, V128* argRU )
908{
909   V128 resV;
910   UInt resOSZACP, resECX;
911   Bool ok
912      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
913                       zmask_from_V128(argLU),
914                       zmask_from_V128(argRU),
915                       0x1A, False/*!isSTRM*/
916        );
917   assert(ok);
918   resECX = resV.uInt[0];
919   return (resOSZACP << 16) | resECX;
920}
921
922void istri_1A ( void )
923{
924   char* wot = "1A";
925   UInt(*h)(V128*,V128*) = h_pcmpistri_1A;
926   UInt(*s)(V128*,V128*) = s_pcmpistri_1A;
927
928   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
929
930   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
931   try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
932   try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
933   try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
934
935   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
936   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
937   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
938
939   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
940   try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
941   try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
942   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
943
944   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
945   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
946   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
947
948   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
949
950   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
951   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
952   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
953
954   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
955   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
956   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
957
958   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
959   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
960   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
961
962   try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
963   try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
964   try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
965
966   try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
967   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
968}
969
970
971
972//////////////////////////////////////////////////////////
973//                                                      //
974//                       ISTRI_02                       //
975//                                                      //
976//////////////////////////////////////////////////////////
977
978UInt h_pcmpistri_02 ( V128* argL, V128* argR )
979{
980   V128 block[2];
981   memcpy(&block[0], argL, sizeof(V128));
982   memcpy(&block[1], argR, sizeof(V128));
983   ULong res, flags;
984   __asm__ __volatile__(
985      "subq      $1024,  %%rsp"             "\n\t"
986      "movdqu    0(%2),  %%xmm2"            "\n\t"
987      "movdqu    16(%2), %%xmm11"           "\n\t"
988      "pcmpistri $0x02,  %%xmm2, %%xmm11"   "\n\t"
989//"pcmpistrm $0x02, %%xmm2, %%xmm11"   "\n\t"
990//"movd %%xmm0, %%ecx" "\n\t"
991      "pushfq"                              "\n\t"
992      "popq      %%rdx"                     "\n\t"
993      "movq      %%rcx,  %0"                "\n\t"
994      "movq      %%rdx,  %1"                "\n\t"
995      "addq      $1024,  %%rsp"             "\n\t"
996      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
997      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
998   );
999   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1000}
1001
1002UInt s_pcmpistri_02 ( V128* argLU, V128* argRU )
1003{
1004   V128 resV;
1005   UInt resOSZACP, resECX;
1006   Bool ok
1007      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1008                       zmask_from_V128(argLU),
1009                       zmask_from_V128(argRU),
1010                       0x02, False/*!isSTRM*/
1011        );
1012   assert(ok);
1013   resECX = resV.uInt[0];
1014   return (resOSZACP << 16) | resECX;
1015}
1016
1017void istri_02 ( void )
1018{
1019   char* wot = "02";
1020   UInt(*h)(V128*,V128*) = h_pcmpistri_02;
1021   UInt(*s)(V128*,V128*) = s_pcmpistri_02;
1022
1023   try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1024   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1025   try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1026   try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1027
1028   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1029   try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1030   try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1031   try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1032   try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1033
1034   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1035   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1036   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1037   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1038
1039   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1040   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1041
1042   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1043   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1044   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1045   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1046
1047   try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1048
1049   try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1050   try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1051}
1052
1053
1054//////////////////////////////////////////////////////////
1055//                                                      //
1056//                       ISTRI_12                       //
1057//                                                      //
1058//////////////////////////////////////////////////////////
1059
1060UInt h_pcmpistri_12 ( V128* argL, V128* argR )
1061{
1062   V128 block[2];
1063   memcpy(&block[0], argL, sizeof(V128));
1064   memcpy(&block[1], argR, sizeof(V128));
1065   ULong res, flags;
1066   __asm__ __volatile__(
1067      "subq      $1024,  %%rsp"             "\n\t"
1068      "movdqu    0(%2),  %%xmm2"            "\n\t"
1069      "movdqu    16(%2), %%xmm11"           "\n\t"
1070      "pcmpistri $0x12,  %%xmm2, %%xmm11"   "\n\t"
1071//"pcmpistrm $0x12, %%xmm2, %%xmm11"   "\n\t"
1072//"movd %%xmm0, %%ecx" "\n\t"
1073      "pushfq"                              "\n\t"
1074      "popq      %%rdx"                     "\n\t"
1075      "movq      %%rcx,  %0"                "\n\t"
1076      "movq      %%rdx,  %1"                "\n\t"
1077      "addq      $1024,  %%rsp"             "\n\t"
1078      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1079      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1080   );
1081   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1082}
1083
1084UInt s_pcmpistri_12 ( V128* argLU, V128* argRU )
1085{
1086   V128 resV;
1087   UInt resOSZACP, resECX;
1088   Bool ok
1089      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1090                       zmask_from_V128(argLU),
1091                       zmask_from_V128(argRU),
1092                       0x12, False/*!isSTRM*/
1093        );
1094   assert(ok);
1095   resECX = resV.uInt[0];
1096   return (resOSZACP << 16) | resECX;
1097}
1098
1099void istri_12 ( void )
1100{
1101   char* wot = "12";
1102   UInt(*h)(V128*,V128*) = h_pcmpistri_12;
1103   UInt(*s)(V128*,V128*) = s_pcmpistri_12;
1104
1105   try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1106   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1107   try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1108   try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1109
1110   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1111   try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1112   try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1113   try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1114   try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1115
1116   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1117   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1118   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1119   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1120
1121   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1122   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1123
1124   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1125   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1126   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1127   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1128
1129   try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1130
1131   try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1132   try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1133}
1134
1135
1136
1137//////////////////////////////////////////////////////////
1138//                                                      //
1139//                       ISTRI_44                       //
1140//                                                      //
1141//////////////////////////////////////////////////////////
1142
1143UInt h_pcmpistri_44 ( V128* argL, V128* argR )
1144{
1145   V128 block[2];
1146   memcpy(&block[0], argL, sizeof(V128));
1147   memcpy(&block[1], argR, sizeof(V128));
1148   ULong res, flags;
1149   __asm__ __volatile__(
1150      "subq      $1024,  %%rsp"             "\n\t"
1151      "movdqu    0(%2),  %%xmm2"            "\n\t"
1152      "movdqu    16(%2), %%xmm11"           "\n\t"
1153      "pcmpistri $0x44,  %%xmm2, %%xmm11"   "\n\t"
1154//"pcmpistrm $0x04, %%xmm2, %%xmm11"   "\n\t"
1155//"movd %%xmm0, %%ecx" "\n\t"
1156      "pushfq"                              "\n\t"
1157      "popq      %%rdx"                     "\n\t"
1158      "movq      %%rcx,  %0"                "\n\t"
1159      "movq      %%rdx,  %1"                "\n\t"
1160      "addq      $1024,  %%rsp"             "\n\t"
1161      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1162      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1163   );
1164   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1165}
1166
1167UInt s_pcmpistri_44 ( V128* argLU, V128* argRU )
1168{
1169   V128 resV;
1170   UInt resOSZACP, resECX;
1171   Bool ok
1172      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1173                       zmask_from_V128(argLU),
1174                       zmask_from_V128(argRU),
1175                       0x44, False/*!isSTRM*/
1176        );
1177   assert(ok);
1178   resECX = resV.uInt[0];
1179   return (resOSZACP << 16) | resECX;
1180}
1181
1182void istri_44 ( void )
1183{
1184   char* wot = "44";
1185   UInt(*h)(V128*,V128*) = h_pcmpistri_44;
1186   UInt(*s)(V128*,V128*) = s_pcmpistri_44;
1187
1188   try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc");
1189   try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb");
1190   try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb");
1191   try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb");
1192
1193   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1194   try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb");
1195   try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb");
1196   try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb");
1197   try_istri(wot,h,s, "0000000000000000", "00000000000000cb");
1198
1199   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1200
1201   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1202   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b");
1203   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb");
1204
1205   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb");
1206   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb");
1207   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
1208
1209   try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
1210   try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
1211
1212   try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
1213   try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
1214
1215   try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a");
1216   try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
1217   try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54");
1218   try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
1219}
1220
1221
1222//////////////////////////////////////////////////////////
1223//                                                      //
1224//                       ISTRI_00                       //
1225//                                                      //
1226//////////////////////////////////////////////////////////
1227
1228UInt h_pcmpistri_00 ( V128* argL, V128* argR )
1229{
1230   V128 block[2];
1231   memcpy(&block[0], argL, sizeof(V128));
1232   memcpy(&block[1], argR, sizeof(V128));
1233   ULong res, flags;
1234   __asm__ __volatile__(
1235      "subq      $1024,  %%rsp"             "\n\t"
1236      "movdqu    0(%2),  %%xmm2"            "\n\t"
1237      "movdqu    16(%2), %%xmm11"           "\n\t"
1238      "pcmpistri $0x00,  %%xmm2, %%xmm11"   "\n\t"
1239//"pcmpistrm $0x00, %%xmm2, %%xmm11"   "\n\t"
1240//"movd %%xmm0, %%ecx" "\n\t"
1241      "pushfq"                              "\n\t"
1242      "popq      %%rdx"                     "\n\t"
1243      "movq      %%rcx,  %0"                "\n\t"
1244      "movq      %%rdx,  %1"                "\n\t"
1245      "addq      $1024,  %%rsp"             "\n\t"
1246      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1247      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1248   );
1249   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1250}
1251
1252UInt s_pcmpistri_00 ( V128* argLU, V128* argRU )
1253{
1254   V128 resV;
1255   UInt resOSZACP, resECX;
1256   Bool ok
1257      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1258                       zmask_from_V128(argLU),
1259                       zmask_from_V128(argRU),
1260                       0x00, False/*!isSTRM*/
1261        );
1262   assert(ok);
1263   resECX = resV.uInt[0];
1264   return (resOSZACP << 16) | resECX;
1265}
1266
1267void istri_00 ( void )
1268{
1269   char* wot = "00";
1270   UInt(*h)(V128*,V128*) = h_pcmpistri_00;
1271   UInt(*s)(V128*,V128*) = s_pcmpistri_00;
1272
1273   try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1274   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1275   try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1276   try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1277
1278   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1279   try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1280   try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1281   try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1282   try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1283
1284   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1285   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1286   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1287   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1288
1289   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1290   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1291
1292   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1293   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1294   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1295   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1296
1297   try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1298
1299   try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1300   try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1301}
1302
1303
1304//////////////////////////////////////////////////////////
1305//                                                      //
1306//                       ISTRI_38                       //
1307//                                                      //
1308//////////////////////////////////////////////////////////
1309
1310UInt h_pcmpistri_38 ( V128* argL, V128* argR )
1311{
1312   V128 block[2];
1313   memcpy(&block[0], argL, sizeof(V128));
1314   memcpy(&block[1], argR, sizeof(V128));
1315   ULong res, flags;
1316   __asm__ __volatile__(
1317      "subq      $1024,  %%rsp"             "\n\t"
1318      "movdqu    0(%2),  %%xmm2"            "\n\t"
1319      "movdqu    16(%2), %%xmm11"           "\n\t"
1320      "pcmpistri $0x38,  %%xmm2, %%xmm11"   "\n\t"
1321      "pushfq"                              "\n\t"
1322      "popq      %%rdx"                     "\n\t"
1323      "movq      %%rcx,  %0"                "\n\t"
1324      "movq      %%rdx,  %1"                "\n\t"
1325      "addq      $1024,  %%rsp"             "\n\t"
1326      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1327      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1328   );
1329   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1330}
1331
1332UInt s_pcmpistri_38 ( V128* argLU, V128* argRU )
1333{
1334   V128 resV;
1335   UInt resOSZACP, resECX;
1336   Bool ok
1337      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1338                       zmask_from_V128(argLU),
1339                       zmask_from_V128(argRU),
1340                       0x38, False/*!isSTRM*/
1341        );
1342   assert(ok);
1343   resECX = resV.uInt[0];
1344   return (resOSZACP << 16) | resECX;
1345}
1346
1347void istri_38 ( void )
1348{
1349   char* wot = "38";
1350   UInt(*h)(V128*,V128*) = h_pcmpistri_38;
1351   UInt(*s)(V128*,V128*) = s_pcmpistri_38;
1352
1353   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1354
1355   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1356   try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1357   try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
1358   try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
1359
1360   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
1361   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
1362   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
1363
1364   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1365   try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1366   try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1367   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1368
1369   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1370   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
1371   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
1372
1373   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1374
1375   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
1376   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
1377   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
1378
1379   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
1380   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
1381   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
1382
1383   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
1384   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
1385   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
1386
1387   try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
1388   try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
1389   try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
1390
1391   try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
1392   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
1393}
1394
1395
1396
1397//////////////////////////////////////////////////////////
1398//                                                      //
1399//                       ISTRI_46                       //
1400//                                                      //
1401//////////////////////////////////////////////////////////
1402
1403UInt h_pcmpistri_46 ( V128* argL, V128* argR )
1404{
1405   V128 block[2];
1406   memcpy(&block[0], argL, sizeof(V128));
1407   memcpy(&block[1], argR, sizeof(V128));
1408   ULong res, flags;
1409   __asm__ __volatile__(
1410      "subq      $1024,  %%rsp"             "\n\t"
1411      "movdqu    0(%2),  %%xmm2"            "\n\t"
1412      "movdqu    16(%2), %%xmm11"           "\n\t"
1413      "pcmpistri $0x46,  %%xmm2, %%xmm11"   "\n\t"
1414      "pushfq"                              "\n\t"
1415      "popq      %%rdx"                     "\n\t"
1416      "movq      %%rcx,  %0"                "\n\t"
1417      "movq      %%rdx,  %1"                "\n\t"
1418      "addq      $1024,  %%rsp"             "\n\t"
1419      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1420      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1421   );
1422   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1423}
1424
1425UInt s_pcmpistri_46 ( V128* argLU, V128* argRU )
1426{
1427   V128 resV;
1428   UInt resOSZACP, resECX;
1429   Bool ok
1430      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1431                       zmask_from_V128(argLU),
1432                       zmask_from_V128(argRU),
1433                       0x46, False/*!isSTRM*/
1434        );
1435   assert(ok);
1436   resECX = resV.uInt[0];
1437   return (resOSZACP << 16) | resECX;
1438}
1439
1440void istri_46 ( void )
1441{
1442   char* wot = "46";
1443   UInt(*h)(V128*,V128*) = h_pcmpistri_46;
1444   UInt(*s)(V128*,V128*) = s_pcmpistri_46;
1445
1446   try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc");
1447   try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb");
1448   try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb");
1449   try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb");
1450
1451   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1452   try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb");
1453   try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb");
1454   try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb");
1455   try_istri(wot,h,s, "0000000000000000", "00000000000000cb");
1456
1457   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1458
1459   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1460   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b");
1461   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb");
1462
1463   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb");
1464   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb");
1465   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
1466
1467   try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
1468   try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
1469
1470   try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
1471   try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
1472
1473   try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a");
1474   try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
1475   try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54");
1476   try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
1477}
1478
1479
1480//////////////////////////////////////////////////////////
1481//                                                      //
1482//                       ISTRI_30                       //
1483//                                                      //
1484//////////////////////////////////////////////////////////
1485
1486UInt h_pcmpistri_30 ( V128* argL, V128* argR )
1487{
1488   V128 block[2];
1489   memcpy(&block[0], argL, sizeof(V128));
1490   memcpy(&block[1], argR, sizeof(V128));
1491   ULong res, flags;
1492   __asm__ __volatile__(
1493      "subq      $1024,  %%rsp"             "\n\t"
1494      "movdqu    0(%2),  %%xmm2"            "\n\t"
1495      "movdqu    16(%2), %%xmm11"           "\n\t"
1496      "pcmpistri $0x30,  %%xmm2, %%xmm11"   "\n\t"
1497      "pushfq"                              "\n\t"
1498      "popq      %%rdx"                     "\n\t"
1499      "movq      %%rcx,  %0"                "\n\t"
1500      "movq      %%rdx,  %1"                "\n\t"
1501      "addq      $1024,  %%rsp"             "\n\t"
1502      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1503      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1504   );
1505   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1506}
1507
1508UInt s_pcmpistri_30 ( V128* argLU, V128* argRU )
1509{
1510   V128 resV;
1511   UInt resOSZACP, resECX;
1512   Bool ok
1513      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1514                       zmask_from_V128(argLU),
1515                       zmask_from_V128(argRU),
1516                       0x30, False/*!isSTRM*/
1517        );
1518   assert(ok);
1519   resECX = resV.uInt[0];
1520   return (resOSZACP << 16) | resECX;
1521}
1522
1523void istri_30 ( void )
1524{
1525   char* wot = "30";
1526   UInt(*h)(V128*,V128*) = h_pcmpistri_30;
1527   UInt(*s)(V128*,V128*) = s_pcmpistri_30;
1528
1529   try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1530   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1531   try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1532   try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1533
1534   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1535   try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1536   try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1537   try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1538   try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1539
1540   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1541   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1542   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1543   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1544
1545   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1546   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1547
1548   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1549   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1550   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1551   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1552
1553   try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1554
1555   try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1556   try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1557}
1558
1559
1560//////////////////////////////////////////////////////////
1561//                                                      //
1562//                       ISTRI_40                       //
1563//                                                      //
1564//////////////////////////////////////////////////////////
1565
1566UInt h_pcmpistri_40 ( V128* argL, V128* argR )
1567{
1568   V128 block[2];
1569   memcpy(&block[0], argL, sizeof(V128));
1570   memcpy(&block[1], argR, sizeof(V128));
1571   ULong res, flags;
1572   __asm__ __volatile__(
1573      "subq      $1024,  %%rsp"             "\n\t"
1574      "movdqu    0(%2),  %%xmm2"            "\n\t"
1575      "movdqu    16(%2), %%xmm11"           "\n\t"
1576      "pcmpistri $0x40,  %%xmm2, %%xmm11"   "\n\t"
1577      "pushfq"                              "\n\t"
1578      "popq      %%rdx"                     "\n\t"
1579      "movq      %%rcx,  %0"                "\n\t"
1580      "movq      %%rdx,  %1"                "\n\t"
1581      "addq      $1024,  %%rsp"             "\n\t"
1582      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1583      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1584   );
1585   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1586}
1587
1588UInt s_pcmpistri_40 ( V128* argLU, V128* argRU )
1589{
1590   V128 resV;
1591   UInt resOSZACP, resECX;
1592   Bool ok
1593      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1594                       zmask_from_V128(argLU),
1595                       zmask_from_V128(argRU),
1596                       0x40, False/*!isSTRM*/
1597        );
1598   assert(ok);
1599   resECX = resV.uInt[0];
1600   return (resOSZACP << 16) | resECX;
1601}
1602
1603void istri_40 ( void )
1604{
1605   char* wot = "40";
1606   UInt(*h)(V128*,V128*) = h_pcmpistri_40;
1607   UInt(*s)(V128*,V128*) = s_pcmpistri_40;
1608
1609   try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1610   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1611   try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1612   try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1613
1614   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1615   try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1616   try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1617   try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1618   try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1619
1620   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1621   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1622   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1623   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1624
1625   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1626   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1627
1628   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1629   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1630   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1631   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1632
1633   try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1634
1635   try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1636   try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1637}
1638
1639
1640//////////////////////////////////////////////////////////
1641//                                                      //
1642//                       ISTRI_42                       //
1643//                                                      //
1644//////////////////////////////////////////////////////////
1645
/* Executes "pcmpistri $0x42" natively through inline asm and packs what
   it produced into a single word.  (The "h_" prefix presumably stands
   for the hardware side of the h/s pair passed to try_istri -- confirm
   against try_istri.)

   argL and argR are copied into a local two-element block; block[0] is
   loaded into %xmm2 and block[1] into %xmm11, and those two registers
   are the instruction's operands.

   Returns: bits 0..15 are the low 16 bits of %rcx after the
   instruction; the flags word, masked with 0x8D5 (bits 11, 7, 6, 4, 2
   and 0 -- the MASK_O/S/Z/A/P/C positions defined at the top of this
   file), is placed from bit 16 upwards. */
UInt h_pcmpistri_42 ( V128* argL, V128* argR )
{
   V128 block[2];
   /* Stage both operands contiguously so one base register (%2) reaches
      them at offsets 0 and 16. */
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      /* %rsp is lowered by 1024 for the duration of the sequence and
         restored by the final addq; presumably this keeps pushfq from
         writing into the red zone -- TODO confirm. */
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x42,  %%xmm2, %%xmm11"   "\n\t"
      /* Read the flags register by pushing it and popping into %rdx. */
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   /* Selected flag bits above bit 16, low 16 bits of %rcx below. */
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
1667
1668UInt s_pcmpistri_42 ( V128* argLU, V128* argRU )
1669{
1670   V128 resV;
1671   UInt resOSZACP, resECX;
1672   Bool ok
1673      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1674                       zmask_from_V128(argLU),
1675                       zmask_from_V128(argRU),
1676                       0x42, False/*!isSTRM*/
1677        );
1678   assert(ok);
1679   resECX = resV.uInt[0];
1680   return (resOSZACP << 16) | resECX;
1681}
1682
1683void istri_42 ( void )
1684{
1685   char* wot = "42";
1686   UInt(*h)(V128*,V128*) = h_pcmpistri_42;
1687   UInt(*s)(V128*,V128*) = s_pcmpistri_42;
1688
1689   try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1690   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1691   try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1692   try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1693
1694   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1695   try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1696   try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1697   try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1698   try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1699
1700   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1701   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1702   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1703   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1704
1705   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1706   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1707
1708   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1709   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1710   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1711   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1712
1713   try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1714
1715   try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1716   try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1717}
1718
1719
1720//////////////////////////////////////////////////////////
1721//                                                      //
1722//                       ISTRI_0E                       //
1723//                                                      //
1724//////////////////////////////////////////////////////////
1725
1726__attribute__((noinline))
1727UInt h_pcmpistri_0E ( V128* argL, V128* argR )
1728{
1729   V128 block[2];
1730   memcpy(&block[0], argL, sizeof(V128));
1731   memcpy(&block[1], argR, sizeof(V128));
1732   ULong res = 0, flags = 0;
1733   __asm__ __volatile__(
1734      "movdqu    0(%2),  %%xmm2"            "\n\t"
1735      "movdqu    16(%2), %%xmm11"           "\n\t"
1736      "pcmpistri $0x0E,  %%xmm2, %%xmm11"   "\n\t"
1737      "pushfq"                              "\n\t"
1738      "popq      %%rdx"                     "\n\t"
1739      "movq      %%rcx,  %0"                "\n\t"
1740      "movq      %%rdx,  %1"                "\n\t"
1741      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1742      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1743   );
1744   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1745}
1746
1747UInt s_pcmpistri_0E ( V128* argLU, V128* argRU )
1748{
1749   V128 resV;
1750   UInt resOSZACP, resECX;
1751   Bool ok
1752      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1753                       zmask_from_V128(argLU),
1754                       zmask_from_V128(argRU),
1755                       0x0E, False/*!isSTRM*/
1756        );
1757   assert(ok);
1758   resECX = resV.uInt[0];
1759   return (resOSZACP << 16) | resECX;
1760}
1761
1762void istri_0E ( void )
1763{
1764   char* wot = "0E";
1765   UInt(*h)(V128*,V128*) = h_pcmpistri_0E;
1766   UInt(*s)(V128*,V128*) = s_pcmpistri_0E;
1767
1768   try_istri(wot,h,s, "111111111abcde11", "00000000000abcde");
1769
1770   try_istri(wot,h,s, "111111111abcde11", "0000abcde00abcde");
1771
1772   try_istri(wot,h,s, "1111111111abcde1", "00000000000abcde");
1773   try_istri(wot,h,s, "11111111111abcde", "00000000000abcde");
1774   try_istri(wot,h,s, "111111111111abcd", "00000000000abcde");
1775
1776   try_istri(wot,h,s, "111abcde1abcde11", "00000000000abcde");
1777
1778   try_istri(wot,h,s, "11abcde11abcde11", "00000000000abcde");
1779   try_istri(wot,h,s, "1abcde111abcde11", "00000000000abcde");
1780   try_istri(wot,h,s, "abcde1111abcde11", "00000000000abcde");
1781   try_istri(wot,h,s, "bcde11111abcde11", "00000000000abcde");
1782   try_istri(wot,h,s, "cde111111abcde11", "00000000000abcde");
1783
1784   try_istri(wot,h,s, "01abcde11abcde11", "00000000000abcde");
1785   try_istri(wot,h,s, "00abcde11abcde11", "00000000000abcde");
1786   try_istri(wot,h,s, "000bcde11abcde11", "00000000000abcde");
1787
1788   try_istri(wot,h,s, "00abcde10abcde11", "00000000000abcde");
1789   try_istri(wot,h,s, "00abcde100bcde11", "00000000000abcde");
1790
1791   try_istri(wot,h,s, "1111111111111234", "0000000000000000");
1792   try_istri(wot,h,s, "1111111111111234", "0000000000000001");
1793   try_istri(wot,h,s, "1111111111111234", "0000000000000011");
1794
1795   try_istri(wot,h,s, "1111111111111234", "1111111111111234");
1796   try_istri(wot,h,s, "a111111111111111", "000000000000000a");
1797   try_istri(wot,h,s, "b111111111111111", "000000000000000a");
1798
1799   try_istri(wot,h,s, "b111111111111111", "0000000000000000");
1800   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1801   try_istri(wot,h,s, "123456789abcdef1", "0000000000000000");
1802   try_istri(wot,h,s, "0000000000000000", "123456789abcdef1");
1803}
1804
1805
1806//////////////////////////////////////////////////////////
1807//                                                      //
1808//                       ISTRI_34                       //
1809//                                                      //
1810//////////////////////////////////////////////////////////
1811
/* Executes "pcmpistri $0x34" natively through inline asm and packs what
   it produced into a single word.  (The "h_" prefix presumably stands
   for the hardware side of the h/s pair passed to try_istri -- confirm
   against try_istri.)

   argL and argR are copied into a local two-element block; block[0] is
   loaded into %xmm2 and block[1] into %xmm11, and those two registers
   are the instruction's operands.

   Returns: bits 0..15 are the low 16 bits of %rcx after the
   instruction; the flags word, masked with 0x8D5 (bits 11, 7, 6, 4, 2
   and 0 -- the MASK_O/S/Z/A/P/C positions defined at the top of this
   file), is placed from bit 16 upwards. */
UInt h_pcmpistri_34 ( V128* argL, V128* argR )
{
   V128 block[2];
   /* Stage both operands contiguously so one base register (%2) reaches
      them at offsets 0 and 16. */
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      /* %rsp is lowered by 1024 for the duration of the sequence and
         restored by the final addq; presumably this keeps pushfq from
         writing into the red zone -- TODO confirm. */
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x34,  %%xmm2, %%xmm11"   "\n\t"
      /* Read the flags register by pushing it and popping into %rdx. */
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   /* Selected flag bits above bit 16, low 16 bits of %rcx below. */
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
1833
1834UInt s_pcmpistri_34 ( V128* argLU, V128* argRU )
1835{
1836   V128 resV;
1837   UInt resOSZACP, resECX;
1838   Bool ok
1839      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1840                       zmask_from_V128(argLU),
1841                       zmask_from_V128(argRU),
1842                       0x34, False/*!isSTRM*/
1843        );
1844   assert(ok);
1845   resECX = resV.uInt[0];
1846   return (resOSZACP << 16) | resECX;
1847}
1848
1849void istri_34 ( void )
1850{
1851   char* wot = "34";
1852   UInt(*h)(V128*,V128*) = h_pcmpistri_34;
1853   UInt(*s)(V128*,V128*) = s_pcmpistri_34;
1854
1855   try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc");
1856   try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb");
1857   try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb");
1858   try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb");
1859
1860   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1861   try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb");
1862   try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb");
1863   try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb");
1864   try_istri(wot,h,s, "0000000000000000", "00000000000000cb");
1865
1866   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1867
1868   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1869   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b");
1870   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb");
1871
1872   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb");
1873   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb");
1874   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
1875
1876   try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
1877   try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
1878
1879   try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
1880   try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
1881
1882   try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a");
1883   try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
1884   try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54");
1885   try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
1886}
1887
1888
1889//////////////////////////////////////////////////////////
1890//                                                      //
1891//                       ISTRI_14                       //
1892//                                                      //
1893//////////////////////////////////////////////////////////
1894
/* Executes "pcmpistri $0x14" natively through inline asm and packs what
   it produced into a single word.  (The "h_" prefix presumably stands
   for the hardware side of the h/s pair passed to try_istri -- confirm
   against try_istri.)

   argL and argR are copied into a local two-element block; block[0] is
   loaded into %xmm2 and block[1] into %xmm11, and those two registers
   are the instruction's operands.

   Returns: bits 0..15 are the low 16 bits of %rcx after the
   instruction; the flags word, masked with 0x8D5 (bits 11, 7, 6, 4, 2
   and 0 -- the MASK_O/S/Z/A/P/C positions defined at the top of this
   file), is placed from bit 16 upwards. */
UInt h_pcmpistri_14 ( V128* argL, V128* argR )
{
   V128 block[2];
   /* Stage both operands contiguously so one base register (%2) reaches
      them at offsets 0 and 16. */
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      /* %rsp is lowered by 1024 for the duration of the sequence and
         restored by the final addq; presumably this keeps pushfq from
         writing into the red zone -- TODO confirm. */
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x14,  %%xmm2, %%xmm11"   "\n\t"
      /* Read the flags register by pushing it and popping into %rdx. */
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   /* Selected flag bits above bit 16, low 16 bits of %rcx below. */
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
1916
1917UInt s_pcmpistri_14 ( V128* argLU, V128* argRU )
1918{
1919   V128 resV;
1920   UInt resOSZACP, resECX;
1921   Bool ok
1922      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1923                       zmask_from_V128(argLU),
1924                       zmask_from_V128(argRU),
1925                       0x14, False/*!isSTRM*/
1926        );
1927   assert(ok);
1928   resECX = resV.uInt[0];
1929   return (resOSZACP << 16) | resECX;
1930}
1931
1932void istri_14 ( void )
1933{
1934   char* wot = "14";
1935   UInt(*h)(V128*,V128*) = h_pcmpistri_14;
1936   UInt(*s)(V128*,V128*) = s_pcmpistri_14;
1937
1938   try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc");
1939   try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb");
1940   try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb");
1941   try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb");
1942
1943   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1944   try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb");
1945   try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb");
1946   try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb");
1947   try_istri(wot,h,s, "0000000000000000", "00000000000000cb");
1948
1949   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1950
1951   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1952   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b");
1953   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb");
1954
1955   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb");
1956   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb");
1957   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
1958
1959   try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
1960   try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
1961
1962   try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
1963   try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
1964
1965   try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a");
1966   try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
1967   try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54");
1968   try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
1969}
1970
1971
1972//////////////////////////////////////////////////////////
1973//                                                      //
1974//                         main                         //
1975//                                                      //
1976//////////////////////////////////////////////////////////
1977
/* Program entry point: invokes every istri_XX group once, in the fixed
   order listed below, and then returns 0.  No return value from the
   groups is inspected here. */
int main ( void )
{
   istri_4A();
   istri_3A();
   istri_08();
   istri_18();
   istri_1A();
   istri_02();
   istri_0C();
   istri_12();
   istri_44();
   istri_00();
   istri_38();
   istri_46();
   istri_30();
   istri_40();
   istri_42();
   istri_0E();
   /* Note: 14 runs before 34 here, the reverse of the order in which
      the two groups are defined above. */
   istri_14();
   istri_34();
   return 0;
}
2000