pcmpstr64.c revision 436e89c602e787e7a27dd6624b09beed41a0da8a
1
2/* Tests in detail the core arithmetic for pcmp{e,i}str{i,m} using
3   pcmpistri to drive it.  Does not check the e-vs-i or i-vs-m
4   aspect. */
5
6#include <string.h>
7#include <stdio.h>
8#include <assert.h>
9
/* Short integer aliases used throughout this file. */
typedef unsigned int            UInt;
typedef signed int              Int;
typedef unsigned char           UChar;
typedef signed char             Char;
typedef unsigned long long int  ULong;

/* Boolean held in a single unsigned byte, with its two canonical values. */
typedef UChar Bool;
#define False ((Bool)0)
#define True  ((Bool)1)
18
19//typedef  unsigned char  V128[16];
20typedef
21   union {
22      UChar uChar[16];
23      UInt  uInt[4];
24   }
25   V128;
26
/* Bit positions of the six condition flags (O, S, Z, A, C, P) inside
   the flags word, listed from least to most significant. */
#define SHIFT_C   0
#define SHIFT_P   2
#define SHIFT_A   4
#define SHIFT_Z   6
#define SHIFT_S   7
#define SHIFT_O   11

/* Single-bit masks derived from the positions above. */
#define MASK_C    (1ULL << SHIFT_C)
#define MASK_P    (1ULL << SHIFT_P)
#define MASK_A    (1ULL << SHIFT_A)
#define MASK_Z    (1ULL << SHIFT_Z)
#define MASK_S    (1ULL << SHIFT_S)
#define MASK_O    (1ULL << SHIFT_O)
40
41
42UInt clz32 ( UInt x )
43{
44   Int y, m, n;
45   y = -(x >> 16);
46   m = (y >> 16) & 16;
47   n = 16 - m;
48   x = x >> m;
49   y = x - 0x100;
50   m = (y >> 16) & 8;
51   n = n + m;
52   x = x << m;
53   y = x - 0x1000;
54   m = (y >> 16) & 4;
55   n = n + m;
56   x = x << m;
57   y = x - 0x4000;
58   m = (y >> 16) & 2;
59   n = n + m;
60   x = x << m;
61   y = x >> 14;
62   m = y & ~(y >> 1);
63   return n + 2 - m;
64}
65
66UInt ctz32 ( UInt x )
67{
68   return 32 - clz32((~x) & (x-1));
69}
70
71void expand ( V128* dst, char* summary )
72{
73   Int i;
74   assert( strlen(summary) == 16 );
75   for (i = 0; i < 16; i++) {
76      UChar xx = 0;
77      UChar x = summary[15-i];
78      if      (x >= '0' && x <= '9') { xx = x - '0'; }
79      else if (x >= 'A' && x <= 'F') { xx = x - 'A' + 10; }
80      else if (x >= 'a' && x <= 'f') { xx = x - 'a' + 10; }
81      else assert(0);
82
83      assert(xx < 16);
84      xx = (xx << 4) | xx;
85      assert(xx < 256);
86      dst->uChar[i] = xx;
87   }
88}
89
90void try_istri ( char* which,
91                 UInt(*h_fn)(V128*,V128*),
92                 UInt(*s_fn)(V128*,V128*),
93                 char* summL, char* summR )
94{
95   assert(strlen(which) == 2);
96   V128 argL, argR;
97   expand(&argL, summL);
98   expand(&argR, summR);
99   UInt h_res = h_fn(&argL, &argR);
100   UInt s_res = s_fn(&argL, &argR);
101   printf("istri %s  %s %s -> %08x %08x %s\n",
102          which, summL, summR, h_res, s_res, h_res == s_res ? "" : "!!!!");
103}
104
105UInt zmask_from_V128 ( V128* arg )
106{
107   UInt i, res = 0;
108   for (i = 0; i < 16; i++) {
109      res |=  ((arg->uChar[i] == 0) ? 1 : 0) << i;
110   }
111   return res;
112}
113
114//////////////////////////////////////////////////////////
115//                                                      //
116//                       GENERAL                        //
117//                                                      //
118//////////////////////////////////////////////////////////
119
120
121/* Given partial results from a pcmpXstrX operation (intRes1,
122   basically), generate an I format (index value for ECX) output, and
123   also the new OSZACP flags.
124*/
125static
126void pcmpXstrX_WRK_gen_output_fmt_I(/*OUT*/V128* resV,
127                                    /*OUT*/UInt* resOSZACP,
128                                    UInt intRes1,
129                                    UInt zmaskL, UInt zmaskR,
130                                    UInt validL,
131                                    UInt pol, UInt idx )
132{
133   assert((pol >> 2) == 0);
134   assert((idx >> 1) == 0);
135
136   UInt intRes2 = 0;
137   switch (pol) {
138      case 0: intRes2 = intRes1;          break; // pol +
139      case 1: intRes2 = ~intRes1;         break; // pol -
140      case 2: intRes2 = intRes1;          break; // pol m+
141      case 3: intRes2 = intRes1 ^ validL; break; // pol m-
142   }
143   intRes2 &= 0xFFFF;
144
145   // generate ecx value
146   UInt newECX = 0;
147   if (idx) {
148     // index of ms-1-bit
149     newECX = intRes2 == 0 ? 16 : (31 - clz32(intRes2));
150   } else {
151     // index of ls-1-bit
152     newECX = intRes2 == 0 ? 16 : ctz32(intRes2);
153   }
154
155   *(UInt*)(&resV[0]) = newECX;
156
157   // generate new flags, common to all ISTRI and ISTRM cases
158   *resOSZACP    // A, P are zero
159     = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0
160     | ((zmaskL == 0)  ? 0 : MASK_Z) // Z == 1 iff any in argL is 0
161     | ((zmaskR == 0)  ? 0 : MASK_S) // S == 1 iff any in argR is 0
162     | ((intRes2 & 1) << SHIFT_O);   // O == IntRes2[0]
163}
164
165
166/* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M}
167   variants.
168
169   For xSTRI variants, the new ECX value is placed in the 32 bits
170   pointed to by *resV.  For xSTRM variants, the result is a 128 bit
171   value and is placed at *resV in the obvious way.
172
173   For all variants, the new OSZACP value is placed at *resOSZACP.
174
175   argLV and argRV are the vector args.  The caller must prepare a
176   16-bit mask for each, zmaskL and zmaskR.  For ISTRx variants this
177   must be 1 for each zero byte of of the respective arg.  For ESTRx
178   variants this is derived from the explicit length indication, and
179   must be 0 in all places except at the bit index corresponding to
180   the valid length (0 .. 16).  If the valid length is 16 then the
181   mask must be all zeroes.  In all cases, bits 31:16 must be zero.
182
183   imm8 is the original immediate from the instruction.  isSTRM
184   indicates whether this is a xSTRM or xSTRI variant, which controls
185   how much of *res is written.
186
187   If the given imm8 case can be handled, the return value is True.
188   If not, False is returned, and neither *res not *resOSZACP are
189   altered.
190*/
191
192Bool pcmpXstrX_WRK ( /*OUT*/V128* resV,
193                     /*OUT*/UInt* resOSZACP,
194                     V128* argLV,  V128* argRV,
195                     UInt zmaskL, UInt zmaskR,
196                     UInt imm8,   Bool isSTRM )
197{
198   assert(imm8 < 0x80);
199   assert((zmaskL >> 16) == 0);
200   assert((zmaskR >> 16) == 0);
201
202   /* Explicitly reject any imm8 values that haven't been validated,
203      even if they would probably work.  Life is too short to have
204      unvalidated cases in the code base. */
205   switch (imm8) {
206      case 0x00:
207      case 0x02: case 0x08: case 0x0C: case 0x12: case 0x1A:
208      case 0x38: case 0x3A: case 0x44: case 0x46: case 0x4A:
209      case 0x30: case 0x40:
210         break;
211      default:
212         return False;
213   }
214
215   UInt fmt = (imm8 >> 0) & 3; // imm8[1:0]  data format
216   UInt agg = (imm8 >> 2) & 3; // imm8[3:2]  aggregation fn
217   UInt pol = (imm8 >> 4) & 3; // imm8[5:4]  polarity
218   UInt idx = (imm8 >> 6) & 1; // imm8[6]    1==msb/bytemask
219
220   /*----------------------------------------*/
221   /*-- strcmp on byte data                --*/
222   /*----------------------------------------*/
223
224   if (agg == 2/*equal each, aka strcmp*/
225       && (fmt == 0/*ub*/ || fmt == 2/*sb*/)
226       && !isSTRM) {
227      Int    i;
228      UChar* argL = (UChar*)argLV;
229      UChar* argR = (UChar*)argRV;
230      UInt boolResII = 0;
231      for (i = 15; i >= 0; i--) {
232         UChar cL  = argL[i];
233         UChar cR  = argR[i];
234         boolResII = (boolResII << 1) | (cL == cR ? 1 : 0);
235      }
236      UInt validL = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
237      UInt validR = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
238
239      // do invalidation, common to all equal-each cases
240      UInt intRes1
241         = (boolResII & validL & validR)  // if both valid, use cmpres
242           | (~ (validL | validR));       // if both invalid, force 1
243                                          // else force 0
244      intRes1 &= 0xFFFF;
245
246      // generate I-format output
247      pcmpXstrX_WRK_gen_output_fmt_I(
248         resV, resOSZACP,
249         intRes1, zmaskL, zmaskR, validL, pol, idx
250      );
251
252      return True;
253   }
254
255   /*----------------------------------------*/
256   /*-- set membership on byte data        --*/
257   /*----------------------------------------*/
258
259   if (agg == 0/*equal any, aka find chars in a set*/
260       && (fmt == 0/*ub*/ || fmt == 2/*sb*/)
261       && !isSTRM) {
262      /* argL: the string,  argR: charset */
263      UInt   si, ci;
264      UChar* argL    = (UChar*)argLV;
265      UChar* argR    = (UChar*)argRV;
266      UInt   boolRes = 0;
267      UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
268      UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
269
270      for (si = 0; si < 16; si++) {
271         if ((validL & (1 << si)) == 0)
272            // run off the end of the string.
273            break;
274         UInt m = 0;
275         for (ci = 0; ci < 16; ci++) {
276            if ((validR & (1 << ci)) == 0) break;
277            if (argR[ci] == argL[si]) { m = 1; break; }
278         }
279         boolRes |= (m << si);
280      }
281
282      // boolRes is "pre-invalidated"
283      UInt intRes1 = boolRes & 0xFFFF;
284
285      // generate I-format output
286      pcmpXstrX_WRK_gen_output_fmt_I(
287         resV, resOSZACP,
288         intRes1, zmaskL, zmaskR, validL, pol, idx
289      );
290
291      return True;
292   }
293
294   /*----------------------------------------*/
295   /*-- substring search on byte data      --*/
296   /*----------------------------------------*/
297
298   if (agg == 3/*equal ordered, aka substring search*/
299       && (fmt == 0/*ub*/ || fmt == 2/*sb*/)
300       && !isSTRM) {
301
302      /* argL: haystack,  argR: needle */
303      UInt   ni, hi;
304      UChar* argL    = (UChar*)argLV;
305      UChar* argR    = (UChar*)argRV;
306      UInt   boolRes = 0;
307      UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
308      UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
309      for (hi = 0; hi < 16; hi++) {
310         UInt m = 1;
311         for (ni = 0; ni < 16; ni++) {
312            if ((validR & (1 << ni)) == 0) break;
313            UInt i = ni + hi;
314            if (i >= 16) break;
315            if (argL[i] != argR[ni]) { m = 0; break; }
316         }
317         boolRes |= (m << hi);
318         if ((validL & (1 << hi)) == 0)
319            // run off the end of the haystack
320            break;
321      }
322
323      // boolRes is "pre-invalidated"
324      UInt intRes1 = boolRes & 0xFFFF;
325
326      // generate I-format output
327      pcmpXstrX_WRK_gen_output_fmt_I(
328         resV, resOSZACP,
329         intRes1, zmaskL, zmaskR, validL, pol, idx
330      );
331
332      return True;
333   }
334
335   /*----------------------------------------*/
336   /*-- ranges, unsigned byte data         --*/
337   /*----------------------------------------*/
338
339   if (agg == 1/*ranges*/
340       && fmt == 0/*ub*/
341       && !isSTRM) {
342
343      /* argL: string,  argR: range-pairs */
344      UInt   ri, si;
345      UChar* argL    = (UChar*)argLV;
346      UChar* argR    = (UChar*)argRV;
347      UInt   boolRes = 0;
348      UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
349      UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
350      for (si = 0; si < 16; si++) {
351         if ((validL & (1 << si)) == 0)
352            // run off the end of the string
353            break;
354         UInt m = 0;
355         for (ri = 0; ri < 16; ri += 2) {
356            if ((validR & (3 << ri)) != (3 << ri)) break;
357            if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
358               m = 1; break;
359            }
360         }
361         boolRes |= (m << si);
362      }
363
364      // boolRes is "pre-invalidated"
365      UInt intRes1 = boolRes & 0xFFFF;
366
367      // generate I-format output
368      pcmpXstrX_WRK_gen_output_fmt_I(
369         resV, resOSZACP,
370         intRes1, zmaskL, zmaskR, validL, pol, idx
371      );
372
373      return True;
374   }
375
376   /*----------------------------------------*/
377   /*-- ranges, signed byte data           --*/
378   /*----------------------------------------*/
379
380   if (agg == 1/*ranges*/
381       && fmt == 2/*sb*/
382       && !isSTRM) {
383
384      /* argL: string,  argR: range-pairs */
385      UInt   ri, si;
386      Char*  argL    = (Char*)argLV;
387      Char*  argR    = (Char*)argRV;
388      UInt   boolRes = 0;
389      UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
390      UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
391      for (si = 0; si < 16; si++) {
392         if ((validL & (1 << si)) == 0)
393            // run off the end of the string
394            break;
395         UInt m = 0;
396         for (ri = 0; ri < 16; ri += 2) {
397            if ((validR & (3 << ri)) != (3 << ri)) break;
398            if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
399               m = 1; break;
400            }
401         }
402         boolRes |= (m << si);
403      }
404
405      // boolRes is "pre-invalidated"
406      UInt intRes1 = boolRes & 0xFFFF;
407
408      // generate I-format output
409      pcmpXstrX_WRK_gen_output_fmt_I(
410         resV, resOSZACP,
411         intRes1, zmaskL, zmaskR, validL, pol, idx
412      );
413
414      return True;
415   }
416
417   return False;
418}
419
420
421//////////////////////////////////////////////////////////
422//                                                      //
423//                       ISTRI_4A                       //
424//                                                      //
425//////////////////////////////////////////////////////////
426
427UInt h_pcmpistri_4A ( V128* argL, V128* argR )
428{
429   V128 block[2];
430   memcpy(&block[0], argL, sizeof(V128));
431   memcpy(&block[1], argR, sizeof(V128));
432   ULong res, flags;
433   __asm__ __volatile__(
434      "subq      $1024,  %%rsp"             "\n\t"
435      "movdqu    0(%2),  %%xmm2"            "\n\t"
436      "movdqu    16(%2), %%xmm11"           "\n\t"
437      "pcmpistri $0x4A,  %%xmm2, %%xmm11"   "\n\t"
438      "pushfq"                              "\n\t"
439      "popq      %%rdx"                     "\n\t"
440      "movq      %%rcx,  %0"                "\n\t"
441      "movq      %%rdx,  %1"                "\n\t"
442      "addq      $1024,  %%rsp"             "\n\t"
443      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
444      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
445   );
446   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
447}
448
449UInt s_pcmpistri_4A ( V128* argLU, V128* argRU )
450{
451   V128 resV;
452   UInt resOSZACP, resECX;
453   Bool ok
454      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
455                       zmask_from_V128(argLU),
456                       zmask_from_V128(argRU),
457                       0x4A, False/*!isSTRM*/
458        );
459   assert(ok);
460   resECX = resV.uInt[0];
461   return (resOSZACP << 16) | resECX;
462}
463
464void istri_4A ( void )
465{
466   char* wot = "4A";
467   UInt(*h)(V128*,V128*) = h_pcmpistri_4A;
468   UInt(*s)(V128*,V128*) = s_pcmpistri_4A;
469
470   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
471
472   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
473   try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
474   try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
475   try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
476
477   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
478   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
479   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
480
481   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
482   try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
483   try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
484   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
485
486   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
487   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
488   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
489
490   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
491
492   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
493   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
494   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
495
496   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
497   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
498   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
499
500   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
501   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
502   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
503
504   try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
505   try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
506   try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
507
508   try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
509   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
510}
511
512//////////////////////////////////////////////////////////
513//                                                      //
514//                       ISTRI_3A                       //
515//                                                      //
516//////////////////////////////////////////////////////////
517
518UInt h_pcmpistri_3A ( V128* argL, V128* argR )
519{
520   V128 block[2];
521   memcpy(&block[0], argL, sizeof(V128));
522   memcpy(&block[1], argR, sizeof(V128));
523   ULong res, flags;
524   __asm__ __volatile__(
525      "subq      $1024,  %%rsp"             "\n\t"
526      "movdqu    0(%2),  %%xmm2"            "\n\t"
527      "movdqu    16(%2), %%xmm11"           "\n\t"
528      "pcmpistri $0x3A,  %%xmm2, %%xmm11"   "\n\t"
529      "pushfq"                              "\n\t"
530      "popq      %%rdx"                     "\n\t"
531      "movq      %%rcx,  %0"                "\n\t"
532      "movq      %%rdx,  %1"                "\n\t"
533      "addq      $1024,  %%rsp"             "\n\t"
534      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
535      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
536   );
537   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
538}
539
540UInt s_pcmpistri_3A ( V128* argLU, V128* argRU )
541{
542   V128 resV;
543   UInt resOSZACP, resECX;
544   Bool ok
545      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
546                       zmask_from_V128(argLU),
547                       zmask_from_V128(argRU),
548                       0x3A, False/*!isSTRM*/
549        );
550   assert(ok);
551   resECX = resV.uInt[0];
552   return (resOSZACP << 16) | resECX;
553}
554
555void istri_3A ( void )
556{
557   char* wot = "3A";
558   UInt(*h)(V128*,V128*) = h_pcmpistri_3A;
559   UInt(*s)(V128*,V128*) = s_pcmpistri_3A;
560
561   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
562
563   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
564   try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
565   try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
566   try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
567
568   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
569   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
570   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
571
572   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
573   try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
574   try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
575   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
576
577   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
578   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
579   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
580
581   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
582
583   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
584   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
585   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
586
587   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
588   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
589   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
590
591   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
592   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
593   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
594
595   try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
596   try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
597   try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
598
599   try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
600   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
601}
602
603
604
605//////////////////////////////////////////////////////////
606//                                                      //
607//                       ISTRI_0C                       //
608//                                                      //
609//////////////////////////////////////////////////////////
610
611__attribute__((noinline))
612UInt h_pcmpistri_0C ( V128* argL, V128* argR )
613{
614   V128 block[2];
615   memcpy(&block[0], argL, sizeof(V128));
616   memcpy(&block[1], argR, sizeof(V128));
617   ULong res = 0, flags = 0;
618   __asm__ __volatile__(
619      "movdqu    0(%2),  %%xmm2"            "\n\t"
620      "movdqu    16(%2), %%xmm11"           "\n\t"
621      "pcmpistri $0x0C,  %%xmm2, %%xmm11"   "\n\t"
622      //"pcmpistrm $0x0C,  %%xmm2, %%xmm11"   "\n\t"
623      //"movd %%xmm0, %%ecx" "\n\t"
624      "pushfq"                              "\n\t"
625      "popq      %%rdx"                     "\n\t"
626      "movq      %%rcx,  %0"                "\n\t"
627      "movq      %%rdx,  %1"                "\n\t"
628      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
629      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
630   );
631   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
632}
633
634UInt s_pcmpistri_0C ( V128* argLU, V128* argRU )
635{
636   V128 resV;
637   UInt resOSZACP, resECX;
638   Bool ok
639      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
640                       zmask_from_V128(argLU),
641                       zmask_from_V128(argRU),
642                       0x0C, False/*!isSTRM*/
643        );
644   assert(ok);
645   resECX = resV.uInt[0];
646   return (resOSZACP << 16) | resECX;
647}
648
649void istri_0C ( void )
650{
651   char* wot = "0C";
652   UInt(*h)(V128*,V128*) = h_pcmpistri_0C;
653   UInt(*s)(V128*,V128*) = s_pcmpistri_0C;
654
655   try_istri(wot,h,s, "111111111abcde11", "00000000000abcde");
656
657   try_istri(wot,h,s, "111111111abcde11", "0000abcde00abcde");
658
659   try_istri(wot,h,s, "1111111111abcde1", "00000000000abcde");
660   try_istri(wot,h,s, "11111111111abcde", "00000000000abcde");
661   try_istri(wot,h,s, "111111111111abcd", "00000000000abcde");
662
663   try_istri(wot,h,s, "111abcde1abcde11", "00000000000abcde");
664
665   try_istri(wot,h,s, "11abcde11abcde11", "00000000000abcde");
666   try_istri(wot,h,s, "1abcde111abcde11", "00000000000abcde");
667   try_istri(wot,h,s, "abcde1111abcde11", "00000000000abcde");
668   try_istri(wot,h,s, "bcde11111abcde11", "00000000000abcde");
669   try_istri(wot,h,s, "cde111111abcde11", "00000000000abcde");
670
671   try_istri(wot,h,s, "01abcde11abcde11", "00000000000abcde");
672   try_istri(wot,h,s, "00abcde11abcde11", "00000000000abcde");
673   try_istri(wot,h,s, "000bcde11abcde11", "00000000000abcde");
674
675   try_istri(wot,h,s, "00abcde10abcde11", "00000000000abcde");
676   try_istri(wot,h,s, "00abcde100bcde11", "00000000000abcde");
677
678   try_istri(wot,h,s, "1111111111111234", "0000000000000000");
679   try_istri(wot,h,s, "1111111111111234", "0000000000000001");
680   try_istri(wot,h,s, "1111111111111234", "0000000000000011");
681
682   try_istri(wot,h,s, "1111111111111234", "1111111111111234");
683   try_istri(wot,h,s, "a111111111111111", "000000000000000a");
684   try_istri(wot,h,s, "b111111111111111", "000000000000000a");
685
686   try_istri(wot,h,s, "b111111111111111", "0000000000000000");
687   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
688   try_istri(wot,h,s, "123456789abcdef1", "0000000000000000");
689   try_istri(wot,h,s, "0000000000000000", "123456789abcdef1");
690}
691
692
693//////////////////////////////////////////////////////////
694//                                                      //
695//                       ISTRI_08                       //
696//                                                      //
697//////////////////////////////////////////////////////////
698
699UInt h_pcmpistri_08 ( V128* argL, V128* argR )
700{
701   V128 block[2];
702   memcpy(&block[0], argL, sizeof(V128));
703   memcpy(&block[1], argR, sizeof(V128));
704   ULong res, flags;
705   __asm__ __volatile__(
706      "subq      $1024,  %%rsp"             "\n\t"
707      "movdqu    0(%2),  %%xmm2"            "\n\t"
708      "movdqu    16(%2), %%xmm11"           "\n\t"
709      "pcmpistri $0x08,  %%xmm2, %%xmm11"   "\n\t"
710      "pushfq"                              "\n\t"
711      "popq      %%rdx"                     "\n\t"
712      "movq      %%rcx,  %0"                "\n\t"
713      "movq      %%rdx,  %1"                "\n\t"
714      "addq      $1024,  %%rsp"             "\n\t"
715      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
716      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
717   );
718   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
719}
720
721UInt s_pcmpistri_08 ( V128* argLU, V128* argRU )
722{
723   V128 resV;
724   UInt resOSZACP, resECX;
725   Bool ok
726      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
727                       zmask_from_V128(argLU),
728                       zmask_from_V128(argRU),
729                       0x08, False/*!isSTRM*/
730        );
731   assert(ok);
732   resECX = resV.uInt[0];
733   return (resOSZACP << 16) | resECX;
734}
735
736void istri_08 ( void )
737{
738   char* wot = "08";
739   UInt(*h)(V128*,V128*) = h_pcmpistri_08;
740   UInt(*s)(V128*,V128*) = s_pcmpistri_08;
741
742   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
743
744   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
745   try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
746   try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
747   try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
748
749   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
750   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
751   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
752
753   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
754   try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
755   try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
756   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
757
758   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
759   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
760   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
761
762   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
763
764   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
765   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
766   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
767
768   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
769   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
770   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
771
772   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
773   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
774   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
775
776   try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
777   try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
778   try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
779
780   try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
781   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
782}
783
784
785
786//////////////////////////////////////////////////////////
787//                                                      //
788//                       ISTRI_1A                       //
789//                                                      //
790//////////////////////////////////////////////////////////
791
792UInt h_pcmpistri_1A ( V128* argL, V128* argR )
793{
794   V128 block[2];
795   memcpy(&block[0], argL, sizeof(V128));
796   memcpy(&block[1], argR, sizeof(V128));
797   ULong res, flags;
798   __asm__ __volatile__(
799      "subq      $1024,  %%rsp"             "\n\t"
800      "movdqu    0(%2),  %%xmm2"            "\n\t"
801      "movdqu    16(%2), %%xmm11"           "\n\t"
802      "pcmpistri $0x1A,  %%xmm2, %%xmm11"   "\n\t"
803      "pushfq"                              "\n\t"
804      "popq      %%rdx"                     "\n\t"
805      "movq      %%rcx,  %0"                "\n\t"
806      "movq      %%rdx,  %1"                "\n\t"
807      "addq      $1024,  %%rsp"             "\n\t"
808      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
809      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
810   );
811   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
812}
813
814UInt s_pcmpistri_1A ( V128* argLU, V128* argRU )
815{
816   V128 resV;
817   UInt resOSZACP, resECX;
818   Bool ok
819      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
820                       zmask_from_V128(argLU),
821                       zmask_from_V128(argRU),
822                       0x1A, False/*!isSTRM*/
823        );
824   assert(ok);
825   resECX = resV.uInt[0];
826   return (resOSZACP << 16) | resECX;
827}
828
829void istri_1A ( void )
830{
831   char* wot = "1A";
832   UInt(*h)(V128*,V128*) = h_pcmpistri_1A;
833   UInt(*s)(V128*,V128*) = s_pcmpistri_1A;
834
835   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
836
837   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
838   try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
839   try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
840   try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
841
842   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
843   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
844   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
845
846   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
847   try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
848   try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
849   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
850
851   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
852   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
853   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
854
855   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
856
857   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
858   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
859   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
860
861   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
862   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
863   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
864
865   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
866   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
867   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
868
869   try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
870   try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
871   try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
872
873   try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
874   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
875}
876
877
878
879//////////////////////////////////////////////////////////
880//                                                      //
881//                       ISTRI_02                       //
882//                                                      //
883//////////////////////////////////////////////////////////
884
885UInt h_pcmpistri_02 ( V128* argL, V128* argR )
886{
887   V128 block[2];
888   memcpy(&block[0], argL, sizeof(V128));
889   memcpy(&block[1], argR, sizeof(V128));
890   ULong res, flags;
891   __asm__ __volatile__(
892      "subq      $1024,  %%rsp"             "\n\t"
893      "movdqu    0(%2),  %%xmm2"            "\n\t"
894      "movdqu    16(%2), %%xmm11"           "\n\t"
895      "pcmpistri $0x02,  %%xmm2, %%xmm11"   "\n\t"
896//"pcmpistrm $0x02, %%xmm2, %%xmm11"   "\n\t"
897//"movd %%xmm0, %%ecx" "\n\t"
898      "pushfq"                              "\n\t"
899      "popq      %%rdx"                     "\n\t"
900      "movq      %%rcx,  %0"                "\n\t"
901      "movq      %%rdx,  %1"                "\n\t"
902      "addq      $1024,  %%rsp"             "\n\t"
903      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
904      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
905   );
906   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
907}
908
909UInt s_pcmpistri_02 ( V128* argLU, V128* argRU )
910{
911   V128 resV;
912   UInt resOSZACP, resECX;
913   Bool ok
914      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
915                       zmask_from_V128(argLU),
916                       zmask_from_V128(argRU),
917                       0x02, False/*!isSTRM*/
918        );
919   assert(ok);
920   resECX = resV.uInt[0];
921   return (resOSZACP << 16) | resECX;
922}
923
924void istri_02 ( void )
925{
926   char* wot = "02";
927   UInt(*h)(V128*,V128*) = h_pcmpistri_02;
928   UInt(*s)(V128*,V128*) = s_pcmpistri_02;
929
930   try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
931   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
932   try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
933   try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
934
935   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
936   try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
937   try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
938   try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
939   try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
940
941   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
942   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
943   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
944   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
945
946   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
947   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
948
949   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
950   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
951   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
952   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
953
954   try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
955
956   try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
957   try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
958}
959
960
961//////////////////////////////////////////////////////////
962//                                                      //
963//                       ISTRI_12                       //
964//                                                      //
965//////////////////////////////////////////////////////////
966
967UInt h_pcmpistri_12 ( V128* argL, V128* argR )
968{
969   V128 block[2];
970   memcpy(&block[0], argL, sizeof(V128));
971   memcpy(&block[1], argR, sizeof(V128));
972   ULong res, flags;
973   __asm__ __volatile__(
974      "subq      $1024,  %%rsp"             "\n\t"
975      "movdqu    0(%2),  %%xmm2"            "\n\t"
976      "movdqu    16(%2), %%xmm11"           "\n\t"
977      "pcmpistri $0x12,  %%xmm2, %%xmm11"   "\n\t"
978//"pcmpistrm $0x12, %%xmm2, %%xmm11"   "\n\t"
979//"movd %%xmm0, %%ecx" "\n\t"
980      "pushfq"                              "\n\t"
981      "popq      %%rdx"                     "\n\t"
982      "movq      %%rcx,  %0"                "\n\t"
983      "movq      %%rdx,  %1"                "\n\t"
984      "addq      $1024,  %%rsp"             "\n\t"
985      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
986      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
987   );
988   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
989}
990
991UInt s_pcmpistri_12 ( V128* argLU, V128* argRU )
992{
993   V128 resV;
994   UInt resOSZACP, resECX;
995   Bool ok
996      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
997                       zmask_from_V128(argLU),
998                       zmask_from_V128(argRU),
999                       0x12, False/*!isSTRM*/
1000        );
1001   assert(ok);
1002   resECX = resV.uInt[0];
1003   return (resOSZACP << 16) | resECX;
1004}
1005
1006void istri_12 ( void )
1007{
1008   char* wot = "12";
1009   UInt(*h)(V128*,V128*) = h_pcmpistri_12;
1010   UInt(*s)(V128*,V128*) = s_pcmpistri_12;
1011
1012   try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1013   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1014   try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1015   try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1016
1017   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1018   try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1019   try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1020   try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1021   try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1022
1023   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1024   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1025   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1026   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1027
1028   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1029   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1030
1031   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1032   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1033   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1034   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1035
1036   try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1037
1038   try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1039   try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1040}
1041
1042
1043
1044//////////////////////////////////////////////////////////
1045//                                                      //
1046//                       ISTRI_44                       //
1047//                                                      //
1048//////////////////////////////////////////////////////////
1049
1050UInt h_pcmpistri_44 ( V128* argL, V128* argR )
1051{
1052   V128 block[2];
1053   memcpy(&block[0], argL, sizeof(V128));
1054   memcpy(&block[1], argR, sizeof(V128));
1055   ULong res, flags;
1056   __asm__ __volatile__(
1057      "subq      $1024,  %%rsp"             "\n\t"
1058      "movdqu    0(%2),  %%xmm2"            "\n\t"
1059      "movdqu    16(%2), %%xmm11"           "\n\t"
1060      "pcmpistri $0x44,  %%xmm2, %%xmm11"   "\n\t"
1061//"pcmpistrm $0x04, %%xmm2, %%xmm11"   "\n\t"
1062//"movd %%xmm0, %%ecx" "\n\t"
1063      "pushfq"                              "\n\t"
1064      "popq      %%rdx"                     "\n\t"
1065      "movq      %%rcx,  %0"                "\n\t"
1066      "movq      %%rdx,  %1"                "\n\t"
1067      "addq      $1024,  %%rsp"             "\n\t"
1068      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1069      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1070   );
1071   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1072}
1073
1074UInt s_pcmpistri_44 ( V128* argLU, V128* argRU )
1075{
1076   V128 resV;
1077   UInt resOSZACP, resECX;
1078   Bool ok
1079      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1080                       zmask_from_V128(argLU),
1081                       zmask_from_V128(argRU),
1082                       0x44, False/*!isSTRM*/
1083        );
1084   assert(ok);
1085   resECX = resV.uInt[0];
1086   return (resOSZACP << 16) | resECX;
1087}
1088
1089void istri_44 ( void )
1090{
1091   char* wot = "44";
1092   UInt(*h)(V128*,V128*) = h_pcmpistri_44;
1093   UInt(*s)(V128*,V128*) = s_pcmpistri_44;
1094
1095   try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc");
1096   try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb");
1097   try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb");
1098   try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb");
1099
1100   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1101   try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb");
1102   try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb");
1103   try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb");
1104   try_istri(wot,h,s, "0000000000000000", "00000000000000cb");
1105
1106   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1107
1108   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1109   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b");
1110   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb");
1111
1112   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb");
1113   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb");
1114   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
1115
1116   try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
1117   try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
1118
1119   try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
1120   try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
1121
1122   try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a");
1123   try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
1124   try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54");
1125   try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
1126}
1127
1128
1129//////////////////////////////////////////////////////////
1130//                                                      //
1131//                       ISTRI_00                       //
1132//                                                      //
1133//////////////////////////////////////////////////////////
1134
1135UInt h_pcmpistri_00 ( V128* argL, V128* argR )
1136{
1137   V128 block[2];
1138   memcpy(&block[0], argL, sizeof(V128));
1139   memcpy(&block[1], argR, sizeof(V128));
1140   ULong res, flags;
1141   __asm__ __volatile__(
1142      "subq      $1024,  %%rsp"             "\n\t"
1143      "movdqu    0(%2),  %%xmm2"            "\n\t"
1144      "movdqu    16(%2), %%xmm11"           "\n\t"
1145      "pcmpistri $0x00,  %%xmm2, %%xmm11"   "\n\t"
1146//"pcmpistrm $0x00, %%xmm2, %%xmm11"   "\n\t"
1147//"movd %%xmm0, %%ecx" "\n\t"
1148      "pushfq"                              "\n\t"
1149      "popq      %%rdx"                     "\n\t"
1150      "movq      %%rcx,  %0"                "\n\t"
1151      "movq      %%rdx,  %1"                "\n\t"
1152      "addq      $1024,  %%rsp"             "\n\t"
1153      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1154      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1155   );
1156   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1157}
1158
1159UInt s_pcmpistri_00 ( V128* argLU, V128* argRU )
1160{
1161   V128 resV;
1162   UInt resOSZACP, resECX;
1163   Bool ok
1164      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1165                       zmask_from_V128(argLU),
1166                       zmask_from_V128(argRU),
1167                       0x00, False/*!isSTRM*/
1168        );
1169   assert(ok);
1170   resECX = resV.uInt[0];
1171   return (resOSZACP << 16) | resECX;
1172}
1173
1174void istri_00 ( void )
1175{
1176   char* wot = "00";
1177   UInt(*h)(V128*,V128*) = h_pcmpistri_00;
1178   UInt(*s)(V128*,V128*) = s_pcmpistri_00;
1179
1180   try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1181   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1182   try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1183   try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1184
1185   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1186   try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1187   try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1188   try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1189   try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1190
1191   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1192   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1193   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1194   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1195
1196   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1197   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1198
1199   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1200   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1201   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1202   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1203
1204   try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1205
1206   try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1207   try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1208}
1209
1210
1211//////////////////////////////////////////////////////////
1212//                                                      //
1213//                       ISTRI_38                       //
1214//                                                      //
1215//////////////////////////////////////////////////////////
1216
1217UInt h_pcmpistri_38 ( V128* argL, V128* argR )
1218{
1219   V128 block[2];
1220   memcpy(&block[0], argL, sizeof(V128));
1221   memcpy(&block[1], argR, sizeof(V128));
1222   ULong res, flags;
1223   __asm__ __volatile__(
1224      "subq      $1024,  %%rsp"             "\n\t"
1225      "movdqu    0(%2),  %%xmm2"            "\n\t"
1226      "movdqu    16(%2), %%xmm11"           "\n\t"
1227      "pcmpistri $0x38,  %%xmm2, %%xmm11"   "\n\t"
1228      "pushfq"                              "\n\t"
1229      "popq      %%rdx"                     "\n\t"
1230      "movq      %%rcx,  %0"                "\n\t"
1231      "movq      %%rdx,  %1"                "\n\t"
1232      "addq      $1024,  %%rsp"             "\n\t"
1233      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1234      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1235   );
1236   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1237}
1238
1239UInt s_pcmpistri_38 ( V128* argLU, V128* argRU )
1240{
1241   V128 resV;
1242   UInt resOSZACP, resECX;
1243   Bool ok
1244      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1245                       zmask_from_V128(argLU),
1246                       zmask_from_V128(argRU),
1247                       0x38, False/*!isSTRM*/
1248        );
1249   assert(ok);
1250   resECX = resV.uInt[0];
1251   return (resOSZACP << 16) | resECX;
1252}
1253
1254void istri_38 ( void )
1255{
1256   char* wot = "38";
1257   UInt(*h)(V128*,V128*) = h_pcmpistri_38;
1258   UInt(*s)(V128*,V128*) = s_pcmpistri_38;
1259
1260   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1261
1262   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1263   try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1264   try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
1265   try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
1266
1267   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
1268   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
1269   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
1270
1271   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1272   try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1273   try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1274   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1275
1276   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1277   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
1278   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
1279
1280   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1281
1282   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
1283   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
1284   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
1285
1286   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
1287   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
1288   try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
1289
1290   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
1291   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
1292   try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
1293
1294   try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
1295   try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
1296   try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
1297
1298   try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
1299   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
1300}
1301
1302
1303
1304//////////////////////////////////////////////////////////
1305//                                                      //
1306//                       ISTRI_46                       //
1307//                                                      //
1308//////////////////////////////////////////////////////////
1309
1310UInt h_pcmpistri_46 ( V128* argL, V128* argR )
1311{
1312   V128 block[2];
1313   memcpy(&block[0], argL, sizeof(V128));
1314   memcpy(&block[1], argR, sizeof(V128));
1315   ULong res, flags;
1316   __asm__ __volatile__(
1317      "subq      $1024,  %%rsp"             "\n\t"
1318      "movdqu    0(%2),  %%xmm2"            "\n\t"
1319      "movdqu    16(%2), %%xmm11"           "\n\t"
1320      "pcmpistri $0x46,  %%xmm2, %%xmm11"   "\n\t"
1321      "pushfq"                              "\n\t"
1322      "popq      %%rdx"                     "\n\t"
1323      "movq      %%rcx,  %0"                "\n\t"
1324      "movq      %%rdx,  %1"                "\n\t"
1325      "addq      $1024,  %%rsp"             "\n\t"
1326      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1327      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1328   );
1329   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1330}
1331
1332UInt s_pcmpistri_46 ( V128* argLU, V128* argRU )
1333{
1334   V128 resV;
1335   UInt resOSZACP, resECX;
1336   Bool ok
1337      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1338                       zmask_from_V128(argLU),
1339                       zmask_from_V128(argRU),
1340                       0x46, False/*!isSTRM*/
1341        );
1342   assert(ok);
1343   resECX = resV.uInt[0];
1344   return (resOSZACP << 16) | resECX;
1345}
1346
1347void istri_46 ( void )
1348{
1349   char* wot = "46";
1350   UInt(*h)(V128*,V128*) = h_pcmpistri_46;
1351   UInt(*s)(V128*,V128*) = s_pcmpistri_46;
1352
1353   try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc");
1354   try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb");
1355   try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb");
1356   try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb");
1357
1358   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1359   try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb");
1360   try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb");
1361   try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb");
1362   try_istri(wot,h,s, "0000000000000000", "00000000000000cb");
1363
1364   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1365
1366   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1367   try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b");
1368   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb");
1369
1370   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb");
1371   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb");
1372   try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
1373
1374   try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
1375   try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
1376
1377   try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
1378   try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
1379
1380   try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a");
1381   try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
1382   try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54");
1383   try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
1384}
1385
1386
1387//////////////////////////////////////////////////////////
1388//                                                      //
1389//                       ISTRI_30                       //
1390//                                                      //
1391//////////////////////////////////////////////////////////
1392
1393UInt h_pcmpistri_30 ( V128* argL, V128* argR )
1394{
1395   V128 block[2];
1396   memcpy(&block[0], argL, sizeof(V128));
1397   memcpy(&block[1], argR, sizeof(V128));
1398   ULong res, flags;
1399   __asm__ __volatile__(
1400      "subq      $1024,  %%rsp"             "\n\t"
1401      "movdqu    0(%2),  %%xmm2"            "\n\t"
1402      "movdqu    16(%2), %%xmm11"           "\n\t"
1403      "pcmpistri $0x30,  %%xmm2, %%xmm11"   "\n\t"
1404      "pushfq"                              "\n\t"
1405      "popq      %%rdx"                     "\n\t"
1406      "movq      %%rcx,  %0"                "\n\t"
1407      "movq      %%rdx,  %1"                "\n\t"
1408      "addq      $1024,  %%rsp"             "\n\t"
1409      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1410      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1411   );
1412   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1413}
1414
1415UInt s_pcmpistri_30 ( V128* argLU, V128* argRU )
1416{
1417   V128 resV;
1418   UInt resOSZACP, resECX;
1419   Bool ok
1420      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1421                       zmask_from_V128(argLU),
1422                       zmask_from_V128(argRU),
1423                       0x30, False/*!isSTRM*/
1424        );
1425   assert(ok);
1426   resECX = resV.uInt[0];
1427   return (resOSZACP << 16) | resECX;
1428}
1429
1430void istri_30 ( void )
1431{
1432   char* wot = "30";
1433   UInt(*h)(V128*,V128*) = h_pcmpistri_30;
1434   UInt(*s)(V128*,V128*) = s_pcmpistri_30;
1435
1436   try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1437   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1438   try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1439   try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1440
1441   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1442   try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1443   try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1444   try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1445   try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1446
1447   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1448   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1449   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1450   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1451
1452   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1453   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1454
1455   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1456   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1457   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1458   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1459
1460   try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1461
1462   try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1463   try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1464}
1465
1466
1467//////////////////////////////////////////////////////////
1468//                                                      //
1469//                       ISTRI_40                       //
1470//                                                      //
1471//////////////////////////////////////////////////////////
1472
1473UInt h_pcmpistri_40 ( V128* argL, V128* argR )
1474{
1475   V128 block[2];
1476   memcpy(&block[0], argL, sizeof(V128));
1477   memcpy(&block[1], argR, sizeof(V128));
1478   ULong res, flags;
1479   __asm__ __volatile__(
1480      "subq      $1024,  %%rsp"             "\n\t"
1481      "movdqu    0(%2),  %%xmm2"            "\n\t"
1482      "movdqu    16(%2), %%xmm11"           "\n\t"
1483      "pcmpistri $0x40,  %%xmm2, %%xmm11"   "\n\t"
1484      "pushfq"                              "\n\t"
1485      "popq      %%rdx"                     "\n\t"
1486      "movq      %%rcx,  %0"                "\n\t"
1487      "movq      %%rdx,  %1"                "\n\t"
1488      "addq      $1024,  %%rsp"             "\n\t"
1489      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1490      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1491   );
1492   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1493}
1494
1495UInt s_pcmpistri_40 ( V128* argLU, V128* argRU )
1496{
1497   V128 resV;
1498   UInt resOSZACP, resECX;
1499   Bool ok
1500      = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1501                       zmask_from_V128(argLU),
1502                       zmask_from_V128(argRU),
1503                       0x40, False/*!isSTRM*/
1504        );
1505   assert(ok);
1506   resECX = resV.uInt[0];
1507   return (resOSZACP << 16) | resECX;
1508}
1509
1510void istri_40 ( void )
1511{
1512   char* wot = "40";
1513   UInt(*h)(V128*,V128*) = h_pcmpistri_40;
1514   UInt(*s)(V128*,V128*) = s_pcmpistri_40;
1515
1516   try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1517   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1518   try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1519   try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1520
1521   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1522   try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1523   try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1524   try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1525   try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1526
1527   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1528   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1529   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1530   try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1531
1532   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1533   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1534
1535   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1536   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1537   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1538   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1539
1540   try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1541
1542   try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1543   try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1544}
1545
1546
1547//////////////////////////////////////////////////////////
1548//                                                      //
1549//                         main                         //
1550//                                                      //
1551//////////////////////////////////////////////////////////
1552
1553int main ( void )
1554{
1555   istri_4A();
1556   istri_3A();
1557   istri_08();
1558   istri_1A();
1559   istri_02();
1560   istri_0C();
1561   istri_12();
1562   istri_44();
1563   istri_00();
1564   istri_38();
1565   istri_46();
1566   istri_30();
1567   istri_40();
1568   return 0;
1569}
1570