
/* Tests in detail the core arithmetic for pcmp{e,i}str{i,m} using
   pcmpistri to drive it.  Does not check the e-vs-i or i-vs-m
   aspect. */

#include <string.h>
#include <stdio.h>
#include <assert.h>

typedef  unsigned int            UInt;
typedef  signed int              Int;
typedef  unsigned char           UChar;
typedef  unsigned short          UShort;
typedef  unsigned long long int  ULong;
typedef  UChar                   Bool;
#define False ((Bool)0)
#define True  ((Bool)1)

/* A 128-bit vector, viewable as bytes, 16-bit lanes or 32-bit lanes. */
typedef
   union {
      UChar  uChar[16];
      UShort uShort[8];
      UInt   uInt[4];
      UInt   w32[4];
   }
   V128;

/* Bit positions of the O, S, Z, A, C and P flags as used by the
   masks below and by the flag words built in this file. */
#define SHIFT_O 11
#define SHIFT_S 7
#define SHIFT_Z 6
#define SHIFT_A 4
#define SHIFT_C 0
#define SHIFT_P 2

#define MASK_O (1ULL << SHIFT_O)
#define MASK_S (1ULL << SHIFT_S)
#define MASK_Z (1ULL << SHIFT_Z)
#define MASK_A (1ULL << SHIFT_A)
#define MASK_C (1ULL << SHIFT_C)
#define MASK_P (1ULL << SHIFT_P)


/* Count the leading zero bits of the low 32 bits of x.  Returns 32
   when x is zero.  Uses only unsigned arithmetic, so it does not
   depend on how the compiler shifts negative values. */
UInt clz32 ( UInt x )
{
   UInt n = 0;
   x &= 0xFFFFFFFFu;
   if (x == 0)
      return 32;
   /* Binary search: whenever the top half of the remaining window is
      empty, account for it and move the bottom half up. */
   if ((x & 0xFFFF0000u) == 0) { n += 16; x <<= 16; }
   if ((x & 0xFF000000u) == 0) { n +=  8; x <<=  8; }
   if ((x & 0xF0000000u) == 0) { n +=  4; x <<=  4; }
   if ((x & 0xC0000000u) == 0) { n +=  2; x <<=  2; }
   if ((x & 0x80000000u) == 0) { n +=  1; }
   return n;
}

/* Count the trailing zero bits of x.  Returns 32 when x is zero.
   (~x) & (x-1) has a 1 in exactly the positions below the lowest set
   bit of x, so the answer is the population of that mask. */
UInt ctz32 ( UInt x )
{
   return 32 - clz32((~x) & (x-1));
}

/* Fill *dst from a 16-character hex summary.  Each hex digit d
   becomes the byte 0xdd; the last character of the summary lands in
   dst->uChar[0] and the first in dst->uChar[15].  Asserts that the
   summary is exactly 16 characters long and contains only hex
   digits. */
void expand ( V128* dst, char* summary )
{
   Int i;
   assert( strlen(summary) == 16 );
   for (i = 0; i < 16; i++) {
      UChar xx = 0;
      UChar x = summary[15-i];
      if      (x >= '0' && x <= '9') { xx = x - '0'; }
      else if (x >= 'A' && x <= 'F') { xx = x - 'A' + 10; }
      else if (x >= 'a' && x <= 'f') { xx = x - 'a' + 10; }
      else assert(0);

      assert(xx < 16);
      /* replicate the nibble into both halves of the byte */
      xx = (xx << 4) | xx;
      dst->uChar[i] = xx;
   }
}

/* Expand the two summaries into vectors, run both h_fn and s_fn on
   them, and print both results on one line.  The line ends in "!!!!"
   when the two results differ.  'which' is a two-character tag that
   is printed with the results. */
void try_istri ( char* which,
                 UInt(*h_fn)(V128*,V128*),
                 UInt(*s_fn)(V128*,V128*),
                 char* summL, char* summR )
{
   assert(strlen(which) == 2);
   V128 argL, argR;
   expand(&argL, summL);
   expand(&argR, summR);
   UInt h_res = h_fn(&argL, &argR);
   UInt s_res = s_fn(&argL, &argR);
   printf("istri %s  %s %s -> %08x %08x %s\n",
          which, summL, summR, h_res, s_res, h_res == s_res ? "" : "!!!!");
}

/* Build an 8-bit mask with bit i set iff 16-bit lane i of *arg is
   zero. */
UInt zmask_from_V128 ( V128* arg )
{
   UInt i, res = 0;
   for (i = 0; i < 8; i++) {
      res |=  ((arg->uShort[i] == 0) ? 1 : 0) << i;
   }
   return res;
}

//////////////////////////////////////////////////////////
//                                                      //
//                       GENERAL                        //
//                                                      //
//////////////////////////////////////////////////////////


/* Given partial results from a 16-bit pcmpXstrX operation (intRes1,
   basically), generate an I-format output value, also the new
   OSZACP flags.

   pol (0..3) selects how intRes1 is post-processed: 0 and 2 pass it
   through, 1 complements it, 3 XORs it with validL.  Only the low 8
   bits of the result are kept.

   idx (0..1) selects which set bit is reported: 1 gives the index of
   the most significant set bit, 0 the least significant.  If no bit
   is set the reported index is 8.

   The index is written to resV->w32[0] and the other three words of
   *resV are zeroed.  The flag word is written to *resOSZACP. */
static
void PCMPxSTRx_WRK_gen_output_fmt_I_wide ( /*OUT*/V128* resV,
                                           /*OUT*/UInt* resOSZACP,
                                           UInt intRes1,
                                           UInt zmaskL, UInt zmaskR,
                                           UInt validL,
                                           UInt pol, UInt idx )
{
   assert((pol >> 2) == 0);
   assert((idx >> 1) == 0);

   UInt intRes2 = 0;
   switch (pol) {
      case 0: /* pol +  */
      case 2: /* pol m+ */
         intRes2 = intRes1;
         break;
      case 1: /* pol -  */
         intRes2 = ~intRes1;
         break;
      case 3: /* pol m- */
         intRes2 = intRes1 ^ validL;
         break;
   }
   intRes2 &= 0xFF;

   // generate I-format output (an index in ECX)
   UInt newECX = 8;
   if (intRes2 != 0) {
      newECX = idx ? (31 - clz32(intRes2))   // index of ms-1-bit
                   : ctz32(intRes2);         // index of ls-1-bit
   }

   resV->w32[0] = newECX;
   resV->w32[1] = 0;
   resV->w32[2] = 0;
   resV->w32[3] = 0;

   // generate new flags, common to all ISTRI and ISTRM cases
   *resOSZACP    // A, P are zero
      = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0
      | ((zmaskL == 0)  ? 0 : MASK_Z) // Z == 1 iff any in argL is 0
      | ((zmaskR == 0)  ? 0 : MASK_S) // S == 1 iff any in argR is 0
      | ((intRes2 & 1) << SHIFT_O);   // O == IntRes2[0]
}

/* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M}
   variants on 16-bit characters.

   For xSTRI variants, the new ECX value is placed in the 32 bits
   pointed to by *resV, and the top 96 bits are zeroed.  For xSTRM
   variants, the result is a 128 bit value and is placed at *resV in
   the obvious way.

   For all variants, the new OSZACP value is placed at *resOSZACP.

   argLV and argRV are the vector args.  The caller must prepare an
   8-bit mask for each, zmaskL and zmaskR.  For ISTRx variants this
   must be 1 for each zero 16-bit element of the respective arg.  For
   ESTRx variants this is derived from the explicit length
   indication, and must be 0 in all places except at the bit index
   corresponding to the valid length (0 .. 8).  If the valid length
   is 8 then the mask must be all zeroes.  In all cases, bits 31:8
   must be zero.

   imm8 is the original immediate from the instruction.  isxSTRM
   indicates whether this is a xSTRM or xSTRI variant.  NOTE: the
   code below never reads isxSTRM; it always produces the I-format
   (index) result.

   If the given imm8 case can be handled, the return value is True.
   If not, False is returned, and neither *resV nor *resOSZACP are
   altered.
*/

Bool pcmpXstrX_WRK_wide ( /*OUT*/V128* resV,
                          /*OUT*/UInt* resOSZACP,
                          V128* argLV,  V128* argRV,
                          UInt zmaskL, UInt zmaskR,
                          UInt imm8,   Bool isxSTRM )
{
   assert(imm8 < 0x80);
   assert((zmaskL >> 8) == 0);
   assert((zmaskR >> 8) == 0);

   (void)isxSTRM;   /* not consulted; see the note in the header comment */

   /* Explicitly reject any imm8 values that haven't been validated,
      even if they would probably work.  Life is too short to have
      unvalidated cases in the code base. */
   switch (imm8) {
      case 0x01: case 0x03: case 0x09: case 0x0B: case 0x0D:
      case 0x13:            case 0x1B:
                            case 0x39: case 0x3B:
                 case 0x45:            case 0x4B:
         break;
      default:
         return False;
   }

   UInt fmt = (imm8 >> 0) & 3; // imm8[1:0]  data format
   UInt agg = (imm8 >> 2) & 3; // imm8[3:2]  aggregation fn
   UInt pol = (imm8 >> 4) & 3; // imm8[5:4]  polarity
   UInt idx = (imm8 >> 6) & 1; // imm8[6]    1==msb/bytemask

   Bool isWide = (fmt == 1/*uw*/ || fmt == 3/*sw*/) ? True : False;

   /* Read the lanes through the union member rather than through a
      pointer cast. */
   const UShort* argL = argLV->uShort;
   const UShort* argR = argRV->uShort;

   /* z | -z has the lowest set bit of z and every bit above it set,
      so the complement is 1 exactly for the lanes below the first
      zero lane (and all ones when z == 0). */
   UInt validL = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
   UInt validR = ~(zmaskR | -zmaskR);  // not(left(zmaskR))

   UInt intRes1 = 0;

   if (agg == 2/*equal each, aka strcmp*/ && isWide) {

      /*----------------------------------------*/
      /*-- strcmp on wide data                --*/
      /*----------------------------------------*/

      Int  i;
      UInt boolResII = 0;
      for (i = 7; i >= 0; i--) {
         boolResII = (boolResII << 1) | (argL[i] == argR[i] ? 1 : 0);
      }

      // do invalidation, common to all equal-each cases
      intRes1
         = (boolResII & validL & validR)  // if both valid, use cmpres
           | (~ (validL | validR));       // if both invalid, force 1
                                          // else force 0
   }
   else if (agg == 0/*equal any, aka find chars in a set*/ && isWide) {

      /*----------------------------------------*/
      /*-- set membership on wide data        --*/
      /*----------------------------------------*/

      /* argL: the string,  argR: charset */
      UInt si, ci;
      for (si = 0; si < 8; si++) {
         if ((validL & (1u << si)) == 0)
            // run off the end of the string.
            break;
         UInt m = 0;
         for (ci = 0; ci < 8; ci++) {
            if ((validR & (1u << ci)) == 0) break;
            if (argR[ci] == argL[si]) { m = 1; break; }
         }
         intRes1 |= (m << si);
      }
      // intRes1 is "pre-invalidated"
   }
   else if (agg == 3/*equal ordered, aka substring search*/ && isWide) {

      /*----------------------------------------*/
      /*-- substring search on wide data      --*/
      /*----------------------------------------*/

      /* argL: haystack,  argR: needle */
      UInt ni, hi;
      for (hi = 0; hi < 8; hi++) {
         UInt m = 1;
         for (ni = 0; ni < 8; ni++) {
            if ((validR & (1u << ni)) == 0) break;
            UInt i = ni + hi;
            if (i >= 8) break;
            if (argL[i] != argR[ni]) { m = 0; break; }
         }
         intRes1 |= (m << hi);
         /* This check deliberately comes after the bit for position
            hi has been recorded. */
         if ((validL & (1u << hi)) == 0)
            // run off the end of the haystack
            break;
      }
      // intRes1 is "pre-invalidated"
   }
   else if (agg == 1/*ranges*/ && fmt == 1/*uw*/) {

      /*----------------------------------------*/
      /*-- ranges, unsigned wide data         --*/
      /*----------------------------------------*/

      /* argL: string,  argR: range-pairs */
      UInt ri, si;
      for (si = 0; si < 8; si++) {
         if ((validL & (1u << si)) == 0)
            // run off the end of the string
            break;
         UInt m = 0;
         for (ri = 0; ri < 8; ri += 2) {
            /* both ends of the pair must be valid */
            if ((validR & (3u << ri)) != (3u << ri)) break;
            if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
               m = 1; break;
            }
         }
         intRes1 |= (m << si);
      }
      // intRes1 is "pre-invalidated"
   }
   else {
      return False;
   }

   intRes1 &= 0xFF;

   // generate I-format output
   PCMPxSTRx_WRK_gen_output_fmt_I_wide(
      resV, resOSZACP,
      intRes1, zmaskL, zmaskR, validL, pol, idx
   );

   return True;
}

//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_4B                       //
//                                                      //
3849db268cc7344d8751213307737285e7cdac4fbecsewardj////////////////////////////////////////////////////////// 3859db268cc7344d8751213307737285e7cdac4fbecsewardj 3869db268cc7344d8751213307737285e7cdac4fbecsewardjUInt h_pcmpistri_4B ( V128* argL, V128* argR ) 3879db268cc7344d8751213307737285e7cdac4fbecsewardj{ 3889db268cc7344d8751213307737285e7cdac4fbecsewardj V128 block[2]; 3899db268cc7344d8751213307737285e7cdac4fbecsewardj memcpy(&block[0], argL, sizeof(V128)); 3909db268cc7344d8751213307737285e7cdac4fbecsewardj memcpy(&block[1], argR, sizeof(V128)); 3919db268cc7344d8751213307737285e7cdac4fbecsewardj ULong res, flags; 3929db268cc7344d8751213307737285e7cdac4fbecsewardj __asm__ __volatile__( 3939db268cc7344d8751213307737285e7cdac4fbecsewardj "subq $1024, %%rsp" "\n\t" 3949db268cc7344d8751213307737285e7cdac4fbecsewardj "movdqu 0(%2), %%xmm2" "\n\t" 3959db268cc7344d8751213307737285e7cdac4fbecsewardj "movdqu 16(%2), %%xmm11" "\n\t" 3969db268cc7344d8751213307737285e7cdac4fbecsewardj "pcmpistri $0x4B, %%xmm2, %%xmm11" "\n\t" 3979db268cc7344d8751213307737285e7cdac4fbecsewardj "pushfq" "\n\t" 3989db268cc7344d8751213307737285e7cdac4fbecsewardj "popq %%rdx" "\n\t" 3999db268cc7344d8751213307737285e7cdac4fbecsewardj "movq %%rcx, %0" "\n\t" 4009db268cc7344d8751213307737285e7cdac4fbecsewardj "movq %%rdx, %1" "\n\t" 4019db268cc7344d8751213307737285e7cdac4fbecsewardj "addq $1024, %%rsp" "\n\t" 4029db268cc7344d8751213307737285e7cdac4fbecsewardj : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 4039db268cc7344d8751213307737285e7cdac4fbecsewardj : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 4049db268cc7344d8751213307737285e7cdac4fbecsewardj ); 4059db268cc7344d8751213307737285e7cdac4fbecsewardj return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 4069db268cc7344d8751213307737285e7cdac4fbecsewardj} 4079db268cc7344d8751213307737285e7cdac4fbecsewardj 4089db268cc7344d8751213307737285e7cdac4fbecsewardjUInt s_pcmpistri_4B ( V128* argLU, V128* argRU ) 
4099db268cc7344d8751213307737285e7cdac4fbecsewardj{ 4109db268cc7344d8751213307737285e7cdac4fbecsewardj V128 resV; 4119db268cc7344d8751213307737285e7cdac4fbecsewardj UInt resOSZACP, resECX; 4129db268cc7344d8751213307737285e7cdac4fbecsewardj Bool ok 4139db268cc7344d8751213307737285e7cdac4fbecsewardj = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU, 4149db268cc7344d8751213307737285e7cdac4fbecsewardj zmask_from_V128(argLU), 4159db268cc7344d8751213307737285e7cdac4fbecsewardj zmask_from_V128(argRU), 4169db268cc7344d8751213307737285e7cdac4fbecsewardj 0x4B, False/*!isSTRM*/ 4179db268cc7344d8751213307737285e7cdac4fbecsewardj ); 4189db268cc7344d8751213307737285e7cdac4fbecsewardj assert(ok); 4199db268cc7344d8751213307737285e7cdac4fbecsewardj resECX = resV.uInt[0]; 4209db268cc7344d8751213307737285e7cdac4fbecsewardj return (resOSZACP << 16) | resECX; 4219db268cc7344d8751213307737285e7cdac4fbecsewardj} 4229db268cc7344d8751213307737285e7cdac4fbecsewardj 4239db268cc7344d8751213307737285e7cdac4fbecsewardjvoid istri_4B ( void ) 4249db268cc7344d8751213307737285e7cdac4fbecsewardj{ 4259db268cc7344d8751213307737285e7cdac4fbecsewardj char* wot = "4B"; 4269db268cc7344d8751213307737285e7cdac4fbecsewardj UInt(*h)(V128*,V128*) = h_pcmpistri_4B; 4279db268cc7344d8751213307737285e7cdac4fbecsewardj UInt(*s)(V128*,V128*) = s_pcmpistri_4B; 4289db268cc7344d8751213307737285e7cdac4fbecsewardj 4299db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 4309db268cc7344d8751213307737285e7cdac4fbecsewardj 4319db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 4329db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 4339db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 4349db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 
4359db268cc7344d8751213307737285e7cdac4fbecsewardj 4369db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 4379db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 4389db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 4399db268cc7344d8751213307737285e7cdac4fbecsewardj 4409db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 4419db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 4429db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 4439db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 4449db268cc7344d8751213307737285e7cdac4fbecsewardj 4459db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 4469db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 4479db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 4489db268cc7344d8751213307737285e7cdac4fbecsewardj 4499db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 4509db268cc7344d8751213307737285e7cdac4fbecsewardj 4519db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa"); 4529db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa"); 4539db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaa00aa"); 4549db268cc7344d8751213307737285e7cdac4fbecsewardj 4559db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaaaaaa"); 
4569db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa"); 4579db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaa00aa"); 4589db268cc7344d8751213307737285e7cdac4fbecsewardj 4599db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa"); 4609db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa00aaaaaa"); 4619db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaa00aaaaaa"); 4629db268cc7344d8751213307737285e7cdac4fbecsewardj 4639db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0000000000000000", "aaaaaaaa00aaaaaa"); 4649db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "8000000000000000", "aaaaaaaa00aaaaaa"); 4659db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0000000000000001", "aaaaaaaa00aaaaaa"); 4669db268cc7344d8751213307737285e7cdac4fbecsewardj 4679db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 4689db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 4699db268cc7344d8751213307737285e7cdac4fbecsewardj} 4709db268cc7344d8751213307737285e7cdac4fbecsewardj 4719db268cc7344d8751213307737285e7cdac4fbecsewardj////////////////////////////////////////////////////////// 4729db268cc7344d8751213307737285e7cdac4fbecsewardj// // 4739db268cc7344d8751213307737285e7cdac4fbecsewardj// ISTRI_3B // 4749db268cc7344d8751213307737285e7cdac4fbecsewardj// // 4759db268cc7344d8751213307737285e7cdac4fbecsewardj////////////////////////////////////////////////////////// 4769db268cc7344d8751213307737285e7cdac4fbecsewardj 4779db268cc7344d8751213307737285e7cdac4fbecsewardjUInt h_pcmpistri_3B ( V128* argL, V128* argR ) 4789db268cc7344d8751213307737285e7cdac4fbecsewardj{ 
4799db268cc7344d8751213307737285e7cdac4fbecsewardj V128 block[2]; 4809db268cc7344d8751213307737285e7cdac4fbecsewardj memcpy(&block[0], argL, sizeof(V128)); 4819db268cc7344d8751213307737285e7cdac4fbecsewardj memcpy(&block[1], argR, sizeof(V128)); 4829db268cc7344d8751213307737285e7cdac4fbecsewardj ULong res, flags; 4839db268cc7344d8751213307737285e7cdac4fbecsewardj __asm__ __volatile__( 4849db268cc7344d8751213307737285e7cdac4fbecsewardj "subq $1024, %%rsp" "\n\t" 4859db268cc7344d8751213307737285e7cdac4fbecsewardj "movdqu 0(%2), %%xmm2" "\n\t" 4869db268cc7344d8751213307737285e7cdac4fbecsewardj "movdqu 16(%2), %%xmm11" "\n\t" 4879db268cc7344d8751213307737285e7cdac4fbecsewardj "pcmpistri $0x3B, %%xmm2, %%xmm11" "\n\t" 4889db268cc7344d8751213307737285e7cdac4fbecsewardj "pushfq" "\n\t" 4899db268cc7344d8751213307737285e7cdac4fbecsewardj "popq %%rdx" "\n\t" 4909db268cc7344d8751213307737285e7cdac4fbecsewardj "movq %%rcx, %0" "\n\t" 4919db268cc7344d8751213307737285e7cdac4fbecsewardj "movq %%rdx, %1" "\n\t" 4929db268cc7344d8751213307737285e7cdac4fbecsewardj "addq $1024, %%rsp" "\n\t" 4939db268cc7344d8751213307737285e7cdac4fbecsewardj : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 4949db268cc7344d8751213307737285e7cdac4fbecsewardj : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 4959db268cc7344d8751213307737285e7cdac4fbecsewardj ); 4969db268cc7344d8751213307737285e7cdac4fbecsewardj return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 4979db268cc7344d8751213307737285e7cdac4fbecsewardj} 4989db268cc7344d8751213307737285e7cdac4fbecsewardj 4999db268cc7344d8751213307737285e7cdac4fbecsewardjUInt s_pcmpistri_3B ( V128* argLU, V128* argRU ) 5009db268cc7344d8751213307737285e7cdac4fbecsewardj{ 5019db268cc7344d8751213307737285e7cdac4fbecsewardj V128 resV; 5029db268cc7344d8751213307737285e7cdac4fbecsewardj UInt resOSZACP, resECX; 5039db268cc7344d8751213307737285e7cdac4fbecsewardj Bool ok 5049db268cc7344d8751213307737285e7cdac4fbecsewardj = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, 
argRU, 5059db268cc7344d8751213307737285e7cdac4fbecsewardj zmask_from_V128(argLU), 5069db268cc7344d8751213307737285e7cdac4fbecsewardj zmask_from_V128(argRU), 5079db268cc7344d8751213307737285e7cdac4fbecsewardj 0x3B, False/*!isSTRM*/ 5089db268cc7344d8751213307737285e7cdac4fbecsewardj ); 5099db268cc7344d8751213307737285e7cdac4fbecsewardj assert(ok); 5109db268cc7344d8751213307737285e7cdac4fbecsewardj resECX = resV.uInt[0]; 5119db268cc7344d8751213307737285e7cdac4fbecsewardj return (resOSZACP << 16) | resECX; 5129db268cc7344d8751213307737285e7cdac4fbecsewardj} 5139db268cc7344d8751213307737285e7cdac4fbecsewardj 5149db268cc7344d8751213307737285e7cdac4fbecsewardjvoid istri_3B ( void ) 5159db268cc7344d8751213307737285e7cdac4fbecsewardj{ 5169db268cc7344d8751213307737285e7cdac4fbecsewardj char* wot = "3B"; 5179db268cc7344d8751213307737285e7cdac4fbecsewardj UInt(*h)(V128*,V128*) = h_pcmpistri_3B; 5189db268cc7344d8751213307737285e7cdac4fbecsewardj UInt(*s)(V128*,V128*) = s_pcmpistri_3B; 5199db268cc7344d8751213307737285e7cdac4fbecsewardj 5209db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 5219db268cc7344d8751213307737285e7cdac4fbecsewardj 5229db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 5239db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 5249db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 5259db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 5269db268cc7344d8751213307737285e7cdac4fbecsewardj 5279db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 5289db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 5299db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, 
"aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 5309db268cc7344d8751213307737285e7cdac4fbecsewardj 5319db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 5329db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 5339db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 5349db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 5359db268cc7344d8751213307737285e7cdac4fbecsewardj 5369db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 5379db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 5389db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 5399db268cc7344d8751213307737285e7cdac4fbecsewardj 5409db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 5419db268cc7344d8751213307737285e7cdac4fbecsewardj 5429db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa"); 5439db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa"); 5449db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaa00aa"); 5459db268cc7344d8751213307737285e7cdac4fbecsewardj 5469db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaaaaaa"); 5479db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa"); 5489db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaa00aa"); 5499db268cc7344d8751213307737285e7cdac4fbecsewardj 5509db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaa00aa", 
"aaaaaaaaaaaaaaaa"); 5519db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa00aaaaaa"); 5529db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaa00aaaaaa"); 5539db268cc7344d8751213307737285e7cdac4fbecsewardj 5549db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0000000000000000", "aaaaaaaa00aaaaaa"); 5559db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "8000000000000000", "aaaaaaaa00aaaaaa"); 5569db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0000000000000001", "aaaaaaaa00aaaaaa"); 5579db268cc7344d8751213307737285e7cdac4fbecsewardj 5589db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 5599db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 5609db268cc7344d8751213307737285e7cdac4fbecsewardj} 5619db268cc7344d8751213307737285e7cdac4fbecsewardj 5629db268cc7344d8751213307737285e7cdac4fbecsewardj 5639db268cc7344d8751213307737285e7cdac4fbecsewardj 5649db268cc7344d8751213307737285e7cdac4fbecsewardj////////////////////////////////////////////////////////// 5659db268cc7344d8751213307737285e7cdac4fbecsewardj// // 5669db268cc7344d8751213307737285e7cdac4fbecsewardj// ISTRI_0D // 5679db268cc7344d8751213307737285e7cdac4fbecsewardj// // 5689db268cc7344d8751213307737285e7cdac4fbecsewardj////////////////////////////////////////////////////////// 5699db268cc7344d8751213307737285e7cdac4fbecsewardj 5709db268cc7344d8751213307737285e7cdac4fbecsewardj__attribute__((noinline)) 5719db268cc7344d8751213307737285e7cdac4fbecsewardjUInt h_pcmpistri_0D ( V128* argL, V128* argR ) 5729db268cc7344d8751213307737285e7cdac4fbecsewardj{ 5739db268cc7344d8751213307737285e7cdac4fbecsewardj V128 block[2]; 5749db268cc7344d8751213307737285e7cdac4fbecsewardj memcpy(&block[0], argL, sizeof(V128)); 5759db268cc7344d8751213307737285e7cdac4fbecsewardj 
memcpy(&block[1], argR, sizeof(V128)); 5769db268cc7344d8751213307737285e7cdac4fbecsewardj ULong res = 0, flags = 0; 5779db268cc7344d8751213307737285e7cdac4fbecsewardj __asm__ __volatile__( 578c5274ae844ae01cde66e35f1873ed37726dccd45weidendo "movdqu 0(%2), %%xmm2" "\n\t" 579c5274ae844ae01cde66e35f1873ed37726dccd45weidendo "movdqu 16(%2), %%xmm11" "\n\t" 5809db268cc7344d8751213307737285e7cdac4fbecsewardj "pcmpistri $0x0D, %%xmm2, %%xmm11" "\n\t" 5819db268cc7344d8751213307737285e7cdac4fbecsewardj //"pcmpistrm $0x0D, %%xmm2, %%xmm11" "\n\t" 5829db268cc7344d8751213307737285e7cdac4fbecsewardj //"movd %%xmm0, %%ecx" "\n\t" 5839db268cc7344d8751213307737285e7cdac4fbecsewardj "pushfq" "\n\t" 5849db268cc7344d8751213307737285e7cdac4fbecsewardj "popq %%rdx" "\n\t" 5859db268cc7344d8751213307737285e7cdac4fbecsewardj "movq %%rcx, %0" "\n\t" 5869db268cc7344d8751213307737285e7cdac4fbecsewardj "movq %%rdx, %1" "\n\t" 5879db268cc7344d8751213307737285e7cdac4fbecsewardj : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 5889db268cc7344d8751213307737285e7cdac4fbecsewardj : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 5899db268cc7344d8751213307737285e7cdac4fbecsewardj ); 5909db268cc7344d8751213307737285e7cdac4fbecsewardj return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 5919db268cc7344d8751213307737285e7cdac4fbecsewardj} 5929db268cc7344d8751213307737285e7cdac4fbecsewardj 5939db268cc7344d8751213307737285e7cdac4fbecsewardjUInt s_pcmpistri_0D ( V128* argLU, V128* argRU ) 5949db268cc7344d8751213307737285e7cdac4fbecsewardj{ 5959db268cc7344d8751213307737285e7cdac4fbecsewardj V128 resV; 5969db268cc7344d8751213307737285e7cdac4fbecsewardj UInt resOSZACP, resECX; 5979db268cc7344d8751213307737285e7cdac4fbecsewardj Bool ok 5989db268cc7344d8751213307737285e7cdac4fbecsewardj = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU, 5999db268cc7344d8751213307737285e7cdac4fbecsewardj zmask_from_V128(argLU), 6009db268cc7344d8751213307737285e7cdac4fbecsewardj zmask_from_V128(argRU), 
6019db268cc7344d8751213307737285e7cdac4fbecsewardj 0x0D, False/*!isSTRM*/ 6029db268cc7344d8751213307737285e7cdac4fbecsewardj ); 6039db268cc7344d8751213307737285e7cdac4fbecsewardj assert(ok); 6049db268cc7344d8751213307737285e7cdac4fbecsewardj resECX = resV.uInt[0]; 6059db268cc7344d8751213307737285e7cdac4fbecsewardj return (resOSZACP << 16) | resECX; 6069db268cc7344d8751213307737285e7cdac4fbecsewardj} 6079db268cc7344d8751213307737285e7cdac4fbecsewardj 6089db268cc7344d8751213307737285e7cdac4fbecsewardjvoid istri_0D ( void ) 6099db268cc7344d8751213307737285e7cdac4fbecsewardj{ 6109db268cc7344d8751213307737285e7cdac4fbecsewardj char* wot = "0D"; 6119db268cc7344d8751213307737285e7cdac4fbecsewardj UInt(*h)(V128*,V128*) = h_pcmpistri_0D; 6129db268cc7344d8751213307737285e7cdac4fbecsewardj UInt(*s)(V128*,V128*) = s_pcmpistri_0D; 6139db268cc7344d8751213307737285e7cdac4fbecsewardj 6149db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "11111111abcdef11", "0000000000abcdef"); 6159db268cc7344d8751213307737285e7cdac4fbecsewardj 6169db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "11111111abcdef11", "00abcdef00abcdef"); 6179db268cc7344d8751213307737285e7cdac4fbecsewardj 6189db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "11111111abcdef11", "0000000000abcdef"); 6199db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "1111111111abcdef", "0000000000abcdef"); 6209db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "111111111111abcd", "0000000000abcdef"); 6219db268cc7344d8751213307737285e7cdac4fbecsewardj 6229db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "1111abcd11abcd11", "000000000000abcd"); 6239db268cc7344d8751213307737285e7cdac4fbecsewardj 6249db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "11abcd1111abcd11", "000000000000abcd"); 6259db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "abcd111111abcd11", "000000000000abcd"); 
6269db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "cd11111111abcd11", "000000000000abcd"); 6279db268cc7344d8751213307737285e7cdac4fbecsewardj 6289db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "01abcd11abcd1111", "000000000000abcd"); 6299db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "00abcd11abcd1111", "000000000000abcd"); 6309db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0000cd11abcd1111", "000000000000abcd"); 6319db268cc7344d8751213307737285e7cdac4fbecsewardj 6329db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "00abcd1100abcd11", "000000000000abcd"); 6339db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "00abcd110000cd11", "000000000000abcd"); 6349db268cc7344d8751213307737285e7cdac4fbecsewardj 6359db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "1111111111111234", "0000000000000000"); 6369db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "1111111111111234", "0000000000000011"); 6379db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "1111111111111234", "0000000000001111"); 6389db268cc7344d8751213307737285e7cdac4fbecsewardj 6399db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "1111111111111234", "1111111111111234"); 6409db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0a11111111111111", "000000000000000a"); 6419db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0b11111111111111", "000000000000000a"); 642c5274ae844ae01cde66e35f1873ed37726dccd45weidendo 643c5274ae844ae01cde66e35f1873ed37726dccd45weidendo try_istri(wot,h,s, "b111111111111111", "0000000000000000"); 644c5274ae844ae01cde66e35f1873ed37726dccd45weidendo try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 645c5274ae844ae01cde66e35f1873ed37726dccd45weidendo try_istri(wot,h,s, "123456789abcdef1", "0000000000000000"); 646c5274ae844ae01cde66e35f1873ed37726dccd45weidendo try_istri(wot,h,s, 
"0000000000000000", "123456789abcdef1"); 6479db268cc7344d8751213307737285e7cdac4fbecsewardj} 6489db268cc7344d8751213307737285e7cdac4fbecsewardj 6499db268cc7344d8751213307737285e7cdac4fbecsewardj 6509db268cc7344d8751213307737285e7cdac4fbecsewardj////////////////////////////////////////////////////////// 6519db268cc7344d8751213307737285e7cdac4fbecsewardj// // 6529db268cc7344d8751213307737285e7cdac4fbecsewardj// ISTRI_09 // 6539db268cc7344d8751213307737285e7cdac4fbecsewardj// // 6549db268cc7344d8751213307737285e7cdac4fbecsewardj////////////////////////////////////////////////////////// 6559db268cc7344d8751213307737285e7cdac4fbecsewardj 6569db268cc7344d8751213307737285e7cdac4fbecsewardjUInt h_pcmpistri_09 ( V128* argL, V128* argR ) 6579db268cc7344d8751213307737285e7cdac4fbecsewardj{ 6589db268cc7344d8751213307737285e7cdac4fbecsewardj V128 block[2]; 6599db268cc7344d8751213307737285e7cdac4fbecsewardj memcpy(&block[0], argL, sizeof(V128)); 6609db268cc7344d8751213307737285e7cdac4fbecsewardj memcpy(&block[1], argR, sizeof(V128)); 6619db268cc7344d8751213307737285e7cdac4fbecsewardj ULong res, flags; 6629db268cc7344d8751213307737285e7cdac4fbecsewardj __asm__ __volatile__( 6639db268cc7344d8751213307737285e7cdac4fbecsewardj "subq $1024, %%rsp" "\n\t" 6649db268cc7344d8751213307737285e7cdac4fbecsewardj "movdqu 0(%2), %%xmm2" "\n\t" 6659db268cc7344d8751213307737285e7cdac4fbecsewardj "movdqu 16(%2), %%xmm11" "\n\t" 6669db268cc7344d8751213307737285e7cdac4fbecsewardj "pcmpistri $0x09, %%xmm2, %%xmm11" "\n\t" 6679db268cc7344d8751213307737285e7cdac4fbecsewardj "pushfq" "\n\t" 6689db268cc7344d8751213307737285e7cdac4fbecsewardj "popq %%rdx" "\n\t" 6699db268cc7344d8751213307737285e7cdac4fbecsewardj "movq %%rcx, %0" "\n\t" 6709db268cc7344d8751213307737285e7cdac4fbecsewardj "movq %%rdx, %1" "\n\t" 6719db268cc7344d8751213307737285e7cdac4fbecsewardj "addq $1024, %%rsp" "\n\t" 6729db268cc7344d8751213307737285e7cdac4fbecsewardj : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 
6739db268cc7344d8751213307737285e7cdac4fbecsewardj : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 6749db268cc7344d8751213307737285e7cdac4fbecsewardj ); 6759db268cc7344d8751213307737285e7cdac4fbecsewardj return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 6769db268cc7344d8751213307737285e7cdac4fbecsewardj} 6779db268cc7344d8751213307737285e7cdac4fbecsewardj 6789db268cc7344d8751213307737285e7cdac4fbecsewardjUInt s_pcmpistri_09 ( V128* argLU, V128* argRU ) 6799db268cc7344d8751213307737285e7cdac4fbecsewardj{ 6809db268cc7344d8751213307737285e7cdac4fbecsewardj V128 resV; 6819db268cc7344d8751213307737285e7cdac4fbecsewardj UInt resOSZACP, resECX; 6829db268cc7344d8751213307737285e7cdac4fbecsewardj Bool ok 6839db268cc7344d8751213307737285e7cdac4fbecsewardj = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU, 6849db268cc7344d8751213307737285e7cdac4fbecsewardj zmask_from_V128(argLU), 6859db268cc7344d8751213307737285e7cdac4fbecsewardj zmask_from_V128(argRU), 6869db268cc7344d8751213307737285e7cdac4fbecsewardj 0x09, False/*!isSTRM*/ 6879db268cc7344d8751213307737285e7cdac4fbecsewardj ); 6889db268cc7344d8751213307737285e7cdac4fbecsewardj assert(ok); 6899db268cc7344d8751213307737285e7cdac4fbecsewardj resECX = resV.uInt[0]; 6909db268cc7344d8751213307737285e7cdac4fbecsewardj return (resOSZACP << 16) | resECX; 6919db268cc7344d8751213307737285e7cdac4fbecsewardj} 6929db268cc7344d8751213307737285e7cdac4fbecsewardj 6939db268cc7344d8751213307737285e7cdac4fbecsewardjvoid istri_09 ( void ) 6949db268cc7344d8751213307737285e7cdac4fbecsewardj{ 6959db268cc7344d8751213307737285e7cdac4fbecsewardj char* wot = "09"; 6969db268cc7344d8751213307737285e7cdac4fbecsewardj UInt(*h)(V128*,V128*) = h_pcmpistri_09; 6979db268cc7344d8751213307737285e7cdac4fbecsewardj UInt(*s)(V128*,V128*) = s_pcmpistri_09; 6989db268cc7344d8751213307737285e7cdac4fbecsewardj 6999db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 
7009db268cc7344d8751213307737285e7cdac4fbecsewardj 7019db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 7029db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 7039db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 7049db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 7059db268cc7344d8751213307737285e7cdac4fbecsewardj 7069db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 7079db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 7089db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 7099db268cc7344d8751213307737285e7cdac4fbecsewardj 7109db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 7119db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 7129db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 7139db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 7149db268cc7344d8751213307737285e7cdac4fbecsewardj 7159db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 7169db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 7179db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 7189db268cc7344d8751213307737285e7cdac4fbecsewardj 7199db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 7209db268cc7344d8751213307737285e7cdac4fbecsewardj 
7219db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa"); 7229db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa"); 7239db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaa00aa"); 7249db268cc7344d8751213307737285e7cdac4fbecsewardj 7259db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaaaaaa"); 7269db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa"); 7279db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaa00aa"); 7289db268cc7344d8751213307737285e7cdac4fbecsewardj 7299db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa"); 7309db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa00aaaaaa"); 7319db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaa00aaaaaa"); 7329db268cc7344d8751213307737285e7cdac4fbecsewardj 7339db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0000000000000000", "aaaaaaaa00aaaaaa"); 7349db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "8000000000000000", "aaaaaaaa00aaaaaa"); 7359db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0000000000000001", "aaaaaaaa00aaaaaa"); 7369db268cc7344d8751213307737285e7cdac4fbecsewardj 7379db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 7389db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 7399db268cc7344d8751213307737285e7cdac4fbecsewardj} 7409db268cc7344d8751213307737285e7cdac4fbecsewardj 7419db268cc7344d8751213307737285e7cdac4fbecsewardj 7429db268cc7344d8751213307737285e7cdac4fbecsewardj 
7439db268cc7344d8751213307737285e7cdac4fbecsewardj////////////////////////////////////////////////////////// 7449db268cc7344d8751213307737285e7cdac4fbecsewardj// // 7459db268cc7344d8751213307737285e7cdac4fbecsewardj// ISTRI_1B // 7469db268cc7344d8751213307737285e7cdac4fbecsewardj// // 7479db268cc7344d8751213307737285e7cdac4fbecsewardj////////////////////////////////////////////////////////// 7489db268cc7344d8751213307737285e7cdac4fbecsewardj 7499db268cc7344d8751213307737285e7cdac4fbecsewardjUInt h_pcmpistri_1B ( V128* argL, V128* argR ) 7509db268cc7344d8751213307737285e7cdac4fbecsewardj{ 7519db268cc7344d8751213307737285e7cdac4fbecsewardj V128 block[2]; 7529db268cc7344d8751213307737285e7cdac4fbecsewardj memcpy(&block[0], argL, sizeof(V128)); 7539db268cc7344d8751213307737285e7cdac4fbecsewardj memcpy(&block[1], argR, sizeof(V128)); 7549db268cc7344d8751213307737285e7cdac4fbecsewardj ULong res, flags; 7559db268cc7344d8751213307737285e7cdac4fbecsewardj __asm__ __volatile__( 7569db268cc7344d8751213307737285e7cdac4fbecsewardj "subq $1024, %%rsp" "\n\t" 7579db268cc7344d8751213307737285e7cdac4fbecsewardj "movdqu 0(%2), %%xmm2" "\n\t" 7589db268cc7344d8751213307737285e7cdac4fbecsewardj "movdqu 16(%2), %%xmm11" "\n\t" 7599db268cc7344d8751213307737285e7cdac4fbecsewardj "pcmpistri $0x1B, %%xmm2, %%xmm11" "\n\t" 7609db268cc7344d8751213307737285e7cdac4fbecsewardj "pushfq" "\n\t" 7619db268cc7344d8751213307737285e7cdac4fbecsewardj "popq %%rdx" "\n\t" 7629db268cc7344d8751213307737285e7cdac4fbecsewardj "movq %%rcx, %0" "\n\t" 7639db268cc7344d8751213307737285e7cdac4fbecsewardj "movq %%rdx, %1" "\n\t" 7649db268cc7344d8751213307737285e7cdac4fbecsewardj "addq $1024, %%rsp" "\n\t" 7659db268cc7344d8751213307737285e7cdac4fbecsewardj : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 7669db268cc7344d8751213307737285e7cdac4fbecsewardj : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 7679db268cc7344d8751213307737285e7cdac4fbecsewardj ); 7689db268cc7344d8751213307737285e7cdac4fbecsewardj 
return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 7699db268cc7344d8751213307737285e7cdac4fbecsewardj} 7709db268cc7344d8751213307737285e7cdac4fbecsewardj 7719db268cc7344d8751213307737285e7cdac4fbecsewardjUInt s_pcmpistri_1B ( V128* argLU, V128* argRU ) 7729db268cc7344d8751213307737285e7cdac4fbecsewardj{ 7739db268cc7344d8751213307737285e7cdac4fbecsewardj V128 resV; 7749db268cc7344d8751213307737285e7cdac4fbecsewardj UInt resOSZACP, resECX; 7759db268cc7344d8751213307737285e7cdac4fbecsewardj Bool ok 7769db268cc7344d8751213307737285e7cdac4fbecsewardj = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU, 7779db268cc7344d8751213307737285e7cdac4fbecsewardj zmask_from_V128(argLU), 7789db268cc7344d8751213307737285e7cdac4fbecsewardj zmask_from_V128(argRU), 7799db268cc7344d8751213307737285e7cdac4fbecsewardj 0x1B, False/*!isSTRM*/ 7809db268cc7344d8751213307737285e7cdac4fbecsewardj ); 7819db268cc7344d8751213307737285e7cdac4fbecsewardj assert(ok); 7829db268cc7344d8751213307737285e7cdac4fbecsewardj resECX = resV.uInt[0]; 7839db268cc7344d8751213307737285e7cdac4fbecsewardj return (resOSZACP << 16) | resECX; 7849db268cc7344d8751213307737285e7cdac4fbecsewardj} 7859db268cc7344d8751213307737285e7cdac4fbecsewardj 7869db268cc7344d8751213307737285e7cdac4fbecsewardjvoid istri_1B ( void ) 7879db268cc7344d8751213307737285e7cdac4fbecsewardj{ 7889db268cc7344d8751213307737285e7cdac4fbecsewardj char* wot = "1B"; 7899db268cc7344d8751213307737285e7cdac4fbecsewardj UInt(*h)(V128*,V128*) = h_pcmpistri_1B; 7909db268cc7344d8751213307737285e7cdac4fbecsewardj UInt(*s)(V128*,V128*) = s_pcmpistri_1B; 7919db268cc7344d8751213307737285e7cdac4fbecsewardj 7929db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 7939db268cc7344d8751213307737285e7cdac4fbecsewardj 7949db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 7959db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", 
"aaaaaaaaaaaaaaaa"); 7969db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 7979db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 7989db268cc7344d8751213307737285e7cdac4fbecsewardj 7999db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 8009db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 8019db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 8029db268cc7344d8751213307737285e7cdac4fbecsewardj 8039db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 8049db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 8059db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 8069db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 8079db268cc7344d8751213307737285e7cdac4fbecsewardj 8089db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 8099db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 8109db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 8119db268cc7344d8751213307737285e7cdac4fbecsewardj 8129db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 8139db268cc7344d8751213307737285e7cdac4fbecsewardj 8149db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa"); 8159db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa"); 8169db268cc7344d8751213307737285e7cdac4fbecsewardj 
try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaa00aa"); 8179db268cc7344d8751213307737285e7cdac4fbecsewardj 8189db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaaaaaa"); 8199db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa"); 8209db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaa00aa"); 8219db268cc7344d8751213307737285e7cdac4fbecsewardj 8229db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa"); 8239db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa00aaaaaa"); 8249db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaa00aaaaaa"); 8259db268cc7344d8751213307737285e7cdac4fbecsewardj 8269db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0000000000000000", "aaaaaaaa00aaaaaa"); 8279db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "8000000000000000", "aaaaaaaa00aaaaaa"); 8289db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0000000000000001", "aaaaaaaa00aaaaaa"); 8299db268cc7344d8751213307737285e7cdac4fbecsewardj 8309db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 8319db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 8329db268cc7344d8751213307737285e7cdac4fbecsewardj} 8339db268cc7344d8751213307737285e7cdac4fbecsewardj 8349db268cc7344d8751213307737285e7cdac4fbecsewardj 8359db268cc7344d8751213307737285e7cdac4fbecsewardj 8369db268cc7344d8751213307737285e7cdac4fbecsewardj////////////////////////////////////////////////////////// 8379db268cc7344d8751213307737285e7cdac4fbecsewardj// // 8389db268cc7344d8751213307737285e7cdac4fbecsewardj// ISTRI_03 // 8399db268cc7344d8751213307737285e7cdac4fbecsewardj// // 
8409db268cc7344d8751213307737285e7cdac4fbecsewardj////////////////////////////////////////////////////////// 8419db268cc7344d8751213307737285e7cdac4fbecsewardj 8429db268cc7344d8751213307737285e7cdac4fbecsewardjUInt h_pcmpistri_03 ( V128* argL, V128* argR ) 8439db268cc7344d8751213307737285e7cdac4fbecsewardj{ 8449db268cc7344d8751213307737285e7cdac4fbecsewardj V128 block[2]; 8459db268cc7344d8751213307737285e7cdac4fbecsewardj memcpy(&block[0], argL, sizeof(V128)); 8469db268cc7344d8751213307737285e7cdac4fbecsewardj memcpy(&block[1], argR, sizeof(V128)); 8479db268cc7344d8751213307737285e7cdac4fbecsewardj ULong res, flags; 8489db268cc7344d8751213307737285e7cdac4fbecsewardj __asm__ __volatile__( 8499db268cc7344d8751213307737285e7cdac4fbecsewardj "subq $1024, %%rsp" "\n\t" 8509db268cc7344d8751213307737285e7cdac4fbecsewardj "movdqu 0(%2), %%xmm2" "\n\t" 8519db268cc7344d8751213307737285e7cdac4fbecsewardj "movdqu 16(%2), %%xmm11" "\n\t" 8529db268cc7344d8751213307737285e7cdac4fbecsewardj "pcmpistri $0x03, %%xmm2, %%xmm11" "\n\t" 8539db268cc7344d8751213307737285e7cdac4fbecsewardj//"pcmpistrm $0x03, %%xmm2, %%xmm11" "\n\t" 8549db268cc7344d8751213307737285e7cdac4fbecsewardj//"movd %%xmm0, %%ecx" "\n\t" 8559db268cc7344d8751213307737285e7cdac4fbecsewardj "pushfq" "\n\t" 8569db268cc7344d8751213307737285e7cdac4fbecsewardj "popq %%rdx" "\n\t" 8579db268cc7344d8751213307737285e7cdac4fbecsewardj "movq %%rcx, %0" "\n\t" 8589db268cc7344d8751213307737285e7cdac4fbecsewardj "movq %%rdx, %1" "\n\t" 8599db268cc7344d8751213307737285e7cdac4fbecsewardj "addq $1024, %%rsp" "\n\t" 8609db268cc7344d8751213307737285e7cdac4fbecsewardj : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 8619db268cc7344d8751213307737285e7cdac4fbecsewardj : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 8629db268cc7344d8751213307737285e7cdac4fbecsewardj ); 8639db268cc7344d8751213307737285e7cdac4fbecsewardj return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 8649db268cc7344d8751213307737285e7cdac4fbecsewardj} 
8659db268cc7344d8751213307737285e7cdac4fbecsewardj 8669db268cc7344d8751213307737285e7cdac4fbecsewardjUInt s_pcmpistri_03 ( V128* argLU, V128* argRU ) 8679db268cc7344d8751213307737285e7cdac4fbecsewardj{ 8689db268cc7344d8751213307737285e7cdac4fbecsewardj V128 resV; 8699db268cc7344d8751213307737285e7cdac4fbecsewardj UInt resOSZACP, resECX; 8709db268cc7344d8751213307737285e7cdac4fbecsewardj Bool ok 8719db268cc7344d8751213307737285e7cdac4fbecsewardj = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU, 8729db268cc7344d8751213307737285e7cdac4fbecsewardj zmask_from_V128(argLU), 8739db268cc7344d8751213307737285e7cdac4fbecsewardj zmask_from_V128(argRU), 8749db268cc7344d8751213307737285e7cdac4fbecsewardj 0x03, False/*!isSTRM*/ 8759db268cc7344d8751213307737285e7cdac4fbecsewardj ); 8769db268cc7344d8751213307737285e7cdac4fbecsewardj assert(ok); 8779db268cc7344d8751213307737285e7cdac4fbecsewardj resECX = resV.uInt[0]; 8789db268cc7344d8751213307737285e7cdac4fbecsewardj return (resOSZACP << 16) | resECX; 8799db268cc7344d8751213307737285e7cdac4fbecsewardj} 8809db268cc7344d8751213307737285e7cdac4fbecsewardj 8819db268cc7344d8751213307737285e7cdac4fbecsewardjvoid istri_03 ( void ) 8829db268cc7344d8751213307737285e7cdac4fbecsewardj{ 8839db268cc7344d8751213307737285e7cdac4fbecsewardj char* wot = "03"; 8849db268cc7344d8751213307737285e7cdac4fbecsewardj UInt(*h)(V128*,V128*) = h_pcmpistri_03; 8859db268cc7344d8751213307737285e7cdac4fbecsewardj UInt(*s)(V128*,V128*) = s_pcmpistri_03; 8869db268cc7344d8751213307737285e7cdac4fbecsewardj 8879db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aacdacbdaacdaacd", "00000000000000aa"); 8889db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aabbaabbaabbaabb", "00000000000000bb"); 8899db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aabbccddaabbccdd", "000000000000aabb"); 8909db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 
8919db268cc7344d8751213307737285e7cdac4fbecsewardj 8929db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbccdd"); 8939db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "00bbccddaabbccdd", "00000000aabbccdd"); 8949db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aabbccddaa00ccdd", "00000000aabbccdd"); 8959db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aabbccddaabb00dd", "00000000aabbccdd"); 8969db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aabbccddaabbcc00", "00000000aabbccdd"); 8979db268cc7344d8751213307737285e7cdac4fbecsewardj 8989db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbccdd"); 8999db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aa00ccdd"); 9009db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabb00dd"); 9019db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbcc00"); 9029db268cc7344d8751213307737285e7cdac4fbecsewardj 9039db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 9049db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 9059db268cc7344d8751213307737285e7cdac4fbecsewardj 9069db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 9079db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 9089db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0000aabbaabbaabb", "000000000000bbbb"); 9099db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0000ccddaabbccdd", "00000000bbaabbaa"); 9109db268cc7344d8751213307737285e7cdac4fbecsewardj 9119db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, 
"0000ccddaabbccdd", "000000bbaabbaa00"); 9129db268cc7344d8751213307737285e7cdac4fbecsewardj 9139db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 9149db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 9159db268cc7344d8751213307737285e7cdac4fbecsewardj} 9169db268cc7344d8751213307737285e7cdac4fbecsewardj 9179db268cc7344d8751213307737285e7cdac4fbecsewardj 9189db268cc7344d8751213307737285e7cdac4fbecsewardj////////////////////////////////////////////////////////// 9199db268cc7344d8751213307737285e7cdac4fbecsewardj// // 9209db268cc7344d8751213307737285e7cdac4fbecsewardj// ISTRI_13 // 9219db268cc7344d8751213307737285e7cdac4fbecsewardj// // 9229db268cc7344d8751213307737285e7cdac4fbecsewardj////////////////////////////////////////////////////////// 9239db268cc7344d8751213307737285e7cdac4fbecsewardj 9249db268cc7344d8751213307737285e7cdac4fbecsewardjUInt h_pcmpistri_13 ( V128* argL, V128* argR ) 9259db268cc7344d8751213307737285e7cdac4fbecsewardj{ 9269db268cc7344d8751213307737285e7cdac4fbecsewardj V128 block[2]; 9279db268cc7344d8751213307737285e7cdac4fbecsewardj memcpy(&block[0], argL, sizeof(V128)); 9289db268cc7344d8751213307737285e7cdac4fbecsewardj memcpy(&block[1], argR, sizeof(V128)); 9299db268cc7344d8751213307737285e7cdac4fbecsewardj ULong res, flags; 9309db268cc7344d8751213307737285e7cdac4fbecsewardj __asm__ __volatile__( 9319db268cc7344d8751213307737285e7cdac4fbecsewardj "subq $1024, %%rsp" "\n\t" 9329db268cc7344d8751213307737285e7cdac4fbecsewardj "movdqu 0(%2), %%xmm2" "\n\t" 9339db268cc7344d8751213307737285e7cdac4fbecsewardj "movdqu 16(%2), %%xmm11" "\n\t" 9349db268cc7344d8751213307737285e7cdac4fbecsewardj "pcmpistri $0x13, %%xmm2, %%xmm11" "\n\t" 9359db268cc7344d8751213307737285e7cdac4fbecsewardj//"pcmpistrm $0x13, %%xmm2, %%xmm11" "\n\t" 9369db268cc7344d8751213307737285e7cdac4fbecsewardj//"movd %%xmm0, %%ecx" "\n\t" 
9379db268cc7344d8751213307737285e7cdac4fbecsewardj "pushfq" "\n\t" 9389db268cc7344d8751213307737285e7cdac4fbecsewardj "popq %%rdx" "\n\t" 9399db268cc7344d8751213307737285e7cdac4fbecsewardj "movq %%rcx, %0" "\n\t" 9409db268cc7344d8751213307737285e7cdac4fbecsewardj "movq %%rdx, %1" "\n\t" 9419db268cc7344d8751213307737285e7cdac4fbecsewardj "addq $1024, %%rsp" "\n\t" 9429db268cc7344d8751213307737285e7cdac4fbecsewardj : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 9439db268cc7344d8751213307737285e7cdac4fbecsewardj : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 9449db268cc7344d8751213307737285e7cdac4fbecsewardj ); 9459db268cc7344d8751213307737285e7cdac4fbecsewardj return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 9469db268cc7344d8751213307737285e7cdac4fbecsewardj} 9479db268cc7344d8751213307737285e7cdac4fbecsewardj 9489db268cc7344d8751213307737285e7cdac4fbecsewardjUInt s_pcmpistri_13 ( V128* argLU, V128* argRU ) 9499db268cc7344d8751213307737285e7cdac4fbecsewardj{ 9509db268cc7344d8751213307737285e7cdac4fbecsewardj V128 resV; 9519db268cc7344d8751213307737285e7cdac4fbecsewardj UInt resOSZACP, resECX; 9529db268cc7344d8751213307737285e7cdac4fbecsewardj Bool ok 9539db268cc7344d8751213307737285e7cdac4fbecsewardj = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU, 9549db268cc7344d8751213307737285e7cdac4fbecsewardj zmask_from_V128(argLU), 9559db268cc7344d8751213307737285e7cdac4fbecsewardj zmask_from_V128(argRU), 9569db268cc7344d8751213307737285e7cdac4fbecsewardj 0x13, False/*!isSTRM*/ 9579db268cc7344d8751213307737285e7cdac4fbecsewardj ); 9589db268cc7344d8751213307737285e7cdac4fbecsewardj assert(ok); 9599db268cc7344d8751213307737285e7cdac4fbecsewardj resECX = resV.uInt[0]; 9609db268cc7344d8751213307737285e7cdac4fbecsewardj return (resOSZACP << 16) | resECX; 9619db268cc7344d8751213307737285e7cdac4fbecsewardj} 9629db268cc7344d8751213307737285e7cdac4fbecsewardj 9639db268cc7344d8751213307737285e7cdac4fbecsewardjvoid istri_13 ( void ) 
9649db268cc7344d8751213307737285e7cdac4fbecsewardj{ 9659db268cc7344d8751213307737285e7cdac4fbecsewardj char* wot = "13"; 9669db268cc7344d8751213307737285e7cdac4fbecsewardj UInt(*h)(V128*,V128*) = h_pcmpistri_13; 9679db268cc7344d8751213307737285e7cdac4fbecsewardj UInt(*s)(V128*,V128*) = s_pcmpistri_13; 9689db268cc7344d8751213307737285e7cdac4fbecsewardj 9699db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aacdacbdaacdaacd", "00000000000000aa"); 9709db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aabbaabbaabbaabb", "00000000000000bb"); 9719db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aabbccddaabbccdd", "000000000000aabb"); 9729db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 9739db268cc7344d8751213307737285e7cdac4fbecsewardj 9749db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbccdd"); 9759db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "00bbccddaabbccdd", "00000000aabbccdd"); 9769db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aabbccddaa00ccdd", "00000000aabbccdd"); 9779db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aabbccddaabb00dd", "00000000aabbccdd"); 9789db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aabbccddaabbcc00", "00000000aabbccdd"); 9799db268cc7344d8751213307737285e7cdac4fbecsewardj 9809db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbccdd"); 9819db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aa00ccdd"); 9829db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabb00dd"); 9839db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbcc00"); 9849db268cc7344d8751213307737285e7cdac4fbecsewardj 
9859db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 9869db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 9879db268cc7344d8751213307737285e7cdac4fbecsewardj 9889db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 9899db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 9909db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0000aabbaabbaabb", "000000000000bbbb"); 9919db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0000ccddaabbccdd", "00000000bbaabbaa"); 9929db268cc7344d8751213307737285e7cdac4fbecsewardj 9939db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0000ccddaabbccdd", "000000bbaabbaa00"); 9949db268cc7344d8751213307737285e7cdac4fbecsewardj 9959db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 9969db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 9979db268cc7344d8751213307737285e7cdac4fbecsewardj} 9989db268cc7344d8751213307737285e7cdac4fbecsewardj 9999db268cc7344d8751213307737285e7cdac4fbecsewardj 10009db268cc7344d8751213307737285e7cdac4fbecsewardj 10019db268cc7344d8751213307737285e7cdac4fbecsewardj////////////////////////////////////////////////////////// 10029db268cc7344d8751213307737285e7cdac4fbecsewardj// // 10039db268cc7344d8751213307737285e7cdac4fbecsewardj// ISTRI_45 // 10049db268cc7344d8751213307737285e7cdac4fbecsewardj// // 10059db268cc7344d8751213307737285e7cdac4fbecsewardj////////////////////////////////////////////////////////// 10069db268cc7344d8751213307737285e7cdac4fbecsewardj 10079db268cc7344d8751213307737285e7cdac4fbecsewardjUInt h_pcmpistri_45 ( V128* argL, V128* argR ) 10089db268cc7344d8751213307737285e7cdac4fbecsewardj{ 
10099db268cc7344d8751213307737285e7cdac4fbecsewardj V128 block[2]; 10109db268cc7344d8751213307737285e7cdac4fbecsewardj memcpy(&block[0], argL, sizeof(V128)); 10119db268cc7344d8751213307737285e7cdac4fbecsewardj memcpy(&block[1], argR, sizeof(V128)); 10129db268cc7344d8751213307737285e7cdac4fbecsewardj ULong res, flags; 10139db268cc7344d8751213307737285e7cdac4fbecsewardj __asm__ __volatile__( 10149db268cc7344d8751213307737285e7cdac4fbecsewardj "subq $1024, %%rsp" "\n\t" 10159db268cc7344d8751213307737285e7cdac4fbecsewardj "movdqu 0(%2), %%xmm2" "\n\t" 10169db268cc7344d8751213307737285e7cdac4fbecsewardj "movdqu 16(%2), %%xmm11" "\n\t" 10179db268cc7344d8751213307737285e7cdac4fbecsewardj "pcmpistri $0x45, %%xmm2, %%xmm11" "\n\t" 10189db268cc7344d8751213307737285e7cdac4fbecsewardj//"pcmpistrm $0x04, %%xmm2, %%xmm11" "\n\t" 10199db268cc7344d8751213307737285e7cdac4fbecsewardj//"movd %%xmm0, %%ecx" "\n\t" 10209db268cc7344d8751213307737285e7cdac4fbecsewardj "pushfq" "\n\t" 10219db268cc7344d8751213307737285e7cdac4fbecsewardj "popq %%rdx" "\n\t" 10229db268cc7344d8751213307737285e7cdac4fbecsewardj "movq %%rcx, %0" "\n\t" 10239db268cc7344d8751213307737285e7cdac4fbecsewardj "movq %%rdx, %1" "\n\t" 10249db268cc7344d8751213307737285e7cdac4fbecsewardj "addq $1024, %%rsp" "\n\t" 10259db268cc7344d8751213307737285e7cdac4fbecsewardj : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 10269db268cc7344d8751213307737285e7cdac4fbecsewardj : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 10279db268cc7344d8751213307737285e7cdac4fbecsewardj ); 10289db268cc7344d8751213307737285e7cdac4fbecsewardj return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 10299db268cc7344d8751213307737285e7cdac4fbecsewardj} 10309db268cc7344d8751213307737285e7cdac4fbecsewardj 10319db268cc7344d8751213307737285e7cdac4fbecsewardjUInt s_pcmpistri_45 ( V128* argLU, V128* argRU ) 10329db268cc7344d8751213307737285e7cdac4fbecsewardj{ 10339db268cc7344d8751213307737285e7cdac4fbecsewardj V128 resV; 
10349db268cc7344d8751213307737285e7cdac4fbecsewardj UInt resOSZACP, resECX; 10359db268cc7344d8751213307737285e7cdac4fbecsewardj Bool ok 10369db268cc7344d8751213307737285e7cdac4fbecsewardj = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU, 10379db268cc7344d8751213307737285e7cdac4fbecsewardj zmask_from_V128(argLU), 10389db268cc7344d8751213307737285e7cdac4fbecsewardj zmask_from_V128(argRU), 10399db268cc7344d8751213307737285e7cdac4fbecsewardj 0x45, False/*!isSTRM*/ 10409db268cc7344d8751213307737285e7cdac4fbecsewardj ); 10419db268cc7344d8751213307737285e7cdac4fbecsewardj assert(ok); 10429db268cc7344d8751213307737285e7cdac4fbecsewardj resECX = resV.uInt[0]; 10439db268cc7344d8751213307737285e7cdac4fbecsewardj return (resOSZACP << 16) | resECX; 10449db268cc7344d8751213307737285e7cdac4fbecsewardj} 10459db268cc7344d8751213307737285e7cdac4fbecsewardj 10469db268cc7344d8751213307737285e7cdac4fbecsewardjvoid istri_45 ( void ) 10479db268cc7344d8751213307737285e7cdac4fbecsewardj{ 10489db268cc7344d8751213307737285e7cdac4fbecsewardj char* wot = "45"; 10499db268cc7344d8751213307737285e7cdac4fbecsewardj UInt(*h)(V128*,V128*) = h_pcmpistri_45; 10509db268cc7344d8751213307737285e7cdac4fbecsewardj UInt(*s)(V128*,V128*) = s_pcmpistri_45; 10519db268cc7344d8751213307737285e7cdac4fbecsewardj 10529db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaabbbbccccdddd", "000000000000bbcc"); 10539db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaabbbbccccdddd", "000000000000ccbb"); 10549db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "baaabbbbccccdddd", "000000000000ccbb"); 10559db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "baaabbbbccccdddc", "000000000000ccbb"); 10569db268cc7344d8751213307737285e7cdac4fbecsewardj 10579db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000ccbb"); 10589db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "bbbbbbbb00bbbbbb", 
"000000000000ccbb"); 10599db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "bbbbbbbbbbbb00bb", "000000000000ccbb"); 10609db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "bbbbbbbbbbbbbb00", "000000000000ccbb"); 10619db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0000000000000000", "000000000000ccbb"); 10629db268cc7344d8751213307737285e7cdac4fbecsewardj 10639db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 10649db268cc7344d8751213307737285e7cdac4fbecsewardj 10659db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000ccbb"); 10669db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000bb"); 10679db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "bb44bb44bb44bb44", "000000006622ccbb"); 10689db268cc7344d8751213307737285e7cdac4fbecsewardj 10699db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "bb44bb44bb44bb44", "000000000022ccbb"); 10709db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "bb44bb44bb44bb44", "000000000000ccbb"); 10719db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "bb44bb44bb44bb44", "00000000000000bb"); 10729db268cc7344d8751213307737285e7cdac4fbecsewardj 10739db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0011223344556677", "0000997755442211"); 10749db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "1122334455667711", "0000997755442211"); 10759db268cc7344d8751213307737285e7cdac4fbecsewardj 10769db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0011223344556677", "0000aa8866553322"); 10779db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "1122334455667711", "0000aa8866553322"); 10789db268cc7344d8751213307737285e7cdac4fbecsewardj} 10799db268cc7344d8751213307737285e7cdac4fbecsewardj 
10809db268cc7344d8751213307737285e7cdac4fbecsewardj 10819db268cc7344d8751213307737285e7cdac4fbecsewardj////////////////////////////////////////////////////////// 10829db268cc7344d8751213307737285e7cdac4fbecsewardj// // 10839db268cc7344d8751213307737285e7cdac4fbecsewardj// ISTRI_01 // 10849db268cc7344d8751213307737285e7cdac4fbecsewardj// // 10859db268cc7344d8751213307737285e7cdac4fbecsewardj////////////////////////////////////////////////////////// 10869db268cc7344d8751213307737285e7cdac4fbecsewardj 10879db268cc7344d8751213307737285e7cdac4fbecsewardjUInt h_pcmpistri_01 ( V128* argL, V128* argR ) 10889db268cc7344d8751213307737285e7cdac4fbecsewardj{ 10899db268cc7344d8751213307737285e7cdac4fbecsewardj V128 block[2]; 10909db268cc7344d8751213307737285e7cdac4fbecsewardj memcpy(&block[0], argL, sizeof(V128)); 10919db268cc7344d8751213307737285e7cdac4fbecsewardj memcpy(&block[1], argR, sizeof(V128)); 10929db268cc7344d8751213307737285e7cdac4fbecsewardj ULong res, flags; 10939db268cc7344d8751213307737285e7cdac4fbecsewardj __asm__ __volatile__( 10949db268cc7344d8751213307737285e7cdac4fbecsewardj "subq $1024, %%rsp" "\n\t" 10959db268cc7344d8751213307737285e7cdac4fbecsewardj "movdqu 0(%2), %%xmm2" "\n\t" 10969db268cc7344d8751213307737285e7cdac4fbecsewardj "movdqu 16(%2), %%xmm11" "\n\t" 10979db268cc7344d8751213307737285e7cdac4fbecsewardj "pcmpistri $0x01, %%xmm2, %%xmm11" "\n\t" 10989db268cc7344d8751213307737285e7cdac4fbecsewardj//"pcmpistrm $0x01, %%xmm2, %%xmm11" "\n\t" 10999db268cc7344d8751213307737285e7cdac4fbecsewardj//"movd %%xmm0, %%ecx" "\n\t" 11009db268cc7344d8751213307737285e7cdac4fbecsewardj "pushfq" "\n\t" 11019db268cc7344d8751213307737285e7cdac4fbecsewardj "popq %%rdx" "\n\t" 11029db268cc7344d8751213307737285e7cdac4fbecsewardj "movq %%rcx, %0" "\n\t" 11039db268cc7344d8751213307737285e7cdac4fbecsewardj "movq %%rdx, %1" "\n\t" 11049db268cc7344d8751213307737285e7cdac4fbecsewardj "addq $1024, %%rsp" "\n\t" 11059db268cc7344d8751213307737285e7cdac4fbecsewardj : /*out*/ 
"=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 11069db268cc7344d8751213307737285e7cdac4fbecsewardj : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 11079db268cc7344d8751213307737285e7cdac4fbecsewardj ); 11089db268cc7344d8751213307737285e7cdac4fbecsewardj return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 11099db268cc7344d8751213307737285e7cdac4fbecsewardj} 11109db268cc7344d8751213307737285e7cdac4fbecsewardj 11119db268cc7344d8751213307737285e7cdac4fbecsewardjUInt s_pcmpistri_01 ( V128* argLU, V128* argRU ) 11129db268cc7344d8751213307737285e7cdac4fbecsewardj{ 11139db268cc7344d8751213307737285e7cdac4fbecsewardj V128 resV; 11149db268cc7344d8751213307737285e7cdac4fbecsewardj UInt resOSZACP, resECX; 11159db268cc7344d8751213307737285e7cdac4fbecsewardj Bool ok 11169db268cc7344d8751213307737285e7cdac4fbecsewardj = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU, 11179db268cc7344d8751213307737285e7cdac4fbecsewardj zmask_from_V128(argLU), 11189db268cc7344d8751213307737285e7cdac4fbecsewardj zmask_from_V128(argRU), 11199db268cc7344d8751213307737285e7cdac4fbecsewardj 0x01, False/*!isSTRM*/ 11209db268cc7344d8751213307737285e7cdac4fbecsewardj ); 11219db268cc7344d8751213307737285e7cdac4fbecsewardj assert(ok); 11229db268cc7344d8751213307737285e7cdac4fbecsewardj resECX = resV.uInt[0]; 11239db268cc7344d8751213307737285e7cdac4fbecsewardj return (resOSZACP << 16) | resECX; 11249db268cc7344d8751213307737285e7cdac4fbecsewardj} 11259db268cc7344d8751213307737285e7cdac4fbecsewardj 11269db268cc7344d8751213307737285e7cdac4fbecsewardjvoid istri_01 ( void ) 11279db268cc7344d8751213307737285e7cdac4fbecsewardj{ 11289db268cc7344d8751213307737285e7cdac4fbecsewardj char* wot = "01"; 11299db268cc7344d8751213307737285e7cdac4fbecsewardj UInt(*h)(V128*,V128*) = h_pcmpistri_01; 11309db268cc7344d8751213307737285e7cdac4fbecsewardj UInt(*s)(V128*,V128*) = s_pcmpistri_01; 11319db268cc7344d8751213307737285e7cdac4fbecsewardj 11329db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, 
"aacdacbdaacdaacd", "00000000000000aa"); 11339db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aabbaabbaabbaabb", "00000000000000bb"); 11349db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aabbccddaabbccdd", "000000000000aabb"); 11359db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 11369db268cc7344d8751213307737285e7cdac4fbecsewardj 11379db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbccdd"); 11389db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "00bbccddaabbccdd", "00000000aabbccdd"); 11399db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aabbccddaa00ccdd", "00000000aabbccdd"); 11409db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aabbccddaabb00dd", "00000000aabbccdd"); 11419db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aabbccddaabbcc00", "00000000aabbccdd"); 11429db268cc7344d8751213307737285e7cdac4fbecsewardj 11439db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbccdd"); 11449db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aa00ccdd"); 11459db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabb00dd"); 11469db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbcc00"); 11479db268cc7344d8751213307737285e7cdac4fbecsewardj 11489db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 11499db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 11509db268cc7344d8751213307737285e7cdac4fbecsewardj 11519db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 11529db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, 
"0000abcdabcdabcd", "000000000000dcba"); 11539db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0000aabbaabbaabb", "000000000000bbbb"); 11549db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0000ccddaabbccdd", "00000000bbaabbaa"); 11559db268cc7344d8751213307737285e7cdac4fbecsewardj 11569db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0000ccddaabbccdd", "000000bbaabbaa00"); 11579db268cc7344d8751213307737285e7cdac4fbecsewardj 11589db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 11599db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 11609db268cc7344d8751213307737285e7cdac4fbecsewardj} 11619db268cc7344d8751213307737285e7cdac4fbecsewardj 11629db268cc7344d8751213307737285e7cdac4fbecsewardj 11639db268cc7344d8751213307737285e7cdac4fbecsewardj////////////////////////////////////////////////////////// 11649db268cc7344d8751213307737285e7cdac4fbecsewardj// // 11659db268cc7344d8751213307737285e7cdac4fbecsewardj// ISTRI_39 // 11669db268cc7344d8751213307737285e7cdac4fbecsewardj// // 11679db268cc7344d8751213307737285e7cdac4fbecsewardj////////////////////////////////////////////////////////// 11689db268cc7344d8751213307737285e7cdac4fbecsewardj 11699db268cc7344d8751213307737285e7cdac4fbecsewardjUInt h_pcmpistri_39 ( V128* argL, V128* argR ) 11709db268cc7344d8751213307737285e7cdac4fbecsewardj{ 11719db268cc7344d8751213307737285e7cdac4fbecsewardj V128 block[2]; 11729db268cc7344d8751213307737285e7cdac4fbecsewardj memcpy(&block[0], argL, sizeof(V128)); 11739db268cc7344d8751213307737285e7cdac4fbecsewardj memcpy(&block[1], argR, sizeof(V128)); 11749db268cc7344d8751213307737285e7cdac4fbecsewardj ULong res, flags; 11759db268cc7344d8751213307737285e7cdac4fbecsewardj __asm__ __volatile__( 11769db268cc7344d8751213307737285e7cdac4fbecsewardj "subq $1024, %%rsp" "\n\t" 11779db268cc7344d8751213307737285e7cdac4fbecsewardj "movdqu 
0(%2), %%xmm2" "\n\t" 11789db268cc7344d8751213307737285e7cdac4fbecsewardj "movdqu 16(%2), %%xmm11" "\n\t" 11799db268cc7344d8751213307737285e7cdac4fbecsewardj "pcmpistri $0x39, %%xmm2, %%xmm11" "\n\t" 11809db268cc7344d8751213307737285e7cdac4fbecsewardj "pushfq" "\n\t" 11819db268cc7344d8751213307737285e7cdac4fbecsewardj "popq %%rdx" "\n\t" 11829db268cc7344d8751213307737285e7cdac4fbecsewardj "movq %%rcx, %0" "\n\t" 11839db268cc7344d8751213307737285e7cdac4fbecsewardj "movq %%rdx, %1" "\n\t" 11849db268cc7344d8751213307737285e7cdac4fbecsewardj "addq $1024, %%rsp" "\n\t" 11859db268cc7344d8751213307737285e7cdac4fbecsewardj : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 11869db268cc7344d8751213307737285e7cdac4fbecsewardj : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 11879db268cc7344d8751213307737285e7cdac4fbecsewardj ); 11889db268cc7344d8751213307737285e7cdac4fbecsewardj return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 11899db268cc7344d8751213307737285e7cdac4fbecsewardj} 11909db268cc7344d8751213307737285e7cdac4fbecsewardj 11919db268cc7344d8751213307737285e7cdac4fbecsewardjUInt s_pcmpistri_39 ( V128* argLU, V128* argRU ) 11929db268cc7344d8751213307737285e7cdac4fbecsewardj{ 11939db268cc7344d8751213307737285e7cdac4fbecsewardj V128 resV; 11949db268cc7344d8751213307737285e7cdac4fbecsewardj UInt resOSZACP, resECX; 11959db268cc7344d8751213307737285e7cdac4fbecsewardj Bool ok 11969db268cc7344d8751213307737285e7cdac4fbecsewardj = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU, 11979db268cc7344d8751213307737285e7cdac4fbecsewardj zmask_from_V128(argLU), 11989db268cc7344d8751213307737285e7cdac4fbecsewardj zmask_from_V128(argRU), 11999db268cc7344d8751213307737285e7cdac4fbecsewardj 0x39, False/*!isSTRM*/ 12009db268cc7344d8751213307737285e7cdac4fbecsewardj ); 12019db268cc7344d8751213307737285e7cdac4fbecsewardj assert(ok); 12029db268cc7344d8751213307737285e7cdac4fbecsewardj resECX = resV.uInt[0]; 12039db268cc7344d8751213307737285e7cdac4fbecsewardj return (resOSZACP << 16) 
| resECX; 12049db268cc7344d8751213307737285e7cdac4fbecsewardj} 12059db268cc7344d8751213307737285e7cdac4fbecsewardj 12069db268cc7344d8751213307737285e7cdac4fbecsewardjvoid istri_39 ( void ) 12079db268cc7344d8751213307737285e7cdac4fbecsewardj{ 12089db268cc7344d8751213307737285e7cdac4fbecsewardj char* wot = "39"; 12099db268cc7344d8751213307737285e7cdac4fbecsewardj UInt(*h)(V128*,V128*) = h_pcmpistri_39; 12109db268cc7344d8751213307737285e7cdac4fbecsewardj UInt(*s)(V128*,V128*) = s_pcmpistri_39; 12119db268cc7344d8751213307737285e7cdac4fbecsewardj 12129db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 12139db268cc7344d8751213307737285e7cdac4fbecsewardj 12149db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 12159db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 12169db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 12179db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 12189db268cc7344d8751213307737285e7cdac4fbecsewardj 12199db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 12209db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 12219db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 12229db268cc7344d8751213307737285e7cdac4fbecsewardj 12239db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 12249db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 12259db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 12269db268cc7344d8751213307737285e7cdac4fbecsewardj 
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 12279db268cc7344d8751213307737285e7cdac4fbecsewardj 12289db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 12299db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 12309db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 12319db268cc7344d8751213307737285e7cdac4fbecsewardj 12329db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 12339db268cc7344d8751213307737285e7cdac4fbecsewardj 12349db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa"); 12359db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa"); 12369db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaa00aa"); 12379db268cc7344d8751213307737285e7cdac4fbecsewardj 12389db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaaaaaa"); 12399db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa"); 12409db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaa00aa"); 12419db268cc7344d8751213307737285e7cdac4fbecsewardj 12429db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa"); 12439db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa00aaaaaa"); 12449db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaa00aaaaaa"); 12459db268cc7344d8751213307737285e7cdac4fbecsewardj 12469db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0000000000000000", "aaaaaaaa00aaaaaa"); 12479db268cc7344d8751213307737285e7cdac4fbecsewardj 
try_istri(wot,h,s, "8000000000000000", "aaaaaaaa00aaaaaa"); 12489db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0000000000000001", "aaaaaaaa00aaaaaa"); 12499db268cc7344d8751213307737285e7cdac4fbecsewardj 12509db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 12519db268cc7344d8751213307737285e7cdac4fbecsewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 12529db268cc7344d8751213307737285e7cdac4fbecsewardj} 12539db268cc7344d8751213307737285e7cdac4fbecsewardj 12549db268cc7344d8751213307737285e7cdac4fbecsewardj 12559db268cc7344d8751213307737285e7cdac4fbecsewardj 12569db268cc7344d8751213307737285e7cdac4fbecsewardj////////////////////////////////////////////////////////// 12579db268cc7344d8751213307737285e7cdac4fbecsewardj// // 12589db268cc7344d8751213307737285e7cdac4fbecsewardj// main // 12599db268cc7344d8751213307737285e7cdac4fbecsewardj// // 12609db268cc7344d8751213307737285e7cdac4fbecsewardj////////////////////////////////////////////////////////// 12619db268cc7344d8751213307737285e7cdac4fbecsewardj 12629db268cc7344d8751213307737285e7cdac4fbecsewardjint main ( void ) 12639db268cc7344d8751213307737285e7cdac4fbecsewardj{ 12649db268cc7344d8751213307737285e7cdac4fbecsewardj istri_4B(); 12659db268cc7344d8751213307737285e7cdac4fbecsewardj istri_3B(); 12669db268cc7344d8751213307737285e7cdac4fbecsewardj istri_09(); 12679db268cc7344d8751213307737285e7cdac4fbecsewardj istri_1B(); 12689db268cc7344d8751213307737285e7cdac4fbecsewardj istri_03(); 12699db268cc7344d8751213307737285e7cdac4fbecsewardj istri_0D(); 12709db268cc7344d8751213307737285e7cdac4fbecsewardj istri_13(); 12719db268cc7344d8751213307737285e7cdac4fbecsewardj istri_45(); 12729db268cc7344d8751213307737285e7cdac4fbecsewardj istri_01(); 12739db268cc7344d8751213307737285e7cdac4fbecsewardj istri_39(); 12749db268cc7344d8751213307737285e7cdac4fbecsewardj return 0; 12759db268cc7344d8751213307737285e7cdac4fbecsewardj} 1276