
/* Tests in detail the core arithmetic for pcmp{e,i}str{i,m} using
   pcmpistri to drive it.  Does not check the e-vs-i or i-vs-m
   aspect. */

#include <string.h>
#include <stdio.h>
#include <assert.h>

typedef  unsigned int            UInt;
typedef  signed int              Int;
typedef  unsigned char           UChar;
typedef  signed char             Char;
typedef  unsigned long long int  ULong;
typedef  UChar                   Bool;
#define False ((Bool)0)
#define True  ((Bool)1)

//typedef  unsigned char  V128[16];
/* A 128-bit vector value, viewable as 16 bytes or as four 32-bit words. */
typedef
   union {
      UChar uChar[16];
      UInt  uInt[4];
   }
   V128;

/* Bit positions of the O, S, Z, A, C and P flags, and the matching
   single-bit masks. */
#define SHIFT_O   11
#define SHIFT_S   7
#define SHIFT_Z   6
#define SHIFT_A   4
#define SHIFT_C   0
#define SHIFT_P   2

#define MASK_O    (1ULL << SHIFT_O)
#define MASK_S    (1ULL << SHIFT_S)
#define MASK_Z    (1ULL << SHIFT_Z)
#define MASK_A    (1ULL << SHIFT_A)
#define MASK_C    (1ULL << SHIFT_C)
#define MASK_P    (1ULL << SHIFT_P)


/* Count the leading zero bits of the 32-bit value x.  Returns 32 when
   x is zero.

   Implemented as a binary search on unsigned values only, so the
   result does not depend on how the compiler right-shifts negative
   signed integers or converts out-of-range values to a signed type. */
UInt clz32 ( UInt x )
{
   UInt n = 0;
   if (x == 0)
      return 32;
   /* At each step, if the top k bits are all zero, account for them
      and move the remaining bits up to the top. */
   if ((x & 0xFFFF0000u) == 0) { n += 16; x <<= 16; }
   if ((x & 0xFF000000u) == 0) { n +=  8; x <<=  8; }
   if ((x & 0xF0000000u) == 0) { n +=  4; x <<=  4; }
   if ((x & 0xC0000000u) == 0) { n +=  2; x <<=  2; }
   if ((x & 0x80000000u) == 0) { n +=  1; }
   return n;
}

/* Count the trailing zero bits of the 32-bit value x.  Returns 32 when
   x is zero. */
UInt ctz32 ( UInt x )
{
   /* (~x) & (x-1) has a 1 in exactly the positions of x's trailing
      zeroes, so its leading-zero count is 32 minus the answer. */
   return 32 - clz32((~x) & (x-1));
}

/* Expand a 16-character hex summary into a 16-byte vector.

   Each character must be a hex digit (either case); it becomes one
   byte whose two nibbles both equal that digit, so 'a' gives 0xAA and
   '0' gives a zero byte.  The LAST character of the summary becomes
   byte 0 of *dst and the first character becomes byte 15.

   Asserts that summary is exactly 16 characters long and contains
   only hex digits. */
void expand ( V128* dst, const char* summary )
{
   Int i;
   assert( strlen(summary) == 16 );
   for (i = 0; i < 16; i++) {
      UChar xx = 0;
      UChar x = summary[15-i];
      if      (x >= '0' && x <= '9') { xx = x - '0'; }
      else if (x >= 'A' && x <= 'F') { xx = x - 'A' + 10; }
      else if (x >= 'a' && x <= 'f') { xx = x - 'a' + 10; }
      else assert(0);

      assert(xx < 16);
      /* replicate the digit into both nibbles of the byte */
      dst->uChar[i] = (UChar)((xx << 4) | xx);
   }
}
890a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 900a87e8d2d445c36fbd78787c3a553ea46bfe50desewardjvoid try_istri ( char* which, 910a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj UInt(*h_fn)(V128*,V128*), 920a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj UInt(*s_fn)(V128*,V128*), 930a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj char* summL, char* summR ) 940a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj{ 950a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj assert(strlen(which) == 2); 960a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj V128 argL, argR; 970a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj expand(&argL, summL); 980a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj expand(&argR, summR); 990a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj UInt h_res = h_fn(&argL, &argR); 1000a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj UInt s_res = s_fn(&argL, &argR); 1010a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj printf("istri %s %s %s -> %08x %08x %s\n", 1020a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj which, summL, summR, h_res, s_res, h_res == s_res ? "" : "!!!!"); 1030a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj} 1040a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 1055ac99069b0538adcb2f18b04b078ea27b00b4185sewardjUInt zmask_from_V128 ( V128* arg ) 1065ac99069b0538adcb2f18b04b078ea27b00b4185sewardj{ 1075ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt i, res = 0; 1085ac99069b0538adcb2f18b04b078ea27b00b4185sewardj for (i = 0; i < 16; i++) { 1097f3019bfbbbbc5356c351c5cd319c36fe731b806sewardj res |= ((arg->uChar[i] == 0) ? 
1 : 0) << i; 1105ac99069b0538adcb2f18b04b078ea27b00b4185sewardj } 1115ac99069b0538adcb2f18b04b078ea27b00b4185sewardj return res; 1125ac99069b0538adcb2f18b04b078ea27b00b4185sewardj} 1135ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 1145ac99069b0538adcb2f18b04b078ea27b00b4185sewardj////////////////////////////////////////////////////////// 1155ac99069b0538adcb2f18b04b078ea27b00b4185sewardj// // 1165ac99069b0538adcb2f18b04b078ea27b00b4185sewardj// GENERAL // 1175ac99069b0538adcb2f18b04b078ea27b00b4185sewardj// // 1185ac99069b0538adcb2f18b04b078ea27b00b4185sewardj////////////////////////////////////////////////////////// 1195ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 1205ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 1215ac99069b0538adcb2f18b04b078ea27b00b4185sewardj/* Given partial results from a pcmpXstrX operation (intRes1, 1225ac99069b0538adcb2f18b04b078ea27b00b4185sewardj basically), generate an I format (index value for ECX) output, and 1235ac99069b0538adcb2f18b04b078ea27b00b4185sewardj also the new OSZACP flags. 
1245ac99069b0538adcb2f18b04b078ea27b00b4185sewardj*/ 1255ac99069b0538adcb2f18b04b078ea27b00b4185sewardjstatic 1265ac99069b0538adcb2f18b04b078ea27b00b4185sewardjvoid pcmpXstrX_WRK_gen_output_fmt_I(/*OUT*/V128* resV, 1275ac99069b0538adcb2f18b04b078ea27b00b4185sewardj /*OUT*/UInt* resOSZACP, 1285ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt intRes1, 1295ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt zmaskL, UInt zmaskR, 1305ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt validL, 1315ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt pol, UInt idx ) 1325ac99069b0538adcb2f18b04b078ea27b00b4185sewardj{ 1335ac99069b0538adcb2f18b04b078ea27b00b4185sewardj assert((pol >> 2) == 0); 1345ac99069b0538adcb2f18b04b078ea27b00b4185sewardj assert((idx >> 1) == 0); 1355ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 1365ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt intRes2 = 0; 1375ac99069b0538adcb2f18b04b078ea27b00b4185sewardj switch (pol) { 1385ac99069b0538adcb2f18b04b078ea27b00b4185sewardj case 0: intRes2 = intRes1; break; // pol + 1395ac99069b0538adcb2f18b04b078ea27b00b4185sewardj case 1: intRes2 = ~intRes1; break; // pol - 1405ac99069b0538adcb2f18b04b078ea27b00b4185sewardj case 2: intRes2 = intRes1; break; // pol m+ 1415ac99069b0538adcb2f18b04b078ea27b00b4185sewardj case 3: intRes2 = intRes1 ^ validL; break; // pol m- 1425ac99069b0538adcb2f18b04b078ea27b00b4185sewardj } 1435ac99069b0538adcb2f18b04b078ea27b00b4185sewardj intRes2 &= 0xFFFF; 1445ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 1455ac99069b0538adcb2f18b04b078ea27b00b4185sewardj // generate ecx value 1465ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt newECX = 0; 1475ac99069b0538adcb2f18b04b078ea27b00b4185sewardj if (idx) { 1485ac99069b0538adcb2f18b04b078ea27b00b4185sewardj // index of ms-1-bit 1495ac99069b0538adcb2f18b04b078ea27b00b4185sewardj newECX = intRes2 == 0 ? 
16 : (31 - clz32(intRes2)); 1505ac99069b0538adcb2f18b04b078ea27b00b4185sewardj } else { 1515ac99069b0538adcb2f18b04b078ea27b00b4185sewardj // index of ls-1-bit 1525ac99069b0538adcb2f18b04b078ea27b00b4185sewardj newECX = intRes2 == 0 ? 16 : ctz32(intRes2); 1535ac99069b0538adcb2f18b04b078ea27b00b4185sewardj } 1545ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 1555ac99069b0538adcb2f18b04b078ea27b00b4185sewardj *(UInt*)(&resV[0]) = newECX; 1565ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 1575ac99069b0538adcb2f18b04b078ea27b00b4185sewardj // generate new flags, common to all ISTRI and ISTRM cases 1585ac99069b0538adcb2f18b04b078ea27b00b4185sewardj *resOSZACP // A, P are zero 1595ac99069b0538adcb2f18b04b078ea27b00b4185sewardj = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0 1605ac99069b0538adcb2f18b04b078ea27b00b4185sewardj | ((zmaskL == 0) ? 0 : MASK_Z) // Z == 1 iff any in argL is 0 1615ac99069b0538adcb2f18b04b078ea27b00b4185sewardj | ((zmaskR == 0) ? 0 : MASK_S) // S == 1 iff any in argR is 0 1625ac99069b0538adcb2f18b04b078ea27b00b4185sewardj | ((intRes2 & 1) << SHIFT_O); // O == IntRes2[0] 1635ac99069b0538adcb2f18b04b078ea27b00b4185sewardj} 1645ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 1655ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 1665ac99069b0538adcb2f18b04b078ea27b00b4185sewardj/* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M} 1675ac99069b0538adcb2f18b04b078ea27b00b4185sewardj variants. 1685ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 1695ac99069b0538adcb2f18b04b078ea27b00b4185sewardj For xSTRI variants, the new ECX value is placed in the 32 bits 1705ac99069b0538adcb2f18b04b078ea27b00b4185sewardj pointed to by *resV. For xSTRM variants, the result is a 128 bit 1715ac99069b0538adcb2f18b04b078ea27b00b4185sewardj value and is placed at *resV in the obvious way. 1725ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 1735ac99069b0538adcb2f18b04b078ea27b00b4185sewardj For all variants, the new OSZACP value is placed at *resOSZACP. 
1745ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 1755ac99069b0538adcb2f18b04b078ea27b00b4185sewardj argLV and argRV are the vector args. The caller must prepare a 1765ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 16-bit mask for each, zmaskL and zmaskR. For ISTRx variants this 1775ac99069b0538adcb2f18b04b078ea27b00b4185sewardj must be 1 for each zero byte of of the respective arg. For ESTRx 1785ac99069b0538adcb2f18b04b078ea27b00b4185sewardj variants this is derived from the explicit length indication, and 1795ac99069b0538adcb2f18b04b078ea27b00b4185sewardj must be 0 in all places except at the bit index corresponding to 1805ac99069b0538adcb2f18b04b078ea27b00b4185sewardj the valid length (0 .. 16). If the valid length is 16 then the 1815ac99069b0538adcb2f18b04b078ea27b00b4185sewardj mask must be all zeroes. In all cases, bits 31:16 must be zero. 1825ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 1835ac99069b0538adcb2f18b04b078ea27b00b4185sewardj imm8 is the original immediate from the instruction. isSTRM 1845ac99069b0538adcb2f18b04b078ea27b00b4185sewardj indicates whether this is a xSTRM or xSTRI variant, which controls 1855ac99069b0538adcb2f18b04b078ea27b00b4185sewardj how much of *res is written. 1865ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 1875ac99069b0538adcb2f18b04b078ea27b00b4185sewardj If the given imm8 case can be handled, the return value is True. 1885ac99069b0538adcb2f18b04b078ea27b00b4185sewardj If not, False is returned, and neither *res not *resOSZACP are 1895ac99069b0538adcb2f18b04b078ea27b00b4185sewardj altered. 
1905ac99069b0538adcb2f18b04b078ea27b00b4185sewardj*/ 1915ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 1925ac99069b0538adcb2f18b04b078ea27b00b4185sewardjBool pcmpXstrX_WRK ( /*OUT*/V128* resV, 1935ac99069b0538adcb2f18b04b078ea27b00b4185sewardj /*OUT*/UInt* resOSZACP, 1945ac99069b0538adcb2f18b04b078ea27b00b4185sewardj V128* argLV, V128* argRV, 1955ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt zmaskL, UInt zmaskR, 1965ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt imm8, Bool isSTRM ) 1975ac99069b0538adcb2f18b04b078ea27b00b4185sewardj{ 1985ac99069b0538adcb2f18b04b078ea27b00b4185sewardj assert(imm8 < 0x80); 1995ac99069b0538adcb2f18b04b078ea27b00b4185sewardj assert((zmaskL >> 16) == 0); 2005ac99069b0538adcb2f18b04b078ea27b00b4185sewardj assert((zmaskR >> 16) == 0); 2015ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 2027f3019bfbbbbc5356c351c5cd319c36fe731b806sewardj /* Explicitly reject any imm8 values that haven't been validated, 2037f3019bfbbbbc5356c351c5cd319c36fe731b806sewardj even if they would probably work. Life is too short to have 2047f3019bfbbbbc5356c351c5cd319c36fe731b806sewardj unvalidated cases in the code base. 
*/ 2057f3019bfbbbbc5356c351c5cd319c36fe731b806sewardj switch (imm8) { 206a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes case 0x00: case 0x02: 207a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes case 0x08: case 0x0A: case 0x0C: case 0x0E: 208ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes case 0x10: case 0x12: case 0x14: 209a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes case 0x18: case 0x1A: 210a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes case 0x30: case 0x34: 211a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes case 0x38: case 0x3A: 212a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes case 0x40: case 0x42: case 0x44: case 0x46: 213a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes case 0x4A: 214a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes case 0x62: 215a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes case 0x70: case 0x72: 2167f3019bfbbbbc5356c351c5cd319c36fe731b806sewardj break; 2177f3019bfbbbbc5356c351c5cd319c36fe731b806sewardj default: 2187f3019bfbbbbc5356c351c5cd319c36fe731b806sewardj return False; 2197f3019bfbbbbc5356c351c5cd319c36fe731b806sewardj } 2207f3019bfbbbbc5356c351c5cd319c36fe731b806sewardj 2215ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt fmt = (imm8 >> 0) & 3; // imm8[1:0] data format 2225ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt agg = (imm8 >> 2) & 3; // imm8[3:2] aggregation fn 2235ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt pol = (imm8 >> 4) & 3; // imm8[5:4] polarity 2245ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt idx = (imm8 >> 6) & 1; // imm8[6] 1==msb/bytemask 2255ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 2265ac99069b0538adcb2f18b04b078ea27b00b4185sewardj /*----------------------------------------*/ 2275ac99069b0538adcb2f18b04b078ea27b00b4185sewardj /*-- strcmp on byte data --*/ 2285ac99069b0538adcb2f18b04b078ea27b00b4185sewardj /*----------------------------------------*/ 2295ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 
2305ac99069b0538adcb2f18b04b078ea27b00b4185sewardj if (agg == 2/*equal each, aka strcmp*/ 2315ac99069b0538adcb2f18b04b078ea27b00b4185sewardj && (fmt == 0/*ub*/ || fmt == 2/*sb*/) 2325ac99069b0538adcb2f18b04b078ea27b00b4185sewardj && !isSTRM) { 2335ac99069b0538adcb2f18b04b078ea27b00b4185sewardj Int i; 2345ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UChar* argL = (UChar*)argLV; 2355ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UChar* argR = (UChar*)argRV; 2365ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt boolResII = 0; 2375ac99069b0538adcb2f18b04b078ea27b00b4185sewardj for (i = 15; i >= 0; i--) { 2385ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UChar cL = argL[i]; 2395ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UChar cR = argR[i]; 2405ac99069b0538adcb2f18b04b078ea27b00b4185sewardj boolResII = (boolResII << 1) | (cL == cR ? 1 : 0); 2415ac99069b0538adcb2f18b04b078ea27b00b4185sewardj } 2425ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL)) 2435ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR)) 2445ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 2455ac99069b0538adcb2f18b04b078ea27b00b4185sewardj // do invalidation, common to all equal-each cases 2465ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt intRes1 2475ac99069b0538adcb2f18b04b078ea27b00b4185sewardj = (boolResII & validL & validR) // if both valid, use cmpres 2485ac99069b0538adcb2f18b04b078ea27b00b4185sewardj | (~ (validL | validR)); // if both invalid, force 1 2495ac99069b0538adcb2f18b04b078ea27b00b4185sewardj // else force 0 2505ac99069b0538adcb2f18b04b078ea27b00b4185sewardj intRes1 &= 0xFFFF; 2515ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 2525ac99069b0538adcb2f18b04b078ea27b00b4185sewardj // generate I-format output 2535ac99069b0538adcb2f18b04b078ea27b00b4185sewardj pcmpXstrX_WRK_gen_output_fmt_I( 2545ac99069b0538adcb2f18b04b078ea27b00b4185sewardj resV, resOSZACP, 
2555ac99069b0538adcb2f18b04b078ea27b00b4185sewardj intRes1, zmaskL, zmaskR, validL, pol, idx 2565ac99069b0538adcb2f18b04b078ea27b00b4185sewardj ); 2575ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 2585ac99069b0538adcb2f18b04b078ea27b00b4185sewardj return True; 2595ac99069b0538adcb2f18b04b078ea27b00b4185sewardj } 2605ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 2615ac99069b0538adcb2f18b04b078ea27b00b4185sewardj /*----------------------------------------*/ 2625ac99069b0538adcb2f18b04b078ea27b00b4185sewardj /*-- set membership on byte data --*/ 2635ac99069b0538adcb2f18b04b078ea27b00b4185sewardj /*----------------------------------------*/ 2645ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 2655ac99069b0538adcb2f18b04b078ea27b00b4185sewardj if (agg == 0/*equal any, aka find chars in a set*/ 2665ac99069b0538adcb2f18b04b078ea27b00b4185sewardj && (fmt == 0/*ub*/ || fmt == 2/*sb*/) 2675ac99069b0538adcb2f18b04b078ea27b00b4185sewardj && !isSTRM) { 2685ac99069b0538adcb2f18b04b078ea27b00b4185sewardj /* argL: the string, argR: charset */ 2695ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt si, ci; 2705ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UChar* argL = (UChar*)argLV; 2715ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UChar* argR = (UChar*)argRV; 2725ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt boolRes = 0; 2735ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL)) 2745ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR)) 2755ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 2765ac99069b0538adcb2f18b04b078ea27b00b4185sewardj for (si = 0; si < 16; si++) { 2775ac99069b0538adcb2f18b04b078ea27b00b4185sewardj if ((validL & (1 << si)) == 0) 2785ac99069b0538adcb2f18b04b078ea27b00b4185sewardj // run off the end of the string. 
2795ac99069b0538adcb2f18b04b078ea27b00b4185sewardj break; 2805ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt m = 0; 2815ac99069b0538adcb2f18b04b078ea27b00b4185sewardj for (ci = 0; ci < 16; ci++) { 2825ac99069b0538adcb2f18b04b078ea27b00b4185sewardj if ((validR & (1 << ci)) == 0) break; 2835ac99069b0538adcb2f18b04b078ea27b00b4185sewardj if (argR[ci] == argL[si]) { m = 1; break; } 2845ac99069b0538adcb2f18b04b078ea27b00b4185sewardj } 2855ac99069b0538adcb2f18b04b078ea27b00b4185sewardj boolRes |= (m << si); 2865ac99069b0538adcb2f18b04b078ea27b00b4185sewardj } 2875ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 2885ac99069b0538adcb2f18b04b078ea27b00b4185sewardj // boolRes is "pre-invalidated" 2895ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt intRes1 = boolRes & 0xFFFF; 2905ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 2915ac99069b0538adcb2f18b04b078ea27b00b4185sewardj // generate I-format output 2925ac99069b0538adcb2f18b04b078ea27b00b4185sewardj pcmpXstrX_WRK_gen_output_fmt_I( 2935ac99069b0538adcb2f18b04b078ea27b00b4185sewardj resV, resOSZACP, 2945ac99069b0538adcb2f18b04b078ea27b00b4185sewardj intRes1, zmaskL, zmaskR, validL, pol, idx 2955ac99069b0538adcb2f18b04b078ea27b00b4185sewardj ); 2965ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 2975ac99069b0538adcb2f18b04b078ea27b00b4185sewardj return True; 2985ac99069b0538adcb2f18b04b078ea27b00b4185sewardj } 2995ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 3005ac99069b0538adcb2f18b04b078ea27b00b4185sewardj /*----------------------------------------*/ 3015ac99069b0538adcb2f18b04b078ea27b00b4185sewardj /*-- substring search on byte data --*/ 3025ac99069b0538adcb2f18b04b078ea27b00b4185sewardj /*----------------------------------------*/ 3035ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 3045ac99069b0538adcb2f18b04b078ea27b00b4185sewardj if (agg == 3/*equal ordered, aka substring search*/ 3055ac99069b0538adcb2f18b04b078ea27b00b4185sewardj && (fmt == 0/*ub*/ || fmt == 2/*sb*/) 
3065ac99069b0538adcb2f18b04b078ea27b00b4185sewardj && !isSTRM) { 3075ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 3085ac99069b0538adcb2f18b04b078ea27b00b4185sewardj /* argL: haystack, argR: needle */ 3095ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt ni, hi; 3105ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UChar* argL = (UChar*)argLV; 3115ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UChar* argR = (UChar*)argRV; 3125ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt boolRes = 0; 3135ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL)) 3145ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR)) 3155ac99069b0538adcb2f18b04b078ea27b00b4185sewardj for (hi = 0; hi < 16; hi++) { 3165ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt m = 1; 3175ac99069b0538adcb2f18b04b078ea27b00b4185sewardj for (ni = 0; ni < 16; ni++) { 3185ac99069b0538adcb2f18b04b078ea27b00b4185sewardj if ((validR & (1 << ni)) == 0) break; 3195ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt i = ni + hi; 3205ac99069b0538adcb2f18b04b078ea27b00b4185sewardj if (i >= 16) break; 3215ac99069b0538adcb2f18b04b078ea27b00b4185sewardj if (argL[i] != argR[ni]) { m = 0; break; } 3225ac99069b0538adcb2f18b04b078ea27b00b4185sewardj } 3235ac99069b0538adcb2f18b04b078ea27b00b4185sewardj boolRes |= (m << hi); 324c5274ae844ae01cde66e35f1873ed37726dccd45weidendo if ((validL & (1 << hi)) == 0) 325c5274ae844ae01cde66e35f1873ed37726dccd45weidendo // run off the end of the haystack 326c5274ae844ae01cde66e35f1873ed37726dccd45weidendo break; 3275ac99069b0538adcb2f18b04b078ea27b00b4185sewardj } 3285ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 3295ac99069b0538adcb2f18b04b078ea27b00b4185sewardj // boolRes is "pre-invalidated" 3305ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt intRes1 = boolRes & 0xFFFF; 3315ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 3325ac99069b0538adcb2f18b04b078ea27b00b4185sewardj // generate 
I-format output 3335ac99069b0538adcb2f18b04b078ea27b00b4185sewardj pcmpXstrX_WRK_gen_output_fmt_I( 3345ac99069b0538adcb2f18b04b078ea27b00b4185sewardj resV, resOSZACP, 3355ac99069b0538adcb2f18b04b078ea27b00b4185sewardj intRes1, zmaskL, zmaskR, validL, pol, idx 3365ac99069b0538adcb2f18b04b078ea27b00b4185sewardj ); 3375ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 3385ac99069b0538adcb2f18b04b078ea27b00b4185sewardj return True; 3395ac99069b0538adcb2f18b04b078ea27b00b4185sewardj } 3405ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 3415ac99069b0538adcb2f18b04b078ea27b00b4185sewardj /*----------------------------------------*/ 3425ac99069b0538adcb2f18b04b078ea27b00b4185sewardj /*-- ranges, unsigned byte data --*/ 3435ac99069b0538adcb2f18b04b078ea27b00b4185sewardj /*----------------------------------------*/ 3445ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 3455ac99069b0538adcb2f18b04b078ea27b00b4185sewardj if (agg == 1/*ranges*/ 3465ac99069b0538adcb2f18b04b078ea27b00b4185sewardj && fmt == 0/*ub*/ 3475ac99069b0538adcb2f18b04b078ea27b00b4185sewardj && !isSTRM) { 3485ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 3495ac99069b0538adcb2f18b04b078ea27b00b4185sewardj /* argL: string, argR: range-pairs */ 3505ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt ri, si; 3515ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UChar* argL = (UChar*)argLV; 3525ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UChar* argR = (UChar*)argRV; 3535ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt boolRes = 0; 3545ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL)) 3555ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR)) 3565ac99069b0538adcb2f18b04b078ea27b00b4185sewardj for (si = 0; si < 16; si++) { 3575ac99069b0538adcb2f18b04b078ea27b00b4185sewardj if ((validL & (1 << si)) == 0) 3585ac99069b0538adcb2f18b04b078ea27b00b4185sewardj // run off the end of the string 
3595ac99069b0538adcb2f18b04b078ea27b00b4185sewardj break; 3605ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt m = 0; 3615ac99069b0538adcb2f18b04b078ea27b00b4185sewardj for (ri = 0; ri < 16; ri += 2) { 3625ac99069b0538adcb2f18b04b078ea27b00b4185sewardj if ((validR & (3 << ri)) != (3 << ri)) break; 3635ac99069b0538adcb2f18b04b078ea27b00b4185sewardj if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) { 3645ac99069b0538adcb2f18b04b078ea27b00b4185sewardj m = 1; break; 3655ac99069b0538adcb2f18b04b078ea27b00b4185sewardj } 3665ac99069b0538adcb2f18b04b078ea27b00b4185sewardj } 3675ac99069b0538adcb2f18b04b078ea27b00b4185sewardj boolRes |= (m << si); 3685ac99069b0538adcb2f18b04b078ea27b00b4185sewardj } 3695ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 3705ac99069b0538adcb2f18b04b078ea27b00b4185sewardj // boolRes is "pre-invalidated" 3715ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt intRes1 = boolRes & 0xFFFF; 3725ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 3735ac99069b0538adcb2f18b04b078ea27b00b4185sewardj // generate I-format output 3745ac99069b0538adcb2f18b04b078ea27b00b4185sewardj pcmpXstrX_WRK_gen_output_fmt_I( 3755ac99069b0538adcb2f18b04b078ea27b00b4185sewardj resV, resOSZACP, 3765ac99069b0538adcb2f18b04b078ea27b00b4185sewardj intRes1, zmaskL, zmaskR, validL, pol, idx 3775ac99069b0538adcb2f18b04b078ea27b00b4185sewardj ); 3785ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 3795ac99069b0538adcb2f18b04b078ea27b00b4185sewardj return True; 3805ac99069b0538adcb2f18b04b078ea27b00b4185sewardj } 3815ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 38215df336557eb012a5f3b2f1482a0411857039496sewardj /*----------------------------------------*/ 38315df336557eb012a5f3b2f1482a0411857039496sewardj /*-- ranges, signed byte data --*/ 38415df336557eb012a5f3b2f1482a0411857039496sewardj /*----------------------------------------*/ 38515df336557eb012a5f3b2f1482a0411857039496sewardj 38615df336557eb012a5f3b2f1482a0411857039496sewardj if (agg == 1/*ranges*/ 
38715df336557eb012a5f3b2f1482a0411857039496sewardj && fmt == 2/*sb*/ 38815df336557eb012a5f3b2f1482a0411857039496sewardj && !isSTRM) { 38915df336557eb012a5f3b2f1482a0411857039496sewardj 39015df336557eb012a5f3b2f1482a0411857039496sewardj /* argL: string, argR: range-pairs */ 39115df336557eb012a5f3b2f1482a0411857039496sewardj UInt ri, si; 39215df336557eb012a5f3b2f1482a0411857039496sewardj Char* argL = (Char*)argLV; 39315df336557eb012a5f3b2f1482a0411857039496sewardj Char* argR = (Char*)argRV; 39415df336557eb012a5f3b2f1482a0411857039496sewardj UInt boolRes = 0; 39515df336557eb012a5f3b2f1482a0411857039496sewardj UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL)) 39615df336557eb012a5f3b2f1482a0411857039496sewardj UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR)) 39715df336557eb012a5f3b2f1482a0411857039496sewardj for (si = 0; si < 16; si++) { 39815df336557eb012a5f3b2f1482a0411857039496sewardj if ((validL & (1 << si)) == 0) 39915df336557eb012a5f3b2f1482a0411857039496sewardj // run off the end of the string 40015df336557eb012a5f3b2f1482a0411857039496sewardj break; 40115df336557eb012a5f3b2f1482a0411857039496sewardj UInt m = 0; 40215df336557eb012a5f3b2f1482a0411857039496sewardj for (ri = 0; ri < 16; ri += 2) { 40315df336557eb012a5f3b2f1482a0411857039496sewardj if ((validR & (3 << ri)) != (3 << ri)) break; 40415df336557eb012a5f3b2f1482a0411857039496sewardj if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) { 40515df336557eb012a5f3b2f1482a0411857039496sewardj m = 1; break; 40615df336557eb012a5f3b2f1482a0411857039496sewardj } 40715df336557eb012a5f3b2f1482a0411857039496sewardj } 40815df336557eb012a5f3b2f1482a0411857039496sewardj boolRes |= (m << si); 40915df336557eb012a5f3b2f1482a0411857039496sewardj } 41015df336557eb012a5f3b2f1482a0411857039496sewardj 41115df336557eb012a5f3b2f1482a0411857039496sewardj // boolRes is "pre-invalidated" 41215df336557eb012a5f3b2f1482a0411857039496sewardj UInt intRes1 = boolRes & 0xFFFF; 41315df336557eb012a5f3b2f1482a0411857039496sewardj 
41415df336557eb012a5f3b2f1482a0411857039496sewardj // generate I-format output 41515df336557eb012a5f3b2f1482a0411857039496sewardj pcmpXstrX_WRK_gen_output_fmt_I( 41615df336557eb012a5f3b2f1482a0411857039496sewardj resV, resOSZACP, 41715df336557eb012a5f3b2f1482a0411857039496sewardj intRes1, zmaskL, zmaskR, validL, pol, idx 41815df336557eb012a5f3b2f1482a0411857039496sewardj ); 41915df336557eb012a5f3b2f1482a0411857039496sewardj 42015df336557eb012a5f3b2f1482a0411857039496sewardj return True; 42115df336557eb012a5f3b2f1482a0411857039496sewardj } 42215df336557eb012a5f3b2f1482a0411857039496sewardj 4235ac99069b0538adcb2f18b04b078ea27b00b4185sewardj return False; 4245ac99069b0538adcb2f18b04b078ea27b00b4185sewardj} 4255ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 4265ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 4270a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj////////////////////////////////////////////////////////// 4280a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj// // 4290a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj// ISTRI_4A // 4300a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj// // 4310a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj////////////////////////////////////////////////////////// 4320a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 4330a87e8d2d445c36fbd78787c3a553ea46bfe50desewardjUInt h_pcmpistri_4A ( V128* argL, V128* argR ) 4340a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj{ 4350a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj V128 block[2]; 4360a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj memcpy(&block[0], argL, sizeof(V128)); 4370a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj memcpy(&block[1], argR, sizeof(V128)); 4380a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj ULong res, flags; 4390a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj __asm__ __volatile__( 4400a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj "subq $1024, %%rsp" "\n\t" 4410a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj "movdqu 0(%2), %%xmm2" "\n\t" 
4420a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj "movdqu 16(%2), %%xmm11" "\n\t" 4430a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj "pcmpistri $0x4A, %%xmm2, %%xmm11" "\n\t" 4440a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj "pushfq" "\n\t" 4450a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj "popq %%rdx" "\n\t" 4460a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj "movq %%rcx, %0" "\n\t" 4470a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj "movq %%rdx, %1" "\n\t" 4480a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj "addq $1024, %%rsp" "\n\t" 4490a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 4500a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 4510a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj ); 4520a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 4530a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj} 4540a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 4550a87e8d2d445c36fbd78787c3a553ea46bfe50desewardjUInt s_pcmpistri_4A ( V128* argLU, V128* argRU ) 4560a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj{ 4575ac99069b0538adcb2f18b04b078ea27b00b4185sewardj V128 resV; 4585ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt resOSZACP, resECX; 4595ac99069b0538adcb2f18b04b078ea27b00b4185sewardj Bool ok 4605ac99069b0538adcb2f18b04b078ea27b00b4185sewardj = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 4615ac99069b0538adcb2f18b04b078ea27b00b4185sewardj zmask_from_V128(argLU), 4625ac99069b0538adcb2f18b04b078ea27b00b4185sewardj zmask_from_V128(argRU), 4635ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 0x4A, False/*!isSTRM*/ 4645ac99069b0538adcb2f18b04b078ea27b00b4185sewardj ); 4655ac99069b0538adcb2f18b04b078ea27b00b4185sewardj assert(ok); 4667f3019bfbbbbc5356c351c5cd319c36fe731b806sewardj resECX = resV.uInt[0]; 4675ac99069b0538adcb2f18b04b078ea27b00b4185sewardj return (resOSZACP << 16) | resECX; 
4680a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj} 4690a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 4700a87e8d2d445c36fbd78787c3a553ea46bfe50desewardjvoid istri_4A ( void ) 4710a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj{ 4720a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj char* wot = "4A"; 4730a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj UInt(*h)(V128*,V128*) = h_pcmpistri_4A; 4740a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj UInt(*s)(V128*,V128*) = s_pcmpistri_4A; 4750a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 4760a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 4770a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 4780a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 4790a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 4800a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 4810a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 4820a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 4830a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 4840a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 4850a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 4860a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 4870a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 4880a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 4890a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 4900a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", 
"aaaaaaaaaaaaaaaa"); 4910a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 4920a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 4930a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 4940a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 4950a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 4960a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 4970a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 4980a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 4990a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 5000a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); 5010a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 5020a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa"); 5030a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 5040a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); 5050a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 5060a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 5070a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); 5080a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa"); 5090a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 5100a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); 5110a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); 
5120a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa"); 5130a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 5140a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 5150a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 5160a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj} 5170a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 5180a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj////////////////////////////////////////////////////////// 5190a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj// // 5200a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj// ISTRI_3A // 5210a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj// // 5220a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj////////////////////////////////////////////////////////// 5230a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 5240a87e8d2d445c36fbd78787c3a553ea46bfe50desewardjUInt h_pcmpistri_3A ( V128* argL, V128* argR ) 5250a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj{ 5260a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj V128 block[2]; 5270a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj memcpy(&block[0], argL, sizeof(V128)); 5280a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj memcpy(&block[1], argR, sizeof(V128)); 5290a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj ULong res, flags; 5300a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj __asm__ __volatile__( 5310a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj "subq $1024, %%rsp" "\n\t" 5320a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj "movdqu 0(%2), %%xmm2" "\n\t" 5330a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj "movdqu 16(%2), %%xmm11" "\n\t" 5340a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj "pcmpistri $0x3A, %%xmm2, %%xmm11" "\n\t" 5350a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj "pushfq" "\n\t" 5360a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj "popq %%rdx" "\n\t" 5370a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj "movq 
%%rcx, %0" "\n\t" 5380a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj "movq %%rdx, %1" "\n\t" 5390a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj "addq $1024, %%rsp" "\n\t" 5400a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 5410a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 5420a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj ); 5430a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 5440a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj} 5450a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 5460a87e8d2d445c36fbd78787c3a553ea46bfe50desewardjUInt s_pcmpistri_3A ( V128* argLU, V128* argRU ) 5470a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj{ 5485ac99069b0538adcb2f18b04b078ea27b00b4185sewardj V128 resV; 5495ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt resOSZACP, resECX; 5505ac99069b0538adcb2f18b04b078ea27b00b4185sewardj Bool ok 5515ac99069b0538adcb2f18b04b078ea27b00b4185sewardj = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 5525ac99069b0538adcb2f18b04b078ea27b00b4185sewardj zmask_from_V128(argLU), 5535ac99069b0538adcb2f18b04b078ea27b00b4185sewardj zmask_from_V128(argRU), 5545ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 0x3A, False/*!isSTRM*/ 5555ac99069b0538adcb2f18b04b078ea27b00b4185sewardj ); 5565ac99069b0538adcb2f18b04b078ea27b00b4185sewardj assert(ok); 5577f3019bfbbbbc5356c351c5cd319c36fe731b806sewardj resECX = resV.uInt[0]; 5585ac99069b0538adcb2f18b04b078ea27b00b4185sewardj return (resOSZACP << 16) | resECX; 5590a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj} 5600a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 5610a87e8d2d445c36fbd78787c3a553ea46bfe50desewardjvoid istri_3A ( void ) 5620a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj{ 5630a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj char* wot = "3A"; 5640a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj UInt(*h)(V128*,V128*) = h_pcmpistri_3A; 
5650a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj UInt(*s)(V128*,V128*) = s_pcmpistri_3A; 5660a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 5670a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 5680a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 5690a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 5700a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 5710a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 5720a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 5730a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 5740a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 5750a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 5760a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 5770a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 5780a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 5790a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 5800a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 5810a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 5820a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 5830a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 5840a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 5850a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", 
"aaaaaaaa2aaa4aaa"); 5860a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 5870a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 5880a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 5890a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 5900a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 5910a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); 5920a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 5930a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa"); 5940a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 5950a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); 5960a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 5970a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 5980a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); 5990a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa"); 6000a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 6010a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); 6020a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); 6030a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa"); 6040a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 6050a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 6060a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 
6070a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj} 6080a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 6090a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 6100a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 6110a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj////////////////////////////////////////////////////////// 6120a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj// // 6130a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj// ISTRI_0C // 6140a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj// // 6150a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj////////////////////////////////////////////////////////// 6160a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 6170a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj__attribute__((noinline)) 6180a87e8d2d445c36fbd78787c3a553ea46bfe50desewardjUInt h_pcmpistri_0C ( V128* argL, V128* argR ) 6190a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj{ 6200a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj V128 block[2]; 6210a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj memcpy(&block[0], argL, sizeof(V128)); 6220a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj memcpy(&block[1], argR, sizeof(V128)); 6230a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj ULong res = 0, flags = 0; 6240a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj __asm__ __volatile__( 625c5274ae844ae01cde66e35f1873ed37726dccd45weidendo "movdqu 0(%2), %%xmm2" "\n\t" 626c5274ae844ae01cde66e35f1873ed37726dccd45weidendo "movdqu 16(%2), %%xmm11" "\n\t" 6270a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj "pcmpistri $0x0C, %%xmm2, %%xmm11" "\n\t" 6280a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj //"pcmpistrm $0x0C, %%xmm2, %%xmm11" "\n\t" 6290a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj //"movd %%xmm0, %%ecx" "\n\t" 6300a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj "pushfq" "\n\t" 6310a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj "popq %%rdx" "\n\t" 6320a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj "movq %%rcx, %0" "\n\t" 6330a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj "movq %%rdx, %1" "\n\t" 
6347f3019bfbbbbc5356c351c5cd319c36fe731b806sewardj : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 6350a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 6360a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj ); 6370a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 6380a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj} 6390a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 6400a87e8d2d445c36fbd78787c3a553ea46bfe50desewardjUInt s_pcmpistri_0C ( V128* argLU, V128* argRU ) 6410a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj{ 6425ac99069b0538adcb2f18b04b078ea27b00b4185sewardj V128 resV; 6435ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt resOSZACP, resECX; 6445ac99069b0538adcb2f18b04b078ea27b00b4185sewardj Bool ok 6455ac99069b0538adcb2f18b04b078ea27b00b4185sewardj = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 6465ac99069b0538adcb2f18b04b078ea27b00b4185sewardj zmask_from_V128(argLU), 6475ac99069b0538adcb2f18b04b078ea27b00b4185sewardj zmask_from_V128(argRU), 6485ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 0x0C, False/*!isSTRM*/ 6495ac99069b0538adcb2f18b04b078ea27b00b4185sewardj ); 6505ac99069b0538adcb2f18b04b078ea27b00b4185sewardj assert(ok); 6517f3019bfbbbbc5356c351c5cd319c36fe731b806sewardj resECX = resV.uInt[0]; 6525ac99069b0538adcb2f18b04b078ea27b00b4185sewardj return (resOSZACP << 16) | resECX; 6530a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj} 6540a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 6550a87e8d2d445c36fbd78787c3a553ea46bfe50desewardjvoid istri_0C ( void ) 6560a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj{ 6570a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj char* wot = "0C"; 6580a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj UInt(*h)(V128*,V128*) = h_pcmpistri_0C; 6590a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj UInt(*s)(V128*,V128*) = s_pcmpistri_0C; 6600a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 6610a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 
try_istri(wot,h,s, "111111111abcde11", "00000000000abcde"); 6620a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 6630a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "111111111abcde11", "0000abcde00abcde"); 6640a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 6650a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "1111111111abcde1", "00000000000abcde"); 6660a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "11111111111abcde", "00000000000abcde"); 6670a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "111111111111abcd", "00000000000abcde"); 6680a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 6690a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "111abcde1abcde11", "00000000000abcde"); 6700a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 6710a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "11abcde11abcde11", "00000000000abcde"); 6720a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "1abcde111abcde11", "00000000000abcde"); 6730a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "abcde1111abcde11", "00000000000abcde"); 6740a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "bcde11111abcde11", "00000000000abcde"); 6750a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "cde111111abcde11", "00000000000abcde"); 6760a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 6770a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "01abcde11abcde11", "00000000000abcde"); 6780a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "00abcde11abcde11", "00000000000abcde"); 6790a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "000bcde11abcde11", "00000000000abcde"); 6800a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 6810a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "00abcde10abcde11", "00000000000abcde"); 6820a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, 
"00abcde100bcde11", "00000000000abcde"); 6830a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 6840a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "1111111111111234", "0000000000000000"); 6850a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "1111111111111234", "0000000000000001"); 6860a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "1111111111111234", "0000000000000011"); 6870a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 6880a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "1111111111111234", "1111111111111234"); 6890a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "a111111111111111", "000000000000000a"); 6900a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "b111111111111111", "000000000000000a"); 691c5274ae844ae01cde66e35f1873ed37726dccd45weidendo 692c5274ae844ae01cde66e35f1873ed37726dccd45weidendo try_istri(wot,h,s, "b111111111111111", "0000000000000000"); 693c5274ae844ae01cde66e35f1873ed37726dccd45weidendo try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 694c5274ae844ae01cde66e35f1873ed37726dccd45weidendo try_istri(wot,h,s, "123456789abcdef1", "0000000000000000"); 695c5274ae844ae01cde66e35f1873ed37726dccd45weidendo try_istri(wot,h,s, "0000000000000000", "123456789abcdef1"); 6960a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj} 6970a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 6980a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 6990a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj////////////////////////////////////////////////////////// 7000a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj// // 7010a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj// ISTRI_08 // 7020a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj// // 7030a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj////////////////////////////////////////////////////////// 7040a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 7050a87e8d2d445c36fbd78787c3a553ea46bfe50desewardjUInt h_pcmpistri_08 ( V128* argL, V128* 
argR ) 7060a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj{ 7070a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj V128 block[2]; 7080a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj memcpy(&block[0], argL, sizeof(V128)); 7090a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj memcpy(&block[1], argR, sizeof(V128)); 7100a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj ULong res, flags; 7110a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj __asm__ __volatile__( 7120a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj "subq $1024, %%rsp" "\n\t" 7130a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj "movdqu 0(%2), %%xmm2" "\n\t" 7140a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj "movdqu 16(%2), %%xmm11" "\n\t" 7150a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj "pcmpistri $0x08, %%xmm2, %%xmm11" "\n\t" 7160a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj "pushfq" "\n\t" 7170a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj "popq %%rdx" "\n\t" 7180a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj "movq %%rcx, %0" "\n\t" 7190a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj "movq %%rdx, %1" "\n\t" 7200a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj "addq $1024, %%rsp" "\n\t" 7210a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 7220a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 7230a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj ); 7240a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 7250a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj} 7260a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 7270a87e8d2d445c36fbd78787c3a553ea46bfe50desewardjUInt s_pcmpistri_08 ( V128* argLU, V128* argRU ) 7280a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj{ 7295ac99069b0538adcb2f18b04b078ea27b00b4185sewardj V128 resV; 7305ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt resOSZACP, resECX; 7315ac99069b0538adcb2f18b04b078ea27b00b4185sewardj Bool ok 
7325ac99069b0538adcb2f18b04b078ea27b00b4185sewardj = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 7335ac99069b0538adcb2f18b04b078ea27b00b4185sewardj zmask_from_V128(argLU), 7345ac99069b0538adcb2f18b04b078ea27b00b4185sewardj zmask_from_V128(argRU), 7355ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 0x08, False/*!isSTRM*/ 7365ac99069b0538adcb2f18b04b078ea27b00b4185sewardj ); 7375ac99069b0538adcb2f18b04b078ea27b00b4185sewardj assert(ok); 7387f3019bfbbbbc5356c351c5cd319c36fe731b806sewardj resECX = resV.uInt[0]; 7395ac99069b0538adcb2f18b04b078ea27b00b4185sewardj return (resOSZACP << 16) | resECX; 7405ac99069b0538adcb2f18b04b078ea27b00b4185sewardj} 7410a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 7425ac99069b0538adcb2f18b04b078ea27b00b4185sewardjvoid istri_08 ( void ) 7435ac99069b0538adcb2f18b04b078ea27b00b4185sewardj{ 7445ac99069b0538adcb2f18b04b078ea27b00b4185sewardj char* wot = "08"; 7455ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt(*h)(V128*,V128*) = h_pcmpistri_08; 7465ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt(*s)(V128*,V128*) = s_pcmpistri_08; 7470a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 7485ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 7490a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 7505ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 7515ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 7525ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 7535ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 7540a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 7555ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 7565ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", 
"aaaaaaaaa2aaaaaa"); 7575ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 7580a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 7595ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 7605ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 7615ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 7625ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 7635ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 7645ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 7655ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 7665ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 7675ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 7685ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 7695ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 7705ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 7715ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 7725ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); 7735ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 7745ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa"); 7755ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 7765ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); 7775ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 
7785ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 7795ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); 7805ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa"); 7815ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 7825ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); 7835ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); 7845ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa"); 7855ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 7865ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 7875ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 7880a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj} 7890a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 7905ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 7915ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 7925ac99069b0538adcb2f18b04b078ea27b00b4185sewardj////////////////////////////////////////////////////////// 7935ac99069b0538adcb2f18b04b078ea27b00b4185sewardj// // 794053f436448ea3f8733f5205226d2989d4de31b66sewardj// ISTRI_18 // 795053f436448ea3f8733f5205226d2989d4de31b66sewardj// // 796053f436448ea3f8733f5205226d2989d4de31b66sewardj////////////////////////////////////////////////////////// 797053f436448ea3f8733f5205226d2989d4de31b66sewardj 798053f436448ea3f8733f5205226d2989d4de31b66sewardjUInt h_pcmpistri_18 ( V128* argL, V128* argR ) 799053f436448ea3f8733f5205226d2989d4de31b66sewardj{ 800053f436448ea3f8733f5205226d2989d4de31b66sewardj V128 block[2]; 801053f436448ea3f8733f5205226d2989d4de31b66sewardj memcpy(&block[0], argL, sizeof(V128)); 802053f436448ea3f8733f5205226d2989d4de31b66sewardj 
memcpy(&block[1], argR, sizeof(V128)); 803053f436448ea3f8733f5205226d2989d4de31b66sewardj ULong res, flags; 804053f436448ea3f8733f5205226d2989d4de31b66sewardj __asm__ __volatile__( 805053f436448ea3f8733f5205226d2989d4de31b66sewardj "subq $1024, %%rsp" "\n\t" 806053f436448ea3f8733f5205226d2989d4de31b66sewardj "movdqu 0(%2), %%xmm2" "\n\t" 807053f436448ea3f8733f5205226d2989d4de31b66sewardj "movdqu 16(%2), %%xmm11" "\n\t" 808053f436448ea3f8733f5205226d2989d4de31b66sewardj "pcmpistri $0x18, %%xmm2, %%xmm11" "\n\t" 809053f436448ea3f8733f5205226d2989d4de31b66sewardj "pushfq" "\n\t" 810053f436448ea3f8733f5205226d2989d4de31b66sewardj "popq %%rdx" "\n\t" 811053f436448ea3f8733f5205226d2989d4de31b66sewardj "movq %%rcx, %0" "\n\t" 812053f436448ea3f8733f5205226d2989d4de31b66sewardj "movq %%rdx, %1" "\n\t" 813053f436448ea3f8733f5205226d2989d4de31b66sewardj "addq $1024, %%rsp" "\n\t" 814053f436448ea3f8733f5205226d2989d4de31b66sewardj : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 815053f436448ea3f8733f5205226d2989d4de31b66sewardj : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 816053f436448ea3f8733f5205226d2989d4de31b66sewardj ); 817053f436448ea3f8733f5205226d2989d4de31b66sewardj return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 818053f436448ea3f8733f5205226d2989d4de31b66sewardj} 819053f436448ea3f8733f5205226d2989d4de31b66sewardj 820053f436448ea3f8733f5205226d2989d4de31b66sewardjUInt s_pcmpistri_18 ( V128* argLU, V128* argRU ) 821053f436448ea3f8733f5205226d2989d4de31b66sewardj{ 822053f436448ea3f8733f5205226d2989d4de31b66sewardj V128 resV; 823053f436448ea3f8733f5205226d2989d4de31b66sewardj UInt resOSZACP, resECX; 824053f436448ea3f8733f5205226d2989d4de31b66sewardj Bool ok 825053f436448ea3f8733f5205226d2989d4de31b66sewardj = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 826053f436448ea3f8733f5205226d2989d4de31b66sewardj zmask_from_V128(argLU), 827053f436448ea3f8733f5205226d2989d4de31b66sewardj zmask_from_V128(argRU), 828053f436448ea3f8733f5205226d2989d4de31b66sewardj 
0x18, False/*!isSTRM*/ 829053f436448ea3f8733f5205226d2989d4de31b66sewardj ); 830053f436448ea3f8733f5205226d2989d4de31b66sewardj assert(ok); 831053f436448ea3f8733f5205226d2989d4de31b66sewardj resECX = resV.uInt[0]; 832053f436448ea3f8733f5205226d2989d4de31b66sewardj return (resOSZACP << 16) | resECX; 833053f436448ea3f8733f5205226d2989d4de31b66sewardj} 834053f436448ea3f8733f5205226d2989d4de31b66sewardj 835053f436448ea3f8733f5205226d2989d4de31b66sewardjvoid istri_18 ( void ) 836053f436448ea3f8733f5205226d2989d4de31b66sewardj{ 837053f436448ea3f8733f5205226d2989d4de31b66sewardj char* wot = "18"; 838053f436448ea3f8733f5205226d2989d4de31b66sewardj UInt(*h)(V128*,V128*) = h_pcmpistri_18; 839053f436448ea3f8733f5205226d2989d4de31b66sewardj UInt(*s)(V128*,V128*) = s_pcmpistri_18; 840053f436448ea3f8733f5205226d2989d4de31b66sewardj 841053f436448ea3f8733f5205226d2989d4de31b66sewardj try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 842053f436448ea3f8733f5205226d2989d4de31b66sewardj 843053f436448ea3f8733f5205226d2989d4de31b66sewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 844053f436448ea3f8733f5205226d2989d4de31b66sewardj try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 845053f436448ea3f8733f5205226d2989d4de31b66sewardj try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 846053f436448ea3f8733f5205226d2989d4de31b66sewardj try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 847053f436448ea3f8733f5205226d2989d4de31b66sewardj 848053f436448ea3f8733f5205226d2989d4de31b66sewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 849053f436448ea3f8733f5205226d2989d4de31b66sewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 850053f436448ea3f8733f5205226d2989d4de31b66sewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 851053f436448ea3f8733f5205226d2989d4de31b66sewardj 852053f436448ea3f8733f5205226d2989d4de31b66sewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
853053f436448ea3f8733f5205226d2989d4de31b66sewardj try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 854053f436448ea3f8733f5205226d2989d4de31b66sewardj try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 855053f436448ea3f8733f5205226d2989d4de31b66sewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 856053f436448ea3f8733f5205226d2989d4de31b66sewardj 857053f436448ea3f8733f5205226d2989d4de31b66sewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 858053f436448ea3f8733f5205226d2989d4de31b66sewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 859053f436448ea3f8733f5205226d2989d4de31b66sewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 860053f436448ea3f8733f5205226d2989d4de31b66sewardj 861053f436448ea3f8733f5205226d2989d4de31b66sewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 862053f436448ea3f8733f5205226d2989d4de31b66sewardj 863053f436448ea3f8733f5205226d2989d4de31b66sewardj try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 864053f436448ea3f8733f5205226d2989d4de31b66sewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 865053f436448ea3f8733f5205226d2989d4de31b66sewardj try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); 866053f436448ea3f8733f5205226d2989d4de31b66sewardj 867053f436448ea3f8733f5205226d2989d4de31b66sewardj try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa"); 868053f436448ea3f8733f5205226d2989d4de31b66sewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 869053f436448ea3f8733f5205226d2989d4de31b66sewardj try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); 870053f436448ea3f8733f5205226d2989d4de31b66sewardj 871053f436448ea3f8733f5205226d2989d4de31b66sewardj try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 872053f436448ea3f8733f5205226d2989d4de31b66sewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); 873053f436448ea3f8733f5205226d2989d4de31b66sewardj try_istri(wot,h,s, 
"aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa"); 874053f436448ea3f8733f5205226d2989d4de31b66sewardj 875053f436448ea3f8733f5205226d2989d4de31b66sewardj try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); 876053f436448ea3f8733f5205226d2989d4de31b66sewardj try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); 877053f436448ea3f8733f5205226d2989d4de31b66sewardj try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa"); 878053f436448ea3f8733f5205226d2989d4de31b66sewardj 879053f436448ea3f8733f5205226d2989d4de31b66sewardj try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 880053f436448ea3f8733f5205226d2989d4de31b66sewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 881053f436448ea3f8733f5205226d2989d4de31b66sewardj} 882053f436448ea3f8733f5205226d2989d4de31b66sewardj 883053f436448ea3f8733f5205226d2989d4de31b66sewardj 884053f436448ea3f8733f5205226d2989d4de31b66sewardj 885053f436448ea3f8733f5205226d2989d4de31b66sewardj////////////////////////////////////////////////////////// 886053f436448ea3f8733f5205226d2989d4de31b66sewardj// // 8875ac99069b0538adcb2f18b04b078ea27b00b4185sewardj// ISTRI_1A // 8885ac99069b0538adcb2f18b04b078ea27b00b4185sewardj// // 8895ac99069b0538adcb2f18b04b078ea27b00b4185sewardj////////////////////////////////////////////////////////// 8905ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 8915ac99069b0538adcb2f18b04b078ea27b00b4185sewardjUInt h_pcmpistri_1A ( V128* argL, V128* argR ) 8920a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj{ 8935ac99069b0538adcb2f18b04b078ea27b00b4185sewardj V128 block[2]; 8945ac99069b0538adcb2f18b04b078ea27b00b4185sewardj memcpy(&block[0], argL, sizeof(V128)); 8955ac99069b0538adcb2f18b04b078ea27b00b4185sewardj memcpy(&block[1], argR, sizeof(V128)); 8965ac99069b0538adcb2f18b04b078ea27b00b4185sewardj ULong res, flags; 8975ac99069b0538adcb2f18b04b078ea27b00b4185sewardj __asm__ __volatile__( 8985ac99069b0538adcb2f18b04b078ea27b00b4185sewardj "subq $1024, %%rsp" "\n\t" 
8995ac99069b0538adcb2f18b04b078ea27b00b4185sewardj "movdqu 0(%2), %%xmm2" "\n\t" 9005ac99069b0538adcb2f18b04b078ea27b00b4185sewardj "movdqu 16(%2), %%xmm11" "\n\t" 9015ac99069b0538adcb2f18b04b078ea27b00b4185sewardj "pcmpistri $0x1A, %%xmm2, %%xmm11" "\n\t" 9025ac99069b0538adcb2f18b04b078ea27b00b4185sewardj "pushfq" "\n\t" 9035ac99069b0538adcb2f18b04b078ea27b00b4185sewardj "popq %%rdx" "\n\t" 9045ac99069b0538adcb2f18b04b078ea27b00b4185sewardj "movq %%rcx, %0" "\n\t" 9055ac99069b0538adcb2f18b04b078ea27b00b4185sewardj "movq %%rdx, %1" "\n\t" 9065ac99069b0538adcb2f18b04b078ea27b00b4185sewardj "addq $1024, %%rsp" "\n\t" 9075ac99069b0538adcb2f18b04b078ea27b00b4185sewardj : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 9085ac99069b0538adcb2f18b04b078ea27b00b4185sewardj : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 9095ac99069b0538adcb2f18b04b078ea27b00b4185sewardj ); 9105ac99069b0538adcb2f18b04b078ea27b00b4185sewardj return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 9115ac99069b0538adcb2f18b04b078ea27b00b4185sewardj} 9125ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 9135ac99069b0538adcb2f18b04b078ea27b00b4185sewardjUInt s_pcmpistri_1A ( V128* argLU, V128* argRU ) 9145ac99069b0538adcb2f18b04b078ea27b00b4185sewardj{ 9155ac99069b0538adcb2f18b04b078ea27b00b4185sewardj V128 resV; 9165ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt resOSZACP, resECX; 9175ac99069b0538adcb2f18b04b078ea27b00b4185sewardj Bool ok 9185ac99069b0538adcb2f18b04b078ea27b00b4185sewardj = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 9195ac99069b0538adcb2f18b04b078ea27b00b4185sewardj zmask_from_V128(argLU), 9205ac99069b0538adcb2f18b04b078ea27b00b4185sewardj zmask_from_V128(argRU), 9215ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 0x1A, False/*!isSTRM*/ 9225ac99069b0538adcb2f18b04b078ea27b00b4185sewardj ); 9235ac99069b0538adcb2f18b04b078ea27b00b4185sewardj assert(ok); 9247f3019bfbbbbc5356c351c5cd319c36fe731b806sewardj resECX = resV.uInt[0]; 
9255ac99069b0538adcb2f18b04b078ea27b00b4185sewardj return (resOSZACP << 16) | resECX; 9265ac99069b0538adcb2f18b04b078ea27b00b4185sewardj} 9275ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 9285ac99069b0538adcb2f18b04b078ea27b00b4185sewardjvoid istri_1A ( void ) 9295ac99069b0538adcb2f18b04b078ea27b00b4185sewardj{ 9305ac99069b0538adcb2f18b04b078ea27b00b4185sewardj char* wot = "1A"; 9315ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt(*h)(V128*,V128*) = h_pcmpistri_1A; 9325ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt(*s)(V128*,V128*) = s_pcmpistri_1A; 9330a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 9340a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 9350a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 9360a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 9370a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 9380a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 9390a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 9400a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 9410a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 9420a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 9430a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 9440a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 9450a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 9460a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 9470a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
9480a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 9490a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 9500a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 9510a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 9520a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 9530a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 9540a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 9550a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 9560a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 9570a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 9580a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); 9590a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 9600a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa"); 9610a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 9620a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); 9630a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 9640a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 9650a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); 9660a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa"); 9670a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 9680a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); 
9690a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); 9700a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa"); 9710a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 9720a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 9730a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 9740a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj} 9750a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 9760a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 9770a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 9785ac99069b0538adcb2f18b04b078ea27b00b4185sewardj////////////////////////////////////////////////////////// 9795ac99069b0538adcb2f18b04b078ea27b00b4185sewardj// // 9805ac99069b0538adcb2f18b04b078ea27b00b4185sewardj// ISTRI_02 // 9815ac99069b0538adcb2f18b04b078ea27b00b4185sewardj// // 9825ac99069b0538adcb2f18b04b078ea27b00b4185sewardj////////////////////////////////////////////////////////// 9835ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 9845ac99069b0538adcb2f18b04b078ea27b00b4185sewardjUInt h_pcmpistri_02 ( V128* argL, V128* argR ) 9855ac99069b0538adcb2f18b04b078ea27b00b4185sewardj{ 9865ac99069b0538adcb2f18b04b078ea27b00b4185sewardj V128 block[2]; 9875ac99069b0538adcb2f18b04b078ea27b00b4185sewardj memcpy(&block[0], argL, sizeof(V128)); 9885ac99069b0538adcb2f18b04b078ea27b00b4185sewardj memcpy(&block[1], argR, sizeof(V128)); 9895ac99069b0538adcb2f18b04b078ea27b00b4185sewardj ULong res, flags; 9905ac99069b0538adcb2f18b04b078ea27b00b4185sewardj __asm__ __volatile__( 9915ac99069b0538adcb2f18b04b078ea27b00b4185sewardj "subq $1024, %%rsp" "\n\t" 9925ac99069b0538adcb2f18b04b078ea27b00b4185sewardj "movdqu 0(%2), %%xmm2" "\n\t" 9935ac99069b0538adcb2f18b04b078ea27b00b4185sewardj "movdqu 16(%2), %%xmm11" "\n\t" 9945ac99069b0538adcb2f18b04b078ea27b00b4185sewardj "pcmpistri $0x02, 
%%xmm2, %%xmm11" "\n\t" 9955ac99069b0538adcb2f18b04b078ea27b00b4185sewardj//"pcmpistrm $0x02, %%xmm2, %%xmm11" "\n\t" 9965ac99069b0538adcb2f18b04b078ea27b00b4185sewardj//"movd %%xmm0, %%ecx" "\n\t" 9975ac99069b0538adcb2f18b04b078ea27b00b4185sewardj "pushfq" "\n\t" 9985ac99069b0538adcb2f18b04b078ea27b00b4185sewardj "popq %%rdx" "\n\t" 9995ac99069b0538adcb2f18b04b078ea27b00b4185sewardj "movq %%rcx, %0" "\n\t" 10005ac99069b0538adcb2f18b04b078ea27b00b4185sewardj "movq %%rdx, %1" "\n\t" 10015ac99069b0538adcb2f18b04b078ea27b00b4185sewardj "addq $1024, %%rsp" "\n\t" 10025ac99069b0538adcb2f18b04b078ea27b00b4185sewardj : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 10035ac99069b0538adcb2f18b04b078ea27b00b4185sewardj : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 10045ac99069b0538adcb2f18b04b078ea27b00b4185sewardj ); 10055ac99069b0538adcb2f18b04b078ea27b00b4185sewardj return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 10065ac99069b0538adcb2f18b04b078ea27b00b4185sewardj} 10075ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 10085ac99069b0538adcb2f18b04b078ea27b00b4185sewardjUInt s_pcmpistri_02 ( V128* argLU, V128* argRU ) 10095ac99069b0538adcb2f18b04b078ea27b00b4185sewardj{ 10105ac99069b0538adcb2f18b04b078ea27b00b4185sewardj V128 resV; 10115ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt resOSZACP, resECX; 10125ac99069b0538adcb2f18b04b078ea27b00b4185sewardj Bool ok 10135ac99069b0538adcb2f18b04b078ea27b00b4185sewardj = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 10145ac99069b0538adcb2f18b04b078ea27b00b4185sewardj zmask_from_V128(argLU), 10155ac99069b0538adcb2f18b04b078ea27b00b4185sewardj zmask_from_V128(argRU), 10165ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 0x02, False/*!isSTRM*/ 10175ac99069b0538adcb2f18b04b078ea27b00b4185sewardj ); 10185ac99069b0538adcb2f18b04b078ea27b00b4185sewardj assert(ok); 10197f3019bfbbbbc5356c351c5cd319c36fe731b806sewardj resECX = resV.uInt[0]; 10205ac99069b0538adcb2f18b04b078ea27b00b4185sewardj return (resOSZACP << 16) | resECX; 
10215ac99069b0538adcb2f18b04b078ea27b00b4185sewardj} 10225ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 10235ac99069b0538adcb2f18b04b078ea27b00b4185sewardjvoid istri_02 ( void ) 10245ac99069b0538adcb2f18b04b078ea27b00b4185sewardj{ 10255ac99069b0538adcb2f18b04b078ea27b00b4185sewardj char* wot = "02"; 10265ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt(*h)(V128*,V128*) = h_pcmpistri_02; 10275ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt(*s)(V128*,V128*) = s_pcmpistri_02; 10285ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 10295ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); 10305ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); 10315ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); 10325ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 10335ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 10345ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 10355ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); 10365ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); 10375ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); 10385ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); 10395ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 10405ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 10415ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); 10425ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); 
10435ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); 10445ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 10455ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 10465ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 10475ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 10485ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 10495ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 10505ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); 10515ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); 10525ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 10535ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); 10545ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 10555ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 10565ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 10575ac99069b0538adcb2f18b04b078ea27b00b4185sewardj} 10585ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 10595ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 10605ac99069b0538adcb2f18b04b078ea27b00b4185sewardj////////////////////////////////////////////////////////// 10615ac99069b0538adcb2f18b04b078ea27b00b4185sewardj// // 10625ac99069b0538adcb2f18b04b078ea27b00b4185sewardj// ISTRI_12 // 10635ac99069b0538adcb2f18b04b078ea27b00b4185sewardj// // 10645ac99069b0538adcb2f18b04b078ea27b00b4185sewardj////////////////////////////////////////////////////////// 10655ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 10665ac99069b0538adcb2f18b04b078ea27b00b4185sewardjUInt 
h_pcmpistri_12 ( V128* argL, V128* argR ) 10675ac99069b0538adcb2f18b04b078ea27b00b4185sewardj{ 10685ac99069b0538adcb2f18b04b078ea27b00b4185sewardj V128 block[2]; 10695ac99069b0538adcb2f18b04b078ea27b00b4185sewardj memcpy(&block[0], argL, sizeof(V128)); 10705ac99069b0538adcb2f18b04b078ea27b00b4185sewardj memcpy(&block[1], argR, sizeof(V128)); 10715ac99069b0538adcb2f18b04b078ea27b00b4185sewardj ULong res, flags; 10725ac99069b0538adcb2f18b04b078ea27b00b4185sewardj __asm__ __volatile__( 10735ac99069b0538adcb2f18b04b078ea27b00b4185sewardj "subq $1024, %%rsp" "\n\t" 10745ac99069b0538adcb2f18b04b078ea27b00b4185sewardj "movdqu 0(%2), %%xmm2" "\n\t" 10755ac99069b0538adcb2f18b04b078ea27b00b4185sewardj "movdqu 16(%2), %%xmm11" "\n\t" 10765ac99069b0538adcb2f18b04b078ea27b00b4185sewardj "pcmpistri $0x12, %%xmm2, %%xmm11" "\n\t" 10775ac99069b0538adcb2f18b04b078ea27b00b4185sewardj//"pcmpistrm $0x12, %%xmm2, %%xmm11" "\n\t" 10785ac99069b0538adcb2f18b04b078ea27b00b4185sewardj//"movd %%xmm0, %%ecx" "\n\t" 10795ac99069b0538adcb2f18b04b078ea27b00b4185sewardj "pushfq" "\n\t" 10805ac99069b0538adcb2f18b04b078ea27b00b4185sewardj "popq %%rdx" "\n\t" 10815ac99069b0538adcb2f18b04b078ea27b00b4185sewardj "movq %%rcx, %0" "\n\t" 10825ac99069b0538adcb2f18b04b078ea27b00b4185sewardj "movq %%rdx, %1" "\n\t" 10835ac99069b0538adcb2f18b04b078ea27b00b4185sewardj "addq $1024, %%rsp" "\n\t" 10845ac99069b0538adcb2f18b04b078ea27b00b4185sewardj : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 10855ac99069b0538adcb2f18b04b078ea27b00b4185sewardj : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 10865ac99069b0538adcb2f18b04b078ea27b00b4185sewardj ); 10875ac99069b0538adcb2f18b04b078ea27b00b4185sewardj return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 10885ac99069b0538adcb2f18b04b078ea27b00b4185sewardj} 10895ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 10905ac99069b0538adcb2f18b04b078ea27b00b4185sewardjUInt s_pcmpistri_12 ( V128* argLU, V128* argRU ) 10915ac99069b0538adcb2f18b04b078ea27b00b4185sewardj{ 
10925ac99069b0538adcb2f18b04b078ea27b00b4185sewardj V128 resV; 10935ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt resOSZACP, resECX; 10945ac99069b0538adcb2f18b04b078ea27b00b4185sewardj Bool ok 10955ac99069b0538adcb2f18b04b078ea27b00b4185sewardj = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 10965ac99069b0538adcb2f18b04b078ea27b00b4185sewardj zmask_from_V128(argLU), 10975ac99069b0538adcb2f18b04b078ea27b00b4185sewardj zmask_from_V128(argRU), 10985ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 0x12, False/*!isSTRM*/ 10995ac99069b0538adcb2f18b04b078ea27b00b4185sewardj ); 11005ac99069b0538adcb2f18b04b078ea27b00b4185sewardj assert(ok); 11017f3019bfbbbbc5356c351c5cd319c36fe731b806sewardj resECX = resV.uInt[0]; 11025ac99069b0538adcb2f18b04b078ea27b00b4185sewardj return (resOSZACP << 16) | resECX; 11035ac99069b0538adcb2f18b04b078ea27b00b4185sewardj} 11045ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 11055ac99069b0538adcb2f18b04b078ea27b00b4185sewardjvoid istri_12 ( void ) 11065ac99069b0538adcb2f18b04b078ea27b00b4185sewardj{ 11075ac99069b0538adcb2f18b04b078ea27b00b4185sewardj char* wot = "12"; 11085ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt(*h)(V128*,V128*) = h_pcmpistri_12; 11095ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt(*s)(V128*,V128*) = s_pcmpistri_12; 11105ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 11115ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); 11125ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); 11135ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); 11145ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 11155ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 11165ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 
11175ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); 11185ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); 11195ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); 11205ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); 11215ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 11225ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 11235ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); 11245ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); 11255ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); 11265ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 11275ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 11285ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 11295ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 11305ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 11315ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 11325ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); 11335ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); 11345ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 11355ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); 11365ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 11375ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 
try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 11385ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 11395ac99069b0538adcb2f18b04b078ea27b00b4185sewardj} 11405ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 11415ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 11425ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 11435ac99069b0538adcb2f18b04b078ea27b00b4185sewardj////////////////////////////////////////////////////////// 11445ac99069b0538adcb2f18b04b078ea27b00b4185sewardj// // 11455ac99069b0538adcb2f18b04b078ea27b00b4185sewardj// ISTRI_44 // 11465ac99069b0538adcb2f18b04b078ea27b00b4185sewardj// // 11475ac99069b0538adcb2f18b04b078ea27b00b4185sewardj////////////////////////////////////////////////////////// 11485ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 11495ac99069b0538adcb2f18b04b078ea27b00b4185sewardjUInt h_pcmpistri_44 ( V128* argL, V128* argR ) 11505ac99069b0538adcb2f18b04b078ea27b00b4185sewardj{ 11515ac99069b0538adcb2f18b04b078ea27b00b4185sewardj V128 block[2]; 11525ac99069b0538adcb2f18b04b078ea27b00b4185sewardj memcpy(&block[0], argL, sizeof(V128)); 11535ac99069b0538adcb2f18b04b078ea27b00b4185sewardj memcpy(&block[1], argR, sizeof(V128)); 11545ac99069b0538adcb2f18b04b078ea27b00b4185sewardj ULong res, flags; 11555ac99069b0538adcb2f18b04b078ea27b00b4185sewardj __asm__ __volatile__( 11565ac99069b0538adcb2f18b04b078ea27b00b4185sewardj "subq $1024, %%rsp" "\n\t" 11575ac99069b0538adcb2f18b04b078ea27b00b4185sewardj "movdqu 0(%2), %%xmm2" "\n\t" 11585ac99069b0538adcb2f18b04b078ea27b00b4185sewardj "movdqu 16(%2), %%xmm11" "\n\t" 11595ac99069b0538adcb2f18b04b078ea27b00b4185sewardj "pcmpistri $0x44, %%xmm2, %%xmm11" "\n\t" 11605ac99069b0538adcb2f18b04b078ea27b00b4185sewardj//"pcmpistrm $0x04, %%xmm2, %%xmm11" "\n\t" 11615ac99069b0538adcb2f18b04b078ea27b00b4185sewardj//"movd %%xmm0, %%ecx" "\n\t" 11625ac99069b0538adcb2f18b04b078ea27b00b4185sewardj "pushfq" "\n\t" 
11635ac99069b0538adcb2f18b04b078ea27b00b4185sewardj "popq %%rdx" "\n\t" 11645ac99069b0538adcb2f18b04b078ea27b00b4185sewardj "movq %%rcx, %0" "\n\t" 11655ac99069b0538adcb2f18b04b078ea27b00b4185sewardj "movq %%rdx, %1" "\n\t" 11665ac99069b0538adcb2f18b04b078ea27b00b4185sewardj "addq $1024, %%rsp" "\n\t" 11675ac99069b0538adcb2f18b04b078ea27b00b4185sewardj : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 11685ac99069b0538adcb2f18b04b078ea27b00b4185sewardj : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 11695ac99069b0538adcb2f18b04b078ea27b00b4185sewardj ); 11705ac99069b0538adcb2f18b04b078ea27b00b4185sewardj return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 11715ac99069b0538adcb2f18b04b078ea27b00b4185sewardj} 11725ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 11735ac99069b0538adcb2f18b04b078ea27b00b4185sewardjUInt s_pcmpistri_44 ( V128* argLU, V128* argRU ) 11745ac99069b0538adcb2f18b04b078ea27b00b4185sewardj{ 11755ac99069b0538adcb2f18b04b078ea27b00b4185sewardj V128 resV; 11765ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt resOSZACP, resECX; 11775ac99069b0538adcb2f18b04b078ea27b00b4185sewardj Bool ok 11785ac99069b0538adcb2f18b04b078ea27b00b4185sewardj = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 11795ac99069b0538adcb2f18b04b078ea27b00b4185sewardj zmask_from_V128(argLU), 11805ac99069b0538adcb2f18b04b078ea27b00b4185sewardj zmask_from_V128(argRU), 11815ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 0x44, False/*!isSTRM*/ 11825ac99069b0538adcb2f18b04b078ea27b00b4185sewardj ); 11835ac99069b0538adcb2f18b04b078ea27b00b4185sewardj assert(ok); 11847f3019bfbbbbc5356c351c5cd319c36fe731b806sewardj resECX = resV.uInt[0]; 11855ac99069b0538adcb2f18b04b078ea27b00b4185sewardj return (resOSZACP << 16) | resECX; 11865ac99069b0538adcb2f18b04b078ea27b00b4185sewardj} 11875ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 11885ac99069b0538adcb2f18b04b078ea27b00b4185sewardjvoid istri_44 ( void ) 11895ac99069b0538adcb2f18b04b078ea27b00b4185sewardj{ 
11905ac99069b0538adcb2f18b04b078ea27b00b4185sewardj char* wot = "44"; 11915ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt(*h)(V128*,V128*) = h_pcmpistri_44; 11925ac99069b0538adcb2f18b04b078ea27b00b4185sewardj UInt(*s)(V128*,V128*) = s_pcmpistri_44; 11935ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 11945ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc"); 11955ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb"); 11965ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb"); 11975ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb"); 11985ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 11995ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb"); 12005ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb"); 12015ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb"); 12025ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb"); 12035ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "0000000000000000", "00000000000000cb"); 12045ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 12055ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 12065ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 12075ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb"); 12085ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b"); 12095ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb"); 12105ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 
12115ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb"); 12125ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb"); 12135ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b"); 12145ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 12155ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421"); 12165ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421"); 12175ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 12185ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532"); 12195ac99069b0538adcb2f18b04b078ea27b00b4185sewardj try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532"); 122015df336557eb012a5f3b2f1482a0411857039496sewardj 122115df336557eb012a5f3b2f1482a0411857039496sewardj try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a"); 122215df336557eb012a5f3b2f1482a0411857039496sewardj try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7"); 122315df336557eb012a5f3b2f1482a0411857039496sewardj try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54"); 122415df336557eb012a5f3b2f1482a0411857039496sewardj try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb"); 12255ac99069b0538adcb2f18b04b078ea27b00b4185sewardj} 12265ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 12275ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 12283b20b17c01834d95e1ce9785a0a366057320fe5csewardj////////////////////////////////////////////////////////// 12293b20b17c01834d95e1ce9785a0a366057320fe5csewardj// // 12303b20b17c01834d95e1ce9785a0a366057320fe5csewardj// ISTRI_00 // 12313b20b17c01834d95e1ce9785a0a366057320fe5csewardj// // 12323b20b17c01834d95e1ce9785a0a366057320fe5csewardj////////////////////////////////////////////////////////// 12333b20b17c01834d95e1ce9785a0a366057320fe5csewardj 
12343b20b17c01834d95e1ce9785a0a366057320fe5csewardjUInt h_pcmpistri_00 ( V128* argL, V128* argR ) 12353b20b17c01834d95e1ce9785a0a366057320fe5csewardj{ 12363b20b17c01834d95e1ce9785a0a366057320fe5csewardj V128 block[2]; 12373b20b17c01834d95e1ce9785a0a366057320fe5csewardj memcpy(&block[0], argL, sizeof(V128)); 12383b20b17c01834d95e1ce9785a0a366057320fe5csewardj memcpy(&block[1], argR, sizeof(V128)); 12393b20b17c01834d95e1ce9785a0a366057320fe5csewardj ULong res, flags; 12403b20b17c01834d95e1ce9785a0a366057320fe5csewardj __asm__ __volatile__( 12413b20b17c01834d95e1ce9785a0a366057320fe5csewardj "subq $1024, %%rsp" "\n\t" 12423b20b17c01834d95e1ce9785a0a366057320fe5csewardj "movdqu 0(%2), %%xmm2" "\n\t" 12433b20b17c01834d95e1ce9785a0a366057320fe5csewardj "movdqu 16(%2), %%xmm11" "\n\t" 12443b20b17c01834d95e1ce9785a0a366057320fe5csewardj "pcmpistri $0x00, %%xmm2, %%xmm11" "\n\t" 12453b20b17c01834d95e1ce9785a0a366057320fe5csewardj//"pcmpistrm $0x00, %%xmm2, %%xmm11" "\n\t" 12463b20b17c01834d95e1ce9785a0a366057320fe5csewardj//"movd %%xmm0, %%ecx" "\n\t" 12473b20b17c01834d95e1ce9785a0a366057320fe5csewardj "pushfq" "\n\t" 12483b20b17c01834d95e1ce9785a0a366057320fe5csewardj "popq %%rdx" "\n\t" 12493b20b17c01834d95e1ce9785a0a366057320fe5csewardj "movq %%rcx, %0" "\n\t" 12503b20b17c01834d95e1ce9785a0a366057320fe5csewardj "movq %%rdx, %1" "\n\t" 12513b20b17c01834d95e1ce9785a0a366057320fe5csewardj "addq $1024, %%rsp" "\n\t" 12523b20b17c01834d95e1ce9785a0a366057320fe5csewardj : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 12533b20b17c01834d95e1ce9785a0a366057320fe5csewardj : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 12543b20b17c01834d95e1ce9785a0a366057320fe5csewardj ); 12553b20b17c01834d95e1ce9785a0a366057320fe5csewardj return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 12563b20b17c01834d95e1ce9785a0a366057320fe5csewardj} 12573b20b17c01834d95e1ce9785a0a366057320fe5csewardj 12583b20b17c01834d95e1ce9785a0a366057320fe5csewardjUInt s_pcmpistri_00 ( V128* argLU, V128* argRU 
) 12593b20b17c01834d95e1ce9785a0a366057320fe5csewardj{ 12603b20b17c01834d95e1ce9785a0a366057320fe5csewardj V128 resV; 12613b20b17c01834d95e1ce9785a0a366057320fe5csewardj UInt resOSZACP, resECX; 12623b20b17c01834d95e1ce9785a0a366057320fe5csewardj Bool ok 12633b20b17c01834d95e1ce9785a0a366057320fe5csewardj = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 12643b20b17c01834d95e1ce9785a0a366057320fe5csewardj zmask_from_V128(argLU), 12653b20b17c01834d95e1ce9785a0a366057320fe5csewardj zmask_from_V128(argRU), 12663b20b17c01834d95e1ce9785a0a366057320fe5csewardj 0x00, False/*!isSTRM*/ 12673b20b17c01834d95e1ce9785a0a366057320fe5csewardj ); 12683b20b17c01834d95e1ce9785a0a366057320fe5csewardj assert(ok); 12693b20b17c01834d95e1ce9785a0a366057320fe5csewardj resECX = resV.uInt[0]; 12703b20b17c01834d95e1ce9785a0a366057320fe5csewardj return (resOSZACP << 16) | resECX; 12713b20b17c01834d95e1ce9785a0a366057320fe5csewardj} 12723b20b17c01834d95e1ce9785a0a366057320fe5csewardj 12733b20b17c01834d95e1ce9785a0a366057320fe5csewardjvoid istri_00 ( void ) 12743b20b17c01834d95e1ce9785a0a366057320fe5csewardj{ 12753b20b17c01834d95e1ce9785a0a366057320fe5csewardj char* wot = "00"; 12763b20b17c01834d95e1ce9785a0a366057320fe5csewardj UInt(*h)(V128*,V128*) = h_pcmpistri_00; 12773b20b17c01834d95e1ce9785a0a366057320fe5csewardj UInt(*s)(V128*,V128*) = s_pcmpistri_00; 12783b20b17c01834d95e1ce9785a0a366057320fe5csewardj 12793b20b17c01834d95e1ce9785a0a366057320fe5csewardj try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); 12803b20b17c01834d95e1ce9785a0a366057320fe5csewardj try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); 12813b20b17c01834d95e1ce9785a0a366057320fe5csewardj try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); 12823b20b17c01834d95e1ce9785a0a366057320fe5csewardj try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 12833b20b17c01834d95e1ce9785a0a366057320fe5csewardj 12843b20b17c01834d95e1ce9785a0a366057320fe5csewardj try_istri(wot,h,s, "abcdabcdabcdabcd", 
"000000000000abcd"); 12853b20b17c01834d95e1ce9785a0a366057320fe5csewardj try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); 12863b20b17c01834d95e1ce9785a0a366057320fe5csewardj try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); 12873b20b17c01834d95e1ce9785a0a366057320fe5csewardj try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); 12883b20b17c01834d95e1ce9785a0a366057320fe5csewardj try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); 12893b20b17c01834d95e1ce9785a0a366057320fe5csewardj 12903b20b17c01834d95e1ce9785a0a366057320fe5csewardj try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 12913b20b17c01834d95e1ce9785a0a366057320fe5csewardj try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); 12923b20b17c01834d95e1ce9785a0a366057320fe5csewardj try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); 12933b20b17c01834d95e1ce9785a0a366057320fe5csewardj try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); 12943b20b17c01834d95e1ce9785a0a366057320fe5csewardj 12953b20b17c01834d95e1ce9785a0a366057320fe5csewardj try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 12963b20b17c01834d95e1ce9785a0a366057320fe5csewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 12973b20b17c01834d95e1ce9785a0a366057320fe5csewardj 12983b20b17c01834d95e1ce9785a0a366057320fe5csewardj try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 12993b20b17c01834d95e1ce9785a0a366057320fe5csewardj try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 13003b20b17c01834d95e1ce9785a0a366057320fe5csewardj try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); 13013b20b17c01834d95e1ce9785a0a366057320fe5csewardj try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); 13023b20b17c01834d95e1ce9785a0a366057320fe5csewardj 13033b20b17c01834d95e1ce9785a0a366057320fe5csewardj try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); 13043b20b17c01834d95e1ce9785a0a366057320fe5csewardj 
13053b20b17c01834d95e1ce9785a0a366057320fe5csewardj try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 13063b20b17c01834d95e1ce9785a0a366057320fe5csewardj try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 13073b20b17c01834d95e1ce9785a0a366057320fe5csewardj} 13085ac99069b0538adcb2f18b04b078ea27b00b4185sewardj 13090a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 1310e801ed2c0d58802634b06de65a730364df4c08b2sewardj////////////////////////////////////////////////////////// 1311e801ed2c0d58802634b06de65a730364df4c08b2sewardj// // 1312e801ed2c0d58802634b06de65a730364df4c08b2sewardj// ISTRI_38 // 1313e801ed2c0d58802634b06de65a730364df4c08b2sewardj// // 1314e801ed2c0d58802634b06de65a730364df4c08b2sewardj////////////////////////////////////////////////////////// 1315e801ed2c0d58802634b06de65a730364df4c08b2sewardj 1316e801ed2c0d58802634b06de65a730364df4c08b2sewardjUInt h_pcmpistri_38 ( V128* argL, V128* argR ) 1317e801ed2c0d58802634b06de65a730364df4c08b2sewardj{ 1318e801ed2c0d58802634b06de65a730364df4c08b2sewardj V128 block[2]; 1319e801ed2c0d58802634b06de65a730364df4c08b2sewardj memcpy(&block[0], argL, sizeof(V128)); 1320e801ed2c0d58802634b06de65a730364df4c08b2sewardj memcpy(&block[1], argR, sizeof(V128)); 1321e801ed2c0d58802634b06de65a730364df4c08b2sewardj ULong res, flags; 1322e801ed2c0d58802634b06de65a730364df4c08b2sewardj __asm__ __volatile__( 1323e801ed2c0d58802634b06de65a730364df4c08b2sewardj "subq $1024, %%rsp" "\n\t" 1324e801ed2c0d58802634b06de65a730364df4c08b2sewardj "movdqu 0(%2), %%xmm2" "\n\t" 1325e801ed2c0d58802634b06de65a730364df4c08b2sewardj "movdqu 16(%2), %%xmm11" "\n\t" 1326e801ed2c0d58802634b06de65a730364df4c08b2sewardj "pcmpistri $0x38, %%xmm2, %%xmm11" "\n\t" 1327e801ed2c0d58802634b06de65a730364df4c08b2sewardj "pushfq" "\n\t" 1328e801ed2c0d58802634b06de65a730364df4c08b2sewardj "popq %%rdx" "\n\t" 1329e801ed2c0d58802634b06de65a730364df4c08b2sewardj "movq %%rcx, %0" "\n\t" 1330e801ed2c0d58802634b06de65a730364df4c08b2sewardj "movq %%rdx, 
%1" "\n\t" 1331e801ed2c0d58802634b06de65a730364df4c08b2sewardj "addq $1024, %%rsp" "\n\t" 1332e801ed2c0d58802634b06de65a730364df4c08b2sewardj : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1333e801ed2c0d58802634b06de65a730364df4c08b2sewardj : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1334e801ed2c0d58802634b06de65a730364df4c08b2sewardj ); 1335e801ed2c0d58802634b06de65a730364df4c08b2sewardj return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1336e801ed2c0d58802634b06de65a730364df4c08b2sewardj} 1337e801ed2c0d58802634b06de65a730364df4c08b2sewardj 1338e801ed2c0d58802634b06de65a730364df4c08b2sewardjUInt s_pcmpistri_38 ( V128* argLU, V128* argRU ) 1339e801ed2c0d58802634b06de65a730364df4c08b2sewardj{ 1340e801ed2c0d58802634b06de65a730364df4c08b2sewardj V128 resV; 1341e801ed2c0d58802634b06de65a730364df4c08b2sewardj UInt resOSZACP, resECX; 1342e801ed2c0d58802634b06de65a730364df4c08b2sewardj Bool ok 1343e801ed2c0d58802634b06de65a730364df4c08b2sewardj = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 1344e801ed2c0d58802634b06de65a730364df4c08b2sewardj zmask_from_V128(argLU), 1345e801ed2c0d58802634b06de65a730364df4c08b2sewardj zmask_from_V128(argRU), 1346e801ed2c0d58802634b06de65a730364df4c08b2sewardj 0x38, False/*!isSTRM*/ 1347e801ed2c0d58802634b06de65a730364df4c08b2sewardj ); 1348e801ed2c0d58802634b06de65a730364df4c08b2sewardj assert(ok); 1349e801ed2c0d58802634b06de65a730364df4c08b2sewardj resECX = resV.uInt[0]; 1350e801ed2c0d58802634b06de65a730364df4c08b2sewardj return (resOSZACP << 16) | resECX; 1351e801ed2c0d58802634b06de65a730364df4c08b2sewardj} 1352e801ed2c0d58802634b06de65a730364df4c08b2sewardj 1353e801ed2c0d58802634b06de65a730364df4c08b2sewardjvoid istri_38 ( void ) 1354e801ed2c0d58802634b06de65a730364df4c08b2sewardj{ 1355e801ed2c0d58802634b06de65a730364df4c08b2sewardj char* wot = "38"; 1356e801ed2c0d58802634b06de65a730364df4c08b2sewardj UInt(*h)(V128*,V128*) = h_pcmpistri_38; 1357e801ed2c0d58802634b06de65a730364df4c08b2sewardj UInt(*s)(V128*,V128*) = 
s_pcmpistri_38; 1358e801ed2c0d58802634b06de65a730364df4c08b2sewardj 1359e801ed2c0d58802634b06de65a730364df4c08b2sewardj try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1360e801ed2c0d58802634b06de65a730364df4c08b2sewardj 1361e801ed2c0d58802634b06de65a730364df4c08b2sewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1362e801ed2c0d58802634b06de65a730364df4c08b2sewardj try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1363e801ed2c0d58802634b06de65a730364df4c08b2sewardj try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 1364e801ed2c0d58802634b06de65a730364df4c08b2sewardj try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 1365e801ed2c0d58802634b06de65a730364df4c08b2sewardj 1366e801ed2c0d58802634b06de65a730364df4c08b2sewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 1367e801ed2c0d58802634b06de65a730364df4c08b2sewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 1368e801ed2c0d58802634b06de65a730364df4c08b2sewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 1369e801ed2c0d58802634b06de65a730364df4c08b2sewardj 1370e801ed2c0d58802634b06de65a730364df4c08b2sewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1371e801ed2c0d58802634b06de65a730364df4c08b2sewardj try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1372e801ed2c0d58802634b06de65a730364df4c08b2sewardj try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1373e801ed2c0d58802634b06de65a730364df4c08b2sewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1374e801ed2c0d58802634b06de65a730364df4c08b2sewardj 1375e801ed2c0d58802634b06de65a730364df4c08b2sewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1376e801ed2c0d58802634b06de65a730364df4c08b2sewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 1377e801ed2c0d58802634b06de65a730364df4c08b2sewardj try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 
1378e801ed2c0d58802634b06de65a730364df4c08b2sewardj 1379e801ed2c0d58802634b06de65a730364df4c08b2sewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1380e801ed2c0d58802634b06de65a730364df4c08b2sewardj 1381e801ed2c0d58802634b06de65a730364df4c08b2sewardj try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 1382e801ed2c0d58802634b06de65a730364df4c08b2sewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 1383e801ed2c0d58802634b06de65a730364df4c08b2sewardj try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); 1384e801ed2c0d58802634b06de65a730364df4c08b2sewardj 1385e801ed2c0d58802634b06de65a730364df4c08b2sewardj try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa"); 1386e801ed2c0d58802634b06de65a730364df4c08b2sewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 1387e801ed2c0d58802634b06de65a730364df4c08b2sewardj try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); 1388e801ed2c0d58802634b06de65a730364df4c08b2sewardj 1389e801ed2c0d58802634b06de65a730364df4c08b2sewardj try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 1390e801ed2c0d58802634b06de65a730364df4c08b2sewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); 1391e801ed2c0d58802634b06de65a730364df4c08b2sewardj try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa"); 1392e801ed2c0d58802634b06de65a730364df4c08b2sewardj 1393e801ed2c0d58802634b06de65a730364df4c08b2sewardj try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); 1394e801ed2c0d58802634b06de65a730364df4c08b2sewardj try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); 1395e801ed2c0d58802634b06de65a730364df4c08b2sewardj try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa"); 1396e801ed2c0d58802634b06de65a730364df4c08b2sewardj 1397e801ed2c0d58802634b06de65a730364df4c08b2sewardj try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 1398e801ed2c0d58802634b06de65a730364df4c08b2sewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 
1399e801ed2c0d58802634b06de65a730364df4c08b2sewardj} 1400e801ed2c0d58802634b06de65a730364df4c08b2sewardj 1401e801ed2c0d58802634b06de65a730364df4c08b2sewardj 14020a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 14030a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj////////////////////////////////////////////////////////// 14040a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj// // 140515df336557eb012a5f3b2f1482a0411857039496sewardj// ISTRI_46 // 140615df336557eb012a5f3b2f1482a0411857039496sewardj// // 140715df336557eb012a5f3b2f1482a0411857039496sewardj////////////////////////////////////////////////////////// 140815df336557eb012a5f3b2f1482a0411857039496sewardj 140915df336557eb012a5f3b2f1482a0411857039496sewardjUInt h_pcmpistri_46 ( V128* argL, V128* argR ) 141015df336557eb012a5f3b2f1482a0411857039496sewardj{ 141115df336557eb012a5f3b2f1482a0411857039496sewardj V128 block[2]; 141215df336557eb012a5f3b2f1482a0411857039496sewardj memcpy(&block[0], argL, sizeof(V128)); 141315df336557eb012a5f3b2f1482a0411857039496sewardj memcpy(&block[1], argR, sizeof(V128)); 141415df336557eb012a5f3b2f1482a0411857039496sewardj ULong res, flags; 141515df336557eb012a5f3b2f1482a0411857039496sewardj __asm__ __volatile__( 141615df336557eb012a5f3b2f1482a0411857039496sewardj "subq $1024, %%rsp" "\n\t" 141715df336557eb012a5f3b2f1482a0411857039496sewardj "movdqu 0(%2), %%xmm2" "\n\t" 141815df336557eb012a5f3b2f1482a0411857039496sewardj "movdqu 16(%2), %%xmm11" "\n\t" 141915df336557eb012a5f3b2f1482a0411857039496sewardj "pcmpistri $0x46, %%xmm2, %%xmm11" "\n\t" 142015df336557eb012a5f3b2f1482a0411857039496sewardj "pushfq" "\n\t" 142115df336557eb012a5f3b2f1482a0411857039496sewardj "popq %%rdx" "\n\t" 142215df336557eb012a5f3b2f1482a0411857039496sewardj "movq %%rcx, %0" "\n\t" 142315df336557eb012a5f3b2f1482a0411857039496sewardj "movq %%rdx, %1" "\n\t" 142415df336557eb012a5f3b2f1482a0411857039496sewardj "addq $1024, %%rsp" "\n\t" 142515df336557eb012a5f3b2f1482a0411857039496sewardj : /*out*/ "=r"(res), "=r"(flags) 
: "r"/*in*/(&block[0]) 142615df336557eb012a5f3b2f1482a0411857039496sewardj : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 142715df336557eb012a5f3b2f1482a0411857039496sewardj ); 142815df336557eb012a5f3b2f1482a0411857039496sewardj return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 142915df336557eb012a5f3b2f1482a0411857039496sewardj} 143015df336557eb012a5f3b2f1482a0411857039496sewardj 143115df336557eb012a5f3b2f1482a0411857039496sewardjUInt s_pcmpistri_46 ( V128* argLU, V128* argRU ) 143215df336557eb012a5f3b2f1482a0411857039496sewardj{ 143315df336557eb012a5f3b2f1482a0411857039496sewardj V128 resV; 143415df336557eb012a5f3b2f1482a0411857039496sewardj UInt resOSZACP, resECX; 143515df336557eb012a5f3b2f1482a0411857039496sewardj Bool ok 143615df336557eb012a5f3b2f1482a0411857039496sewardj = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 143715df336557eb012a5f3b2f1482a0411857039496sewardj zmask_from_V128(argLU), 143815df336557eb012a5f3b2f1482a0411857039496sewardj zmask_from_V128(argRU), 143915df336557eb012a5f3b2f1482a0411857039496sewardj 0x46, False/*!isSTRM*/ 144015df336557eb012a5f3b2f1482a0411857039496sewardj ); 144115df336557eb012a5f3b2f1482a0411857039496sewardj assert(ok); 144215df336557eb012a5f3b2f1482a0411857039496sewardj resECX = resV.uInt[0]; 144315df336557eb012a5f3b2f1482a0411857039496sewardj return (resOSZACP << 16) | resECX; 144415df336557eb012a5f3b2f1482a0411857039496sewardj} 144515df336557eb012a5f3b2f1482a0411857039496sewardj 144615df336557eb012a5f3b2f1482a0411857039496sewardjvoid istri_46 ( void ) 144715df336557eb012a5f3b2f1482a0411857039496sewardj{ 144815df336557eb012a5f3b2f1482a0411857039496sewardj char* wot = "46"; 144915df336557eb012a5f3b2f1482a0411857039496sewardj UInt(*h)(V128*,V128*) = h_pcmpistri_46; 145015df336557eb012a5f3b2f1482a0411857039496sewardj UInt(*s)(V128*,V128*) = s_pcmpistri_46; 145115df336557eb012a5f3b2f1482a0411857039496sewardj 145215df336557eb012a5f3b2f1482a0411857039496sewardj try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc"); 
145315df336557eb012a5f3b2f1482a0411857039496sewardj try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb"); 145415df336557eb012a5f3b2f1482a0411857039496sewardj try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb"); 145515df336557eb012a5f3b2f1482a0411857039496sewardj try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb"); 145615df336557eb012a5f3b2f1482a0411857039496sewardj 145715df336557eb012a5f3b2f1482a0411857039496sewardj try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb"); 145815df336557eb012a5f3b2f1482a0411857039496sewardj try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb"); 145915df336557eb012a5f3b2f1482a0411857039496sewardj try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb"); 146015df336557eb012a5f3b2f1482a0411857039496sewardj try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb"); 146115df336557eb012a5f3b2f1482a0411857039496sewardj try_istri(wot,h,s, "0000000000000000", "00000000000000cb"); 146215df336557eb012a5f3b2f1482a0411857039496sewardj 146315df336557eb012a5f3b2f1482a0411857039496sewardj try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 146415df336557eb012a5f3b2f1482a0411857039496sewardj 146515df336557eb012a5f3b2f1482a0411857039496sewardj try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb"); 146615df336557eb012a5f3b2f1482a0411857039496sewardj try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b"); 146715df336557eb012a5f3b2f1482a0411857039496sewardj try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb"); 146815df336557eb012a5f3b2f1482a0411857039496sewardj 146915df336557eb012a5f3b2f1482a0411857039496sewardj try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb"); 147015df336557eb012a5f3b2f1482a0411857039496sewardj try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb"); 147115df336557eb012a5f3b2f1482a0411857039496sewardj try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b"); 147215df336557eb012a5f3b2f1482a0411857039496sewardj 147315df336557eb012a5f3b2f1482a0411857039496sewardj 
try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421"); 147415df336557eb012a5f3b2f1482a0411857039496sewardj try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421"); 147515df336557eb012a5f3b2f1482a0411857039496sewardj 147615df336557eb012a5f3b2f1482a0411857039496sewardj try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532"); 147715df336557eb012a5f3b2f1482a0411857039496sewardj try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532"); 147815df336557eb012a5f3b2f1482a0411857039496sewardj 147915df336557eb012a5f3b2f1482a0411857039496sewardj try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a"); 148015df336557eb012a5f3b2f1482a0411857039496sewardj try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7"); 148115df336557eb012a5f3b2f1482a0411857039496sewardj try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54"); 148215df336557eb012a5f3b2f1482a0411857039496sewardj try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb"); 148315df336557eb012a5f3b2f1482a0411857039496sewardj} 148415df336557eb012a5f3b2f1482a0411857039496sewardj 148515df336557eb012a5f3b2f1482a0411857039496sewardj 148615df336557eb012a5f3b2f1482a0411857039496sewardj////////////////////////////////////////////////////////// 148715df336557eb012a5f3b2f1482a0411857039496sewardj// // 1488473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj// ISTRI_30 // 1489473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj// // 1490473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj////////////////////////////////////////////////////////// 1491473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj 1492473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardjUInt h_pcmpistri_30 ( V128* argL, V128* argR ) 1493473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj{ 1494473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj V128 block[2]; 1495473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj memcpy(&block[0], argL, sizeof(V128)); 1496473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj memcpy(&block[1], argR, sizeof(V128)); 
1497473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj ULong res, flags; 1498473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj __asm__ __volatile__( 1499473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj "subq $1024, %%rsp" "\n\t" 1500473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj "movdqu 0(%2), %%xmm2" "\n\t" 1501473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj "movdqu 16(%2), %%xmm11" "\n\t" 1502473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj "pcmpistri $0x30, %%xmm2, %%xmm11" "\n\t" 1503473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj "pushfq" "\n\t" 1504473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj "popq %%rdx" "\n\t" 1505473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj "movq %%rcx, %0" "\n\t" 1506473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj "movq %%rdx, %1" "\n\t" 1507473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj "addq $1024, %%rsp" "\n\t" 1508473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1509473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1510473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj ); 1511473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1512473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj} 1513473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj 1514473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardjUInt s_pcmpistri_30 ( V128* argLU, V128* argRU ) 1515473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj{ 1516473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj V128 resV; 1517473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj UInt resOSZACP, resECX; 1518473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj Bool ok 1519473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 1520473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj zmask_from_V128(argLU), 1521473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj zmask_from_V128(argRU), 1522473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj 0x30, 
False/*!isSTRM*/ 1523473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj ); 1524473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj assert(ok); 1525473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj resECX = resV.uInt[0]; 1526473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj return (resOSZACP << 16) | resECX; 1527473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj} 1528473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj 1529473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardjvoid istri_30 ( void ) 1530473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj{ 1531473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj char* wot = "30"; 1532473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj UInt(*h)(V128*,V128*) = h_pcmpistri_30; 1533473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj UInt(*s)(V128*,V128*) = s_pcmpistri_30; 1534473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj 1535473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); 1536473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); 1537473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); 1538473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 1539473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj 1540473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 1541473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); 1542473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); 1543473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); 1544473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); 1545473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj 1546473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj 
try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 1547473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); 1548473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); 1549473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); 1550473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj 1551473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1552473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1553473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj 1554473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 1555473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 1556473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); 1557473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); 1558473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj 1559473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); 1560473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj 1561473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 1562473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 1563473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj} 1564473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj 1565473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj 1566473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj////////////////////////////////////////////////////////// 1567473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj// // 1568473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj// ISTRI_40 
// 1569473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj// // 1570473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj////////////////////////////////////////////////////////// 1571473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj 1572473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardjUInt h_pcmpistri_40 ( V128* argL, V128* argR ) 1573473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj{ 1574473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj V128 block[2]; 1575473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj memcpy(&block[0], argL, sizeof(V128)); 1576473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj memcpy(&block[1], argR, sizeof(V128)); 1577473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj ULong res, flags; 1578473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj __asm__ __volatile__( 1579473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj "subq $1024, %%rsp" "\n\t" 1580473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj "movdqu 0(%2), %%xmm2" "\n\t" 1581473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj "movdqu 16(%2), %%xmm11" "\n\t" 1582473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj "pcmpistri $0x40, %%xmm2, %%xmm11" "\n\t" 1583473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj "pushfq" "\n\t" 1584473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj "popq %%rdx" "\n\t" 1585473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj "movq %%rcx, %0" "\n\t" 1586473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj "movq %%rdx, %1" "\n\t" 1587473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj "addq $1024, %%rsp" "\n\t" 1588473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1589473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1590473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj ); 1591473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1592473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj} 1593473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj 
1594473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardjUInt s_pcmpistri_40 ( V128* argLU, V128* argRU ) 1595473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj{ 1596473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj V128 resV; 1597473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj UInt resOSZACP, resECX; 1598473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj Bool ok 1599473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 1600473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj zmask_from_V128(argLU), 1601473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj zmask_from_V128(argRU), 1602473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj 0x40, False/*!isSTRM*/ 1603473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj ); 1604473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj assert(ok); 1605473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj resECX = resV.uInt[0]; 1606473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj return (resOSZACP << 16) | resECX; 1607473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj} 1608473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj 1609473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardjvoid istri_40 ( void ) 1610473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj{ 1611473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj char* wot = "40"; 1612473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj UInt(*h)(V128*,V128*) = h_pcmpistri_40; 1613473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj UInt(*s)(V128*,V128*) = s_pcmpistri_40; 1614473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj 1615473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); 1616473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); 1617473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); 1618473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 1619473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj 
1620473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 1621473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); 1622473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); 1623473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); 1624473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); 1625473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj 1626473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 1627473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); 1628473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); 1629473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); 1630473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj 1631473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1632473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1633473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj 1634473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 1635473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 1636473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); 1637473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); 1638473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj 1639473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); 
1640473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj 1641473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 1642473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 1643473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj} 1644473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj 1645473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj 1646473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj////////////////////////////////////////////////////////// 1647473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj// // 16487f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj// ISTRI_42 // 16497f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj// // 16507f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj////////////////////////////////////////////////////////// 16517f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj 16527f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardjUInt h_pcmpistri_42 ( V128* argL, V128* argR ) 16537f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj{ 16547f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj V128 block[2]; 16557f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj memcpy(&block[0], argL, sizeof(V128)); 16567f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj memcpy(&block[1], argR, sizeof(V128)); 16577f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj ULong res, flags; 16587f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj __asm__ __volatile__( 16597f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj "subq $1024, %%rsp" "\n\t" 16607f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj "movdqu 0(%2), %%xmm2" "\n\t" 16617f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj "movdqu 16(%2), %%xmm11" "\n\t" 16627f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj "pcmpistri $0x42, %%xmm2, %%xmm11" "\n\t" 16637f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj "pushfq" "\n\t" 16647f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj "popq %%rdx" "\n\t" 16657f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj "movq %%rcx, %0" "\n\t" 
16667f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj "movq %%rdx, %1" "\n\t" 16677f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj "addq $1024, %%rsp" "\n\t" 16687f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 16697f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 16707f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj ); 16717f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 16727f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj} 16737f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj 16747f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardjUInt s_pcmpistri_42 ( V128* argLU, V128* argRU ) 16757f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj{ 16767f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj V128 resV; 16777f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj UInt resOSZACP, resECX; 16787f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj Bool ok 16797f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 16807f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj zmask_from_V128(argLU), 16817f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj zmask_from_V128(argRU), 16827f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj 0x42, False/*!isSTRM*/ 16837f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj ); 16847f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj assert(ok); 16857f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj resECX = resV.uInt[0]; 16867f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj return (resOSZACP << 16) | resECX; 16877f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj} 16887f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj 16897f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardjvoid istri_42 ( void ) 16907f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj{ 16917f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj char* wot = "42"; 16927f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj UInt(*h)(V128*,V128*) = h_pcmpistri_42; 
16937f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj UInt(*s)(V128*,V128*) = s_pcmpistri_42; 16947f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj 16957f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); 16967f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); 16977f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); 16987f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 16997f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj 17007f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 17017f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); 17027f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); 17037f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); 17047f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); 17057f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj 17067f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 17077f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); 17087f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); 17097f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); 17107f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj 17117f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 17127f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
17137f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj 17147f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 17157f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 17167f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); 17177f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); 17187f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj 17197f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); 17207f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj 17217f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 17227f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 17237f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj} 17247f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj 17257f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj 17267f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj////////////////////////////////////////////////////////// 17277f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj// // 1728a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj// ISTRI_0E // 1729a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj// // 1730a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj////////////////////////////////////////////////////////// 1731a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1732a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj__attribute__((noinline)) 1733a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardjUInt h_pcmpistri_0E ( V128* argL, V128* argR ) 1734a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj{ 1735a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj V128 block[2]; 1736a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj memcpy(&block[0], argL, sizeof(V128)); 1737a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 
memcpy(&block[1], argR, sizeof(V128)); 1738a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj ULong res = 0, flags = 0; 1739a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj __asm__ __volatile__( 1740a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj "movdqu 0(%2), %%xmm2" "\n\t" 1741a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj "movdqu 16(%2), %%xmm11" "\n\t" 1742a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj "pcmpistri $0x0E, %%xmm2, %%xmm11" "\n\t" 1743a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj "pushfq" "\n\t" 1744a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj "popq %%rdx" "\n\t" 1745a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj "movq %%rcx, %0" "\n\t" 1746a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj "movq %%rdx, %1" "\n\t" 1747a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1748a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1749a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj ); 1750a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1751a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj} 1752a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1753a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardjUInt s_pcmpistri_0E ( V128* argLU, V128* argRU ) 1754a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj{ 1755a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj V128 resV; 1756a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj UInt resOSZACP, resECX; 1757a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj Bool ok 1758a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 1759a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj zmask_from_V128(argLU), 1760a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj zmask_from_V128(argRU), 1761a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 0x0E, False/*!isSTRM*/ 1762a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj ); 
1763a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj assert(ok); 1764a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj resECX = resV.uInt[0]; 1765a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj return (resOSZACP << 16) | resECX; 1766a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj} 1767a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1768a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardjvoid istri_0E ( void ) 1769a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj{ 1770a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj char* wot = "0E"; 1771a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj UInt(*h)(V128*,V128*) = h_pcmpistri_0E; 1772a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj UInt(*s)(V128*,V128*) = s_pcmpistri_0E; 1773a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1774a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "111111111abcde11", "00000000000abcde"); 1775a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1776a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "111111111abcde11", "0000abcde00abcde"); 1777a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1778a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "1111111111abcde1", "00000000000abcde"); 1779a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "11111111111abcde", "00000000000abcde"); 1780a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "111111111111abcd", "00000000000abcde"); 1781a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1782a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "111abcde1abcde11", "00000000000abcde"); 1783a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1784a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "11abcde11abcde11", "00000000000abcde"); 1785a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "1abcde111abcde11", "00000000000abcde"); 1786a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "abcde1111abcde11", "00000000000abcde"); 
1787a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "bcde11111abcde11", "00000000000abcde"); 1788a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "cde111111abcde11", "00000000000abcde"); 1789a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1790a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "01abcde11abcde11", "00000000000abcde"); 1791a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "00abcde11abcde11", "00000000000abcde"); 1792a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "000bcde11abcde11", "00000000000abcde"); 1793a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1794a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "00abcde10abcde11", "00000000000abcde"); 1795a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "00abcde100bcde11", "00000000000abcde"); 1796a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1797a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "1111111111111234", "0000000000000000"); 1798a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "1111111111111234", "0000000000000001"); 1799a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "1111111111111234", "0000000000000011"); 1800a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1801a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "1111111111111234", "1111111111111234"); 1802a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "a111111111111111", "000000000000000a"); 1803a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "b111111111111111", "000000000000000a"); 1804a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1805a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "b111111111111111", "0000000000000000"); 1806a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1807a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 
try_istri(wot,h,s, "123456789abcdef1", "0000000000000000"); 1808a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "0000000000000000", "123456789abcdef1"); 1809a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj} 1810a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1811a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1812a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj////////////////////////////////////////////////////////// 1813a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj// // 1814a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj// ISTRI_34 // 1815a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj// // 1816a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj////////////////////////////////////////////////////////// 1817a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1818a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardjUInt h_pcmpistri_34 ( V128* argL, V128* argR ) 1819a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj{ 1820a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj V128 block[2]; 1821a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj memcpy(&block[0], argL, sizeof(V128)); 1822a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj memcpy(&block[1], argR, sizeof(V128)); 1823a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj ULong res, flags; 1824a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj __asm__ __volatile__( 1825a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj "subq $1024, %%rsp" "\n\t" 1826a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj "movdqu 0(%2), %%xmm2" "\n\t" 1827a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj "movdqu 16(%2), %%xmm11" "\n\t" 1828a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj "pcmpistri $0x34, %%xmm2, %%xmm11" "\n\t" 1829a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj "pushfq" "\n\t" 1830a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj "popq %%rdx" "\n\t" 1831a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj "movq %%rcx, %0" "\n\t" 1832a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj "movq %%rdx, %1" "\n\t" 
1833a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj "addq $1024, %%rsp" "\n\t" 1834a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1835a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1836a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj ); 1837a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1838a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj} 1839a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1840a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardjUInt s_pcmpistri_34 ( V128* argLU, V128* argRU ) 1841a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj{ 1842a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj V128 resV; 1843a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj UInt resOSZACP, resECX; 1844a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj Bool ok 1845a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 1846a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj zmask_from_V128(argLU), 1847a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj zmask_from_V128(argRU), 1848a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 0x34, False/*!isSTRM*/ 1849a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj ); 1850a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj assert(ok); 1851a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj resECX = resV.uInt[0]; 1852a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj return (resOSZACP << 16) | resECX; 1853a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj} 1854a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1855a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardjvoid istri_34 ( void ) 1856a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj{ 1857a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj char* wot = "34"; 1858a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj UInt(*h)(V128*,V128*) = h_pcmpistri_34; 1859a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj UInt(*s)(V128*,V128*) = s_pcmpistri_34; 
1860a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1861a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc"); 1862a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb"); 1863a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb"); 1864a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb"); 1865a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1866a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb"); 1867a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb"); 1868a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb"); 1869a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb"); 1870a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "0000000000000000", "00000000000000cb"); 1871a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1872a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1873a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1874a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb"); 1875a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b"); 1876a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb"); 1877a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1878a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb"); 1879a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb"); 1880a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 
try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b"); 1881a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1882a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421"); 1883a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421"); 1884a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1885a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532"); 1886a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532"); 1887a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1888a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a"); 1889a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7"); 1890a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54"); 1891a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb"); 1892a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj} 1893a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1894a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1895a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj////////////////////////////////////////////////////////// 1896a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj// // 1897a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj// ISTRI_14 // 1898a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj// // 1899a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj////////////////////////////////////////////////////////// 1900a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1901a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardjUInt h_pcmpistri_14 ( V128* argL, V128* argR ) 1902a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj{ 1903a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj V128 block[2]; 1904a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 
memcpy(&block[0], argL, sizeof(V128)); 1905a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj memcpy(&block[1], argR, sizeof(V128)); 1906a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj ULong res, flags; 1907a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj __asm__ __volatile__( 1908a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj "subq $1024, %%rsp" "\n\t" 1909a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj "movdqu 0(%2), %%xmm2" "\n\t" 1910a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj "movdqu 16(%2), %%xmm11" "\n\t" 1911a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj "pcmpistri $0x14, %%xmm2, %%xmm11" "\n\t" 1912a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj "pushfq" "\n\t" 1913a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj "popq %%rdx" "\n\t" 1914a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj "movq %%rcx, %0" "\n\t" 1915a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj "movq %%rdx, %1" "\n\t" 1916a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj "addq $1024, %%rsp" "\n\t" 1917a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1918a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1919a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj ); 1920a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1921a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj} 1922a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1923a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardjUInt s_pcmpistri_14 ( V128* argLU, V128* argRU ) 1924a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj{ 1925a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj V128 resV; 1926a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj UInt resOSZACP, resECX; 1927a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj Bool ok 1928a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 1929a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj zmask_from_V128(argLU), 
1930a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj zmask_from_V128(argRU), 1931a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 0x14, False/*!isSTRM*/ 1932a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj ); 1933a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj assert(ok); 1934a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj resECX = resV.uInt[0]; 1935a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj return (resOSZACP << 16) | resECX; 1936a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj} 1937a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1938a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardjvoid istri_14 ( void ) 1939a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj{ 1940a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj char* wot = "14"; 1941a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj UInt(*h)(V128*,V128*) = h_pcmpistri_14; 1942a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj UInt(*s)(V128*,V128*) = s_pcmpistri_14; 1943a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1944a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc"); 1945a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb"); 1946a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb"); 1947a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb"); 1948a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1949a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb"); 1950a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb"); 1951a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb"); 1952a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb"); 1953a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, 
"0000000000000000", "00000000000000cb"); 1954a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1955a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1956a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1957a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb"); 1958a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b"); 1959a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb"); 1960a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1961a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb"); 1962a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb"); 1963a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b"); 1964a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1965a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421"); 1966a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421"); 1967a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1968a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532"); 1969a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532"); 1970a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1971a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a"); 1972a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7"); 1973a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54"); 1974a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj try_istri(wot,h,s, 
"3f2987608c11be6f", "a9ecb661f8e0a8cb"); 1975a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj} 1976a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1977a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj 1978a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj////////////////////////////////////////////////////////// 1979a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj// // 1980a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes// ISTRI_70 // 1981a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes// // 1982a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes////////////////////////////////////////////////////////// 1983a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes 1984a0664b9ca67b594bd6f570a61d3301167a24750cElliott HughesUInt h_pcmpistri_70 ( V128* argL, V128* argR ) 1985a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes{ 1986a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes V128 block[2]; 1987a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes memcpy(&block[0], argL, sizeof(V128)); 1988a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes memcpy(&block[1], argR, sizeof(V128)); 1989a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes ULong res, flags; 1990a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes __asm__ __volatile__( 1991a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes "subq $1024, %%rsp" "\n\t" 1992a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes "movdqu 0(%2), %%xmm2" "\n\t" 1993a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes "movdqu 16(%2), %%xmm11" "\n\t" 1994a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes "pcmpistri $0x70, %%xmm2, %%xmm11" "\n\t" 1995a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes "pushfq" "\n\t" 1996a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes "popq %%rdx" "\n\t" 1997a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes "movq %%rcx, %0" "\n\t" 1998a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes "movq %%rdx, %1" "\n\t" 
1999a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes "addq $1024, %%rsp" "\n\t" 2000a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 2001a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 2002a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes ); 2003a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 2004a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes} 2005a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes 2006a0664b9ca67b594bd6f570a61d3301167a24750cElliott HughesUInt s_pcmpistri_70 ( V128* argLU, V128* argRU ) 2007a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes{ 2008a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes V128 resV; 2009a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes UInt resOSZACP, resECX; 2010a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes Bool ok 2011a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 2012a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes zmask_from_V128(argLU), 2013a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes zmask_from_V128(argRU), 2014a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes 0x70, False/*!isSTRM*/ 2015a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes ); 2016a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes assert(ok); 2017a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes resECX = resV.uInt[0]; 2018a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes return (resOSZACP << 16) | resECX; 2019a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes} 2020a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes 2021a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughesvoid istri_70 ( void ) 2022a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes{ 2023a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes char* wot = "70"; 
2024a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes UInt(*h)(V128*,V128*) = h_pcmpistri_70; 2025a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes UInt(*s)(V128*,V128*) = s_pcmpistri_70; 2026a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes 2027a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); 2028a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); 2029a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); 2030a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 2031a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes 2032a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 2033a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); 2034a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); 2035a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); 2036a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); 2037a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes 2038a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 2039a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); 2040a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); 2041a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); 2042a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes 
2043a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 2044a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 2045a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes 2046a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 2047a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 2048a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); 2049a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); 2050a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes 2051a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); 2052a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes 2053a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 2054a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 2055a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes} 2056a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes 2057a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes 2058a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes////////////////////////////////////////////////////////// 2059a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes// // 2060a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes// ISTRI_62 // 2061a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes// // 2062a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes////////////////////////////////////////////////////////// 2063a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes 2064a0664b9ca67b594bd6f570a61d3301167a24750cElliott HughesUInt h_pcmpistri_62 ( 
V128* argL, V128* argR ) 2065a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes{ 2066a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes V128 block[2]; 2067a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes memcpy(&block[0], argL, sizeof(V128)); 2068a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes memcpy(&block[1], argR, sizeof(V128)); 2069a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes ULong res, flags; 2070a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes __asm__ __volatile__( 2071a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes "subq $1024, %%rsp" "\n\t" 2072a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes "movdqu 0(%2), %%xmm2" "\n\t" 2073a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes "movdqu 16(%2), %%xmm11" "\n\t" 2074a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes "pcmpistri $0x62, %%xmm2, %%xmm11" "\n\t" 2075a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes "pushfq" "\n\t" 2076a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes "popq %%rdx" "\n\t" 2077a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes "movq %%rcx, %0" "\n\t" 2078a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes "movq %%rdx, %1" "\n\t" 2079a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes "addq $1024, %%rsp" "\n\t" 2080a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 2081a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 2082a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes ); 2083a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 2084a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes} 2085a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes 2086a0664b9ca67b594bd6f570a61d3301167a24750cElliott HughesUInt s_pcmpistri_62 ( V128* argLU, V128* argRU ) 2087a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes{ 
2088a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes V128 resV; 2089a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes UInt resOSZACP, resECX; 2090a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes Bool ok 2091a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 2092a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes zmask_from_V128(argLU), 2093a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes zmask_from_V128(argRU), 2094a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes 0x62, False/*!isSTRM*/ 2095a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes ); 2096a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes assert(ok); 2097a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes resECX = resV.uInt[0]; 2098a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes return (resOSZACP << 16) | resECX; 2099a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes} 2100a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes 2101a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughesvoid istri_62 ( void ) 2102a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes{ 2103a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes char* wot = "62"; 2104a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes UInt(*h)(V128*,V128*) = h_pcmpistri_62; 2105a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes UInt(*s)(V128*,V128*) = s_pcmpistri_62; 2106a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes 2107a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); 2108a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); 2109a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); 2110a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 
2111a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes 2112a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 2113a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); 2114a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); 2115a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); 2116a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); 2117a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes 2118a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 2119a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); 2120a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); 2121a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); 2122a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes 2123a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 2124a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 2125a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes 2126a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 2127a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 2128a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); 2129a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "0000abcdabcdabcd", 
"000000000000baba"); 2130a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes 2131a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); 2132a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes 2133a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 2134a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 2135a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes} 2136a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes 2137a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes 2138a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes////////////////////////////////////////////////////////// 2139a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes// // 2140a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes// ISTRI_72 // 2141a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes// // 2142a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes////////////////////////////////////////////////////////// 2143a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes 2144a0664b9ca67b594bd6f570a61d3301167a24750cElliott HughesUInt h_pcmpistri_72 ( V128* argL, V128* argR ) 2145a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes{ 2146a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes V128 block[2]; 2147a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes memcpy(&block[0], argL, sizeof(V128)); 2148a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes memcpy(&block[1], argR, sizeof(V128)); 2149a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes ULong res, flags; 2150a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes __asm__ __volatile__( 2151a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes "subq $1024, %%rsp" "\n\t" 2152a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes "movdqu 0(%2), %%xmm2" "\n\t" 2153a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes 
"movdqu 16(%2), %%xmm11" "\n\t" 2154a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes "pcmpistri $0x72, %%xmm2, %%xmm11" "\n\t" 2155a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes "pushfq" "\n\t" 2156a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes "popq %%rdx" "\n\t" 2157a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes "movq %%rcx, %0" "\n\t" 2158a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes "movq %%rdx, %1" "\n\t" 2159a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes "addq $1024, %%rsp" "\n\t" 2160a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 2161a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 2162a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes ); 2163a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 2164a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes} 2165a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes 2166a0664b9ca67b594bd6f570a61d3301167a24750cElliott HughesUInt s_pcmpistri_72 ( V128* argLU, V128* argRU ) 2167a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes{ 2168a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes V128 resV; 2169a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes UInt resOSZACP, resECX; 2170a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes Bool ok 2171a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 2172a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes zmask_from_V128(argLU), 2173a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes zmask_from_V128(argRU), 2174a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes 0x72, False/*!isSTRM*/ 2175a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes ); 2176a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes assert(ok); 2177a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes resECX = 
resV.uInt[0]; 2178a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes return (resOSZACP << 16) | resECX; 2179a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes} 2180a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes 2181a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughesvoid istri_72 ( void ) 2182a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes{ 2183a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes char* wot = "72"; 2184a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes UInt(*h)(V128*,V128*) = h_pcmpistri_72; 2185a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes UInt(*s)(V128*,V128*) = s_pcmpistri_72; 2186a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes 2187a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); 2188a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); 2189a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); 2190a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 2191a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes 2192a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 2193a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); 2194a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); 2195a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); 2196a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); 2197a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes 2198a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "abcdabcdabcdabcd", 
"000000000000abcd"); 2199a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); 2200a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); 2201a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); 2202a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes 2203a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 2204a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 2205a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes 2206a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 2207a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 2208a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); 2209a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); 2210a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes 2211a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); 2212a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes 2213a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 2214a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 2215a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes} 2216a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes 2217a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes 2218a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes////////////////////////////////////////////////////////// 
2219a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes// // 2220ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes// ISTRI_10 // 2221ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes// // 2222ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes////////////////////////////////////////////////////////// 2223ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes 2224ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott HughesUInt h_pcmpistri_10 ( V128* argL, V128* argR ) 2225ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes{ 2226ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes V128 block[2]; 2227ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes memcpy(&block[0], argL, sizeof(V128)); 2228ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes memcpy(&block[1], argR, sizeof(V128)); 2229ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes ULong res, flags; 2230ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes __asm__ __volatile__( 2231ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes "subq $1024, %%rsp" "\n\t" 2232ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes "movdqu 0(%2), %%xmm2" "\n\t" 2233ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes "movdqu 16(%2), %%xmm11" "\n\t" 2234ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes "pcmpistri $0x10, %%xmm2, %%xmm11" "\n\t" 2235ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes//"pcmpistrm $0x10, %%xmm2, %%xmm11" "\n\t" 2236ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes//"movd %%xmm0, %%ecx" "\n\t" 2237ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes "pushfq" "\n\t" 2238ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes "popq %%rdx" "\n\t" 2239ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes "movq %%rcx, %0" "\n\t" 2240ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes "movq %%rdx, %1" "\n\t" 2241ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes "addq $1024, %%rsp" "\n\t" 2242ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes : 
/*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 2243ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 2244ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes ); 2245ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 2246ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes} 2247ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes 2248ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott HughesUInt s_pcmpistri_10 ( V128* argLU, V128* argRU ) 2249ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes{ 2250ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes V128 resV; 2251ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes UInt resOSZACP, resECX; 2252ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes Bool ok 2253ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 2254ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes zmask_from_V128(argLU), 2255ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes zmask_from_V128(argRU), 2256ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes 0x10, False/*!isSTRM*/ 2257ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes ); 2258ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes assert(ok); 2259ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes resECX = resV.uInt[0]; 2260ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes return (resOSZACP << 16) | resECX; 2261ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes} 2262ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes 2263ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughesvoid istri_10 ( void ) 2264ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes{ 2265ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes char* wot = "10"; 2266ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes UInt(*h)(V128*,V128*) = h_pcmpistri_10; 2267ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes 
UInt(*s)(V128*,V128*) = s_pcmpistri_10; 2268ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes 2269ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); 2270ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); 2271ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); 2272ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 2273ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes 2274ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 2275ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); 2276ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); 2277ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); 2278ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); 2279ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes 2280ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 2281ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); 2282ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); 2283ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); 2284ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes 2285ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 2286ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott 
Hughes try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 2287ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes 2288ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 2289ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 2290ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); 2291ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); 2292ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes 2293ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); 2294ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes 2295ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 2296ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 2297ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes} 2298ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes 2299ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes 2300ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes////////////////////////////////////////////////////////// 2301ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes// // 23020a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj// main // 23030a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj// // 23040a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj////////////////////////////////////////////////////////// 23050a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj 23060a87e8d2d445c36fbd78787c3a553ea46bfe50desewardjint main ( void ) 23070a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj{ 23080a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj istri_4A(); 23090a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj istri_3A(); 
23100a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj istri_08(); 2311053f436448ea3f8733f5205226d2989d4de31b66sewardj istri_18(); 23125ac99069b0538adcb2f18b04b078ea27b00b4185sewardj istri_1A(); 23135ac99069b0538adcb2f18b04b078ea27b00b4185sewardj istri_02(); 23140a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj istri_0C(); 23155ac99069b0538adcb2f18b04b078ea27b00b4185sewardj istri_12(); 23165ac99069b0538adcb2f18b04b078ea27b00b4185sewardj istri_44(); 23173b20b17c01834d95e1ce9785a0a366057320fe5csewardj istri_00(); 2318e801ed2c0d58802634b06de65a730364df4c08b2sewardj istri_38(); 231915df336557eb012a5f3b2f1482a0411857039496sewardj istri_46(); 2320473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj istri_30(); 2321473d0ba7d2a41cc0776b7d2f6a23b9cef55919e4sewardj istri_40(); 23227f0dfddf4ef62358f2f9ce8648146f2dd6863830sewardj istri_42(); 2323a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj istri_0E(); 2324a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj istri_14(); 2325a2bf3898843dc00329b7e8fe93c201c1fbceb225sewardj istri_34(); 2326a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes istri_70(); 2327a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes istri_62(); 2328a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes istri_72(); 2329ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes istri_10(); 23300a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj return 0; 23310a87e8d2d445c36fbd78787c3a553ea46bfe50desewardj} 2332