
/* Tests in detail the core arithmetic for pcmp{e,i}str{i,m} using
   pcmpistri to drive it.  Does not check the e-vs-i or i-vs-m
   aspect. */

#include <string.h>
#include <stdio.h>
#include <assert.h>

typedef  unsigned int            UInt;
typedef  signed int              Int;
typedef  unsigned char           UChar;
typedef  signed char             Char;
typedef  unsigned long long int  ULong;
typedef  UChar                   Bool;
#define False ((Bool)0)
#define True  ((Bool)1)

//typedef  unsigned char  V128[16];
/* A 128-bit vector, viewable as 16 bytes or as 4 32-bit words. */
typedef
   union {
      UChar uChar[16];
      UInt  uInt[4];
   }
   V128;

/* Bit positions of the O, S, Z, A, C and P flags, as used when
   building the OSZACP result word. */
#define SHIFT_O 11
#define SHIFT_S 7
#define SHIFT_Z 6
#define SHIFT_A 4
#define SHIFT_C 0
#define SHIFT_P 2

#define MASK_O (1ULL << SHIFT_O)
#define MASK_S (1ULL << SHIFT_S)
#define MASK_Z (1ULL << SHIFT_Z)
#define MASK_A (1ULL << SHIFT_A)
#define MASK_C (1ULL << SHIFT_C)
#define MASK_P (1ULL << SHIFT_P)


/* Count leading zero bits of a 32-bit value; returns 32 for x == 0.
   Written as a plain binary search so that it does not depend on
   how the compiler shifts negative signed values. */
UInt clz32 ( UInt x )
{
   UInt n = 0;
   if (x == 0) return 32;
   if ((x & 0xFFFF0000U) == 0) { n += 16; x <<= 16; }
   if ((x & 0xFF000000U) == 0) { n +=  8; x <<=  8; }
   if ((x & 0xF0000000U) == 0) { n +=  4; x <<=  4; }
   if ((x & 0xC0000000U) == 0) { n +=  2; x <<=  2; }
   if ((x & 0x80000000U) == 0) { n +=  1; }
   return n;
}

/* Count trailing zero bits of a 32-bit value; returns 32 for x == 0.
   (~x & (x-1)) has a 1 exactly at each trailing-zero position of x. */
UInt ctz32 ( UInt x )
{
   return 32 - clz32((~x) & (x-1));
}

/* Expand a 16-character hex summary into a 16-byte vector.  Each hex
   digit d becomes the byte 0xdd.  The rightmost character of the
   summary lands in dst->uChar[0], the leftmost in dst->uChar[15].
   Asserts if the summary is not exactly 16 hex digits. */
void expand ( V128* dst, const char* summary )
{
   Int i;
   assert( strlen(summary) == 16 );
   for (i = 0; i < 16; i++) {
      UChar xx = 0;
      UChar x = (UChar)summary[15-i];
      if      (x >= '0' && x <= '9') { xx = x - '0'; }
      else if (x >= 'A' && x <= 'F') { xx = x - 'A' + 10; }
      else if (x >= 'a' && x <= 'f') { xx = x - 'a' + 10; }
      else assert(0);

      assert(xx < 16);
      // replicate the nibble into both halves of the byte
      dst->uChar[i] = (UChar)((xx << 4) | xx);
   }
}

/* Run one test case: expand the two summaries, evaluate both h_fn and
   s_fn on them, and print both results.  The line ends in "!!!!" when
   the two results differ.  'which' is a 2-character label naming the
   imm8 under test. */
void try_istri ( const char* which,
                 UInt(*h_fn)(V128*,V128*),
                 UInt(*s_fn)(V128*,V128*),
                 const char* summL, const char* summR )
{
   assert(strlen(which) == 2);
   V128 argL, argR;
   expand(&argL, summL);
   expand(&argR, summR);
   UInt h_res = h_fn(&argL, &argR);
   UInt s_res = s_fn(&argL, &argR);
   printf("istri %s %s %s -> %08x %08x %s\n",
          which, summL, summR, h_res, s_res, h_res == s_res ? "" : "!!!!");
}

/* Build the 16-bit zero-byte mask of a vector: bit i is set iff
   arg->uChar[i] == 0. */
UInt zmask_from_V128 ( V128* arg )
{
   UInt i, res = 0;
   for (i = 0; i < 16; i++) {
      res |= ((arg->uChar[i] == 0) ? 1 : 0) << i;
   }
   return res;
}

//////////////////////////////////////////////////////////
//                                                      //
//                       GENERAL                        //
//                                                      //
//////////////////////////////////////////////////////////


/* Given partial results from a pcmpXstrX operation (intRes1,
   basically), generate an I format (index value for ECX) output, and
   also the new OSZACP flags.

   pol must be in 0 .. 3 and idx in 0 .. 1 (both asserted).  The new
   ECX value is written to resV->uInt[0]; the rest of *resV is left
   untouched.
*/
static
void pcmpXstrX_WRK_gen_output_fmt_I(/*OUT*/V128* resV,
                                    /*OUT*/UInt* resOSZACP,
                                    UInt intRes1,
                                    UInt zmaskL, UInt zmaskR,
                                    UInt validL,
                                    UInt pol, UInt idx )
{
   assert((pol >> 2) == 0);
   assert((idx >> 1) == 0);

   UInt intRes2 = 0;
   switch (pol) {
      case 0: intRes2 = intRes1;          break; // pol +
      case 1: intRes2 = ~intRes1;         break; // pol -
      case 2: intRes2 = intRes1;          break; // pol m+
      case 3: intRes2 = intRes1 ^ validL; break; // pol m-
      default: break; // not reachable: pol is asserted to be 0 .. 3
   }
   intRes2 &= 0xFFFF;

   // generate ecx value
   UInt newECX = 0;
   if (idx) {
      // index of ms-1-bit
      newECX = intRes2 == 0 ? 16 : (31 - clz32(intRes2));
   } else {
      // index of ls-1-bit
      newECX = intRes2 == 0 ? 16 : ctz32(intRes2);
   }

   resV->uInt[0] = newECX;

   // generate new flags, common to all ISTRI and ISTRM cases
   *resOSZACP    // A, P are zero
     = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0
     | ((zmaskL == 0)  ? 0 : MASK_Z) // Z == 1 iff any in argL is 0
     | ((zmaskR == 0)  ? 0 : MASK_S) // S == 1 iff any in argR is 0
     | ((intRes2 & 1) << SHIFT_O);   // O == IntRes2[0]
}


/* Turn a zero-byte mask into a validity mask: bit i of the result is
   set iff no bit at position <= i is set in zmask, that is,
   not(left(zmask)).  Bits 31:16 of the result are junk-but-set when
   zmask has no bits above 15; callers mask with 0xFFFF as needed. */
static UInt valid_from_zmask ( UInt zmask )
{
   return ~(zmask | -zmask);
}

/* "Equal each" (strcmp): bit i is the byte-wise comparison where both
   bytes are valid, 1 where both are invalid, and 0 otherwise. */
static UInt cmp_equal_each ( const UChar* argL, const UChar* argR,
                             UInt validL, UInt validR )
{
   UInt i, eq = 0;
   for (i = 0; i < 16; i++) {
      if (argL[i] == argR[i]) eq |= (1U << i);
   }
   return ( (eq & validL & validR)     // if both valid, use cmpres
            | (~ (validL | validR)) )  // if both invalid, force 1
          & 0xFFFF;                    // else force 0
}

/* "Equal any" (find chars in a set): argL is the string, argR the
   charset.  Bit si is set iff the valid byte argL[si] equals some
   valid byte of argR.  The result is "pre-invalidated". */
static UInt cmp_equal_any ( const UChar* argL, const UChar* argR,
                            UInt validL, UInt validR )
{
   UInt si, ci, res = 0;
   for (si = 0; si < 16; si++) {
      if ((validL & (1U << si)) == 0)
         // run off the end of the string.
         break;
      for (ci = 0; ci < 16; ci++) {
         if ((validR & (1U << ci)) == 0) break;
         if (argR[ci] == argL[si]) { res |= (1U << si); break; }
      }
   }
   return res & 0xFFFF;
}

/* "Equal ordered" (substring search): argL is the haystack, argR the
   needle.  Bit hi is set iff the valid part of the needle matches the
   haystack starting at hi; a needle that runs past byte 15 counts as
   matching on the bytes that are present.  The position just past the
   end of the haystack is still evaluated before the scan stops. */
static UInt cmp_equal_ordered ( const UChar* argL, const UChar* argR,
                                UInt validL, UInt validR )
{
   UInt hi, ni, res = 0;
   for (hi = 0; hi < 16; hi++) {
      UInt m = 1;
      for (ni = 0; ni < 16; ni++) {
         if ((validR & (1U << ni)) == 0) break;
         UInt i = ni + hi;
         if (i >= 16) break;
         if (argL[i] != argR[ni]) { m = 0; break; }
      }
      res |= (m << hi);
      if ((validL & (1U << hi)) == 0)
         // run off the end of the haystack
         break;
   }
   return res & 0xFFFF;
}

/* Read byte i of v as a signed or unsigned value. */
static Int byte_value ( const V128* v, UInt i, Bool isSigned )
{
   if (isSigned)
      return (Int)((const Char*)v->uChar)[i];
   return (Int)v->uChar[i];
}

/* "Ranges": argLV is the string, argRV holds (lo, hi) byte pairs.
   Bit si is set iff the valid byte argL[si] lies within some pair
   both of whose bytes are valid.  Bytes are compared as signed or
   unsigned according to isSigned.  The result is "pre-invalidated". */
static UInt cmp_ranges ( const V128* argLV, const V128* argRV,
                         UInt validL, UInt validR, Bool isSigned )
{
   UInt si, ri, res = 0;
   for (si = 0; si < 16; si++) {
      if ((validL & (1U << si)) == 0)
         // run off the end of the string
         break;
      Int c = byte_value(argLV, si, isSigned);
      for (ri = 0; ri < 16; ri += 2) {
         if ((validR & (3U << ri)) != (3U << ri)) break;
         if (byte_value(argRV, ri, isSigned) <= c
             && c <= byte_value(argRV, ri+1, isSigned)) {
            res |= (1U << si);
            break;
         }
      }
   }
   return res & 0xFFFF;
}


/* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M}
   variants.

   For xSTRI variants, the new ECX value is placed in the 32 bits
   pointed to by *resV.  For xSTRM variants, the result is a 128 bit
   value and is placed at *resV in the obvious way.

   For all variants, the new OSZACP value is placed at *resOSZACP.

   argLV and argRV are the vector args.  The caller must prepare a
   16-bit mask for each, zmaskL and zmaskR.  For ISTRx variants this
   must be 1 for each zero byte of the respective arg.  For ESTRx
   variants this is derived from the explicit length indication, and
   must be 0 in all places except at the bit index corresponding to
   the valid length (0 .. 16).  If the valid length is 16 then the
   mask must be all zeroes.  In all cases, bits 31:16 must be zero.

   imm8 is the original immediate from the instruction.  isSTRM
   indicates whether this is a xSTRM or xSTRI variant, which controls
   how much of *resV is written.

   If the given imm8 case can be handled, the return value is True.
   If not, False is returned, and neither *resV nor *resOSZACP are
   altered.  In this implementation only byte-data xSTRI cases
   (isSTRM == False) from the validated imm8 list are handled.
*/

Bool pcmpXstrX_WRK ( /*OUT*/V128* resV,
                     /*OUT*/UInt* resOSZACP,
                     V128* argLV, V128* argRV,
                     UInt zmaskL, UInt zmaskR,
                     UInt imm8, Bool isSTRM )
{
   assert(imm8 < 0x80);
   assert((zmaskL >> 16) == 0);
   assert((zmaskR >> 16) == 0);

   /* Explicitly reject any imm8 values that haven't been validated,
      even if they would probably work.  Life is too short to have
      unvalidated cases in the code base. */
   switch (imm8) {
      case 0x00: case 0x02:
      case 0x08: case 0x0A: case 0x0C: case 0x0E:
      case 0x10: case 0x12: case 0x14:
      case 0x18: case 0x1A:
      case 0x30: case 0x34:
      case 0x38: case 0x3A:
      case 0x40: case 0x42: case 0x44: case 0x46:
      case 0x4A:
      case 0x62:
      case 0x70: case 0x72:
         break;
      default:
         return False;
   }

   UInt fmt = (imm8 >> 0) & 3; // imm8[1:0]  data format
   UInt agg = (imm8 >> 2) & 3; // imm8[3:2]  aggregation fn
   UInt pol = (imm8 >> 4) & 3; // imm8[5:4]  polarity
   UInt idx = (imm8 >> 6) & 1; // imm8[6]    1==msb/bytemask

   /* Only the I-format (index) output is implemented. */
   if (isSTRM)
      return False;

   /* Only byte data: 0 == unsigned bytes, 2 == signed bytes. */
   if (fmt != 0/*ub*/ && fmt != 2/*sb*/)
      return False;

   const UChar* argL = argLV->uChar;
   const UChar* argR = argRV->uChar;
   UInt validL = valid_from_zmask(zmaskL); // not(left(zmaskL))
   UInt validR = valid_from_zmask(zmaskR); // not(left(zmaskR))

   UInt intRes1 = 0;
   switch (agg) {
      case 0: /* equal any, aka find chars in a set */
         intRes1 = cmp_equal_any(argL, argR, validL, validR);
         break;
      case 1: /* ranges; signedness only matters here */
         intRes1 = cmp_ranges(argLV, argRV, validL, validR,
                              fmt == 2 ? True : False);
         break;
      case 2: /* equal each, aka strcmp */
         intRes1 = cmp_equal_each(argL, argR, validL, validR);
         break;
      case 3: /* equal ordered, aka substring search */
         intRes1 = cmp_equal_ordered(argL, argR, validL, validR);
         break;
      default: /* not reachable: agg is a 2-bit field */
         return False;
   }

   // generate I-format output
   pcmpXstrX_WRK_gen_output_fmt_I(
      resV, resOSZACP,
      intRes1, zmaskL, zmaskR, validL, pol, idx
   );

   return True;
}

memcpy(&block[0], argL, sizeof(V128)); 437 memcpy(&block[1], argR, sizeof(V128)); 438 ULong res, flags; 439 __asm__ __volatile__( 440 "subq $1024, %%rsp" "\n\t" 441 "movdqu 0(%2), %%xmm2" "\n\t" 442 "movdqu 16(%2), %%xmm11" "\n\t" 443 "pcmpistri $0x4A, %%xmm2, %%xmm11" "\n\t" 444 "pushfq" "\n\t" 445 "popq %%rdx" "\n\t" 446 "movq %%rcx, %0" "\n\t" 447 "movq %%rdx, %1" "\n\t" 448 "addq $1024, %%rsp" "\n\t" 449 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 450 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 451 ); 452 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 453} 454 455UInt s_pcmpistri_4A ( V128* argLU, V128* argRU ) 456{ 457 V128 resV; 458 UInt resOSZACP, resECX; 459 Bool ok 460 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 461 zmask_from_V128(argLU), 462 zmask_from_V128(argRU), 463 0x4A, False/*!isSTRM*/ 464 ); 465 assert(ok); 466 resECX = resV.uInt[0]; 467 return (resOSZACP << 16) | resECX; 468} 469 470void istri_4A ( void ) 471{ 472 char* wot = "4A"; 473 UInt(*h)(V128*,V128*) = h_pcmpistri_4A; 474 UInt(*s)(V128*,V128*) = s_pcmpistri_4A; 475 476 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 477 478 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 479 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 480 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 481 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 482 483 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 484 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 485 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 486 487 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 488 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 489 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 490 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 491 492 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 493 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 494 
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 495 496 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 497 498 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 499 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 500 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); 501 502 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa"); 503 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 504 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); 505 506 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 507 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); 508 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa"); 509 510 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); 511 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); 512 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa"); 513 514 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 515 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 516} 517 518////////////////////////////////////////////////////////// 519// // 520// ISTRI_3A // 521// // 522////////////////////////////////////////////////////////// 523 524UInt h_pcmpistri_3A ( V128* argL, V128* argR ) 525{ 526 V128 block[2]; 527 memcpy(&block[0], argL, sizeof(V128)); 528 memcpy(&block[1], argR, sizeof(V128)); 529 ULong res, flags; 530 __asm__ __volatile__( 531 "subq $1024, %%rsp" "\n\t" 532 "movdqu 0(%2), %%xmm2" "\n\t" 533 "movdqu 16(%2), %%xmm11" "\n\t" 534 "pcmpistri $0x3A, %%xmm2, %%xmm11" "\n\t" 535 "pushfq" "\n\t" 536 "popq %%rdx" "\n\t" 537 "movq %%rcx, %0" "\n\t" 538 "movq %%rdx, %1" "\n\t" 539 "addq $1024, %%rsp" "\n\t" 540 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 541 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 542 ); 543 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 544} 545 546UInt s_pcmpistri_3A ( V128* argLU, V128* argRU ) 547{ 548 V128 resV; 549 UInt resOSZACP, resECX; 550 Bool ok 
551 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 552 zmask_from_V128(argLU), 553 zmask_from_V128(argRU), 554 0x3A, False/*!isSTRM*/ 555 ); 556 assert(ok); 557 resECX = resV.uInt[0]; 558 return (resOSZACP << 16) | resECX; 559} 560 561void istri_3A ( void ) 562{ 563 char* wot = "3A"; 564 UInt(*h)(V128*,V128*) = h_pcmpistri_3A; 565 UInt(*s)(V128*,V128*) = s_pcmpistri_3A; 566 567 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 568 569 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 570 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 571 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 572 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 573 574 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 575 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 576 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 577 578 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 579 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 580 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 581 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 582 583 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 584 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 585 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 586 587 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 588 589 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 590 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 591 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); 592 593 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa"); 594 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 595 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); 596 597 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 598 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); 599 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", 
"aaaaaaaa0aaaaaaa"); 600 601 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); 602 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); 603 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa"); 604 605 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 606 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 607} 608 609 610 611////////////////////////////////////////////////////////// 612// // 613// ISTRI_0C // 614// // 615////////////////////////////////////////////////////////// 616 617__attribute__((noinline)) 618UInt h_pcmpistri_0C ( V128* argL, V128* argR ) 619{ 620 V128 block[2]; 621 memcpy(&block[0], argL, sizeof(V128)); 622 memcpy(&block[1], argR, sizeof(V128)); 623 ULong res = 0, flags = 0; 624 __asm__ __volatile__( 625 "movdqu 0(%2), %%xmm2" "\n\t" 626 "movdqu 16(%2), %%xmm11" "\n\t" 627 "pcmpistri $0x0C, %%xmm2, %%xmm11" "\n\t" 628 //"pcmpistrm $0x0C, %%xmm2, %%xmm11" "\n\t" 629 //"movd %%xmm0, %%ecx" "\n\t" 630 "pushfq" "\n\t" 631 "popq %%rdx" "\n\t" 632 "movq %%rcx, %0" "\n\t" 633 "movq %%rdx, %1" "\n\t" 634 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 635 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 636 ); 637 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 638} 639 640UInt s_pcmpistri_0C ( V128* argLU, V128* argRU ) 641{ 642 V128 resV; 643 UInt resOSZACP, resECX; 644 Bool ok 645 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 646 zmask_from_V128(argLU), 647 zmask_from_V128(argRU), 648 0x0C, False/*!isSTRM*/ 649 ); 650 assert(ok); 651 resECX = resV.uInt[0]; 652 return (resOSZACP << 16) | resECX; 653} 654 655void istri_0C ( void ) 656{ 657 char* wot = "0C"; 658 UInt(*h)(V128*,V128*) = h_pcmpistri_0C; 659 UInt(*s)(V128*,V128*) = s_pcmpistri_0C; 660 661 try_istri(wot,h,s, "111111111abcde11", "00000000000abcde"); 662 663 try_istri(wot,h,s, "111111111abcde11", "0000abcde00abcde"); 664 665 try_istri(wot,h,s, "1111111111abcde1", "00000000000abcde"); 666 try_istri(wot,h,s, "11111111111abcde", 
"00000000000abcde"); 667 try_istri(wot,h,s, "111111111111abcd", "00000000000abcde"); 668 669 try_istri(wot,h,s, "111abcde1abcde11", "00000000000abcde"); 670 671 try_istri(wot,h,s, "11abcde11abcde11", "00000000000abcde"); 672 try_istri(wot,h,s, "1abcde111abcde11", "00000000000abcde"); 673 try_istri(wot,h,s, "abcde1111abcde11", "00000000000abcde"); 674 try_istri(wot,h,s, "bcde11111abcde11", "00000000000abcde"); 675 try_istri(wot,h,s, "cde111111abcde11", "00000000000abcde"); 676 677 try_istri(wot,h,s, "01abcde11abcde11", "00000000000abcde"); 678 try_istri(wot,h,s, "00abcde11abcde11", "00000000000abcde"); 679 try_istri(wot,h,s, "000bcde11abcde11", "00000000000abcde"); 680 681 try_istri(wot,h,s, "00abcde10abcde11", "00000000000abcde"); 682 try_istri(wot,h,s, "00abcde100bcde11", "00000000000abcde"); 683 684 try_istri(wot,h,s, "1111111111111234", "0000000000000000"); 685 try_istri(wot,h,s, "1111111111111234", "0000000000000001"); 686 try_istri(wot,h,s, "1111111111111234", "0000000000000011"); 687 688 try_istri(wot,h,s, "1111111111111234", "1111111111111234"); 689 try_istri(wot,h,s, "a111111111111111", "000000000000000a"); 690 try_istri(wot,h,s, "b111111111111111", "000000000000000a"); 691 692 try_istri(wot,h,s, "b111111111111111", "0000000000000000"); 693 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 694 try_istri(wot,h,s, "123456789abcdef1", "0000000000000000"); 695 try_istri(wot,h,s, "0000000000000000", "123456789abcdef1"); 696} 697 698 699////////////////////////////////////////////////////////// 700// // 701// ISTRI_08 // 702// // 703////////////////////////////////////////////////////////// 704 705UInt h_pcmpistri_08 ( V128* argL, V128* argR ) 706{ 707 V128 block[2]; 708 memcpy(&block[0], argL, sizeof(V128)); 709 memcpy(&block[1], argR, sizeof(V128)); 710 ULong res, flags; 711 __asm__ __volatile__( 712 "subq $1024, %%rsp" "\n\t" 713 "movdqu 0(%2), %%xmm2" "\n\t" 714 "movdqu 16(%2), %%xmm11" "\n\t" 715 "pcmpistri $0x08, %%xmm2, %%xmm11" "\n\t" 716 
"pushfq" "\n\t" 717 "popq %%rdx" "\n\t" 718 "movq %%rcx, %0" "\n\t" 719 "movq %%rdx, %1" "\n\t" 720 "addq $1024, %%rsp" "\n\t" 721 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 722 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 723 ); 724 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 725} 726 727UInt s_pcmpistri_08 ( V128* argLU, V128* argRU ) 728{ 729 V128 resV; 730 UInt resOSZACP, resECX; 731 Bool ok 732 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 733 zmask_from_V128(argLU), 734 zmask_from_V128(argRU), 735 0x08, False/*!isSTRM*/ 736 ); 737 assert(ok); 738 resECX = resV.uInt[0]; 739 return (resOSZACP << 16) | resECX; 740} 741 742void istri_08 ( void ) 743{ 744 char* wot = "08"; 745 UInt(*h)(V128*,V128*) = h_pcmpistri_08; 746 UInt(*s)(V128*,V128*) = s_pcmpistri_08; 747 748 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 749 750 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 751 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 752 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 753 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 754 755 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 756 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 757 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 758 759 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 760 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 761 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 762 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 763 764 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 765 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 766 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 767 768 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 769 770 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 771 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 772 
try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); 773 774 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa"); 775 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 776 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); 777 778 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 779 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); 780 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa"); 781 782 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); 783 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); 784 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa"); 785 786 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 787 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 788} 789 790 791 792////////////////////////////////////////////////////////// 793// // 794// ISTRI_18 // 795// // 796////////////////////////////////////////////////////////// 797 798UInt h_pcmpistri_18 ( V128* argL, V128* argR ) 799{ 800 V128 block[2]; 801 memcpy(&block[0], argL, sizeof(V128)); 802 memcpy(&block[1], argR, sizeof(V128)); 803 ULong res, flags; 804 __asm__ __volatile__( 805 "subq $1024, %%rsp" "\n\t" 806 "movdqu 0(%2), %%xmm2" "\n\t" 807 "movdqu 16(%2), %%xmm11" "\n\t" 808 "pcmpistri $0x18, %%xmm2, %%xmm11" "\n\t" 809 "pushfq" "\n\t" 810 "popq %%rdx" "\n\t" 811 "movq %%rcx, %0" "\n\t" 812 "movq %%rdx, %1" "\n\t" 813 "addq $1024, %%rsp" "\n\t" 814 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 815 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 816 ); 817 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 818} 819 820UInt s_pcmpistri_18 ( V128* argLU, V128* argRU ) 821{ 822 V128 resV; 823 UInt resOSZACP, resECX; 824 Bool ok 825 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 826 zmask_from_V128(argLU), 827 zmask_from_V128(argRU), 828 0x18, False/*!isSTRM*/ 829 ); 830 assert(ok); 831 resECX = resV.uInt[0]; 832 return (resOSZACP << 16) | resECX; 833} 834 835void istri_18 ( 
void ) 836{ 837 char* wot = "18"; 838 UInt(*h)(V128*,V128*) = h_pcmpistri_18; 839 UInt(*s)(V128*,V128*) = s_pcmpistri_18; 840 841 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 842 843 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 844 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 845 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 846 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 847 848 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 849 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 850 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 851 852 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 853 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 854 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 855 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 856 857 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 858 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 859 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 860 861 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 862 863 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 864 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 865 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); 866 867 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa"); 868 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 869 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); 870 871 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 872 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); 873 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa"); 874 875 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); 876 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); 877 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa"); 878 879 try_istri(wot,h,s, 
"0000000000000000", "aaaaaaaaaaaaaaaa"); 880 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 881} 882 883 884 885////////////////////////////////////////////////////////// 886// // 887// ISTRI_1A // 888// // 889////////////////////////////////////////////////////////// 890 891UInt h_pcmpistri_1A ( V128* argL, V128* argR ) 892{ 893 V128 block[2]; 894 memcpy(&block[0], argL, sizeof(V128)); 895 memcpy(&block[1], argR, sizeof(V128)); 896 ULong res, flags; 897 __asm__ __volatile__( 898 "subq $1024, %%rsp" "\n\t" 899 "movdqu 0(%2), %%xmm2" "\n\t" 900 "movdqu 16(%2), %%xmm11" "\n\t" 901 "pcmpistri $0x1A, %%xmm2, %%xmm11" "\n\t" 902 "pushfq" "\n\t" 903 "popq %%rdx" "\n\t" 904 "movq %%rcx, %0" "\n\t" 905 "movq %%rdx, %1" "\n\t" 906 "addq $1024, %%rsp" "\n\t" 907 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 908 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 909 ); 910 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 911} 912 913UInt s_pcmpistri_1A ( V128* argLU, V128* argRU ) 914{ 915 V128 resV; 916 UInt resOSZACP, resECX; 917 Bool ok 918 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 919 zmask_from_V128(argLU), 920 zmask_from_V128(argRU), 921 0x1A, False/*!isSTRM*/ 922 ); 923 assert(ok); 924 resECX = resV.uInt[0]; 925 return (resOSZACP << 16) | resECX; 926} 927 928void istri_1A ( void ) 929{ 930 char* wot = "1A"; 931 UInt(*h)(V128*,V128*) = h_pcmpistri_1A; 932 UInt(*s)(V128*,V128*) = s_pcmpistri_1A; 933 934 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 935 936 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 937 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 938 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 939 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 940 941 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 942 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 943 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 944 945 try_istri(wot,h,s, 
"aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 946 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 947 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 948 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 949 950 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 951 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 952 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 953 954 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 955 956 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 957 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 958 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); 959 960 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa"); 961 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 962 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); 963 964 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 965 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); 966 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa"); 967 968 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); 969 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); 970 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa"); 971 972 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 973 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 974} 975 976 977 978////////////////////////////////////////////////////////// 979// // 980// ISTRI_02 // 981// // 982////////////////////////////////////////////////////////// 983 984UInt h_pcmpistri_02 ( V128* argL, V128* argR ) 985{ 986 V128 block[2]; 987 memcpy(&block[0], argL, sizeof(V128)); 988 memcpy(&block[1], argR, sizeof(V128)); 989 ULong res, flags; 990 __asm__ __volatile__( 991 "subq $1024, %%rsp" "\n\t" 992 "movdqu 0(%2), %%xmm2" "\n\t" 993 "movdqu 16(%2), %%xmm11" "\n\t" 994 "pcmpistri $0x02, %%xmm2, %%xmm11" "\n\t" 995//"pcmpistrm $0x02, %%xmm2, %%xmm11" "\n\t" 
996//"movd %%xmm0, %%ecx" "\n\t" 997 "pushfq" "\n\t" 998 "popq %%rdx" "\n\t" 999 "movq %%rcx, %0" "\n\t" 1000 "movq %%rdx, %1" "\n\t" 1001 "addq $1024, %%rsp" "\n\t" 1002 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1003 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1004 ); 1005 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1006} 1007 1008UInt s_pcmpistri_02 ( V128* argLU, V128* argRU ) 1009{ 1010 V128 resV; 1011 UInt resOSZACP, resECX; 1012 Bool ok 1013 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 1014 zmask_from_V128(argLU), 1015 zmask_from_V128(argRU), 1016 0x02, False/*!isSTRM*/ 1017 ); 1018 assert(ok); 1019 resECX = resV.uInt[0]; 1020 return (resOSZACP << 16) | resECX; 1021} 1022 1023void istri_02 ( void ) 1024{ 1025 char* wot = "02"; 1026 UInt(*h)(V128*,V128*) = h_pcmpistri_02; 1027 UInt(*s)(V128*,V128*) = s_pcmpistri_02; 1028 1029 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); 1030 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); 1031 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); 1032 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 1033 1034 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 1035 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); 1036 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); 1037 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); 1038 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); 1039 1040 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 1041 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); 1042 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); 1043 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); 1044 1045 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1046 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1047 1048 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 1049 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 1050 
try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); 1051 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); 1052 1053 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); 1054 1055 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 1056 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 1057} 1058 1059 1060////////////////////////////////////////////////////////// 1061// // 1062// ISTRI_12 // 1063// // 1064////////////////////////////////////////////////////////// 1065 1066UInt h_pcmpistri_12 ( V128* argL, V128* argR ) 1067{ 1068 V128 block[2]; 1069 memcpy(&block[0], argL, sizeof(V128)); 1070 memcpy(&block[1], argR, sizeof(V128)); 1071 ULong res, flags; 1072 __asm__ __volatile__( 1073 "subq $1024, %%rsp" "\n\t" 1074 "movdqu 0(%2), %%xmm2" "\n\t" 1075 "movdqu 16(%2), %%xmm11" "\n\t" 1076 "pcmpistri $0x12, %%xmm2, %%xmm11" "\n\t" 1077//"pcmpistrm $0x12, %%xmm2, %%xmm11" "\n\t" 1078//"movd %%xmm0, %%ecx" "\n\t" 1079 "pushfq" "\n\t" 1080 "popq %%rdx" "\n\t" 1081 "movq %%rcx, %0" "\n\t" 1082 "movq %%rdx, %1" "\n\t" 1083 "addq $1024, %%rsp" "\n\t" 1084 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1085 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1086 ); 1087 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1088} 1089 1090UInt s_pcmpistri_12 ( V128* argLU, V128* argRU ) 1091{ 1092 V128 resV; 1093 UInt resOSZACP, resECX; 1094 Bool ok 1095 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 1096 zmask_from_V128(argLU), 1097 zmask_from_V128(argRU), 1098 0x12, False/*!isSTRM*/ 1099 ); 1100 assert(ok); 1101 resECX = resV.uInt[0]; 1102 return (resOSZACP << 16) | resECX; 1103} 1104 1105void istri_12 ( void ) 1106{ 1107 char* wot = "12"; 1108 UInt(*h)(V128*,V128*) = h_pcmpistri_12; 1109 UInt(*s)(V128*,V128*) = s_pcmpistri_12; 1110 1111 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); 1112 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); 1113 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); 
1114 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 1115 1116 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 1117 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); 1118 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); 1119 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); 1120 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); 1121 1122 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 1123 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); 1124 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); 1125 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); 1126 1127 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1128 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1129 1130 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 1131 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 1132 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); 1133 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); 1134 1135 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); 1136 1137 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 1138 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 1139} 1140 1141 1142 1143////////////////////////////////////////////////////////// 1144// // 1145// ISTRI_44 // 1146// // 1147////////////////////////////////////////////////////////// 1148 1149UInt h_pcmpistri_44 ( V128* argL, V128* argR ) 1150{ 1151 V128 block[2]; 1152 memcpy(&block[0], argL, sizeof(V128)); 1153 memcpy(&block[1], argR, sizeof(V128)); 1154 ULong res, flags; 1155 __asm__ __volatile__( 1156 "subq $1024, %%rsp" "\n\t" 1157 "movdqu 0(%2), %%xmm2" "\n\t" 1158 "movdqu 16(%2), %%xmm11" "\n\t" 1159 "pcmpistri $0x44, %%xmm2, %%xmm11" "\n\t" 1160//"pcmpistrm $0x04, %%xmm2, %%xmm11" "\n\t" 1161//"movd %%xmm0, %%ecx" "\n\t" 1162 "pushfq" "\n\t" 1163 "popq %%rdx" "\n\t" 1164 "movq %%rcx, %0" "\n\t" 1165 "movq %%rdx, 
%1" "\n\t" 1166 "addq $1024, %%rsp" "\n\t" 1167 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1168 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1169 ); 1170 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1171} 1172 1173UInt s_pcmpistri_44 ( V128* argLU, V128* argRU ) 1174{ 1175 V128 resV; 1176 UInt resOSZACP, resECX; 1177 Bool ok 1178 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 1179 zmask_from_V128(argLU), 1180 zmask_from_V128(argRU), 1181 0x44, False/*!isSTRM*/ 1182 ); 1183 assert(ok); 1184 resECX = resV.uInt[0]; 1185 return (resOSZACP << 16) | resECX; 1186} 1187 1188void istri_44 ( void ) 1189{ 1190 char* wot = "44"; 1191 UInt(*h)(V128*,V128*) = h_pcmpistri_44; 1192 UInt(*s)(V128*,V128*) = s_pcmpistri_44; 1193 1194 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc"); 1195 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb"); 1196 try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb"); 1197 try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb"); 1198 1199 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb"); 1200 try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb"); 1201 try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb"); 1202 try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb"); 1203 try_istri(wot,h,s, "0000000000000000", "00000000000000cb"); 1204 1205 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1206 1207 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb"); 1208 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b"); 1209 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb"); 1210 1211 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb"); 1212 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb"); 1213 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b"); 1214 1215 try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421"); 1216 try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421"); 1217 1218 try_istri(wot,h,s, "0123456789abcdef", 
"00000000dca86532"); 1219 try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532"); 1220 1221 try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a"); 1222 try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7"); 1223 try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54"); 1224 try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb"); 1225} 1226 1227 1228////////////////////////////////////////////////////////// 1229// // 1230// ISTRI_00 // 1231// // 1232////////////////////////////////////////////////////////// 1233 1234UInt h_pcmpistri_00 ( V128* argL, V128* argR ) 1235{ 1236 V128 block[2]; 1237 memcpy(&block[0], argL, sizeof(V128)); 1238 memcpy(&block[1], argR, sizeof(V128)); 1239 ULong res, flags; 1240 __asm__ __volatile__( 1241 "subq $1024, %%rsp" "\n\t" 1242 "movdqu 0(%2), %%xmm2" "\n\t" 1243 "movdqu 16(%2), %%xmm11" "\n\t" 1244 "pcmpistri $0x00, %%xmm2, %%xmm11" "\n\t" 1245//"pcmpistrm $0x00, %%xmm2, %%xmm11" "\n\t" 1246//"movd %%xmm0, %%ecx" "\n\t" 1247 "pushfq" "\n\t" 1248 "popq %%rdx" "\n\t" 1249 "movq %%rcx, %0" "\n\t" 1250 "movq %%rdx, %1" "\n\t" 1251 "addq $1024, %%rsp" "\n\t" 1252 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1253 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1254 ); 1255 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1256} 1257 1258UInt s_pcmpistri_00 ( V128* argLU, V128* argRU ) 1259{ 1260 V128 resV; 1261 UInt resOSZACP, resECX; 1262 Bool ok 1263 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 1264 zmask_from_V128(argLU), 1265 zmask_from_V128(argRU), 1266 0x00, False/*!isSTRM*/ 1267 ); 1268 assert(ok); 1269 resECX = resV.uInt[0]; 1270 return (resOSZACP << 16) | resECX; 1271} 1272 1273void istri_00 ( void ) 1274{ 1275 char* wot = "00"; 1276 UInt(*h)(V128*,V128*) = h_pcmpistri_00; 1277 UInt(*s)(V128*,V128*) = s_pcmpistri_00; 1278 1279 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); 1280 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); 1281 try_istri(wot,h,s, "abcdabcdabcdabcd", 
"00000000000000ab"); 1282 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 1283 1284 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 1285 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); 1286 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); 1287 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); 1288 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); 1289 1290 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 1291 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); 1292 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); 1293 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); 1294 1295 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1296 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1297 1298 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 1299 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 1300 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); 1301 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); 1302 1303 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); 1304 1305 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 1306 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 1307} 1308 1309 1310////////////////////////////////////////////////////////// 1311// // 1312// ISTRI_38 // 1313// // 1314////////////////////////////////////////////////////////// 1315 1316UInt h_pcmpistri_38 ( V128* argL, V128* argR ) 1317{ 1318 V128 block[2]; 1319 memcpy(&block[0], argL, sizeof(V128)); 1320 memcpy(&block[1], argR, sizeof(V128)); 1321 ULong res, flags; 1322 __asm__ __volatile__( 1323 "subq $1024, %%rsp" "\n\t" 1324 "movdqu 0(%2), %%xmm2" "\n\t" 1325 "movdqu 16(%2), %%xmm11" "\n\t" 1326 "pcmpistri $0x38, %%xmm2, %%xmm11" "\n\t" 1327 "pushfq" "\n\t" 1328 "popq %%rdx" "\n\t" 1329 "movq %%rcx, %0" "\n\t" 1330 "movq %%rdx, %1" "\n\t" 1331 "addq $1024, %%rsp" "\n\t" 1332 : /*out*/ "=r"(res), 
"=r"(flags) : "r"/*in*/(&block[0]) 1333 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1334 ); 1335 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1336} 1337 1338UInt s_pcmpistri_38 ( V128* argLU, V128* argRU ) 1339{ 1340 V128 resV; 1341 UInt resOSZACP, resECX; 1342 Bool ok 1343 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 1344 zmask_from_V128(argLU), 1345 zmask_from_V128(argRU), 1346 0x38, False/*!isSTRM*/ 1347 ); 1348 assert(ok); 1349 resECX = resV.uInt[0]; 1350 return (resOSZACP << 16) | resECX; 1351} 1352 1353void istri_38 ( void ) 1354{ 1355 char* wot = "38"; 1356 UInt(*h)(V128*,V128*) = h_pcmpistri_38; 1357 UInt(*s)(V128*,V128*) = s_pcmpistri_38; 1358 1359 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1360 1361 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1362 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1363 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 1364 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 1365 1366 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 1367 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 1368 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 1369 1370 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1371 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1372 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1373 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1374 1375 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1376 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 1377 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 1378 1379 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1380 1381 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 1382 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 1383 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); 1384 1385 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", 
"aaaaaaaaaaaaaaaa"); 1386 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 1387 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); 1388 1389 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 1390 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); 1391 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa"); 1392 1393 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); 1394 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); 1395 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa"); 1396 1397 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 1398 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 1399} 1400 1401 1402 1403////////////////////////////////////////////////////////// 1404// // 1405// ISTRI_46 // 1406// // 1407////////////////////////////////////////////////////////// 1408 1409UInt h_pcmpistri_46 ( V128* argL, V128* argR ) 1410{ 1411 V128 block[2]; 1412 memcpy(&block[0], argL, sizeof(V128)); 1413 memcpy(&block[1], argR, sizeof(V128)); 1414 ULong res, flags; 1415 __asm__ __volatile__( 1416 "subq $1024, %%rsp" "\n\t" 1417 "movdqu 0(%2), %%xmm2" "\n\t" 1418 "movdqu 16(%2), %%xmm11" "\n\t" 1419 "pcmpistri $0x46, %%xmm2, %%xmm11" "\n\t" 1420 "pushfq" "\n\t" 1421 "popq %%rdx" "\n\t" 1422 "movq %%rcx, %0" "\n\t" 1423 "movq %%rdx, %1" "\n\t" 1424 "addq $1024, %%rsp" "\n\t" 1425 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1426 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1427 ); 1428 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1429} 1430 1431UInt s_pcmpistri_46 ( V128* argLU, V128* argRU ) 1432{ 1433 V128 resV; 1434 UInt resOSZACP, resECX; 1435 Bool ok 1436 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 1437 zmask_from_V128(argLU), 1438 zmask_from_V128(argRU), 1439 0x46, False/*!isSTRM*/ 1440 ); 1441 assert(ok); 1442 resECX = resV.uInt[0]; 1443 return (resOSZACP << 16) | resECX; 1444} 1445 1446void istri_46 ( void ) 1447{ 1448 char* wot = "46"; 1449 
UInt(*h)(V128*,V128*) = h_pcmpistri_46; 1450 UInt(*s)(V128*,V128*) = s_pcmpistri_46; 1451 1452 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc"); 1453 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb"); 1454 try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb"); 1455 try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb"); 1456 1457 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb"); 1458 try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb"); 1459 try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb"); 1460 try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb"); 1461 try_istri(wot,h,s, "0000000000000000", "00000000000000cb"); 1462 1463 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1464 1465 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb"); 1466 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b"); 1467 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb"); 1468 1469 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb"); 1470 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb"); 1471 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b"); 1472 1473 try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421"); 1474 try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421"); 1475 1476 try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532"); 1477 try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532"); 1478 1479 try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a"); 1480 try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7"); 1481 try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54"); 1482 try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb"); 1483} 1484 1485 1486////////////////////////////////////////////////////////// 1487// // 1488// ISTRI_30 // 1489// // 1490////////////////////////////////////////////////////////// 1491 1492UInt h_pcmpistri_30 ( V128* argL, V128* argR ) 1493{ 1494 V128 block[2]; 1495 memcpy(&block[0], argL, sizeof(V128)); 1496 
memcpy(&block[1], argR, sizeof(V128)); 1497 ULong res, flags; 1498 __asm__ __volatile__( 1499 "subq $1024, %%rsp" "\n\t" 1500 "movdqu 0(%2), %%xmm2" "\n\t" 1501 "movdqu 16(%2), %%xmm11" "\n\t" 1502 "pcmpistri $0x30, %%xmm2, %%xmm11" "\n\t" 1503 "pushfq" "\n\t" 1504 "popq %%rdx" "\n\t" 1505 "movq %%rcx, %0" "\n\t" 1506 "movq %%rdx, %1" "\n\t" 1507 "addq $1024, %%rsp" "\n\t" 1508 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1509 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1510 ); 1511 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1512} 1513 1514UInt s_pcmpistri_30 ( V128* argLU, V128* argRU ) 1515{ 1516 V128 resV; 1517 UInt resOSZACP, resECX; 1518 Bool ok 1519 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 1520 zmask_from_V128(argLU), 1521 zmask_from_V128(argRU), 1522 0x30, False/*!isSTRM*/ 1523 ); 1524 assert(ok); 1525 resECX = resV.uInt[0]; 1526 return (resOSZACP << 16) | resECX; 1527} 1528 1529void istri_30 ( void ) 1530{ 1531 char* wot = "30"; 1532 UInt(*h)(V128*,V128*) = h_pcmpistri_30; 1533 UInt(*s)(V128*,V128*) = s_pcmpistri_30; 1534 1535 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); 1536 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); 1537 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); 1538 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 1539 1540 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 1541 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); 1542 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); 1543 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); 1544 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); 1545 1546 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 1547 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); 1548 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); 1549 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); 1550 1551 try_istri(wot,h,s, "0000000000000000", 
"0000000000000000"); 1552 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1553 1554 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 1555 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 1556 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); 1557 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); 1558 1559 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); 1560 1561 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 1562 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 1563} 1564 1565 1566////////////////////////////////////////////////////////// 1567// // 1568// ISTRI_40 // 1569// // 1570////////////////////////////////////////////////////////// 1571 1572UInt h_pcmpistri_40 ( V128* argL, V128* argR ) 1573{ 1574 V128 block[2]; 1575 memcpy(&block[0], argL, sizeof(V128)); 1576 memcpy(&block[1], argR, sizeof(V128)); 1577 ULong res, flags; 1578 __asm__ __volatile__( 1579 "subq $1024, %%rsp" "\n\t" 1580 "movdqu 0(%2), %%xmm2" "\n\t" 1581 "movdqu 16(%2), %%xmm11" "\n\t" 1582 "pcmpistri $0x40, %%xmm2, %%xmm11" "\n\t" 1583 "pushfq" "\n\t" 1584 "popq %%rdx" "\n\t" 1585 "movq %%rcx, %0" "\n\t" 1586 "movq %%rdx, %1" "\n\t" 1587 "addq $1024, %%rsp" "\n\t" 1588 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1589 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1590 ); 1591 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1592} 1593 1594UInt s_pcmpistri_40 ( V128* argLU, V128* argRU ) 1595{ 1596 V128 resV; 1597 UInt resOSZACP, resECX; 1598 Bool ok 1599 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 1600 zmask_from_V128(argLU), 1601 zmask_from_V128(argRU), 1602 0x40, False/*!isSTRM*/ 1603 ); 1604 assert(ok); 1605 resECX = resV.uInt[0]; 1606 return (resOSZACP << 16) | resECX; 1607} 1608 1609void istri_40 ( void ) 1610{ 1611 char* wot = "40"; 1612 UInt(*h)(V128*,V128*) = h_pcmpistri_40; 1613 UInt(*s)(V128*,V128*) = s_pcmpistri_40; 1614 1615 try_istri(wot,h,s, "abcdacbdabcdabcd", 
"000000000000000a"); 1616 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); 1617 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); 1618 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 1619 1620 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 1621 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); 1622 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); 1623 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); 1624 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); 1625 1626 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 1627 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); 1628 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); 1629 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); 1630 1631 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1632 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1633 1634 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 1635 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 1636 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); 1637 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); 1638 1639 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); 1640 1641 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 1642 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 1643} 1644 1645 1646////////////////////////////////////////////////////////// 1647// // 1648// ISTRI_42 // 1649// // 1650////////////////////////////////////////////////////////// 1651 1652UInt h_pcmpistri_42 ( V128* argL, V128* argR ) 1653{ 1654 V128 block[2]; 1655 memcpy(&block[0], argL, sizeof(V128)); 1656 memcpy(&block[1], argR, sizeof(V128)); 1657 ULong res, flags; 1658 __asm__ __volatile__( 1659 "subq $1024, %%rsp" "\n\t" 1660 "movdqu 0(%2), %%xmm2" "\n\t" 1661 "movdqu 16(%2), %%xmm11" "\n\t" 1662 "pcmpistri $0x42, %%xmm2, %%xmm11" "\n\t" 1663 "pushfq" "\n\t" 1664 "popq 
%%rdx" "\n\t" 1665 "movq %%rcx, %0" "\n\t" 1666 "movq %%rdx, %1" "\n\t" 1667 "addq $1024, %%rsp" "\n\t" 1668 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1669 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1670 ); 1671 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1672} 1673 1674UInt s_pcmpistri_42 ( V128* argLU, V128* argRU ) 1675{ 1676 V128 resV; 1677 UInt resOSZACP, resECX; 1678 Bool ok 1679 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 1680 zmask_from_V128(argLU), 1681 zmask_from_V128(argRU), 1682 0x42, False/*!isSTRM*/ 1683 ); 1684 assert(ok); 1685 resECX = resV.uInt[0]; 1686 return (resOSZACP << 16) | resECX; 1687} 1688 1689void istri_42 ( void ) 1690{ 1691 char* wot = "42"; 1692 UInt(*h)(V128*,V128*) = h_pcmpistri_42; 1693 UInt(*s)(V128*,V128*) = s_pcmpistri_42; 1694 1695 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); 1696 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); 1697 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); 1698 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 1699 1700 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 1701 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); 1702 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); 1703 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); 1704 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); 1705 1706 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 1707 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); 1708 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); 1709 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); 1710 1711 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1712 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1713 1714 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 1715 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 1716 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); 
1717 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); 1718 1719 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); 1720 1721 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 1722 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 1723} 1724 1725 1726////////////////////////////////////////////////////////// 1727// // 1728// ISTRI_0E // 1729// // 1730////////////////////////////////////////////////////////// 1731 1732__attribute__((noinline)) 1733UInt h_pcmpistri_0E ( V128* argL, V128* argR ) 1734{ 1735 V128 block[2]; 1736 memcpy(&block[0], argL, sizeof(V128)); 1737 memcpy(&block[1], argR, sizeof(V128)); 1738 ULong res = 0, flags = 0; 1739 __asm__ __volatile__( 1740 "movdqu 0(%2), %%xmm2" "\n\t" 1741 "movdqu 16(%2), %%xmm11" "\n\t" 1742 "pcmpistri $0x0E, %%xmm2, %%xmm11" "\n\t" 1743 "pushfq" "\n\t" 1744 "popq %%rdx" "\n\t" 1745 "movq %%rcx, %0" "\n\t" 1746 "movq %%rdx, %1" "\n\t" 1747 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1748 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1749 ); 1750 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1751} 1752 1753UInt s_pcmpistri_0E ( V128* argLU, V128* argRU ) 1754{ 1755 V128 resV; 1756 UInt resOSZACP, resECX; 1757 Bool ok 1758 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 1759 zmask_from_V128(argLU), 1760 zmask_from_V128(argRU), 1761 0x0E, False/*!isSTRM*/ 1762 ); 1763 assert(ok); 1764 resECX = resV.uInt[0]; 1765 return (resOSZACP << 16) | resECX; 1766} 1767 1768void istri_0E ( void ) 1769{ 1770 char* wot = "0E"; 1771 UInt(*h)(V128*,V128*) = h_pcmpistri_0E; 1772 UInt(*s)(V128*,V128*) = s_pcmpistri_0E; 1773 1774 try_istri(wot,h,s, "111111111abcde11", "00000000000abcde"); 1775 1776 try_istri(wot,h,s, "111111111abcde11", "0000abcde00abcde"); 1777 1778 try_istri(wot,h,s, "1111111111abcde1", "00000000000abcde"); 1779 try_istri(wot,h,s, "11111111111abcde", "00000000000abcde"); 1780 try_istri(wot,h,s, "111111111111abcd", "00000000000abcde"); 1781 1782 try_istri(wot,h,s, 
"111abcde1abcde11", "00000000000abcde"); 1783 1784 try_istri(wot,h,s, "11abcde11abcde11", "00000000000abcde"); 1785 try_istri(wot,h,s, "1abcde111abcde11", "00000000000abcde"); 1786 try_istri(wot,h,s, "abcde1111abcde11", "00000000000abcde"); 1787 try_istri(wot,h,s, "bcde11111abcde11", "00000000000abcde"); 1788 try_istri(wot,h,s, "cde111111abcde11", "00000000000abcde"); 1789 1790 try_istri(wot,h,s, "01abcde11abcde11", "00000000000abcde"); 1791 try_istri(wot,h,s, "00abcde11abcde11", "00000000000abcde"); 1792 try_istri(wot,h,s, "000bcde11abcde11", "00000000000abcde"); 1793 1794 try_istri(wot,h,s, "00abcde10abcde11", "00000000000abcde"); 1795 try_istri(wot,h,s, "00abcde100bcde11", "00000000000abcde"); 1796 1797 try_istri(wot,h,s, "1111111111111234", "0000000000000000"); 1798 try_istri(wot,h,s, "1111111111111234", "0000000000000001"); 1799 try_istri(wot,h,s, "1111111111111234", "0000000000000011"); 1800 1801 try_istri(wot,h,s, "1111111111111234", "1111111111111234"); 1802 try_istri(wot,h,s, "a111111111111111", "000000000000000a"); 1803 try_istri(wot,h,s, "b111111111111111", "000000000000000a"); 1804 1805 try_istri(wot,h,s, "b111111111111111", "0000000000000000"); 1806 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1807 try_istri(wot,h,s, "123456789abcdef1", "0000000000000000"); 1808 try_istri(wot,h,s, "0000000000000000", "123456789abcdef1"); 1809} 1810 1811 1812////////////////////////////////////////////////////////// 1813// // 1814// ISTRI_34 // 1815// // 1816////////////////////////////////////////////////////////// 1817 1818UInt h_pcmpistri_34 ( V128* argL, V128* argR ) 1819{ 1820 V128 block[2]; 1821 memcpy(&block[0], argL, sizeof(V128)); 1822 memcpy(&block[1], argR, sizeof(V128)); 1823 ULong res, flags; 1824 __asm__ __volatile__( 1825 "subq $1024, %%rsp" "\n\t" 1826 "movdqu 0(%2), %%xmm2" "\n\t" 1827 "movdqu 16(%2), %%xmm11" "\n\t" 1828 "pcmpistri $0x34, %%xmm2, %%xmm11" "\n\t" 1829 "pushfq" "\n\t" 1830 "popq %%rdx" "\n\t" 1831 "movq %%rcx, %0" "\n\t" 
1832 "movq %%rdx, %1" "\n\t" 1833 "addq $1024, %%rsp" "\n\t" 1834 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1835 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1836 ); 1837 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1838} 1839 1840UInt s_pcmpistri_34 ( V128* argLU, V128* argRU ) 1841{ 1842 V128 resV; 1843 UInt resOSZACP, resECX; 1844 Bool ok 1845 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 1846 zmask_from_V128(argLU), 1847 zmask_from_V128(argRU), 1848 0x34, False/*!isSTRM*/ 1849 ); 1850 assert(ok); 1851 resECX = resV.uInt[0]; 1852 return (resOSZACP << 16) | resECX; 1853} 1854 1855void istri_34 ( void ) 1856{ 1857 char* wot = "34"; 1858 UInt(*h)(V128*,V128*) = h_pcmpistri_34; 1859 UInt(*s)(V128*,V128*) = s_pcmpistri_34; 1860 1861 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc"); 1862 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb"); 1863 try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb"); 1864 try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb"); 1865 1866 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb"); 1867 try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb"); 1868 try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb"); 1869 try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb"); 1870 try_istri(wot,h,s, "0000000000000000", "00000000000000cb"); 1871 1872 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1873 1874 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb"); 1875 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b"); 1876 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb"); 1877 1878 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb"); 1879 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb"); 1880 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b"); 1881 1882 try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421"); 1883 try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421"); 1884 1885 try_istri(wot,h,s, 
"0123456789abcdef", "00000000dca86532"); 1886 try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532"); 1887 1888 try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a"); 1889 try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7"); 1890 try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54"); 1891 try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb"); 1892} 1893 1894 1895////////////////////////////////////////////////////////// 1896// // 1897// ISTRI_14 // 1898// // 1899////////////////////////////////////////////////////////// 1900 1901UInt h_pcmpistri_14 ( V128* argL, V128* argR ) 1902{ 1903 V128 block[2]; 1904 memcpy(&block[0], argL, sizeof(V128)); 1905 memcpy(&block[1], argR, sizeof(V128)); 1906 ULong res, flags; 1907 __asm__ __volatile__( 1908 "subq $1024, %%rsp" "\n\t" 1909 "movdqu 0(%2), %%xmm2" "\n\t" 1910 "movdqu 16(%2), %%xmm11" "\n\t" 1911 "pcmpistri $0x14, %%xmm2, %%xmm11" "\n\t" 1912 "pushfq" "\n\t" 1913 "popq %%rdx" "\n\t" 1914 "movq %%rcx, %0" "\n\t" 1915 "movq %%rdx, %1" "\n\t" 1916 "addq $1024, %%rsp" "\n\t" 1917 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1918 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1919 ); 1920 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1921} 1922 1923UInt s_pcmpistri_14 ( V128* argLU, V128* argRU ) 1924{ 1925 V128 resV; 1926 UInt resOSZACP, resECX; 1927 Bool ok 1928 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 1929 zmask_from_V128(argLU), 1930 zmask_from_V128(argRU), 1931 0x14, False/*!isSTRM*/ 1932 ); 1933 assert(ok); 1934 resECX = resV.uInt[0]; 1935 return (resOSZACP << 16) | resECX; 1936} 1937 1938void istri_14 ( void ) 1939{ 1940 char* wot = "14"; 1941 UInt(*h)(V128*,V128*) = h_pcmpistri_14; 1942 UInt(*s)(V128*,V128*) = s_pcmpistri_14; 1943 1944 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc"); 1945 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb"); 1946 try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb"); 1947 try_istri(wot,h,s, "baaabbbbccccdddc", 
"00000000000000cb"); 1948 1949 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb"); 1950 try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb"); 1951 try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb"); 1952 try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb"); 1953 try_istri(wot,h,s, "0000000000000000", "00000000000000cb"); 1954 1955 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1956 1957 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb"); 1958 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b"); 1959 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb"); 1960 1961 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb"); 1962 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb"); 1963 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b"); 1964 1965 try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421"); 1966 try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421"); 1967 1968 try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532"); 1969 try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532"); 1970 1971 try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a"); 1972 try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7"); 1973 try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54"); 1974 try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb"); 1975} 1976 1977 1978////////////////////////////////////////////////////////// 1979// // 1980// ISTRI_70 // 1981// // 1982////////////////////////////////////////////////////////// 1983 1984UInt h_pcmpistri_70 ( V128* argL, V128* argR ) 1985{ 1986 V128 block[2]; 1987 memcpy(&block[0], argL, sizeof(V128)); 1988 memcpy(&block[1], argR, sizeof(V128)); 1989 ULong res, flags; 1990 __asm__ __volatile__( 1991 "subq $1024, %%rsp" "\n\t" 1992 "movdqu 0(%2), %%xmm2" "\n\t" 1993 "movdqu 16(%2), %%xmm11" "\n\t" 1994 "pcmpistri $0x70, %%xmm2, %%xmm11" "\n\t" 1995 "pushfq" "\n\t" 1996 "popq %%rdx" "\n\t" 1997 "movq %%rcx, %0" "\n\t" 1998 "movq 
%%rdx, %1" "\n\t" 1999 "addq $1024, %%rsp" "\n\t" 2000 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 2001 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 2002 ); 2003 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 2004} 2005 2006UInt s_pcmpistri_70 ( V128* argLU, V128* argRU ) 2007{ 2008 V128 resV; 2009 UInt resOSZACP, resECX; 2010 Bool ok 2011 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 2012 zmask_from_V128(argLU), 2013 zmask_from_V128(argRU), 2014 0x70, False/*!isSTRM*/ 2015 ); 2016 assert(ok); 2017 resECX = resV.uInt[0]; 2018 return (resOSZACP << 16) | resECX; 2019} 2020 2021void istri_70 ( void ) 2022{ 2023 char* wot = "70"; 2024 UInt(*h)(V128*,V128*) = h_pcmpistri_70; 2025 UInt(*s)(V128*,V128*) = s_pcmpistri_70; 2026 2027 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); 2028 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); 2029 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); 2030 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 2031 2032 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 2033 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); 2034 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); 2035 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); 2036 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); 2037 2038 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 2039 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); 2040 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); 2041 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); 2042 2043 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 2044 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 2045 2046 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 2047 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 2048 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); 2049 try_istri(wot,h,s, "0000abcdabcdabcd", 
"000000000000baba"); 2050 2051 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); 2052 2053 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 2054 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 2055} 2056 2057 2058////////////////////////////////////////////////////////// 2059// // 2060// ISTRI_62 // 2061// // 2062////////////////////////////////////////////////////////// 2063 2064UInt h_pcmpistri_62 ( V128* argL, V128* argR ) 2065{ 2066 V128 block[2]; 2067 memcpy(&block[0], argL, sizeof(V128)); 2068 memcpy(&block[1], argR, sizeof(V128)); 2069 ULong res, flags; 2070 __asm__ __volatile__( 2071 "subq $1024, %%rsp" "\n\t" 2072 "movdqu 0(%2), %%xmm2" "\n\t" 2073 "movdqu 16(%2), %%xmm11" "\n\t" 2074 "pcmpistri $0x62, %%xmm2, %%xmm11" "\n\t" 2075 "pushfq" "\n\t" 2076 "popq %%rdx" "\n\t" 2077 "movq %%rcx, %0" "\n\t" 2078 "movq %%rdx, %1" "\n\t" 2079 "addq $1024, %%rsp" "\n\t" 2080 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 2081 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 2082 ); 2083 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 2084} 2085 2086UInt s_pcmpistri_62 ( V128* argLU, V128* argRU ) 2087{ 2088 V128 resV; 2089 UInt resOSZACP, resECX; 2090 Bool ok 2091 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 2092 zmask_from_V128(argLU), 2093 zmask_from_V128(argRU), 2094 0x62, False/*!isSTRM*/ 2095 ); 2096 assert(ok); 2097 resECX = resV.uInt[0]; 2098 return (resOSZACP << 16) | resECX; 2099} 2100 2101void istri_62 ( void ) 2102{ 2103 char* wot = "62"; 2104 UInt(*h)(V128*,V128*) = h_pcmpistri_62; 2105 UInt(*s)(V128*,V128*) = s_pcmpistri_62; 2106 2107 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); 2108 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); 2109 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); 2110 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 2111 2112 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 2113 try_istri(wot,h,s, "0bcdabcdabcdabcd", 
"000000000000abcd"); 2114 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); 2115 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); 2116 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); 2117 2118 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 2119 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); 2120 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); 2121 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); 2122 2123 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 2124 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 2125 2126 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 2127 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 2128 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); 2129 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); 2130 2131 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); 2132 2133 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 2134 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 2135} 2136 2137 2138////////////////////////////////////////////////////////// 2139// // 2140// ISTRI_72 // 2141// // 2142////////////////////////////////////////////////////////// 2143 2144UInt h_pcmpistri_72 ( V128* argL, V128* argR ) 2145{ 2146 V128 block[2]; 2147 memcpy(&block[0], argL, sizeof(V128)); 2148 memcpy(&block[1], argR, sizeof(V128)); 2149 ULong res, flags; 2150 __asm__ __volatile__( 2151 "subq $1024, %%rsp" "\n\t" 2152 "movdqu 0(%2), %%xmm2" "\n\t" 2153 "movdqu 16(%2), %%xmm11" "\n\t" 2154 "pcmpistri $0x72, %%xmm2, %%xmm11" "\n\t" 2155 "pushfq" "\n\t" 2156 "popq %%rdx" "\n\t" 2157 "movq %%rcx, %0" "\n\t" 2158 "movq %%rdx, %1" "\n\t" 2159 "addq $1024, %%rsp" "\n\t" 2160 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 2161 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 2162 ); 2163 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 2164} 2165 2166UInt s_pcmpistri_72 ( V128* 
argLU, V128* argRU ) 2167{ 2168 V128 resV; 2169 UInt resOSZACP, resECX; 2170 Bool ok 2171 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 2172 zmask_from_V128(argLU), 2173 zmask_from_V128(argRU), 2174 0x72, False/*!isSTRM*/ 2175 ); 2176 assert(ok); 2177 resECX = resV.uInt[0]; 2178 return (resOSZACP << 16) | resECX; 2179} 2180 2181void istri_72 ( void ) 2182{ 2183 char* wot = "72"; 2184 UInt(*h)(V128*,V128*) = h_pcmpistri_72; 2185 UInt(*s)(V128*,V128*) = s_pcmpistri_72; 2186 2187 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); 2188 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); 2189 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); 2190 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 2191 2192 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 2193 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); 2194 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); 2195 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); 2196 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); 2197 2198 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 2199 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); 2200 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); 2201 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); 2202 2203 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 2204 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 2205 2206 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 2207 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 2208 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); 2209 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); 2210 2211 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); 2212 2213 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 2214 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 2215} 2216 2217 
2218////////////////////////////////////////////////////////// 2219// // 2220// ISTRI_10 // 2221// // 2222////////////////////////////////////////////////////////// 2223 2224UInt h_pcmpistri_10 ( V128* argL, V128* argR ) 2225{ 2226 V128 block[2]; 2227 memcpy(&block[0], argL, sizeof(V128)); 2228 memcpy(&block[1], argR, sizeof(V128)); 2229 ULong res, flags; 2230 __asm__ __volatile__( 2231 "subq $1024, %%rsp" "\n\t" 2232 "movdqu 0(%2), %%xmm2" "\n\t" 2233 "movdqu 16(%2), %%xmm11" "\n\t" 2234 "pcmpistri $0x10, %%xmm2, %%xmm11" "\n\t" 2235//"pcmpistrm $0x10, %%xmm2, %%xmm11" "\n\t" 2236//"movd %%xmm0, %%ecx" "\n\t" 2237 "pushfq" "\n\t" 2238 "popq %%rdx" "\n\t" 2239 "movq %%rcx, %0" "\n\t" 2240 "movq %%rdx, %1" "\n\t" 2241 "addq $1024, %%rsp" "\n\t" 2242 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 2243 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 2244 ); 2245 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 2246} 2247 2248UInt s_pcmpistri_10 ( V128* argLU, V128* argRU ) 2249{ 2250 V128 resV; 2251 UInt resOSZACP, resECX; 2252 Bool ok 2253 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 2254 zmask_from_V128(argLU), 2255 zmask_from_V128(argRU), 2256 0x10, False/*!isSTRM*/ 2257 ); 2258 assert(ok); 2259 resECX = resV.uInt[0]; 2260 return (resOSZACP << 16) | resECX; 2261} 2262 2263void istri_10 ( void ) 2264{ 2265 char* wot = "10"; 2266 UInt(*h)(V128*,V128*) = h_pcmpistri_10; 2267 UInt(*s)(V128*,V128*) = s_pcmpistri_10; 2268 2269 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); 2270 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); 2271 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); 2272 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 2273 2274 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 2275 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); 2276 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); 2277 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); 2278 
try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); 2279 2280 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 2281 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); 2282 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); 2283 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); 2284 2285 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 2286 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 2287 2288 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 2289 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 2290 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); 2291 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); 2292 2293 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); 2294 2295 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 2296 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 2297} 2298 2299 2300////////////////////////////////////////////////////////// 2301// // 2302// main // 2303// // 2304////////////////////////////////////////////////////////// 2305 2306int main ( void ) 2307{ 2308 istri_4A(); 2309 istri_3A(); 2310 istri_08(); 2311 istri_18(); 2312 istri_1A(); 2313 istri_02(); 2314 istri_0C(); 2315 istri_12(); 2316 istri_44(); 2317 istri_00(); 2318 istri_38(); 2319 istri_46(); 2320 istri_30(); 2321 istri_40(); 2322 istri_42(); 2323 istri_0E(); 2324 istri_14(); 2325 istri_34(); 2326 istri_70(); 2327 istri_62(); 2328 istri_72(); 2329 istri_10(); 2330 return 0; 2331} 2332