/* Tests in detail the core arithmetic for pcmp{e,i}str{i,m} using
   pcmpistri to drive it.  Does not check the e-vs-i or i-vs-m
   aspect. */

#include <string.h>
#include <stdio.h>
#include <assert.h>

typedef  unsigned int            UInt;
typedef  signed int              Int;
typedef  unsigned char           UChar;
typedef  signed char             Char;
typedef  unsigned long long int  ULong;
typedef  UChar                   Bool;
#define False ((Bool)0)
#define True  ((Bool)1)

/* A 128-bit vector, viewable as 16 bytes or as four 32-bit words. */
typedef
   union {
      UChar uChar[16];
      UInt  uInt[4];
   }
   V128;

/* Bit positions of the O, S, Z, A, C and P flags, and the matching
   single-bit masks. */
#define SHIFT_O 11
#define SHIFT_S 7
#define SHIFT_Z 6
#define SHIFT_A 4
#define SHIFT_C 0
#define SHIFT_P 2

#define MASK_O (1ULL << SHIFT_O)
#define MASK_S (1ULL << SHIFT_S)
#define MASK_Z (1ULL << SHIFT_Z)
#define MASK_A (1ULL << SHIFT_A)
#define MASK_C (1ULL << SHIFT_C)
#define MASK_P (1ULL << SHIFT_P)


/* Count leading zero bits of a 32-bit value; returns 32 for x == 0.
   Binary search on unsigned values only, so the result does not
   depend on how the compiler shifts or converts negative numbers. */
UInt clz32 ( UInt x )
{
   UInt n = 0;
   if (x == 0) return 32;
   if ((x & 0xFFFF0000u) == 0) { n += 16; x <<= 16; }
   if ((x & 0xFF000000u) == 0) { n +=  8; x <<=  8; }
   if ((x & 0xF0000000u) == 0) { n +=  4; x <<=  4; }
   if ((x & 0xC0000000u) == 0) { n +=  2; x <<=  2; }
   if ((x & 0x80000000u) == 0) { n +=  1; }
   return n;
}

/* Count trailing zero bits of a 32-bit value; returns 32 for x == 0.
   (~x & (x-1)) has a 1 exactly in each trailing-zero position of x. */
UInt ctz32 ( UInt x )
{
   return 32 - clz32((~x) & (x-1));
}

/* Build a 16-byte vector from a 16-character hex summary.  Each hex
   digit d becomes the byte 0xdd, and the string is read right to
   left: the last character fills byte 0, the first fills byte 15.
   Asserts on a wrong length or a non-hex character. */
void expand ( V128* dst, const char* summary )
{
   Int i;
   assert( strlen(summary) == 16 );
   for (i = 0; i < 16; i++) {
      UChar nibble = 0;
      UChar x      = summary[15-i];
      if      (x >= '0' && x <= '9') { nibble = x - '0'; }
      else if (x >= 'A' && x <= 'F') { nibble = x - 'A' + 10; }
      else if (x >= 'a' && x <= 'f') { nibble = x - 'a' + 10; }
      else assert(0);

      assert(nibble < 16);
      dst->uChar[i] = (UChar)((nibble << 4) | nibble);
   }
}

/* Run one test vector.  Expands summL/summR into vectors, evaluates
   both h_fn and s_fn on them, and prints the two results, followed
   by "!!!!" if they differ.  'which' is the two-character imm8 tag
   shown in the output line. */
void try_istri ( const char* which,
                 UInt(*h_fn)(V128*,V128*),
                 UInt(*s_fn)(V128*,V128*),
                 const char* summL, const char* summR )
{
   assert(strlen(which) == 2);
   V128 argL, argR;
   expand(&argL, summL);
   expand(&argR, summR);
   UInt h_res = h_fn(&argL, &argR);
   UInt s_res = s_fn(&argL, &argR);
   printf("istri %s %s %s -> %08x %08x %s\n",
          which, summL, summR, h_res, s_res, h_res == s_res ? "" : "!!!!");
}

/* Return a 16-bit mask with bit i set iff byte i of *arg is zero. */
UInt zmask_from_V128 ( V128* arg )
{
   UInt i, res = 0;
   for (i = 0; i < 16; i++) {
      if (arg->uChar[i] == 0)
         res |= 1u << i;
   }
   return res;
}

//////////////////////////////////////////////////////////
//                                                      //
//                       GENERAL                        //
//                                                      //
//////////////////////////////////////////////////////////


/* Given partial results from a pcmpXstrX operation (intRes1,
   basically), generate an I format (index value for ECX) output, and
   also the new OSZACP flags.

   pol is imm8[5:4] (polarity) and idx is imm8[6] (0: index of the
   least significant set bit, 1: most significant).  Only the low 32
   bits of *resV are written.
*/
static
void pcmpXstrX_WRK_gen_output_fmt_I(/*OUT*/V128* resV,
                                    /*OUT*/UInt* resOSZACP,
                                    UInt intRes1,
                                    UInt zmaskL, UInt zmaskR,
                                    UInt validL,
                                    UInt pol, UInt idx )
{
   assert((pol >> 2) == 0);
   assert((idx >> 1) == 0);

   UInt intRes2 = 0;
   switch (pol) {
      case 0: intRes2 = intRes1;          break; // pol +
      case 1: intRes2 = ~intRes1;         break; // pol -
      case 2: intRes2 = intRes1;          break; // pol m+
      case 3: intRes2 = intRes1 ^ validL; break; // pol m-
      default: assert(0);                 break; // excluded by the assert above
   }
   intRes2 &= 0xFFFF;

   // generate ecx value; 16 means "no bit set"
   UInt newECX = 0;
   if (idx) {
      // index of ms-1-bit
      newECX = intRes2 == 0 ? 16 : (31 - clz32(intRes2));
   } else {
      // index of ls-1-bit
      newECX = intRes2 == 0 ? 16 : ctz32(intRes2);
   }

   // store through the union member rather than a pointer cast
   resV->uInt[0] = newECX;

   // generate new flags, common to all ISTRI and ISTRM cases
   *resOSZACP    // A, P are zero
     = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0
     | ((zmaskL == 0)  ? 0 : MASK_Z) // Z == 1 iff any in argL is 0
     | ((zmaskR == 0)  ? 0 : MASK_S) // S == 1 iff any in argR is 0
     | ((intRes2 & 1) << SHIFT_O);   // O == IntRes2[0]
}


/* Turn a zero-position mask into a validity mask: every bit below
   the lowest set bit of zmask becomes 1, that bit and all higher
   bits become 0.  A zero zmask gives all ones. */
static UInt pcmpXstrX_WRK_valid_from_zmask ( UInt zmask )
{
   return ~(zmask | -zmask);   // not(left(zmask))
}

/* "Equal each" (strcmp): bit i is the byte comparison where both
   sides are valid, 1 where both are invalid, 0 where exactly one is
   valid.  Bits 31:16 of the result are not meaningful. */
static UInt pcmpXstrX_WRK_equal_each ( const UChar* argL, const UChar* argR,
                                       UInt validL, UInt validR )
{
   UInt i, eq = 0;
   for (i = 0; i < 16; i++) {
      if (argL[i] == argR[i])
         eq |= 1u << i;
   }
   return (eq & validL & validR)   // if both valid, use cmpres
          | ~(validL | validR);    // if both invalid, force 1
                                   // else force 0
}

/* "Equal any" (find chars in a set): bit si is set iff valid string
   byte str[si] equals some valid byte of set. */
static UInt pcmpXstrX_WRK_equal_any ( const UChar* str, const UChar* set,
                                      UInt validL, UInt validR )
{
   UInt si, ci, res = 0;
   for (si = 0; si < 16; si++) {
      if ((validL & (1u << si)) == 0)
         break;   // run off the end of the string
      for (ci = 0; ci < 16; ci++) {
         if ((validR & (1u << ci)) == 0)
            break;   // run off the end of the set
         if (set[ci] == str[si]) {
            res |= 1u << si;
            break;
         }
      }
   }
   return res;
}

/* "Equal ordered" (substring search): bit hi is set iff the valid
   part of needle matches hay starting at hi, where running past
   byte 15 of hay counts as a match.  The scan stops after the first
   invalid haystack position has been evaluated. */
static UInt pcmpXstrX_WRK_equal_ordered ( const UChar* hay,
                                          const UChar* needle,
                                          UInt validL, UInt validR )
{
   UInt hi, ni, res = 0;
   for (hi = 0; hi < 16; hi++) {
      UInt m = 1;
      for (ni = 0; ni < 16; ni++) {
         if ((validR & (1u << ni)) == 0) break;
         if (ni + hi >= 16) break;
         if (hay[ni + hi] != needle[ni]) { m = 0; break; }
      }
      res |= m << hi;
      if ((validL & (1u << hi)) == 0)
         break;   // run off the end of the haystack
   }
   return res;
}

/* "Ranges": pairs holds up to eight (low, high) byte pairs.  Bit si
   is set iff valid string byte str[si] lies inside some pair whose
   two bytes are both valid.  Bytes are compared as signed when
   isSigned is True, otherwise as unsigned. */
static UInt pcmpXstrX_WRK_ranges ( const UChar* str, const UChar* pairs,
                                   UInt validL, UInt validR,
                                   Bool isSigned )
{
   const Char* strS   = (const Char*)str;
   const Char* pairsS = (const Char*)pairs;
   UInt si, ri, res = 0;
   for (si = 0; si < 16; si++) {
      if ((validL & (1u << si)) == 0)
         break;   // run off the end of the string
      Int c = isSigned ? (Int)strS[si] : (Int)str[si];
      for (ri = 0; ri < 16; ri += 2) {
         if ((validR & (3u << ri)) != (3u << ri))
            break;   // this pair is not fully valid
         Int lo = isSigned ? (Int)pairsS[ri]   : (Int)pairs[ri];
         Int hi = isSigned ? (Int)pairsS[ri+1] : (Int)pairs[ri+1];
         if (lo <= c && c <= hi) {
            res |= 1u << si;
            break;
         }
      }
   }
   return res;
}


/* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M}
   variants.

   For xSTRI variants, the new ECX value is placed in the 32 bits
   pointed to by *resV.  For xSTRM variants, the result is a 128 bit
   value and is placed at *resV in the obvious way.

   For all variants, the new OSZACP value is placed at *resOSZACP.

   argLV and argRV are the vector args.  The caller must prepare a
   16-bit mask for each, zmaskL and zmaskR.  For ISTRx variants this
   must be 1 for each zero byte of the respective arg.  For ESTRx
   variants this is derived from the explicit length indication, and
   must be 0 in all places except at the bit index corresponding to
   the valid length (0 .. 16).  If the valid length is 16 then the
   mask must be all zeroes.  In all cases, bits 31:16 must be zero.

   imm8 is the original immediate from the instruction.  isSTRM
   indicates whether this is a xSTRM or xSTRI variant, which controls
   how much of *resV is written.

   If the given imm8 case can be handled, the return value is True.
   If not, False is returned, and neither *resV nor *resOSZACP are
   altered.  As written, only byte-data xSTRI cases are handled, so
   isSTRM == True always yields False.
*/

Bool pcmpXstrX_WRK ( /*OUT*/V128* resV,
                     /*OUT*/UInt* resOSZACP,
                     V128* argLV, V128* argRV,
                     UInt zmaskL, UInt zmaskR,
                     UInt imm8, Bool isSTRM )
{
   assert(imm8 < 0x80);
   assert((zmaskL >> 16) == 0);
   assert((zmaskR >> 16) == 0);

   /* Explicitly reject any imm8 values that haven't been validated,
      even if they would probably work.  Life is too short to have
      unvalidated cases in the code base. */
   switch (imm8) {
      case 0x00: case 0x02: case 0x08: case 0x0A: case 0x0C: case 0x0E:
      case 0x12: case 0x14: case 0x1A:
      case 0x30: case 0x34: case 0x38: case 0x3A:
      case 0x40: case 0x44: case 0x46: case 0x4A:
         break;
      default:
         return False;
   }

   UInt fmt = (imm8 >> 0) & 3; // imm8[1:0]  data format
   UInt agg = (imm8 >> 2) & 3; // imm8[3:2]  aggregation fn
   UInt pol = (imm8 >> 4) & 3; // imm8[5:4]  polarity
   UInt idx = (imm8 >> 6) & 1; // imm8[6]    1==msb/bytemask

   /* Only byte data (0 == ub, 2 == sb) with index output is
      implemented. */
   if (isSTRM || (fmt != 0 && fmt != 2))
      return False;

   const UChar* argL = argLV->uChar;
   const UChar* argR = argRV->uChar;
   UInt validL  = pcmpXstrX_WRK_valid_from_zmask(zmaskL);
   UInt validR  = pcmpXstrX_WRK_valid_from_zmask(zmaskR);
   UInt intRes1 = 0;

   switch (agg) {
      case 0: /* equal any: argL is the string, argR the charset */
         intRes1 = pcmpXstrX_WRK_equal_any(argL, argR, validL, validR);
         break;
      case 1: /* ranges: argL is the string, argR the range pairs */
         intRes1 = pcmpXstrX_WRK_ranges(argL, argR, validL, validR,
                                        (Bool)(fmt == 2));
         break;
      case 2: /* equal each, aka strcmp */
         intRes1 = pcmpXstrX_WRK_equal_each(argL, argR, validL, validR);
         break;
      case 3: /* equal ordered: argL is the haystack, argR the needle */
         intRes1 = pcmpXstrX_WRK_equal_ordered(argL, argR, validL, validR);
         break;
      default: /* agg is two bits wide, so this cannot happen */
         return False;
   }
   intRes1 &= 0xFFFF;

   // generate I-format output
   pcmpXstrX_WRK_gen_output_fmt_I(
      resV, resOSZACP,
      intRes1, zmaskL, zmaskR, validL, pol, idx
   );

   return True;
}


//////////////////////////////////////////////////////////
//                                                      //
//
ISTRI_4A // 424// // 425////////////////////////////////////////////////////////// 426 427UInt h_pcmpistri_4A ( V128* argL, V128* argR ) 428{ 429 V128 block[2]; 430 memcpy(&block[0], argL, sizeof(V128)); 431 memcpy(&block[1], argR, sizeof(V128)); 432 ULong res, flags; 433 __asm__ __volatile__( 434 "subq $1024, %%rsp" "\n\t" 435 "movdqu 0(%2), %%xmm2" "\n\t" 436 "movdqu 16(%2), %%xmm11" "\n\t" 437 "pcmpistri $0x4A, %%xmm2, %%xmm11" "\n\t" 438 "pushfq" "\n\t" 439 "popq %%rdx" "\n\t" 440 "movq %%rcx, %0" "\n\t" 441 "movq %%rdx, %1" "\n\t" 442 "addq $1024, %%rsp" "\n\t" 443 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 444 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 445 ); 446 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 447} 448 449UInt s_pcmpistri_4A ( V128* argLU, V128* argRU ) 450{ 451 V128 resV; 452 UInt resOSZACP, resECX; 453 Bool ok 454 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 455 zmask_from_V128(argLU), 456 zmask_from_V128(argRU), 457 0x4A, False/*!isSTRM*/ 458 ); 459 assert(ok); 460 resECX = resV.uInt[0]; 461 return (resOSZACP << 16) | resECX; 462} 463 464void istri_4A ( void ) 465{ 466 char* wot = "4A"; 467 UInt(*h)(V128*,V128*) = h_pcmpistri_4A; 468 UInt(*s)(V128*,V128*) = s_pcmpistri_4A; 469 470 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 471 472 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 473 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 474 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 475 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 476 477 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 478 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 479 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 480 481 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 482 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 483 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 484 try_istri(wot,h,s, 
"b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 485 486 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 487 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 488 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 489 490 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 491 492 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 493 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 494 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); 495 496 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa"); 497 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 498 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); 499 500 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 501 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); 502 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa"); 503 504 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); 505 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); 506 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa"); 507 508 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 509 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 510} 511 512////////////////////////////////////////////////////////// 513// // 514// ISTRI_3A // 515// // 516////////////////////////////////////////////////////////// 517 518UInt h_pcmpistri_3A ( V128* argL, V128* argR ) 519{ 520 V128 block[2]; 521 memcpy(&block[0], argL, sizeof(V128)); 522 memcpy(&block[1], argR, sizeof(V128)); 523 ULong res, flags; 524 __asm__ __volatile__( 525 "subq $1024, %%rsp" "\n\t" 526 "movdqu 0(%2), %%xmm2" "\n\t" 527 "movdqu 16(%2), %%xmm11" "\n\t" 528 "pcmpistri $0x3A, %%xmm2, %%xmm11" "\n\t" 529 "pushfq" "\n\t" 530 "popq %%rdx" "\n\t" 531 "movq %%rcx, %0" "\n\t" 532 "movq %%rdx, %1" "\n\t" 533 "addq $1024, %%rsp" "\n\t" 534 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 535 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 536 
); 537 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 538} 539 540UInt s_pcmpistri_3A ( V128* argLU, V128* argRU ) 541{ 542 V128 resV; 543 UInt resOSZACP, resECX; 544 Bool ok 545 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 546 zmask_from_V128(argLU), 547 zmask_from_V128(argRU), 548 0x3A, False/*!isSTRM*/ 549 ); 550 assert(ok); 551 resECX = resV.uInt[0]; 552 return (resOSZACP << 16) | resECX; 553} 554 555void istri_3A ( void ) 556{ 557 char* wot = "3A"; 558 UInt(*h)(V128*,V128*) = h_pcmpistri_3A; 559 UInt(*s)(V128*,V128*) = s_pcmpistri_3A; 560 561 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 562 563 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 564 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 565 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 566 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 567 568 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 569 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 570 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 571 572 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 573 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 574 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 575 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 576 577 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 578 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 579 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 580 581 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 582 583 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 584 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 585 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); 586 587 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa"); 588 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 589 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); 590 
591 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 592 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); 593 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa"); 594 595 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); 596 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); 597 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa"); 598 599 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 600 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 601} 602 603 604 605////////////////////////////////////////////////////////// 606// // 607// ISTRI_0C // 608// // 609////////////////////////////////////////////////////////// 610 611__attribute__((noinline)) 612UInt h_pcmpistri_0C ( V128* argL, V128* argR ) 613{ 614 V128 block[2]; 615 memcpy(&block[0], argL, sizeof(V128)); 616 memcpy(&block[1], argR, sizeof(V128)); 617 ULong res = 0, flags = 0; 618 __asm__ __volatile__( 619 "movdqu 0(%2), %%xmm2" "\n\t" 620 "movdqu 16(%2), %%xmm11" "\n\t" 621 "pcmpistri $0x0C, %%xmm2, %%xmm11" "\n\t" 622 //"pcmpistrm $0x0C, %%xmm2, %%xmm11" "\n\t" 623 //"movd %%xmm0, %%ecx" "\n\t" 624 "pushfq" "\n\t" 625 "popq %%rdx" "\n\t" 626 "movq %%rcx, %0" "\n\t" 627 "movq %%rdx, %1" "\n\t" 628 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 629 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 630 ); 631 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 632} 633 634UInt s_pcmpistri_0C ( V128* argLU, V128* argRU ) 635{ 636 V128 resV; 637 UInt resOSZACP, resECX; 638 Bool ok 639 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 640 zmask_from_V128(argLU), 641 zmask_from_V128(argRU), 642 0x0C, False/*!isSTRM*/ 643 ); 644 assert(ok); 645 resECX = resV.uInt[0]; 646 return (resOSZACP << 16) | resECX; 647} 648 649void istri_0C ( void ) 650{ 651 char* wot = "0C"; 652 UInt(*h)(V128*,V128*) = h_pcmpistri_0C; 653 UInt(*s)(V128*,V128*) = s_pcmpistri_0C; 654 655 try_istri(wot,h,s, "111111111abcde11", "00000000000abcde"); 656 657 
try_istri(wot,h,s, "111111111abcde11", "0000abcde00abcde"); 658 659 try_istri(wot,h,s, "1111111111abcde1", "00000000000abcde"); 660 try_istri(wot,h,s, "11111111111abcde", "00000000000abcde"); 661 try_istri(wot,h,s, "111111111111abcd", "00000000000abcde"); 662 663 try_istri(wot,h,s, "111abcde1abcde11", "00000000000abcde"); 664 665 try_istri(wot,h,s, "11abcde11abcde11", "00000000000abcde"); 666 try_istri(wot,h,s, "1abcde111abcde11", "00000000000abcde"); 667 try_istri(wot,h,s, "abcde1111abcde11", "00000000000abcde"); 668 try_istri(wot,h,s, "bcde11111abcde11", "00000000000abcde"); 669 try_istri(wot,h,s, "cde111111abcde11", "00000000000abcde"); 670 671 try_istri(wot,h,s, "01abcde11abcde11", "00000000000abcde"); 672 try_istri(wot,h,s, "00abcde11abcde11", "00000000000abcde"); 673 try_istri(wot,h,s, "000bcde11abcde11", "00000000000abcde"); 674 675 try_istri(wot,h,s, "00abcde10abcde11", "00000000000abcde"); 676 try_istri(wot,h,s, "00abcde100bcde11", "00000000000abcde"); 677 678 try_istri(wot,h,s, "1111111111111234", "0000000000000000"); 679 try_istri(wot,h,s, "1111111111111234", "0000000000000001"); 680 try_istri(wot,h,s, "1111111111111234", "0000000000000011"); 681 682 try_istri(wot,h,s, "1111111111111234", "1111111111111234"); 683 try_istri(wot,h,s, "a111111111111111", "000000000000000a"); 684 try_istri(wot,h,s, "b111111111111111", "000000000000000a"); 685 686 try_istri(wot,h,s, "b111111111111111", "0000000000000000"); 687 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 688 try_istri(wot,h,s, "123456789abcdef1", "0000000000000000"); 689 try_istri(wot,h,s, "0000000000000000", "123456789abcdef1"); 690} 691 692 693////////////////////////////////////////////////////////// 694// // 695// ISTRI_08 // 696// // 697////////////////////////////////////////////////////////// 698 699UInt h_pcmpistri_08 ( V128* argL, V128* argR ) 700{ 701 V128 block[2]; 702 memcpy(&block[0], argL, sizeof(V128)); 703 memcpy(&block[1], argR, sizeof(V128)); 704 ULong res, flags; 705 __asm__ 
__volatile__( 706 "subq $1024, %%rsp" "\n\t" 707 "movdqu 0(%2), %%xmm2" "\n\t" 708 "movdqu 16(%2), %%xmm11" "\n\t" 709 "pcmpistri $0x08, %%xmm2, %%xmm11" "\n\t" 710 "pushfq" "\n\t" 711 "popq %%rdx" "\n\t" 712 "movq %%rcx, %0" "\n\t" 713 "movq %%rdx, %1" "\n\t" 714 "addq $1024, %%rsp" "\n\t" 715 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 716 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 717 ); 718 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 719} 720 721UInt s_pcmpistri_08 ( V128* argLU, V128* argRU ) 722{ 723 V128 resV; 724 UInt resOSZACP, resECX; 725 Bool ok 726 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 727 zmask_from_V128(argLU), 728 zmask_from_V128(argRU), 729 0x08, False/*!isSTRM*/ 730 ); 731 assert(ok); 732 resECX = resV.uInt[0]; 733 return (resOSZACP << 16) | resECX; 734} 735 736void istri_08 ( void ) 737{ 738 char* wot = "08"; 739 UInt(*h)(V128*,V128*) = h_pcmpistri_08; 740 UInt(*s)(V128*,V128*) = s_pcmpistri_08; 741 742 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 743 744 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 745 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 746 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 747 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 748 749 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 750 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 751 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 752 753 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 754 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 755 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 756 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 757 758 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 759 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 760 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 761 762 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", 
"aaaaaaaaaaaaaaaa"); 763 764 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 765 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 766 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); 767 768 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa"); 769 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 770 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); 771 772 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 773 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); 774 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa"); 775 776 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); 777 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); 778 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa"); 779 780 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 781 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 782} 783 784 785 786////////////////////////////////////////////////////////// 787// // 788// ISTRI_1A // 789// // 790////////////////////////////////////////////////////////// 791 792UInt h_pcmpistri_1A ( V128* argL, V128* argR ) 793{ 794 V128 block[2]; 795 memcpy(&block[0], argL, sizeof(V128)); 796 memcpy(&block[1], argR, sizeof(V128)); 797 ULong res, flags; 798 __asm__ __volatile__( 799 "subq $1024, %%rsp" "\n\t" 800 "movdqu 0(%2), %%xmm2" "\n\t" 801 "movdqu 16(%2), %%xmm11" "\n\t" 802 "pcmpistri $0x1A, %%xmm2, %%xmm11" "\n\t" 803 "pushfq" "\n\t" 804 "popq %%rdx" "\n\t" 805 "movq %%rcx, %0" "\n\t" 806 "movq %%rdx, %1" "\n\t" 807 "addq $1024, %%rsp" "\n\t" 808 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 809 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 810 ); 811 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 812} 813 814UInt s_pcmpistri_1A ( V128* argLU, V128* argRU ) 815{ 816 V128 resV; 817 UInt resOSZACP, resECX; 818 Bool ok 819 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 820 zmask_from_V128(argLU), 821 
zmask_from_V128(argRU), 822 0x1A, False/*!isSTRM*/ 823 ); 824 assert(ok); 825 resECX = resV.uInt[0]; 826 return (resOSZACP << 16) | resECX; 827} 828 829void istri_1A ( void ) 830{ 831 char* wot = "1A"; 832 UInt(*h)(V128*,V128*) = h_pcmpistri_1A; 833 UInt(*s)(V128*,V128*) = s_pcmpistri_1A; 834 835 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 836 837 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 838 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 839 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 840 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 841 842 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 843 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 844 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 845 846 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 847 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 848 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 849 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 850 851 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 852 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 853 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 854 855 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 856 857 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 858 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 859 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); 860 861 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa"); 862 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 863 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); 864 865 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 866 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); 867 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa"); 868 869 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); 870 
try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); 871 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa"); 872 873 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 874 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 875} 876 877 878 879////////////////////////////////////////////////////////// 880// // 881// ISTRI_02 // 882// // 883////////////////////////////////////////////////////////// 884 885UInt h_pcmpistri_02 ( V128* argL, V128* argR ) 886{ 887 V128 block[2]; 888 memcpy(&block[0], argL, sizeof(V128)); 889 memcpy(&block[1], argR, sizeof(V128)); 890 ULong res, flags; 891 __asm__ __volatile__( 892 "subq $1024, %%rsp" "\n\t" 893 "movdqu 0(%2), %%xmm2" "\n\t" 894 "movdqu 16(%2), %%xmm11" "\n\t" 895 "pcmpistri $0x02, %%xmm2, %%xmm11" "\n\t" 896//"pcmpistrm $0x02, %%xmm2, %%xmm11" "\n\t" 897//"movd %%xmm0, %%ecx" "\n\t" 898 "pushfq" "\n\t" 899 "popq %%rdx" "\n\t" 900 "movq %%rcx, %0" "\n\t" 901 "movq %%rdx, %1" "\n\t" 902 "addq $1024, %%rsp" "\n\t" 903 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 904 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 905 ); 906 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 907} 908 909UInt s_pcmpistri_02 ( V128* argLU, V128* argRU ) 910{ 911 V128 resV; 912 UInt resOSZACP, resECX; 913 Bool ok 914 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 915 zmask_from_V128(argLU), 916 zmask_from_V128(argRU), 917 0x02, False/*!isSTRM*/ 918 ); 919 assert(ok); 920 resECX = resV.uInt[0]; 921 return (resOSZACP << 16) | resECX; 922} 923 924void istri_02 ( void ) 925{ 926 char* wot = "02"; 927 UInt(*h)(V128*,V128*) = h_pcmpistri_02; 928 UInt(*s)(V128*,V128*) = s_pcmpistri_02; 929 930 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); 931 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); 932 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); 933 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 934 935 try_istri(wot,h,s, "abcdabcdabcdabcd", 
"000000000000abcd"); 936 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); 937 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); 938 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); 939 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); 940 941 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 942 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); 943 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); 944 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); 945 946 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 947 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 948 949 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 950 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 951 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); 952 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); 953 954 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); 955 956 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 957 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 958} 959 960 961////////////////////////////////////////////////////////// 962// // 963// ISTRI_12 // 964// // 965////////////////////////////////////////////////////////// 966 967UInt h_pcmpistri_12 ( V128* argL, V128* argR ) 968{ 969 V128 block[2]; 970 memcpy(&block[0], argL, sizeof(V128)); 971 memcpy(&block[1], argR, sizeof(V128)); 972 ULong res, flags; 973 __asm__ __volatile__( 974 "subq $1024, %%rsp" "\n\t" 975 "movdqu 0(%2), %%xmm2" "\n\t" 976 "movdqu 16(%2), %%xmm11" "\n\t" 977 "pcmpistri $0x12, %%xmm2, %%xmm11" "\n\t" 978//"pcmpistrm $0x12, %%xmm2, %%xmm11" "\n\t" 979//"movd %%xmm0, %%ecx" "\n\t" 980 "pushfq" "\n\t" 981 "popq %%rdx" "\n\t" 982 "movq %%rcx, %0" "\n\t" 983 "movq %%rdx, %1" "\n\t" 984 "addq $1024, %%rsp" "\n\t" 985 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 986 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 987 ); 988 
return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 989} 990 991UInt s_pcmpistri_12 ( V128* argLU, V128* argRU ) 992{ 993 V128 resV; 994 UInt resOSZACP, resECX; 995 Bool ok 996 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 997 zmask_from_V128(argLU), 998 zmask_from_V128(argRU), 999 0x12, False/*!isSTRM*/ 1000 ); 1001 assert(ok); 1002 resECX = resV.uInt[0]; 1003 return (resOSZACP << 16) | resECX; 1004} 1005 1006void istri_12 ( void ) 1007{ 1008 char* wot = "12"; 1009 UInt(*h)(V128*,V128*) = h_pcmpistri_12; 1010 UInt(*s)(V128*,V128*) = s_pcmpistri_12; 1011 1012 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); 1013 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); 1014 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); 1015 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 1016 1017 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 1018 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); 1019 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); 1020 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); 1021 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); 1022 1023 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 1024 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); 1025 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); 1026 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); 1027 1028 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1029 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1030 1031 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 1032 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 1033 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); 1034 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); 1035 1036 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); 1037 1038 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 1039 try_istri(wot,h,s, 
"0ddc0ffeebadfeed", "00000000cafebabe"); 1040} 1041 1042 1043 1044////////////////////////////////////////////////////////// 1045// // 1046// ISTRI_44 // 1047// // 1048////////////////////////////////////////////////////////// 1049 1050UInt h_pcmpistri_44 ( V128* argL, V128* argR ) 1051{ 1052 V128 block[2]; 1053 memcpy(&block[0], argL, sizeof(V128)); 1054 memcpy(&block[1], argR, sizeof(V128)); 1055 ULong res, flags; 1056 __asm__ __volatile__( 1057 "subq $1024, %%rsp" "\n\t" 1058 "movdqu 0(%2), %%xmm2" "\n\t" 1059 "movdqu 16(%2), %%xmm11" "\n\t" 1060 "pcmpistri $0x44, %%xmm2, %%xmm11" "\n\t" 1061//"pcmpistrm $0x04, %%xmm2, %%xmm11" "\n\t" 1062//"movd %%xmm0, %%ecx" "\n\t" 1063 "pushfq" "\n\t" 1064 "popq %%rdx" "\n\t" 1065 "movq %%rcx, %0" "\n\t" 1066 "movq %%rdx, %1" "\n\t" 1067 "addq $1024, %%rsp" "\n\t" 1068 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1069 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1070 ); 1071 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1072} 1073 1074UInt s_pcmpistri_44 ( V128* argLU, V128* argRU ) 1075{ 1076 V128 resV; 1077 UInt resOSZACP, resECX; 1078 Bool ok 1079 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 1080 zmask_from_V128(argLU), 1081 zmask_from_V128(argRU), 1082 0x44, False/*!isSTRM*/ 1083 ); 1084 assert(ok); 1085 resECX = resV.uInt[0]; 1086 return (resOSZACP << 16) | resECX; 1087} 1088 1089void istri_44 ( void ) 1090{ 1091 char* wot = "44"; 1092 UInt(*h)(V128*,V128*) = h_pcmpistri_44; 1093 UInt(*s)(V128*,V128*) = s_pcmpistri_44; 1094 1095 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc"); 1096 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb"); 1097 try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb"); 1098 try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb"); 1099 1100 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb"); 1101 try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb"); 1102 try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb"); 1103 
try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb"); 1104 try_istri(wot,h,s, "0000000000000000", "00000000000000cb"); 1105 1106 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1107 1108 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb"); 1109 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b"); 1110 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb"); 1111 1112 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb"); 1113 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb"); 1114 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b"); 1115 1116 try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421"); 1117 try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421"); 1118 1119 try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532"); 1120 try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532"); 1121 1122 try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a"); 1123 try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7"); 1124 try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54"); 1125 try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb"); 1126} 1127 1128 1129////////////////////////////////////////////////////////// 1130// // 1131// ISTRI_00 // 1132// // 1133////////////////////////////////////////////////////////// 1134 1135UInt h_pcmpistri_00 ( V128* argL, V128* argR ) 1136{ 1137 V128 block[2]; 1138 memcpy(&block[0], argL, sizeof(V128)); 1139 memcpy(&block[1], argR, sizeof(V128)); 1140 ULong res, flags; 1141 __asm__ __volatile__( 1142 "subq $1024, %%rsp" "\n\t" 1143 "movdqu 0(%2), %%xmm2" "\n\t" 1144 "movdqu 16(%2), %%xmm11" "\n\t" 1145 "pcmpistri $0x00, %%xmm2, %%xmm11" "\n\t" 1146//"pcmpistrm $0x00, %%xmm2, %%xmm11" "\n\t" 1147//"movd %%xmm0, %%ecx" "\n\t" 1148 "pushfq" "\n\t" 1149 "popq %%rdx" "\n\t" 1150 "movq %%rcx, %0" "\n\t" 1151 "movq %%rdx, %1" "\n\t" 1152 "addq $1024, %%rsp" "\n\t" 1153 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1154 : 
"rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1155 ); 1156 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1157} 1158 1159UInt s_pcmpistri_00 ( V128* argLU, V128* argRU ) 1160{ 1161 V128 resV; 1162 UInt resOSZACP, resECX; 1163 Bool ok 1164 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 1165 zmask_from_V128(argLU), 1166 zmask_from_V128(argRU), 1167 0x00, False/*!isSTRM*/ 1168 ); 1169 assert(ok); 1170 resECX = resV.uInt[0]; 1171 return (resOSZACP << 16) | resECX; 1172} 1173 1174void istri_00 ( void ) 1175{ 1176 char* wot = "00"; 1177 UInt(*h)(V128*,V128*) = h_pcmpistri_00; 1178 UInt(*s)(V128*,V128*) = s_pcmpistri_00; 1179 1180 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); 1181 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); 1182 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); 1183 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 1184 1185 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 1186 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); 1187 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); 1188 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); 1189 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); 1190 1191 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 1192 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); 1193 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); 1194 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); 1195 1196 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1197 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1198 1199 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 1200 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 1201 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); 1202 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); 1203 1204 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); 1205 1206 try_istri(wot,h,s, 
"0ddc0ffeebadf00d", "00000000cafebabe"); 1207 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 1208} 1209 1210 1211////////////////////////////////////////////////////////// 1212// // 1213// ISTRI_38 // 1214// // 1215////////////////////////////////////////////////////////// 1216 1217UInt h_pcmpistri_38 ( V128* argL, V128* argR ) 1218{ 1219 V128 block[2]; 1220 memcpy(&block[0], argL, sizeof(V128)); 1221 memcpy(&block[1], argR, sizeof(V128)); 1222 ULong res, flags; 1223 __asm__ __volatile__( 1224 "subq $1024, %%rsp" "\n\t" 1225 "movdqu 0(%2), %%xmm2" "\n\t" 1226 "movdqu 16(%2), %%xmm11" "\n\t" 1227 "pcmpistri $0x38, %%xmm2, %%xmm11" "\n\t" 1228 "pushfq" "\n\t" 1229 "popq %%rdx" "\n\t" 1230 "movq %%rcx, %0" "\n\t" 1231 "movq %%rdx, %1" "\n\t" 1232 "addq $1024, %%rsp" "\n\t" 1233 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1234 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1235 ); 1236 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1237} 1238 1239UInt s_pcmpistri_38 ( V128* argLU, V128* argRU ) 1240{ 1241 V128 resV; 1242 UInt resOSZACP, resECX; 1243 Bool ok 1244 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 1245 zmask_from_V128(argLU), 1246 zmask_from_V128(argRU), 1247 0x38, False/*!isSTRM*/ 1248 ); 1249 assert(ok); 1250 resECX = resV.uInt[0]; 1251 return (resOSZACP << 16) | resECX; 1252} 1253 1254void istri_38 ( void ) 1255{ 1256 char* wot = "38"; 1257 UInt(*h)(V128*,V128*) = h_pcmpistri_38; 1258 UInt(*s)(V128*,V128*) = s_pcmpistri_38; 1259 1260 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1261 1262 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1263 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1264 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 1265 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 1266 1267 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 1268 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 1269 try_istri(wot,h,s, 
"aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 1270 1271 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1272 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1273 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1274 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1275 1276 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1277 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 1278 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 1279 1280 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1281 1282 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 1283 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 1284 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); 1285 1286 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa"); 1287 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 1288 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); 1289 1290 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 1291 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); 1292 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa"); 1293 1294 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); 1295 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); 1296 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa"); 1297 1298 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 1299 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 1300} 1301 1302 1303 1304////////////////////////////////////////////////////////// 1305// // 1306// ISTRI_46 // 1307// // 1308////////////////////////////////////////////////////////// 1309 1310UInt h_pcmpistri_46 ( V128* argL, V128* argR ) 1311{ 1312 V128 block[2]; 1313 memcpy(&block[0], argL, sizeof(V128)); 1314 memcpy(&block[1], argR, sizeof(V128)); 1315 ULong res, flags; 1316 __asm__ __volatile__( 1317 "subq $1024, %%rsp" "\n\t" 1318 "movdqu 0(%2), %%xmm2" "\n\t" 1319 "movdqu 
16(%2), %%xmm11" "\n\t" 1320 "pcmpistri $0x46, %%xmm2, %%xmm11" "\n\t" 1321 "pushfq" "\n\t" 1322 "popq %%rdx" "\n\t" 1323 "movq %%rcx, %0" "\n\t" 1324 "movq %%rdx, %1" "\n\t" 1325 "addq $1024, %%rsp" "\n\t" 1326 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1327 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1328 ); 1329 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1330} 1331 1332UInt s_pcmpistri_46 ( V128* argLU, V128* argRU ) 1333{ 1334 V128 resV; 1335 UInt resOSZACP, resECX; 1336 Bool ok 1337 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 1338 zmask_from_V128(argLU), 1339 zmask_from_V128(argRU), 1340 0x46, False/*!isSTRM*/ 1341 ); 1342 assert(ok); 1343 resECX = resV.uInt[0]; 1344 return (resOSZACP << 16) | resECX; 1345} 1346 1347void istri_46 ( void ) 1348{ 1349 char* wot = "46"; 1350 UInt(*h)(V128*,V128*) = h_pcmpistri_46; 1351 UInt(*s)(V128*,V128*) = s_pcmpistri_46; 1352 1353 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc"); 1354 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb"); 1355 try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb"); 1356 try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb"); 1357 1358 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb"); 1359 try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb"); 1360 try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb"); 1361 try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb"); 1362 try_istri(wot,h,s, "0000000000000000", "00000000000000cb"); 1363 1364 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1365 1366 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb"); 1367 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b"); 1368 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb"); 1369 1370 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb"); 1371 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb"); 1372 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b"); 1373 1374 try_istri(wot,h,s, 
"0123456789abcdef", "000000fecb975421"); 1375 try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421"); 1376 1377 try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532"); 1378 try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532"); 1379 1380 try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a"); 1381 try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7"); 1382 try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54"); 1383 try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb"); 1384} 1385 1386 1387////////////////////////////////////////////////////////// 1388// // 1389// ISTRI_30 // 1390// // 1391////////////////////////////////////////////////////////// 1392 1393UInt h_pcmpistri_30 ( V128* argL, V128* argR ) 1394{ 1395 V128 block[2]; 1396 memcpy(&block[0], argL, sizeof(V128)); 1397 memcpy(&block[1], argR, sizeof(V128)); 1398 ULong res, flags; 1399 __asm__ __volatile__( 1400 "subq $1024, %%rsp" "\n\t" 1401 "movdqu 0(%2), %%xmm2" "\n\t" 1402 "movdqu 16(%2), %%xmm11" "\n\t" 1403 "pcmpistri $0x30, %%xmm2, %%xmm11" "\n\t" 1404 "pushfq" "\n\t" 1405 "popq %%rdx" "\n\t" 1406 "movq %%rcx, %0" "\n\t" 1407 "movq %%rdx, %1" "\n\t" 1408 "addq $1024, %%rsp" "\n\t" 1409 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1410 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1411 ); 1412 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1413} 1414 1415UInt s_pcmpistri_30 ( V128* argLU, V128* argRU ) 1416{ 1417 V128 resV; 1418 UInt resOSZACP, resECX; 1419 Bool ok 1420 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 1421 zmask_from_V128(argLU), 1422 zmask_from_V128(argRU), 1423 0x30, False/*!isSTRM*/ 1424 ); 1425 assert(ok); 1426 resECX = resV.uInt[0]; 1427 return (resOSZACP << 16) | resECX; 1428} 1429 1430void istri_30 ( void ) 1431{ 1432 char* wot = "30"; 1433 UInt(*h)(V128*,V128*) = h_pcmpistri_30; 1434 UInt(*s)(V128*,V128*) = s_pcmpistri_30; 1435 1436 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); 1437 try_istri(wot,h,s, 
"abcdabcdabcdabcd", "000000000000000b"); 1438 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); 1439 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 1440 1441 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 1442 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); 1443 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); 1444 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); 1445 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); 1446 1447 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 1448 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); 1449 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); 1450 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); 1451 1452 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1453 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1454 1455 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 1456 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 1457 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); 1458 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); 1459 1460 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); 1461 1462 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 1463 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 1464} 1465 1466 1467////////////////////////////////////////////////////////// 1468// // 1469// ISTRI_40 // 1470// // 1471////////////////////////////////////////////////////////// 1472 1473UInt h_pcmpistri_40 ( V128* argL, V128* argR ) 1474{ 1475 V128 block[2]; 1476 memcpy(&block[0], argL, sizeof(V128)); 1477 memcpy(&block[1], argR, sizeof(V128)); 1478 ULong res, flags; 1479 __asm__ __volatile__( 1480 "subq $1024, %%rsp" "\n\t" 1481 "movdqu 0(%2), %%xmm2" "\n\t" 1482 "movdqu 16(%2), %%xmm11" "\n\t" 1483 "pcmpistri $0x40, %%xmm2, %%xmm11" "\n\t" 1484 "pushfq" "\n\t" 1485 "popq %%rdx" "\n\t" 1486 "movq %%rcx, %0" "\n\t" 
1487 "movq %%rdx, %1" "\n\t" 1488 "addq $1024, %%rsp" "\n\t" 1489 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1490 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1491 ); 1492 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1493} 1494 1495UInt s_pcmpistri_40 ( V128* argLU, V128* argRU ) 1496{ 1497 V128 resV; 1498 UInt resOSZACP, resECX; 1499 Bool ok 1500 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 1501 zmask_from_V128(argLU), 1502 zmask_from_V128(argRU), 1503 0x40, False/*!isSTRM*/ 1504 ); 1505 assert(ok); 1506 resECX = resV.uInt[0]; 1507 return (resOSZACP << 16) | resECX; 1508} 1509 1510void istri_40 ( void ) 1511{ 1512 char* wot = "40"; 1513 UInt(*h)(V128*,V128*) = h_pcmpistri_40; 1514 UInt(*s)(V128*,V128*) = s_pcmpistri_40; 1515 1516 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); 1517 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); 1518 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); 1519 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 1520 1521 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 1522 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); 1523 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); 1524 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); 1525 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); 1526 1527 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 1528 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); 1529 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); 1530 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); 1531 1532 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1533 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1534 1535 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 1536 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 1537 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); 1538 try_istri(wot,h,s, "0000abcdabcdabcd", 
"000000000000baba"); 1539 1540 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); 1541 1542 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 1543 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 1544} 1545 1546 1547////////////////////////////////////////////////////////// 1548// // 1549// ISTRI_0E // 1550// // 1551////////////////////////////////////////////////////////// 1552 1553__attribute__((noinline)) 1554UInt h_pcmpistri_0E ( V128* argL, V128* argR ) 1555{ 1556 V128 block[2]; 1557 memcpy(&block[0], argL, sizeof(V128)); 1558 memcpy(&block[1], argR, sizeof(V128)); 1559 ULong res = 0, flags = 0; 1560 __asm__ __volatile__( 1561 "movdqu 0(%2), %%xmm2" "\n\t" 1562 "movdqu 16(%2), %%xmm11" "\n\t" 1563 "pcmpistri $0x0E, %%xmm2, %%xmm11" "\n\t" 1564 "pushfq" "\n\t" 1565 "popq %%rdx" "\n\t" 1566 "movq %%rcx, %0" "\n\t" 1567 "movq %%rdx, %1" "\n\t" 1568 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1569 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1570 ); 1571 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1572} 1573 1574UInt s_pcmpistri_0E ( V128* argLU, V128* argRU ) 1575{ 1576 V128 resV; 1577 UInt resOSZACP, resECX; 1578 Bool ok 1579 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 1580 zmask_from_V128(argLU), 1581 zmask_from_V128(argRU), 1582 0x0E, False/*!isSTRM*/ 1583 ); 1584 assert(ok); 1585 resECX = resV.uInt[0]; 1586 return (resOSZACP << 16) | resECX; 1587} 1588 1589void istri_0E ( void ) 1590{ 1591 char* wot = "0E"; 1592 UInt(*h)(V128*,V128*) = h_pcmpistri_0E; 1593 UInt(*s)(V128*,V128*) = s_pcmpistri_0E; 1594 1595 try_istri(wot,h,s, "111111111abcde11", "00000000000abcde"); 1596 1597 try_istri(wot,h,s, "111111111abcde11", "0000abcde00abcde"); 1598 1599 try_istri(wot,h,s, "1111111111abcde1", "00000000000abcde"); 1600 try_istri(wot,h,s, "11111111111abcde", "00000000000abcde"); 1601 try_istri(wot,h,s, "111111111111abcd", "00000000000abcde"); 1602 1603 try_istri(wot,h,s, "111abcde1abcde11", "00000000000abcde"); 1604 
1605 try_istri(wot,h,s, "11abcde11abcde11", "00000000000abcde"); 1606 try_istri(wot,h,s, "1abcde111abcde11", "00000000000abcde"); 1607 try_istri(wot,h,s, "abcde1111abcde11", "00000000000abcde"); 1608 try_istri(wot,h,s, "bcde11111abcde11", "00000000000abcde"); 1609 try_istri(wot,h,s, "cde111111abcde11", "00000000000abcde"); 1610 1611 try_istri(wot,h,s, "01abcde11abcde11", "00000000000abcde"); 1612 try_istri(wot,h,s, "00abcde11abcde11", "00000000000abcde"); 1613 try_istri(wot,h,s, "000bcde11abcde11", "00000000000abcde"); 1614 1615 try_istri(wot,h,s, "00abcde10abcde11", "00000000000abcde"); 1616 try_istri(wot,h,s, "00abcde100bcde11", "00000000000abcde"); 1617 1618 try_istri(wot,h,s, "1111111111111234", "0000000000000000"); 1619 try_istri(wot,h,s, "1111111111111234", "0000000000000001"); 1620 try_istri(wot,h,s, "1111111111111234", "0000000000000011"); 1621 1622 try_istri(wot,h,s, "1111111111111234", "1111111111111234"); 1623 try_istri(wot,h,s, "a111111111111111", "000000000000000a"); 1624 try_istri(wot,h,s, "b111111111111111", "000000000000000a"); 1625 1626 try_istri(wot,h,s, "b111111111111111", "0000000000000000"); 1627 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1628 try_istri(wot,h,s, "123456789abcdef1", "0000000000000000"); 1629 try_istri(wot,h,s, "0000000000000000", "123456789abcdef1"); 1630} 1631 1632 1633////////////////////////////////////////////////////////// 1634// // 1635// ISTRI_34 // 1636// // 1637////////////////////////////////////////////////////////// 1638 1639UInt h_pcmpistri_34 ( V128* argL, V128* argR ) 1640{ 1641 V128 block[2]; 1642 memcpy(&block[0], argL, sizeof(V128)); 1643 memcpy(&block[1], argR, sizeof(V128)); 1644 ULong res, flags; 1645 __asm__ __volatile__( 1646 "subq $1024, %%rsp" "\n\t" 1647 "movdqu 0(%2), %%xmm2" "\n\t" 1648 "movdqu 16(%2), %%xmm11" "\n\t" 1649 "pcmpistri $0x34, %%xmm2, %%xmm11" "\n\t" 1650 "pushfq" "\n\t" 1651 "popq %%rdx" "\n\t" 1652 "movq %%rcx, %0" "\n\t" 1653 "movq %%rdx, %1" "\n\t" 1654 "addq $1024, 
%%rsp" "\n\t" 1655 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1656 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1657 ); 1658 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1659} 1660 1661UInt s_pcmpistri_34 ( V128* argLU, V128* argRU ) 1662{ 1663 V128 resV; 1664 UInt resOSZACP, resECX; 1665 Bool ok 1666 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 1667 zmask_from_V128(argLU), 1668 zmask_from_V128(argRU), 1669 0x34, False/*!isSTRM*/ 1670 ); 1671 assert(ok); 1672 resECX = resV.uInt[0]; 1673 return (resOSZACP << 16) | resECX; 1674} 1675 1676void istri_34 ( void ) 1677{ 1678 char* wot = "34"; 1679 UInt(*h)(V128*,V128*) = h_pcmpistri_34; 1680 UInt(*s)(V128*,V128*) = s_pcmpistri_34; 1681 1682 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc"); 1683 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb"); 1684 try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb"); 1685 try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb"); 1686 1687 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb"); 1688 try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb"); 1689 try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb"); 1690 try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb"); 1691 try_istri(wot,h,s, "0000000000000000", "00000000000000cb"); 1692 1693 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1694 1695 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb"); 1696 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b"); 1697 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb"); 1698 1699 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb"); 1700 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb"); 1701 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b"); 1702 1703 try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421"); 1704 try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421"); 1705 1706 try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532"); 1707 
try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532"); 1708 1709 try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a"); 1710 try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7"); 1711 try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54"); 1712 try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb"); 1713} 1714 1715 1716////////////////////////////////////////////////////////// 1717// // 1718// ISTRI_14 // 1719// // 1720////////////////////////////////////////////////////////// 1721 1722UInt h_pcmpistri_14 ( V128* argL, V128* argR ) 1723{ 1724 V128 block[2]; 1725 memcpy(&block[0], argL, sizeof(V128)); 1726 memcpy(&block[1], argR, sizeof(V128)); 1727 ULong res, flags; 1728 __asm__ __volatile__( 1729 "subq $1024, %%rsp" "\n\t" 1730 "movdqu 0(%2), %%xmm2" "\n\t" 1731 "movdqu 16(%2), %%xmm11" "\n\t" 1732 "pcmpistri $0x14, %%xmm2, %%xmm11" "\n\t" 1733 "pushfq" "\n\t" 1734 "popq %%rdx" "\n\t" 1735 "movq %%rcx, %0" "\n\t" 1736 "movq %%rdx, %1" "\n\t" 1737 "addq $1024, %%rsp" "\n\t" 1738 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1739 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1740 ); 1741 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1742} 1743 1744UInt s_pcmpistri_14 ( V128* argLU, V128* argRU ) 1745{ 1746 V128 resV; 1747 UInt resOSZACP, resECX; 1748 Bool ok 1749 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 1750 zmask_from_V128(argLU), 1751 zmask_from_V128(argRU), 1752 0x14, False/*!isSTRM*/ 1753 ); 1754 assert(ok); 1755 resECX = resV.uInt[0]; 1756 return (resOSZACP << 16) | resECX; 1757} 1758 1759void istri_14 ( void ) 1760{ 1761 char* wot = "14"; 1762 UInt(*h)(V128*,V128*) = h_pcmpistri_14; 1763 UInt(*s)(V128*,V128*) = s_pcmpistri_14; 1764 1765 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc"); 1766 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb"); 1767 try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb"); 1768 try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb"); 1769 1770 
try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb"); 1771 try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb"); 1772 try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb"); 1773 try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb"); 1774 try_istri(wot,h,s, "0000000000000000", "00000000000000cb"); 1775 1776 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1777 1778 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb"); 1779 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b"); 1780 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb"); 1781 1782 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb"); 1783 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb"); 1784 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b"); 1785 1786 try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421"); 1787 try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421"); 1788 1789 try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532"); 1790 try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532"); 1791 1792 try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a"); 1793 try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7"); 1794 try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54"); 1795 try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb"); 1796} 1797 1798 1799////////////////////////////////////////////////////////// 1800// // 1801// main // 1802// // 1803////////////////////////////////////////////////////////// 1804 1805int main ( void ) 1806{ 1807 istri_4A(); 1808 istri_3A(); 1809 istri_08(); 1810 istri_1A(); 1811 istri_02(); 1812 istri_0C(); 1813 istri_12(); 1814 istri_44(); 1815 istri_00(); 1816 istri_38(); 1817 istri_46(); 1818 istri_30(); 1819 istri_40(); 1820 istri_0E(); 1821 istri_14(); 1822 istri_34(); 1823 return 0; 1824} 1825