/* Tests in detail the core arithmetic for pcmp{e,i}str{i,m} using
   pcmpistri to drive it.  Does not check the e-vs-i or i-vs-m
   aspect. */

#include <string.h>
#include <stdio.h>
#include <assert.h>

typedef  unsigned int            UInt;
typedef  signed int              Int;
typedef  unsigned char           UChar;
typedef  unsigned long long int  ULong;
typedef  UChar                   Bool;
#define False ((Bool)0)
#define True  ((Bool)1)

/* A 128-bit vector, viewable either as 16 bytes or as 4 32-bit
   words. */
//typedef  unsigned char  V128[16];
typedef
   union {
      UChar uChar[16];
      UInt  uInt[4];
   }
   V128;

/* Bit positions of the O, S, Z, A, C and P flags in the flags word
   that the tests compare, and the corresponding single-bit masks. */
#define SHIFT_O 11
#define SHIFT_S 7
#define SHIFT_Z 6
#define SHIFT_A 4
#define SHIFT_C 0
#define SHIFT_P 2

#define MASK_O (1ULL << SHIFT_O)
#define MASK_S (1ULL << SHIFT_S)
#define MASK_Z (1ULL << SHIFT_Z)
#define MASK_A (1ULL << SHIFT_A)
#define MASK_C (1ULL << SHIFT_C)
#define MASK_P (1ULL << SHIFT_P)


/* Count the leading zero bits of a 32-bit value.  Returns 32 when x
   is zero.

   Implemented as a binary search using only unsigned arithmetic, so
   the result does not depend on how the compiler converts
   out-of-range values to signed int or shifts negative values
   right. */
UInt clz32 ( UInt x )
{
   UInt n = 0;
   if (x == 0)
      return 32;
   if ((x & 0xFFFF0000u) == 0) { n += 16; x <<= 16; }
   if ((x & 0xFF000000u) == 0) { n +=  8; x <<=  8; }
   if ((x & 0xF0000000u) == 0) { n +=  4; x <<=  4; }
   if ((x & 0xC0000000u) == 0) { n +=  2; x <<=  2; }
   if ((x & 0x80000000u) == 0) { n +=  1; }
   return n;
}

/* Count the trailing zero bits of a 32-bit value.  Returns 32 when x
   is zero.  (~x) & (x-1) has a 1 exactly in each position below the
   lowest set bit of x, so its leading-zero count gives the answer. */
UInt ctz32 ( UInt x )
{
   return 32 - clz32((~x) & (x-1));
}

/* Build a 16-byte vector from a 16-character hex "summary" string.

   Each hex digit d becomes the byte 0xdd.  The string is read right
   to left, so the last character of |summary| lands in
   dst->uChar[0] and the first in dst->uChar[15].  A '0' digit
   therefore produces a zero byte.

   Asserts that |summary| is exactly 16 characters long and contains
   only hex digits (either case). */
void expand ( V128* dst, const char* summary )
{
   Int i;
   assert( strlen(summary) == 16 );
   for (i = 0; i < 16; i++) {
      UChar xx = 0;
      UChar x  = (UChar)summary[15-i];
      if      (x >= '0' && x <= '9') { xx = x - '0'; }
      else if (x >= 'A' && x <= 'F') { xx = x - 'A' + 10; }
      else if (x >= 'a' && x <= 'f') { xx = x - 'a' + 10; }
      else assert(0);

      assert(xx < 16);
      /* replicate the nibble into both halves of the byte */
      dst->uChar[i] = (UChar)((xx << 4) | xx);
   }
}

/* Run one test case and print the outcome.

   which  two-character label for the imm8 variant under test
   h_fn   first implementation to call; its result is printed first
   s_fn   second implementation to call; its result is printed second
   summL  16 hex digit summary of the left argument (see expand)
   summR  16 hex digit summary of the right argument

   Both functions are called on the same expanded arguments.  The
   printed line ends in "!!!!" if the two results differ. */
void try_istri ( const char* which,
                 UInt(*h_fn)(V128*,V128*),
                 UInt(*s_fn)(V128*,V128*),
                 const char* summL, const char* summR )
{
   assert(strlen(which) == 2);
   V128 argL, argR;
   expand(&argL, summL);
   expand(&argR, summR);
   UInt h_res = h_fn(&argL, &argR);
   UInt s_res = s_fn(&argL, &argR);
   printf("istri %s %s %s -> %08x %08x %s\n",
          which, summL, summR, h_res, s_res, h_res == s_res ? "" : "!!!!");
}

/* Return a 16-bit mask with bit i set iff byte i of |arg| is zero. */
UInt zmask_from_V128 ( V128* arg )
{
   UInt i, res = 0;
   for (i = 0; i < 16; i++) {
      res |=  ((arg->uChar[i] == 0) ? 1 : 0) << i;
   }
   return res;
}

//////////////////////////////////////////////////////////
//                                                      //
//                       GENERAL                        //
//                                                      //
//////////////////////////////////////////////////////////


/* Given partial results from a pcmpXstrX operation (intRes1,
   basically), generate an I format (index value for ECX) output, and
   also the new OSZACP flags.

   resV       OUT: the new ECX value is stored in resV->uInt[0]; the
              other words of *resV are not written
   resOSZACP  OUT: the new flags, using the MASK_* bit positions
   intRes1    16-bit intermediate result
   zmaskL/R   zero-byte masks of the left/right arguments
   validL     mask of the valid lanes of the left argument; only used
              for the masked-negative polarity
   pol        polarity, 0 .. 3 (asserted)
   idx        0: return index of least significant set bit,
              1: return index of most significant set bit (asserted
              to be 0 or 1)

   If the polarity-adjusted result is zero, the index is 16.
*/
static
void pcmpXstrX_WRK_gen_output_fmt_I(/*OUT*/V128* resV,
                                    /*OUT*/UInt* resOSZACP,
                                    UInt intRes1,
                                    UInt zmaskL, UInt zmaskR,
                                    UInt validL,
                                    UInt pol, UInt idx )
{
   assert((pol >> 2) == 0);
   assert((idx >> 1) == 0);

   UInt intRes2 = 0;
   switch (pol) {
      case 0: intRes2 = intRes1;          break; // pol +
      case 1: intRes2 = ~intRes1;         break; // pol -
      case 2: intRes2 = intRes1;          break; // pol m+
      case 3: intRes2 = intRes1 ^ validL; break; // pol m-
   }
   intRes2 &= 0xFFFF;

   // generate ecx value
   UInt newECX = 0;
   if (idx) {
      // index of ms-1-bit
      newECX = intRes2 == 0 ? 16 : (31 - clz32(intRes2));
   } else {
      // index of ls-1-bit
      newECX = intRes2 == 0 ? 16 : ctz32(intRes2);
   }

   // store through the union member rather than a pointer cast
   resV->uInt[0] = newECX;

   // generate new flags, common to all ISTRI and ISTRM cases
   *resOSZACP    // A, P are zero
      = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0
      | ((zmaskL == 0)  ? 0 : MASK_Z) // Z == 1 iff any in argL is 0
      | ((zmaskR == 0)  ? 0 : MASK_S) // S == 1 iff any in argR is 0
      | ((intRes2 & 1) << SHIFT_O);   // O == IntRes2[0]
}

167 168 For xSTRI variants, the new ECX value is placed in the 32 bits 169 pointed to by *resV. For xSTRM variants, the result is a 128 bit 170 value and is placed at *resV in the obvious way. 171 172 For all variants, the new OSZACP value is placed at *resOSZACP. 173 174 argLV and argRV are the vector args. The caller must prepare a 175 16-bit mask for each, zmaskL and zmaskR. For ISTRx variants this 176 must be 1 for each zero byte of of the respective arg. For ESTRx 177 variants this is derived from the explicit length indication, and 178 must be 0 in all places except at the bit index corresponding to 179 the valid length (0 .. 16). If the valid length is 16 then the 180 mask must be all zeroes. In all cases, bits 31:16 must be zero. 181 182 imm8 is the original immediate from the instruction. isSTRM 183 indicates whether this is a xSTRM or xSTRI variant, which controls 184 how much of *res is written. 185 186 If the given imm8 case can be handled, the return value is True. 187 If not, False is returned, and neither *res not *resOSZACP are 188 altered. 189*/ 190 191Bool pcmpXstrX_WRK ( /*OUT*/V128* resV, 192 /*OUT*/UInt* resOSZACP, 193 V128* argLV, V128* argRV, 194 UInt zmaskL, UInt zmaskR, 195 UInt imm8, Bool isSTRM ) 196{ 197 assert(imm8 < 0x80); 198 assert((zmaskL >> 16) == 0); 199 assert((zmaskR >> 16) == 0); 200 201 /* Explicitly reject any imm8 values that haven't been validated, 202 even if they would probably work. Life is too short to have 203 unvalidated cases in the code base. 
*/ 204 switch (imm8) { 205 case 0x00: 206 case 0x02: case 0x08: case 0x0C: case 0x12: case 0x1A: 207 case 0x38: case 0x3A: case 0x44: case 0x4A: 208 break; 209 default: 210 return False; 211 } 212 213 UInt fmt = (imm8 >> 0) & 3; // imm8[1:0] data format 214 UInt agg = (imm8 >> 2) & 3; // imm8[3:2] aggregation fn 215 UInt pol = (imm8 >> 4) & 3; // imm8[5:4] polarity 216 UInt idx = (imm8 >> 6) & 1; // imm8[6] 1==msb/bytemask 217 218 /*----------------------------------------*/ 219 /*-- strcmp on byte data --*/ 220 /*----------------------------------------*/ 221 222 if (agg == 2/*equal each, aka strcmp*/ 223 && (fmt == 0/*ub*/ || fmt == 2/*sb*/) 224 && !isSTRM) { 225 Int i; 226 UChar* argL = (UChar*)argLV; 227 UChar* argR = (UChar*)argRV; 228 UInt boolResII = 0; 229 for (i = 15; i >= 0; i--) { 230 UChar cL = argL[i]; 231 UChar cR = argR[i]; 232 boolResII = (boolResII << 1) | (cL == cR ? 1 : 0); 233 } 234 UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL)) 235 UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR)) 236 237 // do invalidation, common to all equal-each cases 238 UInt intRes1 239 = (boolResII & validL & validR) // if both valid, use cmpres 240 | (~ (validL | validR)); // if both invalid, force 1 241 // else force 0 242 intRes1 &= 0xFFFF; 243 244 // generate I-format output 245 pcmpXstrX_WRK_gen_output_fmt_I( 246 resV, resOSZACP, 247 intRes1, zmaskL, zmaskR, validL, pol, idx 248 ); 249 250 return True; 251 } 252 253 /*----------------------------------------*/ 254 /*-- set membership on byte data --*/ 255 /*----------------------------------------*/ 256 257 if (agg == 0/*equal any, aka find chars in a set*/ 258 && (fmt == 0/*ub*/ || fmt == 2/*sb*/) 259 && !isSTRM) { 260 /* argL: the string, argR: charset */ 261 UInt si, ci; 262 UChar* argL = (UChar*)argLV; 263 UChar* argR = (UChar*)argRV; 264 UInt boolRes = 0; 265 UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL)) 266 UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR)) 267 268 for (si = 0; si 
< 16; si++) { 269 if ((validL & (1 << si)) == 0) 270 // run off the end of the string. 271 break; 272 UInt m = 0; 273 for (ci = 0; ci < 16; ci++) { 274 if ((validR & (1 << ci)) == 0) break; 275 if (argR[ci] == argL[si]) { m = 1; break; } 276 } 277 boolRes |= (m << si); 278 } 279 280 // boolRes is "pre-invalidated" 281 UInt intRes1 = boolRes & 0xFFFF; 282 283 // generate I-format output 284 pcmpXstrX_WRK_gen_output_fmt_I( 285 resV, resOSZACP, 286 intRes1, zmaskL, zmaskR, validL, pol, idx 287 ); 288 289 return True; 290 } 291 292 /*----------------------------------------*/ 293 /*-- substring search on byte data --*/ 294 /*----------------------------------------*/ 295 296 if (agg == 3/*equal ordered, aka substring search*/ 297 && (fmt == 0/*ub*/ || fmt == 2/*sb*/) 298 && !isSTRM) { 299 300 /* argL: haystack, argR: needle */ 301 UInt ni, hi; 302 UChar* argL = (UChar*)argLV; 303 UChar* argR = (UChar*)argRV; 304 UInt boolRes = 0; 305 UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL)) 306 UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR)) 307 for (hi = 0; hi < 16; hi++) { 308 if ((validL & (1 << hi)) == 0) 309 // run off the end of the haystack 310 break; 311 UInt m = 1; 312 for (ni = 0; ni < 16; ni++) { 313 if ((validR & (1 << ni)) == 0) break; 314 UInt i = ni + hi; 315 if (i >= 16) break; 316 if (argL[i] != argR[ni]) { m = 0; break; } 317 } 318 boolRes |= (m << hi); 319 } 320 321 // boolRes is "pre-invalidated" 322 UInt intRes1 = boolRes & 0xFFFF; 323 324 // generate I-format output 325 pcmpXstrX_WRK_gen_output_fmt_I( 326 resV, resOSZACP, 327 intRes1, zmaskL, zmaskR, validL, pol, idx 328 ); 329 330 return True; 331 } 332 333 /*----------------------------------------*/ 334 /*-- ranges, unsigned byte data --*/ 335 /*----------------------------------------*/ 336 337 if (agg == 1/*ranges*/ 338 && fmt == 0/*ub*/ 339 && !isSTRM) { 340 341 /* argL: string, argR: range-pairs */ 342 UInt ri, si; 343 UChar* argL = (UChar*)argLV; 344 UChar* argR = (UChar*)argRV; 345 
UInt boolRes = 0; 346 UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL)) 347 UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR)) 348 for (si = 0; si < 16; si++) { 349 if ((validL & (1 << si)) == 0) 350 // run off the end of the string 351 break; 352 UInt m = 0; 353 for (ri = 0; ri < 16; ri += 2) { 354 if ((validR & (3 << ri)) != (3 << ri)) break; 355 if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) { 356 m = 1; break; 357 } 358 } 359 boolRes |= (m << si); 360 } 361 362 // boolRes is "pre-invalidated" 363 UInt intRes1 = boolRes & 0xFFFF; 364 365 // generate I-format output 366 pcmpXstrX_WRK_gen_output_fmt_I( 367 resV, resOSZACP, 368 intRes1, zmaskL, zmaskR, validL, pol, idx 369 ); 370 371 return True; 372 } 373 374 return False; 375} 376 377 378////////////////////////////////////////////////////////// 379// // 380// ISTRI_4A // 381// // 382////////////////////////////////////////////////////////// 383 384UInt h_pcmpistri_4A ( V128* argL, V128* argR ) 385{ 386 V128 block[2]; 387 memcpy(&block[0], argL, sizeof(V128)); 388 memcpy(&block[1], argR, sizeof(V128)); 389 ULong res, flags; 390 __asm__ __volatile__( 391 "subq $1024, %%rsp" "\n\t" 392 "movdqu 0(%2), %%xmm2" "\n\t" 393 "movdqu 16(%2), %%xmm11" "\n\t" 394 "pcmpistri $0x4A, %%xmm2, %%xmm11" "\n\t" 395 "pushfq" "\n\t" 396 "popq %%rdx" "\n\t" 397 "movq %%rcx, %0" "\n\t" 398 "movq %%rdx, %1" "\n\t" 399 "addq $1024, %%rsp" "\n\t" 400 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 401 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 402 ); 403 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 404} 405 406UInt s_pcmpistri_4A ( V128* argLU, V128* argRU ) 407{ 408 V128 resV; 409 UInt resOSZACP, resECX; 410 Bool ok 411 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 412 zmask_from_V128(argLU), 413 zmask_from_V128(argRU), 414 0x4A, False/*!isSTRM*/ 415 ); 416 assert(ok); 417 resECX = resV.uInt[0]; 418 return (resOSZACP << 16) | resECX; 419} 420 421void istri_4A ( void ) 422{ 423 char* wot = "4A"; 424 
UInt(*h)(V128*,V128*) = h_pcmpistri_4A; 425 UInt(*s)(V128*,V128*) = s_pcmpistri_4A; 426 427 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 428 429 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 430 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 431 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 432 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 433 434 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 435 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 436 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 437 438 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 439 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 440 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 441 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 442 443 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 444 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 445 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 446 447 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 448 449 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 450 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 451 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); 452 453 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa"); 454 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 455 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); 456 457 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 458 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); 459 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa"); 460 461 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); 462 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); 463 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa"); 464 465 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 466 
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 467} 468 469////////////////////////////////////////////////////////// 470// // 471// ISTRI_3A // 472// // 473////////////////////////////////////////////////////////// 474 475UInt h_pcmpistri_3A ( V128* argL, V128* argR ) 476{ 477 V128 block[2]; 478 memcpy(&block[0], argL, sizeof(V128)); 479 memcpy(&block[1], argR, sizeof(V128)); 480 ULong res, flags; 481 __asm__ __volatile__( 482 "subq $1024, %%rsp" "\n\t" 483 "movdqu 0(%2), %%xmm2" "\n\t" 484 "movdqu 16(%2), %%xmm11" "\n\t" 485 "pcmpistri $0x3A, %%xmm2, %%xmm11" "\n\t" 486 "pushfq" "\n\t" 487 "popq %%rdx" "\n\t" 488 "movq %%rcx, %0" "\n\t" 489 "movq %%rdx, %1" "\n\t" 490 "addq $1024, %%rsp" "\n\t" 491 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 492 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 493 ); 494 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 495} 496 497UInt s_pcmpistri_3A ( V128* argLU, V128* argRU ) 498{ 499 V128 resV; 500 UInt resOSZACP, resECX; 501 Bool ok 502 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 503 zmask_from_V128(argLU), 504 zmask_from_V128(argRU), 505 0x3A, False/*!isSTRM*/ 506 ); 507 assert(ok); 508 resECX = resV.uInt[0]; 509 return (resOSZACP << 16) | resECX; 510} 511 512void istri_3A ( void ) 513{ 514 char* wot = "3A"; 515 UInt(*h)(V128*,V128*) = h_pcmpistri_3A; 516 UInt(*s)(V128*,V128*) = s_pcmpistri_3A; 517 518 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 519 520 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 521 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 522 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 523 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 524 525 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 526 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 527 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 528 529 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 530 
try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 531 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 532 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 533 534 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 535 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 536 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 537 538 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 539 540 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 541 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 542 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); 543 544 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa"); 545 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 546 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); 547 548 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 549 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); 550 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa"); 551 552 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); 553 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); 554 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa"); 555 556 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 557 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 558} 559 560 561 562////////////////////////////////////////////////////////// 563// // 564// ISTRI_0C // 565// // 566////////////////////////////////////////////////////////// 567 568__attribute__((noinline)) 569UInt h_pcmpistri_0C ( V128* argL, V128* argR ) 570{ 571 V128 block[2]; 572 memcpy(&block[0], argL, sizeof(V128)); 573 memcpy(&block[1], argR, sizeof(V128)); 574 ULong res = 0, flags = 0; 575 __asm__ __volatile__( 576 "movdqa 0(%2), %%xmm2" "\n\t" 577 "movdqa 16(%2), %%xmm11" "\n\t" 578 "pcmpistri $0x0C, %%xmm2, %%xmm11" "\n\t" 579 //"pcmpistrm $0x0C, %%xmm2, %%xmm11" "\n\t" 580 //"movd %%xmm0, %%ecx" "\n\t" 581 
"pushfq" "\n\t" 582 "popq %%rdx" "\n\t" 583 "movq %%rcx, %0" "\n\t" 584 "movq %%rdx, %1" "\n\t" 585 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 586 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 587 ); 588 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 589} 590 591UInt s_pcmpistri_0C ( V128* argLU, V128* argRU ) 592{ 593 V128 resV; 594 UInt resOSZACP, resECX; 595 Bool ok 596 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 597 zmask_from_V128(argLU), 598 zmask_from_V128(argRU), 599 0x0C, False/*!isSTRM*/ 600 ); 601 assert(ok); 602 resECX = resV.uInt[0]; 603 return (resOSZACP << 16) | resECX; 604} 605 606void istri_0C ( void ) 607{ 608 char* wot = "0C"; 609 UInt(*h)(V128*,V128*) = h_pcmpistri_0C; 610 UInt(*s)(V128*,V128*) = s_pcmpistri_0C; 611 612 try_istri(wot,h,s, "111111111abcde11", "00000000000abcde"); 613 614 try_istri(wot,h,s, "111111111abcde11", "0000abcde00abcde"); 615 616 try_istri(wot,h,s, "1111111111abcde1", "00000000000abcde"); 617 try_istri(wot,h,s, "11111111111abcde", "00000000000abcde"); 618 try_istri(wot,h,s, "111111111111abcd", "00000000000abcde"); 619 620 try_istri(wot,h,s, "111abcde1abcde11", "00000000000abcde"); 621 622 try_istri(wot,h,s, "11abcde11abcde11", "00000000000abcde"); 623 try_istri(wot,h,s, "1abcde111abcde11", "00000000000abcde"); 624 try_istri(wot,h,s, "abcde1111abcde11", "00000000000abcde"); 625 try_istri(wot,h,s, "bcde11111abcde11", "00000000000abcde"); 626 try_istri(wot,h,s, "cde111111abcde11", "00000000000abcde"); 627 628 try_istri(wot,h,s, "01abcde11abcde11", "00000000000abcde"); 629 try_istri(wot,h,s, "00abcde11abcde11", "00000000000abcde"); 630 try_istri(wot,h,s, "000bcde11abcde11", "00000000000abcde"); 631 632 try_istri(wot,h,s, "00abcde10abcde11", "00000000000abcde"); 633 try_istri(wot,h,s, "00abcde100bcde11", "00000000000abcde"); 634 635 try_istri(wot,h,s, "1111111111111234", "0000000000000000"); 636 try_istri(wot,h,s, "1111111111111234", "0000000000000001"); 637 try_istri(wot,h,s, "1111111111111234", 
"0000000000000011"); 638 639 try_istri(wot,h,s, "1111111111111234", "1111111111111234"); 640 try_istri(wot,h,s, "a111111111111111", "000000000000000a"); 641 try_istri(wot,h,s, "b111111111111111", "000000000000000a"); 642} 643 644 645////////////////////////////////////////////////////////// 646// // 647// ISTRI_08 // 648// // 649////////////////////////////////////////////////////////// 650 651UInt h_pcmpistri_08 ( V128* argL, V128* argR ) 652{ 653 V128 block[2]; 654 memcpy(&block[0], argL, sizeof(V128)); 655 memcpy(&block[1], argR, sizeof(V128)); 656 ULong res, flags; 657 __asm__ __volatile__( 658 "subq $1024, %%rsp" "\n\t" 659 "movdqu 0(%2), %%xmm2" "\n\t" 660 "movdqu 16(%2), %%xmm11" "\n\t" 661 "pcmpistri $0x08, %%xmm2, %%xmm11" "\n\t" 662 "pushfq" "\n\t" 663 "popq %%rdx" "\n\t" 664 "movq %%rcx, %0" "\n\t" 665 "movq %%rdx, %1" "\n\t" 666 "addq $1024, %%rsp" "\n\t" 667 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 668 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 669 ); 670 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 671} 672 673UInt s_pcmpistri_08 ( V128* argLU, V128* argRU ) 674{ 675 V128 resV; 676 UInt resOSZACP, resECX; 677 Bool ok 678 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 679 zmask_from_V128(argLU), 680 zmask_from_V128(argRU), 681 0x08, False/*!isSTRM*/ 682 ); 683 assert(ok); 684 resECX = resV.uInt[0]; 685 return (resOSZACP << 16) | resECX; 686} 687 688void istri_08 ( void ) 689{ 690 char* wot = "08"; 691 UInt(*h)(V128*,V128*) = h_pcmpistri_08; 692 UInt(*s)(V128*,V128*) = s_pcmpistri_08; 693 694 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 695 696 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 697 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 698 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 699 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 700 701 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 702 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", 
"aaaaaaaaa2aaaaaa"); 703 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 704 705 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 706 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 707 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 708 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 709 710 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 711 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 712 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 713 714 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 715 716 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 717 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 718 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); 719 720 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa"); 721 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 722 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); 723 724 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 725 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); 726 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa"); 727 728 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); 729 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); 730 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa"); 731 732 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 733 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 734} 735 736 737 738////////////////////////////////////////////////////////// 739// // 740// ISTRI_1A // 741// // 742////////////////////////////////////////////////////////// 743 744UInt h_pcmpistri_1A ( V128* argL, V128* argR ) 745{ 746 V128 block[2]; 747 memcpy(&block[0], argL, sizeof(V128)); 748 memcpy(&block[1], argR, sizeof(V128)); 749 ULong res, flags; 750 __asm__ __volatile__( 751 "subq $1024, %%rsp" "\n\t" 752 "movdqu 0(%2), %%xmm2" "\n\t" 753 "movdqu 16(%2), 
%%xmm11" "\n\t" 754 "pcmpistri $0x1A, %%xmm2, %%xmm11" "\n\t" 755 "pushfq" "\n\t" 756 "popq %%rdx" "\n\t" 757 "movq %%rcx, %0" "\n\t" 758 "movq %%rdx, %1" "\n\t" 759 "addq $1024, %%rsp" "\n\t" 760 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 761 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 762 ); 763 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 764} 765 766UInt s_pcmpistri_1A ( V128* argLU, V128* argRU ) 767{ 768 V128 resV; 769 UInt resOSZACP, resECX; 770 Bool ok 771 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 772 zmask_from_V128(argLU), 773 zmask_from_V128(argRU), 774 0x1A, False/*!isSTRM*/ 775 ); 776 assert(ok); 777 resECX = resV.uInt[0]; 778 return (resOSZACP << 16) | resECX; 779} 780 781void istri_1A ( void ) 782{ 783 char* wot = "1A"; 784 UInt(*h)(V128*,V128*) = h_pcmpistri_1A; 785 UInt(*s)(V128*,V128*) = s_pcmpistri_1A; 786 787 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 788 789 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 790 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 791 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 792 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 793 794 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 795 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 796 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 797 798 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 799 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 800 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 801 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 802 803 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 804 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 805 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 806 807 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 808 809 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 810 
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 811 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); 812 813 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa"); 814 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 815 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); 816 817 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 818 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); 819 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa"); 820 821 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); 822 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); 823 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa"); 824 825 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 826 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 827} 828 829 830 831////////////////////////////////////////////////////////// 832// // 833// ISTRI_02 // 834// // 835////////////////////////////////////////////////////////// 836 837UInt h_pcmpistri_02 ( V128* argL, V128* argR ) 838{ 839 V128 block[2]; 840 memcpy(&block[0], argL, sizeof(V128)); 841 memcpy(&block[1], argR, sizeof(V128)); 842 ULong res, flags; 843 __asm__ __volatile__( 844 "subq $1024, %%rsp" "\n\t" 845 "movdqu 0(%2), %%xmm2" "\n\t" 846 "movdqu 16(%2), %%xmm11" "\n\t" 847 "pcmpistri $0x02, %%xmm2, %%xmm11" "\n\t" 848//"pcmpistrm $0x02, %%xmm2, %%xmm11" "\n\t" 849//"movd %%xmm0, %%ecx" "\n\t" 850 "pushfq" "\n\t" 851 "popq %%rdx" "\n\t" 852 "movq %%rcx, %0" "\n\t" 853 "movq %%rdx, %1" "\n\t" 854 "addq $1024, %%rsp" "\n\t" 855 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 856 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 857 ); 858 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 859} 860 861UInt s_pcmpistri_02 ( V128* argLU, V128* argRU ) 862{ 863 V128 resV; 864 UInt resOSZACP, resECX; 865 Bool ok 866 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 867 zmask_from_V128(argLU), 868 zmask_from_V128(argRU), 869 
0x02, False/*!isSTRM*/ 870 ); 871 assert(ok); 872 resECX = resV.uInt[0]; 873 return (resOSZACP << 16) | resECX; 874} 875 876void istri_02 ( void ) 877{ 878 char* wot = "02"; 879 UInt(*h)(V128*,V128*) = h_pcmpistri_02; 880 UInt(*s)(V128*,V128*) = s_pcmpistri_02; 881 882 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); 883 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); 884 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); 885 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 886 887 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 888 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); 889 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); 890 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); 891 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); 892 893 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 894 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); 895 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); 896 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); 897 898 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 899 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 900 901 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 902 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 903 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); 904 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); 905 906 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); 907 908 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 909 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 910} 911 912 913////////////////////////////////////////////////////////// 914// // 915// ISTRI_12 // 916// // 917////////////////////////////////////////////////////////// 918 919UInt h_pcmpistri_12 ( V128* argL, V128* argR ) 920{ 921 V128 block[2]; 922 memcpy(&block[0], argL, sizeof(V128)); 923 
memcpy(&block[1], argR, sizeof(V128)); 924 ULong res, flags; 925 __asm__ __volatile__( 926 "subq $1024, %%rsp" "\n\t" 927 "movdqu 0(%2), %%xmm2" "\n\t" 928 "movdqu 16(%2), %%xmm11" "\n\t" 929 "pcmpistri $0x12, %%xmm2, %%xmm11" "\n\t" 930//"pcmpistrm $0x12, %%xmm2, %%xmm11" "\n\t" 931//"movd %%xmm0, %%ecx" "\n\t" 932 "pushfq" "\n\t" 933 "popq %%rdx" "\n\t" 934 "movq %%rcx, %0" "\n\t" 935 "movq %%rdx, %1" "\n\t" 936 "addq $1024, %%rsp" "\n\t" 937 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 938 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 939 ); 940 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 941} 942 943UInt s_pcmpistri_12 ( V128* argLU, V128* argRU ) 944{ 945 V128 resV; 946 UInt resOSZACP, resECX; 947 Bool ok 948 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 949 zmask_from_V128(argLU), 950 zmask_from_V128(argRU), 951 0x12, False/*!isSTRM*/ 952 ); 953 assert(ok); 954 resECX = resV.uInt[0]; 955 return (resOSZACP << 16) | resECX; 956} 957 958void istri_12 ( void ) 959{ 960 char* wot = "12"; 961 UInt(*h)(V128*,V128*) = h_pcmpistri_12; 962 UInt(*s)(V128*,V128*) = s_pcmpistri_12; 963 964 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); 965 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); 966 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); 967 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 968 969 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 970 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); 971 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); 972 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); 973 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); 974 975 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 976 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); 977 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); 978 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); 979 980 try_istri(wot,h,s, 
"0000000000000000", "0000000000000000"); 981 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 982 983 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 984 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 985 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); 986 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); 987 988 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); 989 990 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 991 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 992} 993 994 995 996////////////////////////////////////////////////////////// 997// // 998// ISTRI_44 // 999// // 1000////////////////////////////////////////////////////////// 1001 1002UInt h_pcmpistri_44 ( V128* argL, V128* argR ) 1003{ 1004 V128 block[2]; 1005 memcpy(&block[0], argL, sizeof(V128)); 1006 memcpy(&block[1], argR, sizeof(V128)); 1007 ULong res, flags; 1008 __asm__ __volatile__( 1009 "subq $1024, %%rsp" "\n\t" 1010 "movdqu 0(%2), %%xmm2" "\n\t" 1011 "movdqu 16(%2), %%xmm11" "\n\t" 1012 "pcmpistri $0x44, %%xmm2, %%xmm11" "\n\t" 1013//"pcmpistrm $0x04, %%xmm2, %%xmm11" "\n\t" 1014//"movd %%xmm0, %%ecx" "\n\t" 1015 "pushfq" "\n\t" 1016 "popq %%rdx" "\n\t" 1017 "movq %%rcx, %0" "\n\t" 1018 "movq %%rdx, %1" "\n\t" 1019 "addq $1024, %%rsp" "\n\t" 1020 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1021 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1022 ); 1023 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1024} 1025 1026UInt s_pcmpistri_44 ( V128* argLU, V128* argRU ) 1027{ 1028 V128 resV; 1029 UInt resOSZACP, resECX; 1030 Bool ok 1031 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 1032 zmask_from_V128(argLU), 1033 zmask_from_V128(argRU), 1034 0x44, False/*!isSTRM*/ 1035 ); 1036 assert(ok); 1037 resECX = resV.uInt[0]; 1038 return (resOSZACP << 16) | resECX; 1039} 1040 1041void istri_44 ( void ) 1042{ 1043 char* wot = "44"; 1044 UInt(*h)(V128*,V128*) = h_pcmpistri_44; 1045 
UInt(*s)(V128*,V128*) = s_pcmpistri_44; 1046 1047 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc"); 1048 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb"); 1049 try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb"); 1050 try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb"); 1051 1052 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb"); 1053 try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb"); 1054 try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb"); 1055 try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb"); 1056 try_istri(wot,h,s, "0000000000000000", "00000000000000cb"); 1057 1058 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1059 1060 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb"); 1061 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b"); 1062 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb"); 1063 1064 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb"); 1065 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb"); 1066 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b"); 1067 1068 try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421"); 1069 try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421"); 1070 1071 try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532"); 1072 try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532"); 1073} 1074 1075 1076////////////////////////////////////////////////////////// 1077// // 1078// ISTRI_00 // 1079// // 1080////////////////////////////////////////////////////////// 1081 1082UInt h_pcmpistri_00 ( V128* argL, V128* argR ) 1083{ 1084 V128 block[2]; 1085 memcpy(&block[0], argL, sizeof(V128)); 1086 memcpy(&block[1], argR, sizeof(V128)); 1087 ULong res, flags; 1088 __asm__ __volatile__( 1089 "subq $1024, %%rsp" "\n\t" 1090 "movdqu 0(%2), %%xmm2" "\n\t" 1091 "movdqu 16(%2), %%xmm11" "\n\t" 1092 "pcmpistri $0x00, %%xmm2, %%xmm11" "\n\t" 1093//"pcmpistrm $0x00, %%xmm2, %%xmm11" "\n\t" 1094//"movd %%xmm0, 
%%ecx" "\n\t" 1095 "pushfq" "\n\t" 1096 "popq %%rdx" "\n\t" 1097 "movq %%rcx, %0" "\n\t" 1098 "movq %%rdx, %1" "\n\t" 1099 "addq $1024, %%rsp" "\n\t" 1100 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1101 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1102 ); 1103 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1104} 1105 1106UInt s_pcmpistri_00 ( V128* argLU, V128* argRU ) 1107{ 1108 V128 resV; 1109 UInt resOSZACP, resECX; 1110 Bool ok 1111 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 1112 zmask_from_V128(argLU), 1113 zmask_from_V128(argRU), 1114 0x00, False/*!isSTRM*/ 1115 ); 1116 assert(ok); 1117 resECX = resV.uInt[0]; 1118 return (resOSZACP << 16) | resECX; 1119} 1120 1121void istri_00 ( void ) 1122{ 1123 char* wot = "00"; 1124 UInt(*h)(V128*,V128*) = h_pcmpistri_00; 1125 UInt(*s)(V128*,V128*) = s_pcmpistri_00; 1126 1127 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); 1128 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); 1129 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); 1130 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 1131 1132 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 1133 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); 1134 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); 1135 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); 1136 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); 1137 1138 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 1139 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); 1140 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); 1141 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); 1142 1143 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1144 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1145 1146 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 1147 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 1148 
try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); 1149 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); 1150 1151 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); 1152 1153 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 1154 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 1155} 1156 1157 1158////////////////////////////////////////////////////////// 1159// // 1160// ISTRI_38 // 1161// // 1162////////////////////////////////////////////////////////// 1163 1164UInt h_pcmpistri_38 ( V128* argL, V128* argR ) 1165{ 1166 V128 block[2]; 1167 memcpy(&block[0], argL, sizeof(V128)); 1168 memcpy(&block[1], argR, sizeof(V128)); 1169 ULong res, flags; 1170 __asm__ __volatile__( 1171 "subq $1024, %%rsp" "\n\t" 1172 "movdqu 0(%2), %%xmm2" "\n\t" 1173 "movdqu 16(%2), %%xmm11" "\n\t" 1174 "pcmpistri $0x38, %%xmm2, %%xmm11" "\n\t" 1175 "pushfq" "\n\t" 1176 "popq %%rdx" "\n\t" 1177 "movq %%rcx, %0" "\n\t" 1178 "movq %%rdx, %1" "\n\t" 1179 "addq $1024, %%rsp" "\n\t" 1180 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1181 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1182 ); 1183 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1184} 1185 1186UInt s_pcmpistri_38 ( V128* argLU, V128* argRU ) 1187{ 1188 V128 resV; 1189 UInt resOSZACP, resECX; 1190 Bool ok 1191 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 1192 zmask_from_V128(argLU), 1193 zmask_from_V128(argRU), 1194 0x38, False/*!isSTRM*/ 1195 ); 1196 assert(ok); 1197 resECX = resV.uInt[0]; 1198 return (resOSZACP << 16) | resECX; 1199} 1200 1201void istri_38 ( void ) 1202{ 1203 char* wot = "38"; 1204 UInt(*h)(V128*,V128*) = h_pcmpistri_38; 1205 UInt(*s)(V128*,V128*) = s_pcmpistri_38; 1206 1207 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1208 1209 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1210 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1211 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 1212 
try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 1213 1214 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 1215 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 1216 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 1217 1218 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1219 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1220 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1221 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1222 1223 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1224 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 1225 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 1226 1227 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1228 1229 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 1230 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 1231 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); 1232 1233 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa"); 1234 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 1235 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); 1236 1237 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 1238 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); 1239 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa"); 1240 1241 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); 1242 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); 1243 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa"); 1244 1245 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 1246 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 1247} 1248 1249 1250 1251////////////////////////////////////////////////////////// 1252// // 1253// main // 1254// // 1255////////////////////////////////////////////////////////// 1256 1257int main ( void ) 1258{ 1259 istri_4A(); 1260 istri_3A(); 1261 istri_08(); 
1262 istri_1A(); 1263 istri_02(); 1264 istri_0C(); 1265 istri_12(); 1266 istri_44(); 1267 return 0; 1268} 1269 1270/* Tests in detail the core arithmetic for pcmp{e,i}str{i,m} using 1271 pcmpistri to drive it. Does not check the e-vs-i or i-vs-m 1272 aspect. */ 1273 1274#include <string.h> 1275#include <stdio.h> 1276#include <assert.h> 1277 1278typedef unsigned int UInt; 1279typedef signed int Int; 1280typedef unsigned char UChar; 1281typedef unsigned long long int ULong; 1282typedef UChar Bool; 1283#define False ((Bool)0) 1284#define True ((Bool)1) 1285 1286//typedef unsigned char V128[16]; 1287typedef 1288 union { 1289 UChar uChar[16]; 1290 UInt uInt[4]; 1291 } 1292 V128; 1293 1294#define SHIFT_O 11 1295#define SHIFT_S 7 1296#define SHIFT_Z 6 1297#define SHIFT_A 4 1298#define SHIFT_C 0 1299#define SHIFT_P 2 1300 1301#define MASK_O (1ULL << SHIFT_O) 1302#define MASK_S (1ULL << SHIFT_S) 1303#define MASK_Z (1ULL << SHIFT_Z) 1304#define MASK_A (1ULL << SHIFT_A) 1305#define MASK_C (1ULL << SHIFT_C) 1306#define MASK_P (1ULL << SHIFT_P) 1307 1308 1309UInt clz32 ( UInt x ) 1310{ 1311 Int y, m, n; 1312 y = -(x >> 16); 1313 m = (y >> 16) & 16; 1314 n = 16 - m; 1315 x = x >> m; 1316 y = x - 0x100; 1317 m = (y >> 16) & 8; 1318 n = n + m; 1319 x = x << m; 1320 y = x - 0x1000; 1321 m = (y >> 16) & 4; 1322 n = n + m; 1323 x = x << m; 1324 y = x - 0x4000; 1325 m = (y >> 16) & 2; 1326 n = n + m; 1327 x = x << m; 1328 y = x >> 14; 1329 m = y & ~(y >> 1); 1330 return n + 2 - m; 1331} 1332 1333UInt ctz32 ( UInt x ) 1334{ 1335 return 32 - clz32((~x) & (x-1)); 1336} 1337 1338void expand ( V128* dst, char* summary ) 1339{ 1340 Int i; 1341 assert( strlen(summary) == 16 ); 1342 for (i = 0; i < 16; i++) { 1343 UChar xx = 0; 1344 UChar x = summary[15-i]; 1345 if (x >= '0' && x <= '9') { xx = x - '0'; } 1346 else if (x >= 'A' && x <= 'F') { xx = x - 'A' + 10; } 1347 else if (x >= 'a' && x <= 'f') { xx = x - 'a' + 10; } 1348 else assert(0); 1349 1350 assert(xx < 16); 1351 xx = (xx << 4) | 
xx; 1352 assert(xx < 256); 1353 dst->uChar[i] = xx; 1354 } 1355} 1356 1357void try_istri ( char* which, 1358 UInt(*h_fn)(V128*,V128*), 1359 UInt(*s_fn)(V128*,V128*), 1360 char* summL, char* summR ) 1361{ 1362 assert(strlen(which) == 2); 1363 V128 argL, argR; 1364 expand(&argL, summL); 1365 expand(&argR, summR); 1366 UInt h_res = h_fn(&argL, &argR); 1367 UInt s_res = s_fn(&argL, &argR); 1368 printf("istri %s %s %s -> %08x %08x %s\n", 1369 which, summL, summR, h_res, s_res, h_res == s_res ? "" : "!!!!"); 1370} 1371 1372UInt zmask_from_V128 ( V128* arg ) 1373{ 1374 UInt i, res = 0; 1375 for (i = 0; i < 16; i++) { 1376 res |= ((arg->uChar[i] == 0) ? 1 : 0) << i; 1377 } 1378 return res; 1379} 1380 1381////////////////////////////////////////////////////////// 1382// // 1383// GENERAL // 1384// // 1385////////////////////////////////////////////////////////// 1386 1387 1388/* Given partial results from a pcmpXstrX operation (intRes1, 1389 basically), generate an I format (index value for ECX) output, and 1390 also the new OSZACP flags. 1391*/ 1392static 1393void pcmpXstrX_WRK_gen_output_fmt_I(/*OUT*/V128* resV, 1394 /*OUT*/UInt* resOSZACP, 1395 UInt intRes1, 1396 UInt zmaskL, UInt zmaskR, 1397 UInt validL, 1398 UInt pol, UInt idx ) 1399{ 1400 assert((pol >> 2) == 0); 1401 assert((idx >> 1) == 0); 1402 1403 UInt intRes2 = 0; 1404 switch (pol) { 1405 case 0: intRes2 = intRes1; break; // pol + 1406 case 1: intRes2 = ~intRes1; break; // pol - 1407 case 2: intRes2 = intRes1; break; // pol m+ 1408 case 3: intRes2 = intRes1 ^ validL; break; // pol m- 1409 } 1410 intRes2 &= 0xFFFF; 1411 1412 // generate ecx value 1413 UInt newECX = 0; 1414 if (idx) { 1415 // index of ms-1-bit 1416 newECX = intRes2 == 0 ? 16 : (31 - clz32(intRes2)); 1417 } else { 1418 // index of ls-1-bit 1419 newECX = intRes2 == 0 ? 
16 : ctz32(intRes2); 1420 } 1421 1422 *(UInt*)(&resV[0]) = newECX; 1423 1424 // generate new flags, common to all ISTRI and ISTRM cases 1425 *resOSZACP // A, P are zero 1426 = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0 1427 | ((zmaskL == 0) ? 0 : MASK_Z) // Z == 1 iff any in argL is 0 1428 | ((zmaskR == 0) ? 0 : MASK_S) // S == 1 iff any in argR is 0 1429 | ((intRes2 & 1) << SHIFT_O); // O == IntRes2[0] 1430} 1431 1432 1433/* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M} 1434 variants. 1435 1436 For xSTRI variants, the new ECX value is placed in the 32 bits 1437 pointed to by *resV. For xSTRM variants, the result is a 128 bit 1438 value and is placed at *resV in the obvious way. 1439 1440 For all variants, the new OSZACP value is placed at *resOSZACP. 1441 1442 argLV and argRV are the vector args. The caller must prepare a 1443 16-bit mask for each, zmaskL and zmaskR. For ISTRx variants this 1444 must be 1 for each zero byte of of the respective arg. For ESTRx 1445 variants this is derived from the explicit length indication, and 1446 must be 0 in all places except at the bit index corresponding to 1447 the valid length (0 .. 16). If the valid length is 16 then the 1448 mask must be all zeroes. In all cases, bits 31:16 must be zero. 1449 1450 imm8 is the original immediate from the instruction. isSTRM 1451 indicates whether this is a xSTRM or xSTRI variant, which controls 1452 how much of *res is written. 1453 1454 If the given imm8 case can be handled, the return value is True. 1455 If not, False is returned, and neither *res not *resOSZACP are 1456 altered. 
1457*/ 1458 1459Bool pcmpXstrX_WRK ( /*OUT*/V128* resV, 1460 /*OUT*/UInt* resOSZACP, 1461 V128* argLV, V128* argRV, 1462 UInt zmaskL, UInt zmaskR, 1463 UInt imm8, Bool isSTRM ) 1464{ 1465 assert(imm8 < 0x80); 1466 assert((zmaskL >> 16) == 0); 1467 assert((zmaskR >> 16) == 0); 1468 1469 /* Explicitly reject any imm8 values that haven't been validated, 1470 even if they would probably work. Life is too short to have 1471 unvalidated cases in the code base. */ 1472 switch (imm8) { 1473 case 0x02: case 0x08: case 0x0C: case 0x12: case 0x1A: 1474 case 0x3A: case 0x44: case 0x4A: 1475 break; 1476 default: 1477 return False; 1478 } 1479 1480 UInt fmt = (imm8 >> 0) & 3; // imm8[1:0] data format 1481 UInt agg = (imm8 >> 2) & 3; // imm8[3:2] aggregation fn 1482 UInt pol = (imm8 >> 4) & 3; // imm8[5:4] polarity 1483 UInt idx = (imm8 >> 6) & 1; // imm8[6] 1==msb/bytemask 1484 1485 /*----------------------------------------*/ 1486 /*-- strcmp on byte data --*/ 1487 /*----------------------------------------*/ 1488 1489 if (agg == 2/*equal each, aka strcmp*/ 1490 && (fmt == 0/*ub*/ || fmt == 2/*sb*/) 1491 && !isSTRM) { 1492 Int i; 1493 UChar* argL = (UChar*)argLV; 1494 UChar* argR = (UChar*)argRV; 1495 UInt boolResII = 0; 1496 for (i = 15; i >= 0; i--) { 1497 UChar cL = argL[i]; 1498 UChar cR = argR[i]; 1499 boolResII = (boolResII << 1) | (cL == cR ? 
1 : 0); 1500 } 1501 UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL)) 1502 UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR)) 1503 1504 // do invalidation, common to all equal-each cases 1505 UInt intRes1 1506 = (boolResII & validL & validR) // if both valid, use cmpres 1507 | (~ (validL | validR)); // if both invalid, force 1 1508 // else force 0 1509 intRes1 &= 0xFFFF; 1510 1511 // generate I-format output 1512 pcmpXstrX_WRK_gen_output_fmt_I( 1513 resV, resOSZACP, 1514 intRes1, zmaskL, zmaskR, validL, pol, idx 1515 ); 1516 1517 return True; 1518 } 1519 1520 /*----------------------------------------*/ 1521 /*-- set membership on byte data --*/ 1522 /*----------------------------------------*/ 1523 1524 if (agg == 0/*equal any, aka find chars in a set*/ 1525 && (fmt == 0/*ub*/ || fmt == 2/*sb*/) 1526 && !isSTRM) { 1527 /* argL: the string, argR: charset */ 1528 UInt si, ci; 1529 UChar* argL = (UChar*)argLV; 1530 UChar* argR = (UChar*)argRV; 1531 UInt boolRes = 0; 1532 UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL)) 1533 UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR)) 1534 1535 for (si = 0; si < 16; si++) { 1536 if ((validL & (1 << si)) == 0) 1537 // run off the end of the string. 
1538 break; 1539 UInt m = 0; 1540 for (ci = 0; ci < 16; ci++) { 1541 if ((validR & (1 << ci)) == 0) break; 1542 if (argR[ci] == argL[si]) { m = 1; break; } 1543 } 1544 boolRes |= (m << si); 1545 } 1546 1547 // boolRes is "pre-invalidated" 1548 UInt intRes1 = boolRes & 0xFFFF; 1549 1550 // generate I-format output 1551 pcmpXstrX_WRK_gen_output_fmt_I( 1552 resV, resOSZACP, 1553 intRes1, zmaskL, zmaskR, validL, pol, idx 1554 ); 1555 1556 return True; 1557 } 1558 1559 /*----------------------------------------*/ 1560 /*-- substring search on byte data --*/ 1561 /*----------------------------------------*/ 1562 1563 if (agg == 3/*equal ordered, aka substring search*/ 1564 && (fmt == 0/*ub*/ || fmt == 2/*sb*/) 1565 && !isSTRM) { 1566 1567 /* argL: haystack, argR: needle */ 1568 UInt ni, hi; 1569 UChar* argL = (UChar*)argLV; 1570 UChar* argR = (UChar*)argRV; 1571 UInt boolRes = 0; 1572 UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL)) 1573 UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR)) 1574 for (hi = 0; hi < 16; hi++) { 1575 if ((validL & (1 << hi)) == 0) 1576 // run off the end of the haystack 1577 break; 1578 UInt m = 1; 1579 for (ni = 0; ni < 16; ni++) { 1580 if ((validR & (1 << ni)) == 0) break; 1581 UInt i = ni + hi; 1582 if (i >= 16) break; 1583 if (argL[i] != argR[ni]) { m = 0; break; } 1584 } 1585 boolRes |= (m << hi); 1586 } 1587 1588 // boolRes is "pre-invalidated" 1589 UInt intRes1 = boolRes & 0xFFFF; 1590 1591 // generate I-format output 1592 pcmpXstrX_WRK_gen_output_fmt_I( 1593 resV, resOSZACP, 1594 intRes1, zmaskL, zmaskR, validL, pol, idx 1595 ); 1596 1597 return True; 1598 } 1599 1600 /*----------------------------------------*/ 1601 /*-- ranges, unsigned byte data --*/ 1602 /*----------------------------------------*/ 1603 1604 if (agg == 1/*ranges*/ 1605 && fmt == 0/*ub*/ 1606 && !isSTRM) { 1607 1608 /* argL: string, argR: range-pairs */ 1609 UInt ri, si; 1610 UChar* argL = (UChar*)argLV; 1611 UChar* argR = (UChar*)argRV; 1612 UInt boolRes 
= 0; 1613 UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL)) 1614 UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR)) 1615 for (si = 0; si < 16; si++) { 1616 if ((validL & (1 << si)) == 0) 1617 // run off the end of the string 1618 break; 1619 UInt m = 0; 1620 for (ri = 0; ri < 16; ri += 2) { 1621 if ((validR & (3 << ri)) != (3 << ri)) break; 1622 if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) { 1623 m = 1; break; 1624 } 1625 } 1626 boolRes |= (m << si); 1627 } 1628 1629 // boolRes is "pre-invalidated" 1630 UInt intRes1 = boolRes & 0xFFFF; 1631 1632 // generate I-format output 1633 pcmpXstrX_WRK_gen_output_fmt_I( 1634 resV, resOSZACP, 1635 intRes1, zmaskL, zmaskR, validL, pol, idx 1636 ); 1637 1638 return True; 1639 } 1640 1641 return False; 1642} 1643 1644 1645////////////////////////////////////////////////////////// 1646// // 1647// ISTRI_4A // 1648// // 1649////////////////////////////////////////////////////////// 1650 1651UInt h_pcmpistri_4A ( V128* argL, V128* argR ) 1652{ 1653 V128 block[2]; 1654 memcpy(&block[0], argL, sizeof(V128)); 1655 memcpy(&block[1], argR, sizeof(V128)); 1656 ULong res, flags; 1657 __asm__ __volatile__( 1658 "subq $1024, %%rsp" "\n\t" 1659 "movdqu 0(%2), %%xmm2" "\n\t" 1660 "movdqu 16(%2), %%xmm11" "\n\t" 1661 "pcmpistri $0x4A, %%xmm2, %%xmm11" "\n\t" 1662 "pushfq" "\n\t" 1663 "popq %%rdx" "\n\t" 1664 "movq %%rcx, %0" "\n\t" 1665 "movq %%rdx, %1" "\n\t" 1666 "addq $1024, %%rsp" "\n\t" 1667 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1668 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1669 ); 1670 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1671} 1672 1673UInt s_pcmpistri_4A ( V128* argLU, V128* argRU ) 1674{ 1675 V128 resV; 1676 UInt resOSZACP, resECX; 1677 Bool ok 1678 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 1679 zmask_from_V128(argLU), 1680 zmask_from_V128(argRU), 1681 0x4A, False/*!isSTRM*/ 1682 ); 1683 assert(ok); 1684 resECX = resV.uInt[0]; 1685 return (resOSZACP << 16) | resECX; 1686} 
1687 1688void istri_4A ( void ) 1689{ 1690 char* wot = "4A"; 1691 UInt(*h)(V128*,V128*) = h_pcmpistri_4A; 1692 UInt(*s)(V128*,V128*) = s_pcmpistri_4A; 1693 1694 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1695 1696 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1697 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1698 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 1699 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 1700 1701 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 1702 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 1703 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 1704 1705 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1706 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1707 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1708 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1709 1710 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1711 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 1712 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 1713 1714 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1715 1716 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 1717 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 1718 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); 1719 1720 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa"); 1721 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 1722 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); 1723 1724 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 1725 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); 1726 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa"); 1727 1728 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); 1729 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); 1730 try_istri(wot,h,s, "0000000000000001", 
"aaaaaaaa0aaaaaaa"); 1731 1732 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 1733 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 1734} 1735 1736////////////////////////////////////////////////////////// 1737// // 1738// ISTRI_3A // 1739// // 1740////////////////////////////////////////////////////////// 1741 1742UInt h_pcmpistri_3A ( V128* argL, V128* argR ) 1743{ 1744 V128 block[2]; 1745 memcpy(&block[0], argL, sizeof(V128)); 1746 memcpy(&block[1], argR, sizeof(V128)); 1747 ULong res, flags; 1748 __asm__ __volatile__( 1749 "subq $1024, %%rsp" "\n\t" 1750 "movdqu 0(%2), %%xmm2" "\n\t" 1751 "movdqu 16(%2), %%xmm11" "\n\t" 1752 "pcmpistri $0x3A, %%xmm2, %%xmm11" "\n\t" 1753 "pushfq" "\n\t" 1754 "popq %%rdx" "\n\t" 1755 "movq %%rcx, %0" "\n\t" 1756 "movq %%rdx, %1" "\n\t" 1757 "addq $1024, %%rsp" "\n\t" 1758 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1759 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1760 ); 1761 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1762} 1763 1764UInt s_pcmpistri_3A ( V128* argLU, V128* argRU ) 1765{ 1766 V128 resV; 1767 UInt resOSZACP, resECX; 1768 Bool ok 1769 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 1770 zmask_from_V128(argLU), 1771 zmask_from_V128(argRU), 1772 0x3A, False/*!isSTRM*/ 1773 ); 1774 assert(ok); 1775 resECX = resV.uInt[0]; 1776 return (resOSZACP << 16) | resECX; 1777} 1778 1779void istri_3A ( void ) 1780{ 1781 char* wot = "3A"; 1782 UInt(*h)(V128*,V128*) = h_pcmpistri_3A; 1783 UInt(*s)(V128*,V128*) = s_pcmpistri_3A; 1784 1785 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1786 1787 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1788 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1789 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 1790 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 1791 1792 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 1793 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", 
"aaaaaaaaa2aaaaaa"); 1794 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 1795 1796 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1797 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1798 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1799 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1800 1801 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1802 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 1803 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 1804 1805 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1806 1807 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 1808 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 1809 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); 1810 1811 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa"); 1812 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 1813 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); 1814 1815 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 1816 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); 1817 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa"); 1818 1819 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); 1820 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); 1821 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa"); 1822 1823 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 1824 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 1825} 1826 1827 1828 1829////////////////////////////////////////////////////////// 1830// // 1831// ISTRI_0C // 1832// // 1833////////////////////////////////////////////////////////// 1834 1835__attribute__((noinline)) 1836UInt h_pcmpistri_0C ( V128* argL, V128* argR ) 1837{ 1838 V128 block[2]; 1839 memcpy(&block[0], argL, sizeof(V128)); 1840 memcpy(&block[1], argR, sizeof(V128)); 1841 ULong res = 0, flags = 0; 1842 __asm__ __volatile__( 
1843 "movdqa 0(%2), %%xmm2" "\n\t" 1844 "movdqa 16(%2), %%xmm11" "\n\t" 1845 "pcmpistri $0x0C, %%xmm2, %%xmm11" "\n\t" 1846 //"pcmpistrm $0x0C, %%xmm2, %%xmm11" "\n\t" 1847 //"movd %%xmm0, %%ecx" "\n\t" 1848 "pushfq" "\n\t" 1849 "popq %%rdx" "\n\t" 1850 "movq %%rcx, %0" "\n\t" 1851 "movq %%rdx, %1" "\n\t" 1852 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1853 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1854 ); 1855 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1856} 1857 1858UInt s_pcmpistri_0C ( V128* argLU, V128* argRU ) 1859{ 1860 V128 resV; 1861 UInt resOSZACP, resECX; 1862 Bool ok 1863 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 1864 zmask_from_V128(argLU), 1865 zmask_from_V128(argRU), 1866 0x0C, False/*!isSTRM*/ 1867 ); 1868 assert(ok); 1869 resECX = resV.uInt[0]; 1870 return (resOSZACP << 16) | resECX; 1871} 1872 1873void istri_0C ( void ) 1874{ 1875 char* wot = "0C"; 1876 UInt(*h)(V128*,V128*) = h_pcmpistri_0C; 1877 UInt(*s)(V128*,V128*) = s_pcmpistri_0C; 1878 1879 try_istri(wot,h,s, "111111111abcde11", "00000000000abcde"); 1880 1881 try_istri(wot,h,s, "111111111abcde11", "0000abcde00abcde"); 1882 1883 try_istri(wot,h,s, "1111111111abcde1", "00000000000abcde"); 1884 try_istri(wot,h,s, "11111111111abcde", "00000000000abcde"); 1885 try_istri(wot,h,s, "111111111111abcd", "00000000000abcde"); 1886 1887 try_istri(wot,h,s, "111abcde1abcde11", "00000000000abcde"); 1888 1889 try_istri(wot,h,s, "11abcde11abcde11", "00000000000abcde"); 1890 try_istri(wot,h,s, "1abcde111abcde11", "00000000000abcde"); 1891 try_istri(wot,h,s, "abcde1111abcde11", "00000000000abcde"); 1892 try_istri(wot,h,s, "bcde11111abcde11", "00000000000abcde"); 1893 try_istri(wot,h,s, "cde111111abcde11", "00000000000abcde"); 1894 1895 try_istri(wot,h,s, "01abcde11abcde11", "00000000000abcde"); 1896 try_istri(wot,h,s, "00abcde11abcde11", "00000000000abcde"); 1897 try_istri(wot,h,s, "000bcde11abcde11", "00000000000abcde"); 1898 1899 try_istri(wot,h,s, "00abcde10abcde11", 
"00000000000abcde"); 1900 try_istri(wot,h,s, "00abcde100bcde11", "00000000000abcde"); 1901 1902 try_istri(wot,h,s, "1111111111111234", "0000000000000000"); 1903 try_istri(wot,h,s, "1111111111111234", "0000000000000001"); 1904 try_istri(wot,h,s, "1111111111111234", "0000000000000011"); 1905 1906 try_istri(wot,h,s, "1111111111111234", "1111111111111234"); 1907 try_istri(wot,h,s, "a111111111111111", "000000000000000a"); 1908 try_istri(wot,h,s, "b111111111111111", "000000000000000a"); 1909} 1910 1911 1912////////////////////////////////////////////////////////// 1913// // 1914// ISTRI_08 // 1915// // 1916////////////////////////////////////////////////////////// 1917 1918UInt h_pcmpistri_08 ( V128* argL, V128* argR ) 1919{ 1920 V128 block[2]; 1921 memcpy(&block[0], argL, sizeof(V128)); 1922 memcpy(&block[1], argR, sizeof(V128)); 1923 ULong res, flags; 1924 __asm__ __volatile__( 1925 "subq $1024, %%rsp" "\n\t" 1926 "movdqu 0(%2), %%xmm2" "\n\t" 1927 "movdqu 16(%2), %%xmm11" "\n\t" 1928 "pcmpistri $0x08, %%xmm2, %%xmm11" "\n\t" 1929 "pushfq" "\n\t" 1930 "popq %%rdx" "\n\t" 1931 "movq %%rcx, %0" "\n\t" 1932 "movq %%rdx, %1" "\n\t" 1933 "addq $1024, %%rsp" "\n\t" 1934 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1935 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1936 ); 1937 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1938} 1939 1940UInt s_pcmpistri_08 ( V128* argLU, V128* argRU ) 1941{ 1942 V128 resV; 1943 UInt resOSZACP, resECX; 1944 Bool ok 1945 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 1946 zmask_from_V128(argLU), 1947 zmask_from_V128(argRU), 1948 0x08, False/*!isSTRM*/ 1949 ); 1950 assert(ok); 1951 resECX = resV.uInt[0]; 1952 return (resOSZACP << 16) | resECX; 1953} 1954 1955void istri_08 ( void ) 1956{ 1957 char* wot = "08"; 1958 UInt(*h)(V128*,V128*) = h_pcmpistri_08; 1959 UInt(*s)(V128*,V128*) = s_pcmpistri_08; 1960 1961 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1962 1963 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", 
"aaaaaaaaaaaaaaaa"); 1964 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1965 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 1966 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 1967 1968 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 1969 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 1970 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 1971 1972 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1973 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1974 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1975 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1976 1977 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1978 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 1979 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 1980 1981 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1982 1983 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 1984 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 1985 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); 1986 1987 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa"); 1988 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 1989 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); 1990 1991 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 1992 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); 1993 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa"); 1994 1995 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); 1996 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); 1997 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa"); 1998 1999 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 2000 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 2001} 2002 2003 2004 2005////////////////////////////////////////////////////////// 2006// // 2007// ISTRI_1A // 
2008// // 2009////////////////////////////////////////////////////////// 2010 2011UInt h_pcmpistri_1A ( V128* argL, V128* argR ) 2012{ 2013 V128 block[2]; 2014 memcpy(&block[0], argL, sizeof(V128)); 2015 memcpy(&block[1], argR, sizeof(V128)); 2016 ULong res, flags; 2017 __asm__ __volatile__( 2018 "subq $1024, %%rsp" "\n\t" 2019 "movdqu 0(%2), %%xmm2" "\n\t" 2020 "movdqu 16(%2), %%xmm11" "\n\t" 2021 "pcmpistri $0x1A, %%xmm2, %%xmm11" "\n\t" 2022 "pushfq" "\n\t" 2023 "popq %%rdx" "\n\t" 2024 "movq %%rcx, %0" "\n\t" 2025 "movq %%rdx, %1" "\n\t" 2026 "addq $1024, %%rsp" "\n\t" 2027 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 2028 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 2029 ); 2030 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 2031} 2032 2033UInt s_pcmpistri_1A ( V128* argLU, V128* argRU ) 2034{ 2035 V128 resV; 2036 UInt resOSZACP, resECX; 2037 Bool ok 2038 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 2039 zmask_from_V128(argLU), 2040 zmask_from_V128(argRU), 2041 0x1A, False/*!isSTRM*/ 2042 ); 2043 assert(ok); 2044 resECX = resV.uInt[0]; 2045 return (resOSZACP << 16) | resECX; 2046} 2047 2048void istri_1A ( void ) 2049{ 2050 char* wot = "1A"; 2051 UInt(*h)(V128*,V128*) = h_pcmpistri_1A; 2052 UInt(*s)(V128*,V128*) = s_pcmpistri_1A; 2053 2054 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 2055 2056 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 2057 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 2058 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 2059 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 2060 2061 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 2062 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 2063 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 2064 2065 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 2066 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 2067 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", 
"aaaaaaaaaaaaaaaa"); 2068 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 2069 2070 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 2071 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 2072 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 2073 2074 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 2075 2076 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 2077 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 2078 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); 2079 2080 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa"); 2081 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 2082 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); 2083 2084 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 2085 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); 2086 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa"); 2087 2088 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); 2089 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); 2090 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa"); 2091 2092 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 2093 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 2094} 2095 2096 2097 2098////////////////////////////////////////////////////////// 2099// // 2100// ISTRI_02 // 2101// // 2102////////////////////////////////////////////////////////// 2103 2104UInt h_pcmpistri_02 ( V128* argL, V128* argR ) 2105{ 2106 V128 block[2]; 2107 memcpy(&block[0], argL, sizeof(V128)); 2108 memcpy(&block[1], argR, sizeof(V128)); 2109 ULong res, flags; 2110 __asm__ __volatile__( 2111 "subq $1024, %%rsp" "\n\t" 2112 "movdqu 0(%2), %%xmm2" "\n\t" 2113 "movdqu 16(%2), %%xmm11" "\n\t" 2114 "pcmpistri $0x02, %%xmm2, %%xmm11" "\n\t" 2115//"pcmpistrm $0x02, %%xmm2, %%xmm11" "\n\t" 2116//"movd %%xmm0, %%ecx" "\n\t" 2117 "pushfq" "\n\t" 2118 "popq %%rdx" "\n\t" 2119 "movq %%rcx, %0" 
"\n\t" 2120 "movq %%rdx, %1" "\n\t" 2121 "addq $1024, %%rsp" "\n\t" 2122 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 2123 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 2124 ); 2125 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 2126} 2127 2128UInt s_pcmpistri_02 ( V128* argLU, V128* argRU ) 2129{ 2130 V128 resV; 2131 UInt resOSZACP, resECX; 2132 Bool ok 2133 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 2134 zmask_from_V128(argLU), 2135 zmask_from_V128(argRU), 2136 0x02, False/*!isSTRM*/ 2137 ); 2138 assert(ok); 2139 resECX = resV.uInt[0]; 2140 return (resOSZACP << 16) | resECX; 2141} 2142 2143void istri_02 ( void ) 2144{ 2145 char* wot = "02"; 2146 UInt(*h)(V128*,V128*) = h_pcmpistri_02; 2147 UInt(*s)(V128*,V128*) = s_pcmpistri_02; 2148 2149 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); 2150 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); 2151 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); 2152 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 2153 2154 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 2155 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); 2156 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); 2157 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); 2158 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); 2159 2160 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 2161 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); 2162 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); 2163 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); 2164 2165 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 2166 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 2167 2168 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 2169 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 2170 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); 2171 try_istri(wot,h,s, 
"0000abcdabcdabcd", "000000000000baba"); 2172 2173 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); 2174 2175 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 2176 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 2177} 2178 2179 2180////////////////////////////////////////////////////////// 2181// // 2182// ISTRI_12 // 2183// // 2184////////////////////////////////////////////////////////// 2185 2186UInt h_pcmpistri_12 ( V128* argL, V128* argR ) 2187{ 2188 V128 block[2]; 2189 memcpy(&block[0], argL, sizeof(V128)); 2190 memcpy(&block[1], argR, sizeof(V128)); 2191 ULong res, flags; 2192 __asm__ __volatile__( 2193 "subq $1024, %%rsp" "\n\t" 2194 "movdqu 0(%2), %%xmm2" "\n\t" 2195 "movdqu 16(%2), %%xmm11" "\n\t" 2196 "pcmpistri $0x12, %%xmm2, %%xmm11" "\n\t" 2197//"pcmpistrm $0x12, %%xmm2, %%xmm11" "\n\t" 2198//"movd %%xmm0, %%ecx" "\n\t" 2199 "pushfq" "\n\t" 2200 "popq %%rdx" "\n\t" 2201 "movq %%rcx, %0" "\n\t" 2202 "movq %%rdx, %1" "\n\t" 2203 "addq $1024, %%rsp" "\n\t" 2204 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 2205 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 2206 ); 2207 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 2208} 2209 2210UInt s_pcmpistri_12 ( V128* argLU, V128* argRU ) 2211{ 2212 V128 resV; 2213 UInt resOSZACP, resECX; 2214 Bool ok 2215 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 2216 zmask_from_V128(argLU), 2217 zmask_from_V128(argRU), 2218 0x12, False/*!isSTRM*/ 2219 ); 2220 assert(ok); 2221 resECX = resV.uInt[0]; 2222 return (resOSZACP << 16) | resECX; 2223} 2224 2225void istri_12 ( void ) 2226{ 2227 char* wot = "12"; 2228 UInt(*h)(V128*,V128*) = h_pcmpistri_12; 2229 UInt(*s)(V128*,V128*) = s_pcmpistri_12; 2230 2231 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); 2232 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); 2233 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); 2234 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 2235 2236 
try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 2237 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); 2238 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); 2239 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); 2240 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); 2241 2242 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 2243 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); 2244 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); 2245 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); 2246 2247 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 2248 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 2249 2250 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 2251 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 2252 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); 2253 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); 2254 2255 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); 2256 2257 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 2258 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 2259} 2260 2261 2262 2263////////////////////////////////////////////////////////// 2264// // 2265// ISTRI_44 // 2266// // 2267////////////////////////////////////////////////////////// 2268 2269UInt h_pcmpistri_44 ( V128* argL, V128* argR ) 2270{ 2271 V128 block[2]; 2272 memcpy(&block[0], argL, sizeof(V128)); 2273 memcpy(&block[1], argR, sizeof(V128)); 2274 ULong res, flags; 2275 __asm__ __volatile__( 2276 "subq $1024, %%rsp" "\n\t" 2277 "movdqu 0(%2), %%xmm2" "\n\t" 2278 "movdqu 16(%2), %%xmm11" "\n\t" 2279 "pcmpistri $0x44, %%xmm2, %%xmm11" "\n\t" 2280//"pcmpistrm $0x04, %%xmm2, %%xmm11" "\n\t" 2281//"movd %%xmm0, %%ecx" "\n\t" 2282 "pushfq" "\n\t" 2283 "popq %%rdx" "\n\t" 2284 "movq %%rcx, %0" "\n\t" 2285 "movq %%rdx, %1" "\n\t" 2286 "addq $1024, %%rsp" "\n\t" 2287 : /*out*/ "=r"(res), 
"=r"(flags) : "r"/*in*/(&block[0]) 2288 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 2289 ); 2290 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 2291} 2292 2293UInt s_pcmpistri_44 ( V128* argLU, V128* argRU ) 2294{ 2295 V128 resV; 2296 UInt resOSZACP, resECX; 2297 Bool ok 2298 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU, 2299 zmask_from_V128(argLU), 2300 zmask_from_V128(argRU), 2301 0x44, False/*!isSTRM*/ 2302 ); 2303 assert(ok); 2304 resECX = resV.uInt[0]; 2305 return (resOSZACP << 16) | resECX; 2306} 2307 2308void istri_44 ( void ) 2309{ 2310 char* wot = "44"; 2311 UInt(*h)(V128*,V128*) = h_pcmpistri_44; 2312 UInt(*s)(V128*,V128*) = s_pcmpistri_44; 2313 2314 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc"); 2315 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb"); 2316 try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb"); 2317 try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb"); 2318 2319 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb"); 2320 try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb"); 2321 try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb"); 2322 try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb"); 2323 try_istri(wot,h,s, "0000000000000000", "00000000000000cb"); 2324 2325 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 2326 2327 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb"); 2328 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b"); 2329 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb"); 2330 2331 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb"); 2332 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb"); 2333 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b"); 2334 2335 try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421"); 2336 try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421"); 2337 2338 try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532"); 2339 try_istri(wot,h,s, "123456789abcdef1", 
"00000000dca86532"); 2340} 2341 2342 2343 2344 2345 2346////////////////////////////////////////////////////////// 2347// // 2348// main // 2349// // 2350////////////////////////////////////////////////////////// 2351 2352int main ( void ) 2353{ 2354 istri_4A(); 2355 istri_3A(); 2356 istri_08(); 2357 istri_1A(); 2358 istri_02(); 2359 istri_0C(); 2360 istri_12(); 2361 istri_44(); 2362 istri_00(); 2363 istri_38(); 2364 return 0; 2365} 2366