/* Tests in detail the core arithmetic for pcmp{e,i}str{i,m} using
   pcmpistri to drive it.  Does not check the e-vs-i or i-vs-m
   aspect.

   Every test case runs the real instruction (the h_* "hardware"
   functions) and a C model of it (the s_* "software" functions) on
   the same pair of vectors and prints both results side by side. */

#include <string.h>
#include <stdio.h>
#include <assert.h>

typedef  unsigned int            UInt;
typedef  signed int              Int;
typedef  unsigned char           UChar;
typedef  unsigned short          UShort;
typedef  unsigned long long int  ULong;
typedef  UChar                   Bool;
#define False ((Bool)0)
#define True  ((Bool)1)

/* A 128-bit vector, viewable as bytes, 16-bit or 32-bit lanes. */
typedef
   union {
      UChar  uChar[16];
      UShort uShort[8];
      UInt   uInt[4];
      UInt   w32[4];
   }
   V128;

/* Bit positions of the O, S, Z, A, C and P flags within rflags. */
#define SHIFT_O 11
#define SHIFT_S 7
#define SHIFT_Z 6
#define SHIFT_A 4
#define SHIFT_C 0
#define SHIFT_P 2

#define MASK_O (1ULL << SHIFT_O)
#define MASK_S (1ULL << SHIFT_S)
#define MASK_Z (1ULL << SHIFT_Z)
#define MASK_A (1ULL << SHIFT_A)
#define MASK_C (1ULL << SHIFT_C)
#define MASK_P (1ULL << SHIFT_P)


/* Count leading zero bits of a 32-bit value.  Returns 32 for x == 0.
   Done as a binary search using only unsigned arithmetic, so it does
   not depend on how the compiler right-shifts negative values. */
UInt clz32 ( UInt x )
{
   UInt n = 0;
   if (x == 0)
      return 32;
   if ((x & 0xFFFF0000u) == 0) { n += 16; x <<= 16; }
   if ((x & 0xFF000000u) == 0) { n +=  8; x <<=  8; }
   if ((x & 0xF0000000u) == 0) { n +=  4; x <<=  4; }
   if ((x & 0xC0000000u) == 0) { n +=  2; x <<=  2; }
   if ((x & 0x80000000u) == 0) { n +=  1; }
   return n;
}

/* Count trailing zero bits of a 32-bit value.  Returns 32 for
   x == 0.  (~x) & (x-1) has a 1 exactly in each position below the
   lowest set bit of x, so its leading-zero count gives the answer. */
UInt ctz32 ( UInt x )
{
   return 32 - clz32((~x) & (x-1));
}

/* Build a 128-bit vector from a 16-character hex summary string.
   Each hex digit D is widened to the byte 0xDD.  The LAST character
   of the summary becomes byte 0 of the vector, so the string reads
   most-significant-byte first.  Asserts if the string is not exactly
   16 characters long or contains a non-hex character. */
void expand ( V128* dst, const char* summary )
{
   Int i;
   assert( strlen(summary) == 16 );
   for (i = 0; i < 16; i++) {
      UChar xx = 0;
      UChar x  = summary[15-i];
      if      (x >= '0' && x <= '9') { xx = x - '0'; }
      else if (x >= 'A' && x <= 'F') { xx = x - 'A' + 10; }
      else if (x >= 'a' && x <= 'f') { xx = x - 'a' + 10; }
      else assert(0);

      assert(xx < 16);
      xx = (xx << 4) | xx;   // duplicate the nibble: D -> 0xDD
      dst->uChar[i] = xx;
   }
}

/* Run one test case.  'which' is the two-character imm8 name used in
   the output line.  summL/summR are expanded with expand() and handed
   to both h_fn (real instruction) and s_fn (C model).  Prints both
   results, followed by "!!!!" if they disagree. */
void try_istri ( const char* which,
                 UInt(*h_fn)(V128*,V128*),
                 UInt(*s_fn)(V128*,V128*),
                 const char* summL, const char* summR )
{
   assert(strlen(which) == 2);
   V128 argL, argR;
   expand(&argL, summL);
   expand(&argR, summR);
   UInt h_res = h_fn(&argL, &argR);
   UInt s_res = s_fn(&argL, &argR);
   printf("istri %s %s %s -> %08x %08x %s\n",
          which, summL, summR, h_res, s_res, h_res == s_res ? "" : "!!!!");
}

/* Compute the 8-bit "zero mask" of a vector of 16-bit elements:
   bit i of the result is 1 iff 16-bit lane i of *arg is zero. */
UInt zmask_from_V128 ( V128* arg )
{
   UInt i, res = 0;
   for (i = 0; i < 8; i++) {
      res |=  ((arg->uShort[i] == 0) ? 1 : 0) << i;
   }
   return res;
}

//////////////////////////////////////////////////////////
//                                                      //
// GENERAL                                              //
//                                                      //
//////////////////////////////////////////////////////////


/* Given partial results from a 16-bit pcmpXstrX operation (intRes1,
   basically), generate an I-format output value (an index, as would
   be placed in ECX), also the new OSZACP flags.

   intRes1 is the 8-bit per-element comparison result, validL the
   mask of valid elements in the left argument, pol the polarity
   field (imm8[5:4]) and idx the index-selection bit (imm8[6]).  The
   index is written to resV->w32[0] and the upper three words of
   *resV are zeroed.  If no result bit is set, the index is 8. */
static
void PCMPxSTRx_WRK_gen_output_fmt_I_wide ( /*OUT*/V128* resV,
                                           /*OUT*/UInt* resOSZACP,
                                           UInt intRes1,
                                           UInt zmaskL, UInt zmaskR,
                                           UInt validL,
                                           UInt pol, UInt idx )
{
   assert((pol >> 2) == 0);
   assert((idx >> 1) == 0);

   UInt intRes2 = 0;
   switch (pol) {
      case 0: intRes2 = intRes1;          break; // pol +
      case 1: intRes2 = ~intRes1;         break; // pol -
      case 2: intRes2 = intRes1;          break; // pol m+
      case 3: intRes2 = intRes1 ^ validL; break; // pol m-
      default: assert(0);                 break; // excluded by the assert above
   }
   intRes2 &= 0xFF;

   // generate I-format output (an index in ECX)
   // generate ecx value
   UInt newECX = 0;
   if (idx) {
      // index of ms-1-bit
      newECX = intRes2 == 0 ? 8 : (31 - clz32(intRes2));
   } else {
      // index of ls-1-bit
      newECX = intRes2 == 0 ? 8 : ctz32(intRes2);
   }

   resV->w32[0] = newECX;
   resV->w32[1] = 0;
   resV->w32[2] = 0;
   resV->w32[3] = 0;

   // generate new flags, common to all ISTRI and ISTRM cases
   *resOSZACP    // A, P are zero
     = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0
     | ((zmaskL == 0)  ? 0 : MASK_Z) // Z == 1 iff any in argL is 0
     | ((zmaskR == 0)  ? 0 : MASK_S) // S == 1 iff any in argR is 0
     | ((intRes2 & 1) << SHIFT_O);   // O == IntRes2[0]
}

/* Compute result and new OSZACP flags for PCMP{E,I}STR{I,M}
   variants on 16-bit characters.

   The new ECX value (I-format result) is placed in the 32 bits
   pointed to by resV, and the top 96 bits are zeroed.

   For all variants, the new OSZACP value is placed at *resOSZACP.

   argLV and argRV are the vector args.  The caller must prepare an
   8-bit mask for each, zmaskL and zmaskR.  For ISTRx variants this
   must be 1 for each zero 16-bit element of the respective arg.  For
   ESTRx variants this is derived from the explicit length
   indication, and must be 0 in all places except at the bit index
   corresponding to the valid length (0 .. 8).  If the valid length
   is 8 then the mask must be all zeroes.  In all cases, bits 31:8
   must be zero.

   imm8 is the original immediate from the instruction.  isxSTRM
   indicates whether this is a xSTRM or xSTRI variant; it is
   currently ignored, because every supported case produces the
   I-format (index) result.

   If the given imm8 case can be handled, the return value is True.
   If not, False is returned, and neither *resV nor *resOSZACP are
   altered.
*/

Bool pcmpXstrX_WRK_wide ( /*OUT*/V128* resV,
                          /*OUT*/UInt* resOSZACP,
                          V128* argLV,  V128* argRV,
                          UInt zmaskL, UInt zmaskR,
                          UInt imm8,   Bool isxSTRM )
{
   (void)isxSTRM;   // see comment above: only I-format is generated

   assert(imm8 < 0x80);
   assert((zmaskL >> 8) == 0);
   assert((zmaskR >> 8) == 0);

   /* Explicitly reject any imm8 values that haven't been validated,
      even if they would probably work.  Life is too short to have
      unvalidated cases in the code base. */
   switch (imm8) {
      case 0x01: case 0x03: case 0x09: case 0x0B: case 0x0D:
      case 0x13:            case 0x1B:
                            case 0x39: case 0x3B:
                 case 0x45:            case 0x4B:
         break;
      default:
         return False;
   }

   UInt fmt = (imm8 >> 0) & 3; // imm8[1:0]  data format
   UInt agg = (imm8 >> 2) & 3; // imm8[3:2]  aggregation fn
   UInt pol = (imm8 >> 4) & 3; // imm8[5:4]  polarity
   UInt idx = (imm8 >> 6) & 1; // imm8[6]    1==msb/bytemask

   /*----------------------------------------*/
   /*-- strcmp on wide data                --*/
   /*----------------------------------------*/

   if (agg == 2/*equal each, aka strcmp*/
       && (fmt == 1/*uw*/ || fmt == 3/*sw*/)) {
      Int     i;
      UShort* argL = argLV->uShort;
      UShort* argR = argRV->uShort;
      UInt boolResII = 0;
      // Walk from lane 7 down to lane 0 so that lane i ends up in bit i.
      for (i = 7; i >= 0; i--) {
         UShort cL  = argL[i];
         UShort cR  = argR[i];
         boolResII = (boolResII << 1) | (cL == cR ? 1 : 0);
      }
      UInt validL = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
      UInt validR = ~(zmaskR | -zmaskR);  // not(left(zmaskR))

      // do invalidation, common to all equal-each cases
      UInt intRes1
         = (boolResII & validL & validR)  // if both valid, use cmpres
           | (~ (validL | validR));       // if both invalid, force 1
                                          // else force 0
      intRes1 &= 0xFF;

      // generate I-format output
      PCMPxSTRx_WRK_gen_output_fmt_I_wide(
         resV, resOSZACP,
         intRes1, zmaskL, zmaskR, validL, pol, idx
      );

      return True;
   }

   /*----------------------------------------*/
   /*-- set membership on wide data        --*/
   /*----------------------------------------*/

   if (agg == 0/*equal any, aka find chars in a set*/
       && (fmt == 1/*uw*/ || fmt == 3/*sw*/)) {
      /* argL: the string,  argR: charset */
      UInt    si, ci;
      UShort* argL    = argLV->uShort;
      UShort* argR    = argRV->uShort;
      UInt    boolRes = 0;
      UInt    validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
      UInt    validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))

      for (si = 0; si < 8; si++) {
         if ((validL & (1 << si)) == 0)
            // run off the end of the string.
            break;
         UInt m = 0;
         for (ci = 0; ci < 8; ci++) {
            if ((validR & (1 << ci)) == 0) break;
            if (argR[ci] == argL[si]) { m = 1; break; }
         }
         boolRes |= (m << si);
      }

      // boolRes is "pre-invalidated"
      UInt intRes1 = boolRes & 0xFF;

      // generate I-format output
      PCMPxSTRx_WRK_gen_output_fmt_I_wide(
         resV, resOSZACP,
         intRes1, zmaskL, zmaskR, validL, pol, idx
      );

      return True;
   }

   /*----------------------------------------*/
   /*-- substring search on wide data      --*/
   /*----------------------------------------*/

   if (agg == 3/*equal ordered, aka substring search*/
       && (fmt == 1/*uw*/ || fmt == 3/*sw*/)) {

      /* argL: haystack,  argR: needle */
      UInt    ni, hi;
      UShort* argL    = argLV->uShort;
      UShort* argR    = argRV->uShort;
      UInt    boolRes = 0;
      UInt    validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
      UInt    validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
      for (hi = 0; hi < 8; hi++) {
         UInt m = 1;
         for (ni = 0; ni < 8; ni++) {
            if ((validR & (1 << ni)) == 0) break;
            UInt i = ni + hi;
            if (i >= 8) break;
            if (argL[i] != argR[ni]) { m = 0; break; }
         }
         boolRes |= (m << hi);
         // Note the order: the result for position hi is recorded
         // before the end-of-haystack test, unlike the other cases.
         if ((validL & (1 << hi)) == 0)
            // run off the end of the haystack
            break;
      }

      // boolRes is "pre-invalidated"
      UInt intRes1 = boolRes & 0xFF;

      // generate I-format output
      PCMPxSTRx_WRK_gen_output_fmt_I_wide(
         resV, resOSZACP,
         intRes1, zmaskL, zmaskR, validL, pol, idx
      );

      return True;
   }

   /*----------------------------------------*/
   /*-- ranges, unsigned wide data         --*/
   /*----------------------------------------*/

   if (agg == 1/*ranges*/
       && fmt == 1/*uw*/) {

      /* argL: string,  argR: range-pairs */
      UInt    ri, si;
      UShort* argL    = argLV->uShort;
      UShort* argR    = argRV->uShort;
      UInt    boolRes = 0;
      UInt    validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
      UInt    validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
      for (si = 0; si < 8; si++) {
         if ((validL & (1 << si)) == 0)
            // run off the end of the string
            break;
         UInt m = 0;
         for (ri = 0; ri < 8; ri += 2) {
            // a range is usable only if both of its bounds are valid
            if ((validR & (3 << ri)) != (3 << ri)) break;
            if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
               m = 1; break;
            }
         }
         boolRes |= (m << si);
      }

      // boolRes is "pre-invalidated"
      UInt intRes1 = boolRes & 0xFF;

      // generate I-format output
      PCMPxSTRx_WRK_gen_output_fmt_I_wide(
         resV, resOSZACP,
         intRes1, zmaskL, zmaskR, validL, pol, idx
      );

      return True;
   }

   return False;
}

/* Conventions shared by all the per-imm8 sections below.

   h_pcmpistri_XX runs the real instruction: argL is loaded into
   %xmm2, argR into %xmm11, and "pcmpistri $0xXX, %xmm2, %xmm11" is
   executed.  %rsp is moved down by 1024 around the sequence so that
   pushfq cannot overwrite anything the compiler keeps just below the
   stack pointer (the x86-64 red zone).  The result packs the
   O,S,Z,A,P,C flag bits (mask 0x8D5) into bits 16 and up, and the low
   16 bits of %rcx into bits 15:0.

   s_pcmpistri_XX computes the same packed value with the C model,
   pcmpXstrX_WRK_wide, and asserts that the model accepts the imm8.

   istri_XX feeds a fixed list of vector pairs through try_istri. */

//////////////////////////////////////////////////////////
//                                                      //
// ISTRI_4B                                             //
//                                                      //
//////////////////////////////////////////////////////////

/* Hardware reference for imm8 = 0x4B
   (fmt 3, agg 2 "equal each", pol 0, idx 1). */
UInt h_pcmpistri_4B ( V128* argL, V128* argR )
{
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x4B,  %%xmm2, %%xmm11"   "\n\t"
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}

/* Software model for imm8 = 0x4B. */
UInt s_pcmpistri_4B ( V128* argLU, V128* argRU )
{
   V128 resV;
   UInt resOSZACP, resECX;
   Bool ok
      = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU,
                            zmask_from_V128(argLU),
                            zmask_from_V128(argRU),
                            0x4B, False/*!isSTRM*/
        );
   assert(ok);
   resECX = resV.uInt[0];
   return (resOSZACP << 16) | resECX;
}

/* Test vectors for imm8 = 0x4B. */
void istri_4B ( void )
{
   const char* wot = "4B";
   UInt(*h)(V128*,V128*) = h_pcmpistri_4B;
   UInt(*s)(V128*,V128*) = s_pcmpistri_4B;

   try_istri(wot,h,s, "0000000000000000", "0000000000000000");

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");

   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");

   try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa");
   try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaa00aa");

   try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa");
   try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaa00aa");

   try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa00aaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaa00aaaaaa");

   try_istri(wot,h,s, "0000000000000000", "aaaaaaaa00aaaaaa");
   try_istri(wot,h,s, "8000000000000000", "aaaaaaaa00aaaaaa");
   try_istri(wot,h,s, "0000000000000001", "aaaaaaaa00aaaaaa");

   try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
}

//////////////////////////////////////////////////////////
//                                                      //
// ISTRI_3B                                             //
//                                                      //
//////////////////////////////////////////////////////////

/* Hardware reference for imm8 = 0x3B
   (fmt 3, agg 2 "equal each", pol 3, idx 0). */
UInt h_pcmpistri_3B ( V128* argL, V128* argR )
{
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x3B,  %%xmm2, %%xmm11"   "\n\t"
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}

/* Software model for imm8 = 0x3B. */
UInt s_pcmpistri_3B ( V128* argLU, V128* argRU )
{
   V128 resV;
   UInt resOSZACP, resECX;
   Bool ok
      = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU,
                            zmask_from_V128(argLU),
                            zmask_from_V128(argRU),
                            0x3B, False/*!isSTRM*/
        );
   assert(ok);
   resECX = resV.uInt[0];
   return (resOSZACP << 16) | resECX;
}

/* Test vectors for imm8 = 0x3B. */
void istri_3B ( void )
{
   const char* wot = "3B";
   UInt(*h)(V128*,V128*) = h_pcmpistri_3B;
   UInt(*s)(V128*,V128*) = s_pcmpistri_3B;

   try_istri(wot,h,s, "0000000000000000", "0000000000000000");

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");

   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");

   try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa");
   try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaa00aa");

   try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa");
   try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaa00aa");

   try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa00aaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaa00aaaaaa");

   try_istri(wot,h,s, "0000000000000000", "aaaaaaaa00aaaaaa");
   try_istri(wot,h,s, "8000000000000000", "aaaaaaaa00aaaaaa");
   try_istri(wot,h,s, "0000000000000001", "aaaaaaaa00aaaaaa");

   try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
}



//////////////////////////////////////////////////////////
//                                                      //
// ISTRI_0D                                             //
//                                                      //
//////////////////////////////////////////////////////////

/* Hardware reference for imm8 = 0x0D
   (fmt 1, agg 3 "equal ordered", pol 0, idx 0).

   Like its siblings, this moves %rsp down before pushfq so that the
   push cannot overwrite red-zone data belonging to this function. */
__attribute__((noinline))
UInt h_pcmpistri_0D ( V128* argL, V128* argR )
{
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res = 0, flags = 0;
   __asm__ __volatile__(
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x0D,  %%xmm2, %%xmm11"   "\n\t"
      //"pcmpistrm $0x0D,  %%xmm2, %%xmm11"   "\n\t"
      //"movd %%xmm0, %%ecx"                  "\n\t"
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}

/* Software model for imm8 = 0x0D. */
UInt s_pcmpistri_0D ( V128* argLU, V128* argRU )
{
   V128 resV;
   UInt resOSZACP, resECX;
   Bool ok
      = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU,
                            zmask_from_V128(argLU),
                            zmask_from_V128(argRU),
                            0x0D, False/*!isSTRM*/
        );
   assert(ok);
   resECX = resV.uInt[0];
   return (resOSZACP << 16) | resECX;
}

/* Test vectors for imm8 = 0x0D. */
void istri_0D ( void )
{
   const char* wot = "0D";
   UInt(*h)(V128*,V128*) = h_pcmpistri_0D;
   UInt(*s)(V128*,V128*) = s_pcmpistri_0D;

   try_istri(wot,h,s, "11111111abcdef11", "0000000000abcdef");

   try_istri(wot,h,s, "11111111abcdef11", "00abcdef00abcdef");

   try_istri(wot,h,s, "11111111abcdef11", "0000000000abcdef");
   try_istri(wot,h,s, "1111111111abcdef", "0000000000abcdef");
   try_istri(wot,h,s, "111111111111abcd", "0000000000abcdef");

   try_istri(wot,h,s, "1111abcd11abcd11", "000000000000abcd");

   try_istri(wot,h,s, "11abcd1111abcd11", "000000000000abcd");
   try_istri(wot,h,s, "abcd111111abcd11", "000000000000abcd");
   try_istri(wot,h,s, "cd11111111abcd11", "000000000000abcd");

   try_istri(wot,h,s, "01abcd11abcd1111", "000000000000abcd");
   try_istri(wot,h,s, "00abcd11abcd1111", "000000000000abcd");
   try_istri(wot,h,s, "0000cd11abcd1111", "000000000000abcd");

   try_istri(wot,h,s, "00abcd1100abcd11", "000000000000abcd");
   try_istri(wot,h,s, "00abcd110000cd11", "000000000000abcd");

   try_istri(wot,h,s, "1111111111111234", "0000000000000000");
   try_istri(wot,h,s, "1111111111111234", "0000000000000011");
   try_istri(wot,h,s, "1111111111111234", "0000000000001111");

   try_istri(wot,h,s, "1111111111111234", "1111111111111234");
   try_istri(wot,h,s, "0a11111111111111", "000000000000000a");
   try_istri(wot,h,s, "0b11111111111111", "000000000000000a");

   try_istri(wot,h,s, "b111111111111111", "0000000000000000");
   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
   try_istri(wot,h,s, "123456789abcdef1", "0000000000000000");
   try_istri(wot,h,s, "0000000000000000", "123456789abcdef1");
}


//////////////////////////////////////////////////////////
//                                                      //
// ISTRI_09                                             //
//                                                      //
//////////////////////////////////////////////////////////

/* Hardware reference for imm8 = 0x09
   (fmt 1, agg 2 "equal each", pol 0, idx 0). */
UInt h_pcmpistri_09 ( V128* argL, V128* argR )
{
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x09,  %%xmm2, %%xmm11"   "\n\t"
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}

/* Software model for imm8 = 0x09. */
UInt s_pcmpistri_09 ( V128* argLU, V128* argRU )
{
   V128 resV;
   UInt resOSZACP, resECX;
   Bool ok
      = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU,
                            zmask_from_V128(argLU),
                            zmask_from_V128(argRU),
                            0x09, False/*!isSTRM*/
        );
   assert(ok);
   resECX = resV.uInt[0];
   return (resOSZACP << 16) | resECX;
}

/* Test vectors for imm8 = 0x09. */
void istri_09 ( void )
{
   const char* wot = "09";
   UInt(*h)(V128*,V128*) = h_pcmpistri_09;
   UInt(*s)(V128*,V128*) = s_pcmpistri_09;

   try_istri(wot,h,s, "0000000000000000", "0000000000000000");

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");

   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");

   try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa");
   try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaa00aa");

   try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa");
   try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaa00aa");

   try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa00aaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaa00aaaaaa");

   try_istri(wot,h,s, "0000000000000000", "aaaaaaaa00aaaaaa");
   try_istri(wot,h,s, "8000000000000000", "aaaaaaaa00aaaaaa");
   try_istri(wot,h,s, "0000000000000001", "aaaaaaaa00aaaaaa");

   try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
}



//////////////////////////////////////////////////////////
//                                                      //
// ISTRI_1B                                             //
//                                                      //
//////////////////////////////////////////////////////////

/* Hardware reference for imm8 = 0x1B
   (fmt 3, agg 2 "equal each", pol 1, idx 0). */
UInt h_pcmpistri_1B ( V128* argL, V128* argR )
{
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x1B,  %%xmm2, %%xmm11"   "\n\t"
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}

/* Software model for imm8 = 0x1B. */
UInt s_pcmpistri_1B ( V128* argLU, V128* argRU )
{
   V128 resV;
   UInt resOSZACP, resECX;
   Bool ok
      = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU,
                            zmask_from_V128(argLU),
                            zmask_from_V128(argRU),
                            0x1B, False/*!isSTRM*/
        );
   assert(ok);
   resECX = resV.uInt[0];
   return (resOSZACP << 16) | resECX;
}

/* Test vectors for imm8 = 0x1B. */
void istri_1B ( void )
{
   const char* wot = "1B";
   UInt(*h)(V128*,V128*) = h_pcmpistri_1B;
   UInt(*s)(V128*,V128*) = s_pcmpistri_1B;

   try_istri(wot,h,s, "0000000000000000", "0000000000000000");

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");

   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
   try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");

   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");

   try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa");
   try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaa00aa");

   try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa");
   try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaa00aa");

   try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa00aaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaa00aaaaaa");

   try_istri(wot,h,s, "0000000000000000", "aaaaaaaa00aaaaaa");
   try_istri(wot,h,s, "8000000000000000", "aaaaaaaa00aaaaaa");
   try_istri(wot,h,s, "0000000000000001", "aaaaaaaa00aaaaaa");

   try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
}



//////////////////////////////////////////////////////////
//                                                      //
// ISTRI_03                                             //
//                                                      //
//////////////////////////////////////////////////////////

/* Hardware reference for imm8 = 0x03
   (fmt 3, agg 0 "equal any", pol 0, idx 0). */
UInt h_pcmpistri_03 ( V128* argL, V128* argR )
{
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x03,  %%xmm2, %%xmm11"   "\n\t"
//"pcmpistrm $0x03, %%xmm2, %%xmm11"   "\n\t"
//"movd %%xmm0, %%ecx" "\n\t"
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}

/* Software model for imm8 = 0x03. */
UInt s_pcmpistri_03 ( V128* argLU, V128* argRU )
{
   V128 resV;
   UInt resOSZACP, resECX;
   Bool ok
      = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU,
                            zmask_from_V128(argLU),
                            zmask_from_V128(argRU),
                            0x03, False/*!isSTRM*/
        );
   assert(ok);
   resECX = resV.uInt[0];
   return (resOSZACP << 16) | resECX;
}

/* Test vectors for imm8 = 0x03. */
void istri_03 ( void )
{
   const char* wot = "03";
   UInt(*h)(V128*,V128*) = h_pcmpistri_03;
   UInt(*s)(V128*,V128*) = s_pcmpistri_03;

   try_istri(wot,h,s, "aacdacbdaacdaacd", "00000000000000aa");
   try_istri(wot,h,s, "aabbaabbaabbaabb", "00000000000000bb");
   try_istri(wot,h,s, "aabbccddaabbccdd", "000000000000aabb");
   try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");

   try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbccdd");
   try_istri(wot,h,s, "00bbccddaabbccdd", "00000000aabbccdd");
   try_istri(wot,h,s, "aabbccddaa00ccdd", "00000000aabbccdd");
   try_istri(wot,h,s, "aabbccddaabb00dd", "00000000aabbccdd");
   try_istri(wot,h,s, "aabbccddaabbcc00", "00000000aabbccdd");

   try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbccdd");
   try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aa00ccdd");
   try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabb00dd");
   try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbcc00");

   try_istri(wot,h,s, "0000000000000000", "0000000000000000");
   try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");

   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
   try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
   try_istri(wot,h,s, "0000aabbaabbaabb", "000000000000bbbb");
   try_istri(wot,h,s, "0000ccddaabbccdd", "00000000bbaabbaa");

   try_istri(wot,h,s, "0000ccddaabbccdd", "000000bbaabbaa00");

   try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
   try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
}


//////////////////////////////////////////////////////////
//                                                      //
// ISTRI_13                                             //
//                                                      //
922////////////////////////////////////////////////////////// 923 924UInt h_pcmpistri_13 ( V128* argL, V128* argR ) 925{ 926 V128 block[2]; 927 memcpy(&block[0], argL, sizeof(V128)); 928 memcpy(&block[1], argR, sizeof(V128)); 929 ULong res, flags; 930 __asm__ __volatile__( 931 "subq $1024, %%rsp" "\n\t" 932 "movdqu 0(%2), %%xmm2" "\n\t" 933 "movdqu 16(%2), %%xmm11" "\n\t" 934 "pcmpistri $0x13, %%xmm2, %%xmm11" "\n\t" 935//"pcmpistrm $0x13, %%xmm2, %%xmm11" "\n\t" 936//"movd %%xmm0, %%ecx" "\n\t" 937 "pushfq" "\n\t" 938 "popq %%rdx" "\n\t" 939 "movq %%rcx, %0" "\n\t" 940 "movq %%rdx, %1" "\n\t" 941 "addq $1024, %%rsp" "\n\t" 942 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 943 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 944 ); 945 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 946} 947 948UInt s_pcmpistri_13 ( V128* argLU, V128* argRU ) 949{ 950 V128 resV; 951 UInt resOSZACP, resECX; 952 Bool ok 953 = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU, 954 zmask_from_V128(argLU), 955 zmask_from_V128(argRU), 956 0x13, False/*!isSTRM*/ 957 ); 958 assert(ok); 959 resECX = resV.uInt[0]; 960 return (resOSZACP << 16) | resECX; 961} 962 963void istri_13 ( void ) 964{ 965 char* wot = "13"; 966 UInt(*h)(V128*,V128*) = h_pcmpistri_13; 967 UInt(*s)(V128*,V128*) = s_pcmpistri_13; 968 969 try_istri(wot,h,s, "aacdacbdaacdaacd", "00000000000000aa"); 970 try_istri(wot,h,s, "aabbaabbaabbaabb", "00000000000000bb"); 971 try_istri(wot,h,s, "aabbccddaabbccdd", "000000000000aabb"); 972 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 973 974 try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbccdd"); 975 try_istri(wot,h,s, "00bbccddaabbccdd", "00000000aabbccdd"); 976 try_istri(wot,h,s, "aabbccddaa00ccdd", "00000000aabbccdd"); 977 try_istri(wot,h,s, "aabbccddaabb00dd", "00000000aabbccdd"); 978 try_istri(wot,h,s, "aabbccddaabbcc00", "00000000aabbccdd"); 979 980 try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbccdd"); 981 try_istri(wot,h,s, 
"aabbccddaabbccdd", "00000000aa00ccdd"); 982 try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabb00dd"); 983 try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbcc00"); 984 985 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 986 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 987 988 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 989 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 990 try_istri(wot,h,s, "0000aabbaabbaabb", "000000000000bbbb"); 991 try_istri(wot,h,s, "0000ccddaabbccdd", "00000000bbaabbaa"); 992 993 try_istri(wot,h,s, "0000ccddaabbccdd", "000000bbaabbaa00"); 994 995 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 996 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 997} 998 999 1000 1001////////////////////////////////////////////////////////// 1002// // 1003// ISTRI_45 // 1004// // 1005////////////////////////////////////////////////////////// 1006 1007UInt h_pcmpistri_45 ( V128* argL, V128* argR ) 1008{ 1009 V128 block[2]; 1010 memcpy(&block[0], argL, sizeof(V128)); 1011 memcpy(&block[1], argR, sizeof(V128)); 1012 ULong res, flags; 1013 __asm__ __volatile__( 1014 "subq $1024, %%rsp" "\n\t" 1015 "movdqu 0(%2), %%xmm2" "\n\t" 1016 "movdqu 16(%2), %%xmm11" "\n\t" 1017 "pcmpistri $0x45, %%xmm2, %%xmm11" "\n\t" 1018//"pcmpistrm $0x04, %%xmm2, %%xmm11" "\n\t" 1019//"movd %%xmm0, %%ecx" "\n\t" 1020 "pushfq" "\n\t" 1021 "popq %%rdx" "\n\t" 1022 "movq %%rcx, %0" "\n\t" 1023 "movq %%rdx, %1" "\n\t" 1024 "addq $1024, %%rsp" "\n\t" 1025 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1026 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1027 ); 1028 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1029} 1030 1031UInt s_pcmpistri_45 ( V128* argLU, V128* argRU ) 1032{ 1033 V128 resV; 1034 UInt resOSZACP, resECX; 1035 Bool ok 1036 = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU, 1037 zmask_from_V128(argLU), 1038 zmask_from_V128(argRU), 1039 0x45, False/*!isSTRM*/ 1040 ); 1041 
assert(ok); 1042 resECX = resV.uInt[0]; 1043 return (resOSZACP << 16) | resECX; 1044} 1045 1046void istri_45 ( void ) 1047{ 1048 char* wot = "45"; 1049 UInt(*h)(V128*,V128*) = h_pcmpistri_45; 1050 UInt(*s)(V128*,V128*) = s_pcmpistri_45; 1051 1052 try_istri(wot,h,s, "aaaabbbbccccdddd", "000000000000bbcc"); 1053 try_istri(wot,h,s, "aaaabbbbccccdddd", "000000000000ccbb"); 1054 try_istri(wot,h,s, "baaabbbbccccdddd", "000000000000ccbb"); 1055 try_istri(wot,h,s, "baaabbbbccccdddc", "000000000000ccbb"); 1056 1057 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000ccbb"); 1058 try_istri(wot,h,s, "bbbbbbbb00bbbbbb", "000000000000ccbb"); 1059 try_istri(wot,h,s, "bbbbbbbbbbbb00bb", "000000000000ccbb"); 1060 try_istri(wot,h,s, "bbbbbbbbbbbbbb00", "000000000000ccbb"); 1061 try_istri(wot,h,s, "0000000000000000", "000000000000ccbb"); 1062 1063 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1064 1065 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000ccbb"); 1066 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000bb"); 1067 try_istri(wot,h,s, "bb44bb44bb44bb44", "000000006622ccbb"); 1068 1069 try_istri(wot,h,s, "bb44bb44bb44bb44", "000000000022ccbb"); 1070 try_istri(wot,h,s, "bb44bb44bb44bb44", "000000000000ccbb"); 1071 try_istri(wot,h,s, "bb44bb44bb44bb44", "00000000000000bb"); 1072 1073 try_istri(wot,h,s, "0011223344556677", "0000997755442211"); 1074 try_istri(wot,h,s, "1122334455667711", "0000997755442211"); 1075 1076 try_istri(wot,h,s, "0011223344556677", "0000aa8866553322"); 1077 try_istri(wot,h,s, "1122334455667711", "0000aa8866553322"); 1078} 1079 1080 1081////////////////////////////////////////////////////////// 1082// // 1083// ISTRI_01 // 1084// // 1085////////////////////////////////////////////////////////// 1086 1087UInt h_pcmpistri_01 ( V128* argL, V128* argR ) 1088{ 1089 V128 block[2]; 1090 memcpy(&block[0], argL, sizeof(V128)); 1091 memcpy(&block[1], argR, sizeof(V128)); 1092 ULong res, flags; 1093 __asm__ __volatile__( 1094 "subq $1024, %%rsp" 
"\n\t" 1095 "movdqu 0(%2), %%xmm2" "\n\t" 1096 "movdqu 16(%2), %%xmm11" "\n\t" 1097 "pcmpistri $0x01, %%xmm2, %%xmm11" "\n\t" 1098//"pcmpistrm $0x01, %%xmm2, %%xmm11" "\n\t" 1099//"movd %%xmm0, %%ecx" "\n\t" 1100 "pushfq" "\n\t" 1101 "popq %%rdx" "\n\t" 1102 "movq %%rcx, %0" "\n\t" 1103 "movq %%rdx, %1" "\n\t" 1104 "addq $1024, %%rsp" "\n\t" 1105 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1106 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1107 ); 1108 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1109} 1110 1111UInt s_pcmpistri_01 ( V128* argLU, V128* argRU ) 1112{ 1113 V128 resV; 1114 UInt resOSZACP, resECX; 1115 Bool ok 1116 = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU, 1117 zmask_from_V128(argLU), 1118 zmask_from_V128(argRU), 1119 0x01, False/*!isSTRM*/ 1120 ); 1121 assert(ok); 1122 resECX = resV.uInt[0]; 1123 return (resOSZACP << 16) | resECX; 1124} 1125 1126void istri_01 ( void ) 1127{ 1128 char* wot = "01"; 1129 UInt(*h)(V128*,V128*) = h_pcmpistri_01; 1130 UInt(*s)(V128*,V128*) = s_pcmpistri_01; 1131 1132 try_istri(wot,h,s, "aacdacbdaacdaacd", "00000000000000aa"); 1133 try_istri(wot,h,s, "aabbaabbaabbaabb", "00000000000000bb"); 1134 try_istri(wot,h,s, "aabbccddaabbccdd", "000000000000aabb"); 1135 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 1136 1137 try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbccdd"); 1138 try_istri(wot,h,s, "00bbccddaabbccdd", "00000000aabbccdd"); 1139 try_istri(wot,h,s, "aabbccddaa00ccdd", "00000000aabbccdd"); 1140 try_istri(wot,h,s, "aabbccddaabb00dd", "00000000aabbccdd"); 1141 try_istri(wot,h,s, "aabbccddaabbcc00", "00000000aabbccdd"); 1142 1143 try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbccdd"); 1144 try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aa00ccdd"); 1145 try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabb00dd"); 1146 try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbcc00"); 1147 1148 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1149 try_istri(wot,h,s, 
"aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1150 1151 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 1152 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 1153 try_istri(wot,h,s, "0000aabbaabbaabb", "000000000000bbbb"); 1154 try_istri(wot,h,s, "0000ccddaabbccdd", "00000000bbaabbaa"); 1155 1156 try_istri(wot,h,s, "0000ccddaabbccdd", "000000bbaabbaa00"); 1157 1158 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 1159 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 1160} 1161 1162 1163////////////////////////////////////////////////////////// 1164// // 1165// ISTRI_39 // 1166// // 1167////////////////////////////////////////////////////////// 1168 1169UInt h_pcmpistri_39 ( V128* argL, V128* argR ) 1170{ 1171 V128 block[2]; 1172 memcpy(&block[0], argL, sizeof(V128)); 1173 memcpy(&block[1], argR, sizeof(V128)); 1174 ULong res, flags; 1175 __asm__ __volatile__( 1176 "subq $1024, %%rsp" "\n\t" 1177 "movdqu 0(%2), %%xmm2" "\n\t" 1178 "movdqu 16(%2), %%xmm11" "\n\t" 1179 "pcmpistri $0x39, %%xmm2, %%xmm11" "\n\t" 1180 "pushfq" "\n\t" 1181 "popq %%rdx" "\n\t" 1182 "movq %%rcx, %0" "\n\t" 1183 "movq %%rdx, %1" "\n\t" 1184 "addq $1024, %%rsp" "\n\t" 1185 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0]) 1186 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory" 1187 ); 1188 return ((flags & 0x8D5) << 16) | (res & 0xFFFF); 1189} 1190 1191UInt s_pcmpistri_39 ( V128* argLU, V128* argRU ) 1192{ 1193 V128 resV; 1194 UInt resOSZACP, resECX; 1195 Bool ok 1196 = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU, 1197 zmask_from_V128(argLU), 1198 zmask_from_V128(argRU), 1199 0x39, False/*!isSTRM*/ 1200 ); 1201 assert(ok); 1202 resECX = resV.uInt[0]; 1203 return (resOSZACP << 16) | resECX; 1204} 1205 1206void istri_39 ( void ) 1207{ 1208 char* wot = "39"; 1209 UInt(*h)(V128*,V128*) = h_pcmpistri_39; 1210 UInt(*s)(V128*,V128*) = s_pcmpistri_39; 1211 1212 try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 1213 1214 try_istri(wot,h,s, 
"aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1215 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1216 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 1217 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 1218 1219 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 1220 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 1221 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 1222 1223 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1224 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1225 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1226 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1227 1228 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1229 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 1230 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 1231 1232 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 1233 1234 try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa"); 1235 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa"); 1236 try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaa00aa"); 1237 1238 try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaaaaaa"); 1239 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa"); 1240 try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaa00aa"); 1241 1242 try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa"); 1243 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa00aaaaaa"); 1244 try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaa00aaaaaa"); 1245 1246 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa00aaaaaa"); 1247 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa00aaaaaa"); 1248 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa00aaaaaa"); 1249 1250 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 1251 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 1252} 1253 1254 1255 1256////////////////////////////////////////////////////////// 1257// // 
1258// main // 1259// // 1260////////////////////////////////////////////////////////// 1261 1262int main ( void ) 1263{ 1264 istri_4B(); 1265 istri_3B(); 1266 istri_09(); 1267 istri_1B(); 1268 istri_03(); 1269 istri_0D(); 1270 istri_13(); 1271 istri_45(); 1272 istri_01(); 1273 istri_39(); 1274 return 0; 1275} 1276