/* pcmpxstrx64.c revision 8f943afc22a6a683b78271836c8ddc462b4824a9 */
/* Tests e-vs-i or i-vs-m aspects for pcmp{e,i}str{i,m}.  Does not
   check the core arithmetic in any detail. */

#include <string.h>
#include <stdio.h>
#include <assert.h>

typedef unsigned char V128[16];
typedef unsigned int UInt;
typedef signed int Int;
typedef unsigned char UChar;
typedef unsigned long long int ULong;
typedef UChar Bool;
#define False ((Bool)0)
#define True ((Bool)1)

/* Print the 16 bytes of *vec as 32 hex digits, highest-indexed byte
   first.  No newline is emitted. */
void show_V128 ( V128* vec )
{
   Int i;
   for (i = 15; i >= 0; i--)
      printf("%02x", (UInt)( (*vec)[i] ));
}

/* Build a 16-byte vector from a 16-character hex summary string.
   Each hex digit d (either case) becomes the byte 0xdd.  The last
   character of the string lands in byte 0 and the first in byte 15,
   so the string reads the same way show_V128 prints the vector.
   Asserts if the string is not exactly 16 characters long or
   contains a non-hex character. */
void expand ( V128* dst, const char* summary )
{
   Int i;
   assert( strlen(summary) == 16 );
   for (i = 0; i < 16; i++) {
      UChar xx = 0;
      UChar x  = summary[15-i];
      if      (x >= '0' && x <= '9') { xx = x - '0'; }
      else if (x >= 'A' && x <= 'F') { xx = x - 'A' + 10; }
      else if (x >= 'a' && x <= 'f') { xx = x - 'a' + 10; }
      else assert(0);

      assert(xx < 16);
      /* Replicate the nibble into both halves of the byte. */
      xx = (xx << 4) | xx;
      (*dst)[i] = xx;
   }
}

/* Fill the 80-byte scratch block with 0x55 and then store the inputs
   for one instruction run.

   NOTE(review): the three 8-byte copies do not follow the layout
   documented in one_test.  The copy at +24 overwrites the upper half
   of argR, the slot at +32 (which the asm loads into %rdx) receives
   raxIN, and the slot at +40 (loaded into %rax) receives rdxIN.
   This is kept exactly as found because every printed result depends
   on it; confirm the intent before changing the offsets. */
static void init_block ( UChar* blockC, V128* argL, V128* argR,
                         ULong rdxIN, ULong raxIN )
{
   memset(blockC, 0x55, 80);
   memcpy(blockC + 0,  argL,   16);
   memcpy(blockC + 16, argR,   16);
   memcpy(blockC + 24, &rdxIN, 8);
   memcpy(blockC + 32, &raxIN, 8);
   memcpy(blockC + 40, &rdxIN, 8);
}

/* Print one result line: the label, the xmm0 slot (bytes 48..63), the
   rcx slot (word 8) and the rflags slot (word 9).  The flags are
   masked with 0x8D5, i.e. only the OF, SF, ZF, AF, PF and CF bit
   positions are shown. */
static void show_result ( const char* label, ULong* block )
{
   UChar* blockC = (UChar*)block;
   printf(" %s: ", label);
   printf(" xmm0 ");
   show_V128( (V128*)(blockC+48) );
   printf(" rcx %016llx flags %08llx\n", block[8], block[9] & 0x8D5);
}

/* Run "pcmp<SUFFIX> $<IMM>, %xmm2, %xmm13" with register state loaded
   from the scratch block at BLOCKC, then store xmm0, cx and rflags
   back into it.  SUFFIX and IMM must be string literals, because the
   instruction text has to be assembled at compile time.

   Only the low 16 bits of rcx are loaded and stored (movw with %cx),
   so the upper 48 bits of the rcx slot keep their 0x55 fill.  The
   16-bit register name is required: a `w' suffix combined with the
   64-bit name %rcx is not a valid operand combination. */
#define RUN_PCMPXSTRX(SUFFIX, IMM, BLOCKC)                              \
   __asm__ __volatile__(                                                \
      "movupd    0(%0), %%xmm2"                              "\n\t"     \
      "movupd    16(%0), %%xmm13"                            "\n\t"     \
      "movq      32(%0), %%rdx"                              "\n\t"     \
      "movq      40(%0), %%rax"                              "\n\t"     \
      "movupd    48(%0), %%xmm0"                             "\n\t"     \
      "movw      64(%0), %%cx"                               "\n\t"     \
      "pcmp" SUFFIX " $" IMM ", %%xmm2, %%xmm13"             "\n\t"     \
      "movupd    %%xmm0, 48(%0)"                             "\n\t"     \
      "movw      %%cx, 64(%0)"                               "\n\t"     \
      "pushfq"                                               "\n\t"     \
      "popq      %%r15"                                      "\n\t"     \
      "movq      %%r15, 72(%0)"                              "\n\t"     \
      : /*out*/                                                         \
      : /*in*/"r"(BLOCKC)                                               \
      : /*trash*/"memory","cc","xmm2","xmm13","xmm0",                   \
                 "rdx","rax","rcx","r15"                                \
   )

/* One complete sub-test: reset the block, run the instruction and
   print the result line labelled "<SUFFIX> $<IMM>".  Relies on the
   locals block, blockC, argL, argR, rdxIN and raxIN of one_test. */
#define TEST_VARIANT(SUFFIX, IMM)                                       \
   do {                                                                 \
      init_block(blockC, &argL, &argR, rdxIN, raxIN);                   \
      RUN_PCMPXSTRX(SUFFIX, IMM, blockC);                               \
      show_result(SUFFIX " $" IMM, block);                              \
   } while (0)

/* Expand the two summaries into vectors, print the inputs, then run
   all eight instruction variants (implicit/explicit length, index/mask
   result, immediates 0x4A and 0x0A) and print one result line for
   each.

   summL, summR : 16-character hex summaries, see expand().
   rdxIN, raxIN : 64-bit values stored into the scratch block by
                  init_block (see the NOTE there about which slots
                  they actually end up in). */
void one_test ( const char* summL, ULong rdxIN, const char* summR, ULong raxIN )
{
   V128 argL, argR;
   expand( &argL, summL );
   expand( &argR, summR );
   printf("\n");
   printf("rdx %016llx argL ", rdxIN);
   show_V128(&argL);
   printf(" rax %016llx argR ", raxIN);
   show_V128(&argR);
   printf("\n");

   /* Scratch block shared with the inline asm:
         word  byte  contents
         0     0     in:    argL   (2 words)
         2     16    in:    argR   (2 words)
         4     32    in:    rdx
         5     40    in:    rax
         6     48    inout: xmm0   (2 words)
         8     64    inout: rcx
         9     72    out:   rflags */
   ULong block[ 2 + 2 + 1 + 1 + 2 + 1 + 1 ];
   assert(sizeof(block) == 80);

   UChar* blockC = (UChar*)&block[0];

   TEST_VARIANT("istri", "0x4A");
   TEST_VARIANT("istri", "0x0A");
   TEST_VARIANT("istrm", "0x4A");
   TEST_VARIANT("istrm", "0x0A");
   TEST_VARIANT("estri", "0x4A");
   TEST_VARIANT("estri", "0x0A");
   TEST_VARIANT("estrm", "0x4A");
   TEST_VARIANT("estrm", "0x0A");
}

/* Drive one_test over a fixed set of operand/length combinations,
   including lengths at and beyond +/-16.  Negative literals are
   converted to ULong on the call, i.e. they wrap modulo 2^64. */
int main ( void )
{
   one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaa0aaaaaaa", 0 );
   one_test("0000000000000000", 0, "aaaaaaaa0aaaaaaa", 0 );

   one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 0 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 0 );
   one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 6 );

   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 15 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 16 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 17 );

   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -6 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -15 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -16 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -17 );

   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", 15, "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", 16, "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", 17, "aaaaaaaaaaaaaaaa", 6 );

   one_test("aaaaaaaaaaaaaaaa", -5, "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", -15, "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", -16, "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", -17, "aaaaaaaaaaaaaaaa", 6 );

   return 0;
}