/* Tests e-vs-i or i-vs-m aspects for pcmp{e,i}str{i,m}.  Does not
   check the core arithmetic in any detail.  This file checks the
   16-bit character versions (w is for wide).

   Every call to one_test() runs eight instruction variants on the
   same inputs -- pcmpistri, pcmpistrm, pcmpestri, pcmpestrm, each
   with immediates 0x4B and 0x0B -- and prints xmm0, rcx and the
   status flags observed afterwards. */

#include <string.h>
#include <stdio.h>
#include <assert.h>

typedef  unsigned char           V128[16];
typedef  unsigned int            UInt;
typedef  signed int              Int;
typedef  unsigned char           UChar;
typedef  unsigned long long int  ULong;
typedef  UChar                   Bool;
#define False ((Bool)0)
#define True  ((Bool)1)

/* Print the 16 bytes of *vec as 32 hex digits, byte 15 first and
   byte 0 last.  No separator and no trailing newline. */
void show_V128 ( V128* vec )
{
   Int i;
   for (i = 15; i >= 0; i--)
      printf("%02x", (UInt)( (*vec)[i] ));
}

/* Build a 128-bit value from a 16-character hex string.  Each hex
   digit (either case) becomes one byte with that digit in both
   nibbles, e.g. 'a' -> 0xaa.  The rightmost character of summary
   lands in byte 0 of *dst, so the string reads the same way
   show_V128() prints.  Asserts that summary is exactly 16 characters
   long and contains only hex digits. */
void expand ( V128* dst, char* summary )
{
   Int i;
   assert( strlen(summary) == 16 );
   for (i = 0; i < 16; i++) {
      UChar xx = 0;
      UChar x  = summary[15-i];
      if      (x >= '0' && x <= '9') { xx = x - '0'; }
      else if (x >= 'A' && x <= 'F') { xx = x - 'A' + 10; }
      else if (x >= 'a' && x <= 'f') { xx = x - 'a' + 10; }
      else assert(0);

      assert(xx < 16);
      /* Duplicate the nibble into both halves of the byte. */
      xx = (xx << 4) | xx;
      (*dst)[i] = xx;
   }
}

/* Reset the 80-byte scratch area to the 0x55 filler pattern and copy
   the test inputs into it.

   NOTE(review): the three scalar copies below do not match the
   layout documented in one_test() (rdx at offset 32, rax at offset
   40), which is the layout the asm reads.  As written:
     - the copy at +24 overwrites the upper 8 bytes of argR with
       rdxIN;
     - offset 32 (loaded into %rdx) receives raxIN;
     - offset 40 (loaded into %rax) receives rdxIN.
   So the "rdx ... argL ... rax ... argR" line printed by one_test()
   does not describe the operands the instruction really receives.
   The copies are deliberately kept exactly as they were: this
   program's output is presumably compared against recorded
   reference output, which would have to be regenerated if the
   offsets were corrected (rdxIN at +32, raxIN at +40, nothing at
   +24).  Confirm before changing. */
static void fill_block ( UChar* bytes, V128* argL, V128* argR,
                         ULong rdxIN, ULong raxIN )
{
   memset(bytes, 0x55, 80);
   memcpy(bytes + 0,  argL,   16);
   memcpy(bytes + 16, argR,   16);
   memcpy(bytes + 24, &rdxIN, 8);
   memcpy(bytes + 32, &raxIN, 8);
   memcpy(bytes + 40, &rdxIN, 8);
}

/* Print one result line: the banner naming the variant, then the
   xmm0 image (bytes 48..63 of the block), the rcx slot (block[8])
   and the saved rflags (block[9]) masked with 0x8D5, i.e. only the
   CF, PF, AF, ZF, SF and OF bits. */
static void show_outputs ( const char* banner, ULong* block )
{
   printf("%s", banner);
   printf(" xmm0 ");
   show_V128( (V128*)((UChar*)block + 48) );
   printf(" rcx %016llx flags %08llx\n", block[8], block[9] & 0x8D5);
}

/* Execute "pcmp<SUFFIX> $<IMM8>, %xmm2, %xmm13" with its inputs
   loaded from, and its outputs stored back to, the scratch area at
   BYTES.  SUFFIX and IMM8 must be string literals because they are
   pasted into the asm template.  Only the low 16 bits of rcx are
   loaded and stored (movw), so bytes 66..71 of the area keep
   whatever the caller put there.  rflags is captured via
   pushfq/popq into r15 and stored at offset 72. */
#define PCMPXSTRX_EXEC(SUFFIX, IMM8, BYTES)                          \
   __asm__ __volatile__(                                             \
      "movupd 0(%0), %%xmm2"                      "\n\t"             \
      "movupd 16(%0), %%xmm13"                    "\n\t"             \
      "movq 32(%0), %%rdx"                        "\n\t"             \
      "movq 40(%0), %%rax"                        "\n\t"             \
      "movupd 48(%0), %%xmm0"                     "\n\t"             \
      "movw 64(%0), %%cx"                         "\n\t"             \
      "pcmp" SUFFIX " $" IMM8 ", %%xmm2, %%xmm13" "\n\t"             \
      "movupd %%xmm0, 48(%0)"                     "\n\t"             \
      "movw %%cx, 64(%0)"                         "\n\t"             \
      "pushfq"                                    "\n\t"             \
      "popq %%r15"                                "\n\t"             \
      "movq %%r15, 72(%0)"                        "\n\t"             \
      : /*out*/                                                      \
      : /*in*/"r"(BYTES)                                             \
      : /*trash*/"memory","cc","xmm2","xmm13","xmm0",                \
                 "rdx","rax","rcx","r15"                             \
   )

/* One complete variant inside one_test(): reload the scratch area,
   run the instruction, print the result line.  Uses one_test()'s
   locals by name. */
#define ONE_VARIANT(SUFFIX, IMM8)                                    \
   do {                                                              \
      fill_block(blockC, &argL, &argR, rdxIN, raxIN);                \
      PCMPXSTRX_EXEC(SUFFIX, IMM8, blockC);                          \
      show_outputs(" " SUFFIX " $" IMM8 ": ", block);                \
   } while (0)

/* Run all eight instruction variants on one set of inputs.

   summL, summR: 16-hex-digit summaries, expanded by expand() into
                 the two 128-bit operands argL and argR.
   rdxIN, raxIN: 64-bit values intended for %rdx and %rax (but see
                 the NOTE(review) on fill_block() for where they
                 actually end up).

   Prints a blank line, a header line showing the four inputs, then
   one result line per variant. */
void one_test ( char* summL, ULong rdxIN, char* summR, ULong raxIN )
{
   V128 argL, argR;
   expand( &argL, summL );
   expand( &argR, summR );
   printf("\n");
   printf("rdx %016llx argL ", rdxIN);
   show_V128(&argL);
   printf(" rax %016llx argR ", raxIN);
   show_V128(&argR);
   printf("\n");

   /* Scratch area shared with the asm.  The trailing columns give
      each field's ULong index and byte offset. */
   ULong block[ 2/*in:argL*/          // 0  0
                + 2/*in:argR*/        // 2  16
                + 1/*in:rdx*/         // 4  32
                + 1/*in:rax*/         // 5  40
                + 2/*inout:xmm0*/     // 6  48
                + 1/*inout:rcx*/      // 8  64
                + 1/*out:rflags*/ ];  // 9  72
   assert(sizeof(block) == 80);

   UChar* blockC = (UChar*)&block[0];

   /* Implicit-length forms. */
   ONE_VARIANT("istri", "0x4B");
   ONE_VARIANT("istri", "0x0B");
   ONE_VARIANT("istrm", "0x4B");
   ONE_VARIANT("istrm", "0x0B");

   /* Explicit-length forms (lengths taken from rax/rdx). */
   ONE_VARIANT("estri", "0x4B");
   ONE_VARIANT("estri", "0x0B");
   ONE_VARIANT("estrm", "0x4B");
   ONE_VARIANT("estrm", "0x0B");
}

int main ( void )
{
   /* A zero element inside the right-hand operand. */
   one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaa00aaaaaa", 0 );
   one_test("0000000000000000", 0, "aaaaaaaa00aaaaaa", 0 );

   /* Identical operands, one or both register values zero. */
   one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 0 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 0 );
   one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 6 );

   /* rdxIN fixed at 5, raxIN swept over positive values. */
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 15 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 16 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 17 );

   /* rdxIN fixed at 5, raxIN swept over negative values. */
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -6 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -15 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -16 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -17 );

   /* raxIN fixed at 6, rdxIN swept over positive values. */
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", 15, "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", 16, "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", 17, "aaaaaaaaaaaaaaaa", 6 );

   /* raxIN fixed at 6, rdxIN swept over negative values. */
   one_test("aaaaaaaaaaaaaaaa", -5, "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", -15, "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", -16, "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", -17, "aaaaaaaaaaaaaaaa", 6 );

   return 0;
}