/* Tests e-vs-i or i-vs-m aspects for pcmp{e,i}str{i,m}.  Does not
   check the core arithmetic in any detail.

   Each test case runs all eight combinations of
      {implicit,explicit length} x {index,mask result} x {imm 0x4A, 0x0A}
   on the same pair of 128-bit operands and prints the resulting
   xmm0, rcx and arithmetic flags. */

#include <string.h>
#include <stdio.h>
#include <assert.h>

typedef  unsigned char           V128[16];
typedef  unsigned int            UInt;
typedef  signed int              Int;
typedef  unsigned char           UChar;
typedef  unsigned long long int  ULong;
typedef  UChar                   Bool;
#define False ((Bool)0)
#define True  ((Bool)1)

/* Byte offsets of the fields of the staging block that is handed to
   the inline assembly.  The numeric displacements inside
   RUN_PCMPXSTRX must be kept in step with these values.

      field            ULong index   byte offset
      in:    argL           0             0
      in:    argR           2            16
      in:    rdx            4            32
      in:    rax            5            40
      inout: xmm0           6            48
      inout: rcx            8            64
      out:   rflags         9            72                         */
enum {
   OFF_ARGL    = 0,
   OFF_ARGR    = 16,
   OFF_RDX     = 32,
   OFF_RAX     = 40,
   OFF_XMM0    = 48,
   OFF_RCX     = 64,
   OFF_RFLAGS  = 72,
   BLOCK_BYTES = 80
};

/* Print a 128-bit value as 32 hex digits, most significant byte
   (index 15) first. */
void show_V128 ( V128* vec )
{
   Int i;
   for (i = 15; i >= 0; i--)
      printf("%02x", (UInt)( (*vec)[i] ));
}

/* Build a 128-bit value from a 16-character hex summary.  Each hex
   digit d is expanded to the byte 0xdd.  The leftmost character of
   'summary' becomes the most significant byte (index 15) of *dst.
   Asserts if 'summary' is not exactly 16 characters long or contains
   a non-hex character. */
void expand ( V128* dst, const char* summary )
{
   Int i;
   assert( strlen(summary) == 16 );
   for (i = 0; i < 16; i++) {
      UChar xx = 0;
      UChar x  = summary[15-i];
      if      (x >= '0' && x <= '9') { xx = x - '0'; }
      else if (x >= 'A' && x <= 'F') { xx = x - 'A' + 10; }
      else if (x >= 'a' && x <= 'f') { xx = x - 'a' + 10; }
      else assert(0);

      assert(xx < 16);
      /* Replicate the nibble into both halves of the byte. */
      xx = (xx << 4) | xx;
      (*dst)[i] = xx;
   }
}

/* Reset the staging block and store the instruction inputs into it.
   The whole block is first filled with 0x55 so that xmm0 and rcx hold
   a known pattern before the instruction under test runs. */
static void fill_block ( ULong* block,
                         V128* argL, ULong rdxIN,
                         V128* argR, ULong raxIN )
{
   UChar* blockC = (UChar*)block;
   memset(blockC, 0x55, BLOCK_BYTES);
   memcpy(blockC + OFF_ARGL, argL,   16);
   memcpy(blockC + OFF_ARGR, argR,   16);
   /* rdx is the length paired with argL (xmm2) and rax the length
      paired with argR (xmm13).  They must land at the offsets the
      assembly reads them from, and must not overlap argR. */
   memcpy(blockC + OFF_RDX,  &rdxIN, 8);
   memcpy(blockC + OFF_RAX,  &raxIN, 8);
}

/* Print one result line: the label, then xmm0, rcx and the flags as
   left in the staging block by RUN_PCMPXSTRX. */
static void show_result ( const char* label, ULong* block )
{
   UChar* blockC = (UChar*)block;
   fputs(label, stdout);
   printf(" xmm0 ");
   show_V128( (V128*)(blockC + OFF_XMM0) );
   /* 0x8D5 keeps only OF, SF, ZF, AF, PF and CF from rflags. */
   printf(" rcx %016llx flags %08llx\n",
          block[OFF_RCX / 8], block[OFF_RFLAGS / 8] & 0x8D5);
}

/* Load the inputs from the staging block, execute pcmp<mnem> with the
   given immediate on (xmm2, xmm13), and write xmm0, cx and rflags
   back to the block.  Only the low 16 bits of rcx are loaded and
   stored, so the upper 48 bits of the rcx slot keep their 0x55
   filler.  'mnem' and 'imm' are pasted textually into the
   instruction, so they must be literal tokens, e.g. (istri, 0x4A). */
#define RUN_PCMPXSTRX(blockptr, mnem, imm)                               \
   __asm__ __volatile__(                                                 \
      "movupd  0(%0), %%xmm2"                     "\n\t"                 \
      "movupd  16(%0), %%xmm13"                   "\n\t"                 \
      "movq    32(%0), %%rdx"                     "\n\t"                 \
      "movq    40(%0), %%rax"                     "\n\t"                 \
      "movupd  48(%0), %%xmm0"                    "\n\t"                 \
      "movw    64(%0), %%cx"                      "\n\t"                 \
      "pcmp" #mnem " $" #imm ", %%xmm2, %%xmm13"  "\n\t"                 \
      "movupd  %%xmm0, 48(%0)"                    "\n\t"                 \
      "movw    %%cx, 64(%0)"                      "\n\t"                 \
      "pushfq"                                    "\n\t"                 \
      "popq    %%r15"                             "\n\t"                 \
      "movq    %%r15, 72(%0)"                     "\n\t"                 \
      : /*out*/                                                          \
      : /*in*/"r"(blockptr)                                              \
      : /*trash*/"memory","cc","xmm2","xmm13","xmm0",                    \
                 "rdx","rax","rcx","r15"                                 \
   )

/* Stage the inputs, run one instruction variant and print its
   result.  Expects 'block', 'argL', 'argR', 'rdxIN' and 'raxIN' to be
   in scope at the point of use. */
#define ONE_VARIANT(mnem, imm)                                           \
   do {                                                                  \
      fill_block(block, &argL, rdxIN, &argR, raxIN);                     \
      RUN_PCMPXSTRX(block, mnem, imm);                                   \
      show_result(" " #mnem " $" #imm ": ", block);                      \
   } while (0)

/* Run all eight instruction variants on one pair of operands.

   summL, summR: 16-character hex summaries, expanded by expand() into
                 the two 128-bit operands (argL -> xmm2, argR -> xmm13).
   rdxIN, raxIN: values placed in rdx and rax before each instruction.

   NOTE: earlier revisions of this function staged rdxIN/raxIN at the
   wrong block offsets (24/32/40), which overwrote the top half of
   argR and swapped the two registers.  The output therefore differs
   from those revisions; any stored expected output needs to be
   regenerated. */
void one_test ( const char* summL, ULong rdxIN,
                const char* summR, ULong raxIN )
{
   V128 argL, argR;
   ULong block[BLOCK_BYTES / sizeof(ULong)];

   assert(sizeof(block) == BLOCK_BYTES);

   expand( &argL, summL );
   expand( &argR, summR );
   printf("\n");
   printf("rdx %016llx argL ", rdxIN);
   show_V128(&argL);
   printf(" rax %016llx argR ", raxIN);
   show_V128(&argR);
   printf("\n");

   /* Implicit-length forms. */
   ONE_VARIANT(istri, 0x4A);
   ONE_VARIANT(istri, 0x0A);
   ONE_VARIANT(istrm, 0x4A);
   ONE_VARIANT(istrm, 0x0A);

   /* Explicit-length forms. */
   ONE_VARIANT(estri, 0x4A);
   ONE_VARIANT(estri, 0x0A);
   ONE_VARIANT(estrm, 0x4A);
   ONE_VARIANT(estrm, 0x0A);
}

#undef ONE_VARIANT

int main ( void )
{
   /* Operands containing a zero byte. */
   one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaa0aaaaaaa", 0 );
   one_test("0000000000000000", 0, "aaaaaaaa0aaaaaaa", 0 );

   /* No zero bytes; one or both register values zero. */
   one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 0 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 0 );
   one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 6 );

   /* Vary rax upwards through and past 16. */
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 15 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 16 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 17 );

   /* Negative rax values (converted to ULong on the call). */
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -6 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -15 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -16 );
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -17 );

   /* Vary rdx upwards through and past 16. */
   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", 15, "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", 16, "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", 17, "aaaaaaaaaaaaaaaa", 6 );

   /* Negative rdx values (converted to ULong on the call). */
   one_test("aaaaaaaaaaaaaaaa", -5, "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", -15, "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", -16, "aaaaaaaaaaaaaaaa", 6 );
   one_test("aaaaaaaaaaaaaaaa", -17, "aaaaaaaaaaaaaaaa", 6 );

   return 0;
}