1
2#include <stdio.h>
3#include <stdlib.h>
4#include <assert.h>
5
/* Short aliases for the unsigned byte and unsigned word types used
   throughout this file. */
typedef  unsigned char  UChar;
typedef  unsigned int   UInt;

/* Deterministic pseudo-random number source.

   Each call advances a linear congruential generator whose state is
   kept in a function-local static (initially 0), so the sequence is
   the same on every run of the program.  The multiplier and increment
   are the ones given in "Numerical Recipes in C", 2nd Edition.

   Returns the updated state shifted right by 17 bits (the top 15 bits
   when UInt is 32 bits wide). */
static UInt randomUInt ( void )
{
   static UInt state = 0;
   const unsigned long multiplier = 1664525UL;
   const unsigned long increment  = 1013904223UL;

   /* The product is formed in unsigned long and then reduced back to
      UInt, which wraps modulo the range of UInt. */
   state = (UInt)(multiplier * state + increment);

   return state >> 17;
}
16
17void maskmovq_mmx ( UChar* regL, UChar* regR )
18{
19   int i;
20   UChar* dst = malloc(8);
21   assert(dst);
22   for (i = 0; i < 8; i++)
23      dst[i] = 17 * (i+1);
24   __asm__ __volatile__(
25      "emms\n\t"
26      "movq (%0), %%mm1\n\t"
27      "movq (%1), %%mm2\n\t"
28      "movq %2, %%rdi\n\t"
29      "maskmovq %%mm1,%%mm2"
30      : /*out*/
31      : /*in*/ "r"(regL), "r"(regR), "r"(&dst[0])
32      : /*trash*/ "rdi", "memory", "cc"
33   );
34   for (i = 0; i < 8; i++)
35      printf("%02x", dst[i]);
36   free(dst);
37}
38
39void maskmovdqu_sse ( UChar* regL, UChar* regR )
40{
41   int i;
42   UChar* dst = malloc(16);
43   assert(dst);
44   for (i = 0; i < 16; i++)
45      dst[i] = i;
46   __asm__ __volatile__(
47      "movups (%0), %%xmm1\n\t"
48      "movups (%1), %%xmm12\n\t"
49      "movq %2, %%rdi\n\t"
50      "maskmovdqu %%xmm12,%%xmm1\n\t"
51      "sfence"
52      : /*out*/
53      : /*in*/ "r"(regL), "r"(regR), "r"(dst)
54      : /*trash*/ "rdi", "memory", "cc"
55   );
56   for (i = 0; i < 16; i++)
57      printf("%02x", dst[i]);
58   free(dst);
59}
60
61int main ( int argc, char** argv )
62{
63   int i, j;
64
65   /* mmx test */
66   {
67      UChar* regL = malloc(8);
68      UChar* regR = malloc(8);
69      assert(regL);
70      assert(regR);
71      for (i = 0; i < 10; i++) {
72         for (j = 0; j < 8; j++) {
73            regL[j] = (UChar)randomUInt();
74            printf("%02x", regL[j]);
75         }
76         printf(" ");
77         for (j = 0; j < 8; j++) {
78            regR[j] = (UChar)randomUInt();
79            printf("%02x", regR[j]);
80         }
81         printf(" ");
82         maskmovq_mmx( regR, regL );
83         printf("\n");
84      }
85   }
86
87   /* sse test */
88   {
89      UChar* regL = malloc(16);
90      UChar* regR = malloc(16);
91      assert(regL);
92      assert(regR);
93      for (i = 0; i < 10; i++) {
94         for (j = 0; j < 16; j++) {
95            regL[j] = (UChar)randomUInt();
96            printf("%02x", regL[j]);
97         }
98         printf(" ");
99         for (j = 0; j < 16; j++) {
100            regR[j] = (UChar)randomUInt();
101            printf("%02x", regR[j]);
102         }
103         printf(" ");
104         maskmovdqu_sse( regR, regL );
105         printf("\n");
106      }
107   }
108
109   return 0;
110}
111