1
2/* Tests e-vs-i or i-vs-m aspects for pcmp{e,i}str{i,m}.  Does not
3   check the core arithmetic in any detail.  */
4
5#include <string.h>
6#include <stdio.h>
7#include <assert.h>
8
/* Short aliases for the basic types used by this test. */
typedef unsigned char V128[16];          /* one 128-bit vector, held as 16 bytes */
typedef unsigned int UInt;
typedef signed int Int;
typedef unsigned char UChar;
typedef unsigned long long int ULong;

/* Boolean type, stored in a single unsigned byte, and its two values. */
typedef UChar Bool;
#define False ((Bool)0)
#define True  ((Bool)1)
17
18void show_V128 ( V128* vec )
19{
20   Int i;
21   for (i = 15; i >= 0; i--)
22      printf("%02x", (UInt)( (*vec)[i] ));
23}
24
25void expand ( V128* dst, char* summary )
26{
27   Int i;
28   assert( strlen(summary) == 16 );
29   for (i = 0; i < 16; i++) {
30      UChar xx = 0;
31      UChar x = summary[15-i];
32      if      (x >= '0' && x <= '9') { xx = x - '0'; }
33      else if (x >= 'A' && x <= 'F') { xx = x - 'A' + 10; }
34      else if (x >= 'a' && x <= 'f') { xx = x - 'a' + 10; }
35      else assert(0);
36
37      assert(xx < 16);
38      xx = (xx << 4) | xx;
39      assert(xx < 256);
40      (*dst)[i] = xx;
41   }
42}
43
44void one_test ( char* summL, ULong rdxIN, char* summR, ULong raxIN )
45{
46   V128 argL, argR;
47   expand( &argL, summL );
48   expand( &argR, summR );
49   printf("\n");
50   printf("rdx %016llx  argL ", rdxIN);
51   show_V128(&argL);
52   printf("  rax %016llx  argR ", raxIN);
53   show_V128(&argR);
54   printf("\n");
55
56   ULong block[ 2/*in:argL*/          // 0  0
57                + 2/*in:argR*/        // 2  16
58                + 1/*in:rdx*/         // 4  32
59                + 1/*in:rax*/         // 5  40
60                + 2/*inout:xmm0*/     // 6  48
61                + 1/*inout:rcx*/      // 8  64
62                + 1/*out:rflags*/ ];  // 9  72
63   assert(sizeof(block) == 80);
64
65   UChar* blockC = (UChar*)&block[0];
66
67   /* ---------------- ISTRI_4A ---------------- */
68   memset(blockC, 0x55, 80);
69   memcpy(blockC + 0,  &argL,  16);
70   memcpy(blockC + 16, &argR,  16);
71   memcpy(blockC + 24, &rdxIN, 8);
72   memcpy(blockC + 32, &raxIN, 8);
73   memcpy(blockC + 40, &rdxIN, 8);
74   __asm__ __volatile__(
75      "movupd    0(%0), %%xmm2"           "\n\t"
76      "movupd    16(%0), %%xmm13"         "\n\t"
77      "movq      32(%0), %%rdx"           "\n\t"
78      "movq      40(%0), %%rax"           "\n\t"
79      "movupd    48(%0), %%xmm0"          "\n\t"
80      "movw      64(%0), %%cx"            "\n\t"
81      "pcmpistri $0x4A, %%xmm2, %%xmm13"  "\n\t"
82      "movupd    %%xmm0, 48(%0)"          "\n\t"
83      "movw      %%cx, 64(%0)"            "\n\t"
84      "pushfq"                            "\n\t"
85      "popq      %%r15"                   "\n\t"
86      "movq      %%r15, 72(%0)"           "\n\t"
87      : /*out*/
88      : /*in*/"r"(blockC)
89      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
90   );
91   printf("  istri $0x4A:  ");
92   printf("    xmm0 ");
93   show_V128( (V128*)(blockC+48) );
94   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
95
96   /* ---------------- ISTRI_0A ---------------- */
97   memset(blockC, 0x55, 80);
98   memcpy(blockC + 0,  &argL,  16);
99   memcpy(blockC + 16, &argR,  16);
100   memcpy(blockC + 24, &rdxIN, 8);
101   memcpy(blockC + 32, &raxIN, 8);
102   memcpy(blockC + 40, &rdxIN, 8);
103   __asm__ __volatile__(
104      "movupd    0(%0), %%xmm2"           "\n\t"
105      "movupd    16(%0), %%xmm13"         "\n\t"
106      "movq      32(%0), %%rdx"           "\n\t"
107      "movq      40(%0), %%rax"           "\n\t"
108      "movupd    48(%0), %%xmm0"          "\n\t"
109      "movw      64(%0), %%cx"            "\n\t"
110      "pcmpistri $0x0A, %%xmm2, %%xmm13"  "\n\t"
111      "movupd    %%xmm0, 48(%0)"          "\n\t"
112      "movw      %%cx, 64(%0)"            "\n\t"
113      "pushfq"                            "\n\t"
114      "popq      %%r15"                   "\n\t"
115      "movq      %%r15, 72(%0)"           "\n\t"
116      : /*out*/
117      : /*in*/"r"(blockC)
118      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
119   );
120   printf("  istri $0x0A:  ");
121   printf("    xmm0 ");
122   show_V128( (V128*)(blockC+48) );
123   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
124
125   /* ---------------- ISTRM_4A ---------------- */
126   memset(blockC, 0x55, 80);
127   memcpy(blockC + 0,  &argL,  16);
128   memcpy(blockC + 16, &argR,  16);
129   memcpy(blockC + 24, &rdxIN, 8);
130   memcpy(blockC + 32, &raxIN, 8);
131   memcpy(blockC + 40, &rdxIN, 8);
132   __asm__ __volatile__(
133      "movupd    0(%0), %%xmm2"           "\n\t"
134      "movupd    16(%0), %%xmm13"         "\n\t"
135      "movq      32(%0), %%rdx"           "\n\t"
136      "movq      40(%0), %%rax"           "\n\t"
137      "movupd    48(%0), %%xmm0"          "\n\t"
138      "movw      64(%0), %%cx"            "\n\t"
139      "pcmpistrm $0x4A, %%xmm2, %%xmm13"  "\n\t"
140      "movupd    %%xmm0, 48(%0)"          "\n\t"
141      "movw      %%cx, 64(%0)"            "\n\t"
142      "pushfq"                            "\n\t"
143      "popq      %%r15"                   "\n\t"
144      "movq      %%r15, 72(%0)"           "\n\t"
145      : /*out*/
146      : /*in*/"r"(blockC)
147      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
148   );
149   printf("  istrm $0x4A:  ");
150   printf("    xmm0 ");
151   show_V128( (V128*)(blockC+48) );
152   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
153
154   /* ---------------- ISTRM_0A ---------------- */
155   memset(blockC, 0x55, 80);
156   memcpy(blockC + 0,  &argL,  16);
157   memcpy(blockC + 16, &argR,  16);
158   memcpy(blockC + 24, &rdxIN, 8);
159   memcpy(blockC + 32, &raxIN, 8);
160   memcpy(blockC + 40, &rdxIN, 8);
161   __asm__ __volatile__(
162      "movupd    0(%0), %%xmm2"           "\n\t"
163      "movupd    16(%0), %%xmm13"         "\n\t"
164      "movq      32(%0), %%rdx"           "\n\t"
165      "movq      40(%0), %%rax"           "\n\t"
166      "movupd    48(%0), %%xmm0"          "\n\t"
167      "movw      64(%0), %%cx"            "\n\t"
168      "pcmpistrm $0x0A, %%xmm2, %%xmm13"  "\n\t"
169      "movupd    %%xmm0, 48(%0)"          "\n\t"
170      "movw      %%cx, 64(%0)"            "\n\t"
171      "pushfq"                            "\n\t"
172      "popq      %%r15"                   "\n\t"
173      "movq      %%r15, 72(%0)"           "\n\t"
174      : /*out*/
175      : /*in*/"r"(blockC)
176      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
177   );
178   printf("  istrm $0x0A:  ");
179   printf("    xmm0 ");
180   show_V128( (V128*)(blockC+48) );
181   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
182
183   /* ---------------- ESTRI_4A ---------------- */
184   memset(blockC, 0x55, 80);
185   memcpy(blockC + 0,  &argL,  16);
186   memcpy(blockC + 16, &argR,  16);
187   memcpy(blockC + 24, &rdxIN, 8);
188   memcpy(blockC + 32, &raxIN, 8);
189   memcpy(blockC + 40, &rdxIN, 8);
190   __asm__ __volatile__(
191      "movupd    0(%0), %%xmm2"           "\n\t"
192      "movupd    16(%0), %%xmm13"         "\n\t"
193      "movq      32(%0), %%rdx"           "\n\t"
194      "movq      40(%0), %%rax"           "\n\t"
195      "movupd    48(%0), %%xmm0"          "\n\t"
196      "movw      64(%0), %%cx"            "\n\t"
197      "pcmpestri $0x4A, %%xmm2, %%xmm13"  "\n\t"
198      "movupd    %%xmm0, 48(%0)"          "\n\t"
199      "movw      %%cx, 64(%0)"            "\n\t"
200      "pushfq"                            "\n\t"
201      "popq      %%r15"                   "\n\t"
202      "movq      %%r15, 72(%0)"           "\n\t"
203      : /*out*/
204      : /*in*/"r"(blockC)
205      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
206   );
207   printf("  estri $0x4A:  ");
208   printf("    xmm0 ");
209   show_V128( (V128*)(blockC+48) );
210   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
211
212   /* ---------------- ESTRI_0A ---------------- */
213   memset(blockC, 0x55, 80);
214   memcpy(blockC + 0,  &argL,  16);
215   memcpy(blockC + 16, &argR,  16);
216   memcpy(blockC + 24, &rdxIN, 8);
217   memcpy(blockC + 32, &raxIN, 8);
218   memcpy(blockC + 40, &rdxIN, 8);
219   __asm__ __volatile__(
220      "movupd    0(%0), %%xmm2"           "\n\t"
221      "movupd    16(%0), %%xmm13"         "\n\t"
222      "movq      32(%0), %%rdx"           "\n\t"
223      "movq      40(%0), %%rax"           "\n\t"
224      "movupd    48(%0), %%xmm0"          "\n\t"
225      "movw      64(%0), %%cx"            "\n\t"
226      "pcmpestri $0x0A, %%xmm2, %%xmm13"  "\n\t"
227      "movupd    %%xmm0, 48(%0)"          "\n\t"
228      "movw      %%cx, 64(%0)"            "\n\t"
229      "pushfq"                            "\n\t"
230      "popq      %%r15"                   "\n\t"
231      "movq      %%r15, 72(%0)"           "\n\t"
232      : /*out*/
233      : /*in*/"r"(blockC)
234      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
235   );
236   printf("  estri $0x0A:  ");
237   printf("    xmm0 ");
238   show_V128( (V128*)(blockC+48) );
239   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
240
241   /* ---------------- ESTRM_4A ---------------- */
242   memset(blockC, 0x55, 80);
243   memcpy(blockC + 0,  &argL,  16);
244   memcpy(blockC + 16, &argR,  16);
245   memcpy(blockC + 24, &rdxIN, 8);
246   memcpy(blockC + 32, &raxIN, 8);
247   memcpy(blockC + 40, &rdxIN, 8);
248   __asm__ __volatile__(
249      "movupd    0(%0), %%xmm2"           "\n\t"
250      "movupd    16(%0), %%xmm13"         "\n\t"
251      "movq      32(%0), %%rdx"           "\n\t"
252      "movq      40(%0), %%rax"           "\n\t"
253      "movupd    48(%0), %%xmm0"          "\n\t"
254      "movw      64(%0), %%cx"            "\n\t"
255      "pcmpestrm $0x4A, %%xmm2, %%xmm13"  "\n\t"
256      "movupd    %%xmm0, 48(%0)"          "\n\t"
257      "movw      %%cx, 64(%0)"            "\n\t"
258      "pushfq"                            "\n\t"
259      "popq      %%r15"                   "\n\t"
260      "movq      %%r15, 72(%0)"           "\n\t"
261      : /*out*/
262      : /*in*/"r"(blockC)
263      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
264   );
265   printf("  estrm $0x4A:  ");
266   printf("    xmm0 ");
267   show_V128( (V128*)(blockC+48) );
268   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
269
270   /* ---------------- ESTRM_0A ---------------- */
271   memset(blockC, 0x55, 80);
272   memcpy(blockC + 0,  &argL,  16);
273   memcpy(blockC + 16, &argR,  16);
274   memcpy(blockC + 24, &rdxIN, 8);
275   memcpy(blockC + 32, &raxIN, 8);
276   memcpy(blockC + 40, &rdxIN, 8);
277   __asm__ __volatile__(
278      "movupd    0(%0), %%xmm2"           "\n\t"
279      "movupd    16(%0), %%xmm13"         "\n\t"
280      "movq      32(%0), %%rdx"           "\n\t"
281      "movq      40(%0), %%rax"           "\n\t"
282      "movupd    48(%0), %%xmm0"          "\n\t"
283      "movw      64(%0), %%cx"            "\n\t"
284      "pcmpestrm $0x0A, %%xmm2, %%xmm13"  "\n\t"
285      "movupd    %%xmm0, 48(%0)"          "\n\t"
286      "movw      %%cx, 64(%0)"            "\n\t"
287      "pushfq"                            "\n\t"
288      "popq      %%r15"                   "\n\t"
289      "movq      %%r15, 72(%0)"           "\n\t"
290      : /*out*/
291      : /*in*/"r"(blockC)
292      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
293   );
294   printf("  estrm $0x0A:  ");
295   printf("    xmm0 ");
296   show_V128( (V128*)(blockC+48) );
297   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
298
299
300
301
302}
303
304int main ( void )
305{
306   one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaa0aaaaaaa", 0 );
307   one_test("0000000000000000", 0, "aaaaaaaa0aaaaaaa", 0 );
308
309   one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 0 );
310   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 0 );
311   one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 6 );
312
313   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 6 );
314   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 15 );
315   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 16 );
316   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 17 );
317
318   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -6 );
319   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -15 );
320   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -16 );
321   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -17 );
322
323   one_test("aaaaaaaaaaaaaaaa", 5,  "aaaaaaaaaaaaaaaa", 6 );
324   one_test("aaaaaaaaaaaaaaaa", 15, "aaaaaaaaaaaaaaaa", 6 );
325   one_test("aaaaaaaaaaaaaaaa", 16, "aaaaaaaaaaaaaaaa", 6 );
326   one_test("aaaaaaaaaaaaaaaa", 17, "aaaaaaaaaaaaaaaa", 6 );
327
328   one_test("aaaaaaaaaaaaaaaa", -5,  "aaaaaaaaaaaaaaaa", 6 );
329   one_test("aaaaaaaaaaaaaaaa", -15, "aaaaaaaaaaaaaaaa", 6 );
330   one_test("aaaaaaaaaaaaaaaa", -16, "aaaaaaaaaaaaaaaa", 6 );
331   one_test("aaaaaaaaaaaaaaaa", -17, "aaaaaaaaaaaaaaaa", 6 );
332
333   return 0;
334}
335