1
2/* Test for a number of SSE instructions which were seen in the wild
3   with a bogus (irrelevant) REX.W bit in their prefixes.  Some just
4   have REX = 0x48 where REX.W is irrelevant, hence the whole REX
5   prefix is pointless.  Probably related to #133962. */
6
7#include <stdlib.h>
8#include <stdio.h>
9#include <assert.h>
10#include "tests/malloc.h"
11
/* A single raw byte. */
typedef unsigned char UChar;

/* One 128-bit quantity, stored as 16 individual bytes.  The byte array
   carries a 16-byte alignment requirement. */
typedef struct {
   __attribute__((aligned(16))) UChar b[16];
} UWord128;

/* In-memory image of sixteen 128-bit register values; reg[i] is the
   slot for register number i. */
typedef struct {
   UWord128 reg[16];
} XMMRegs;

/* Scratch memory area made of five 128-bit words. */
typedef struct {
   UWord128 dqw[5];
} Mem;
25
26void pp_UWord128 ( UWord128* w ) {
27   int i;
28   char buf[3];
29   for (i = 15; i >= 0; i--) {
30      buf[2] = 0;
31      sprintf(buf, "%02x", (unsigned int)w->b[i]);
32      assert(buf[2] == 0);
33      if (buf[0] == '0') buf[0] = '.';
34      if (buf[1] == '0') buf[1] = '.';
35      printf("%s", buf);
36   }
37}
38
39void pp_XMMRegs ( char* who, XMMRegs* regs ) {
40   int i;
41   printf ("%s (xmms in order [15..0]) {\n", who );
42   for (i = 0; i < 16; i++) {
43      printf("  %%xmm%2d ", i);
44      pp_UWord128( &regs->reg[i] );
45      printf("\n");
46   }
47   printf("}\n");
48}
49
50void pp_Mem ( char* who, Mem* mem ) {
51   int i;
52   printf ("%s (dqws in order [15 .. 0]) {\n", who );
53   for (i = 0; i < 5; i++) {
54      printf("  [%d]    ", i);
55      pp_UWord128( &mem->dqw[i] );
56      printf("\n");
57   }
58   printf("}\n");
59}
60
61void xor_UWord128( UWord128* src, UWord128* dst ) {
62   int i;
63   for (i = 0; i < 16; i++)
64      dst->b[i] ^= src->b[i];
65}
66void xor_XMMRegs ( XMMRegs* src, XMMRegs* dst ) {
67   int i;
68   for (i = 0; i < 16; i++)
69      xor_UWord128( &src->reg[i], &dst->reg[i] );
70}
71
72void xor_Mem ( Mem* src, Mem* dst ) {
73   int i;
74   for (i = 0; i < 5; i++)
75      xor_UWord128( &src->dqw[i], &dst->dqw[i] );
76}
77
78void setup_regs_mem ( XMMRegs* regs, Mem* mem ) {
79   int ctr, i, j;
80   ctr = 0;
81   for (i = 0; i < 16; i++) {
82      for (j = 0; j < 16; j++)
83        regs->reg[i].b[j] = 0x51 + (ctr++ % 7);
84   }
85   for (i = 0; i < 5; i++) {
86      for (j = 0; j < 16; j++)
87        mem->dqw[i].b[j] = 0x52 + (ctr++ % 13);
88   }
89}
90
91void before_test ( XMMRegs* regs, Mem* mem ) {
92   setup_regs_mem( regs, mem );
93}
94
95void after_test ( char* who, XMMRegs* regs, Mem* mem ) {
96   XMMRegs rdiff;
97   Mem     mdiff;
98   char s[128];
99   setup_regs_mem( &rdiff, &mdiff );
100   xor_XMMRegs( regs, &rdiff );
101   xor_Mem( mem, &mdiff );
102   sprintf(s, "after \"%s\"", who );
103   pp_Mem( s, &mdiff );
104   pp_XMMRegs( s, &rdiff );
105   printf("\n");
106}
107
/* Assembly text, for splicing into an extended-asm template (hence the
   doubled '%%' on register names), that loads %xmm0 .. %xmm15 from
   sixteen consecutive 16-byte slots starting at the address in %r14.
   movupd is used, so no alignment of that address is required by these
   loads.  Every line, including the last, ends in a newline so further
   instructions can follow directly. */
#define LOAD_XMMREGS_from_r14       \
   "\tmovupd   0(%%r14),  %%xmm0\n" \
   "\tmovupd  16(%%r14),  %%xmm1\n" \
   "\tmovupd  32(%%r14),  %%xmm2\n" \
   "\tmovupd  48(%%r14),  %%xmm3\n" \
   "\tmovupd  64(%%r14),  %%xmm4\n" \
   "\tmovupd  80(%%r14),  %%xmm5\n" \
   "\tmovupd  96(%%r14),  %%xmm6\n" \
   "\tmovupd 112(%%r14),  %%xmm7\n" \
   "\tmovupd 128(%%r14),  %%xmm8\n" \
   "\tmovupd 144(%%r14),  %%xmm9\n" \
   "\tmovupd 160(%%r14), %%xmm10\n" \
   "\tmovupd 176(%%r14), %%xmm11\n" \
   "\tmovupd 192(%%r14), %%xmm12\n" \
   "\tmovupd 208(%%r14), %%xmm13\n" \
   "\tmovupd 224(%%r14), %%xmm14\n" \
   "\tmovupd 240(%%r14), %%xmm15\n"
125
/* Assembly text, for splicing into an extended-asm template (hence the
   doubled '%%' on register names), that stores %xmm0 .. %xmm15 into
   sixteen consecutive 16-byte slots starting at the address in %r14.
   The final line has no trailing newline: the macro is written to be
   the last piece of the template. */
#define SAVE_XMMREGS_to_r14         \
   "\tmovupd %%xmm0,    0(%%r14)\n" \
   "\tmovupd %%xmm1,   16(%%r14)\n" \
   "\tmovupd %%xmm2,   32(%%r14)\n" \
   "\tmovupd %%xmm3,   48(%%r14)\n" \
   "\tmovupd %%xmm4,   64(%%r14)\n" \
   "\tmovupd %%xmm5,   80(%%r14)\n" \
   "\tmovupd %%xmm6,   96(%%r14)\n" \
   "\tmovupd %%xmm7,  112(%%r14)\n" \
   "\tmovupd %%xmm8,  128(%%r14)\n" \
   "\tmovupd %%xmm9,  144(%%r14)\n" \
   "\tmovupd %%xmm10, 160(%%r14)\n" \
   "\tmovupd %%xmm11, 176(%%r14)\n" \
   "\tmovupd %%xmm12, 192(%%r14)\n" \
   "\tmovupd %%xmm13, 208(%%r14)\n" \
   "\tmovupd %%xmm14, 224(%%r14)\n" \
   "\tmovupd %%xmm15, 240(%%r14)"
143
/* Comma-separated names of all sixteen XMM registers, for use inside
   the clobber list of an extended-asm statement. */
#define XMMREGS \
   "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", \
   "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
147
148#if 0
149   /* Boilerplate for test */
150   {
151     before_test( regs, mem );
152     __asm__ __volatile__(
153         "movq %0, %%r14\n"
154       "\tmovq %1, %%r15\n"
155       LOAD_XMMREGS_from_r14
156       "\tmovq %%r15, %%rx\n"
157       "\t.byte 0x\n"
158       SAVE_XMMREGS_to_r14
159          : /*out*/ : /*in*/ "r"(regs), "r"( -x + (char*)&mem->dqw[2] )
160                    : /*trash*/ "r14","r15","memory", XMMREGS,
161                                "x"
162     );
163     after_test( "", regs, mem );
164   }
165#endif
166
/* Test driver.  Allocates one register image and one memory area, then
   runs a fixed list of test cases, each exercising one hand-encoded SSE
   instruction whose encoding includes a REX prefix with the W bit set.

   Every test case has the same shape:
     1. before_test() resets regs and mem to the known initial pattern.
     2. The asm statement copies operand %0 (regs) into %r14 and operand
        %1 (the target memory address) into %r15, loads all XMM
        registers from the register image, copies %r15 into whichever
        base register the encoded instruction addresses through, emits
        the instruction itself as raw bytes via .byte, and finally
        stores all XMM registers back to the register image.
     3. after_test() prints which bytes of mem and regs changed, tagged
        with a label naming the instruction.

   The address passed as %1 is &mem->dqw[2] with the instruction's
   displacement subtracted, so that base + displacement always lands
   on mem->dqw[2].

   The labels handed to after_test() are printed verbatim.
   NOTE(review): presumably an expected-output file depends on them, so
   they should not be "tidied" without checking. */
int main ( void )
{
   XMMRegs* regs;
   Mem*     mem;
   /* memalign16 comes from tests/malloc.h; presumably it returns
      16-byte-aligned storage (the movapd/movaps cases below access
      mem->dqw[2] with aligned moves) -- confirm against that header. */
   regs = memalign16(sizeof(XMMRegs) + 16);
   mem  = memalign16(sizeof(Mem) + 16);

   /* addpd mem, reg   66 49 0f 58 48 00  rex.WB addpd  0x0(%r8),%xmm1 */
   {
     before_test( regs, mem );
     __asm__ __volatile__(
         "movq %0, %%r14\n"
       "\tmovq %1, %%r15\n"
       LOAD_XMMREGS_from_r14
       "\tmovq %%r15, %%r8\n"
       "\t.byte 0x66,0x49,0x0f,0x58,0x48,0x00\n"
       SAVE_XMMREGS_to_r14
          : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
                    : /*trash*/ "r14","r15","memory", XMMREGS,
                                "r8"
     );
     after_test( "rex.WB addpd  0x0(%r8),%xmm1", regs, mem );
   }

   /* addsd mem, reg   f2 48 0f 58 27     rex.W addsd  (%rdi),%xmm4 */
   {
     before_test( regs, mem );
     __asm__ __volatile__(
         "movq %0, %%r14\n"
       "\tmovq %1, %%r15\n"
       LOAD_XMMREGS_from_r14
       "\tmovq %%r15, %%rdi\n"
       "\t.byte 0xf2,0x48,0x0f,0x58,0x27\n"
       SAVE_XMMREGS_to_r14
          : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
                    : /*trash*/ "r14","r15","memory", XMMREGS,
                                "rdi"
     );
     after_test( "rex.W addsd  (%rdi),%xmm4", regs, mem );
   }

   /* movapd mem, reg  66 48 0f 28 0a     rex.W movapd (%rdx),%xmm1 */
   {
     before_test( regs, mem );
     __asm__ __volatile__(
         "movq %0, %%r14\n"
       "\tmovq %1, %%r15\n"
       LOAD_XMMREGS_from_r14
       "\tmovq %%r15, %%rdx\n"
       "\t.byte 0x66,0x48,0x0f,0x28,0x0a\n"
       SAVE_XMMREGS_to_r14
          : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
                    : /*trash*/ "r14","r15","memory", XMMREGS,
                                "rdx"
     );
     after_test( "rex.W movapd (%rdx),%xmm1", regs, mem );
   }

   /* movapd reg, mem  66 48 0f 29 0a     rex.W movapd %xmm1,(%rdx) */
   {
     before_test( regs, mem );
     __asm__ __volatile__(
         "movq %0, %%r14\n"
       "\tmovq %1, %%r15\n"
       LOAD_XMMREGS_from_r14
       "\tmovq %%r15, %%rdx\n"
       "\t.byte 0x66,0x48,0x0f,0x29,0x0a\n"
       SAVE_XMMREGS_to_r14
          : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
                    : /*trash*/ "r14","r15","memory", XMMREGS,
                                "rdx"
     );
     after_test( "rex.W movapd %xmm1,(%rdx)", regs, mem );
   }

   /* movaps mem, reg  48 0f 28 42 30     rex.W movaps 0x30(%rdx),%xmm0 */
   /* The 0x30 displacement is cancelled by the -0x30 in the address
      operand.  Note the printed label below has no "rex.W" prefix. */
   {
     before_test( regs, mem );
     __asm__ __volatile__(
         "movq %0, %%r14\n"
       "\tmovq %1, %%r15\n"
       LOAD_XMMREGS_from_r14
       "\tmovq %%r15, %%rdx\n"
       "\t.byte 0x48,0x0f,0x28,0x42,0x30\n"
       SAVE_XMMREGS_to_r14
          : /*out*/ : /*in*/ "r"(regs), "r"( -0x30 + (char*)&mem->dqw[2] )
                    : /*trash*/ "r14","r15","memory", XMMREGS,
                                "rdx"
     );
     after_test( "movaps 0x30(%rdx),%xmm0", regs, mem );
   }

   /* movaps reg, mem  49 0f 29 48 00     rex.WB movaps %xmm1,0x0(%r8) */
   {
     before_test( regs, mem );
     __asm__ __volatile__(
         "movq %0, %%r14\n"
       "\tmovq %1, %%r15\n"
       LOAD_XMMREGS_from_r14
       "\tmovq %%r15, %%r8\n"
       "\t.byte 0x49,0x0f,0x29,0x48,0x00\n"
       SAVE_XMMREGS_to_r14
          : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
                    : /*trash*/ "r14","r15","memory", XMMREGS,
                                "r8"
     );
     after_test( "rex.WB movaps %xmm1,0x0(%r8)", regs, mem );
   }

   /* movddup mem, reg f2 48 0f 12 2a     rex.W movddup (%rdx),%xmm5 */
   /* Note the printed label below has no "rex.W" prefix. */
   {
     before_test( regs, mem );
     __asm__ __volatile__(
         "movq %0, %%r14\n"
       "\tmovq %1, %%r15\n"
       LOAD_XMMREGS_from_r14
       "\tmovq %%r15, %%rdx\n"
       "\t.byte 0xf2,0x48,0x0f,0x12,0x2a\n"
       SAVE_XMMREGS_to_r14
          : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
                    : /*trash*/ "r14","r15","memory", XMMREGS,
                                "rdx"
     );
     after_test( "movddup (%rdx),%xmm5", regs, mem );
   }

   /* movhpd mem, reg  66 48 0f 16 06     rex.W movhpd (%rsi),%xmm0 */
   {
     before_test( regs, mem );
     __asm__ __volatile__(
         "movq %0, %%r14\n"
       "\tmovq %1, %%r15\n"
       LOAD_XMMREGS_from_r14
       "\tmovq %%r15, %%rsi\n"
       "\t.byte 0x66,0x48,0x0f,0x16,0x06\n"
       SAVE_XMMREGS_to_r14
          : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
                    : /*trash*/ "r14","r15","memory", XMMREGS,
                                "rsi"
     );
     after_test( "rex.W movhpd (%rsi),%xmm0", regs, mem );
   }

   /* movhpd reg, mem  66 48 0f 17 07     rex.W movhpd %xmm0,(%rdi) */
   {
     before_test( regs, mem );
     __asm__ __volatile__(
         "movq %0, %%r14\n"
       "\tmovq %1, %%r15\n"
       LOAD_XMMREGS_from_r14
       "\tmovq %%r15, %%rdi\n"
       "\t.byte 0x66,0x48,0x0f,0x17,0x07\n"
       SAVE_XMMREGS_to_r14
          : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
                    : /*trash*/ "r14","r15","memory", XMMREGS,
                                "rdi"
     );
     after_test( "rex.W movhpd %xmm0,(%rdi)", regs, mem );
   }

   /* movhps mem, reg  48 0f 16 36        rex.W movhps (%rsi),%xmm6 */
   {
     before_test( regs, mem );
     __asm__ __volatile__(
         "movq %0, %%r14\n"
       "\tmovq %1, %%r15\n"
       LOAD_XMMREGS_from_r14
       "\tmovq %%r15, %%rsi\n"
       "\t.byte 0x48,0x0f,0x16,0x36\n"
       SAVE_XMMREGS_to_r14
          : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
                    : /*trash*/ "r14","r15","memory", XMMREGS,
                                "rsi"
     );
     after_test( "rex.W movhps (%rsi),%xmm6", regs, mem );
   }
   /* movhps reg, mem  49 0f 17 03        rex.WB movhps %xmm0,(%r11) */
   {
     before_test( regs, mem );
     __asm__ __volatile__(
         "movq %0, %%r14\n"
       "\tmovq %1, %%r15\n"
       LOAD_XMMREGS_from_r14
       "\tmovq %%r15, %%r11\n"
       "\t.byte 0x49,0x0F,0x17,0x03\n" /* rex.WB movhps %xmm0,(%r11) */
       SAVE_XMMREGS_to_r14
         : /*out*/ : /*in*/ "r"(regs), "r"( 0 + (char*)&mem->dqw[2] )
                    : /*trash*/ "r14","r15","memory", XMMREGS,
                                "r11"
     );
     after_test( "rex.WB movhps %xmm0,(%r11)", regs, mem );
   }

   /* movlpd mem, reg  66 48 0f 12 4a 00  rex.W movlpd 0x0(%rdx),%xmm1 */
   {
     before_test( regs, mem );
     __asm__ __volatile__(
         "movq %0, %%r14\n"
       "\tmovq %1, %%r15\n"
       LOAD_XMMREGS_from_r14
       "\tmovq %%r15, %%rdx\n"
       "\t.byte 0x66,0x48,0x0f,0x12,0x4a,0x00\n"
       SAVE_XMMREGS_to_r14
          : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
                    : /*trash*/ "r14","r15","memory", XMMREGS,
                                "rdx"
     );
     after_test( "rex.W movlpd 0x0(%rdx),%xmm1", regs, mem );
   }

   /* movlpd reg, mem  66 48 0f 13 30     rex.W movlpd %xmm6,(%rax) */
   {
     before_test( regs, mem );
     __asm__ __volatile__(
         "movq %0, %%r14\n"
       "\tmovq %1, %%r15\n"
       LOAD_XMMREGS_from_r14
       "\tmovq %%r15, %%rax\n"
       "\t.byte 0x66,0x48,0x0f,0x13,0x30\n"
       SAVE_XMMREGS_to_r14
          : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
                    : /*trash*/ "r14","r15","memory", XMMREGS,
                                "rax"
     );
     after_test( "rex.W movlpd %xmm6,(%rax)", regs, mem );
   }

   /* movlps mem, reg  48 0f 12 07        rex.W movlps (%rdi),%xmm0 */
   {
     before_test( regs, mem );
     __asm__ __volatile__(
         "movq %0, %%r14\n"
       "\tmovq %1, %%r15\n"
       LOAD_XMMREGS_from_r14
       "\tmovq %%r15, %%rdi\n"
       "\t.byte 0x48,0x0f,0x12,0x07\n"
       SAVE_XMMREGS_to_r14
          : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
                    : /*trash*/ "r14","r15","memory", XMMREGS,
                                "rdi"
     );
     after_test( "rex.W movlps (%rdi),%xmm0", regs, mem );
   }

   /* movlps reg, mem  49 0f 13 02        rex.WB movlps %xmm0,(%r10) */
   {
     before_test( regs, mem );
     __asm__ __volatile__(
         "movq %0, %%r14\n"
       "\tmovq %1, %%r15\n"
       LOAD_XMMREGS_from_r14
       "\tmovq %%r15, %%r10\n"
       "\t.byte 0x49,0x0f,0x13,0x02\n"
       SAVE_XMMREGS_to_r14
          : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
                    : /*trash*/ "r14","r15","memory", XMMREGS,
                                "r10"
     );
     after_test( "rex.WB movlps %xmm0,(%r10)", regs, mem );
   }

   /* movq mem, reg    f3 48 0f 7e 00     rex.W movq   (%rax),%xmm0 */
   {
     before_test( regs, mem );
     __asm__ __volatile__(
         "movq %0, %%r14\n"
       "\tmovq %1, %%r15\n"
       LOAD_XMMREGS_from_r14
       "\tmovq %%r15, %%rax\n"
       "\t.byte 0xf3,0x48,0x0f,0x7e,0x00\n"
       SAVE_XMMREGS_to_r14
          : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
                    : /*trash*/ "r14","r15","memory", XMMREGS,
                                "rax"
     );
     after_test( "rex.W movq (%rax),%xmm0", regs, mem );
   }

   /* movq reg, mem    66 48 0f d6 00     rex.W movq   %xmm0,(%rax) */
   {
     before_test( regs, mem );
     __asm__ __volatile__(
         "movq %0, %%r14\n"
       "\tmovq %1, %%r15\n"
       LOAD_XMMREGS_from_r14
       "\tmovq %%r15, %%rax\n"
       "\t.byte 0x66,0x48,0x0f,0xd6,0x00\n"
       SAVE_XMMREGS_to_r14
          : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
                    : /*trash*/ "r14","r15","memory", XMMREGS,
                                "rax"
     );
     after_test( "rex.W movq %xmm0,(%rax)", regs, mem );
   }

   /* movsd mem, reg   f2 48 0f 10 11     rex.W movsd  (%rcx),%xmm2 */
   {
     before_test( regs, mem );
     __asm__ __volatile__(
         "movq %0, %%r14\n"
       "\tmovq %1, %%r15\n"
       LOAD_XMMREGS_from_r14
       "\tmovq %%r15, %%rcx\n"
       "\t.byte 0xf2,0x48,0x0f,0x10,0x11\n"
       SAVE_XMMREGS_to_r14
          : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
                    : /*trash*/ "r14","r15","memory", XMMREGS,
                                "rcx"
     );
     after_test( "rex.W movsd (%rcx),%xmm2", regs, mem );
   }

   /* movsd reg, mem   f2 48 0f 11 3f     rex.W movsd  %xmm7,(%rdi) */
   {
     before_test( regs, mem );
     __asm__ __volatile__(
         "movq %0, %%r14\n"
       "\tmovq %1, %%r15\n"
       LOAD_XMMREGS_from_r14
       "\tmovq %%r15, %%rdi\n"
       "\t.byte 0xf2,0x48,0x0f,0x11,0x3f\n"
       SAVE_XMMREGS_to_r14
          : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
                    : /*trash*/ "r14","r15","memory", XMMREGS,
                                "rdi"
     );
     after_test( "rex.W movsd %xmm7,(%rdi)", regs, mem );
   }

   /* movss mem, reg   f3 48 0f 10 5e 04  rex.W movss  0x4(%rsi),%xmm3 */
   /* The 0x4 displacement is cancelled by the -0x4 in the address
      operand. */
   {
     before_test( regs, mem );
     __asm__ __volatile__(
         "movq %0, %%r14\n"
       "\tmovq %1, %%r15\n"
       LOAD_XMMREGS_from_r14
       "\tmovq %%r15, %%rsi\n"
       "\t.byte 0xf3,0x48,0x0f,0x10,0x5e,0x04\n"
       SAVE_XMMREGS_to_r14
          : /*out*/ : /*in*/ "r"(regs), "r"( -0x4 + (char*)&mem->dqw[2] )
                    : /*trash*/ "r14","r15","memory", XMMREGS,
                                "rsi"
     );
     after_test( "rex.W movss 0x4(%rsi),%xmm3", regs, mem );
   }

   /* movupd reg, mem  66 48 0f 11 07     rex.W movupd %xmm0,(%rdi) */
   {
     before_test( regs, mem );
     __asm__ __volatile__(
         "movq %0, %%r14\n"
       "\tmovq %1, %%r15\n"
       LOAD_XMMREGS_from_r14
       "\tmovq %%r15, %%rdi\n"
       "\t.byte 0x66,0x48,0x0f,0x11,0x07\n"
       SAVE_XMMREGS_to_r14
          : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
                    : /*trash*/ "r14","r15","memory", XMMREGS,
                                "rdi"
     );
     after_test( "rex.W movupd %xmm0,(%rdi)", regs, mem );
   }

   /* mulpd mem, reg   66 48 0f 59 61 00  rex.W mulpd  0x0(%rcx),%xmm4 */
   {
     before_test( regs, mem );
     __asm__ __volatile__(
         "movq %0, %%r14\n"
       "\tmovq %1, %%r15\n"
       LOAD_XMMREGS_from_r14
       "\tmovq %%r15, %%rcx\n"
       "\t.byte 0x66,0x48,0x0f,0x59,0x61,0x00\n"
       SAVE_XMMREGS_to_r14
          : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
                    : /*trash*/ "r14","r15","memory", XMMREGS,
                                "rcx"
     );
     after_test( "rex.W mulpd 0x0(%rcx),%xmm4", regs, mem );
   }

   /* mulsd mem, reg   f2 48 0f 59 1f     rex.W mulsd  (%rdi),%xmm3 */
   {
     before_test( regs, mem );
     __asm__ __volatile__(
         "movq %0, %%r14\n"
       "\tmovq %1, %%r15\n"
       LOAD_XMMREGS_from_r14
       "\tmovq %%r15, %%rdi\n"
       "\t.byte 0xf2,0x48,0x0f,0x59,0x1f\n"
       SAVE_XMMREGS_to_r14
          : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
                    : /*trash*/ "r14","r15","memory", XMMREGS,
                                "rdi"
     );
     after_test( "rex.W mulsd (%rdi),%xmm3", regs, mem );
   }

   /* prefetcht0     49 0f 18 4c f2 a0  rex.WB prefetcht0 -0x60(%r10,%rsi,8) */
   /* The index register %rsi is zeroed so only base + displacement
      matter; the -0x60 displacement is cancelled by adding 0x60
      (written "- -0x60") to the address operand. */
   {
     before_test( regs, mem );
     __asm__ __volatile__(
         "movq %0, %%r14\n"
       "\tmovq %1, %%r15\n"
       LOAD_XMMREGS_from_r14
       "\tmovq %%r15, %%r10\n"
       "\txorq %%rsi, %%rsi\n"
       "\t.byte 0x49,0x0f,0x18,0x4c,0xf2,0xa0\n"
       SAVE_XMMREGS_to_r14
          : /*out*/ : /*in*/ "r"(regs), "r"( - -0x60 + (char*)&mem->dqw[2] )
                    : /*trash*/ "r14","r15","memory", XMMREGS,
                                "r10","rsi"
     );
     after_test( "rex.WB prefetcht0 -0x60(%r10,%rsi,8)", regs, mem );
   }

   /* subsd mem, reg   f2 49 0f 5c 4d f8  rex.WB subsd  -0x8(%r13),%xmm1 */
   /* The -0x8 displacement is cancelled by adding 0x8 (written
      "- -0x8") to the address operand. */
   {
     before_test( regs, mem );
     __asm__ __volatile__(
         "movq %0, %%r14\n"
       "\tmovq %1, %%r15\n"
       LOAD_XMMREGS_from_r14
       "\tmovq %%r15, %%r13\n"
       "\t.byte 0xf2,0x49,0x0f,0x5c,0x4d,0xf8\n"
       SAVE_XMMREGS_to_r14
          : /*out*/ : /*in*/ "r"(regs), "r"( - -0x8 + (char*)&mem->dqw[2] )
                    : /*trash*/ "r14","r15","memory", XMMREGS,
                                "r13"
     );
     after_test( "rex.WB subsd  -0x8(%r13),%xmm1", regs, mem );
   }

   free(regs);
   free(mem);
   return 0;
}
603