1
2#include <stdio.h>
3#include <stdlib.h>
4#include <assert.h>
5#include <malloc.h>
6
/* Short aliases for the unsigned integer types used by the tests. */
typedef unsigned char          UChar;
typedef unsigned int           UInt;
typedef unsigned long int      UWord;
typedef unsigned long long int ULong;

/* Byte array that the 'used' attribute keeps in the object file even
   when nothing references it. */
UChar randArray[1027] __attribute__((used));

/* Nonzero iff the low five bits of the address _ptr are all clear. */
#define IS_32_ALIGNED(_ptr) (((UWord)(_ptr) & 0x1F) == 0)

/* One vector image, addressable as 32 bytes or as 8 UInt words. */
typedef union {
   UChar u8[32];
   UInt  u32[8];
} YMM;

/* Working set for one test: four vector images plus one ULong scalar. */
typedef struct {
   YMM   a1;
   YMM   a2;
   YMM   a3;
   YMM   a4;
   ULong u64;
} Block;
19
20void showYMM ( YMM* vec )
21{
22   int i;
23   assert(IS_32_ALIGNED(vec));
24   for (i = 31; i >= 0; i--) {
25      printf("%02x", (UInt)vec->u8[i]);
26      if (i > 0 && 0 == ((i+0) & 7)) printf(".");
27   }
28}
29
30void showBlock ( char* msg, Block* block )
31{
32   printf("  %s\n", msg);
33   printf("    "); showYMM(&block->a1); printf("\n");
34   printf("    "); showYMM(&block->a2); printf("\n");
35   printf("    "); showYMM(&block->a3); printf("\n");
36   printf("    "); showYMM(&block->a4); printf("\n");
37   printf("    %016llx\n", block->u64);
38}
39
40UChar randUChar ( void )
41{
42   static UInt seed = 80021;
43   seed = 1103515245 * seed + 12345;
44   return (seed >> 17) & 0xFF;
45}
46
47void randBlock ( Block* b )
48{
49   int i;
50   UChar* p = (UChar*)b;
51   for (i = 0; i < sizeof(Block); i++)
52      p[i] = randUChar();
53}
54
55
/* Generate a function test_NAME, that tests the given insn, in both
   its mem and reg forms.  The reg form of the insn may mention, as
   operands only %ymm6, %ymm7, %ymm8, %ymm9 and %r14.  The mem form of
   the insn may mention as operands only (%rax), %ymm7, %ymm8, %ymm9
   and %r14.  It's OK for the insn to clobber ymm0, as this is needed
   for testing PCMPxSTRx, and ymm6, as this is needed for testing
   MOVMASK variants.

   How the Block fields are mapped around the insn:
     reg form: a1 -> ymm7, a2 -> ymm8, a3 -> ymm6, a4 -> ymm9,
               u64 -> r14; all five are stored back afterwards.
     mem form: rax = address of a1 (a1 itself stays in memory),
               a2 -> ymm8, a3 -> ymm7, a4 -> ymm9, u64 -> r14;
               a2, a3, a4 and u64 are stored back afterwards.

   The generated function prints "NAME(reg)", the block before and
   after, then re-randomises the block and does the same for
   "NAME(mem)", and finally prints an empty line.

   The block is allocated 32-byte aligned (vmovdqa and showYMM need
   that) and 16 bytes larger than sizeof(Block): some mem forms use
   128(%rax), the offset of the trailing 8-byte u64 field, as a
   128-bit memory operand, which would otherwise reach past the end
   of the allocation.  The padding is never printed.  If the
   allocation fails, a message is written to stderr and the program
   exits with status 1. */

#define GEN_test_RandM(_name, _reg_form, _mem_form)   \
    \
    __attribute__ ((noinline)) static void test_##_name ( void )   \
    { \
       Block* b = memalign(32, sizeof(Block) + 16); \
       if (b == NULL) { \
          fprintf(stderr, "test_%s: memalign failed\n", #_name); \
          exit(1); \
       } \
       randBlock(b); \
       printf("%s(reg)\n", #_name); \
       showBlock("before", b); \
       __asm__ __volatile__( \
          "vmovdqa   0(%0),%%ymm7"  "\n\t" \
          "vmovdqa  32(%0),%%ymm8"  "\n\t" \
          "vmovdqa  64(%0),%%ymm6"  "\n\t" \
          "vmovdqa  96(%0),%%ymm9"  "\n\t" \
          "movq    128(%0),%%r14"   "\n\t" \
          _reg_form   "\n\t" \
          "vmovdqa %%ymm7,  0(%0)"  "\n\t" \
          "vmovdqa %%ymm8, 32(%0)"  "\n\t" \
          "vmovdqa %%ymm6, 64(%0)"  "\n\t" \
          "vmovdqa %%ymm9, 96(%0)"  "\n\t" \
          "movq    %%r14, 128(%0)"  "\n\t" \
          : /*OUT*/  \
          : /*IN*/"r"(b) \
          : /*TRASH*/"xmm0","xmm7","xmm8","xmm6","xmm9","r14","memory","cc" \
       ); \
       showBlock("after", b); \
       randBlock(b); \
       printf("%s(mem)\n", #_name); \
       showBlock("before", b); \
       __asm__ __volatile__( \
          "leaq      0(%0),%%rax"  "\n\t" \
          "vmovdqa  32(%0),%%ymm8"  "\n\t" \
          "vmovdqa  64(%0),%%ymm7"  "\n\t" \
          "vmovdqa  96(%0),%%ymm9"  "\n\t" \
          "movq    128(%0),%%r14"   "\n\t" \
          _mem_form   "\n\t" \
          "vmovdqa %%ymm8, 32(%0)"  "\n\t" \
          "vmovdqa %%ymm7, 64(%0)"  "\n\t" \
          "vmovdqa %%ymm9, 96(%0)"  "\n\t" \
          "movq    %%r14, 128(%0)"  "\n\t" \
          : /*OUT*/  \
          : /*IN*/"r"(b) \
          : /*TRASH*/"xmm6", \
                     "xmm0","xmm8","xmm7","xmm9","r14","rax","memory","cc" \
       ); \
       showBlock("after", b); \
       printf("\n"); \
       free(b); \
    }
112
/* Single-form wrappers around GEN_test_RandM: the unused form is passed
   as an empty asm string, so test_NAME still runs and prints both the
   (reg) and (mem) halves, one of them with no insn under test. */
#define GEN_test_Ronly(_id, _insn_reg) \
   GEN_test_RandM(_id, _insn_reg, "")

#define GEN_test_Monly(_id, _insn_mem) \
   GEN_test_RandM(_id, "", _insn_mem)
117
/* Vector integers promoted from 128-bit in AVX to 256-bit in AVX2.  */
119
120GEN_test_RandM(VPOR_256,
121               "vpor %%ymm6,  %%ymm8, %%ymm7",
122               "vpor (%%rax), %%ymm8, %%ymm7")
123
124GEN_test_RandM(VPXOR_256,
125               "vpxor %%ymm6,  %%ymm8, %%ymm7",
126               "vpxor (%%rax), %%ymm8, %%ymm7")
127
128GEN_test_RandM(VPSUBB_256,
129               "vpsubb %%ymm6,  %%ymm8, %%ymm7",
130               "vpsubb (%%rax), %%ymm8, %%ymm7")
131
132GEN_test_RandM(VPSUBD_256,
133               "vpsubd %%ymm6,  %%ymm8, %%ymm7",
134               "vpsubd (%%rax), %%ymm8, %%ymm7")
135
136GEN_test_RandM(VPADDD_256,
137               "vpaddd %%ymm6,  %%ymm8, %%ymm7",
138               "vpaddd (%%rax), %%ymm8, %%ymm7")
139
140GEN_test_RandM(VPMOVZXWD_256,
141               "vpmovzxwd %%xmm6,  %%ymm8",
142               "vpmovzxwd (%%rax), %%ymm8")
143
144GEN_test_RandM(VPMOVZXBW_256,
145               "vpmovzxbw %%xmm6,  %%ymm8",
146               "vpmovzxbw (%%rax), %%ymm8")
147
148GEN_test_RandM(VPBLENDVB_256,
149               "vpblendvb %%ymm9, %%ymm6,  %%ymm8, %%ymm7",
150               "vpblendvb %%ymm9, (%%rax), %%ymm8, %%ymm7")
151
152GEN_test_RandM(VPMINSD_256,
153               "vpminsd %%ymm6,  %%ymm8, %%ymm7",
154               "vpminsd (%%rax), %%ymm8, %%ymm7")
155
156GEN_test_RandM(VPMAXSD_256,
157               "vpmaxsd %%ymm6,  %%ymm8, %%ymm7",
158               "vpmaxsd (%%rax), %%ymm8, %%ymm7")
159
160GEN_test_RandM(VPSHUFB_256,
161               "vpshufb %%ymm6,  %%ymm8, %%ymm7",
162               "vpshufb (%%rax), %%ymm8, %%ymm7")
163
164GEN_test_RandM(VPUNPCKLBW_256,
165               "vpunpcklbw %%ymm6,  %%ymm8, %%ymm7",
166               "vpunpcklbw (%%rax), %%ymm8, %%ymm7")
167
168GEN_test_RandM(VPUNPCKHBW_256,
169               "vpunpckhbw %%ymm6,  %%ymm8, %%ymm7",
170               "vpunpckhbw (%%rax), %%ymm8, %%ymm7")
171
172GEN_test_RandM(VPABSD_256,
173               "vpabsd %%ymm6,  %%ymm8",
174               "vpabsd (%%rax), %%ymm8")
175
176GEN_test_RandM(VPACKUSWB_256,
177               "vpackuswb %%ymm9,  %%ymm8, %%ymm7",
178               "vpackuswb (%%rax), %%ymm8, %%ymm7")
179
180GEN_test_Ronly(VPMOVMSKB_256,
181               "vpmovmskb %%ymm8, %%r14")
182
183GEN_test_RandM(VPAND_256,
184               "vpand %%ymm9,  %%ymm8, %%ymm7",
185               "vpand (%%rax), %%ymm8, %%ymm7")
186
187GEN_test_RandM(VPCMPEQB_256,
188               "vpcmpeqb %%ymm9,  %%ymm8, %%ymm7",
189               "vpcmpeqb (%%rax), %%ymm8, %%ymm7")
190
191GEN_test_RandM(VPSHUFLW_0x39_256,
192               "vpshuflw $0x39, %%ymm9,  %%ymm7",
193               "vpshuflw $0xC6, (%%rax), %%ymm8")
194
195GEN_test_RandM(VPSHUFHW_0x39_256,
196               "vpshufhw $0x39, %%ymm9,  %%ymm7",
197               "vpshufhw $0xC6, (%%rax), %%ymm8")
198
199GEN_test_RandM(VPMULLW_256,
200               "vpmullw %%ymm9,  %%ymm8, %%ymm7",
201               "vpmullw (%%rax), %%ymm8, %%ymm7")
202
203GEN_test_RandM(VPADDUSW_256,
204               "vpaddusw %%ymm9,  %%ymm8, %%ymm7",
205               "vpaddusw (%%rax), %%ymm8, %%ymm7")
206
207GEN_test_RandM(VPMULHUW_256,
208               "vpmulhuw %%ymm9,  %%ymm8, %%ymm7",
209               "vpmulhuw (%%rax), %%ymm8, %%ymm7")
210
211GEN_test_RandM(VPADDUSB_256,
212               "vpaddusb %%ymm9,  %%ymm8, %%ymm7",
213               "vpaddusb (%%rax), %%ymm8, %%ymm7")
214
215GEN_test_RandM(VPUNPCKLWD_256,
216               "vpunpcklwd %%ymm6,  %%ymm8, %%ymm7",
217               "vpunpcklwd (%%rax), %%ymm8, %%ymm7")
218
219GEN_test_RandM(VPUNPCKHWD_256,
220               "vpunpckhwd %%ymm6,  %%ymm8, %%ymm7",
221               "vpunpckhwd (%%rax), %%ymm8, %%ymm7")
222
223GEN_test_Ronly(VPSLLD_0x05_256,
224               "vpslld $0x5, %%ymm9,  %%ymm7")
225
226GEN_test_Ronly(VPSRLD_0x05_256,
227               "vpsrld $0x5, %%ymm9,  %%ymm7")
228
229GEN_test_Ronly(VPSRAD_0x05_256,
230               "vpsrad $0x5, %%ymm9,  %%ymm7")
231
232GEN_test_RandM(VPSUBUSB_256,
233               "vpsubusb %%ymm9,  %%ymm8, %%ymm7",
234               "vpsubusb (%%rax), %%ymm8, %%ymm7")
235
236GEN_test_RandM(VPSUBSB_256,
237               "vpsubsb %%ymm9,  %%ymm8, %%ymm7",
238               "vpsubsb (%%rax), %%ymm8, %%ymm7")
239
240GEN_test_Ronly(VPSRLDQ_0x05_256,
241               "vpsrldq $0x5, %%ymm9,  %%ymm7")
242
243GEN_test_Ronly(VPSLLDQ_0x05_256,
244               "vpslldq $0x5, %%ymm9,  %%ymm7")
245
246GEN_test_RandM(VPANDN_256,
247               "vpandn %%ymm9,  %%ymm8, %%ymm7",
248               "vpandn (%%rax), %%ymm8, %%ymm7")
249
250GEN_test_RandM(VPUNPCKLQDQ_256,
251               "vpunpcklqdq %%ymm6,  %%ymm8, %%ymm7",
252               "vpunpcklqdq (%%rax), %%ymm8, %%ymm7")
253
254GEN_test_Ronly(VPSRLW_0x05_256,
255               "vpsrlw $0x5, %%ymm9,  %%ymm7")
256
257GEN_test_Ronly(VPSLLW_0x05_256,
258               "vpsllw $0x5, %%ymm9,  %%ymm7")
259
260GEN_test_RandM(VPADDW_256,
261               "vpaddw %%ymm6,  %%ymm8, %%ymm7",
262               "vpaddw (%%rax), %%ymm8, %%ymm7")
263
264GEN_test_RandM(VPACKSSDW_256,
265               "vpackssdw %%ymm9,  %%ymm8, %%ymm7",
266               "vpackssdw (%%rax), %%ymm8, %%ymm7")
267
268GEN_test_RandM(VPUNPCKLDQ_256,
269               "vpunpckldq %%ymm6,  %%ymm8, %%ymm7",
270               "vpunpckldq (%%rax), %%ymm8, %%ymm7")
271
272GEN_test_RandM(VPCMPEQD_256,
273               "vpcmpeqd %%ymm6,  %%ymm8, %%ymm7",
274               "vpcmpeqd (%%rax), %%ymm8, %%ymm7")
275
276GEN_test_RandM(VPSHUFD_0x39_256,
277               "vpshufd $0x39, %%ymm9,  %%ymm8",
278               "vpshufd $0xC6, (%%rax), %%ymm7")
279
280GEN_test_RandM(VPADDQ_256,
281               "vpaddq %%ymm6,  %%ymm8, %%ymm7",
282               "vpaddq (%%rax), %%ymm8, %%ymm7")
283
284GEN_test_RandM(VPSUBQ_256,
285               "vpsubq %%ymm6,  %%ymm8, %%ymm7",
286               "vpsubq (%%rax), %%ymm8, %%ymm7")
287
288GEN_test_RandM(VPSUBW_256,
289               "vpsubw %%ymm6,  %%ymm8, %%ymm7",
290               "vpsubw (%%rax), %%ymm8, %%ymm7")
291
292GEN_test_RandM(VPCMPEQQ_256,
293               "vpcmpeqq %%ymm6,  %%ymm8, %%ymm7",
294               "vpcmpeqq (%%rax), %%ymm8, %%ymm7")
295
296GEN_test_RandM(VPCMPGTQ_256,
297               "vpcmpgtq %%ymm6,  %%ymm8, %%ymm7",
298               "vpcmpgtq (%%rax), %%ymm8, %%ymm7")
299
300GEN_test_Ronly(VPSRLQ_0x05_256,
301               "vpsrlq $0x5, %%ymm9,  %%ymm7")
302
303GEN_test_RandM(VPMULUDQ_256,
304               "vpmuludq %%ymm6,  %%ymm8, %%ymm7",
305               "vpmuludq (%%rax), %%ymm8, %%ymm7")
306
307GEN_test_RandM(VPMULDQ_256,
308               "vpmuldq %%ymm6,  %%ymm8, %%ymm7",
309               "vpmuldq (%%rax), %%ymm8, %%ymm7")
310
311GEN_test_Ronly(VPSLLQ_0x05_256,
312               "vpsllq $0x5, %%ymm9,  %%ymm7")
313
314GEN_test_RandM(VPMAXUD_256,
315               "vpmaxud %%ymm6,  %%ymm8, %%ymm7",
316               "vpmaxud (%%rax), %%ymm8, %%ymm7")
317
318GEN_test_RandM(VPMINUD_256,
319               "vpminud %%ymm6,  %%ymm8, %%ymm7",
320               "vpminud (%%rax), %%ymm8, %%ymm7")
321
322GEN_test_RandM(VPMULLD_256,
323               "vpmulld %%ymm6,  %%ymm8, %%ymm7",
324               "vpmulld (%%rax), %%ymm8, %%ymm7")
325
326GEN_test_RandM(VPMAXUW_256,
327               "vpmaxuw %%ymm6,  %%ymm8, %%ymm7",
328               "vpmaxuw (%%rax), %%ymm8, %%ymm7")
329
330GEN_test_RandM(VPMINUW_256,
331               "vpminuw %%ymm6,  %%ymm8, %%ymm7",
332               "vpminuw (%%rax), %%ymm8, %%ymm7")
333
334GEN_test_RandM(VPMAXSW_256,
335               "vpmaxsw %%ymm6,  %%ymm8, %%ymm7",
336               "vpmaxsw (%%rax), %%ymm8, %%ymm7")
337
338GEN_test_RandM(VPMINSW_256,
339               "vpminsw %%ymm6,  %%ymm8, %%ymm7",
340               "vpminsw (%%rax), %%ymm8, %%ymm7")
341
342GEN_test_RandM(VPMAXUB_256,
343               "vpmaxub %%ymm6,  %%ymm8, %%ymm7",
344               "vpmaxub (%%rax), %%ymm8, %%ymm7")
345
346GEN_test_RandM(VPMINUB_256,
347               "vpminub %%ymm6,  %%ymm8, %%ymm7",
348               "vpminub (%%rax), %%ymm8, %%ymm7")
349
350GEN_test_RandM(VPMAXSB_256,
351               "vpmaxsb %%ymm6,  %%ymm8, %%ymm7",
352               "vpmaxsb (%%rax), %%ymm8, %%ymm7")
353
354GEN_test_RandM(VPMINSB_256,
355               "vpminsb %%ymm6,  %%ymm8, %%ymm7",
356               "vpminsb (%%rax), %%ymm8, %%ymm7")
357
358GEN_test_RandM(VPMOVSXBW_256,
359               "vpmovsxbw %%xmm6,  %%ymm8",
360               "vpmovsxbw (%%rax), %%ymm8")
361
362GEN_test_RandM(VPSUBUSW_256,
363               "vpsubusw %%ymm9,  %%ymm8, %%ymm7",
364               "vpsubusw (%%rax), %%ymm8, %%ymm7")
365
366GEN_test_RandM(VPSUBSW_256,
367               "vpsubsw %%ymm9,  %%ymm8, %%ymm7",
368               "vpsubsw (%%rax), %%ymm8, %%ymm7")
369
370GEN_test_RandM(VPCMPEQW_256,
371               "vpcmpeqw %%ymm6,  %%ymm8, %%ymm7",
372               "vpcmpeqw (%%rax), %%ymm8, %%ymm7")
373
374GEN_test_RandM(VPADDB_256,
375               "vpaddb %%ymm6,  %%ymm8, %%ymm7",
376               "vpaddb (%%rax), %%ymm8, %%ymm7")
377
378GEN_test_RandM(VPUNPCKHDQ_256,
379               "vpunpckhdq %%ymm6,  %%ymm8, %%ymm7",
380               "vpunpckhdq (%%rax), %%ymm8, %%ymm7")
381
382GEN_test_RandM(VPMOVSXDQ_256,
383               "vpmovsxdq %%xmm6,  %%ymm8",
384               "vpmovsxdq (%%rax), %%ymm8")
385
386GEN_test_RandM(VPMOVSXWD_256,
387               "vpmovsxwd %%xmm6,  %%ymm8",
388               "vpmovsxwd (%%rax), %%ymm8")
389
390GEN_test_RandM(VPMULHW_256,
391               "vpmulhw %%ymm9,  %%ymm8, %%ymm7",
392               "vpmulhw (%%rax), %%ymm8, %%ymm7")
393
394GEN_test_RandM(VPUNPCKHQDQ_256,
395               "vpunpckhqdq %%ymm6,  %%ymm8, %%ymm7",
396               "vpunpckhqdq (%%rax), %%ymm8, %%ymm7")
397
398GEN_test_Ronly(VPSRAW_0x05_256,
399               "vpsraw $0x5, %%ymm9,  %%ymm7")
400
401GEN_test_RandM(VPCMPGTB_256,
402               "vpcmpgtb %%ymm6,  %%ymm8, %%ymm7",
403               "vpcmpgtb (%%rax), %%ymm8, %%ymm7")
404
405GEN_test_RandM(VPCMPGTW_256,
406               "vpcmpgtw %%ymm6,  %%ymm8, %%ymm7",
407               "vpcmpgtw (%%rax), %%ymm8, %%ymm7")
408
409GEN_test_RandM(VPCMPGTD_256,
410               "vpcmpgtd %%ymm6,  %%ymm8, %%ymm7",
411               "vpcmpgtd (%%rax), %%ymm8, %%ymm7")
412
413GEN_test_RandM(VPMOVZXBD_256,
414               "vpmovzxbd %%xmm6,  %%ymm8",
415               "vpmovzxbd (%%rax), %%ymm8")
416
417GEN_test_RandM(VPMOVSXBD_256,
418               "vpmovsxbd %%xmm6,  %%ymm8",
419               "vpmovsxbd (%%rax), %%ymm8")
420
421GEN_test_RandM(VPALIGNR_256_1of3,
422               "vpalignr $0, %%ymm6,  %%ymm8, %%ymm7",
423               "vpalignr $3, (%%rax), %%ymm8, %%ymm7")
424GEN_test_RandM(VPALIGNR_256_2of3,
425               "vpalignr $6, %%ymm6,  %%ymm8, %%ymm7",
426               "vpalignr $9, (%%rax), %%ymm8, %%ymm7")
427GEN_test_RandM(VPALIGNR_256_3of3,
428               "vpalignr $12, %%ymm6,  %%ymm8, %%ymm7",
429               "vpalignr $15, (%%rax), %%ymm8, %%ymm7")
430
431GEN_test_RandM(VPBLENDW_256_0x00,
432               "vpblendw $0x00, %%ymm6,  %%ymm8, %%ymm7",
433               "vpblendw $0x01, (%%rax), %%ymm8, %%ymm7")
434GEN_test_RandM(VPBLENDW_256_0xFE,
435               "vpblendw $0xFE, %%ymm6,  %%ymm8, %%ymm7",
436               "vpblendw $0xFF, (%%rax), %%ymm8, %%ymm7")
437GEN_test_RandM(VPBLENDW_256_0x30,
438               "vpblendw $0x30, %%ymm6,  %%ymm8, %%ymm7",
439               "vpblendw $0x03, (%%rax), %%ymm8, %%ymm7")
440GEN_test_RandM(VPBLENDW_256_0x21,
441               "vpblendw $0x21, %%ymm6,  %%ymm8, %%ymm7",
442               "vpblendw $0x12, (%%rax), %%ymm8, %%ymm7")
443GEN_test_RandM(VPBLENDW_256_0xD7,
444               "vpblendw $0xD7, %%ymm6,  %%ymm8, %%ymm7",
445               "vpblendw $0x6C, (%%rax), %%ymm8, %%ymm7")
446GEN_test_RandM(VPBLENDW_256_0xB5,
447               "vpblendw $0xB5, %%ymm6,  %%ymm8, %%ymm7",
448               "vpblendw $0x4A, (%%rax), %%ymm8, %%ymm7")
449GEN_test_RandM(VPBLENDW_256_0x85,
450               "vpblendw $0x85, %%ymm6,  %%ymm8, %%ymm7",
451               "vpblendw $0xDC, (%%rax), %%ymm8, %%ymm7")
452GEN_test_RandM(VPBLENDW_256_0x29,
453               "vpblendw $0x29, %%ymm6,  %%ymm8, %%ymm7",
454               "vpblendw $0x92, (%%rax), %%ymm8, %%ymm7")
455
456GEN_test_RandM(VPSLLW_256,
457               "andl $15, %%r14d;"
458               "vmovd %%r14d, %%xmm6;"
459               "vpsllw %%xmm6,     %%ymm8, %%ymm9",
460               "andq $15, 128(%%rax);"
461               "vpsllw 128(%%rax), %%ymm8, %%ymm9")
462
463GEN_test_RandM(VPSRLW_256,
464               "andl $15, %%r14d;"
465               "vmovd %%r14d, %%xmm6;"
466               "vpsrlw %%xmm6,     %%ymm8, %%ymm9",
467               "andq $15, 128(%%rax);"
468               "vpsrlw 128(%%rax), %%ymm8, %%ymm9")
469
470GEN_test_RandM(VPSRAW_256,
471               "andl $31, %%r14d;"
472               "vmovd %%r14d, %%xmm6;"
473               "vpsraw %%xmm6,     %%ymm8, %%ymm9",
474               "andq $15, 128(%%rax);"
475               "vpsraw 128(%%rax), %%ymm8, %%ymm9")
476
477GEN_test_RandM(VPSLLD_256,
478               "andl $31, %%r14d;"
479               "vmovd %%r14d, %%xmm6;"
480               "vpslld %%xmm6,     %%ymm8, %%ymm9",
481               "andq $31, 128(%%rax);"
482               "vpslld 128(%%rax), %%ymm8, %%ymm9")
483
484GEN_test_RandM(VPSRLD_256,
485               "andl $31, %%r14d;"
486               "vmovd %%r14d, %%xmm6;"
487               "vpsrld %%xmm6,     %%ymm8, %%ymm9",
488               "andq $31, 128(%%rax);"
489               "vpsrld 128(%%rax), %%ymm8, %%ymm9")
490
491GEN_test_RandM(VPSRAD_256,
492               "andl $31, %%r14d;"
493               "vmovd %%r14d, %%xmm6;"
494               "vpsrad %%xmm6,     %%ymm8, %%ymm9",
495               "andq $31, 128(%%rax);"
496               "vpsrad 128(%%rax), %%ymm8, %%ymm9")
497
498GEN_test_RandM(VPSLLQ_256,
499               "andl $63, %%r14d;"
500               "vmovd %%r14d, %%xmm6;"
501               "vpsllq %%xmm6,     %%ymm8, %%ymm9",
502               "andq $63, 128(%%rax);"
503               "vpsllq 128(%%rax), %%ymm8, %%ymm9")
504
505GEN_test_RandM(VPSRLQ_256,
506               "andl $63, %%r14d;"
507               "vmovd %%r14d, %%xmm6;"
508               "vpsrlq %%xmm6,     %%ymm8, %%ymm9",
509               "andq $63, 128(%%rax);"
510               "vpsrlq 128(%%rax), %%ymm8, %%ymm9")
511
512GEN_test_RandM(VPMADDWD_256,
513               "vpmaddwd %%ymm6,  %%ymm8, %%ymm7",
514               "vpmaddwd (%%rax), %%ymm8, %%ymm7")
515
516GEN_test_Monly(VMOVNTDQA_256,
517               "vmovntdqa (%%rax), %%ymm9")
518
519GEN_test_RandM(VPACKSSWB_256,
520               "vpacksswb %%ymm6,  %%ymm8, %%ymm7",
521               "vpacksswb (%%rax), %%ymm8, %%ymm7")
522
523GEN_test_RandM(VPAVGB_256,
524               "vpavgb %%ymm6,  %%ymm8, %%ymm7",
525               "vpavgb (%%rax), %%ymm8, %%ymm7")
526
527GEN_test_RandM(VPAVGW_256,
528               "vpavgw %%ymm6,  %%ymm8, %%ymm7",
529               "vpavgw (%%rax), %%ymm8, %%ymm7")
530
531GEN_test_RandM(VPADDSB_256,
532               "vpaddsb %%ymm6,  %%ymm8, %%ymm7",
533               "vpaddsb (%%rax), %%ymm8, %%ymm7")
534
535GEN_test_RandM(VPADDSW_256,
536               "vpaddsw %%ymm6,  %%ymm8, %%ymm7",
537               "vpaddsw (%%rax), %%ymm8, %%ymm7")
538
539GEN_test_RandM(VPHADDW_256,
540               "vphaddw %%ymm6,  %%ymm8, %%ymm7",
541               "vphaddw (%%rax), %%ymm8, %%ymm7")
542
543GEN_test_RandM(VPHADDD_256,
544               "vphaddd %%ymm6,  %%ymm8, %%ymm7",
545               "vphaddd (%%rax), %%ymm8, %%ymm7")
546
547GEN_test_RandM(VPHADDSW_256,
548               "vphaddsw %%ymm6,  %%ymm8, %%ymm7",
549               "vphaddsw (%%rax), %%ymm8, %%ymm7")
550
551GEN_test_RandM(VPMADDUBSW_256,
552               "vpmaddubsw %%ymm6,  %%ymm8, %%ymm7",
553               "vpmaddubsw (%%rax), %%ymm8, %%ymm7")
554
555GEN_test_RandM(VPHSUBW_256,
556               "vphsubw %%ymm6,  %%ymm8, %%ymm7",
557               "vphsubw (%%rax), %%ymm8, %%ymm7")
558
559GEN_test_RandM(VPHSUBD_256,
560               "vphsubd %%ymm6,  %%ymm8, %%ymm7",
561               "vphsubd (%%rax), %%ymm8, %%ymm7")
562
563GEN_test_RandM(VPHSUBSW_256,
564               "vphsubsw %%ymm6,  %%ymm8, %%ymm7",
565               "vphsubsw (%%rax), %%ymm8, %%ymm7")
566
567GEN_test_RandM(VPABSB_256,
568               "vpabsb %%ymm6,  %%ymm7",
569               "vpabsb (%%rax), %%ymm7")
570
571GEN_test_RandM(VPABSW_256,
572               "vpabsw %%ymm6,  %%ymm7",
573               "vpabsw (%%rax), %%ymm7")
574
575GEN_test_RandM(VPMOVSXBQ_256,
576               "vpmovsxbq %%xmm6,  %%ymm8",
577               "vpmovsxbq (%%rax), %%ymm8")
578
579GEN_test_RandM(VPMOVSXWQ_256,
580               "vpmovsxwq %%xmm6,  %%ymm8",
581               "vpmovsxwq (%%rax), %%ymm8")
582
583GEN_test_RandM(VPACKUSDW_256,
584               "vpackusdw %%ymm6,  %%ymm8, %%ymm7",
585               "vpackusdw (%%rax), %%ymm8, %%ymm7")
586
587GEN_test_RandM(VPMOVZXBQ_256,
588               "vpmovzxbq %%xmm6,  %%ymm8",
589               "vpmovzxbq (%%rax), %%ymm8")
590
591GEN_test_RandM(VPMOVZXWQ_256,
592               "vpmovzxwq %%xmm6,  %%ymm8",
593               "vpmovzxwq (%%rax), %%ymm8")
594
595GEN_test_RandM(VPMOVZXDQ_256,
596               "vpmovzxdq %%xmm6,  %%ymm8",
597               "vpmovzxdq (%%rax), %%ymm8")
598
599GEN_test_RandM(VMPSADBW_256_0x0,
600               "vmpsadbw $0, %%ymm6,  %%ymm8, %%ymm7",
601               "vmpsadbw $0, (%%rax), %%ymm8, %%ymm7")
602GEN_test_RandM(VMPSADBW_256_0x39,
603               "vmpsadbw $0x39, %%ymm6,  %%ymm8, %%ymm7",
604               "vmpsadbw $0x39, (%%rax), %%ymm8, %%ymm7")
605GEN_test_RandM(VMPSADBW_256_0x32,
606               "vmpsadbw $0x32, %%ymm6,  %%ymm8, %%ymm7",
607               "vmpsadbw $0x32, (%%rax), %%ymm8, %%ymm7")
608GEN_test_RandM(VMPSADBW_256_0x2b,
609               "vmpsadbw $0x2b, %%ymm6,  %%ymm8, %%ymm7",
610               "vmpsadbw $0x2b, (%%rax), %%ymm8, %%ymm7")
611GEN_test_RandM(VMPSADBW_256_0x24,
612               "vmpsadbw $0x24, %%ymm6,  %%ymm8, %%ymm7",
613               "vmpsadbw $0x24, (%%rax), %%ymm8, %%ymm7")
614GEN_test_RandM(VMPSADBW_256_0x1d,
615               "vmpsadbw $0x1d, %%ymm6,  %%ymm8, %%ymm7",
616               "vmpsadbw $0x1d, (%%rax), %%ymm8, %%ymm7")
617GEN_test_RandM(VMPSADBW_256_0x16,
618               "vmpsadbw $0x16, %%ymm6,  %%ymm8, %%ymm7",
619               "vmpsadbw $0x16, (%%rax), %%ymm8, %%ymm7")
620GEN_test_RandM(VMPSADBW_256_0x0f,
621               "vmpsadbw $0x0f, %%ymm6,  %%ymm8, %%ymm7",
622               "vmpsadbw $0x0f, (%%rax), %%ymm8, %%ymm7")
623
624GEN_test_RandM(VPSADBW_256,
625               "vpsadbw %%ymm6,  %%ymm8, %%ymm7",
626               "vpsadbw (%%rax), %%ymm8, %%ymm7")
627
628GEN_test_RandM(VPSIGNB_256,
629               "vpsignb %%ymm6,  %%ymm8, %%ymm7",
630               "vpsignb (%%rax), %%ymm8, %%ymm7")
631
632GEN_test_RandM(VPSIGNW_256,
633               "vpsignw %%ymm6,  %%ymm8, %%ymm7",
634               "vpsignw (%%rax), %%ymm8, %%ymm7")
635
636GEN_test_RandM(VPSIGND_256,
637               "vpsignd %%ymm6,  %%ymm8, %%ymm7",
638               "vpsignd (%%rax), %%ymm8, %%ymm7")
639
640GEN_test_RandM(VPMULHRSW_256,
641               "vpmulhrsw %%ymm6,  %%ymm8, %%ymm7",
642               "vpmulhrsw (%%rax), %%ymm8, %%ymm7")
643
644/* Instructions new in AVX2.  */
645
646GEN_test_Monly(VBROADCASTI128,
647               "vbroadcasti128 (%%rax), %%ymm9")
648
649GEN_test_RandM(VEXTRACTI128_0x0,
650               "vextracti128 $0x0, %%ymm7, %%xmm9",
651               "vextracti128 $0x0, %%ymm7, (%%rax)")
652
653GEN_test_RandM(VEXTRACTI128_0x1,
654               "vextracti128 $0x1, %%ymm7, %%xmm9",
655               "vextracti128 $0x1, %%ymm7, (%%rax)")
656
657GEN_test_RandM(VINSERTI128_0x0,
658               "vinserti128 $0x0, %%xmm9,  %%ymm7, %%ymm8",
659               "vinserti128 $0x0, (%%rax), %%ymm7, %%ymm8")
660
661GEN_test_RandM(VINSERTI128_0x1,
662               "vinserti128 $0x1, %%xmm9,  %%ymm7, %%ymm8",
663               "vinserti128 $0x1, (%%rax), %%ymm7, %%ymm8")
664
665GEN_test_RandM(VPERM2I128_0x00,
666               "vperm2i128 $0x00, %%ymm6,  %%ymm8, %%ymm7",
667               "vperm2i128 $0x00, (%%rax), %%ymm8, %%ymm7")
668GEN_test_RandM(VPERM2I128_0xFF,
669               "vperm2i128 $0xFF, %%ymm6,  %%ymm8, %%ymm7",
670               "vperm2i128 $0xFF, (%%rax), %%ymm8, %%ymm7")
671GEN_test_RandM(VPERM2I128_0x30,
672               "vperm2i128 $0x30, %%ymm6,  %%ymm8, %%ymm7",
673               "vperm2i128 $0x30, (%%rax), %%ymm8, %%ymm7")
674GEN_test_RandM(VPERM2I128_0x21,
675               "vperm2i128 $0x21, %%ymm6,  %%ymm8, %%ymm7",
676               "vperm2i128 $0x21, (%%rax), %%ymm8, %%ymm7")
677GEN_test_RandM(VPERM2I128_0x12,
678               "vperm2i128 $0x12, %%ymm6,  %%ymm8, %%ymm7",
679               "vperm2i128 $0x12, (%%rax), %%ymm8, %%ymm7")
680GEN_test_RandM(VPERM2I128_0x03,
681               "vperm2i128 $0x03, %%ymm6,  %%ymm8, %%ymm7",
682               "vperm2i128 $0x03, (%%rax), %%ymm8, %%ymm7")
683GEN_test_RandM(VPERM2I128_0x85,
684               "vperm2i128 $0x85, %%ymm6,  %%ymm8, %%ymm7",
685               "vperm2i128 $0x85, (%%rax), %%ymm8, %%ymm7")
686GEN_test_RandM(VPERM2I128_0x5A,
687               "vperm2i128 $0x5A, %%ymm6,  %%ymm8, %%ymm7",
688               "vperm2i128 $0x5A, (%%rax), %%ymm8, %%ymm7")
689
690GEN_test_Ronly(VBROADCASTSS_128,
691               "vbroadcastss %%xmm9, %%xmm7")
692
693GEN_test_Ronly(VBROADCASTSS_256,
694               "vbroadcastss %%xmm9, %%ymm7")
695
696GEN_test_Ronly(VBROADCASTSD_256,
697               "vbroadcastsd %%xmm9, %%ymm7")
698
699GEN_test_RandM(VPERMD,
700               "vpermd %%ymm6, %%ymm7, %%ymm9",
701               "vpermd (%%rax), %%ymm7, %%ymm9")
702
703GEN_test_RandM(VPERMQ_0x00,
704               "vpermq $0x00, %%ymm6,  %%ymm7",
705               "vpermq $0x01, (%%rax), %%ymm7")
706GEN_test_RandM(VPERMQ_0xFE,
707               "vpermq $0xFE, %%ymm6,  %%ymm7",
708               "vpermq $0xFF, (%%rax), %%ymm7")
709GEN_test_RandM(VPERMQ_0x30,
710               "vpermq $0x30, %%ymm6,  %%ymm7",
711               "vpermq $0x03, (%%rax), %%ymm7")
712GEN_test_RandM(VPERMQ_0x21,
713               "vpermq $0x21, %%ymm6,  %%ymm7",
714               "vpermq $0x12, (%%rax), %%ymm7")
715GEN_test_RandM(VPERMQ_0xD7,
716               "vpermq $0xD7, %%ymm6,  %%ymm7",
717               "vpermq $0x6C, (%%rax), %%ymm7")
718GEN_test_RandM(VPERMQ_0xB5,
719               "vpermq $0xB5, %%ymm6,  %%ymm7",
720               "vpermq $0x4A, (%%rax), %%ymm7")
721GEN_test_RandM(VPERMQ_0x85,
722               "vpermq $0x85, %%ymm6,  %%ymm7",
723               "vpermq $0xDC, (%%rax), %%ymm7")
724GEN_test_RandM(VPERMQ_0x29,
725               "vpermq $0x29, %%ymm6,  %%ymm7",
726               "vpermq $0x92, (%%rax), %%ymm7")
727
728GEN_test_RandM(VPERMPS,
729               "vpermps %%ymm6, %%ymm7, %%ymm9",
730               "vpermps (%%rax), %%ymm7, %%ymm9")
731
732GEN_test_RandM(VPERMPD_0x00,
733               "vpermpd $0x00, %%ymm6,  %%ymm7",
734               "vpermpd $0x01, (%%rax), %%ymm7")
735GEN_test_RandM(VPERMPD_0xFE,
736               "vpermpd $0xFE, %%ymm6,  %%ymm7",
737               "vpermpd $0xFF, (%%rax), %%ymm7")
738GEN_test_RandM(VPERMPD_0x30,
739               "vpermpd $0x30, %%ymm6,  %%ymm7",
740               "vpermpd $0x03, (%%rax), %%ymm7")
741GEN_test_RandM(VPERMPD_0x21,
742               "vpermpd $0x21, %%ymm6,  %%ymm7",
743               "vpermpd $0x12, (%%rax), %%ymm7")
744GEN_test_RandM(VPERMPD_0xD7,
745               "vpermpd $0xD7, %%ymm6,  %%ymm7",
746               "vpermpd $0x6C, (%%rax), %%ymm7")
747GEN_test_RandM(VPERMPD_0xB5,
748               "vpermpd $0xB5, %%ymm6,  %%ymm7",
749               "vpermpd $0x4A, (%%rax), %%ymm7")
750GEN_test_RandM(VPERMPD_0x85,
751               "vpermpd $0x85, %%ymm6,  %%ymm7",
752               "vpermpd $0xDC, (%%rax), %%ymm7")
753GEN_test_RandM(VPERMPD_0x29,
754               "vpermpd $0x29, %%ymm6,  %%ymm7",
755               "vpermpd $0x92, (%%rax), %%ymm7")
756
757GEN_test_RandM(VPBLENDD_128_0x00,
758               "vpblendd $0x00, %%xmm6,  %%xmm8, %%xmm7",
759               "vpblendd $0x01, (%%rax), %%xmm8, %%xmm7")
760GEN_test_RandM(VPBLENDD_128_0x02,
761               "vpblendd $0x02, %%xmm6,  %%xmm8, %%xmm7",
762               "vpblendd $0x03, (%%rax), %%xmm8, %%xmm7")
763GEN_test_RandM(VPBLENDD_128_0x04,
764               "vpblendd $0x04, %%xmm6,  %%xmm8, %%xmm7",
765               "vpblendd $0x05, (%%rax), %%xmm8, %%xmm7")
766GEN_test_RandM(VPBLENDD_128_0x06,
767               "vpblendd $0x06, %%xmm6,  %%xmm8, %%xmm7",
768               "vpblendd $0x07, (%%rax), %%xmm8, %%xmm7")
769GEN_test_RandM(VPBLENDD_128_0x08,
770               "vpblendd $0x08, %%xmm6,  %%xmm8, %%xmm7",
771               "vpblendd $0x09, (%%rax), %%xmm8, %%xmm7")
772GEN_test_RandM(VPBLENDD_128_0x0A,
773               "vpblendd $0x0A, %%xmm6,  %%xmm8, %%xmm7",
774               "vpblendd $0x0B, (%%rax), %%xmm8, %%xmm7")
775GEN_test_RandM(VPBLENDD_128_0x0C,
776               "vpblendd $0x0C, %%xmm6,  %%xmm8, %%xmm7",
777               "vpblendd $0x0D, (%%rax), %%xmm8, %%xmm7")
778GEN_test_RandM(VPBLENDD_128_0x0E,
779               "vpblendd $0x0E, %%xmm6,  %%xmm8, %%xmm7",
780               "vpblendd $0x0F, (%%rax), %%xmm8, %%xmm7")
781
782GEN_test_RandM(VPBLENDD_256_0x00,
783               "vpblendd $0x00, %%ymm6,  %%ymm8, %%ymm7",
784               "vpblendd $0x01, (%%rax), %%ymm8, %%ymm7")
785GEN_test_RandM(VPBLENDD_256_0xFE,
786               "vpblendd $0xFE, %%ymm6,  %%ymm8, %%ymm7",
787               "vpblendd $0xFF, (%%rax), %%ymm8, %%ymm7")
788GEN_test_RandM(VPBLENDD_256_0x30,
789               "vpblendd $0x30, %%ymm6,  %%ymm8, %%ymm7",
790               "vpblendd $0x03, (%%rax), %%ymm8, %%ymm7")
791GEN_test_RandM(VPBLENDD_256_0x21,
792               "vpblendd $0x21, %%ymm6,  %%ymm8, %%ymm7",
793               "vpblendd $0x12, (%%rax), %%ymm8, %%ymm7")
794GEN_test_RandM(VPBLENDD_256_0xD7,
795               "vpblendd $0xD7, %%ymm6,  %%ymm8, %%ymm7",
796               "vpblendd $0x6C, (%%rax), %%ymm8, %%ymm7")
797GEN_test_RandM(VPBLENDD_256_0xB5,
798               "vpblendd $0xB5, %%ymm6,  %%ymm8, %%ymm7",
799               "vpblendd $0x4A, (%%rax), %%ymm8, %%ymm7")
800GEN_test_RandM(VPBLENDD_256_0x85,
801               "vpblendd $0x85, %%ymm6,  %%ymm8, %%ymm7",
802               "vpblendd $0xDC, (%%rax), %%ymm8, %%ymm7")
803GEN_test_RandM(VPBLENDD_256_0x29,
804               "vpblendd $0x29, %%ymm6,  %%ymm8, %%ymm7",
805               "vpblendd $0x92, (%%rax), %%ymm8, %%ymm7")
806
807GEN_test_RandM(VPSLLVD_128,
808               "vpslld $27, %%xmm6, %%xmm6;"
809               "vpsrld $27, %%xmm6, %%xmm6;"
810               "vpsllvd %%xmm6, %%xmm8, %%xmm7",
811               "andl $31, (%%rax);"
812               "andl $31, 4(%%rax);"
813               "andl $31, 8(%%rax);"
814               "vpsllvd (%%rax), %%xmm8, %%xmm7")
815
816GEN_test_RandM(VPSLLVD_256,
817               "vpslld $27, %%ymm6, %%ymm6;"
818               "vpsrld $27, %%ymm6, %%ymm6;"
819               "vpsllvd %%ymm6, %%ymm8, %%ymm7",
820               "andl $31, (%%rax);"
821               "andl $31, 4(%%rax);"
822               "andl $31, 8(%%rax);"
823               "andl $31, 16(%%rax);"
824               "andl $31, 20(%%rax);"
825               "andl $31, 24(%%rax);"
826               "vpsllvd (%%rax), %%ymm8, %%ymm7")
827
828GEN_test_RandM(VPSLLVQ_128,
829               "vpsllq $58, %%xmm6, %%xmm6;"
830               "vpsrlq $58, %%xmm6, %%xmm6;"
831               "vpsllvq %%xmm6, %%xmm8, %%xmm7",
832               "andl $63, (%%rax);"
833               "vpsllvq (%%rax), %%xmm8, %%xmm7")
834
835GEN_test_RandM(VPSLLVQ_256,
836               "vpsllq $58, %%ymm6, %%ymm6;"
837               "vpsrlq $58, %%ymm6, %%ymm6;"
838               "vpsllvq %%ymm6, %%ymm8, %%ymm7",
839               "andl $63, (%%rax);"
840               "andl $63, 8(%%rax);"
841               "andl $63, 16(%%rax);"
842               "vpsllvq (%%rax), %%ymm8, %%ymm7")
843
844GEN_test_RandM(VPSRLVD_128,
845               "vpslld $27, %%xmm6, %%xmm6;"
846               "vpsrld $27, %%xmm6, %%xmm6;"
847               "vpsrlvd %%xmm6, %%xmm8, %%xmm7",
848               "andl $31, (%%rax);"
849               "andl $31, 4(%%rax);"
850               "andl $31, 8(%%rax);"
851               "vpsrlvd (%%rax), %%xmm8, %%xmm7")
852
853GEN_test_RandM(VPSRLVD_256,
854               "vpslld $27, %%ymm6, %%ymm6;"
855               "vpsrld $27, %%ymm6, %%ymm6;"
856               "vpsrlvd %%ymm6, %%ymm8, %%ymm7",
857               "andl $31, (%%rax);"
858               "andl $31, 4(%%rax);"
859               "andl $31, 8(%%rax);"
860               "andl $31, 16(%%rax);"
861               "andl $31, 20(%%rax);"
862               "andl $31, 24(%%rax);"
863               "vpsrlvd (%%rax), %%ymm8, %%ymm7")
864
865GEN_test_RandM(VPSRLVQ_128,
866               "vpsllq $58, %%xmm6, %%xmm6;"
867               "vpsrlq $58, %%xmm6, %%xmm6;"
868               "vpsrlvq %%xmm6, %%xmm8, %%xmm7",
869               "andl $63, (%%rax);"
870               "vpsrlvq (%%rax), %%xmm8, %%xmm7")
871
872GEN_test_RandM(VPSRLVQ_256,
873               "vpsllq $58, %%ymm6, %%ymm6;"
874               "vpsrlq $58, %%ymm6, %%ymm6;"
875               "vpsrlvq %%ymm6, %%ymm8, %%ymm7",
876               "andl $63, (%%rax);"
877               "andl $63, 8(%%rax);"
878               "andl $63, 16(%%rax);"
879               "vpsrlvq (%%rax), %%ymm8, %%ymm7")
880
881GEN_test_RandM(VPSRAVD_128,
882               "vpslld $27, %%xmm6, %%xmm6;"
883               "vpsrld $27, %%xmm6, %%xmm6;"
884               "vpsravd %%xmm6, %%xmm8, %%xmm7",
885               "andl $31, (%%rax);"
886               "andl $31, 4(%%rax);"
887               "andl $31, 8(%%rax);"
888               "vpsravd (%%rax), %%xmm8, %%xmm7")
889
890GEN_test_RandM(VPSRAVD_256,
891               "vpslld $27, %%ymm6, %%ymm6;"
892               "vpsrld $27, %%ymm6, %%ymm6;"
893               "vpsravd %%ymm6, %%ymm8, %%ymm7",
894               "andl $31, (%%rax);"
895               "andl $31, 4(%%rax);"
896               "andl $31, 8(%%rax);"
897               "andl $31, 16(%%rax);"
898               "andl $31, 20(%%rax);"
899               "andl $31, 24(%%rax);"
900               "vpsravd (%%rax), %%ymm8, %%ymm7")
901
902GEN_test_RandM(VPBROADCASTB_128,
903               "vpbroadcastb %%xmm9, %%xmm7",
904               "vpbroadcastb (%%rax), %%xmm7")
905
906GEN_test_RandM(VPBROADCASTB_256,
907               "vpbroadcastb %%xmm9, %%ymm7",
908               "vpbroadcastb (%%rax), %%ymm7")
909
910GEN_test_RandM(VPBROADCASTW_128,
911               "vpbroadcastw %%xmm9, %%xmm7",
912               "vpbroadcastw (%%rax), %%xmm7")
913
914GEN_test_RandM(VPBROADCASTW_256,
915               "vpbroadcastw %%xmm9, %%ymm7",
916               "vpbroadcastw (%%rax), %%ymm7")
917
918GEN_test_RandM(VPBROADCASTD_128,
919               "vpbroadcastd %%xmm9, %%xmm7",
920               "vpbroadcastd (%%rax), %%xmm7")
921
922GEN_test_RandM(VPBROADCASTD_256,
923               "vpbroadcastd %%xmm9, %%ymm7",
924               "vpbroadcastd (%%rax), %%ymm7")
925
926GEN_test_RandM(VPBROADCASTQ_128,
927               "vpbroadcastq %%xmm9, %%xmm7",
928               "vpbroadcastq (%%rax), %%xmm7")
929
930GEN_test_RandM(VPBROADCASTQ_256,
931               "vpbroadcastq %%xmm9, %%ymm7",
932               "vpbroadcastq (%%rax), %%ymm7")
933
934GEN_test_Monly(VPMASKMOVD_128_LoadForm,
935               "vpmaskmovd (%%rax), %%xmm8, %%xmm7;"
936               "vxorps %%xmm6, %%xmm6, %%xmm6;"
937               "vpmaskmovd (%%rax,%%rax,4), %%xmm6, %%xmm9")
938
939GEN_test_Monly(VPMASKMOVD_256_LoadForm,
940               "vpmaskmovd (%%rax), %%ymm8, %%ymm7;"
941               "vxorps %%ymm6, %%ymm6, %%ymm6;"
942               "vpmaskmovd (%%rax,%%rax,4), %%ymm6, %%ymm9")
943
944GEN_test_Monly(VPMASKMOVQ_128_LoadForm,
945               "vpmaskmovq (%%rax), %%xmm8, %%xmm7;"
946               "vxorpd %%xmm6, %%xmm6, %%xmm6;"
947               "vpmaskmovq (%%rax,%%rax,4), %%xmm6, %%xmm9")
948
949GEN_test_Monly(VPMASKMOVQ_256_LoadForm,
950               "vpmaskmovq (%%rax), %%ymm8, %%ymm7;"
951               "vxorpd %%ymm6, %%ymm6, %%ymm6;"
952               "vpmaskmovq (%%rax,%%rax,4), %%ymm6, %%ymm9")
953
954GEN_test_Monly(VPMASKMOVD_128_StoreForm,
955               "vpmaskmovd %%xmm8, %%xmm7, (%%rax);"
956               "vxorps %%xmm6, %%xmm6, %%xmm6;"
957               "vpmaskmovd %%xmm9, %%xmm6, (%%rax,%%rax,4)")
958
959GEN_test_Monly(VPMASKMOVD_256_StoreForm,
960               "vpmaskmovd %%ymm8, %%ymm7, (%%rax);"
961               "vxorps %%ymm6, %%ymm6, %%ymm6;"
962               "vpmaskmovd %%ymm9, %%ymm6, (%%rax,%%rax,4)")
963
964GEN_test_Monly(VPMASKMOVQ_128_StoreForm,
965               "vpmaskmovq %%xmm8, %%xmm7, (%%rax);"
966               "vxorpd %%xmm6, %%xmm6, %%xmm6;"
967               "vpmaskmovq %%xmm9, %%xmm6, (%%rax,%%rax,4)")
968
969GEN_test_Monly(VPMASKMOVQ_256_StoreForm,
970               "vpmaskmovq %%ymm8, %%ymm7, (%%rax);"
971               "vxorpd %%ymm6, %%ymm6, %%ymm6;"
972               "vpmaskmovq %%ymm9, %%ymm6, (%%rax,%%rax,4)")
973
974GEN_test_Ronly(VGATHERDPS_128,
975               "vpslld $25, %%xmm7, %%xmm8;"
976               "vpsrld $25, %%xmm8, %%xmm8;"
977               "vblendvps %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
978               "leaq randArray(%%rip), %%r14;"
979               "vgatherdps %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;"
980               "xorl %%r14d, %%r14d")
981
982GEN_test_Ronly(VGATHERDPS_256,
983               "vpslld $25, %%ymm7, %%ymm8;"
984               "vpsrld $25, %%ymm8, %%ymm8;"
985               "vblendvps %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
986               "leaq randArray(%%rip), %%r14;"
987               "vgatherdps %%ymm6, 3(%%r14,%%ymm8,4), %%ymm9;"
988               "xorl %%r14d, %%r14d")
989
990GEN_test_Ronly(VGATHERQPS_128_1,
991               "vpsllq $57, %%xmm7, %%xmm8;"
992               "vpsrlq $57, %%xmm8, %%xmm8;"
993               "vpmovsxdq %%xmm6, %%xmm9;"
994               "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
995               "vmovdqa 96(%0), %%ymm9;"
996               "leaq randArray(%%rip), %%r14;"
997               "vgatherqps %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;"
998               "xorl %%r14d, %%r14d")
999
1000GEN_test_Ronly(VGATHERQPS_256_1,
1001               "vpsllq $57, %%ymm7, %%ymm8;"
1002               "vpsrlq $57, %%ymm8, %%ymm8;"
1003               "vpmovsxdq %%xmm6, %%ymm9;"
1004               "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
1005               "vmovdqa 96(%0), %%ymm9;"
1006               "leaq randArray(%%rip), %%r14;"
1007               "vgatherqps %%xmm6, 3(%%r14,%%ymm8,4), %%xmm9;"
1008               "xorl %%r14d, %%r14d")
1009
1010GEN_test_Ronly(VGATHERQPS_128_2,
1011               "vpsllq $57, %%xmm7, %%xmm8;"
1012               "vpsrlq $57, %%xmm8, %%xmm8;"
1013               "vpmovsxdq %%xmm6, %%xmm9;"
1014               "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
1015               "vmovdqa 96(%0), %%ymm9;"
1016               "leaq randArray(%%rip), %%r14;"
1017               "vmovq %%r14, %%xmm7;"
1018               "vpsllq $2, %%xmm8, %%xmm8;"
1019               "vpbroadcastq %%xmm7, %%xmm7;"
1020               "vpaddq %%xmm7, %%xmm8, %%xmm8;"
1021               "vgatherqps %%xmm6, 1(,%%xmm8,1), %%xmm9;"
1022               "vpsubq %%xmm7, %%xmm8, %%xmm8;"
1023               "vmovdqa 0(%0), %%ymm7;"
1024               "xorl %%r14d, %%r14d")
1025
1026GEN_test_Ronly(VGATHERQPS_256_2,
1027               "vpsllq $57, %%ymm7, %%ymm8;"
1028               "vpsrlq $57, %%ymm8, %%ymm8;"
1029               "vpmovsxdq %%xmm6, %%ymm9;"
1030               "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
1031               "vmovdqa 96(%0), %%ymm9;"
1032               "leaq randArray(%%rip), %%r14;"
1033               "vmovq %%r14, %%xmm7;"
1034               "vpsllq $2, %%ymm8, %%ymm8;"
1035               "vpbroadcastq %%xmm7, %%ymm7;"
1036               "vpaddq %%ymm7, %%ymm8, %%ymm8;"
1037               "vgatherqps %%xmm6, 1(,%%ymm8,1), %%xmm9;"
1038               "vpsubq %%ymm7, %%ymm8, %%ymm8;"
1039               "vmovdqa 0(%0), %%ymm7;"
1040               "xorl %%r14d, %%r14d")
1041
1042GEN_test_Ronly(VGATHERDPD_128,
1043               "vpslld $26, %%xmm7, %%xmm8;"
1044               "vpsrld $26, %%xmm8, %%xmm8;"
1045               "vshufps $13, %%xmm6, %%xmm6, %%xmm9;"
1046               "vblendvps %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
1047               "vmovdqa 96(%0), %%ymm9;"
1048               "leaq randArray(%%rip), %%r14;"
1049               "vgatherdpd %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;"
1050               "xorl %%r14d, %%r14d")
1051
1052GEN_test_Ronly(VGATHERDPD_256,
1053               "vpslld $26, %%ymm7, %%ymm8;"
1054               "vpsrld $26, %%ymm8, %%ymm8;"
1055               "vextracti128 $1, %%ymm6, %%xmm9;"
1056               "vshufps $221, %%ymm9, %%ymm6, %%ymm9;"
1057               "vblendvps %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
1058               "vmovdqa 96(%0), %%ymm9;"
1059               "leaq randArray(%%rip), %%r14;"
1060               "vgatherdpd %%ymm6, 3(%%r14,%%xmm8,8), %%ymm9;"
1061               "xorl %%r14d, %%r14d")
1062
1063GEN_test_Ronly(VGATHERQPD_128_1,
1064               "vpsllq $58, %%xmm7, %%xmm8;"
1065               "vpsrlq $58, %%xmm8, %%xmm8;"
1066               "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
1067               "leaq randArray(%%rip), %%r14;"
1068               "vgatherqpd %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;"
1069               "xorl %%r14d, %%r14d")
1070
1071GEN_test_Ronly(VGATHERQPD_256_1,
1072               "vpsllq $58, %%ymm7, %%ymm8;"
1073               "vpsrlq $58, %%ymm8, %%ymm8;"
1074               "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
1075               "leaq randArray(%%rip), %%r14;"
1076               "vgatherqpd %%ymm6, 3(%%r14,%%ymm8,8), %%ymm9;"
1077               "xorl %%r14d, %%r14d")
1078
1079GEN_test_Ronly(VGATHERQPD_128_2,
1080               "vpsllq $58, %%xmm7, %%xmm8;"
1081               "vpsrlq $58, %%xmm8, %%xmm8;"
1082               "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
1083               "leaq randArray(%%rip), %%r14;"
1084               "vmovq %%r14, %%xmm7;"
1085               "vpsllq $2, %%xmm8, %%xmm8;"
1086               "vpbroadcastq %%xmm7, %%xmm7;"
1087               "vpaddq %%xmm7, %%xmm8, %%xmm8;"
1088               "vgatherqpd %%xmm6, 1(,%%xmm8,1), %%xmm9;"
1089               "vpsubq %%xmm7, %%xmm8, %%xmm8;"
1090               "vmovdqa 0(%0), %%ymm7;"
1091               "xorl %%r14d, %%r14d")
1092
1093GEN_test_Ronly(VGATHERQPD_256_2,
1094               "vpsllq $58, %%ymm7, %%ymm8;"
1095               "vpsrlq $58, %%ymm8, %%ymm8;"
1096               "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
1097               "leaq randArray(%%rip), %%r14;"
1098               "vmovq %%r14, %%xmm7;"
1099               "vpsllq $2, %%ymm8, %%ymm8;"
1100               "vpbroadcastq %%xmm7, %%ymm7;"
1101               "vpaddq %%ymm7, %%ymm8, %%ymm8;"
1102               "vgatherqpd %%ymm6, 1(,%%ymm8,1), %%ymm9;"
1103               "vpsubq %%ymm7, %%ymm8, %%ymm8;"
1104               "vmovdqa 0(%0), %%ymm7;"
1105               "xorl %%r14d, %%r14d")
1106
1107GEN_test_Ronly(VPGATHERDD_128,
1108               "vpslld $25, %%xmm7, %%xmm8;"
1109               "vpsrld $25, %%xmm8, %%xmm8;"
1110               "vblendvps %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
1111               "leaq randArray(%%rip), %%r14;"
1112               "vpgatherdd %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;"
1113               "xorl %%r14d, %%r14d")
1114
1115GEN_test_Ronly(VPGATHERDD_256,
1116               "vpslld $25, %%ymm7, %%ymm8;"
1117               "vpsrld $25, %%ymm8, %%ymm8;"
1118               "vblendvps %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
1119               "leaq randArray(%%rip), %%r14;"
1120               "vpgatherdd %%ymm6, 3(%%r14,%%ymm8,4), %%ymm9;"
1121               "xorl %%r14d, %%r14d")
1122
1123GEN_test_Ronly(VPGATHERQD_128_1,
1124               "vpsllq $57, %%xmm7, %%xmm8;"
1125               "vpsrlq $57, %%xmm8, %%xmm8;"
1126               "vpmovsxdq %%xmm6, %%xmm9;"
1127               "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
1128               "vmovdqa 96(%0), %%ymm9;"
1129               "leaq randArray(%%rip), %%r14;"
1130               "vpgatherqd %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;"
1131               "xorl %%r14d, %%r14d")
1132
1133GEN_test_Ronly(VPGATHERQD_256_1,
1134               "vpsllq $57, %%ymm7, %%ymm8;"
1135               "vpsrlq $57, %%ymm8, %%ymm8;"
1136               "vpmovsxdq %%xmm6, %%ymm9;"
1137               "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
1138               "vmovdqa 96(%0), %%ymm9;"
1139               "leaq randArray(%%rip), %%r14;"
1140               "vpgatherqd %%xmm6, 3(%%r14,%%ymm8,4), %%xmm9;"
1141               "xorl %%r14d, %%r14d")
1142
1143GEN_test_Ronly(VPGATHERQD_128_2,
1144               "vpsllq $57, %%xmm7, %%xmm8;"
1145               "vpsrlq $57, %%xmm8, %%xmm8;"
1146               "vpmovsxdq %%xmm6, %%xmm9;"
1147               "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
1148               "vmovdqa 96(%0), %%ymm9;"
1149               "leaq randArray(%%rip), %%r14;"
1150               "vmovq %%r14, %%xmm7;"
1151               "vpsllq $2, %%xmm8, %%xmm8;"
1152               "vpbroadcastq %%xmm7, %%xmm7;"
1153               "vpaddq %%xmm7, %%xmm8, %%xmm8;"
1154               "vpgatherqd %%xmm6, 1(,%%xmm8,1), %%xmm9;"
1155               "vpsubq %%xmm7, %%xmm8, %%xmm8;"
1156               "vmovdqa 0(%0), %%ymm7;"
1157               "xorl %%r14d, %%r14d")
1158
1159GEN_test_Ronly(VPGATHERQD_256_2,
1160               "vpsllq $57, %%ymm7, %%ymm8;"
1161               "vpsrlq $57, %%ymm8, %%ymm8;"
1162               "vpmovsxdq %%xmm6, %%ymm9;"
1163               "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
1164               "vmovdqa 96(%0), %%ymm9;"
1165               "leaq randArray(%%rip), %%r14;"
1166               "vmovq %%r14, %%xmm7;"
1167               "vpsllq $2, %%ymm8, %%ymm8;"
1168               "vpbroadcastq %%xmm7, %%ymm7;"
1169               "vpaddq %%ymm7, %%ymm8, %%ymm8;"
1170               "vpgatherqd %%xmm6, 1(,%%ymm8,1), %%xmm9;"
1171               "vpsubq %%ymm7, %%ymm8, %%ymm8;"
1172               "vmovdqa 0(%0), %%ymm7;"
1173               "xorl %%r14d, %%r14d")
1174
1175GEN_test_Ronly(VPGATHERDQ_128,
1176               "vpslld $26, %%xmm7, %%xmm8;"
1177               "vpsrld $26, %%xmm8, %%xmm8;"
1178               "vshufps $13, %%xmm6, %%xmm6, %%xmm9;"
1179               "vblendvps %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
1180               "vmovdqa 96(%0), %%ymm9;"
1181               "leaq randArray(%%rip), %%r14;"
1182               "vpgatherdq %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;"
1183               "xorl %%r14d, %%r14d")
1184
1185GEN_test_Ronly(VPGATHERDQ_256,
1186               "vpslld $26, %%ymm7, %%ymm8;"
1187               "vpsrld $26, %%ymm8, %%ymm8;"
1188               "vextracti128 $1, %%ymm6, %%xmm9;"
1189               "vshufps $221, %%ymm9, %%ymm6, %%ymm9;"
1190               "vblendvps %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
1191               "vmovdqa 96(%0), %%ymm9;"
1192               "leaq randArray(%%rip), %%r14;"
1193               "vpgatherdq %%ymm6, 3(%%r14,%%xmm8,8), %%ymm9;"
1194               "xorl %%r14d, %%r14d")
1195
1196GEN_test_Ronly(VPGATHERQQ_128_1,
1197               "vpsllq $58, %%xmm7, %%xmm8;"
1198               "vpsrlq $58, %%xmm8, %%xmm8;"
1199               "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
1200               "leaq randArray(%%rip), %%r14;"
1201               "vpgatherqq %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;"
1202               "xorl %%r14d, %%r14d")
1203
1204GEN_test_Ronly(VPGATHERQQ_256_1,
1205               "vpsllq $58, %%ymm7, %%ymm8;"
1206               "vpsrlq $58, %%ymm8, %%ymm8;"
1207               "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
1208               "leaq randArray(%%rip), %%r14;"
1209               "vpgatherqq %%ymm6, 3(%%r14,%%ymm8,8), %%ymm9;"
1210               "xorl %%r14d, %%r14d")
1211
1212GEN_test_Ronly(VPGATHERQQ_128_2,
1213               "vpsllq $58, %%xmm7, %%xmm8;"
1214               "vpsrlq $58, %%xmm8, %%xmm8;"
1215               "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
1216               "leaq randArray(%%rip), %%r14;"
1217               "vmovq %%r14, %%xmm7;"
1218               "vpsllq $2, %%xmm8, %%xmm8;"
1219               "vpbroadcastq %%xmm7, %%xmm7;"
1220               "vpaddq %%xmm7, %%xmm8, %%xmm8;"
1221               "vpgatherqq %%xmm6, 1(,%%xmm8,1), %%xmm9;"
1222               "vpsubq %%xmm7, %%xmm8, %%xmm8;"
1223               "vmovdqa 0(%0), %%ymm7;"
1224               "xorl %%r14d, %%r14d")
1225
1226GEN_test_Ronly(VPGATHERQQ_256_2,
1227               "vpsllq $58, %%ymm7, %%ymm8;"
1228               "vpsrlq $58, %%ymm8, %%ymm8;"
1229               "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
1230               "leaq randArray(%%rip), %%r14;"
1231               "vmovq %%r14, %%xmm7;"
1232               "vpsllq $2, %%ymm8, %%ymm8;"
1233               "vpbroadcastq %%xmm7, %%ymm7;"
1234               "vpaddq %%ymm7, %%ymm8, %%ymm8;"
1235               "vpgatherqq %%ymm6, 1(,%%ymm8,1), %%ymm9;"
1236               "vpsubq %%ymm7, %%ymm8, %%ymm8;"
1237               "vmovdqa 0(%0), %%ymm7;"
1238               "xorl %%r14d, %%r14d")
1239
/* Duplicate of the comment near the top of the file, for convenient
   reference.
   Allowed operands in test insns:
     Reg form:  %ymm6,  %ymm7, %ymm8, %ymm9 and %r14.
     Mem form:  (%rax), %ymm7, %ymm8, %ymm9 and %r14.
   Imm8 etc. fields are also allowed, where they make sense.
   Both forms may use ymm0 as scratch.  Mem form may also use
   ymm6 as scratch.
*/
1248
// Number of times DO_D runs each test.
#define N_DEFAULT_ITERS 3

// Do the specified test some number of times: calls test_<_testfn>()
// _iters times.  _iters is evaluated exactly once, and the loop state
// uses names that cannot collide with an `i` in the caller's scope.
#define DO_N(_iters, _testfn) \
   do { \
      int do_n_limit_ = (_iters); \
      int do_n_idx_; \
      for (do_n_idx_ = 0; do_n_idx_ < do_n_limit_; do_n_idx_++) { \
         test_##_testfn(); \
      } \
   } while (0)

// Do the specified test the default number of times
#define DO_D(_testfn) DO_N(N_DEFAULT_ITERS, _testfn)
1257
1258
1259int main ( void )
1260{
1261   DO_D( VPOR_256 );
1262   DO_D( VPXOR_256 );
1263   DO_D( VPSUBB_256 );
1264   DO_D( VPSUBD_256 );
1265   DO_D( VPADDD_256 );
1266   DO_D( VPMOVZXWD_256 );
1267   DO_D( VPMOVZXBW_256 );
1268   DO_D( VPBLENDVB_256 );
1269   DO_D( VPMINSD_256 );
1270   DO_D( VPMAXSD_256 );
1271   DO_D( VPSHUFB_256 );
1272   DO_D( VPUNPCKLBW_256 );
1273   DO_D( VPUNPCKHBW_256 );
1274   DO_D( VPABSD_256 );
1275   DO_D( VPACKUSWB_256 );
1276   DO_D( VPMOVMSKB_256 );
1277   DO_D( VPAND_256 );
1278   DO_D( VPCMPEQB_256 );
1279   DO_D( VPSHUFLW_0x39_256 );
1280   DO_D( VPSHUFHW_0x39_256 );
1281   DO_D( VPMULLW_256 );
1282   DO_D( VPADDUSW_256 );
1283   DO_D( VPMULHUW_256 );
1284   DO_D( VPADDUSB_256 );
1285   DO_D( VPUNPCKLWD_256 );
1286   DO_D( VPUNPCKHWD_256 );
1287   DO_D( VPSLLD_0x05_256 );
1288   DO_D( VPSRLD_0x05_256 );
1289   DO_D( VPSRAD_0x05_256 );
1290   DO_D( VPSUBUSB_256 );
1291   DO_D( VPSUBSB_256 );
1292   DO_D( VPSRLDQ_0x05_256 );
1293   DO_D( VPSLLDQ_0x05_256 );
1294   DO_D( VPANDN_256 );
1295   DO_D( VPUNPCKLQDQ_256 );
1296   DO_D( VPSRLW_0x05_256 );
1297   DO_D( VPSLLW_0x05_256 );
1298   DO_D( VPADDW_256 );
1299   DO_D( VPACKSSDW_256 );
1300   DO_D( VPUNPCKLDQ_256 );
1301   DO_D( VPCMPEQD_256 );
1302   DO_D( VPSHUFD_0x39_256 );
1303   DO_D( VPADDQ_256 );
1304   DO_D( VPSUBQ_256 );
1305   DO_D( VPSUBW_256 );
1306   DO_D( VPCMPEQQ_256 );
1307   DO_D( VPCMPGTQ_256 );
1308   DO_D( VPSRLQ_0x05_256 );
1309   DO_D( VPMULUDQ_256 );
1310   DO_D( VPMULDQ_256 );
1311   DO_D( VPSLLQ_0x05_256 );
1312   DO_D( VPMAXUD_256 );
1313   DO_D( VPMINUD_256 );
1314   DO_D( VPMULLD_256 );
1315   DO_D( VPMAXUW_256 );
1316   DO_D( VPMINUW_256 );
1317   DO_D( VPMAXSW_256 );
1318   DO_D( VPMINSW_256 );
1319   DO_D( VPMAXUB_256 );
1320   DO_D( VPMINUB_256 );
1321   DO_D( VPMAXSB_256 );
1322   DO_D( VPMINSB_256 );
1323   DO_D( VPMOVSXBW_256 );
1324   DO_D( VPSUBUSW_256 );
1325   DO_D( VPSUBSW_256 );
1326   DO_D( VPCMPEQW_256 );
1327   DO_D( VPADDB_256 );
1328   DO_D( VPUNPCKHDQ_256 );
1329   DO_D( VPMOVSXDQ_256 );
1330   DO_D( VPMOVSXWD_256 );
1331   DO_D( VPMULHW_256 );
1332   DO_D( VPUNPCKHQDQ_256 );
1333   DO_D( VPSRAW_0x05_256 );
1334   DO_D( VPCMPGTB_256 );
1335   DO_D( VPCMPGTW_256 );
1336   DO_D( VPCMPGTD_256 );
1337   DO_D( VPMOVZXBD_256 );
1338   DO_D( VPMOVSXBD_256 );
1339   DO_D( VPALIGNR_256_1of3 );
1340   DO_D( VPALIGNR_256_2of3 );
1341   DO_D( VPALIGNR_256_3of3 );
1342   DO_D( VPBLENDW_256_0x00 );
1343   DO_D( VPBLENDW_256_0xFE );
1344   DO_D( VPBLENDW_256_0x30 );
1345   DO_D( VPBLENDW_256_0x21 );
1346   DO_D( VPBLENDW_256_0xD7 );
1347   DO_D( VPBLENDW_256_0xB5 );
1348   DO_D( VPBLENDW_256_0x85 );
1349   DO_D( VPBLENDW_256_0x29 );
1350   DO_D( VPSLLW_256 );
1351   DO_D( VPSRLW_256 );
1352   DO_D( VPSRAW_256 );
1353   DO_D( VPSLLD_256 );
1354   DO_D( VPSRLD_256 );
1355   DO_D( VPSRAD_256 );
1356   DO_D( VPSLLQ_256 );
1357   DO_D( VPSRLQ_256 );
1358   DO_D( VPMADDWD_256 );
1359   DO_D( VMOVNTDQA_256 );
1360   DO_D( VPACKSSWB_256 );
1361   DO_D( VPAVGB_256 );
1362   DO_D( VPAVGW_256 );
1363   DO_D( VPADDSB_256 );
1364   DO_D( VPADDSW_256 );
1365   DO_D( VPHADDW_256 );
1366   DO_D( VPHADDD_256 );
1367   DO_D( VPHADDSW_256 );
1368   DO_D( VPMADDUBSW_256 );
1369   DO_D( VPHSUBW_256 );
1370   DO_D( VPHSUBD_256 );
1371   DO_D( VPHSUBSW_256 );
1372   DO_D( VPABSB_256 );
1373   DO_D( VPABSW_256 );
1374   DO_D( VPMOVSXBQ_256 );
1375   DO_D( VPMOVSXWQ_256 );
1376   DO_D( VPACKUSDW_256 );
1377   DO_D( VPMOVZXBQ_256 );
1378   DO_D( VPMOVZXWQ_256 );
1379   DO_D( VPMOVZXDQ_256 );
1380   DO_D( VMPSADBW_256_0x0 );
1381   DO_D( VMPSADBW_256_0x39 );
1382   DO_D( VMPSADBW_256_0x32 );
1383   DO_D( VMPSADBW_256_0x2b );
1384   DO_D( VMPSADBW_256_0x24 );
1385   DO_D( VMPSADBW_256_0x1d );
1386   DO_D( VMPSADBW_256_0x16 );
1387   DO_D( VMPSADBW_256_0x0f );
1388   DO_D( VPSADBW_256 );
1389   DO_D( VPSIGNB_256 );
1390   DO_D( VPSIGNW_256 );
1391   DO_D( VPSIGND_256 );
1392   DO_D( VPMULHRSW_256 );
1393   DO_D( VBROADCASTI128 );
1394   DO_D( VEXTRACTI128_0x0 );
1395   DO_D( VEXTRACTI128_0x1 );
1396   DO_D( VINSERTI128_0x0 );
1397   DO_D( VINSERTI128_0x1 );
1398   DO_D( VPERM2I128_0x00 );
1399   DO_D( VPERM2I128_0xFF );
1400   DO_D( VPERM2I128_0x30 );
1401   DO_D( VPERM2I128_0x21 );
1402   DO_D( VPERM2I128_0x12 );
1403   DO_D( VPERM2I128_0x03 );
1404   DO_D( VPERM2I128_0x85 );
1405   DO_D( VPERM2I128_0x5A );
1406   DO_D( VBROADCASTSS_128 );
1407   DO_D( VBROADCASTSS_256 );
1408   DO_D( VBROADCASTSD_256 );
1409   DO_D( VPERMD );
1410   DO_D( VPERMQ_0x00 );
1411   DO_D( VPERMQ_0xFE );
1412   DO_D( VPERMQ_0x30 );
1413   DO_D( VPERMQ_0x21 );
1414   DO_D( VPERMQ_0xD7 );
1415   DO_D( VPERMQ_0xB5 );
1416   DO_D( VPERMQ_0x85 );
1417   DO_D( VPERMQ_0x29 );
1418   DO_D( VPERMPS );
1419   DO_D( VPERMPD_0x00 );
1420   DO_D( VPERMPD_0xFE );
1421   DO_D( VPERMPD_0x30 );
1422   DO_D( VPERMPD_0x21 );
1423   DO_D( VPERMPD_0xD7 );
1424   DO_D( VPERMPD_0xB5 );
1425   DO_D( VPERMPD_0x85 );
1426   DO_D( VPERMPD_0x29 );
1427   DO_D( VPBLENDD_128_0x00 );
1428   DO_D( VPBLENDD_128_0x02 );
1429   DO_D( VPBLENDD_128_0x04 );
1430   DO_D( VPBLENDD_128_0x06 );
1431   DO_D( VPBLENDD_128_0x08 );
1432   DO_D( VPBLENDD_128_0x0A );
1433   DO_D( VPBLENDD_128_0x0C );
1434   DO_D( VPBLENDD_128_0x0E );
1435   DO_D( VPBLENDD_256_0x00 );
1436   DO_D( VPBLENDD_256_0xFE );
1437   DO_D( VPBLENDD_256_0x30 );
1438   DO_D( VPBLENDD_256_0x21 );
1439   DO_D( VPBLENDD_256_0xD7 );
1440   DO_D( VPBLENDD_256_0xB5 );
1441   DO_D( VPBLENDD_256_0x85 );
1442   DO_D( VPBLENDD_256_0x29 );
1443   DO_D( VPSLLVD_128 );
1444   DO_D( VPSLLVD_256 );
1445   DO_D( VPSLLVQ_128 );
1446   DO_D( VPSLLVQ_256 );
1447   DO_D( VPSRLVD_128 );
1448   DO_D( VPSRLVD_256 );
1449   DO_D( VPSRLVQ_128 );
1450   DO_D( VPSRLVQ_256 );
1451   DO_D( VPSRAVD_128 );
1452   DO_D( VPSRAVD_256 );
1453   DO_D( VPBROADCASTB_128 );
1454   DO_D( VPBROADCASTB_256 );
1455   DO_D( VPBROADCASTW_128 );
1456   DO_D( VPBROADCASTW_256 );
1457   DO_D( VPBROADCASTD_128 );
1458   DO_D( VPBROADCASTD_256 );
1459   DO_D( VPBROADCASTQ_128 );
1460   DO_D( VPBROADCASTQ_256 );
1461   DO_D( VPMASKMOVD_128_LoadForm );
1462   DO_D( VPMASKMOVD_256_LoadForm );
1463   DO_D( VPMASKMOVQ_128_LoadForm );
1464   DO_D( VPMASKMOVQ_256_LoadForm );
1465   DO_D( VPMASKMOVD_128_StoreForm );
1466   DO_D( VPMASKMOVD_256_StoreForm );
1467   DO_D( VPMASKMOVQ_128_StoreForm );
1468   DO_D( VPMASKMOVQ_256_StoreForm );
1469   { int i; for (i = 0; i < sizeof(randArray); i++) randArray[i] = randUChar(); }
1470   DO_D( VGATHERDPS_128 );
1471   DO_D( VGATHERDPS_256 );
1472   DO_D( VGATHERQPS_128_1 );
1473   DO_D( VGATHERQPS_256_1 );
1474   DO_D( VGATHERQPS_128_2 );
1475   DO_D( VGATHERQPS_256_2 );
1476   DO_D( VGATHERDPD_128 );
1477   DO_D( VGATHERDPD_256 );
1478   DO_D( VGATHERQPD_128_1 );
1479   DO_D( VGATHERQPD_256_1 );
1480   DO_D( VGATHERQPD_128_2 );
1481   DO_D( VGATHERQPD_256_2 );
1482   DO_D( VPGATHERDD_128 );
1483   DO_D( VPGATHERDD_256 );
1484   DO_D( VPGATHERQD_128_1 );
1485   DO_D( VPGATHERQD_256_1 );
1486   DO_D( VPGATHERQD_128_2 );
1487   DO_D( VPGATHERQD_256_2 );
1488   DO_D( VPGATHERDQ_128 );
1489   DO_D( VPGATHERDQ_256 );
1490   DO_D( VPGATHERQQ_128_1 );
1491   DO_D( VPGATHERQQ_256_1 );
1492   DO_D( VPGATHERQQ_128_2 );
1493   DO_D( VPGATHERQQ_256_2 );
1494   return 0;
1495}
1496