#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <malloc.h>

typedef  unsigned char           UChar;
typedef  unsigned int            UInt;
typedef  unsigned long int       UWord;
typedef  unsigned long long int  ULong;

/* Byte table addressed by name (randArray(%rip)) from the gather tests'
   inline asm; the 'used' attribute keeps the symbol emitted. */
UChar randArray[1027] __attribute__((used));

/* True iff the low five address bits of _ptr are zero. */
#define IS_32_ALIGNED(_ptr) (0 == (0x1F & (UWord)(_ptr)))

/* One 256-bit vector, viewable as 32 bytes or 8 dwords. */
typedef  union { UChar u8[32];  UInt u32[8];  }  YMM;

/* The working set of one test: four vectors (byte offsets 0, 32, 64, 96)
   followed by one 64-bit integer (offset 128). */
typedef  struct {  YMM a1; YMM a2; YMM a3; YMM a4; ULong u64; }  Block;

/* Print the 32 bytes of *vec as hex, highest index first, with a '.'
   between each group of eight bytes.  vec must be 32-byte aligned. */
void showYMM ( const YMM* vec )
{
   int i;
   assert(IS_32_ALIGNED(vec));
   for (i = 31; i >= 0; i--) {
      printf("%02x", (UInt)vec->u8[i]);
      /* Separator after bytes 24, 16 and 8, but not after the last one. */
      if (i > 0 && (i & 7) == 0)
         printf(".");
   }
}

/* Print msg on a line of its own, then the four vectors and the 64-bit
   integer of *block, one per line. */
void showBlock ( const char* msg, const Block* block )
{
   printf("  %s\n", msg);
   printf("    "); showYMM(&block->a1); printf("\n");
   printf("    "); showYMM(&block->a2); printf("\n");
   printf("    "); showYMM(&block->a3); printf("\n");
   printf("    "); showYMM(&block->a4); printf("\n");
   printf("    %016llx\n", block->u64);
}

/* Deterministic pseudo-random byte: a linear congruential generator with
   a fixed initial seed, so every run produces the same sequence. */
UChar randUChar ( void )
{
   static UInt seed = 80021;
   seed = 1103515245 * seed + 12345;
   return (seed >> 17) & 0xFF;
}

/* Overwrite every byte of *b with successive randUChar() values. */
void randBlock ( Block* b )
{
   size_t i;
   UChar* p = (UChar*)b;
   for (i = 0; i < sizeof(Block); i++)
      p[i] = randUChar();
}


/* Generate a function test_NAME, that tests the given insn, in both
   its mem and reg forms.  The reg form of the insn may mention, as
   operands only %ymm6, %ymm7, %ymm8, %ymm9 and %r14.  The mem form of
   the insn may mention as operands only (%rax), %ymm7, %ymm8, %ymm9
   and %r14.  It's OK for the insn to clobber ymm0, as this is needed
   for testing PCMPxSTRx, and ymm6, as this is needed for testing
   MOVMASK variants.

   Register <-> Block mapping set up by the wrapper (%0 is the Block):
     reg form:  a1 -> ymm7,   a2 -> ymm8,  a3 -> ymm6,  a4 -> ymm9,
                u64 -> r14
     mem form:  a1 -> (%rax), a2 -> ymm8,  a3 -> ymm7,  a4 -> ymm9,
                u64 -> r14
   After the insn the same registers are written back to the Block (in
   the mem form a1 is only changed if the insn itself stores to it), and
   the Block is printed before and after each form. */

#define GEN_test_RandM(_name, _reg_form, _mem_form)   \
    \
    __attribute__ ((noinline)) static void test_##_name ( void )   \
   { \
     Block* b = memalign(32, sizeof(Block)); \
     assert(b != NULL); \
     randBlock(b); \
     printf("%s(reg)\n", #_name); \
     showBlock("before", b); \
     __asm__ __volatile__( \
        "vmovdqa   0(%0),%%ymm7"  "\n\t" \
        "vmovdqa  32(%0),%%ymm8"  "\n\t" \
        "vmovdqa  64(%0),%%ymm6"  "\n\t" \
        "vmovdqa  96(%0),%%ymm9"  "\n\t" \
        "movq    128(%0),%%r14"   "\n\t" \
        _reg_form   "\n\t" \
        "vmovdqa %%ymm7,  0(%0)"  "\n\t" \
        "vmovdqa %%ymm8, 32(%0)"  "\n\t" \
        "vmovdqa %%ymm6, 64(%0)"  "\n\t" \
        "vmovdqa %%ymm9, 96(%0)"  "\n\t" \
        "movq    %%r14, 128(%0)"  "\n\t" \
        : /*OUT*/  \
        : /*IN*/"r"(b) \
        : /*TRASH*/"xmm0","xmm7","xmm8","xmm6","xmm9","r14","memory","cc" \
     ); \
     showBlock("after", b); \
     randBlock(b); \
     printf("%s(mem)\n", #_name); \
     showBlock("before", b); \
     __asm__ __volatile__( \
        "leaq      0(%0),%%rax"  "\n\t" \
        "vmovdqa  32(%0),%%ymm8"  "\n\t" \
        "vmovdqa  64(%0),%%ymm7"  "\n\t" \
        "vmovdqa  96(%0),%%ymm9"  "\n\t" \
        "movq    128(%0),%%r14"   "\n\t" \
        _mem_form   "\n\t" \
        "vmovdqa %%ymm8, 32(%0)"  "\n\t" \
        "vmovdqa %%ymm7, 64(%0)"  "\n\t" \
        "vmovdqa %%ymm9, 96(%0)"  "\n\t" \
        "movq    %%r14, 128(%0)"  "\n\t" \
        : /*OUT*/  \
        : /*IN*/"r"(b) \
        : /*TRASH*/"xmm6", \
          "xmm0","xmm8","xmm7","xmm9","r14","rax","memory","cc" \
     ); \
     showBlock("after", b); \
     printf("\n"); \
     free(b); \
   }

/* Variants for insns that exist in only one form; the missing form is an
   empty asm string, so that half of the test just round-trips the Block. */
#define GEN_test_Ronly(_name, _reg_form) \
   GEN_test_RandM(_name, _reg_form, "")
#define GEN_test_Monly(_name, _mem_form) \
   GEN_test_RandM(_name, "", _mem_form)

/* Vector integers promoted from 128-bit in AVX to 256-bit in AVX2.  */

GEN_test_RandM(VPOR_256,
               "vpor %%ymm6, %%ymm8, %%ymm7",
               "vpor (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPXOR_256,
               "vpxor %%ymm6, %%ymm8, %%ymm7",
               "vpxor (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSUBB_256,
               "vpsubb %%ymm6, %%ymm8, %%ymm7",
               "vpsubb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSUBD_256,
               "vpsubd %%ymm6, %%ymm8, %%ymm7",
               "vpsubd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPADDD_256,
               "vpaddd %%ymm6, %%ymm8, %%ymm7",
               "vpaddd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMOVZXWD_256,
               "vpmovzxwd %%xmm6, %%ymm8",
               "vpmovzxwd (%%rax), %%ymm8")

GEN_test_RandM(VPMOVZXBW_256,
               "vpmovzxbw %%xmm6, %%ymm8",
               "vpmovzxbw (%%rax), %%ymm8")

GEN_test_RandM(VPBLENDVB_256,
               "vpblendvb %%ymm9, %%ymm6, %%ymm8, %%ymm7",
               "vpblendvb %%ymm9, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMINSD_256,
               "vpminsd %%ymm6, %%ymm8, %%ymm7",
               "vpminsd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMAXSD_256,
               "vpmaxsd %%ymm6, %%ymm8, %%ymm7",
               "vpmaxsd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSHUFB_256,
               "vpshufb %%ymm6, %%ymm8, %%ymm7",
               "vpshufb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKLBW_256,
               "vpunpcklbw %%ymm6, %%ymm8, %%ymm7",
               "vpunpcklbw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKHBW_256,
               "vpunpckhbw %%ymm6, %%ymm8, %%ymm7",
               "vpunpckhbw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPABSD_256,
               "vpabsd %%ymm6, %%ymm8",
               "vpabsd (%%rax), %%ymm8")

GEN_test_RandM(VPACKUSWB_256,
               "vpackuswb %%ymm9, %%ymm8, %%ymm7",
               "vpackuswb (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VPMOVMSKB_256,
               "vpmovmskb %%ymm8, %%r14")

GEN_test_RandM(VPAND_256,
               "vpand %%ymm9, %%ymm8, %%ymm7",
               "vpand (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPEQB_256,
               "vpcmpeqb %%ymm9, %%ymm8, %%ymm7",
               "vpcmpeqb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSHUFLW_0x39_256,
               "vpshuflw $0x39, %%ymm9, %%ymm7",
               "vpshuflw $0xC6, (%%rax), %%ymm8")

GEN_test_RandM(VPSHUFHW_0x39_256,
               "vpshufhw $0x39, %%ymm9, %%ymm7",
               "vpshufhw $0xC6, (%%rax), %%ymm8")

GEN_test_RandM(VPMULLW_256,
               "vpmullw %%ymm9, %%ymm8, %%ymm7",
               "vpmullw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPADDUSW_256,
               "vpaddusw %%ymm9, %%ymm8, %%ymm7",
               "vpaddusw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMULHUW_256,
               "vpmulhuw %%ymm9, %%ymm8, %%ymm7",
               "vpmulhuw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPADDUSB_256,
               "vpaddusb %%ymm9, %%ymm8, %%ymm7",
               "vpaddusb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKLWD_256,
               "vpunpcklwd %%ymm6, %%ymm8, %%ymm7",
               "vpunpcklwd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKHWD_256,
               "vpunpckhwd %%ymm6, %%ymm8, %%ymm7",
               "vpunpckhwd (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VPSLLD_0x05_256,
               "vpslld $0x5, %%ymm9, %%ymm7")

GEN_test_Ronly(VPSRLD_0x05_256,
               "vpsrld $0x5, %%ymm9, %%ymm7")

GEN_test_Ronly(VPSRAD_0x05_256,
               "vpsrad $0x5, %%ymm9, %%ymm7")

GEN_test_RandM(VPSUBUSB_256,
               "vpsubusb %%ymm9, %%ymm8, %%ymm7",
               "vpsubusb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSUBSB_256,
               "vpsubsb %%ymm9, %%ymm8, %%ymm7",
               "vpsubsb (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VPSRLDQ_0x05_256,
               "vpsrldq $0x5, %%ymm9, %%ymm7")

GEN_test_Ronly(VPSLLDQ_0x05_256,
               "vpslldq $0x5, %%ymm9, %%ymm7")

GEN_test_RandM(VPANDN_256,
               "vpandn %%ymm9, %%ymm8, %%ymm7",
               "vpandn (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKLQDQ_256,
               "vpunpcklqdq %%ymm6, %%ymm8, %%ymm7",
               "vpunpcklqdq (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VPSRLW_0x05_256,
               "vpsrlw $0x5, %%ymm9, %%ymm7")

GEN_test_Ronly(VPSLLW_0x05_256,
               "vpsllw $0x5, %%ymm9, %%ymm7")

GEN_test_RandM(VPADDW_256,
               "vpaddw %%ymm6, %%ymm8, %%ymm7",
               "vpaddw (%%rax), %%ymm8, %%ymm7")

264GEN_test_RandM(VPACKSSDW_256, 265 "vpackssdw %%ymm9, %%ymm8, %%ymm7", 266 "vpackssdw (%%rax), %%ymm8, %%ymm7") 267 268GEN_test_RandM(VPUNPCKLDQ_256, 269 "vpunpckldq %%ymm6, %%ymm8, %%ymm7", 270 "vpunpckldq (%%rax), %%ymm8, %%ymm7") 271 272GEN_test_RandM(VPCMPEQD_256, 273 "vpcmpeqd %%ymm6, %%ymm8, %%ymm7", 274 "vpcmpeqd (%%rax), %%ymm8, %%ymm7") 275 276GEN_test_RandM(VPSHUFD_0x39_256, 277 "vpshufd $0x39, %%ymm9, %%ymm8", 278 "vpshufd $0xC6, (%%rax), %%ymm7") 279 280GEN_test_RandM(VPADDQ_256, 281 "vpaddq %%ymm6, %%ymm8, %%ymm7", 282 "vpaddq (%%rax), %%ymm8, %%ymm7") 283 284GEN_test_RandM(VPSUBQ_256, 285 "vpsubq %%ymm6, %%ymm8, %%ymm7", 286 "vpsubq (%%rax), %%ymm8, %%ymm7") 287 288GEN_test_RandM(VPSUBW_256, 289 "vpsubw %%ymm6, %%ymm8, %%ymm7", 290 "vpsubw (%%rax), %%ymm8, %%ymm7") 291 292GEN_test_RandM(VPCMPEQQ_256, 293 "vpcmpeqq %%ymm6, %%ymm8, %%ymm7", 294 "vpcmpeqq (%%rax), %%ymm8, %%ymm7") 295 296GEN_test_RandM(VPCMPGTQ_256, 297 "vpcmpgtq %%ymm6, %%ymm8, %%ymm7", 298 "vpcmpgtq (%%rax), %%ymm8, %%ymm7") 299 300GEN_test_Ronly(VPSRLQ_0x05_256, 301 "vpsrlq $0x5, %%ymm9, %%ymm7") 302 303GEN_test_RandM(VPMULUDQ_256, 304 "vpmuludq %%ymm6, %%ymm8, %%ymm7", 305 "vpmuludq (%%rax), %%ymm8, %%ymm7") 306 307GEN_test_RandM(VPMULDQ_256, 308 "vpmuldq %%ymm6, %%ymm8, %%ymm7", 309 "vpmuldq (%%rax), %%ymm8, %%ymm7") 310 311GEN_test_Ronly(VPSLLQ_0x05_256, 312 "vpsllq $0x5, %%ymm9, %%ymm7") 313 314GEN_test_RandM(VPMAXUD_256, 315 "vpmaxud %%ymm6, %%ymm8, %%ymm7", 316 "vpmaxud (%%rax), %%ymm8, %%ymm7") 317 318GEN_test_RandM(VPMINUD_256, 319 "vpminud %%ymm6, %%ymm8, %%ymm7", 320 "vpminud (%%rax), %%ymm8, %%ymm7") 321 322GEN_test_RandM(VPMULLD_256, 323 "vpmulld %%ymm6, %%ymm8, %%ymm7", 324 "vpmulld (%%rax), %%ymm8, %%ymm7") 325 326GEN_test_RandM(VPMAXUW_256, 327 "vpmaxuw %%ymm6, %%ymm8, %%ymm7", 328 "vpmaxuw (%%rax), %%ymm8, %%ymm7") 329 330GEN_test_RandM(VPMINUW_256, 331 "vpminuw %%ymm6, %%ymm8, %%ymm7", 332 "vpminuw (%%rax), %%ymm8, %%ymm7") 333 334GEN_test_RandM(VPMAXSW_256, 335 
"vpmaxsw %%ymm6, %%ymm8, %%ymm7", 336 "vpmaxsw (%%rax), %%ymm8, %%ymm7") 337 338GEN_test_RandM(VPMINSW_256, 339 "vpminsw %%ymm6, %%ymm8, %%ymm7", 340 "vpminsw (%%rax), %%ymm8, %%ymm7") 341 342GEN_test_RandM(VPMAXUB_256, 343 "vpmaxub %%ymm6, %%ymm8, %%ymm7", 344 "vpmaxub (%%rax), %%ymm8, %%ymm7") 345 346GEN_test_RandM(VPMINUB_256, 347 "vpminub %%ymm6, %%ymm8, %%ymm7", 348 "vpminub (%%rax), %%ymm8, %%ymm7") 349 350GEN_test_RandM(VPMAXSB_256, 351 "vpmaxsb %%ymm6, %%ymm8, %%ymm7", 352 "vpmaxsb (%%rax), %%ymm8, %%ymm7") 353 354GEN_test_RandM(VPMINSB_256, 355 "vpminsb %%ymm6, %%ymm8, %%ymm7", 356 "vpminsb (%%rax), %%ymm8, %%ymm7") 357 358GEN_test_RandM(VPMOVSXBW_256, 359 "vpmovsxbw %%xmm6, %%ymm8", 360 "vpmovsxbw (%%rax), %%ymm8") 361 362GEN_test_RandM(VPSUBUSW_256, 363 "vpsubusw %%ymm9, %%ymm8, %%ymm7", 364 "vpsubusw (%%rax), %%ymm8, %%ymm7") 365 366GEN_test_RandM(VPSUBSW_256, 367 "vpsubsw %%ymm9, %%ymm8, %%ymm7", 368 "vpsubsw (%%rax), %%ymm8, %%ymm7") 369 370GEN_test_RandM(VPCMPEQW_256, 371 "vpcmpeqw %%ymm6, %%ymm8, %%ymm7", 372 "vpcmpeqw (%%rax), %%ymm8, %%ymm7") 373 374GEN_test_RandM(VPADDB_256, 375 "vpaddb %%ymm6, %%ymm8, %%ymm7", 376 "vpaddb (%%rax), %%ymm8, %%ymm7") 377 378GEN_test_RandM(VPUNPCKHDQ_256, 379 "vpunpckhdq %%ymm6, %%ymm8, %%ymm7", 380 "vpunpckhdq (%%rax), %%ymm8, %%ymm7") 381 382GEN_test_RandM(VPMOVSXDQ_256, 383 "vpmovsxdq %%xmm6, %%ymm8", 384 "vpmovsxdq (%%rax), %%ymm8") 385 386GEN_test_RandM(VPMOVSXWD_256, 387 "vpmovsxwd %%xmm6, %%ymm8", 388 "vpmovsxwd (%%rax), %%ymm8") 389 390GEN_test_RandM(VPMULHW_256, 391 "vpmulhw %%ymm9, %%ymm8, %%ymm7", 392 "vpmulhw (%%rax), %%ymm8, %%ymm7") 393 394GEN_test_RandM(VPUNPCKHQDQ_256, 395 "vpunpckhqdq %%ymm6, %%ymm8, %%ymm7", 396 "vpunpckhqdq (%%rax), %%ymm8, %%ymm7") 397 398GEN_test_Ronly(VPSRAW_0x05_256, 399 "vpsraw $0x5, %%ymm9, %%ymm7") 400 401GEN_test_RandM(VPCMPGTB_256, 402 "vpcmpgtb %%ymm6, %%ymm8, %%ymm7", 403 "vpcmpgtb (%%rax), %%ymm8, %%ymm7") 404 405GEN_test_RandM(VPCMPGTW_256, 406 "vpcmpgtw %%ymm6, 
%%ymm8, %%ymm7", 407 "vpcmpgtw (%%rax), %%ymm8, %%ymm7") 408 409GEN_test_RandM(VPCMPGTD_256, 410 "vpcmpgtd %%ymm6, %%ymm8, %%ymm7", 411 "vpcmpgtd (%%rax), %%ymm8, %%ymm7") 412 413GEN_test_RandM(VPMOVZXBD_256, 414 "vpmovzxbd %%xmm6, %%ymm8", 415 "vpmovzxbd (%%rax), %%ymm8") 416 417GEN_test_RandM(VPMOVSXBD_256, 418 "vpmovsxbd %%xmm6, %%ymm8", 419 "vpmovsxbd (%%rax), %%ymm8") 420 421GEN_test_RandM(VPALIGNR_256_1of3, 422 "vpalignr $0, %%ymm6, %%ymm8, %%ymm7", 423 "vpalignr $3, (%%rax), %%ymm8, %%ymm7") 424GEN_test_RandM(VPALIGNR_256_2of3, 425 "vpalignr $6, %%ymm6, %%ymm8, %%ymm7", 426 "vpalignr $9, (%%rax), %%ymm8, %%ymm7") 427GEN_test_RandM(VPALIGNR_256_3of3, 428 "vpalignr $12, %%ymm6, %%ymm8, %%ymm7", 429 "vpalignr $15, (%%rax), %%ymm8, %%ymm7") 430 431GEN_test_RandM(VPBLENDW_256_0x00, 432 "vpblendw $0x00, %%ymm6, %%ymm8, %%ymm7", 433 "vpblendw $0x01, (%%rax), %%ymm8, %%ymm7") 434GEN_test_RandM(VPBLENDW_256_0xFE, 435 "vpblendw $0xFE, %%ymm6, %%ymm8, %%ymm7", 436 "vpblendw $0xFF, (%%rax), %%ymm8, %%ymm7") 437GEN_test_RandM(VPBLENDW_256_0x30, 438 "vpblendw $0x30, %%ymm6, %%ymm8, %%ymm7", 439 "vpblendw $0x03, (%%rax), %%ymm8, %%ymm7") 440GEN_test_RandM(VPBLENDW_256_0x21, 441 "vpblendw $0x21, %%ymm6, %%ymm8, %%ymm7", 442 "vpblendw $0x12, (%%rax), %%ymm8, %%ymm7") 443GEN_test_RandM(VPBLENDW_256_0xD7, 444 "vpblendw $0xD7, %%ymm6, %%ymm8, %%ymm7", 445 "vpblendw $0x6C, (%%rax), %%ymm8, %%ymm7") 446GEN_test_RandM(VPBLENDW_256_0xB5, 447 "vpblendw $0xB5, %%ymm6, %%ymm8, %%ymm7", 448 "vpblendw $0x4A, (%%rax), %%ymm8, %%ymm7") 449GEN_test_RandM(VPBLENDW_256_0x85, 450 "vpblendw $0x85, %%ymm6, %%ymm8, %%ymm7", 451 "vpblendw $0xDC, (%%rax), %%ymm8, %%ymm7") 452GEN_test_RandM(VPBLENDW_256_0x29, 453 "vpblendw $0x29, %%ymm6, %%ymm8, %%ymm7", 454 "vpblendw $0x92, (%%rax), %%ymm8, %%ymm7") 455 456GEN_test_RandM(VPSLLW_256, 457 "andl $15, %%r14d;" 458 "vmovd %%r14d, %%xmm6;" 459 "vpsllw %%xmm6, %%ymm8, %%ymm9", 460 "andq $15, 128(%%rax);" 461 "vpsllw 128(%%rax), %%ymm8, %%ymm9") 462 
463GEN_test_RandM(VPSRLW_256, 464 "andl $15, %%r14d;" 465 "vmovd %%r14d, %%xmm6;" 466 "vpsrlw %%xmm6, %%ymm8, %%ymm9", 467 "andq $15, 128(%%rax);" 468 "vpsrlw 128(%%rax), %%ymm8, %%ymm9") 469 470GEN_test_RandM(VPSRAW_256, 471 "andl $31, %%r14d;" 472 "vmovd %%r14d, %%xmm6;" 473 "vpsraw %%xmm6, %%ymm8, %%ymm9", 474 "andq $15, 128(%%rax);" 475 "vpsraw 128(%%rax), %%ymm8, %%ymm9") 476 477GEN_test_RandM(VPSLLD_256, 478 "andl $31, %%r14d;" 479 "vmovd %%r14d, %%xmm6;" 480 "vpslld %%xmm6, %%ymm8, %%ymm9", 481 "andq $31, 128(%%rax);" 482 "vpslld 128(%%rax), %%ymm8, %%ymm9") 483 484GEN_test_RandM(VPSRLD_256, 485 "andl $31, %%r14d;" 486 "vmovd %%r14d, %%xmm6;" 487 "vpsrld %%xmm6, %%ymm8, %%ymm9", 488 "andq $31, 128(%%rax);" 489 "vpsrld 128(%%rax), %%ymm8, %%ymm9") 490 491GEN_test_RandM(VPSRAD_256, 492 "andl $31, %%r14d;" 493 "vmovd %%r14d, %%xmm6;" 494 "vpsrad %%xmm6, %%ymm8, %%ymm9", 495 "andq $31, 128(%%rax);" 496 "vpsrad 128(%%rax), %%ymm8, %%ymm9") 497 498GEN_test_RandM(VPSLLQ_256, 499 "andl $63, %%r14d;" 500 "vmovd %%r14d, %%xmm6;" 501 "vpsllq %%xmm6, %%ymm8, %%ymm9", 502 "andq $63, 128(%%rax);" 503 "vpsllq 128(%%rax), %%ymm8, %%ymm9") 504 505GEN_test_RandM(VPSRLQ_256, 506 "andl $63, %%r14d;" 507 "vmovd %%r14d, %%xmm6;" 508 "vpsrlq %%xmm6, %%ymm8, %%ymm9", 509 "andq $63, 128(%%rax);" 510 "vpsrlq 128(%%rax), %%ymm8, %%ymm9") 511 512GEN_test_RandM(VPMADDWD_256, 513 "vpmaddwd %%ymm6, %%ymm8, %%ymm7", 514 "vpmaddwd (%%rax), %%ymm8, %%ymm7") 515 516GEN_test_Monly(VMOVNTDQA_256, 517 "vmovntdqa (%%rax), %%ymm9") 518 519GEN_test_RandM(VPACKSSWB_256, 520 "vpacksswb %%ymm6, %%ymm8, %%ymm7", 521 "vpacksswb (%%rax), %%ymm8, %%ymm7") 522 523GEN_test_RandM(VPAVGB_256, 524 "vpavgb %%ymm6, %%ymm8, %%ymm7", 525 "vpavgb (%%rax), %%ymm8, %%ymm7") 526 527GEN_test_RandM(VPAVGW_256, 528 "vpavgw %%ymm6, %%ymm8, %%ymm7", 529 "vpavgw (%%rax), %%ymm8, %%ymm7") 530 531GEN_test_RandM(VPADDSB_256, 532 "vpaddsb %%ymm6, %%ymm8, %%ymm7", 533 "vpaddsb (%%rax), %%ymm8, %%ymm7") 534 
535GEN_test_RandM(VPADDSW_256, 536 "vpaddsw %%ymm6, %%ymm8, %%ymm7", 537 "vpaddsw (%%rax), %%ymm8, %%ymm7") 538 539GEN_test_RandM(VPHADDW_256, 540 "vphaddw %%ymm6, %%ymm8, %%ymm7", 541 "vphaddw (%%rax), %%ymm8, %%ymm7") 542 543GEN_test_RandM(VPHADDD_256, 544 "vphaddd %%ymm6, %%ymm8, %%ymm7", 545 "vphaddd (%%rax), %%ymm8, %%ymm7") 546 547GEN_test_RandM(VPHADDSW_256, 548 "vphaddsw %%ymm6, %%ymm8, %%ymm7", 549 "vphaddsw (%%rax), %%ymm8, %%ymm7") 550 551GEN_test_RandM(VPMADDUBSW_256, 552 "vpmaddubsw %%ymm6, %%ymm8, %%ymm7", 553 "vpmaddubsw (%%rax), %%ymm8, %%ymm7") 554 555GEN_test_RandM(VPHSUBW_256, 556 "vphsubw %%ymm6, %%ymm8, %%ymm7", 557 "vphsubw (%%rax), %%ymm8, %%ymm7") 558 559GEN_test_RandM(VPHSUBD_256, 560 "vphsubd %%ymm6, %%ymm8, %%ymm7", 561 "vphsubd (%%rax), %%ymm8, %%ymm7") 562 563GEN_test_RandM(VPHSUBSW_256, 564 "vphsubsw %%ymm6, %%ymm8, %%ymm7", 565 "vphsubsw (%%rax), %%ymm8, %%ymm7") 566 567GEN_test_RandM(VPABSB_256, 568 "vpabsb %%ymm6, %%ymm7", 569 "vpabsb (%%rax), %%ymm7") 570 571GEN_test_RandM(VPABSW_256, 572 "vpabsw %%ymm6, %%ymm7", 573 "vpabsw (%%rax), %%ymm7") 574 575GEN_test_RandM(VPMOVSXBQ_256, 576 "vpmovsxbq %%xmm6, %%ymm8", 577 "vpmovsxbq (%%rax), %%ymm8") 578 579GEN_test_RandM(VPMOVSXWQ_256, 580 "vpmovsxwq %%xmm6, %%ymm8", 581 "vpmovsxwq (%%rax), %%ymm8") 582 583GEN_test_RandM(VPACKUSDW_256, 584 "vpackusdw %%ymm6, %%ymm8, %%ymm7", 585 "vpackusdw (%%rax), %%ymm8, %%ymm7") 586 587GEN_test_RandM(VPMOVZXBQ_256, 588 "vpmovzxbq %%xmm6, %%ymm8", 589 "vpmovzxbq (%%rax), %%ymm8") 590 591GEN_test_RandM(VPMOVZXWQ_256, 592 "vpmovzxwq %%xmm6, %%ymm8", 593 "vpmovzxwq (%%rax), %%ymm8") 594 595GEN_test_RandM(VPMOVZXDQ_256, 596 "vpmovzxdq %%xmm6, %%ymm8", 597 "vpmovzxdq (%%rax), %%ymm8") 598 599GEN_test_RandM(VMPSADBW_256_0x0, 600 "vmpsadbw $0, %%ymm6, %%ymm8, %%ymm7", 601 "vmpsadbw $0, (%%rax), %%ymm8, %%ymm7") 602GEN_test_RandM(VMPSADBW_256_0x39, 603 "vmpsadbw $0x39, %%ymm6, %%ymm8, %%ymm7", 604 "vmpsadbw $0x39, (%%rax), %%ymm8, %%ymm7") 
605GEN_test_RandM(VMPSADBW_256_0x32, 606 "vmpsadbw $0x32, %%ymm6, %%ymm8, %%ymm7", 607 "vmpsadbw $0x32, (%%rax), %%ymm8, %%ymm7") 608GEN_test_RandM(VMPSADBW_256_0x2b, 609 "vmpsadbw $0x2b, %%ymm6, %%ymm8, %%ymm7", 610 "vmpsadbw $0x2b, (%%rax), %%ymm8, %%ymm7") 611GEN_test_RandM(VMPSADBW_256_0x24, 612 "vmpsadbw $0x24, %%ymm6, %%ymm8, %%ymm7", 613 "vmpsadbw $0x24, (%%rax), %%ymm8, %%ymm7") 614GEN_test_RandM(VMPSADBW_256_0x1d, 615 "vmpsadbw $0x1d, %%ymm6, %%ymm8, %%ymm7", 616 "vmpsadbw $0x1d, (%%rax), %%ymm8, %%ymm7") 617GEN_test_RandM(VMPSADBW_256_0x16, 618 "vmpsadbw $0x16, %%ymm6, %%ymm8, %%ymm7", 619 "vmpsadbw $0x16, (%%rax), %%ymm8, %%ymm7") 620GEN_test_RandM(VMPSADBW_256_0x0f, 621 "vmpsadbw $0x0f, %%ymm6, %%ymm8, %%ymm7", 622 "vmpsadbw $0x0f, (%%rax), %%ymm8, %%ymm7") 623 624GEN_test_RandM(VPSADBW_256, 625 "vpsadbw %%ymm6, %%ymm8, %%ymm7", 626 "vpsadbw (%%rax), %%ymm8, %%ymm7") 627 628GEN_test_RandM(VPSIGNB_256, 629 "vpsignb %%ymm6, %%ymm8, %%ymm7", 630 "vpsignb (%%rax), %%ymm8, %%ymm7") 631 632GEN_test_RandM(VPSIGNW_256, 633 "vpsignw %%ymm6, %%ymm8, %%ymm7", 634 "vpsignw (%%rax), %%ymm8, %%ymm7") 635 636GEN_test_RandM(VPSIGND_256, 637 "vpsignd %%ymm6, %%ymm8, %%ymm7", 638 "vpsignd (%%rax), %%ymm8, %%ymm7") 639 640GEN_test_RandM(VPMULHRSW_256, 641 "vpmulhrsw %%ymm6, %%ymm8, %%ymm7", 642 "vpmulhrsw (%%rax), %%ymm8, %%ymm7") 643 644/* Instructions new in AVX2. 
*/ 645 646GEN_test_Monly(VBROADCASTI128, 647 "vbroadcasti128 (%%rax), %%ymm9") 648 649GEN_test_RandM(VEXTRACTI128_0x0, 650 "vextracti128 $0x0, %%ymm7, %%xmm9", 651 "vextracti128 $0x0, %%ymm7, (%%rax)") 652 653GEN_test_RandM(VEXTRACTI128_0x1, 654 "vextracti128 $0x1, %%ymm7, %%xmm9", 655 "vextracti128 $0x1, %%ymm7, (%%rax)") 656 657GEN_test_RandM(VINSERTI128_0x0, 658 "vinserti128 $0x0, %%xmm9, %%ymm7, %%ymm8", 659 "vinserti128 $0x0, (%%rax), %%ymm7, %%ymm8") 660 661GEN_test_RandM(VINSERTI128_0x1, 662 "vinserti128 $0x1, %%xmm9, %%ymm7, %%ymm8", 663 "vinserti128 $0x1, (%%rax), %%ymm7, %%ymm8") 664 665GEN_test_RandM(VPERM2I128_0x00, 666 "vperm2i128 $0x00, %%ymm6, %%ymm8, %%ymm7", 667 "vperm2i128 $0x00, (%%rax), %%ymm8, %%ymm7") 668GEN_test_RandM(VPERM2I128_0xFF, 669 "vperm2i128 $0xFF, %%ymm6, %%ymm8, %%ymm7", 670 "vperm2i128 $0xFF, (%%rax), %%ymm8, %%ymm7") 671GEN_test_RandM(VPERM2I128_0x30, 672 "vperm2i128 $0x30, %%ymm6, %%ymm8, %%ymm7", 673 "vperm2i128 $0x30, (%%rax), %%ymm8, %%ymm7") 674GEN_test_RandM(VPERM2I128_0x21, 675 "vperm2i128 $0x21, %%ymm6, %%ymm8, %%ymm7", 676 "vperm2i128 $0x21, (%%rax), %%ymm8, %%ymm7") 677GEN_test_RandM(VPERM2I128_0x12, 678 "vperm2i128 $0x12, %%ymm6, %%ymm8, %%ymm7", 679 "vperm2i128 $0x12, (%%rax), %%ymm8, %%ymm7") 680GEN_test_RandM(VPERM2I128_0x03, 681 "vperm2i128 $0x03, %%ymm6, %%ymm8, %%ymm7", 682 "vperm2i128 $0x03, (%%rax), %%ymm8, %%ymm7") 683GEN_test_RandM(VPERM2I128_0x85, 684 "vperm2i128 $0x85, %%ymm6, %%ymm8, %%ymm7", 685 "vperm2i128 $0x85, (%%rax), %%ymm8, %%ymm7") 686GEN_test_RandM(VPERM2I128_0x5A, 687 "vperm2i128 $0x5A, %%ymm6, %%ymm8, %%ymm7", 688 "vperm2i128 $0x5A, (%%rax), %%ymm8, %%ymm7") 689 690GEN_test_Ronly(VBROADCASTSS_128, 691 "vbroadcastss %%xmm9, %%xmm7") 692 693GEN_test_Ronly(VBROADCASTSS_256, 694 "vbroadcastss %%xmm9, %%ymm7") 695 696GEN_test_Ronly(VBROADCASTSD_256, 697 "vbroadcastsd %%xmm9, %%ymm7") 698 699GEN_test_RandM(VPERMD, 700 "vpermd %%ymm6, %%ymm7, %%ymm9", 701 "vpermd (%%rax), %%ymm7, %%ymm9") 702 
703GEN_test_RandM(VPERMQ_0x00, 704 "vpermq $0x00, %%ymm6, %%ymm7", 705 "vpermq $0x01, (%%rax), %%ymm7") 706GEN_test_RandM(VPERMQ_0xFE, 707 "vpermq $0xFE, %%ymm6, %%ymm7", 708 "vpermq $0xFF, (%%rax), %%ymm7") 709GEN_test_RandM(VPERMQ_0x30, 710 "vpermq $0x30, %%ymm6, %%ymm7", 711 "vpermq $0x03, (%%rax), %%ymm7") 712GEN_test_RandM(VPERMQ_0x21, 713 "vpermq $0x21, %%ymm6, %%ymm7", 714 "vpermq $0x12, (%%rax), %%ymm7") 715GEN_test_RandM(VPERMQ_0xD7, 716 "vpermq $0xD7, %%ymm6, %%ymm7", 717 "vpermq $0x6C, (%%rax), %%ymm7") 718GEN_test_RandM(VPERMQ_0xB5, 719 "vpermq $0xB5, %%ymm6, %%ymm7", 720 "vpermq $0x4A, (%%rax), %%ymm7") 721GEN_test_RandM(VPERMQ_0x85, 722 "vpermq $0x85, %%ymm6, %%ymm7", 723 "vpermq $0xDC, (%%rax), %%ymm7") 724GEN_test_RandM(VPERMQ_0x29, 725 "vpermq $0x29, %%ymm6, %%ymm7", 726 "vpermq $0x92, (%%rax), %%ymm7") 727 728GEN_test_RandM(VPERMPS, 729 "vpermps %%ymm6, %%ymm7, %%ymm9", 730 "vpermps (%%rax), %%ymm7, %%ymm9") 731 732GEN_test_RandM(VPERMPD_0x00, 733 "vpermpd $0x00, %%ymm6, %%ymm7", 734 "vpermpd $0x01, (%%rax), %%ymm7") 735GEN_test_RandM(VPERMPD_0xFE, 736 "vpermpd $0xFE, %%ymm6, %%ymm7", 737 "vpermpd $0xFF, (%%rax), %%ymm7") 738GEN_test_RandM(VPERMPD_0x30, 739 "vpermpd $0x30, %%ymm6, %%ymm7", 740 "vpermpd $0x03, (%%rax), %%ymm7") 741GEN_test_RandM(VPERMPD_0x21, 742 "vpermpd $0x21, %%ymm6, %%ymm7", 743 "vpermpd $0x12, (%%rax), %%ymm7") 744GEN_test_RandM(VPERMPD_0xD7, 745 "vpermpd $0xD7, %%ymm6, %%ymm7", 746 "vpermpd $0x6C, (%%rax), %%ymm7") 747GEN_test_RandM(VPERMPD_0xB5, 748 "vpermpd $0xB5, %%ymm6, %%ymm7", 749 "vpermpd $0x4A, (%%rax), %%ymm7") 750GEN_test_RandM(VPERMPD_0x85, 751 "vpermpd $0x85, %%ymm6, %%ymm7", 752 "vpermpd $0xDC, (%%rax), %%ymm7") 753GEN_test_RandM(VPERMPD_0x29, 754 "vpermpd $0x29, %%ymm6, %%ymm7", 755 "vpermpd $0x92, (%%rax), %%ymm7") 756 757GEN_test_RandM(VPBLENDD_128_0x00, 758 "vpblendd $0x00, %%xmm6, %%xmm8, %%xmm7", 759 "vpblendd $0x01, (%%rax), %%xmm8, %%xmm7") 760GEN_test_RandM(VPBLENDD_128_0x02, 761 "vpblendd $0x02, %%xmm6, 
%%xmm8, %%xmm7", 762 "vpblendd $0x03, (%%rax), %%xmm8, %%xmm7") 763GEN_test_RandM(VPBLENDD_128_0x04, 764 "vpblendd $0x04, %%xmm6, %%xmm8, %%xmm7", 765 "vpblendd $0x05, (%%rax), %%xmm8, %%xmm7") 766GEN_test_RandM(VPBLENDD_128_0x06, 767 "vpblendd $0x06, %%xmm6, %%xmm8, %%xmm7", 768 "vpblendd $0x07, (%%rax), %%xmm8, %%xmm7") 769GEN_test_RandM(VPBLENDD_128_0x08, 770 "vpblendd $0x08, %%xmm6, %%xmm8, %%xmm7", 771 "vpblendd $0x09, (%%rax), %%xmm8, %%xmm7") 772GEN_test_RandM(VPBLENDD_128_0x0A, 773 "vpblendd $0x0A, %%xmm6, %%xmm8, %%xmm7", 774 "vpblendd $0x0B, (%%rax), %%xmm8, %%xmm7") 775GEN_test_RandM(VPBLENDD_128_0x0C, 776 "vpblendd $0x0C, %%xmm6, %%xmm8, %%xmm7", 777 "vpblendd $0x0D, (%%rax), %%xmm8, %%xmm7") 778GEN_test_RandM(VPBLENDD_128_0x0E, 779 "vpblendd $0x0E, %%xmm6, %%xmm8, %%xmm7", 780 "vpblendd $0x0F, (%%rax), %%xmm8, %%xmm7") 781 782GEN_test_RandM(VPBLENDD_256_0x00, 783 "vpblendd $0x00, %%ymm6, %%ymm8, %%ymm7", 784 "vpblendd $0x01, (%%rax), %%ymm8, %%ymm7") 785GEN_test_RandM(VPBLENDD_256_0xFE, 786 "vpblendd $0xFE, %%ymm6, %%ymm8, %%ymm7", 787 "vpblendd $0xFF, (%%rax), %%ymm8, %%ymm7") 788GEN_test_RandM(VPBLENDD_256_0x30, 789 "vpblendd $0x30, %%ymm6, %%ymm8, %%ymm7", 790 "vpblendd $0x03, (%%rax), %%ymm8, %%ymm7") 791GEN_test_RandM(VPBLENDD_256_0x21, 792 "vpblendd $0x21, %%ymm6, %%ymm8, %%ymm7", 793 "vpblendd $0x12, (%%rax), %%ymm8, %%ymm7") 794GEN_test_RandM(VPBLENDD_256_0xD7, 795 "vpblendd $0xD7, %%ymm6, %%ymm8, %%ymm7", 796 "vpblendd $0x6C, (%%rax), %%ymm8, %%ymm7") 797GEN_test_RandM(VPBLENDD_256_0xB5, 798 "vpblendd $0xB5, %%ymm6, %%ymm8, %%ymm7", 799 "vpblendd $0x4A, (%%rax), %%ymm8, %%ymm7") 800GEN_test_RandM(VPBLENDD_256_0x85, 801 "vpblendd $0x85, %%ymm6, %%ymm8, %%ymm7", 802 "vpblendd $0xDC, (%%rax), %%ymm8, %%ymm7") 803GEN_test_RandM(VPBLENDD_256_0x29, 804 "vpblendd $0x29, %%ymm6, %%ymm8, %%ymm7", 805 "vpblendd $0x92, (%%rax), %%ymm8, %%ymm7") 806 807GEN_test_RandM(VPSLLVD_128, 808 "vpslld $27, %%xmm6, %%xmm6;" 809 "vpsrld $27, %%xmm6, %%xmm6;" 810 
"vpsllvd %%xmm6, %%xmm8, %%xmm7", 811 "andl $31, (%%rax);" 812 "andl $31, 4(%%rax);" 813 "andl $31, 8(%%rax);" 814 "vpsllvd (%%rax), %%xmm8, %%xmm7") 815 816GEN_test_RandM(VPSLLVD_256, 817 "vpslld $27, %%ymm6, %%ymm6;" 818 "vpsrld $27, %%ymm6, %%ymm6;" 819 "vpsllvd %%ymm6, %%ymm8, %%ymm7", 820 "andl $31, (%%rax);" 821 "andl $31, 4(%%rax);" 822 "andl $31, 8(%%rax);" 823 "andl $31, 16(%%rax);" 824 "andl $31, 20(%%rax);" 825 "andl $31, 24(%%rax);" 826 "vpsllvd (%%rax), %%ymm8, %%ymm7") 827 828GEN_test_RandM(VPSLLVQ_128, 829 "vpsllq $58, %%xmm6, %%xmm6;" 830 "vpsrlq $58, %%xmm6, %%xmm6;" 831 "vpsllvq %%xmm6, %%xmm8, %%xmm7", 832 "andl $63, (%%rax);" 833 "vpsllvq (%%rax), %%xmm8, %%xmm7") 834 835GEN_test_RandM(VPSLLVQ_256, 836 "vpsllq $58, %%ymm6, %%ymm6;" 837 "vpsrlq $58, %%ymm6, %%ymm6;" 838 "vpsllvq %%ymm6, %%ymm8, %%ymm7", 839 "andl $63, (%%rax);" 840 "andl $63, 8(%%rax);" 841 "andl $63, 16(%%rax);" 842 "vpsllvq (%%rax), %%ymm8, %%ymm7") 843 844GEN_test_RandM(VPSRLVD_128, 845 "vpslld $27, %%xmm6, %%xmm6;" 846 "vpsrld $27, %%xmm6, %%xmm6;" 847 "vpsrlvd %%xmm6, %%xmm8, %%xmm7", 848 "andl $31, (%%rax);" 849 "andl $31, 4(%%rax);" 850 "andl $31, 8(%%rax);" 851 "vpsrlvd (%%rax), %%xmm8, %%xmm7") 852 853GEN_test_RandM(VPSRLVD_256, 854 "vpslld $27, %%ymm6, %%ymm6;" 855 "vpsrld $27, %%ymm6, %%ymm6;" 856 "vpsrlvd %%ymm6, %%ymm8, %%ymm7", 857 "andl $31, (%%rax);" 858 "andl $31, 4(%%rax);" 859 "andl $31, 8(%%rax);" 860 "andl $31, 16(%%rax);" 861 "andl $31, 20(%%rax);" 862 "andl $31, 24(%%rax);" 863 "vpsrlvd (%%rax), %%ymm8, %%ymm7") 864 865GEN_test_RandM(VPSRLVQ_128, 866 "vpsllq $58, %%xmm6, %%xmm6;" 867 "vpsrlq $58, %%xmm6, %%xmm6;" 868 "vpsrlvq %%xmm6, %%xmm8, %%xmm7", 869 "andl $63, (%%rax);" 870 "vpsrlvq (%%rax), %%xmm8, %%xmm7") 871 872GEN_test_RandM(VPSRLVQ_256, 873 "vpsllq $58, %%ymm6, %%ymm6;" 874 "vpsrlq $58, %%ymm6, %%ymm6;" 875 "vpsrlvq %%ymm6, %%ymm8, %%ymm7", 876 "andl $63, (%%rax);" 877 "andl $63, 8(%%rax);" 878 "andl $63, 16(%%rax);" 879 "vpsrlvq (%%rax), %%ymm8, 
%%ymm7") 880 881GEN_test_RandM(VPSRAVD_128, 882 "vpslld $27, %%xmm6, %%xmm6;" 883 "vpsrld $27, %%xmm6, %%xmm6;" 884 "vpsravd %%xmm6, %%xmm8, %%xmm7", 885 "andl $31, (%%rax);" 886 "andl $31, 4(%%rax);" 887 "andl $31, 8(%%rax);" 888 "vpsravd (%%rax), %%xmm8, %%xmm7") 889 890GEN_test_RandM(VPSRAVD_256, 891 "vpslld $27, %%ymm6, %%ymm6;" 892 "vpsrld $27, %%ymm6, %%ymm6;" 893 "vpsravd %%ymm6, %%ymm8, %%ymm7", 894 "andl $31, (%%rax);" 895 "andl $31, 4(%%rax);" 896 "andl $31, 8(%%rax);" 897 "andl $31, 16(%%rax);" 898 "andl $31, 20(%%rax);" 899 "andl $31, 24(%%rax);" 900 "vpsravd (%%rax), %%ymm8, %%ymm7") 901 902GEN_test_RandM(VPBROADCASTB_128, 903 "vpbroadcastb %%xmm9, %%xmm7", 904 "vpbroadcastb (%%rax), %%xmm7") 905 906GEN_test_RandM(VPBROADCASTB_256, 907 "vpbroadcastb %%xmm9, %%ymm7", 908 "vpbroadcastb (%%rax), %%ymm7") 909 910GEN_test_RandM(VPBROADCASTW_128, 911 "vpbroadcastw %%xmm9, %%xmm7", 912 "vpbroadcastw (%%rax), %%xmm7") 913 914GEN_test_RandM(VPBROADCASTW_256, 915 "vpbroadcastw %%xmm9, %%ymm7", 916 "vpbroadcastw (%%rax), %%ymm7") 917 918GEN_test_RandM(VPBROADCASTD_128, 919 "vpbroadcastd %%xmm9, %%xmm7", 920 "vpbroadcastd (%%rax), %%xmm7") 921 922GEN_test_RandM(VPBROADCASTD_256, 923 "vpbroadcastd %%xmm9, %%ymm7", 924 "vpbroadcastd (%%rax), %%ymm7") 925 926GEN_test_RandM(VPBROADCASTQ_128, 927 "vpbroadcastq %%xmm9, %%xmm7", 928 "vpbroadcastq (%%rax), %%xmm7") 929 930GEN_test_RandM(VPBROADCASTQ_256, 931 "vpbroadcastq %%xmm9, %%ymm7", 932 "vpbroadcastq (%%rax), %%ymm7") 933 934GEN_test_Monly(VPMASKMOVD_128_LoadForm, 935 "vpmaskmovd (%%rax), %%xmm8, %%xmm7;" 936 "vxorps %%xmm6, %%xmm6, %%xmm6;" 937 "vpmaskmovd (%%rax,%%rax,4), %%xmm6, %%xmm9") 938 939GEN_test_Monly(VPMASKMOVD_256_LoadForm, 940 "vpmaskmovd (%%rax), %%ymm8, %%ymm7;" 941 "vxorps %%ymm6, %%ymm6, %%ymm6;" 942 "vpmaskmovd (%%rax,%%rax,4), %%ymm6, %%ymm9") 943 944GEN_test_Monly(VPMASKMOVQ_128_LoadForm, 945 "vpmaskmovq (%%rax), %%xmm8, %%xmm7;" 946 "vxorpd %%xmm6, %%xmm6, %%xmm6;" 947 "vpmaskmovq 
(%%rax,%%rax,4), %%xmm6, %%xmm9") 948 949GEN_test_Monly(VPMASKMOVQ_256_LoadForm, 950 "vpmaskmovq (%%rax), %%ymm8, %%ymm7;" 951 "vxorpd %%ymm6, %%ymm6, %%ymm6;" 952 "vpmaskmovq (%%rax,%%rax,4), %%ymm6, %%ymm9") 953 954GEN_test_Monly(VPMASKMOVD_128_StoreForm, 955 "vpmaskmovd %%xmm8, %%xmm7, (%%rax);" 956 "vxorps %%xmm6, %%xmm6, %%xmm6;" 957 "vpmaskmovd %%xmm9, %%xmm6, (%%rax,%%rax,4)") 958 959GEN_test_Monly(VPMASKMOVD_256_StoreForm, 960 "vpmaskmovd %%ymm8, %%ymm7, (%%rax);" 961 "vxorps %%ymm6, %%ymm6, %%ymm6;" 962 "vpmaskmovd %%ymm9, %%ymm6, (%%rax,%%rax,4)") 963 964GEN_test_Monly(VPMASKMOVQ_128_StoreForm, 965 "vpmaskmovq %%xmm8, %%xmm7, (%%rax);" 966 "vxorpd %%xmm6, %%xmm6, %%xmm6;" 967 "vpmaskmovq %%xmm9, %%xmm6, (%%rax,%%rax,4)") 968 969GEN_test_Monly(VPMASKMOVQ_256_StoreForm, 970 "vpmaskmovq %%ymm8, %%ymm7, (%%rax);" 971 "vxorpd %%ymm6, %%ymm6, %%ymm6;" 972 "vpmaskmovq %%ymm9, %%ymm6, (%%rax,%%rax,4)") 973 974GEN_test_Ronly(VGATHERDPS_128, 975 "vpslld $25, %%xmm7, %%xmm8;" 976 "vpsrld $25, %%xmm8, %%xmm8;" 977 "vblendvps %%xmm6, %%xmm8, %%xmm7, %%xmm8;" 978 "leaq randArray(%%rip), %%r14;" 979 "vgatherdps %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;" 980 "xorl %%r14d, %%r14d") 981 982GEN_test_Ronly(VGATHERDPS_256, 983 "vpslld $25, %%ymm7, %%ymm8;" 984 "vpsrld $25, %%ymm8, %%ymm8;" 985 "vblendvps %%ymm6, %%ymm8, %%ymm7, %%ymm8;" 986 "leaq randArray(%%rip), %%r14;" 987 "vgatherdps %%ymm6, 3(%%r14,%%ymm8,4), %%ymm9;" 988 "xorl %%r14d, %%r14d") 989 990GEN_test_Ronly(VGATHERQPS_128_1, 991 "vpsllq $57, %%xmm7, %%xmm8;" 992 "vpsrlq $57, %%xmm8, %%xmm8;" 993 "vpmovsxdq %%xmm6, %%xmm9;" 994 "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;" 995 "vmovdqa 96(%0), %%ymm9;" 996 "leaq randArray(%%rip), %%r14;" 997 "vgatherqps %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;" 998 "xorl %%r14d, %%r14d") 999 1000GEN_test_Ronly(VGATHERQPS_256_1, 1001 "vpsllq $57, %%ymm7, %%ymm8;" 1002 "vpsrlq $57, %%ymm8, %%ymm8;" 1003 "vpmovsxdq %%xmm6, %%ymm9;" 1004 "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;" 1005 "vmovdqa 
96(%0), %%ymm9;" 1006 "leaq randArray(%%rip), %%r14;" 1007 "vgatherqps %%xmm6, 3(%%r14,%%ymm8,4), %%xmm9;" 1008 "xorl %%r14d, %%r14d") 1009 1010GEN_test_Ronly(VGATHERQPS_128_2, 1011 "vpsllq $57, %%xmm7, %%xmm8;" 1012 "vpsrlq $57, %%xmm8, %%xmm8;" 1013 "vpmovsxdq %%xmm6, %%xmm9;" 1014 "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;" 1015 "vmovdqa 96(%0), %%ymm9;" 1016 "leaq randArray(%%rip), %%r14;" 1017 "vmovq %%r14, %%xmm7;" 1018 "vpsllq $2, %%xmm8, %%xmm8;" 1019 "vpbroadcastq %%xmm7, %%xmm7;" 1020 "vpaddq %%xmm7, %%xmm8, %%xmm8;" 1021 "vgatherqps %%xmm6, 1(,%%xmm8,1), %%xmm9;" 1022 "vpsubq %%xmm7, %%xmm8, %%xmm8;" 1023 "vmovdqa 0(%0), %%ymm7;" 1024 "xorl %%r14d, %%r14d") 1025 1026GEN_test_Ronly(VGATHERQPS_256_2, 1027 "vpsllq $57, %%ymm7, %%ymm8;" 1028 "vpsrlq $57, %%ymm8, %%ymm8;" 1029 "vpmovsxdq %%xmm6, %%ymm9;" 1030 "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;" 1031 "vmovdqa 96(%0), %%ymm9;" 1032 "leaq randArray(%%rip), %%r14;" 1033 "vmovq %%r14, %%xmm7;" 1034 "vpsllq $2, %%ymm8, %%ymm8;" 1035 "vpbroadcastq %%xmm7, %%ymm7;" 1036 "vpaddq %%ymm7, %%ymm8, %%ymm8;" 1037 "vgatherqps %%xmm6, 1(,%%ymm8,1), %%xmm9;" 1038 "vpsubq %%ymm7, %%ymm8, %%ymm8;" 1039 "vmovdqa 0(%0), %%ymm7;" 1040 "xorl %%r14d, %%r14d") 1041 1042GEN_test_Ronly(VGATHERDPD_128, 1043 "vpslld $26, %%xmm7, %%xmm8;" 1044 "vpsrld $26, %%xmm8, %%xmm8;" 1045 "vshufps $13, %%xmm6, %%xmm6, %%xmm9;" 1046 "vblendvps %%xmm9, %%xmm8, %%xmm7, %%xmm8;" 1047 "vmovdqa 96(%0), %%ymm9;" 1048 "leaq randArray(%%rip), %%r14;" 1049 "vgatherdpd %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;" 1050 "xorl %%r14d, %%r14d") 1051 1052GEN_test_Ronly(VGATHERDPD_256, 1053 "vpslld $26, %%ymm7, %%ymm8;" 1054 "vpsrld $26, %%ymm8, %%ymm8;" 1055 "vextracti128 $1, %%ymm6, %%xmm9;" 1056 "vshufps $221, %%ymm9, %%ymm6, %%ymm9;" 1057 "vblendvps %%ymm9, %%ymm8, %%ymm7, %%ymm8;" 1058 "vmovdqa 96(%0), %%ymm9;" 1059 "leaq randArray(%%rip), %%r14;" 1060 "vgatherdpd %%ymm6, 3(%%r14,%%xmm8,8), %%ymm9;" 1061 "xorl %%r14d, %%r14d") 1062 
1063GEN_test_Ronly(VGATHERQPD_128_1, 1064 "vpsllq $58, %%xmm7, %%xmm8;" 1065 "vpsrlq $58, %%xmm8, %%xmm8;" 1066 "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;" 1067 "leaq randArray(%%rip), %%r14;" 1068 "vgatherqpd %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;" 1069 "xorl %%r14d, %%r14d") 1070 1071GEN_test_Ronly(VGATHERQPD_256_1, 1072 "vpsllq $58, %%ymm7, %%ymm8;" 1073 "vpsrlq $58, %%ymm8, %%ymm8;" 1074 "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;" 1075 "leaq randArray(%%rip), %%r14;" 1076 "vgatherqpd %%ymm6, 3(%%r14,%%ymm8,8), %%ymm9;" 1077 "xorl %%r14d, %%r14d") 1078 1079GEN_test_Ronly(VGATHERQPD_128_2, 1080 "vpsllq $58, %%xmm7, %%xmm8;" 1081 "vpsrlq $58, %%xmm8, %%xmm8;" 1082 "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;" 1083 "leaq randArray(%%rip), %%r14;" 1084 "vmovq %%r14, %%xmm7;" 1085 "vpsllq $2, %%xmm8, %%xmm8;" 1086 "vpbroadcastq %%xmm7, %%xmm7;" 1087 "vpaddq %%xmm7, %%xmm8, %%xmm8;" 1088 "vgatherqpd %%xmm6, 1(,%%xmm8,1), %%xmm9;" 1089 "vpsubq %%xmm7, %%xmm8, %%xmm8;" 1090 "vmovdqa 0(%0), %%ymm7;" 1091 "xorl %%r14d, %%r14d") 1092 1093GEN_test_Ronly(VGATHERQPD_256_2, 1094 "vpsllq $58, %%ymm7, %%ymm8;" 1095 "vpsrlq $58, %%ymm8, %%ymm8;" 1096 "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;" 1097 "leaq randArray(%%rip), %%r14;" 1098 "vmovq %%r14, %%xmm7;" 1099 "vpsllq $2, %%ymm8, %%ymm8;" 1100 "vpbroadcastq %%xmm7, %%ymm7;" 1101 "vpaddq %%ymm7, %%ymm8, %%ymm8;" 1102 "vgatherqpd %%ymm6, 1(,%%ymm8,1), %%ymm9;" 1103 "vpsubq %%ymm7, %%ymm8, %%ymm8;" 1104 "vmovdqa 0(%0), %%ymm7;" 1105 "xorl %%r14d, %%r14d") 1106 1107GEN_test_Ronly(VPGATHERDD_128, 1108 "vpslld $25, %%xmm7, %%xmm8;" 1109 "vpsrld $25, %%xmm8, %%xmm8;" 1110 "vblendvps %%xmm6, %%xmm8, %%xmm7, %%xmm8;" 1111 "leaq randArray(%%rip), %%r14;" 1112 "vpgatherdd %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;" 1113 "xorl %%r14d, %%r14d") 1114 1115GEN_test_Ronly(VPGATHERDD_256, 1116 "vpslld $25, %%ymm7, %%ymm8;" 1117 "vpsrld $25, %%ymm8, %%ymm8;" 1118 "vblendvps %%ymm6, %%ymm8, %%ymm7, %%ymm8;" 1119 "leaq randArray(%%rip), %%r14;" 1120 "vpgatherdd 
%%ymm6, 3(%%r14,%%ymm8,4), %%ymm9;" 1121 "xorl %%r14d, %%r14d") 1122 1123GEN_test_Ronly(VPGATHERQD_128_1, 1124 "vpsllq $57, %%xmm7, %%xmm8;" 1125 "vpsrlq $57, %%xmm8, %%xmm8;" 1126 "vpmovsxdq %%xmm6, %%xmm9;" 1127 "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;" 1128 "vmovdqa 96(%0), %%ymm9;" 1129 "leaq randArray(%%rip), %%r14;" 1130 "vpgatherqd %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;" 1131 "xorl %%r14d, %%r14d") 1132 1133GEN_test_Ronly(VPGATHERQD_256_1, 1134 "vpsllq $57, %%ymm7, %%ymm8;" 1135 "vpsrlq $57, %%ymm8, %%ymm8;" 1136 "vpmovsxdq %%xmm6, %%ymm9;" 1137 "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;" 1138 "vmovdqa 96(%0), %%ymm9;" 1139 "leaq randArray(%%rip), %%r14;" 1140 "vpgatherqd %%xmm6, 3(%%r14,%%ymm8,4), %%xmm9;" 1141 "xorl %%r14d, %%r14d") 1142 1143GEN_test_Ronly(VPGATHERQD_128_2, 1144 "vpsllq $57, %%xmm7, %%xmm8;" 1145 "vpsrlq $57, %%xmm8, %%xmm8;" 1146 "vpmovsxdq %%xmm6, %%xmm9;" 1147 "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;" 1148 "vmovdqa 96(%0), %%ymm9;" 1149 "leaq randArray(%%rip), %%r14;" 1150 "vmovq %%r14, %%xmm7;" 1151 "vpsllq $2, %%xmm8, %%xmm8;" 1152 "vpbroadcastq %%xmm7, %%xmm7;" 1153 "vpaddq %%xmm7, %%xmm8, %%xmm8;" 1154 "vpgatherqd %%xmm6, 1(,%%xmm8,1), %%xmm9;" 1155 "vpsubq %%xmm7, %%xmm8, %%xmm8;" 1156 "vmovdqa 0(%0), %%ymm7;" 1157 "xorl %%r14d, %%r14d") 1158 1159GEN_test_Ronly(VPGATHERQD_256_2, 1160 "vpsllq $57, %%ymm7, %%ymm8;" 1161 "vpsrlq $57, %%ymm8, %%ymm8;" 1162 "vpmovsxdq %%xmm6, %%ymm9;" 1163 "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;" 1164 "vmovdqa 96(%0), %%ymm9;" 1165 "leaq randArray(%%rip), %%r14;" 1166 "vmovq %%r14, %%xmm7;" 1167 "vpsllq $2, %%ymm8, %%ymm8;" 1168 "vpbroadcastq %%xmm7, %%ymm7;" 1169 "vpaddq %%ymm7, %%ymm8, %%ymm8;" 1170 "vpgatherqd %%xmm6, 1(,%%ymm8,1), %%xmm9;" 1171 "vpsubq %%ymm7, %%ymm8, %%ymm8;" 1172 "vmovdqa 0(%0), %%ymm7;" 1173 "xorl %%r14d, %%r14d") 1174 1175GEN_test_Ronly(VPGATHERDQ_128, 1176 "vpslld $26, %%xmm7, %%xmm8;" 1177 "vpsrld $26, %%xmm8, %%xmm8;" 1178 "vshufps $13, %%xmm6, %%xmm6, %%xmm9;" 1179 
"vblendvps %%xmm9, %%xmm8, %%xmm7, %%xmm8;" 1180 "vmovdqa 96(%0), %%ymm9;" 1181 "leaq randArray(%%rip), %%r14;" 1182 "vpgatherdq %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;" 1183 "xorl %%r14d, %%r14d") 1184 1185GEN_test_Ronly(VPGATHERDQ_256, 1186 "vpslld $26, %%ymm7, %%ymm8;" 1187 "vpsrld $26, %%ymm8, %%ymm8;" 1188 "vextracti128 $1, %%ymm6, %%xmm9;" 1189 "vshufps $221, %%ymm9, %%ymm6, %%ymm9;" 1190 "vblendvps %%ymm9, %%ymm8, %%ymm7, %%ymm8;" 1191 "vmovdqa 96(%0), %%ymm9;" 1192 "leaq randArray(%%rip), %%r14;" 1193 "vpgatherdq %%ymm6, 3(%%r14,%%xmm8,8), %%ymm9;" 1194 "xorl %%r14d, %%r14d") 1195 1196GEN_test_Ronly(VPGATHERQQ_128_1, 1197 "vpsllq $58, %%xmm7, %%xmm8;" 1198 "vpsrlq $58, %%xmm8, %%xmm8;" 1199 "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;" 1200 "leaq randArray(%%rip), %%r14;" 1201 "vpgatherqq %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;" 1202 "xorl %%r14d, %%r14d") 1203 1204GEN_test_Ronly(VPGATHERQQ_256_1, 1205 "vpsllq $58, %%ymm7, %%ymm8;" 1206 "vpsrlq $58, %%ymm8, %%ymm8;" 1207 "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;" 1208 "leaq randArray(%%rip), %%r14;" 1209 "vpgatherqq %%ymm6, 3(%%r14,%%ymm8,8), %%ymm9;" 1210 "xorl %%r14d, %%r14d") 1211 1212GEN_test_Ronly(VPGATHERQQ_128_2, 1213 "vpsllq $58, %%xmm7, %%xmm8;" 1214 "vpsrlq $58, %%xmm8, %%xmm8;" 1215 "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;" 1216 "leaq randArray(%%rip), %%r14;" 1217 "vmovq %%r14, %%xmm7;" 1218 "vpsllq $2, %%xmm8, %%xmm8;" 1219 "vpbroadcastq %%xmm7, %%xmm7;" 1220 "vpaddq %%xmm7, %%xmm8, %%xmm8;" 1221 "vpgatherqq %%xmm6, 1(,%%xmm8,1), %%xmm9;" 1222 "vpsubq %%xmm7, %%xmm8, %%xmm8;" 1223 "vmovdqa 0(%0), %%ymm7;" 1224 "xorl %%r14d, %%r14d") 1225 1226GEN_test_Ronly(VPGATHERQQ_256_2, 1227 "vpsllq $58, %%ymm7, %%ymm8;" 1228 "vpsrlq $58, %%ymm8, %%ymm8;" 1229 "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;" 1230 "leaq randArray(%%rip), %%r14;" 1231 "vmovq %%r14, %%xmm7;" 1232 "vpsllq $2, %%ymm8, %%ymm8;" 1233 "vpbroadcastq %%xmm7, %%ymm7;" 1234 "vpaddq %%ymm7, %%ymm8, %%ymm8;" 1235 "vpgatherqq %%ymm6, 1(,%%ymm8,1), 
%%ymm9;" 1236 "vpsubq %%ymm7, %%ymm8, %%ymm8;" 1237 "vmovdqa 0(%0), %%ymm7;" 1238 "xorl %%r14d, %%r14d") 1239 1240/* Comment duplicated above, for convenient reference: 1241 Allowed operands in test insns: 1242 Reg form: %ymm6, %ymm7, %ymm8, %ymm9 and %r14. 1243 Mem form: (%rax), %ymm7, %ymm8, %ymm9 and %r14. 1244 Imm8 etc fields are also allowed, where they make sense. 1245 Both forms may use ymm0 as scratch. Mem form may also use 1246 ymm6 as scratch. 1247*/ 1248 1249#define N_DEFAULT_ITERS 3 1250 1251// Do the specified test some number of times 1252#define DO_N(_iters, _testfn) \ 1253 do { int i; for (i = 0; i < (_iters); i++) { test_##_testfn(); } } while (0) 1254 1255// Do the specified test the default number of times 1256#define DO_D(_testfn) DO_N(N_DEFAULT_ITERS, _testfn) 1257 1258 1259int main ( void ) 1260{ 1261 DO_D( VPOR_256 ); 1262 DO_D( VPXOR_256 ); 1263 DO_D( VPSUBB_256 ); 1264 DO_D( VPSUBD_256 ); 1265 DO_D( VPADDD_256 ); 1266 DO_D( VPMOVZXWD_256 ); 1267 DO_D( VPMOVZXBW_256 ); 1268 DO_D( VPBLENDVB_256 ); 1269 DO_D( VPMINSD_256 ); 1270 DO_D( VPMAXSD_256 ); 1271 DO_D( VPSHUFB_256 ); 1272 DO_D( VPUNPCKLBW_256 ); 1273 DO_D( VPUNPCKHBW_256 ); 1274 DO_D( VPABSD_256 ); 1275 DO_D( VPACKUSWB_256 ); 1276 DO_D( VPMOVMSKB_256 ); 1277 DO_D( VPAND_256 ); 1278 DO_D( VPCMPEQB_256 ); 1279 DO_D( VPSHUFLW_0x39_256 ); 1280 DO_D( VPSHUFHW_0x39_256 ); 1281 DO_D( VPMULLW_256 ); 1282 DO_D( VPADDUSW_256 ); 1283 DO_D( VPMULHUW_256 ); 1284 DO_D( VPADDUSB_256 ); 1285 DO_D( VPUNPCKLWD_256 ); 1286 DO_D( VPUNPCKHWD_256 ); 1287 DO_D( VPSLLD_0x05_256 ); 1288 DO_D( VPSRLD_0x05_256 ); 1289 DO_D( VPSRAD_0x05_256 ); 1290 DO_D( VPSUBUSB_256 ); 1291 DO_D( VPSUBSB_256 ); 1292 DO_D( VPSRLDQ_0x05_256 ); 1293 DO_D( VPSLLDQ_0x05_256 ); 1294 DO_D( VPANDN_256 ); 1295 DO_D( VPUNPCKLQDQ_256 ); 1296 DO_D( VPSRLW_0x05_256 ); 1297 DO_D( VPSLLW_0x05_256 ); 1298 DO_D( VPADDW_256 ); 1299 DO_D( VPACKSSDW_256 ); 1300 DO_D( VPUNPCKLDQ_256 ); 1301 DO_D( VPCMPEQD_256 ); 1302 DO_D( VPSHUFD_0x39_256 ); 1303 
DO_D( VPADDQ_256 ); 1304 DO_D( VPSUBQ_256 ); 1305 DO_D( VPSUBW_256 ); 1306 DO_D( VPCMPEQQ_256 ); 1307 DO_D( VPCMPGTQ_256 ); 1308 DO_D( VPSRLQ_0x05_256 ); 1309 DO_D( VPMULUDQ_256 ); 1310 DO_D( VPMULDQ_256 ); 1311 DO_D( VPSLLQ_0x05_256 ); 1312 DO_D( VPMAXUD_256 ); 1313 DO_D( VPMINUD_256 ); 1314 DO_D( VPMULLD_256 ); 1315 DO_D( VPMAXUW_256 ); 1316 DO_D( VPMINUW_256 ); 1317 DO_D( VPMAXSW_256 ); 1318 DO_D( VPMINSW_256 ); 1319 DO_D( VPMAXUB_256 ); 1320 DO_D( VPMINUB_256 ); 1321 DO_D( VPMAXSB_256 ); 1322 DO_D( VPMINSB_256 ); 1323 DO_D( VPMOVSXBW_256 ); 1324 DO_D( VPSUBUSW_256 ); 1325 DO_D( VPSUBSW_256 ); 1326 DO_D( VPCMPEQW_256 ); 1327 DO_D( VPADDB_256 ); 1328 DO_D( VPUNPCKHDQ_256 ); 1329 DO_D( VPMOVSXDQ_256 ); 1330 DO_D( VPMOVSXWD_256 ); 1331 DO_D( VPMULHW_256 ); 1332 DO_D( VPUNPCKHQDQ_256 ); 1333 DO_D( VPSRAW_0x05_256 ); 1334 DO_D( VPCMPGTB_256 ); 1335 DO_D( VPCMPGTW_256 ); 1336 DO_D( VPCMPGTD_256 ); 1337 DO_D( VPMOVZXBD_256 ); 1338 DO_D( VPMOVSXBD_256 ); 1339 DO_D( VPALIGNR_256_1of3 ); 1340 DO_D( VPALIGNR_256_2of3 ); 1341 DO_D( VPALIGNR_256_3of3 ); 1342 DO_D( VPBLENDW_256_0x00 ); 1343 DO_D( VPBLENDW_256_0xFE ); 1344 DO_D( VPBLENDW_256_0x30 ); 1345 DO_D( VPBLENDW_256_0x21 ); 1346 DO_D( VPBLENDW_256_0xD7 ); 1347 DO_D( VPBLENDW_256_0xB5 ); 1348 DO_D( VPBLENDW_256_0x85 ); 1349 DO_D( VPBLENDW_256_0x29 ); 1350 DO_D( VPSLLW_256 ); 1351 DO_D( VPSRLW_256 ); 1352 DO_D( VPSRAW_256 ); 1353 DO_D( VPSLLD_256 ); 1354 DO_D( VPSRLD_256 ); 1355 DO_D( VPSRAD_256 ); 1356 DO_D( VPSLLQ_256 ); 1357 DO_D( VPSRLQ_256 ); 1358 DO_D( VPMADDWD_256 ); 1359 DO_D( VMOVNTDQA_256 ); 1360 DO_D( VPACKSSWB_256 ); 1361 DO_D( VPAVGB_256 ); 1362 DO_D( VPAVGW_256 ); 1363 DO_D( VPADDSB_256 ); 1364 DO_D( VPADDSW_256 ); 1365 DO_D( VPHADDW_256 ); 1366 DO_D( VPHADDD_256 ); 1367 DO_D( VPHADDSW_256 ); 1368 DO_D( VPMADDUBSW_256 ); 1369 DO_D( VPHSUBW_256 ); 1370 DO_D( VPHSUBD_256 ); 1371 DO_D( VPHSUBSW_256 ); 1372 DO_D( VPABSB_256 ); 1373 DO_D( VPABSW_256 ); 1374 DO_D( VPMOVSXBQ_256 ); 1375 DO_D( VPMOVSXWQ_256 ); 1376 
DO_D( VPACKUSDW_256 ); 1377 DO_D( VPMOVZXBQ_256 ); 1378 DO_D( VPMOVZXWQ_256 ); 1379 DO_D( VPMOVZXDQ_256 ); 1380 DO_D( VMPSADBW_256_0x0 ); 1381 DO_D( VMPSADBW_256_0x39 ); 1382 DO_D( VMPSADBW_256_0x32 ); 1383 DO_D( VMPSADBW_256_0x2b ); 1384 DO_D( VMPSADBW_256_0x24 ); 1385 DO_D( VMPSADBW_256_0x1d ); 1386 DO_D( VMPSADBW_256_0x16 ); 1387 DO_D( VMPSADBW_256_0x0f ); 1388 DO_D( VPSADBW_256 ); 1389 DO_D( VPSIGNB_256 ); 1390 DO_D( VPSIGNW_256 ); 1391 DO_D( VPSIGND_256 ); 1392 DO_D( VPMULHRSW_256 ); 1393 DO_D( VBROADCASTI128 ); 1394 DO_D( VEXTRACTI128_0x0 ); 1395 DO_D( VEXTRACTI128_0x1 ); 1396 DO_D( VINSERTI128_0x0 ); 1397 DO_D( VINSERTI128_0x1 ); 1398 DO_D( VPERM2I128_0x00 ); 1399 DO_D( VPERM2I128_0xFF ); 1400 DO_D( VPERM2I128_0x30 ); 1401 DO_D( VPERM2I128_0x21 ); 1402 DO_D( VPERM2I128_0x12 ); 1403 DO_D( VPERM2I128_0x03 ); 1404 DO_D( VPERM2I128_0x85 ); 1405 DO_D( VPERM2I128_0x5A ); 1406 DO_D( VBROADCASTSS_128 ); 1407 DO_D( VBROADCASTSS_256 ); 1408 DO_D( VBROADCASTSD_256 ); 1409 DO_D( VPERMD ); 1410 DO_D( VPERMQ_0x00 ); 1411 DO_D( VPERMQ_0xFE ); 1412 DO_D( VPERMQ_0x30 ); 1413 DO_D( VPERMQ_0x21 ); 1414 DO_D( VPERMQ_0xD7 ); 1415 DO_D( VPERMQ_0xB5 ); 1416 DO_D( VPERMQ_0x85 ); 1417 DO_D( VPERMQ_0x29 ); 1418 DO_D( VPERMPS ); 1419 DO_D( VPERMPD_0x00 ); 1420 DO_D( VPERMPD_0xFE ); 1421 DO_D( VPERMPD_0x30 ); 1422 DO_D( VPERMPD_0x21 ); 1423 DO_D( VPERMPD_0xD7 ); 1424 DO_D( VPERMPD_0xB5 ); 1425 DO_D( VPERMPD_0x85 ); 1426 DO_D( VPERMPD_0x29 ); 1427 DO_D( VPBLENDD_128_0x00 ); 1428 DO_D( VPBLENDD_128_0x02 ); 1429 DO_D( VPBLENDD_128_0x04 ); 1430 DO_D( VPBLENDD_128_0x06 ); 1431 DO_D( VPBLENDD_128_0x08 ); 1432 DO_D( VPBLENDD_128_0x0A ); 1433 DO_D( VPBLENDD_128_0x0C ); 1434 DO_D( VPBLENDD_128_0x0E ); 1435 DO_D( VPBLENDD_256_0x00 ); 1436 DO_D( VPBLENDD_256_0xFE ); 1437 DO_D( VPBLENDD_256_0x30 ); 1438 DO_D( VPBLENDD_256_0x21 ); 1439 DO_D( VPBLENDD_256_0xD7 ); 1440 DO_D( VPBLENDD_256_0xB5 ); 1441 DO_D( VPBLENDD_256_0x85 ); 1442 DO_D( VPBLENDD_256_0x29 ); 1443 DO_D( VPSLLVD_128 ); 1444 DO_D( 
VPSLLVD_256 ); 1445 DO_D( VPSLLVQ_128 ); 1446 DO_D( VPSLLVQ_256 ); 1447 DO_D( VPSRLVD_128 ); 1448 DO_D( VPSRLVD_256 ); 1449 DO_D( VPSRLVQ_128 ); 1450 DO_D( VPSRLVQ_256 ); 1451 DO_D( VPSRAVD_128 ); 1452 DO_D( VPSRAVD_256 ); 1453 DO_D( VPBROADCASTB_128 ); 1454 DO_D( VPBROADCASTB_256 ); 1455 DO_D( VPBROADCASTW_128 ); 1456 DO_D( VPBROADCASTW_256 ); 1457 DO_D( VPBROADCASTD_128 ); 1458 DO_D( VPBROADCASTD_256 ); 1459 DO_D( VPBROADCASTQ_128 ); 1460 DO_D( VPBROADCASTQ_256 ); 1461 DO_D( VPMASKMOVD_128_LoadForm ); 1462 DO_D( VPMASKMOVD_256_LoadForm ); 1463 DO_D( VPMASKMOVQ_128_LoadForm ); 1464 DO_D( VPMASKMOVQ_256_LoadForm ); 1465 DO_D( VPMASKMOVD_128_StoreForm ); 1466 DO_D( VPMASKMOVD_256_StoreForm ); 1467 DO_D( VPMASKMOVQ_128_StoreForm ); 1468 DO_D( VPMASKMOVQ_256_StoreForm ); 1469 { int i; for (i = 0; i < sizeof(randArray); i++) randArray[i] = randUChar(); } 1470 DO_D( VGATHERDPS_128 ); 1471 DO_D( VGATHERDPS_256 ); 1472 DO_D( VGATHERQPS_128_1 ); 1473 DO_D( VGATHERQPS_256_1 ); 1474 DO_D( VGATHERQPS_128_2 ); 1475 DO_D( VGATHERQPS_256_2 ); 1476 DO_D( VGATHERDPD_128 ); 1477 DO_D( VGATHERDPD_256 ); 1478 DO_D( VGATHERQPD_128_1 ); 1479 DO_D( VGATHERQPD_256_1 ); 1480 DO_D( VGATHERQPD_128_2 ); 1481 DO_D( VGATHERQPD_256_2 ); 1482 DO_D( VPGATHERDD_128 ); 1483 DO_D( VPGATHERDD_256 ); 1484 DO_D( VPGATHERQD_128_1 ); 1485 DO_D( VPGATHERQD_256_1 ); 1486 DO_D( VPGATHERQD_128_2 ); 1487 DO_D( VPGATHERQD_256_2 ); 1488 DO_D( VPGATHERDQ_128 ); 1489 DO_D( VPGATHERDQ_256 ); 1490 DO_D( VPGATHERQQ_128_1 ); 1491 DO_D( VPGATHERQQ_256_1 ); 1492 DO_D( VPGATHERQQ_128_2 ); 1493 DO_D( VPGATHERQQ_256_2 ); 1494 return 0; 1495} 1496