1 2/* A program to test SSE4.1/SSE4.2 instructions. 3 Revisions: Nov.208 - wrote this file 4 Apr.10.2010 - added PEXTR* tests 5 Apr.16.2010 - added PINS* tests 6*/ 7 8/* HOW TO COMPILE: 9 gcc -m64 -g -O -Wall -o sse4-64 sse4-64.c 10*/ 11 12#include <stdio.h> 13#include <stdlib.h> 14#include <assert.h> 15//#include "tests/malloc.h" // reenable when reintegrated 16#include <string.h> 17 18 19 20// rmme when reintegrated 21// Allocates a 16-aligned block. Asserts if the allocation fails. 22#ifdef VGO_darwin 23#include <stdlib.h> 24#else 25#include <malloc.h> 26#endif 27__attribute__((unused)) 28static void* memalign16(size_t szB) 29{ 30 void* x; 31#if defined(VGO_darwin) 32 // Darwin lacks memalign, but its malloc is always 16-aligned anyway. 33 x = malloc(szB); 34#else 35 x = memalign(16, szB); 36#endif 37 assert(x); 38 assert(0 == ((16-1) & (unsigned long)x)); 39 return x; 40} 41 42 43 44typedef unsigned char V128[16]; 45typedef unsigned int UInt; 46typedef signed int Int; 47typedef unsigned char UChar; 48typedef unsigned long long int ULong; 49 50typedef unsigned char Bool; 51#define False ((Bool)0) 52#define True ((Bool)1) 53 54 55typedef 56 struct { 57 V128 arg1; 58 V128 arg2; 59 V128 res; 60 } 61 RRArgs; 62 63typedef 64 struct { 65 V128 arg1; 66 V128 res; 67 } 68 RMArgs; 69 70static void do64HLtoV128 ( /*OUT*/V128* res, ULong wHi, ULong wLo ) 71{ 72 // try to sidestep strict-aliasing snafus by memcpying explicitly 73 UChar* p = (UChar*)res; 74 memcpy(&p[8], (UChar*)&wHi, 8); 75 memcpy(&p[0], (UChar*)&wLo, 8); 76} 77 78static UChar randUChar ( void ) 79{ 80 static UInt seed = 80021; 81 seed = 1103515245 * seed + 12345; 82 return (seed >> 17) & 0xFF; 83} 84 85static ULong randULong ( void ) 86{ 87 Int i; 88 ULong r = 0; 89 for (i = 0; i < 8; i++) { 90 r = (r << 8) | (ULong)(0xFF & randUChar()); 91 } 92 return r; 93} 94 95static void randV128 ( V128* v ) 96{ 97 Int i; 98 for (i = 0; i < 16; i++) 99 (*v)[i] = randUChar(); 100} 101 102static void showV128 ( V128* v ) 
103{ 104 Int i; 105 for (i = 15; i >= 0; i--) 106 printf("%02x", (Int)(*v)[i]); 107} 108 109static void showMaskedV128 ( V128* v, V128* mask ) 110{ 111 Int i; 112 for (i = 15; i >= 0; i--) 113 printf("%02x", (Int)( ((*v)[i]) & ((*mask)[i]) )); 114} 115 116static void showIGVV( char* rOrM, char* op, Int imm, 117 ULong src64, V128* dst, V128* res ) 118{ 119 printf("%s %10s $%d ", rOrM, op, imm); 120 printf("%016llx", src64); 121 printf(" "); 122 showV128(dst); 123 printf(" "); 124 showV128(res); 125 printf("\n"); 126} 127 128static void showIAG ( char* rOrM, char* op, Int imm, 129 V128* argL, ULong argR, ULong res ) 130{ 131 printf("%s %10s $%d ", rOrM, op, imm); 132 showV128(argL); 133 printf(" "); 134 printf("%016llx", argR); 135 printf(" "); 136 printf("%016llx", res); 137 printf("\n"); 138} 139 140static void showIAA ( char* rOrM, char* op, Int imm, RRArgs* rra, V128* rmask ) 141{ 142 printf("%s %10s $%d ", rOrM, op, imm); 143 showV128(&rra->arg1); 144 printf(" "); 145 showV128(&rra->arg2); 146 printf(" "); 147 showMaskedV128(&rra->res, rmask); 148 printf("\n"); 149} 150 151static void showAA ( char* rOrM, char* op, RRArgs* rra, V128* rmask ) 152{ 153 printf("%s %10s ", rOrM, op); 154 showV128(&rra->arg1); 155 printf(" "); 156 showV128(&rra->arg2); 157 printf(" "); 158 showMaskedV128(&rra->res, rmask); 159 printf("\n"); 160} 161 162/* Note: these are little endian. Hence first byte is the least 163 significant byte of lane zero. */ 164 165/* Mask for insns where all result bits are non-approximated. */ 166static V128 AllMask = { 0xFF,0xFF,0xFF,0xFF, 0xFF,0xFF,0xFF,0xFF, 167 0xFF,0xFF,0xFF,0xFF, 0xFF,0xFF,0xFF,0xFF }; 168 169/* Mark for insns which produce approximated vector short results. */ 170__attribute__((unused)) 171static V128 ApproxPS = { 0x00,0x00,0x80,0xFF, 0x00,0x00,0x80,0xFF, 172 0x00,0x00,0x80,0xFF, 0x00,0x00,0x80,0xFF }; 173 174/* Mark for insns which produce approximated scalar short results. 
*/ 175__attribute__((unused)) 176static V128 ApproxSS = { 0x00,0x00,0x80,0xFF, 0xFF,0xFF,0xFF,0xFF, 177 0xFF,0xFF,0xFF,0xFF, 0xFF,0xFF,0xFF,0xFF }; 178 179static V128 fives = { 0x55,0x55,0x55,0x55, 0x55,0x55,0x55,0x55, 180 0x55,0x55,0x55,0x55, 0x55,0x55,0x55,0x55 }; 181 182static V128 zeroes = { 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00, 183 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00 }; 184 185double mkPosInf ( void ) { return 1.0 / 0.0; } 186double mkNegInf ( void ) { return -mkPosInf(); } 187double mkPosNan ( void ) { return 0.0 / 0.0; } 188double mkNegNan ( void ) { return -mkPosNan(); } 189 190__attribute__((noinline)) 191UInt get_mxcsr ( void ) 192{ 193 ULong w64; 194 __asm__ __volatile__( 195 "subq $8, %%rsp" "\n\t" 196 "stmxcsr (%%rsp)" "\n\t" 197 "movq (%%rsp), %0" "\n" 198 "addq $8, %%rsp" 199 : /*OUT*/"=r"(w64) : /*IN*/ : "memory","cc" 200 ); 201 if (0) printf("get %08x\n", (UInt)w64); 202 return (UInt)w64; 203} 204 205__attribute__((noinline)) 206void set_mxcsr ( UInt w32 ) 207{ 208 if (0) printf("set %08x\n", w32); 209 ULong w64 = (ULong)w32; 210 __asm__ __volatile__( 211 "subq $8, %%rsp" "\n\t" 212 "movq %0, (%%rsp)" "\n\t" 213 "ldmxcsr (%%rsp)" "\n\t" 214 "addq $8, %%rsp" 215 : /*OUT*/ : /*IN*/"r"(w64) : "memory",/*"mxcsr",*/"cc" 216 ); 217} 218 219UInt get_sse_roundingmode ( void ) 220{ 221 UInt w = get_mxcsr(); 222 return (w >> 13) & 3; 223} 224 225void set_sse_roundingmode ( UInt m ) 226{ 227 UInt w; 228 assert(0 == (m & ~3)); 229 w = get_mxcsr(); 230 w &= ~(3 << 13); 231 w |= (m << 13); 232 set_mxcsr(w); 233} 234 235 236#define DO_imm_r_r(_opname, _imm, _src, _dst) \ 237 { \ 238 V128 _tmp; \ 239 __asm__ __volatile__( \ 240 "movupd (%0), %%xmm2" "\n\t" \ 241 "movupd (%1), %%xmm11" "\n\t" \ 242 _opname " $" #_imm ", %%xmm2, %%xmm11" "\n\t" \ 243 "movupd %%xmm11, (%2)" "\n" \ 244 : /*out*/ : /*in*/ "r"(&(_src)), "r"(&(_dst)), "r"(&(_tmp)) \ 245 : "cc", "memory", "xmm2", "xmm11" \ 246 ); \ 247 RRArgs rra; \ 248 memcpy(&rra.arg1, &(_src), sizeof(V128)); \ 
249 memcpy(&rra.arg2, &(_dst), sizeof(V128)); \ 250 memcpy(&rra.res, &(_tmp), sizeof(V128)); \ 251 showIAA("r", (_opname), (_imm), &rra, &AllMask); \ 252 } 253 254#define DO_imm_m_r(_opname, _imm, _src, _dst) \ 255 { \ 256 V128 _tmp; \ 257 V128* _srcM = memalign16(sizeof(V128)); \ 258 memcpy(_srcM, &(_src), sizeof(V128)); \ 259 __asm__ __volatile__( \ 260 "movupd (%1), %%xmm11" "\n\t" \ 261 _opname " $" #_imm ", (%0), %%xmm11" "\n\t" \ 262 "movupd %%xmm11, (%2)" "\n" \ 263 : /*out*/ : /*in*/ "r"(_srcM), "r"(&(_dst)), "r"(&(_tmp)) \ 264 : "cc", "memory", "xmm11" \ 265 ); \ 266 RRArgs rra; \ 267 memcpy(&rra.arg1, &(_src), sizeof(V128)); \ 268 memcpy(&rra.arg2, &(_dst), sizeof(V128)); \ 269 memcpy(&rra.res, &(_tmp), sizeof(V128)); \ 270 showIAA("m", (_opname), (_imm), &rra, &AllMask); \ 271 free(_srcM); \ 272 } 273 274#define DO_imm_mandr_r(_opname, _imm, _src, _dst) \ 275 DO_imm_r_r( _opname, _imm, _src, _dst ) \ 276 DO_imm_m_r( _opname, _imm, _src, _dst ) 277 278 279 280 281 282#define DO_r_r(_opname, _src, _dst) \ 283 { \ 284 V128 _tmp; \ 285 __asm__ __volatile__( \ 286 "movupd (%0), %%xmm2" "\n\t" \ 287 "movupd (%1), %%xmm11" "\n\t" \ 288 _opname " %%xmm2, %%xmm11" "\n\t" \ 289 "movupd %%xmm11, (%2)" "\n" \ 290 : /*out*/ : /*in*/ "r"(&(_src)), "r"(&(_dst)), "r"(&(_tmp)) \ 291 : "cc", "memory", "xmm2", "xmm11" \ 292 ); \ 293 RRArgs rra; \ 294 memcpy(&rra.arg1, &(_src), sizeof(V128)); \ 295 memcpy(&rra.arg2, &(_dst), sizeof(V128)); \ 296 memcpy(&rra.res, &(_tmp), sizeof(V128)); \ 297 showAA("r", (_opname), &rra, &AllMask); \ 298 } 299 300#define DO_m_r(_opname, _src, _dst) \ 301 { \ 302 V128 _tmp; \ 303 V128* _srcM = memalign16(sizeof(V128)); \ 304 memcpy(_srcM, &(_src), sizeof(V128)); \ 305 __asm__ __volatile__( \ 306 "movupd (%1), %%xmm11" "\n\t" \ 307 _opname " (%0), %%xmm11" "\n\t" \ 308 "movupd %%xmm11, (%2)" "\n" \ 309 : /*out*/ : /*in*/ "r"(_srcM), "r"(&(_dst)), "r"(&(_tmp)) \ 310 : "cc", "memory", "xmm11" \ 311 ); \ 312 RRArgs rra; \ 313 memcpy(&rra.arg1, 
&(_src), sizeof(V128)); \ 314 memcpy(&rra.arg2, &(_dst), sizeof(V128)); \ 315 memcpy(&rra.res, &(_tmp), sizeof(V128)); \ 316 showAA("m", (_opname), &rra, &AllMask); \ 317 free(_srcM); \ 318 } 319 320#define DO_mandr_r(_opname, _src, _dst) \ 321 DO_r_r(_opname, _src, _dst) \ 322 DO_m_r(_opname, _src, _dst) 323 324 325 326 327#define DO_imm_r_to_rscalar(_opname, _imm, _src, _dstsuffix) \ 328 { \ 329 ULong _scbefore = 0x5555555555555555ULL; \ 330 ULong _scafter = 0xAAAAAAAAAAAAAAAAULL; \ 331 /* This assumes that gcc won't make any of %0, %1, %2 */ \ 332 /* be r11. That should be ensured (cough, cough) */ \ 333 /* by declaring r11 to be clobbered. */ \ 334 __asm__ __volatile__( \ 335 "movupd (%0), %%xmm2" "\n\t" \ 336 "movq (%1), %%r11" "\n\t" \ 337 _opname " $" #_imm ", %%xmm2, %%r11" _dstsuffix "\n\t" \ 338 "movq %%r11, (%2)" "\n" \ 339 : /*out*/ \ 340 : /*in*/ "r"(&(_src)), "r"(&(_scbefore)), "r"(&(_scafter)) \ 341 : "cc", "memory", "xmm2", "r11" \ 342 ); \ 343 showIAG("r", (_opname), (_imm), &(_src), (_scbefore), (_scafter)); \ 344 } 345 346#define DO_imm_r_to_mscalar(_opname, _imm, _src) \ 347 { \ 348 ULong _scbefore = 0x5555555555555555ULL; \ 349 ULong _scafter = _scbefore; \ 350 __asm__ __volatile__( \ 351 "movupd (%0), %%xmm2" "\n\t" \ 352 _opname " $" #_imm ", %%xmm2, (%1)" "\n\t" \ 353 : /*out*/ \ 354 : /*in*/ "r"(&(_src)), "r"(&(_scafter)) \ 355 : "cc", "memory", "xmm2" \ 356 ); \ 357 showIAG("m", (_opname), (_imm), &(_src), (_scbefore), (_scafter)); \ 358 } 359 360#define DO_imm_r_to_mandrscalar(_opname, _imm, _src, _dstsuffix) \ 361 DO_imm_r_to_rscalar( _opname, _imm, _src, _dstsuffix ) \ 362 DO_imm_r_to_mscalar( _opname, _imm, _src ) 363 364 365 366 367 368 369 370 371#define DO_imm_rscalar_to_r(_opname, _imm, _src, _srcsuffix) \ 372 { \ 373 V128 dstv; \ 374 V128 res; \ 375 ULong src64 = (ULong)(_src); \ 376 memcpy(dstv, fives, sizeof(dstv)); \ 377 memcpy(res, zeroes, sizeof(res)); \ 378 /* This assumes that gcc won't make any of %0, %1, %2 */ \ 379 /* be 
r11. That should be ensured (cough, cough) */ \ 380 /* by declaring r11 to be clobbered. */ \ 381 __asm__ __volatile__( \ 382 "movupd (%0), %%xmm2" "\n\t" /*dstv*/ \ 383 "movq (%1), %%r11" "\n\t" /*src64*/ \ 384 _opname " $" #_imm ", %%r11" _srcsuffix ", %%xmm2" "\n\t" \ 385 "movupd %%xmm2, (%2)" "\n" /*res*/ \ 386 : /*out*/ \ 387 : /*in*/ "r"(&dstv), "r"(&src64), "r"(&res) \ 388 : "cc", "memory", "xmm2", "r11" \ 389 ); \ 390 showIGVV("r", (_opname), (_imm), src64, &dstv, &res); \ 391 } 392#define DO_imm_mscalar_to_r(_opname, _imm, _src) \ 393 { \ 394 V128 dstv; \ 395 V128 res; \ 396 ULong src64 = (ULong)(_src); \ 397 memcpy(dstv, fives, sizeof(dstv)); \ 398 memcpy(res, zeroes, sizeof(res)); \ 399 __asm__ __volatile__( \ 400 "movupd (%0), %%xmm2" "\n\t" /*dstv*/ \ 401 _opname " $" #_imm ", (%1), %%xmm2" "\n\t" \ 402 "movupd %%xmm2, (%2)" "\n" /*res*/ \ 403 : /*out*/ \ 404 : /*in*/ "r"(&dstv), "r"(&src64), "r"(&res) \ 405 : "cc", "memory", "xmm2" \ 406 ); \ 407 showIGVV("m", (_opname), (_imm), src64, &dstv, &res); \ 408 } 409 410#define DO_imm_mandrscalar_to_r(_opname, _imm, _src, _dstsuffix) \ 411 DO_imm_rscalar_to_r( _opname, _imm, _src, _dstsuffix ) \ 412 DO_imm_mscalar_to_r( _opname, _imm, _src ) 413 414 415 416 417 418void test_BLENDPD ( void ) 419{ 420 V128 src, dst; 421 Int i; 422 for (i = 0; i < 10; i++) { 423 randV128(&src); 424 randV128(&dst); 425 DO_imm_mandr_r("blendpd", 0, src, dst); 426 DO_imm_mandr_r("blendpd", 1, src, dst); 427 DO_imm_mandr_r("blendpd", 2, src, dst); 428 DO_imm_mandr_r("blendpd", 3, src, dst); 429 } 430} 431 432void test_BLENDPS ( void ) 433{ 434 V128 src, dst; 435 Int i; 436 for (i = 0; i < 10; i++) { 437 randV128(&src); 438 randV128(&dst); 439 DO_imm_mandr_r("blendps", 0, src, dst); 440 DO_imm_mandr_r("blendps", 1, src, dst); 441 DO_imm_mandr_r("blendps", 2, src, dst); 442 DO_imm_mandr_r("blendps", 3, src, dst); 443 DO_imm_mandr_r("blendps", 4, src, dst); 444 DO_imm_mandr_r("blendps", 5, src, dst); 445 DO_imm_mandr_r("blendps", 6, 
src, dst); 446 DO_imm_mandr_r("blendps", 7, src, dst); 447 DO_imm_mandr_r("blendps", 8, src, dst); 448 DO_imm_mandr_r("blendps", 9, src, dst); 449 DO_imm_mandr_r("blendps", 10, src, dst); 450 DO_imm_mandr_r("blendps", 11, src, dst); 451 DO_imm_mandr_r("blendps", 12, src, dst); 452 DO_imm_mandr_r("blendps", 13, src, dst); 453 DO_imm_mandr_r("blendps", 14, src, dst); 454 DO_imm_mandr_r("blendps", 15, src, dst); 455 } 456} 457 458void test_DPPD ( void ) 459{ 460 V128 src, dst; 461 { 462 *(double*)(&src[0]) = 1.2345; 463 *(double*)(&src[8]) = -6.78910; 464 *(double*)(&dst[0]) = -11.121314; 465 *(double*)(&dst[8]) = 15.161718; 466 DO_imm_mandr_r("dppd", 0, src, dst); 467 DO_imm_mandr_r("dppd", 1, src, dst); 468 DO_imm_mandr_r("dppd", 2, src, dst); 469 DO_imm_mandr_r("dppd", 3, src, dst); 470 DO_imm_mandr_r("dppd", 4, src, dst); 471 DO_imm_mandr_r("dppd", 5, src, dst); 472 DO_imm_mandr_r("dppd", 6, src, dst); 473 DO_imm_mandr_r("dppd", 7, src, dst); 474 DO_imm_mandr_r("dppd", 8, src, dst); 475 DO_imm_mandr_r("dppd", 9, src, dst); 476 DO_imm_mandr_r("dppd", 10, src, dst); 477 DO_imm_mandr_r("dppd", 11, src, dst); 478 DO_imm_mandr_r("dppd", 12, src, dst); 479 DO_imm_mandr_r("dppd", 13, src, dst); 480 DO_imm_mandr_r("dppd", 14, src, dst); 481 DO_imm_mandr_r("dppd", 15, src, dst); 482 DO_imm_mandr_r("dppd", 16, src, dst); 483 DO_imm_mandr_r("dppd", 17, src, dst); 484 DO_imm_mandr_r("dppd", 18, src, dst); 485 DO_imm_mandr_r("dppd", 19, src, dst); 486 DO_imm_mandr_r("dppd", 20, src, dst); 487 DO_imm_mandr_r("dppd", 21, src, dst); 488 DO_imm_mandr_r("dppd", 22, src, dst); 489 DO_imm_mandr_r("dppd", 23, src, dst); 490 DO_imm_mandr_r("dppd", 24, src, dst); 491 DO_imm_mandr_r("dppd", 25, src, dst); 492 DO_imm_mandr_r("dppd", 26, src, dst); 493 DO_imm_mandr_r("dppd", 27, src, dst); 494 DO_imm_mandr_r("dppd", 28, src, dst); 495 DO_imm_mandr_r("dppd", 29, src, dst); 496 DO_imm_mandr_r("dppd", 30, src, dst); 497 DO_imm_mandr_r("dppd", 31, src, dst); 498 DO_imm_mandr_r("dppd", 32, src, 
dst); 499 DO_imm_mandr_r("dppd", 33, src, dst); 500 DO_imm_mandr_r("dppd", 34, src, dst); 501 DO_imm_mandr_r("dppd", 35, src, dst); 502 DO_imm_mandr_r("dppd", 36, src, dst); 503 DO_imm_mandr_r("dppd", 37, src, dst); 504 DO_imm_mandr_r("dppd", 38, src, dst); 505 DO_imm_mandr_r("dppd", 39, src, dst); 506 DO_imm_mandr_r("dppd", 40, src, dst); 507 DO_imm_mandr_r("dppd", 41, src, dst); 508 DO_imm_mandr_r("dppd", 42, src, dst); 509 DO_imm_mandr_r("dppd", 43, src, dst); 510 DO_imm_mandr_r("dppd", 44, src, dst); 511 DO_imm_mandr_r("dppd", 45, src, dst); 512 DO_imm_mandr_r("dppd", 46, src, dst); 513 DO_imm_mandr_r("dppd", 47, src, dst); 514 DO_imm_mandr_r("dppd", 48, src, dst); 515 DO_imm_mandr_r("dppd", 49, src, dst); 516 DO_imm_mandr_r("dppd", 50, src, dst); 517 DO_imm_mandr_r("dppd", 51, src, dst); 518 DO_imm_mandr_r("dppd", 52, src, dst); 519 DO_imm_mandr_r("dppd", 53, src, dst); 520 DO_imm_mandr_r("dppd", 54, src, dst); 521 DO_imm_mandr_r("dppd", 55, src, dst); 522 DO_imm_mandr_r("dppd", 56, src, dst); 523 DO_imm_mandr_r("dppd", 57, src, dst); 524 DO_imm_mandr_r("dppd", 58, src, dst); 525 DO_imm_mandr_r("dppd", 59, src, dst); 526 DO_imm_mandr_r("dppd", 60, src, dst); 527 DO_imm_mandr_r("dppd", 61, src, dst); 528 DO_imm_mandr_r("dppd", 62, src, dst); 529 DO_imm_mandr_r("dppd", 63, src, dst); 530 DO_imm_mandr_r("dppd", 64, src, dst); 531 DO_imm_mandr_r("dppd", 65, src, dst); 532 DO_imm_mandr_r("dppd", 66, src, dst); 533 DO_imm_mandr_r("dppd", 67, src, dst); 534 DO_imm_mandr_r("dppd", 68, src, dst); 535 DO_imm_mandr_r("dppd", 69, src, dst); 536 DO_imm_mandr_r("dppd", 70, src, dst); 537 DO_imm_mandr_r("dppd", 71, src, dst); 538 DO_imm_mandr_r("dppd", 72, src, dst); 539 DO_imm_mandr_r("dppd", 73, src, dst); 540 DO_imm_mandr_r("dppd", 74, src, dst); 541 DO_imm_mandr_r("dppd", 75, src, dst); 542 DO_imm_mandr_r("dppd", 76, src, dst); 543 DO_imm_mandr_r("dppd", 77, src, dst); 544 DO_imm_mandr_r("dppd", 78, src, dst); 545 DO_imm_mandr_r("dppd", 79, src, dst); 546 
DO_imm_mandr_r("dppd", 80, src, dst); 547 DO_imm_mandr_r("dppd", 81, src, dst); 548 DO_imm_mandr_r("dppd", 82, src, dst); 549 DO_imm_mandr_r("dppd", 83, src, dst); 550 DO_imm_mandr_r("dppd", 84, src, dst); 551 DO_imm_mandr_r("dppd", 85, src, dst); 552 DO_imm_mandr_r("dppd", 86, src, dst); 553 DO_imm_mandr_r("dppd", 87, src, dst); 554 DO_imm_mandr_r("dppd", 88, src, dst); 555 DO_imm_mandr_r("dppd", 89, src, dst); 556 DO_imm_mandr_r("dppd", 90, src, dst); 557 DO_imm_mandr_r("dppd", 91, src, dst); 558 DO_imm_mandr_r("dppd", 92, src, dst); 559 DO_imm_mandr_r("dppd", 93, src, dst); 560 DO_imm_mandr_r("dppd", 94, src, dst); 561 DO_imm_mandr_r("dppd", 95, src, dst); 562 DO_imm_mandr_r("dppd", 96, src, dst); 563 DO_imm_mandr_r("dppd", 97, src, dst); 564 DO_imm_mandr_r("dppd", 98, src, dst); 565 DO_imm_mandr_r("dppd", 99, src, dst); 566 DO_imm_mandr_r("dppd", 100, src, dst); 567 DO_imm_mandr_r("dppd", 101, src, dst); 568 DO_imm_mandr_r("dppd", 102, src, dst); 569 DO_imm_mandr_r("dppd", 103, src, dst); 570 DO_imm_mandr_r("dppd", 104, src, dst); 571 DO_imm_mandr_r("dppd", 105, src, dst); 572 DO_imm_mandr_r("dppd", 106, src, dst); 573 DO_imm_mandr_r("dppd", 107, src, dst); 574 DO_imm_mandr_r("dppd", 108, src, dst); 575 DO_imm_mandr_r("dppd", 109, src, dst); 576 DO_imm_mandr_r("dppd", 110, src, dst); 577 DO_imm_mandr_r("dppd", 111, src, dst); 578 DO_imm_mandr_r("dppd", 112, src, dst); 579 DO_imm_mandr_r("dppd", 113, src, dst); 580 DO_imm_mandr_r("dppd", 114, src, dst); 581 DO_imm_mandr_r("dppd", 115, src, dst); 582 DO_imm_mandr_r("dppd", 116, src, dst); 583 DO_imm_mandr_r("dppd", 117, src, dst); 584 DO_imm_mandr_r("dppd", 118, src, dst); 585 DO_imm_mandr_r("dppd", 119, src, dst); 586 DO_imm_mandr_r("dppd", 120, src, dst); 587 DO_imm_mandr_r("dppd", 121, src, dst); 588 DO_imm_mandr_r("dppd", 122, src, dst); 589 DO_imm_mandr_r("dppd", 123, src, dst); 590 DO_imm_mandr_r("dppd", 124, src, dst); 591 DO_imm_mandr_r("dppd", 125, src, dst); 592 DO_imm_mandr_r("dppd", 126, src, dst); 
593 DO_imm_mandr_r("dppd", 127, src, dst); 594 DO_imm_mandr_r("dppd", 128, src, dst); 595 DO_imm_mandr_r("dppd", 129, src, dst); 596 DO_imm_mandr_r("dppd", 130, src, dst); 597 DO_imm_mandr_r("dppd", 131, src, dst); 598 DO_imm_mandr_r("dppd", 132, src, dst); 599 DO_imm_mandr_r("dppd", 133, src, dst); 600 DO_imm_mandr_r("dppd", 134, src, dst); 601 DO_imm_mandr_r("dppd", 135, src, dst); 602 DO_imm_mandr_r("dppd", 136, src, dst); 603 DO_imm_mandr_r("dppd", 137, src, dst); 604 DO_imm_mandr_r("dppd", 138, src, dst); 605 DO_imm_mandr_r("dppd", 139, src, dst); 606 DO_imm_mandr_r("dppd", 140, src, dst); 607 DO_imm_mandr_r("dppd", 141, src, dst); 608 DO_imm_mandr_r("dppd", 142, src, dst); 609 DO_imm_mandr_r("dppd", 143, src, dst); 610 DO_imm_mandr_r("dppd", 144, src, dst); 611 DO_imm_mandr_r("dppd", 145, src, dst); 612 DO_imm_mandr_r("dppd", 146, src, dst); 613 DO_imm_mandr_r("dppd", 147, src, dst); 614 DO_imm_mandr_r("dppd", 148, src, dst); 615 DO_imm_mandr_r("dppd", 149, src, dst); 616 DO_imm_mandr_r("dppd", 150, src, dst); 617 DO_imm_mandr_r("dppd", 151, src, dst); 618 DO_imm_mandr_r("dppd", 152, src, dst); 619 DO_imm_mandr_r("dppd", 153, src, dst); 620 DO_imm_mandr_r("dppd", 154, src, dst); 621 DO_imm_mandr_r("dppd", 155, src, dst); 622 DO_imm_mandr_r("dppd", 156, src, dst); 623 DO_imm_mandr_r("dppd", 157, src, dst); 624 DO_imm_mandr_r("dppd", 158, src, dst); 625 DO_imm_mandr_r("dppd", 159, src, dst); 626 DO_imm_mandr_r("dppd", 160, src, dst); 627 DO_imm_mandr_r("dppd", 161, src, dst); 628 DO_imm_mandr_r("dppd", 162, src, dst); 629 DO_imm_mandr_r("dppd", 163, src, dst); 630 DO_imm_mandr_r("dppd", 164, src, dst); 631 DO_imm_mandr_r("dppd", 165, src, dst); 632 DO_imm_mandr_r("dppd", 166, src, dst); 633 DO_imm_mandr_r("dppd", 167, src, dst); 634 DO_imm_mandr_r("dppd", 168, src, dst); 635 DO_imm_mandr_r("dppd", 169, src, dst); 636 DO_imm_mandr_r("dppd", 170, src, dst); 637 DO_imm_mandr_r("dppd", 171, src, dst); 638 DO_imm_mandr_r("dppd", 172, src, dst); 639 
DO_imm_mandr_r("dppd", 173, src, dst); 640 DO_imm_mandr_r("dppd", 174, src, dst); 641 DO_imm_mandr_r("dppd", 175, src, dst); 642 DO_imm_mandr_r("dppd", 176, src, dst); 643 DO_imm_mandr_r("dppd", 177, src, dst); 644 DO_imm_mandr_r("dppd", 178, src, dst); 645 DO_imm_mandr_r("dppd", 179, src, dst); 646 DO_imm_mandr_r("dppd", 180, src, dst); 647 DO_imm_mandr_r("dppd", 181, src, dst); 648 DO_imm_mandr_r("dppd", 182, src, dst); 649 DO_imm_mandr_r("dppd", 183, src, dst); 650 DO_imm_mandr_r("dppd", 184, src, dst); 651 DO_imm_mandr_r("dppd", 185, src, dst); 652 DO_imm_mandr_r("dppd", 186, src, dst); 653 DO_imm_mandr_r("dppd", 187, src, dst); 654 DO_imm_mandr_r("dppd", 188, src, dst); 655 DO_imm_mandr_r("dppd", 189, src, dst); 656 DO_imm_mandr_r("dppd", 190, src, dst); 657 DO_imm_mandr_r("dppd", 191, src, dst); 658 DO_imm_mandr_r("dppd", 192, src, dst); 659 DO_imm_mandr_r("dppd", 193, src, dst); 660 DO_imm_mandr_r("dppd", 194, src, dst); 661 DO_imm_mandr_r("dppd", 195, src, dst); 662 DO_imm_mandr_r("dppd", 196, src, dst); 663 DO_imm_mandr_r("dppd", 197, src, dst); 664 DO_imm_mandr_r("dppd", 198, src, dst); 665 DO_imm_mandr_r("dppd", 199, src, dst); 666 DO_imm_mandr_r("dppd", 200, src, dst); 667 DO_imm_mandr_r("dppd", 201, src, dst); 668 DO_imm_mandr_r("dppd", 202, src, dst); 669 DO_imm_mandr_r("dppd", 203, src, dst); 670 DO_imm_mandr_r("dppd", 204, src, dst); 671 DO_imm_mandr_r("dppd", 205, src, dst); 672 DO_imm_mandr_r("dppd", 206, src, dst); 673 DO_imm_mandr_r("dppd", 207, src, dst); 674 DO_imm_mandr_r("dppd", 208, src, dst); 675 DO_imm_mandr_r("dppd", 209, src, dst); 676 DO_imm_mandr_r("dppd", 210, src, dst); 677 DO_imm_mandr_r("dppd", 211, src, dst); 678 DO_imm_mandr_r("dppd", 212, src, dst); 679 DO_imm_mandr_r("dppd", 213, src, dst); 680 DO_imm_mandr_r("dppd", 214, src, dst); 681 DO_imm_mandr_r("dppd", 215, src, dst); 682 DO_imm_mandr_r("dppd", 216, src, dst); 683 DO_imm_mandr_r("dppd", 217, src, dst); 684 DO_imm_mandr_r("dppd", 218, src, dst); 685 
/* (body of test_DPPD continues: immediates 219..255) */
      DO_imm_mandr_r("dppd", 219, src, dst);
      DO_imm_mandr_r("dppd", 220, src, dst);
      DO_imm_mandr_r("dppd", 221, src, dst);
      DO_imm_mandr_r("dppd", 222, src, dst);
      DO_imm_mandr_r("dppd", 223, src, dst);
      DO_imm_mandr_r("dppd", 224, src, dst);
      DO_imm_mandr_r("dppd", 225, src, dst);
      DO_imm_mandr_r("dppd", 226, src, dst);
      DO_imm_mandr_r("dppd", 227, src, dst);
      DO_imm_mandr_r("dppd", 228, src, dst);
      DO_imm_mandr_r("dppd", 229, src, dst);
      DO_imm_mandr_r("dppd", 230, src, dst);
      DO_imm_mandr_r("dppd", 231, src, dst);
      DO_imm_mandr_r("dppd", 232, src, dst);
      DO_imm_mandr_r("dppd", 233, src, dst);
      DO_imm_mandr_r("dppd", 234, src, dst);
      DO_imm_mandr_r("dppd", 235, src, dst);
      DO_imm_mandr_r("dppd", 236, src, dst);
      DO_imm_mandr_r("dppd", 237, src, dst);
      DO_imm_mandr_r("dppd", 238, src, dst);
      DO_imm_mandr_r("dppd", 239, src, dst);
      DO_imm_mandr_r("dppd", 240, src, dst);
      DO_imm_mandr_r("dppd", 241, src, dst);
      DO_imm_mandr_r("dppd", 242, src, dst);
      DO_imm_mandr_r("dppd", 243, src, dst);
      DO_imm_mandr_r("dppd", 244, src, dst);
      DO_imm_mandr_r("dppd", 245, src, dst);
      DO_imm_mandr_r("dppd", 246, src, dst);
      DO_imm_mandr_r("dppd", 247, src, dst);
      DO_imm_mandr_r("dppd", 248, src, dst);
      DO_imm_mandr_r("dppd", 249, src, dst);
      DO_imm_mandr_r("dppd", 250, src, dst);
      DO_imm_mandr_r("dppd", 251, src, dst);
      DO_imm_mandr_r("dppd", 252, src, dst);
      DO_imm_mandr_r("dppd", 253, src, dst);
      DO_imm_mandr_r("dppd", 254, src, dst);
      DO_imm_mandr_r("dppd", 255, src, dst);
   }
}

/* DPPS with every 8-bit immediate, on one fixed pair of float
   vectors. */
void test_DPPS ( void )
{
   V128 src, dst;
   {
      /* Fill the byte vectors via memcpy rather than through
         (float*) casts, to stay clear of strict-aliasing trouble.
         The initialisers are deliberately un-suffixed double
         constants converted to float, exactly as a plain assignment
         to a float lvalue would convert them. */
      const float srcF[4] = { 1.2, -3.4, -6.7, 8.9 };
      const float dstF[4] = { -10.11, 12.13, 14.15, -16.17 };
      memcpy(src, srcF, sizeof(src));
      memcpy(dst, dstF, sizeof(dst));
      DO_imm_mandr_r("dpps", 0, src, dst);
      DO_imm_mandr_r("dpps", 1, src, dst);
DO_imm_mandr_r("dpps", 2, src, dst); 740 DO_imm_mandr_r("dpps", 3, src, dst); 741 DO_imm_mandr_r("dpps", 4, src, dst); 742 DO_imm_mandr_r("dpps", 5, src, dst); 743 DO_imm_mandr_r("dpps", 6, src, dst); 744 DO_imm_mandr_r("dpps", 7, src, dst); 745 DO_imm_mandr_r("dpps", 8, src, dst); 746 DO_imm_mandr_r("dpps", 9, src, dst); 747 DO_imm_mandr_r("dpps", 10, src, dst); 748 DO_imm_mandr_r("dpps", 11, src, dst); 749 DO_imm_mandr_r("dpps", 12, src, dst); 750 DO_imm_mandr_r("dpps", 13, src, dst); 751 DO_imm_mandr_r("dpps", 14, src, dst); 752 DO_imm_mandr_r("dpps", 15, src, dst); 753 DO_imm_mandr_r("dpps", 16, src, dst); 754 DO_imm_mandr_r("dpps", 17, src, dst); 755 DO_imm_mandr_r("dpps", 18, src, dst); 756 DO_imm_mandr_r("dpps", 19, src, dst); 757 DO_imm_mandr_r("dpps", 20, src, dst); 758 DO_imm_mandr_r("dpps", 21, src, dst); 759 DO_imm_mandr_r("dpps", 22, src, dst); 760 DO_imm_mandr_r("dpps", 23, src, dst); 761 DO_imm_mandr_r("dpps", 24, src, dst); 762 DO_imm_mandr_r("dpps", 25, src, dst); 763 DO_imm_mandr_r("dpps", 26, src, dst); 764 DO_imm_mandr_r("dpps", 27, src, dst); 765 DO_imm_mandr_r("dpps", 28, src, dst); 766 DO_imm_mandr_r("dpps", 29, src, dst); 767 DO_imm_mandr_r("dpps", 30, src, dst); 768 DO_imm_mandr_r("dpps", 31, src, dst); 769 DO_imm_mandr_r("dpps", 32, src, dst); 770 DO_imm_mandr_r("dpps", 33, src, dst); 771 DO_imm_mandr_r("dpps", 34, src, dst); 772 DO_imm_mandr_r("dpps", 35, src, dst); 773 DO_imm_mandr_r("dpps", 36, src, dst); 774 DO_imm_mandr_r("dpps", 37, src, dst); 775 DO_imm_mandr_r("dpps", 38, src, dst); 776 DO_imm_mandr_r("dpps", 39, src, dst); 777 DO_imm_mandr_r("dpps", 40, src, dst); 778 DO_imm_mandr_r("dpps", 41, src, dst); 779 DO_imm_mandr_r("dpps", 42, src, dst); 780 DO_imm_mandr_r("dpps", 43, src, dst); 781 DO_imm_mandr_r("dpps", 44, src, dst); 782 DO_imm_mandr_r("dpps", 45, src, dst); 783 DO_imm_mandr_r("dpps", 46, src, dst); 784 DO_imm_mandr_r("dpps", 47, src, dst); 785 DO_imm_mandr_r("dpps", 48, src, dst); 786 DO_imm_mandr_r("dpps", 49, src, 
dst); 787 DO_imm_mandr_r("dpps", 50, src, dst); 788 DO_imm_mandr_r("dpps", 51, src, dst); 789 DO_imm_mandr_r("dpps", 52, src, dst); 790 DO_imm_mandr_r("dpps", 53, src, dst); 791 DO_imm_mandr_r("dpps", 54, src, dst); 792 DO_imm_mandr_r("dpps", 55, src, dst); 793 DO_imm_mandr_r("dpps", 56, src, dst); 794 DO_imm_mandr_r("dpps", 57, src, dst); 795 DO_imm_mandr_r("dpps", 58, src, dst); 796 DO_imm_mandr_r("dpps", 59, src, dst); 797 DO_imm_mandr_r("dpps", 60, src, dst); 798 DO_imm_mandr_r("dpps", 61, src, dst); 799 DO_imm_mandr_r("dpps", 62, src, dst); 800 DO_imm_mandr_r("dpps", 63, src, dst); 801 DO_imm_mandr_r("dpps", 64, src, dst); 802 DO_imm_mandr_r("dpps", 65, src, dst); 803 DO_imm_mandr_r("dpps", 66, src, dst); 804 DO_imm_mandr_r("dpps", 67, src, dst); 805 DO_imm_mandr_r("dpps", 68, src, dst); 806 DO_imm_mandr_r("dpps", 69, src, dst); 807 DO_imm_mandr_r("dpps", 70, src, dst); 808 DO_imm_mandr_r("dpps", 71, src, dst); 809 DO_imm_mandr_r("dpps", 72, src, dst); 810 DO_imm_mandr_r("dpps", 73, src, dst); 811 DO_imm_mandr_r("dpps", 74, src, dst); 812 DO_imm_mandr_r("dpps", 75, src, dst); 813 DO_imm_mandr_r("dpps", 76, src, dst); 814 DO_imm_mandr_r("dpps", 77, src, dst); 815 DO_imm_mandr_r("dpps", 78, src, dst); 816 DO_imm_mandr_r("dpps", 79, src, dst); 817 DO_imm_mandr_r("dpps", 80, src, dst); 818 DO_imm_mandr_r("dpps", 81, src, dst); 819 DO_imm_mandr_r("dpps", 82, src, dst); 820 DO_imm_mandr_r("dpps", 83, src, dst); 821 DO_imm_mandr_r("dpps", 84, src, dst); 822 DO_imm_mandr_r("dpps", 85, src, dst); 823 DO_imm_mandr_r("dpps", 86, src, dst); 824 DO_imm_mandr_r("dpps", 87, src, dst); 825 DO_imm_mandr_r("dpps", 88, src, dst); 826 DO_imm_mandr_r("dpps", 89, src, dst); 827 DO_imm_mandr_r("dpps", 90, src, dst); 828 DO_imm_mandr_r("dpps", 91, src, dst); 829 DO_imm_mandr_r("dpps", 92, src, dst); 830 DO_imm_mandr_r("dpps", 93, src, dst); 831 DO_imm_mandr_r("dpps", 94, src, dst); 832 DO_imm_mandr_r("dpps", 95, src, dst); 833 DO_imm_mandr_r("dpps", 96, src, dst); 834 
DO_imm_mandr_r("dpps", 97, src, dst); 835 DO_imm_mandr_r("dpps", 98, src, dst); 836 DO_imm_mandr_r("dpps", 99, src, dst); 837 DO_imm_mandr_r("dpps", 100, src, dst); 838 DO_imm_mandr_r("dpps", 101, src, dst); 839 DO_imm_mandr_r("dpps", 102, src, dst); 840 DO_imm_mandr_r("dpps", 103, src, dst); 841 DO_imm_mandr_r("dpps", 104, src, dst); 842 DO_imm_mandr_r("dpps", 105, src, dst); 843 DO_imm_mandr_r("dpps", 106, src, dst); 844 DO_imm_mandr_r("dpps", 107, src, dst); 845 DO_imm_mandr_r("dpps", 108, src, dst); 846 DO_imm_mandr_r("dpps", 109, src, dst); 847 DO_imm_mandr_r("dpps", 110, src, dst); 848 DO_imm_mandr_r("dpps", 111, src, dst); 849 DO_imm_mandr_r("dpps", 112, src, dst); 850 DO_imm_mandr_r("dpps", 113, src, dst); 851 DO_imm_mandr_r("dpps", 114, src, dst); 852 DO_imm_mandr_r("dpps", 115, src, dst); 853 DO_imm_mandr_r("dpps", 116, src, dst); 854 DO_imm_mandr_r("dpps", 117, src, dst); 855 DO_imm_mandr_r("dpps", 118, src, dst); 856 DO_imm_mandr_r("dpps", 119, src, dst); 857 DO_imm_mandr_r("dpps", 120, src, dst); 858 DO_imm_mandr_r("dpps", 121, src, dst); 859 DO_imm_mandr_r("dpps", 122, src, dst); 860 DO_imm_mandr_r("dpps", 123, src, dst); 861 DO_imm_mandr_r("dpps", 124, src, dst); 862 DO_imm_mandr_r("dpps", 125, src, dst); 863 DO_imm_mandr_r("dpps", 126, src, dst); 864 DO_imm_mandr_r("dpps", 127, src, dst); 865 DO_imm_mandr_r("dpps", 128, src, dst); 866 DO_imm_mandr_r("dpps", 129, src, dst); 867 DO_imm_mandr_r("dpps", 130, src, dst); 868 DO_imm_mandr_r("dpps", 131, src, dst); 869 DO_imm_mandr_r("dpps", 132, src, dst); 870 DO_imm_mandr_r("dpps", 133, src, dst); 871 DO_imm_mandr_r("dpps", 134, src, dst); 872 DO_imm_mandr_r("dpps", 135, src, dst); 873 DO_imm_mandr_r("dpps", 136, src, dst); 874 DO_imm_mandr_r("dpps", 137, src, dst); 875 DO_imm_mandr_r("dpps", 138, src, dst); 876 DO_imm_mandr_r("dpps", 139, src, dst); 877 DO_imm_mandr_r("dpps", 140, src, dst); 878 DO_imm_mandr_r("dpps", 141, src, dst); 879 DO_imm_mandr_r("dpps", 142, src, dst); 880 DO_imm_mandr_r("dpps", 
143, src, dst); 881 DO_imm_mandr_r("dpps", 144, src, dst); 882 DO_imm_mandr_r("dpps", 145, src, dst); 883 DO_imm_mandr_r("dpps", 146, src, dst); 884 DO_imm_mandr_r("dpps", 147, src, dst); 885 DO_imm_mandr_r("dpps", 148, src, dst); 886 DO_imm_mandr_r("dpps", 149, src, dst); 887 DO_imm_mandr_r("dpps", 150, src, dst); 888 DO_imm_mandr_r("dpps", 151, src, dst); 889 DO_imm_mandr_r("dpps", 152, src, dst); 890 DO_imm_mandr_r("dpps", 153, src, dst); 891 DO_imm_mandr_r("dpps", 154, src, dst); 892 DO_imm_mandr_r("dpps", 155, src, dst); 893 DO_imm_mandr_r("dpps", 156, src, dst); 894 DO_imm_mandr_r("dpps", 157, src, dst); 895 DO_imm_mandr_r("dpps", 158, src, dst); 896 DO_imm_mandr_r("dpps", 159, src, dst); 897 DO_imm_mandr_r("dpps", 160, src, dst); 898 DO_imm_mandr_r("dpps", 161, src, dst); 899 DO_imm_mandr_r("dpps", 162, src, dst); 900 DO_imm_mandr_r("dpps", 163, src, dst); 901 DO_imm_mandr_r("dpps", 164, src, dst); 902 DO_imm_mandr_r("dpps", 165, src, dst); 903 DO_imm_mandr_r("dpps", 166, src, dst); 904 DO_imm_mandr_r("dpps", 167, src, dst); 905 DO_imm_mandr_r("dpps", 168, src, dst); 906 DO_imm_mandr_r("dpps", 169, src, dst); 907 DO_imm_mandr_r("dpps", 170, src, dst); 908 DO_imm_mandr_r("dpps", 171, src, dst); 909 DO_imm_mandr_r("dpps", 172, src, dst); 910 DO_imm_mandr_r("dpps", 173, src, dst); 911 DO_imm_mandr_r("dpps", 174, src, dst); 912 DO_imm_mandr_r("dpps", 175, src, dst); 913 DO_imm_mandr_r("dpps", 176, src, dst); 914 DO_imm_mandr_r("dpps", 177, src, dst); 915 DO_imm_mandr_r("dpps", 178, src, dst); 916 DO_imm_mandr_r("dpps", 179, src, dst); 917 DO_imm_mandr_r("dpps", 180, src, dst); 918 DO_imm_mandr_r("dpps", 181, src, dst); 919 DO_imm_mandr_r("dpps", 182, src, dst); 920 DO_imm_mandr_r("dpps", 183, src, dst); 921 DO_imm_mandr_r("dpps", 184, src, dst); 922 DO_imm_mandr_r("dpps", 185, src, dst); 923 DO_imm_mandr_r("dpps", 186, src, dst); 924 DO_imm_mandr_r("dpps", 187, src, dst); 925 DO_imm_mandr_r("dpps", 188, src, dst); 926 DO_imm_mandr_r("dpps", 189, src, dst); 927 
DO_imm_mandr_r("dpps", 190, src, dst); 928 DO_imm_mandr_r("dpps", 191, src, dst); 929 DO_imm_mandr_r("dpps", 192, src, dst); 930 DO_imm_mandr_r("dpps", 193, src, dst); 931 DO_imm_mandr_r("dpps", 194, src, dst); 932 DO_imm_mandr_r("dpps", 195, src, dst); 933 DO_imm_mandr_r("dpps", 196, src, dst); 934 DO_imm_mandr_r("dpps", 197, src, dst); 935 DO_imm_mandr_r("dpps", 198, src, dst); 936 DO_imm_mandr_r("dpps", 199, src, dst); 937 DO_imm_mandr_r("dpps", 200, src, dst); 938 DO_imm_mandr_r("dpps", 201, src, dst); 939 DO_imm_mandr_r("dpps", 202, src, dst); 940 DO_imm_mandr_r("dpps", 203, src, dst); 941 DO_imm_mandr_r("dpps", 204, src, dst); 942 DO_imm_mandr_r("dpps", 205, src, dst); 943 DO_imm_mandr_r("dpps", 206, src, dst); 944 DO_imm_mandr_r("dpps", 207, src, dst); 945 DO_imm_mandr_r("dpps", 208, src, dst); 946 DO_imm_mandr_r("dpps", 209, src, dst); 947 DO_imm_mandr_r("dpps", 210, src, dst); 948 DO_imm_mandr_r("dpps", 211, src, dst); 949 DO_imm_mandr_r("dpps", 212, src, dst); 950 DO_imm_mandr_r("dpps", 213, src, dst); 951 DO_imm_mandr_r("dpps", 214, src, dst); 952 DO_imm_mandr_r("dpps", 215, src, dst); 953 DO_imm_mandr_r("dpps", 216, src, dst); 954 DO_imm_mandr_r("dpps", 217, src, dst); 955 DO_imm_mandr_r("dpps", 218, src, dst); 956 DO_imm_mandr_r("dpps", 219, src, dst); 957 DO_imm_mandr_r("dpps", 220, src, dst); 958 DO_imm_mandr_r("dpps", 221, src, dst); 959 DO_imm_mandr_r("dpps", 222, src, dst); 960 DO_imm_mandr_r("dpps", 223, src, dst); 961 DO_imm_mandr_r("dpps", 224, src, dst); 962 DO_imm_mandr_r("dpps", 225, src, dst); 963 DO_imm_mandr_r("dpps", 226, src, dst); 964 DO_imm_mandr_r("dpps", 227, src, dst); 965 DO_imm_mandr_r("dpps", 228, src, dst); 966 DO_imm_mandr_r("dpps", 229, src, dst); 967 DO_imm_mandr_r("dpps", 230, src, dst); 968 DO_imm_mandr_r("dpps", 231, src, dst); 969 DO_imm_mandr_r("dpps", 232, src, dst); 970 DO_imm_mandr_r("dpps", 233, src, dst); 971 DO_imm_mandr_r("dpps", 234, src, dst); 972 DO_imm_mandr_r("dpps", 235, src, dst); 973 
DO_imm_mandr_r("dpps", 236, src, dst); 974 DO_imm_mandr_r("dpps", 237, src, dst); 975 DO_imm_mandr_r("dpps", 238, src, dst); 976 DO_imm_mandr_r("dpps", 239, src, dst); 977 DO_imm_mandr_r("dpps", 240, src, dst); 978 DO_imm_mandr_r("dpps", 241, src, dst); 979 DO_imm_mandr_r("dpps", 242, src, dst); 980 DO_imm_mandr_r("dpps", 243, src, dst); 981 DO_imm_mandr_r("dpps", 244, src, dst); 982 DO_imm_mandr_r("dpps", 245, src, dst); 983 DO_imm_mandr_r("dpps", 246, src, dst); 984 DO_imm_mandr_r("dpps", 247, src, dst); 985 DO_imm_mandr_r("dpps", 248, src, dst); 986 DO_imm_mandr_r("dpps", 249, src, dst); 987 DO_imm_mandr_r("dpps", 250, src, dst); 988 DO_imm_mandr_r("dpps", 251, src, dst); 989 DO_imm_mandr_r("dpps", 252, src, dst); 990 DO_imm_mandr_r("dpps", 253, src, dst); 991 DO_imm_mandr_r("dpps", 254, src, dst); 992 DO_imm_mandr_r("dpps", 255, src, dst); 993 } 994} 995 996void test_INSERTPS ( void ) 997{ 998 V128 src, dst; 999 { 1000 *(float*)(&src[0]) = 1.2; 1001 *(float*)(&src[4]) = -3.4; 1002 *(float*)(&src[8]) = -6.7; 1003 *(float*)(&src[12]) = 8.9; 1004 *(float*)(&dst[0]) = -10.11; 1005 *(float*)(&dst[4]) = 12.13; 1006 *(float*)(&dst[8]) = 14.15; 1007 *(float*)(&dst[12]) = -16.17; 1008 DO_imm_mandr_r("insertps", 0, src, dst); 1009 DO_imm_mandr_r("insertps", 1, src, dst); 1010 DO_imm_mandr_r("insertps", 2, src, dst); 1011 DO_imm_mandr_r("insertps", 3, src, dst); 1012 DO_imm_mandr_r("insertps", 4, src, dst); 1013 DO_imm_mandr_r("insertps", 5, src, dst); 1014 DO_imm_mandr_r("insertps", 6, src, dst); 1015 DO_imm_mandr_r("insertps", 7, src, dst); 1016 DO_imm_mandr_r("insertps", 8, src, dst); 1017 DO_imm_mandr_r("insertps", 9, src, dst); 1018 DO_imm_mandr_r("insertps", 10, src, dst); 1019 DO_imm_mandr_r("insertps", 11, src, dst); 1020 DO_imm_mandr_r("insertps", 12, src, dst); 1021 DO_imm_mandr_r("insertps", 13, src, dst); 1022 DO_imm_mandr_r("insertps", 14, src, dst); 1023 DO_imm_mandr_r("insertps", 15, src, dst); 1024 DO_imm_mandr_r("insertps", 16, src, dst); 1025 
DO_imm_mandr_r("insertps", 17, src, dst); 1026 DO_imm_mandr_r("insertps", 18, src, dst); 1027 DO_imm_mandr_r("insertps", 19, src, dst); 1028 DO_imm_mandr_r("insertps", 20, src, dst); 1029 DO_imm_mandr_r("insertps", 21, src, dst); 1030 DO_imm_mandr_r("insertps", 22, src, dst); 1031 DO_imm_mandr_r("insertps", 23, src, dst); 1032 DO_imm_mandr_r("insertps", 24, src, dst); 1033 DO_imm_mandr_r("insertps", 25, src, dst); 1034 DO_imm_mandr_r("insertps", 26, src, dst); 1035 DO_imm_mandr_r("insertps", 27, src, dst); 1036 DO_imm_mandr_r("insertps", 28, src, dst); 1037 DO_imm_mandr_r("insertps", 29, src, dst); 1038 DO_imm_mandr_r("insertps", 30, src, dst); 1039 DO_imm_mandr_r("insertps", 31, src, dst); 1040 DO_imm_mandr_r("insertps", 32, src, dst); 1041 DO_imm_mandr_r("insertps", 33, src, dst); 1042 DO_imm_mandr_r("insertps", 34, src, dst); 1043 DO_imm_mandr_r("insertps", 35, src, dst); 1044 DO_imm_mandr_r("insertps", 36, src, dst); 1045 DO_imm_mandr_r("insertps", 37, src, dst); 1046 DO_imm_mandr_r("insertps", 38, src, dst); 1047 DO_imm_mandr_r("insertps", 39, src, dst); 1048 DO_imm_mandr_r("insertps", 40, src, dst); 1049 DO_imm_mandr_r("insertps", 41, src, dst); 1050 DO_imm_mandr_r("insertps", 42, src, dst); 1051 DO_imm_mandr_r("insertps", 43, src, dst); 1052 DO_imm_mandr_r("insertps", 44, src, dst); 1053 DO_imm_mandr_r("insertps", 45, src, dst); 1054 DO_imm_mandr_r("insertps", 46, src, dst); 1055 DO_imm_mandr_r("insertps", 47, src, dst); 1056 DO_imm_mandr_r("insertps", 48, src, dst); 1057 DO_imm_mandr_r("insertps", 49, src, dst); 1058 DO_imm_mandr_r("insertps", 50, src, dst); 1059 DO_imm_mandr_r("insertps", 51, src, dst); 1060 DO_imm_mandr_r("insertps", 52, src, dst); 1061 DO_imm_mandr_r("insertps", 53, src, dst); 1062 DO_imm_mandr_r("insertps", 54, src, dst); 1063 DO_imm_mandr_r("insertps", 55, src, dst); 1064 DO_imm_mandr_r("insertps", 56, src, dst); 1065 DO_imm_mandr_r("insertps", 57, src, dst); 1066 DO_imm_mandr_r("insertps", 58, src, dst); 1067 
DO_imm_mandr_r("insertps", 59, src, dst); 1068 DO_imm_mandr_r("insertps", 60, src, dst); 1069 DO_imm_mandr_r("insertps", 61, src, dst); 1070 DO_imm_mandr_r("insertps", 62, src, dst); 1071 DO_imm_mandr_r("insertps", 63, src, dst); 1072 DO_imm_mandr_r("insertps", 64, src, dst); 1073 DO_imm_mandr_r("insertps", 65, src, dst); 1074 DO_imm_mandr_r("insertps", 66, src, dst); 1075 DO_imm_mandr_r("insertps", 67, src, dst); 1076 DO_imm_mandr_r("insertps", 68, src, dst); 1077 DO_imm_mandr_r("insertps", 69, src, dst); 1078 DO_imm_mandr_r("insertps", 70, src, dst); 1079 DO_imm_mandr_r("insertps", 71, src, dst); 1080 DO_imm_mandr_r("insertps", 72, src, dst); 1081 DO_imm_mandr_r("insertps", 73, src, dst); 1082 DO_imm_mandr_r("insertps", 74, src, dst); 1083 DO_imm_mandr_r("insertps", 75, src, dst); 1084 DO_imm_mandr_r("insertps", 76, src, dst); 1085 DO_imm_mandr_r("insertps", 77, src, dst); 1086 DO_imm_mandr_r("insertps", 78, src, dst); 1087 DO_imm_mandr_r("insertps", 79, src, dst); 1088 DO_imm_mandr_r("insertps", 80, src, dst); 1089 DO_imm_mandr_r("insertps", 81, src, dst); 1090 DO_imm_mandr_r("insertps", 82, src, dst); 1091 DO_imm_mandr_r("insertps", 83, src, dst); 1092 DO_imm_mandr_r("insertps", 84, src, dst); 1093 DO_imm_mandr_r("insertps", 85, src, dst); 1094 DO_imm_mandr_r("insertps", 86, src, dst); 1095 DO_imm_mandr_r("insertps", 87, src, dst); 1096 DO_imm_mandr_r("insertps", 88, src, dst); 1097 DO_imm_mandr_r("insertps", 89, src, dst); 1098 DO_imm_mandr_r("insertps", 90, src, dst); 1099 DO_imm_mandr_r("insertps", 91, src, dst); 1100 DO_imm_mandr_r("insertps", 92, src, dst); 1101 DO_imm_mandr_r("insertps", 93, src, dst); 1102 DO_imm_mandr_r("insertps", 94, src, dst); 1103 DO_imm_mandr_r("insertps", 95, src, dst); 1104 DO_imm_mandr_r("insertps", 96, src, dst); 1105 DO_imm_mandr_r("insertps", 97, src, dst); 1106 DO_imm_mandr_r("insertps", 98, src, dst); 1107 DO_imm_mandr_r("insertps", 99, src, dst); 1108 DO_imm_mandr_r("insertps", 100, src, dst); 1109 
DO_imm_mandr_r("insertps", 101, src, dst); 1110 DO_imm_mandr_r("insertps", 102, src, dst); 1111 DO_imm_mandr_r("insertps", 103, src, dst); 1112 DO_imm_mandr_r("insertps", 104, src, dst); 1113 DO_imm_mandr_r("insertps", 105, src, dst); 1114 DO_imm_mandr_r("insertps", 106, src, dst); 1115 DO_imm_mandr_r("insertps", 107, src, dst); 1116 DO_imm_mandr_r("insertps", 108, src, dst); 1117 DO_imm_mandr_r("insertps", 109, src, dst); 1118 DO_imm_mandr_r("insertps", 110, src, dst); 1119 DO_imm_mandr_r("insertps", 111, src, dst); 1120 DO_imm_mandr_r("insertps", 112, src, dst); 1121 DO_imm_mandr_r("insertps", 113, src, dst); 1122 DO_imm_mandr_r("insertps", 114, src, dst); 1123 DO_imm_mandr_r("insertps", 115, src, dst); 1124 DO_imm_mandr_r("insertps", 116, src, dst); 1125 DO_imm_mandr_r("insertps", 117, src, dst); 1126 DO_imm_mandr_r("insertps", 118, src, dst); 1127 DO_imm_mandr_r("insertps", 119, src, dst); 1128 DO_imm_mandr_r("insertps", 120, src, dst); 1129 DO_imm_mandr_r("insertps", 121, src, dst); 1130 DO_imm_mandr_r("insertps", 122, src, dst); 1131 DO_imm_mandr_r("insertps", 123, src, dst); 1132 DO_imm_mandr_r("insertps", 124, src, dst); 1133 DO_imm_mandr_r("insertps", 125, src, dst); 1134 DO_imm_mandr_r("insertps", 126, src, dst); 1135 DO_imm_mandr_r("insertps", 127, src, dst); 1136 DO_imm_mandr_r("insertps", 128, src, dst); 1137 DO_imm_mandr_r("insertps", 129, src, dst); 1138 DO_imm_mandr_r("insertps", 130, src, dst); 1139 DO_imm_mandr_r("insertps", 131, src, dst); 1140 DO_imm_mandr_r("insertps", 132, src, dst); 1141 DO_imm_mandr_r("insertps", 133, src, dst); 1142 DO_imm_mandr_r("insertps", 134, src, dst); 1143 DO_imm_mandr_r("insertps", 135, src, dst); 1144 DO_imm_mandr_r("insertps", 136, src, dst); 1145 DO_imm_mandr_r("insertps", 137, src, dst); 1146 DO_imm_mandr_r("insertps", 138, src, dst); 1147 DO_imm_mandr_r("insertps", 139, src, dst); 1148 DO_imm_mandr_r("insertps", 140, src, dst); 1149 DO_imm_mandr_r("insertps", 141, src, dst); 1150 DO_imm_mandr_r("insertps", 142, 
src, dst); 1151 DO_imm_mandr_r("insertps", 143, src, dst); 1152 DO_imm_mandr_r("insertps", 144, src, dst); 1153 DO_imm_mandr_r("insertps", 145, src, dst); 1154 DO_imm_mandr_r("insertps", 146, src, dst); 1155 DO_imm_mandr_r("insertps", 147, src, dst); 1156 DO_imm_mandr_r("insertps", 148, src, dst); 1157 DO_imm_mandr_r("insertps", 149, src, dst); 1158 DO_imm_mandr_r("insertps", 150, src, dst); 1159 DO_imm_mandr_r("insertps", 151, src, dst); 1160 DO_imm_mandr_r("insertps", 152, src, dst); 1161 DO_imm_mandr_r("insertps", 153, src, dst); 1162 DO_imm_mandr_r("insertps", 154, src, dst); 1163 DO_imm_mandr_r("insertps", 155, src, dst); 1164 DO_imm_mandr_r("insertps", 156, src, dst); 1165 DO_imm_mandr_r("insertps", 157, src, dst); 1166 DO_imm_mandr_r("insertps", 158, src, dst); 1167 DO_imm_mandr_r("insertps", 159, src, dst); 1168 DO_imm_mandr_r("insertps", 160, src, dst); 1169 DO_imm_mandr_r("insertps", 161, src, dst); 1170 DO_imm_mandr_r("insertps", 162, src, dst); 1171 DO_imm_mandr_r("insertps", 163, src, dst); 1172 DO_imm_mandr_r("insertps", 164, src, dst); 1173 DO_imm_mandr_r("insertps", 165, src, dst); 1174 DO_imm_mandr_r("insertps", 166, src, dst); 1175 DO_imm_mandr_r("insertps", 167, src, dst); 1176 DO_imm_mandr_r("insertps", 168, src, dst); 1177 DO_imm_mandr_r("insertps", 169, src, dst); 1178 DO_imm_mandr_r("insertps", 170, src, dst); 1179 DO_imm_mandr_r("insertps", 171, src, dst); 1180 DO_imm_mandr_r("insertps", 172, src, dst); 1181 DO_imm_mandr_r("insertps", 173, src, dst); 1182 DO_imm_mandr_r("insertps", 174, src, dst); 1183 DO_imm_mandr_r("insertps", 175, src, dst); 1184 DO_imm_mandr_r("insertps", 176, src, dst); 1185 DO_imm_mandr_r("insertps", 177, src, dst); 1186 DO_imm_mandr_r("insertps", 178, src, dst); 1187 DO_imm_mandr_r("insertps", 179, src, dst); 1188 DO_imm_mandr_r("insertps", 180, src, dst); 1189 DO_imm_mandr_r("insertps", 181, src, dst); 1190 DO_imm_mandr_r("insertps", 182, src, dst); 1191 DO_imm_mandr_r("insertps", 183, src, dst); 1192 
DO_imm_mandr_r("insertps", 184, src, dst); 1193 DO_imm_mandr_r("insertps", 185, src, dst); 1194 DO_imm_mandr_r("insertps", 186, src, dst); 1195 DO_imm_mandr_r("insertps", 187, src, dst); 1196 DO_imm_mandr_r("insertps", 188, src, dst); 1197 DO_imm_mandr_r("insertps", 189, src, dst); 1198 DO_imm_mandr_r("insertps", 190, src, dst); 1199 DO_imm_mandr_r("insertps", 191, src, dst); 1200 DO_imm_mandr_r("insertps", 192, src, dst); 1201 DO_imm_mandr_r("insertps", 193, src, dst); 1202 DO_imm_mandr_r("insertps", 194, src, dst); 1203 DO_imm_mandr_r("insertps", 195, src, dst); 1204 DO_imm_mandr_r("insertps", 196, src, dst); 1205 DO_imm_mandr_r("insertps", 197, src, dst); 1206 DO_imm_mandr_r("insertps", 198, src, dst); 1207 DO_imm_mandr_r("insertps", 199, src, dst); 1208 DO_imm_mandr_r("insertps", 200, src, dst); 1209 DO_imm_mandr_r("insertps", 201, src, dst); 1210 DO_imm_mandr_r("insertps", 202, src, dst); 1211 DO_imm_mandr_r("insertps", 203, src, dst); 1212 DO_imm_mandr_r("insertps", 204, src, dst); 1213 DO_imm_mandr_r("insertps", 205, src, dst); 1214 DO_imm_mandr_r("insertps", 206, src, dst); 1215 DO_imm_mandr_r("insertps", 207, src, dst); 1216 DO_imm_mandr_r("insertps", 208, src, dst); 1217 DO_imm_mandr_r("insertps", 209, src, dst); 1218 DO_imm_mandr_r("insertps", 210, src, dst); 1219 DO_imm_mandr_r("insertps", 211, src, dst); 1220 DO_imm_mandr_r("insertps", 212, src, dst); 1221 DO_imm_mandr_r("insertps", 213, src, dst); 1222 DO_imm_mandr_r("insertps", 214, src, dst); 1223 DO_imm_mandr_r("insertps", 215, src, dst); 1224 DO_imm_mandr_r("insertps", 216, src, dst); 1225 DO_imm_mandr_r("insertps", 217, src, dst); 1226 DO_imm_mandr_r("insertps", 218, src, dst); 1227 DO_imm_mandr_r("insertps", 219, src, dst); 1228 DO_imm_mandr_r("insertps", 220, src, dst); 1229 DO_imm_mandr_r("insertps", 221, src, dst); 1230 DO_imm_mandr_r("insertps", 222, src, dst); 1231 DO_imm_mandr_r("insertps", 223, src, dst); 1232 DO_imm_mandr_r("insertps", 224, src, dst); 1233 DO_imm_mandr_r("insertps", 225, 
src, dst); 1234 DO_imm_mandr_r("insertps", 226, src, dst); 1235 DO_imm_mandr_r("insertps", 227, src, dst); 1236 DO_imm_mandr_r("insertps", 228, src, dst); 1237 DO_imm_mandr_r("insertps", 229, src, dst); 1238 DO_imm_mandr_r("insertps", 230, src, dst); 1239 DO_imm_mandr_r("insertps", 231, src, dst); 1240 DO_imm_mandr_r("insertps", 232, src, dst); 1241 DO_imm_mandr_r("insertps", 233, src, dst); 1242 DO_imm_mandr_r("insertps", 234, src, dst); 1243 DO_imm_mandr_r("insertps", 235, src, dst); 1244 DO_imm_mandr_r("insertps", 236, src, dst); 1245 DO_imm_mandr_r("insertps", 237, src, dst); 1246 DO_imm_mandr_r("insertps", 238, src, dst); 1247 DO_imm_mandr_r("insertps", 239, src, dst); 1248 DO_imm_mandr_r("insertps", 240, src, dst); 1249 DO_imm_mandr_r("insertps", 241, src, dst); 1250 DO_imm_mandr_r("insertps", 242, src, dst); 1251 DO_imm_mandr_r("insertps", 243, src, dst); 1252 DO_imm_mandr_r("insertps", 244, src, dst); 1253 DO_imm_mandr_r("insertps", 245, src, dst); 1254 DO_imm_mandr_r("insertps", 246, src, dst); 1255 DO_imm_mandr_r("insertps", 247, src, dst); 1256 DO_imm_mandr_r("insertps", 248, src, dst); 1257 DO_imm_mandr_r("insertps", 249, src, dst); 1258 DO_imm_mandr_r("insertps", 250, src, dst); 1259 DO_imm_mandr_r("insertps", 251, src, dst); 1260 DO_imm_mandr_r("insertps", 252, src, dst); 1261 DO_imm_mandr_r("insertps", 253, src, dst); 1262 DO_imm_mandr_r("insertps", 254, src, dst); 1263 DO_imm_mandr_r("insertps", 255, src, dst); 1264 } 1265} 1266 1267void test_MPSADBW ( void ) 1268{ 1269 V128 src, dst; 1270 Int i; 1271 for (i = 0; i < 50; i++) { 1272 randV128(&src); 1273 randV128(&dst); 1274 DO_imm_mandr_r("mpsadbw", 0, src, dst); 1275 DO_imm_mandr_r("mpsadbw", 1, src, dst); 1276 DO_imm_mandr_r("mpsadbw", 2, src, dst); 1277 DO_imm_mandr_r("mpsadbw", 3, src, dst); 1278 DO_imm_mandr_r("mpsadbw", 4, src, dst); 1279 DO_imm_mandr_r("mpsadbw", 5, src, dst); 1280 DO_imm_mandr_r("mpsadbw", 6, src, dst); 1281 DO_imm_mandr_r("mpsadbw", 7, src, dst); 1282 } 1283} 1284 1285void 
test_PACKUSDW ( void ) 1286{ 1287 V128 src, dst; 1288 Int i; 1289 for (i = 0; i < 10; i++) { 1290 if (i < 9) { 1291 randV128(&src); 1292 randV128(&dst); 1293 } else { 1294 memset(&src, 0, sizeof(src)); 1295 memset(&dst, 0, sizeof(src)); 1296 src[0] = 0x11; src[1] = 0x22; 1297 src[4] = 0x33; src[5] = 0x44; 1298 src[8] = 0x55; src[9] = 0x66; 1299 src[12] = 0x77; src[13] = 0x88; 1300 dst[0] = 0xaa; dst[1] = 0xbb; 1301 dst[4] = 0xcc; dst[5] = 0xdd; 1302 dst[8] = 0xee; dst[9] = 0xff; 1303 dst[12] = 0xa1; dst[13] = 0xb2; 1304 } 1305 DO_mandr_r("packusdw", src, dst); 1306 } 1307} 1308 1309void test_PBLENDW ( void ) 1310{ 1311 V128 src, dst; 1312 randV128(&src); 1313 randV128(&dst); 1314 { 1315 DO_imm_mandr_r("pblendw", 0, src, dst); 1316 DO_imm_mandr_r("pblendw", 1, src, dst); 1317 DO_imm_mandr_r("pblendw", 2, src, dst); 1318 DO_imm_mandr_r("pblendw", 3, src, dst); 1319 DO_imm_mandr_r("pblendw", 4, src, dst); 1320 DO_imm_mandr_r("pblendw", 5, src, dst); 1321 DO_imm_mandr_r("pblendw", 6, src, dst); 1322 DO_imm_mandr_r("pblendw", 7, src, dst); 1323 DO_imm_mandr_r("pblendw", 8, src, dst); 1324 DO_imm_mandr_r("pblendw", 9, src, dst); 1325 DO_imm_mandr_r("pblendw", 10, src, dst); 1326 DO_imm_mandr_r("pblendw", 11, src, dst); 1327 DO_imm_mandr_r("pblendw", 12, src, dst); 1328 DO_imm_mandr_r("pblendw", 13, src, dst); 1329 DO_imm_mandr_r("pblendw", 14, src, dst); 1330 DO_imm_mandr_r("pblendw", 15, src, dst); 1331 DO_imm_mandr_r("pblendw", 16, src, dst); 1332 DO_imm_mandr_r("pblendw", 17, src, dst); 1333 DO_imm_mandr_r("pblendw", 18, src, dst); 1334 DO_imm_mandr_r("pblendw", 19, src, dst); 1335 DO_imm_mandr_r("pblendw", 20, src, dst); 1336 DO_imm_mandr_r("pblendw", 21, src, dst); 1337 DO_imm_mandr_r("pblendw", 22, src, dst); 1338 DO_imm_mandr_r("pblendw", 23, src, dst); 1339 DO_imm_mandr_r("pblendw", 24, src, dst); 1340 DO_imm_mandr_r("pblendw", 25, src, dst); 1341 DO_imm_mandr_r("pblendw", 26, src, dst); 1342 DO_imm_mandr_r("pblendw", 27, src, dst); 1343 DO_imm_mandr_r("pblendw", 
28, src, dst); 1344 DO_imm_mandr_r("pblendw", 29, src, dst); 1345 DO_imm_mandr_r("pblendw", 30, src, dst); 1346 DO_imm_mandr_r("pblendw", 31, src, dst); 1347 DO_imm_mandr_r("pblendw", 32, src, dst); 1348 DO_imm_mandr_r("pblendw", 33, src, dst); 1349 DO_imm_mandr_r("pblendw", 34, src, dst); 1350 DO_imm_mandr_r("pblendw", 35, src, dst); 1351 DO_imm_mandr_r("pblendw", 36, src, dst); 1352 DO_imm_mandr_r("pblendw", 37, src, dst); 1353 DO_imm_mandr_r("pblendw", 38, src, dst); 1354 DO_imm_mandr_r("pblendw", 39, src, dst); 1355 DO_imm_mandr_r("pblendw", 40, src, dst); 1356 DO_imm_mandr_r("pblendw", 41, src, dst); 1357 DO_imm_mandr_r("pblendw", 42, src, dst); 1358 DO_imm_mandr_r("pblendw", 43, src, dst); 1359 DO_imm_mandr_r("pblendw", 44, src, dst); 1360 DO_imm_mandr_r("pblendw", 45, src, dst); 1361 DO_imm_mandr_r("pblendw", 46, src, dst); 1362 DO_imm_mandr_r("pblendw", 47, src, dst); 1363 DO_imm_mandr_r("pblendw", 48, src, dst); 1364 DO_imm_mandr_r("pblendw", 49, src, dst); 1365 DO_imm_mandr_r("pblendw", 50, src, dst); 1366 DO_imm_mandr_r("pblendw", 51, src, dst); 1367 DO_imm_mandr_r("pblendw", 52, src, dst); 1368 DO_imm_mandr_r("pblendw", 53, src, dst); 1369 DO_imm_mandr_r("pblendw", 54, src, dst); 1370 DO_imm_mandr_r("pblendw", 55, src, dst); 1371 DO_imm_mandr_r("pblendw", 56, src, dst); 1372 DO_imm_mandr_r("pblendw", 57, src, dst); 1373 DO_imm_mandr_r("pblendw", 58, src, dst); 1374 DO_imm_mandr_r("pblendw", 59, src, dst); 1375 DO_imm_mandr_r("pblendw", 60, src, dst); 1376 DO_imm_mandr_r("pblendw", 61, src, dst); 1377 DO_imm_mandr_r("pblendw", 62, src, dst); 1378 DO_imm_mandr_r("pblendw", 63, src, dst); 1379 DO_imm_mandr_r("pblendw", 64, src, dst); 1380 DO_imm_mandr_r("pblendw", 65, src, dst); 1381 DO_imm_mandr_r("pblendw", 66, src, dst); 1382 DO_imm_mandr_r("pblendw", 67, src, dst); 1383 DO_imm_mandr_r("pblendw", 68, src, dst); 1384 DO_imm_mandr_r("pblendw", 69, src, dst); 1385 DO_imm_mandr_r("pblendw", 70, src, dst); 1386 DO_imm_mandr_r("pblendw", 71, src, dst); 1387 
DO_imm_mandr_r("pblendw", 72, src, dst); 1388 DO_imm_mandr_r("pblendw", 73, src, dst); 1389 DO_imm_mandr_r("pblendw", 74, src, dst); 1390 DO_imm_mandr_r("pblendw", 75, src, dst); 1391 DO_imm_mandr_r("pblendw", 76, src, dst); 1392 DO_imm_mandr_r("pblendw", 77, src, dst); 1393 DO_imm_mandr_r("pblendw", 78, src, dst); 1394 DO_imm_mandr_r("pblendw", 79, src, dst); 1395 DO_imm_mandr_r("pblendw", 80, src, dst); 1396 DO_imm_mandr_r("pblendw", 81, src, dst); 1397 DO_imm_mandr_r("pblendw", 82, src, dst); 1398 DO_imm_mandr_r("pblendw", 83, src, dst); 1399 DO_imm_mandr_r("pblendw", 84, src, dst); 1400 DO_imm_mandr_r("pblendw", 85, src, dst); 1401 DO_imm_mandr_r("pblendw", 86, src, dst); 1402 DO_imm_mandr_r("pblendw", 87, src, dst); 1403 DO_imm_mandr_r("pblendw", 88, src, dst); 1404 DO_imm_mandr_r("pblendw", 89, src, dst); 1405 DO_imm_mandr_r("pblendw", 90, src, dst); 1406 DO_imm_mandr_r("pblendw", 91, src, dst); 1407 DO_imm_mandr_r("pblendw", 92, src, dst); 1408 DO_imm_mandr_r("pblendw", 93, src, dst); 1409 DO_imm_mandr_r("pblendw", 94, src, dst); 1410 DO_imm_mandr_r("pblendw", 95, src, dst); 1411 DO_imm_mandr_r("pblendw", 96, src, dst); 1412 DO_imm_mandr_r("pblendw", 97, src, dst); 1413 DO_imm_mandr_r("pblendw", 98, src, dst); 1414 DO_imm_mandr_r("pblendw", 99, src, dst); 1415 DO_imm_mandr_r("pblendw", 100, src, dst); 1416 DO_imm_mandr_r("pblendw", 101, src, dst); 1417 DO_imm_mandr_r("pblendw", 102, src, dst); 1418 DO_imm_mandr_r("pblendw", 103, src, dst); 1419 DO_imm_mandr_r("pblendw", 104, src, dst); 1420 DO_imm_mandr_r("pblendw", 105, src, dst); 1421 DO_imm_mandr_r("pblendw", 106, src, dst); 1422 DO_imm_mandr_r("pblendw", 107, src, dst); 1423 DO_imm_mandr_r("pblendw", 108, src, dst); 1424 DO_imm_mandr_r("pblendw", 109, src, dst); 1425 DO_imm_mandr_r("pblendw", 110, src, dst); 1426 DO_imm_mandr_r("pblendw", 111, src, dst); 1427 DO_imm_mandr_r("pblendw", 112, src, dst); 1428 DO_imm_mandr_r("pblendw", 113, src, dst); 1429 DO_imm_mandr_r("pblendw", 114, src, dst); 1430 
DO_imm_mandr_r("pblendw", 115, src, dst); 1431 DO_imm_mandr_r("pblendw", 116, src, dst); 1432 DO_imm_mandr_r("pblendw", 117, src, dst); 1433 DO_imm_mandr_r("pblendw", 118, src, dst); 1434 DO_imm_mandr_r("pblendw", 119, src, dst); 1435 DO_imm_mandr_r("pblendw", 120, src, dst); 1436 DO_imm_mandr_r("pblendw", 121, src, dst); 1437 DO_imm_mandr_r("pblendw", 122, src, dst); 1438 DO_imm_mandr_r("pblendw", 123, src, dst); 1439 DO_imm_mandr_r("pblendw", 124, src, dst); 1440 DO_imm_mandr_r("pblendw", 125, src, dst); 1441 DO_imm_mandr_r("pblendw", 126, src, dst); 1442 DO_imm_mandr_r("pblendw", 127, src, dst); 1443 DO_imm_mandr_r("pblendw", 128, src, dst); 1444 DO_imm_mandr_r("pblendw", 129, src, dst); 1445 DO_imm_mandr_r("pblendw", 130, src, dst); 1446 DO_imm_mandr_r("pblendw", 131, src, dst); 1447 DO_imm_mandr_r("pblendw", 132, src, dst); 1448 DO_imm_mandr_r("pblendw", 133, src, dst); 1449 DO_imm_mandr_r("pblendw", 134, src, dst); 1450 DO_imm_mandr_r("pblendw", 135, src, dst); 1451 DO_imm_mandr_r("pblendw", 136, src, dst); 1452 DO_imm_mandr_r("pblendw", 137, src, dst); 1453 DO_imm_mandr_r("pblendw", 138, src, dst); 1454 DO_imm_mandr_r("pblendw", 139, src, dst); 1455 DO_imm_mandr_r("pblendw", 140, src, dst); 1456 DO_imm_mandr_r("pblendw", 141, src, dst); 1457 DO_imm_mandr_r("pblendw", 142, src, dst); 1458 DO_imm_mandr_r("pblendw", 143, src, dst); 1459 DO_imm_mandr_r("pblendw", 144, src, dst); 1460 DO_imm_mandr_r("pblendw", 145, src, dst); 1461 DO_imm_mandr_r("pblendw", 146, src, dst); 1462 DO_imm_mandr_r("pblendw", 147, src, dst); 1463 DO_imm_mandr_r("pblendw", 148, src, dst); 1464 DO_imm_mandr_r("pblendw", 149, src, dst); 1465 DO_imm_mandr_r("pblendw", 150, src, dst); 1466 DO_imm_mandr_r("pblendw", 151, src, dst); 1467 DO_imm_mandr_r("pblendw", 152, src, dst); 1468 DO_imm_mandr_r("pblendw", 153, src, dst); 1469 DO_imm_mandr_r("pblendw", 154, src, dst); 1470 DO_imm_mandr_r("pblendw", 155, src, dst); 1471 DO_imm_mandr_r("pblendw", 156, src, dst); 1472 DO_imm_mandr_r("pblendw", 
157, src, dst); 1473 DO_imm_mandr_r("pblendw", 158, src, dst); 1474 DO_imm_mandr_r("pblendw", 159, src, dst); 1475 DO_imm_mandr_r("pblendw", 160, src, dst); 1476 DO_imm_mandr_r("pblendw", 161, src, dst); 1477 DO_imm_mandr_r("pblendw", 162, src, dst); 1478 DO_imm_mandr_r("pblendw", 163, src, dst); 1479 DO_imm_mandr_r("pblendw", 164, src, dst); 1480 DO_imm_mandr_r("pblendw", 165, src, dst); 1481 DO_imm_mandr_r("pblendw", 166, src, dst); 1482 DO_imm_mandr_r("pblendw", 167, src, dst); 1483 DO_imm_mandr_r("pblendw", 168, src, dst); 1484 DO_imm_mandr_r("pblendw", 169, src, dst); 1485 DO_imm_mandr_r("pblendw", 170, src, dst); 1486 DO_imm_mandr_r("pblendw", 171, src, dst); 1487 DO_imm_mandr_r("pblendw", 172, src, dst); 1488 DO_imm_mandr_r("pblendw", 173, src, dst); 1489 DO_imm_mandr_r("pblendw", 174, src, dst); 1490 DO_imm_mandr_r("pblendw", 175, src, dst); 1491 DO_imm_mandr_r("pblendw", 176, src, dst); 1492 DO_imm_mandr_r("pblendw", 177, src, dst); 1493 DO_imm_mandr_r("pblendw", 178, src, dst); 1494 DO_imm_mandr_r("pblendw", 179, src, dst); 1495 DO_imm_mandr_r("pblendw", 180, src, dst); 1496 DO_imm_mandr_r("pblendw", 181, src, dst); 1497 DO_imm_mandr_r("pblendw", 182, src, dst); 1498 DO_imm_mandr_r("pblendw", 183, src, dst); 1499 DO_imm_mandr_r("pblendw", 184, src, dst); 1500 DO_imm_mandr_r("pblendw", 185, src, dst); 1501 DO_imm_mandr_r("pblendw", 186, src, dst); 1502 DO_imm_mandr_r("pblendw", 187, src, dst); 1503 DO_imm_mandr_r("pblendw", 188, src, dst); 1504 DO_imm_mandr_r("pblendw", 189, src, dst); 1505 DO_imm_mandr_r("pblendw", 190, src, dst); 1506 DO_imm_mandr_r("pblendw", 191, src, dst); 1507 DO_imm_mandr_r("pblendw", 192, src, dst); 1508 DO_imm_mandr_r("pblendw", 193, src, dst); 1509 DO_imm_mandr_r("pblendw", 194, src, dst); 1510 DO_imm_mandr_r("pblendw", 195, src, dst); 1511 DO_imm_mandr_r("pblendw", 196, src, dst); 1512 DO_imm_mandr_r("pblendw", 197, src, dst); 1513 DO_imm_mandr_r("pblendw", 198, src, dst); 1514 DO_imm_mandr_r("pblendw", 199, src, dst); 1515 
DO_imm_mandr_r("pblendw", 200, src, dst); 1516 DO_imm_mandr_r("pblendw", 201, src, dst); 1517 DO_imm_mandr_r("pblendw", 202, src, dst); 1518 DO_imm_mandr_r("pblendw", 203, src, dst); 1519 DO_imm_mandr_r("pblendw", 204, src, dst); 1520 DO_imm_mandr_r("pblendw", 205, src, dst); 1521 DO_imm_mandr_r("pblendw", 206, src, dst); 1522 DO_imm_mandr_r("pblendw", 207, src, dst); 1523 DO_imm_mandr_r("pblendw", 208, src, dst); 1524 DO_imm_mandr_r("pblendw", 209, src, dst); 1525 DO_imm_mandr_r("pblendw", 210, src, dst); 1526 DO_imm_mandr_r("pblendw", 211, src, dst); 1527 DO_imm_mandr_r("pblendw", 212, src, dst); 1528 DO_imm_mandr_r("pblendw", 213, src, dst); 1529 DO_imm_mandr_r("pblendw", 214, src, dst); 1530 DO_imm_mandr_r("pblendw", 215, src, dst); 1531 DO_imm_mandr_r("pblendw", 216, src, dst); 1532 DO_imm_mandr_r("pblendw", 217, src, dst); 1533 DO_imm_mandr_r("pblendw", 218, src, dst); 1534 DO_imm_mandr_r("pblendw", 219, src, dst); 1535 DO_imm_mandr_r("pblendw", 220, src, dst); 1536 DO_imm_mandr_r("pblendw", 221, src, dst); 1537 DO_imm_mandr_r("pblendw", 222, src, dst); 1538 DO_imm_mandr_r("pblendw", 223, src, dst); 1539 DO_imm_mandr_r("pblendw", 224, src, dst); 1540 DO_imm_mandr_r("pblendw", 225, src, dst); 1541 DO_imm_mandr_r("pblendw", 226, src, dst); 1542 DO_imm_mandr_r("pblendw", 227, src, dst); 1543 DO_imm_mandr_r("pblendw", 228, src, dst); 1544 DO_imm_mandr_r("pblendw", 229, src, dst); 1545 DO_imm_mandr_r("pblendw", 230, src, dst); 1546 DO_imm_mandr_r("pblendw", 231, src, dst); 1547 DO_imm_mandr_r("pblendw", 232, src, dst); 1548 DO_imm_mandr_r("pblendw", 233, src, dst); 1549 DO_imm_mandr_r("pblendw", 234, src, dst); 1550 DO_imm_mandr_r("pblendw", 235, src, dst); 1551 DO_imm_mandr_r("pblendw", 236, src, dst); 1552 DO_imm_mandr_r("pblendw", 237, src, dst); 1553 DO_imm_mandr_r("pblendw", 238, src, dst); 1554 DO_imm_mandr_r("pblendw", 239, src, dst); 1555 DO_imm_mandr_r("pblendw", 240, src, dst); 1556 DO_imm_mandr_r("pblendw", 241, src, dst); 1557 DO_imm_mandr_r("pblendw", 
242, src, dst); 1558 DO_imm_mandr_r("pblendw", 243, src, dst); 1559 DO_imm_mandr_r("pblendw", 244, src, dst); 1560 DO_imm_mandr_r("pblendw", 245, src, dst); 1561 DO_imm_mandr_r("pblendw", 246, src, dst); 1562 DO_imm_mandr_r("pblendw", 247, src, dst); 1563 DO_imm_mandr_r("pblendw", 248, src, dst); 1564 DO_imm_mandr_r("pblendw", 249, src, dst); 1565 DO_imm_mandr_r("pblendw", 250, src, dst); 1566 DO_imm_mandr_r("pblendw", 251, src, dst); 1567 DO_imm_mandr_r("pblendw", 252, src, dst); 1568 DO_imm_mandr_r("pblendw", 253, src, dst); 1569 DO_imm_mandr_r("pblendw", 254, src, dst); 1570 DO_imm_mandr_r("pblendw", 255, src, dst); 1571 } 1572} 1573 1574 1575void test_PCMPEQQ ( void ) 1576{ 1577 V128 src, dst; 1578 Int i; 1579 for (i = 0; i < 10; i++) { 1580 randV128(&src); 1581 randV128(&dst); 1582 switch (i - 6) { 1583 case 0: memset(&src[0], 0x55, 8); 1584 memset(&dst[0], 0x55, 8); break; 1585 case 1: memset(&src[8], 0x55, 8); 1586 memset(&dst[8], 0x55, 8); break; 1587 default: 1588 break; 1589 } 1590 DO_mandr_r("pcmpeqq", src, dst); 1591 } 1592} 1593 1594 1595void test_PEXTRB ( void ) 1596{ 1597 V128 src; 1598 randV128(&src); 1599 DO_imm_r_to_mandrscalar("pextrb", 0, src, "d"); 1600 DO_imm_r_to_mandrscalar("pextrb", 1, src, "d"); 1601 DO_imm_r_to_mandrscalar("pextrb", 2, src, "d"); 1602 DO_imm_r_to_mandrscalar("pextrb", 3, src, "d"); 1603 DO_imm_r_to_mandrscalar("pextrb", 4, src, "d"); 1604 DO_imm_r_to_mandrscalar("pextrb", 5, src, "d"); 1605 DO_imm_r_to_mandrscalar("pextrb", 6, src, "d"); 1606 DO_imm_r_to_mandrscalar("pextrb", 7, src, "d"); 1607 DO_imm_r_to_mandrscalar("pextrb", 8, src, "d"); 1608 DO_imm_r_to_mandrscalar("pextrb", 9, src, "d"); 1609 DO_imm_r_to_mandrscalar("pextrb", 10, src, "d"); 1610 DO_imm_r_to_mandrscalar("pextrb", 11, src, "d"); 1611 DO_imm_r_to_mandrscalar("pextrb", 12, src, "d"); 1612 DO_imm_r_to_mandrscalar("pextrb", 13, src, "d"); 1613 DO_imm_r_to_mandrscalar("pextrb", 14, src, "d"); 1614 DO_imm_r_to_mandrscalar("pextrb", 15, src, "d"); 1615} 1616 
1617void test_PINSRB ( void ) 1618{ 1619 ULong src; 1620 src = randULong(); 1621 DO_imm_mandrscalar_to_r("pinsrb", 0, src, "d"); 1622 src = randULong(); 1623 DO_imm_mandrscalar_to_r("pinsrb", 1, src, "d"); 1624 src = randULong(); 1625 DO_imm_mandrscalar_to_r("pinsrb", 2, src, "d"); 1626 src = randULong(); 1627 DO_imm_mandrscalar_to_r("pinsrb", 3, src, "d"); 1628 src = randULong(); 1629 DO_imm_mandrscalar_to_r("pinsrb", 4, src, "d"); 1630 src = randULong(); 1631 DO_imm_mandrscalar_to_r("pinsrb", 5, src, "d"); 1632 src = randULong(); 1633 DO_imm_mandrscalar_to_r("pinsrb", 6, src, "d"); 1634 src = randULong(); 1635 DO_imm_mandrscalar_to_r("pinsrb", 7, src, "d"); 1636 src = randULong(); 1637 DO_imm_mandrscalar_to_r("pinsrb", 8, src, "d"); 1638 src = randULong(); 1639 DO_imm_mandrscalar_to_r("pinsrb", 9, src, "d"); 1640 src = randULong(); 1641 DO_imm_mandrscalar_to_r("pinsrb", 10, src, "d"); 1642 src = randULong(); 1643 DO_imm_mandrscalar_to_r("pinsrb", 11, src, "d"); 1644 src = randULong(); 1645 DO_imm_mandrscalar_to_r("pinsrb", 12, src, "d"); 1646 src = randULong(); 1647 DO_imm_mandrscalar_to_r("pinsrb", 13, src, "d"); 1648 src = randULong(); 1649 DO_imm_mandrscalar_to_r("pinsrb", 14, src, "d"); 1650 src = randULong(); 1651 DO_imm_mandrscalar_to_r("pinsrb", 15, src, "d"); 1652} 1653 1654 1655void test_PEXTRW ( void ) 1656{ 1657 V128 src; 1658 randV128(&src); 1659 DO_imm_r_to_mandrscalar("pextrw", 0, src, "d"); 1660 DO_imm_r_to_mandrscalar("pextrw", 1, src, "d"); 1661 DO_imm_r_to_mandrscalar("pextrw", 2, src, "d"); 1662 DO_imm_r_to_mandrscalar("pextrw", 3, src, "d"); 1663 DO_imm_r_to_mandrscalar("pextrw", 4, src, "d"); 1664 DO_imm_r_to_mandrscalar("pextrw", 5, src, "d"); 1665 DO_imm_r_to_mandrscalar("pextrw", 6, src, "d"); 1666 DO_imm_r_to_mandrscalar("pextrw", 7, src, "d"); 1667} 1668 1669void test_PINSRW ( void ) 1670{ 1671 ULong src; 1672 src = randULong(); 1673 DO_imm_mandrscalar_to_r("pinsrw", 0, src, "d"); 1674 src = randULong(); 1675 
DO_imm_mandrscalar_to_r("pinsrw", 1, src, "d"); 1676 src = randULong(); 1677 DO_imm_mandrscalar_to_r("pinsrw", 2, src, "d"); 1678 src = randULong(); 1679 DO_imm_mandrscalar_to_r("pinsrw", 3, src, "d"); 1680 src = randULong(); 1681 DO_imm_mandrscalar_to_r("pinsrw", 4, src, "d"); 1682 src = randULong(); 1683 DO_imm_mandrscalar_to_r("pinsrw", 5, src, "d"); 1684 src = randULong(); 1685 DO_imm_mandrscalar_to_r("pinsrw", 6, src, "d"); 1686 src = randULong(); 1687 DO_imm_mandrscalar_to_r("pinsrw", 7, src, "d"); 1688} 1689 1690 1691void test_PEXTRD ( void ) 1692{ 1693 V128 src; 1694 randV128(&src); 1695 DO_imm_r_to_mandrscalar("pextrd", 0, src, "d"); 1696 DO_imm_r_to_mandrscalar("pextrd", 1, src, "d"); 1697 DO_imm_r_to_mandrscalar("pextrd", 2, src, "d"); 1698 DO_imm_r_to_mandrscalar("pextrd", 3, src, "d"); 1699} 1700 1701void test_PINSRD ( void ) 1702{ 1703 ULong src; 1704 src = randULong(); 1705 DO_imm_mandrscalar_to_r("pinsrd", 0, src, "d"); 1706 src = randULong(); 1707 DO_imm_mandrscalar_to_r("pinsrd", 1, src, "d"); 1708 src = randULong(); 1709 DO_imm_mandrscalar_to_r("pinsrd", 2, src, "d"); 1710 src = randULong(); 1711 DO_imm_mandrscalar_to_r("pinsrd", 3, src, "d"); 1712} 1713 1714 1715void test_PEXTRQ ( void ) 1716{ 1717 V128 src; 1718 randV128(&src); 1719 DO_imm_r_to_mandrscalar("pextrq", 0, src, ""); 1720 DO_imm_r_to_mandrscalar("pextrq", 1, src, ""); 1721} 1722 1723void test_PINSRQ ( void ) 1724{ 1725 ULong src; 1726 src = randULong(); 1727 DO_imm_mandrscalar_to_r("pinsrq", 0, src, ""); 1728 src = randULong(); 1729 DO_imm_mandrscalar_to_r("pinsrq", 1, src, ""); 1730} 1731 1732 1733void test_EXTRACTPS ( void ) 1734{ 1735 V128 src; 1736 randV128(&src); 1737 DO_imm_r_to_mandrscalar("extractps", 0, src, "d"); 1738 DO_imm_r_to_mandrscalar("extractps", 1, src, "d"); 1739 DO_imm_r_to_mandrscalar("extractps", 2, src, "d"); 1740 DO_imm_r_to_mandrscalar("extractps", 3, src, "d"); 1741} 1742 1743 1744void test_PHMINPOSUW ( void ) 1745{ 1746 V128 src, dst; 1747 Int i; 1748 for 
(i = 0; i < 20; i++) { 1749 randV128(&src); 1750 randV128(&dst); 1751 DO_mandr_r("phminposuw", src, dst); 1752 } 1753 memset(src, 0x55, sizeof(src)); 1754 memset(dst, 0xAA, sizeof(dst)); 1755 DO_mandr_r("phminposuw", src, dst); 1756} 1757 1758void test_PMAXSB ( void ) 1759{ 1760 V128 src, dst; 1761 Int i; 1762 for (i = 0; i < 10; i++) { 1763 randV128(&src); 1764 randV128(&dst); 1765 DO_mandr_r("pmaxsb", src, dst); 1766 } 1767} 1768 1769void test_PMAXSD ( void ) 1770{ 1771 V128 src, dst; 1772 Int i; 1773 for (i = 0; i < 10; i++) { 1774 randV128(&src); 1775 randV128(&dst); 1776 DO_mandr_r("pmaxsd", src, dst); 1777 } 1778} 1779 1780void test_PMAXUD ( void ) 1781{ 1782 V128 src, dst; 1783 Int i; 1784 for (i = 0; i < 10; i++) { 1785 randV128(&src); 1786 randV128(&dst); 1787 DO_mandr_r("pmaxud", src, dst); 1788 } 1789} 1790 1791void test_PMAXUW ( void ) 1792{ 1793 V128 src, dst; 1794 Int i; 1795 for (i = 0; i < 10; i++) { 1796 randV128(&src); 1797 randV128(&dst); 1798 DO_mandr_r("pmaxuw", src, dst); 1799 } 1800} 1801 1802void test_PMINSB ( void ) 1803{ 1804 V128 src, dst; 1805 Int i; 1806 for (i = 0; i < 10; i++) { 1807 randV128(&src); 1808 randV128(&dst); 1809 DO_mandr_r("pminsb", src, dst); 1810 } 1811} 1812 1813void test_PMINSD ( void ) 1814{ 1815 V128 src, dst; 1816 Int i; 1817 for (i = 0; i < 10; i++) { 1818 randV128(&src); 1819 randV128(&dst); 1820 DO_mandr_r("pminsd", src, dst); 1821 } 1822} 1823 1824void test_PMINUD ( void ) 1825{ 1826 V128 src, dst; 1827 Int i; 1828 for (i = 0; i < 10; i++) { 1829 randV128(&src); 1830 randV128(&dst); 1831 DO_mandr_r("pminud", src, dst); 1832 } 1833} 1834 1835void test_PMINUW ( void ) 1836{ 1837 V128 src, dst; 1838 Int i; 1839 for (i = 0; i < 10; i++) { 1840 randV128(&src); 1841 randV128(&dst); 1842 DO_mandr_r("pminuw", src, dst); 1843 } 1844} 1845 1846void test_PMOVSXBW ( void ) 1847{ 1848 V128 src, dst; 1849 Int i; 1850 for (i = 0; i < 10; i++) { 1851 randV128(&src); 1852 randV128(&dst); 1853 DO_mandr_r("pmovsxbw", src, dst); 
1854 } 1855} 1856 1857void test_PMOVSXBD ( void ) 1858{ 1859 V128 src, dst; 1860 Int i; 1861 for (i = 0; i < 10; i++) { 1862 randV128(&src); 1863 randV128(&dst); 1864 DO_mandr_r("pmovsxbd", src, dst); 1865 } 1866} 1867 1868void test_PMOVSXBQ ( void ) 1869{ 1870 V128 src, dst; 1871 Int i; 1872 for (i = 0; i < 10; i++) { 1873 randV128(&src); 1874 randV128(&dst); 1875 DO_mandr_r("pmovsxbq", src, dst); 1876 } 1877} 1878 1879void test_PMOVSXWD ( void ) 1880{ 1881 V128 src, dst; 1882 Int i; 1883 for (i = 0; i < 10; i++) { 1884 randV128(&src); 1885 randV128(&dst); 1886 DO_mandr_r("pmovsxwd", src, dst); 1887 } 1888} 1889 1890void test_PMOVSXWQ ( void ) 1891{ 1892 V128 src, dst; 1893 Int i; 1894 for (i = 0; i < 10; i++) { 1895 randV128(&src); 1896 randV128(&dst); 1897 DO_mandr_r("pmovsxwq", src, dst); 1898 } 1899} 1900 1901void test_PMOVSXDQ ( void ) 1902{ 1903 V128 src, dst; 1904 Int i; 1905 for (i = 0; i < 10; i++) { 1906 randV128(&src); 1907 randV128(&dst); 1908 DO_mandr_r("pmovsxdq", src, dst); 1909 } 1910} 1911 1912void test_PMOVZXBW ( void ) 1913{ 1914 V128 src, dst; 1915 Int i; 1916 for (i = 0; i < 10; i++) { 1917 randV128(&src); 1918 randV128(&dst); 1919 DO_mandr_r("pmovzxbw", src, dst); 1920 } 1921} 1922 1923void test_PMOVZXBD ( void ) 1924{ 1925 V128 src, dst; 1926 Int i; 1927 for (i = 0; i < 10; i++) { 1928 randV128(&src); 1929 randV128(&dst); 1930 DO_mandr_r("pmovzxbd", src, dst); 1931 } 1932} 1933 1934void test_PMOVZXBQ ( void ) 1935{ 1936 V128 src, dst; 1937 Int i; 1938 for (i = 0; i < 10; i++) { 1939 randV128(&src); 1940 randV128(&dst); 1941 DO_mandr_r("pmovzxbq", src, dst); 1942 } 1943} 1944 1945void test_PMOVZXWD ( void ) 1946{ 1947 V128 src, dst; 1948 Int i; 1949 for (i = 0; i < 10; i++) { 1950 randV128(&src); 1951 randV128(&dst); 1952 DO_mandr_r("pmovzxwd", src, dst); 1953 } 1954} 1955 1956void test_PMOVZXWQ ( void ) 1957{ 1958 V128 src, dst; 1959 Int i; 1960 for (i = 0; i < 10; i++) { 1961 randV128(&src); 1962 randV128(&dst); 1963 DO_mandr_r("pmovzxwq", 
src, dst); 1964 } 1965} 1966 1967void test_PMOVZXDQ ( void ) 1968{ 1969 V128 src, dst; 1970 Int i; 1971 for (i = 0; i < 10; i++) { 1972 randV128(&src); 1973 randV128(&dst); 1974 DO_mandr_r("pmovzxdq", src, dst); 1975 } 1976} 1977 1978void test_PMULDQ ( void ) 1979{ 1980 V128 src, dst; 1981 Int i; 1982 for (i = 0; i < 10; i++) { 1983 randV128(&src); 1984 randV128(&dst); 1985 DO_mandr_r("pmuldq", src, dst); 1986 } 1987} 1988 1989 1990void test_PMULLD ( void ) 1991{ 1992 V128 src, dst; 1993 Int i; 1994 for (i = 0; i < 10; i++) { 1995 randV128(&src); 1996 randV128(&dst); 1997 DO_mandr_r("pmulld", src, dst); 1998 } 1999} 2000 2001 2002void test_POPCNTQ ( void ) 2003{ 2004 ULong block[4]; 2005 Int i; 2006 ULong oszacp_mask = 0x8D5; 2007 for (i = 0; i < 10; i++) { 2008 block[0] = i == 0 ? 0 : randULong(); 2009 block[1] = randULong(); 2010 block[2] = randULong(); 2011 block[3] = randULong(); 2012 __asm__ __volatile__( 2013 "movq %0, %%rax" "\n\t" 2014 "movq 0(%%rax), %%rdi" "\n\t" 2015 "movq 8(%%rax), %%r11" "\n\t" 2016#ifndef VGP_amd64_darwin 2017 "popcntq %%rdi, %%r11" "\n\t" 2018#else 2019 "popcnt %%rdi, %%r11" "\n\t" 2020#endif 2021 "movq %%r11, 16(%%rax)" "\n\t" 2022 "pushfq" "\n\t" 2023 "popq %%r12" "\n\t" 2024 "movq %%r12, 24(%%rax)" "\n" 2025 : /*out*/ 2026 : /*in*/"r"(&block[0]) 2027 : /*trash*/ "cc", "memory", "rdi", "r11", "r12" 2028 ); 2029 printf("r popcntq %016llx %016llx %016llx %016llx\n", 2030 block[0], block[1], block[2], block[3] & oszacp_mask); 2031 2032 block[0] = i == 0 ? 
0 : randULong(); 2033 block[1] = randULong(); 2034 block[2] = randULong(); 2035 block[3] = randULong(); 2036 __asm__ __volatile__( 2037 "movq %0, %%rax" "\n\t" 2038 "movq 8(%%rax), %%r11" "\n\t" 2039#ifndef VGP_amd64_darwin 2040 "popcntq 0(%%rax), %%r11" "\n\t" 2041#else 2042 "popcnt 0(%%rax), %%r11" "\n\t" 2043#endif 2044 "movq %%r11, 16(%%rax)" "\n\t" 2045 "pushfq" "\n\t" 2046 "popq %%r12" "\n\t" 2047 "movq %%r12, 24(%%rax)" "\n" 2048 : /*out*/ 2049 : /*in*/"r"(&block[0]) 2050 : /*trash*/ "cc", "memory", "r11", "r12" 2051 ); 2052 printf("m popcntq %016llx %016llx %016llx %016llx\n", 2053 block[0], block[1], block[2], block[3] & oszacp_mask); 2054 } 2055} 2056 2057 2058void test_POPCNTL ( void ) 2059{ 2060 ULong block[4]; 2061 Int i; 2062 ULong oszacp_mask = 0x8D5; 2063 for (i = 0; i < 10; i++) { 2064 block[0] = i == 0 ? 0 : randULong(); 2065 block[1] = randULong(); 2066 block[2] = randULong(); 2067 block[3] = randULong(); 2068 __asm__ __volatile__( 2069 "movq %0, %%rax" "\n\t" 2070 "movq 0(%%rax), %%rdi" "\n\t" 2071 "movq 8(%%rax), %%r11" "\n\t" 2072#ifndef VGP_amd64_darwin 2073 "popcntl %%edi, %%r11d" "\n\t" 2074#else 2075 "popcnt %%edi, %%r11d" "\n\t" 2076#endif 2077 "movq %%r11, 16(%%rax)" "\n\t" 2078 "pushfq" "\n\t" 2079 "popq %%r12" "\n\t" 2080 "movq %%r12, 24(%%rax)" "\n" 2081 : /*out*/ 2082 : /*in*/"r"(&block[0]) 2083 : /*trash*/ "cc", "memory", "rdi", "r11", "r12" 2084 ); 2085 printf("r popcntl %016llx %016llx %016llx %016llx\n", 2086 block[0], block[1], block[2], block[3] & oszacp_mask); 2087 2088 block[0] = i == 0 ? 
0 : randULong(); 2089 block[1] = randULong(); 2090 block[2] = randULong(); 2091 block[3] = randULong(); 2092 __asm__ __volatile__( 2093 "movq %0, %%rax" "\n\t" 2094 "movq 8(%%rax), %%r11" "\n\t" 2095#ifndef VGP_amd64_darwin 2096 "popcntl 0(%%rax), %%r11d" "\n\t" 2097#else 2098 "popcnt 0(%%rax), %%r11d" "\n\t" 2099#endif 2100 "movq %%r11, 16(%%rax)" "\n\t" 2101 "pushfq" "\n\t" 2102 "popq %%r12" "\n\t" 2103 "movq %%r12, 24(%%rax)" "\n" 2104 : /*out*/ 2105 : /*in*/"r"(&block[0]) 2106 : /*trash*/ "cc", "memory", "r11", "r12" 2107 ); 2108 printf("m popcntl %016llx %016llx %016llx %016llx\n", 2109 block[0], block[1], block[2], block[3] & oszacp_mask); 2110 } 2111} 2112 2113 2114void test_POPCNTW ( void ) 2115{ 2116 ULong block[4]; 2117 Int i; 2118 ULong oszacp_mask = 0x8D5; 2119 for (i = 0; i < 10; i++) { 2120 block[0] = i == 0 ? 0 : randULong(); 2121 block[1] = randULong(); 2122 block[2] = randULong(); 2123 block[3] = randULong(); 2124 __asm__ __volatile__( 2125 "movq %0, %%rax" "\n\t" 2126 "movq 0(%%rax), %%rdi" "\n\t" 2127 "movq 8(%%rax), %%r11" "\n\t" 2128#ifndef VGP_amd64_darwin 2129 "popcntw %%di, %%r11w" "\n\t" 2130#else 2131 "popcnt %%di, %%r11w" "\n\t" 2132#endif 2133 "movq %%r11, 16(%%rax)" "\n\t" 2134 "pushfq" "\n\t" 2135 "popq %%r12" "\n\t" 2136 "movq %%r12, 24(%%rax)" "\n" 2137 : /*out*/ 2138 : /*in*/"r"(&block[0]) 2139 : /*trash*/ "cc", "memory", "rdi", "r11", "r12" 2140 ); 2141 printf("r popcntw %016llx %016llx %016llx %016llx\n", 2142 block[0], block[1], block[2], block[3] & oszacp_mask); 2143 2144 block[0] = i == 0 ? 
0 : randULong(); 2145 block[1] = randULong(); 2146 block[2] = randULong(); 2147 block[3] = randULong(); 2148 __asm__ __volatile__( 2149 "movq %0, %%rax" "\n\t" 2150 "movq 8(%%rax), %%r11" "\n\t" 2151#ifndef VGP_amd64_darwin 2152 "popcntw 0(%%rax), %%r11w" "\n\t" 2153#else 2154 "popcnt 0(%%rax), %%r11w" "\n\t" 2155#endif 2156 "movq %%r11, 16(%%rax)" "\n\t" 2157 "pushfq" "\n\t" 2158 "popq %%r12" "\n\t" 2159 "movq %%r12, 24(%%rax)" "\n" 2160 : /*out*/ 2161 : /*in*/"r"(&block[0]) 2162 : /*trash*/ "cc", "memory", "r11", "r12" 2163 ); 2164 printf("m popcntw %016llx %016llx %016llx %016llx\n", 2165 block[0], block[1], block[2], block[3] & oszacp_mask); 2166 } 2167} 2168 2169 2170void test_PCMPGTQ ( void ) 2171{ 2172 V128 spec[7]; 2173 do64HLtoV128( &spec[0], 0x0000000000000000ULL, 0xffffffffffffffffULL ); 2174 do64HLtoV128( &spec[1], 0x0000000000000001ULL, 0xfffffffffffffffeULL ); 2175 do64HLtoV128( &spec[2], 0x7fffffffffffffffULL, 0x8000000000000001ULL ); 2176 do64HLtoV128( &spec[3], 0x8000000000000000ULL, 0x8000000000000000ULL ); 2177 do64HLtoV128( &spec[4], 0x8000000000000001ULL, 0x7fffffffffffffffULL ); 2178 do64HLtoV128( &spec[5], 0xfffffffffffffffeULL, 0x0000000000000001ULL ); 2179 do64HLtoV128( &spec[6], 0xffffffffffffffffULL, 0x0000000000000000ULL ); 2180 2181 V128 src, dst; 2182 Int i, j; 2183 for (i = 0; i < 10; i++) { 2184 randV128(&src); 2185 randV128(&dst); 2186 DO_mandr_r("pcmpgtq", src, dst); 2187 } 2188 for (i = 0; i < 7; i++) { 2189 for (j = 0; j < 7; j++) { 2190 memcpy(&src, &spec[i], 16); 2191 memcpy(&dst, &spec[j], 16); 2192 DO_mandr_r("pcmpgtq", src, dst); 2193 } 2194 } 2195} 2196 2197/* ------------ ROUNDSD ------------ */ 2198 2199void do_ROUNDSD_000 ( Bool mem, V128* src, /*OUT*/V128* dst ) 2200{ 2201 if (mem) { 2202 __asm__ __volatile__( 2203 "movupd (%1), %%xmm11" "\n\t" 2204 "roundsd $0, (%0), %%xmm11" "\n\t" 2205 "movupd %%xmm11, (%1)" "\n" 2206 : /*OUT*/ 2207 : /*IN*/ "r"(src), "r"(dst) 2208 : /*TRASH*/ "xmm11" 2209 ); 2210 } else { 2211 
__asm__ __volatile__( 2212 "movupd (%1), %%xmm11" "\n\t" 2213 "movupd (%0), %%xmm2" "\n\t" 2214 "roundsd $0, %%xmm2, %%xmm11" "\n\t" 2215 "movupd %%xmm11, (%1)" "\n" 2216 : /*OUT*/ 2217 : /*IN*/ "r"(src), "r"(dst) 2218 : /*TRASH*/ "xmm11","xmm2" 2219 ); 2220 } 2221} 2222 2223void do_ROUNDSD_001 ( Bool mem, V128* src, /*OUT*/V128* dst ) 2224{ 2225 if (mem) { 2226 __asm__ __volatile__( 2227 "movupd (%1), %%xmm11" "\n\t" 2228 "roundsd $1, (%0), %%xmm11" "\n\t" 2229 "movupd %%xmm11, (%1)" "\n" 2230 : /*OUT*/ 2231 : /*IN*/ "r"(src), "r"(dst) 2232 : /*TRASH*/ "xmm11" 2233 ); 2234 } else { 2235 __asm__ __volatile__( 2236 "movupd (%1), %%xmm11" "\n\t" 2237 "movupd (%0), %%xmm2" "\n\t" 2238 "roundsd $1, %%xmm2, %%xmm11" "\n\t" 2239 "movupd %%xmm11, (%1)" "\n" 2240 : /*OUT*/ 2241 : /*IN*/ "r"(src), "r"(dst) 2242 : /*TRASH*/ "xmm11","xmm2" 2243 ); 2244 } 2245} 2246 2247void do_ROUNDSD_010 ( Bool mem, V128* src, /*OUT*/V128* dst ) 2248{ 2249 if (mem) { 2250 __asm__ __volatile__( 2251 "movupd (%1), %%xmm11" "\n\t" 2252 "roundsd $2, (%0), %%xmm11" "\n\t" 2253 "movupd %%xmm11, (%1)" "\n" 2254 : /*OUT*/ 2255 : /*IN*/ "r"(src), "r"(dst) 2256 : /*TRASH*/ "xmm11" 2257 ); 2258 } else { 2259 __asm__ __volatile__( 2260 "movupd (%1), %%xmm11" "\n\t" 2261 "movupd (%0), %%xmm2" "\n\t" 2262 "roundsd $2, %%xmm2, %%xmm11" "\n\t" 2263 "movupd %%xmm11, (%1)" "\n" 2264 : /*OUT*/ 2265 : /*IN*/ "r"(src), "r"(dst) 2266 : /*TRASH*/ "xmm11","xmm2" 2267 ); 2268 } 2269} 2270 2271void do_ROUNDSD_011 ( Bool mem, V128* src, /*OUT*/V128* dst ) 2272{ 2273 if (mem) { 2274 __asm__ __volatile__( 2275 "movupd (%1), %%xmm11" "\n\t" 2276 "roundsd $3, (%0), %%xmm11" "\n\t" 2277 "movupd %%xmm11, (%1)" "\n" 2278 : /*OUT*/ 2279 : /*IN*/ "r"(src), "r"(dst) 2280 : /*TRASH*/ "xmm11" 2281 ); 2282 } else { 2283 __asm__ __volatile__( 2284 "movupd (%1), %%xmm11" "\n\t" 2285 "movupd (%0), %%xmm2" "\n\t" 2286 "roundsd $3, %%xmm2, %%xmm11" "\n\t" 2287 "movupd %%xmm11, (%1)" "\n" 2288 : /*OUT*/ 2289 : /*IN*/ "r"(src), "r"(dst) 
2290 : /*TRASH*/ "xmm11","xmm2" 2291 ); 2292 } 2293} 2294 2295void do_ROUNDSD_1XX ( Bool mem, V128* src, /*OUT*/V128* dst ) 2296{ 2297 if (mem) { 2298 __asm__ __volatile__( 2299 "movupd (%1), %%xmm11" "\n\t" 2300 "roundsd $4, (%0), %%xmm11" "\n\t" 2301 "movupd %%xmm11, (%1)" "\n" 2302 : /*OUT*/ 2303 : /*IN*/ "r"(src), "r"(dst) 2304 : /*TRASH*/ "xmm11" 2305 ); 2306 } else { 2307 __asm__ __volatile__( 2308 "movupd (%1), %%xmm11" "\n\t" 2309 "movupd (%0), %%xmm2" "\n\t" 2310 "roundsd $4, %%xmm2, %%xmm11" "\n\t" 2311 "movupd %%xmm11, (%1)" "\n" 2312 : /*OUT*/ 2313 : /*IN*/ "r"(src), "r"(dst) 2314 : /*TRASH*/ "xmm11","xmm2" 2315 ); 2316 } 2317} 2318 2319void test_ROUNDSD_w_immediate_rounding ( void ) 2320{ 2321 double vals[22]; 2322 Int i = 0; 2323 vals[i++] = 0.0; 2324 vals[i++] = -0.0; 2325 vals[i++] = mkPosInf(); 2326 vals[i++] = mkNegInf(); 2327 vals[i++] = mkPosNan(); 2328 vals[i++] = mkNegNan(); 2329 vals[i++] = -1.3; 2330 vals[i++] = -1.1; 2331 vals[i++] = -0.9; 2332 vals[i++] = -0.7; 2333 vals[i++] = -0.50001; 2334 vals[i++] = -0.49999; 2335 vals[i++] = -0.3; 2336 vals[i++] = -0.1; 2337 vals[i++] = 0.1; 2338 vals[i++] = 0.3; 2339 vals[i++] = 0.49999; 2340 vals[i++] = 0.50001; 2341 vals[i++] = 0.7; 2342 vals[i++] = 0.9; 2343 vals[i++] = 1.1; 2344 vals[i++] = 1.3; 2345 assert(i == 22); 2346 2347 for (i = 0; i < sizeof(vals)/sizeof(vals[0]); i++) { 2348 V128 src, dst; 2349 2350 randV128(&src); 2351 randV128(&dst); 2352 memcpy(&src[0], &vals[i], 8); 2353 do_ROUNDSD_000(False/*reg*/, &src, &dst); 2354 printf("r roundsd_000 "); 2355 showV128(&src); 2356 printf(" "); 2357 showV128(&dst); 2358 printf(" %10f %10f", vals[i], *(double*)(&dst[0])); 2359 printf("\n"); 2360 2361 randV128(&src); 2362 randV128(&dst); 2363 memcpy(&src[0], &vals[i], 8); 2364 do_ROUNDSD_000(True/*mem*/, &src, &dst); 2365 printf("m roundsd_000 "); 2366 showV128(&src); 2367 printf(" "); 2368 showV128(&dst); 2369 printf(" %10f %10f", vals[i], *(double*)(&dst[0])); 2370 printf("\n"); 2371 2372 2373 
randV128(&src); 2374 randV128(&dst); 2375 memcpy(&src[0], &vals[i], 8); 2376 do_ROUNDSD_001(False/*reg*/, &src, &dst); 2377 printf("r roundsd_001 "); 2378 showV128(&src); 2379 printf(" "); 2380 showV128(&dst); 2381 printf(" %10f %10f", vals[i], *(double*)(&dst[0])); 2382 printf("\n"); 2383 2384 randV128(&src); 2385 randV128(&dst); 2386 memcpy(&src[0], &vals[i], 8); 2387 do_ROUNDSD_001(True/*mem*/, &src, &dst); 2388 printf("m roundsd_001 "); 2389 showV128(&src); 2390 printf(" "); 2391 showV128(&dst); 2392 printf(" %10f %10f", vals[i], *(double*)(&dst[0])); 2393 printf("\n"); 2394 2395 2396 randV128(&src); 2397 randV128(&dst); 2398 memcpy(&src[0], &vals[i], 8); 2399 do_ROUNDSD_010(False/*reg*/, &src, &dst); 2400 printf("r roundsd_010 "); 2401 showV128(&src); 2402 printf(" "); 2403 showV128(&dst); 2404 printf(" %10f %10f", vals[i], *(double*)(&dst[0])); 2405 printf("\n"); 2406 2407 randV128(&src); 2408 randV128(&dst); 2409 memcpy(&src[0], &vals[i], 8); 2410 do_ROUNDSD_010(True/*mem*/, &src, &dst); 2411 printf("m roundsd_010 "); 2412 showV128(&src); 2413 printf(" "); 2414 showV128(&dst); 2415 printf(" %10f %10f", vals[i], *(double*)(&dst[0])); 2416 printf("\n"); 2417 2418 2419 randV128(&src); 2420 randV128(&dst); 2421 memcpy(&src[0], &vals[i], 8); 2422 do_ROUNDSD_011(False/*reg*/, &src, &dst); 2423 printf("r roundsd_011 "); 2424 showV128(&src); 2425 printf(" "); 2426 showV128(&dst); 2427 printf(" %10f %10f", vals[i], *(double*)(&dst[0])); 2428 printf("\n"); 2429 2430 randV128(&src); 2431 randV128(&dst); 2432 memcpy(&src[0], &vals[i], 8); 2433 do_ROUNDSD_011(True/*mem*/, &src, &dst); 2434 printf("m roundsd_011 "); 2435 showV128(&src); 2436 printf(" "); 2437 showV128(&dst); 2438 printf(" %10f %10f", vals[i], *(double*)(&dst[0])); 2439 printf("\n"); 2440 } 2441} 2442 2443void test_ROUNDSD_w_mxcsr_rounding ( void ) 2444{ 2445 UInt rm; 2446 double vals[22]; 2447 Int i = 0; 2448 vals[i++] = 0.0; 2449 vals[i++] = -0.0; 2450 vals[i++] = mkPosInf(); 2451 vals[i++] = mkNegInf(); 
2452 vals[i++] = mkPosNan(); 2453 vals[i++] = mkNegNan(); 2454 vals[i++] = -1.3; 2455 vals[i++] = -1.1; 2456 vals[i++] = -0.9; 2457 vals[i++] = -0.7; 2458 vals[i++] = -0.50001; 2459 vals[i++] = -0.49999; 2460 vals[i++] = -0.3; 2461 vals[i++] = -0.1; 2462 vals[i++] = 0.1; 2463 vals[i++] = 0.3; 2464 vals[i++] = 0.49999; 2465 vals[i++] = 0.50001; 2466 vals[i++] = 0.7; 2467 vals[i++] = 0.9; 2468 vals[i++] = 1.1; 2469 vals[i++] = 1.3; 2470 assert(i == 22); 2471 2472 rm = get_sse_roundingmode(); 2473 assert(rm == 0); // 0 == RN == default 2474 2475 for (i = 0; i < sizeof(vals)/sizeof(vals[0]); i++) { 2476 V128 src, dst; 2477 2478 for (rm = 0; rm <= 3; rm++) { 2479 set_sse_roundingmode(rm); 2480 2481 randV128(&src); 2482 randV128(&dst); 2483 memcpy(&src[0], &vals[i], 8); 2484 do_ROUNDSD_1XX(False/*reg*/, &src, &dst); 2485 printf("r (rm=%u) roundsd_1XX ", rm); 2486 showV128(&src); 2487 printf(" "); 2488 showV128(&dst); 2489 printf(" %10f %10f", vals[i], *(double*)(&dst[0])); 2490 printf("\n"); 2491 2492 randV128(&src); 2493 randV128(&dst); 2494 memcpy(&src[0], &vals[i], 8); 2495 do_ROUNDSD_1XX(True/*mem*/, &src, &dst); 2496 printf("m (rm=%u) roundsd_1XX ", rm); 2497 showV128(&src); 2498 printf(" "); 2499 showV128(&dst); 2500 printf(" %10f %10f", vals[i], *(double*)(&dst[0])); 2501 printf("\n"); 2502 } 2503 } 2504 2505 rm = get_sse_roundingmode(); 2506 assert(rm == 3); 2507 set_sse_roundingmode(0); 2508 rm = get_sse_roundingmode(); 2509 assert(rm == 0); // 0 == RN == default 2510} 2511 2512 2513/* ------------ ROUNDSS ------------ */ 2514 2515void do_ROUNDSS_000 ( Bool mem, V128* src, /*OUT*/V128* dst ) 2516{ 2517 if (mem) { 2518 __asm__ __volatile__( 2519 "movupd (%1), %%xmm11" "\n\t" 2520 "roundss $0, (%0), %%xmm11" "\n\t" 2521 "movupd %%xmm11, (%1)" "\n" 2522 : /*OUT*/ 2523 : /*IN*/ "r"(src), "r"(dst) 2524 : /*TRASH*/ "xmm11" 2525 ); 2526 } else { 2527 __asm__ __volatile__( 2528 "movupd (%1), %%xmm11" "\n\t" 2529 "movupd (%0), %%xmm2" "\n\t" 2530 "roundss $0, %%xmm2, 
%%xmm11" "\n\t" 2531 "movupd %%xmm11, (%1)" "\n" 2532 : /*OUT*/ 2533 : /*IN*/ "r"(src), "r"(dst) 2534 : /*TRASH*/ "xmm11","xmm2" 2535 ); 2536 } 2537} 2538 2539void do_ROUNDSS_001 ( Bool mem, V128* src, /*OUT*/V128* dst ) 2540{ 2541 if (mem) { 2542 __asm__ __volatile__( 2543 "movupd (%1), %%xmm11" "\n\t" 2544 "roundss $1, (%0), %%xmm11" "\n\t" 2545 "movupd %%xmm11, (%1)" "\n" 2546 : /*OUT*/ 2547 : /*IN*/ "r"(src), "r"(dst) 2548 : /*TRASH*/ "xmm11" 2549 ); 2550 } else { 2551 __asm__ __volatile__( 2552 "movupd (%1), %%xmm11" "\n\t" 2553 "movupd (%0), %%xmm2" "\n\t" 2554 "roundss $1, %%xmm2, %%xmm11" "\n\t" 2555 "movupd %%xmm11, (%1)" "\n" 2556 : /*OUT*/ 2557 : /*IN*/ "r"(src), "r"(dst) 2558 : /*TRASH*/ "xmm11","xmm2" 2559 ); 2560 } 2561} 2562 2563void do_ROUNDSS_010 ( Bool mem, V128* src, /*OUT*/V128* dst ) 2564{ 2565 if (mem) { 2566 __asm__ __volatile__( 2567 "movupd (%1), %%xmm11" "\n\t" 2568 "roundss $2, (%0), %%xmm11" "\n\t" 2569 "movupd %%xmm11, (%1)" "\n" 2570 : /*OUT*/ 2571 : /*IN*/ "r"(src), "r"(dst) 2572 : /*TRASH*/ "xmm11" 2573 ); 2574 } else { 2575 __asm__ __volatile__( 2576 "movupd (%1), %%xmm11" "\n\t" 2577 "movupd (%0), %%xmm2" "\n\t" 2578 "roundss $2, %%xmm2, %%xmm11" "\n\t" 2579 "movupd %%xmm11, (%1)" "\n" 2580 : /*OUT*/ 2581 : /*IN*/ "r"(src), "r"(dst) 2582 : /*TRASH*/ "xmm11","xmm2" 2583 ); 2584 } 2585} 2586 2587void do_ROUNDSS_011 ( Bool mem, V128* src, /*OUT*/V128* dst ) 2588{ 2589 if (mem) { 2590 __asm__ __volatile__( 2591 "movupd (%1), %%xmm11" "\n\t" 2592 "roundss $3, (%0), %%xmm11" "\n\t" 2593 "movupd %%xmm11, (%1)" "\n" 2594 : /*OUT*/ 2595 : /*IN*/ "r"(src), "r"(dst) 2596 : /*TRASH*/ "xmm11" 2597 ); 2598 } else { 2599 __asm__ __volatile__( 2600 "movupd (%1), %%xmm11" "\n\t" 2601 "movupd (%0), %%xmm2" "\n\t" 2602 "roundss $3, %%xmm2, %%xmm11" "\n\t" 2603 "movupd %%xmm11, (%1)" "\n" 2604 : /*OUT*/ 2605 : /*IN*/ "r"(src), "r"(dst) 2606 : /*TRASH*/ "xmm11","xmm2" 2607 ); 2608 } 2609} 2610 2611void do_ROUNDSS_1XX ( Bool mem, V128* src, /*OUT*/V128* 
dst ) 2612{ 2613 if (mem) { 2614 __asm__ __volatile__( 2615 "movupd (%1), %%xmm11" "\n\t" 2616 "roundss $4, (%0), %%xmm11" "\n\t" 2617 "movupd %%xmm11, (%1)" "\n" 2618 : /*OUT*/ 2619 : /*IN*/ "r"(src), "r"(dst) 2620 : /*TRASH*/ "xmm11" 2621 ); 2622 } else { 2623 __asm__ __volatile__( 2624 "movupd (%1), %%xmm11" "\n\t" 2625 "movupd (%0), %%xmm2" "\n\t" 2626 "roundss $4, %%xmm2, %%xmm11" "\n\t" 2627 "movupd %%xmm11, (%1)" "\n" 2628 : /*OUT*/ 2629 : /*IN*/ "r"(src), "r"(dst) 2630 : /*TRASH*/ "xmm11","xmm2" 2631 ); 2632 } 2633} 2634 2635void test_ROUNDSS_w_immediate_rounding ( void ) 2636{ 2637 float vals[22]; 2638 Int i = 0; 2639 vals[i++] = 0.0; 2640 vals[i++] = -0.0; 2641 vals[i++] = mkPosInf(); 2642 vals[i++] = mkNegInf(); 2643 vals[i++] = mkPosNan(); 2644 vals[i++] = mkNegNan(); 2645 vals[i++] = -1.3; 2646 vals[i++] = -1.1; 2647 vals[i++] = -0.9; 2648 vals[i++] = -0.7; 2649 vals[i++] = -0.50001; 2650 vals[i++] = -0.49999; 2651 vals[i++] = -0.3; 2652 vals[i++] = -0.1; 2653 vals[i++] = 0.1; 2654 vals[i++] = 0.3; 2655 vals[i++] = 0.49999; 2656 vals[i++] = 0.50001; 2657 vals[i++] = 0.7; 2658 vals[i++] = 0.9; 2659 vals[i++] = 1.1; 2660 vals[i++] = 1.3; 2661 assert(i == 22); 2662 2663 for (i = 0; i < sizeof(vals)/sizeof(vals[0]); i++) { 2664 V128 src, dst; 2665 2666 randV128(&src); 2667 randV128(&dst); 2668 memcpy(&src[0], &vals[i], 4); 2669 do_ROUNDSS_000(False/*reg*/, &src, &dst); 2670 printf("r roundss_000 "); 2671 showV128(&src); 2672 printf(" "); 2673 showV128(&dst); 2674 printf(" %10f %10f", (double)vals[i], (double)*(float*)(&dst[0])); 2675 printf("\n"); 2676 2677 randV128(&src); 2678 randV128(&dst); 2679 memcpy(&src[0], &vals[i], 4); 2680 do_ROUNDSS_000(True/*mem*/, &src, &dst); 2681 printf("m roundss_000 "); 2682 showV128(&src); 2683 printf(" "); 2684 showV128(&dst); 2685 printf(" %10f %10f", (double)vals[i], (double)*(float*)(&dst[0])); 2686 printf("\n"); 2687 2688 2689 randV128(&src); 2690 randV128(&dst); 2691 memcpy(&src[0], &vals[i], 4); 2692 
do_ROUNDSS_001(False/*reg*/, &src, &dst); 2693 printf("r roundss_001 "); 2694 showV128(&src); 2695 printf(" "); 2696 showV128(&dst); 2697 printf(" %10f %10f", (double)vals[i], (double)*(float*)(&dst[0])); 2698 printf("\n"); 2699 2700 randV128(&src); 2701 randV128(&dst); 2702 memcpy(&src[0], &vals[i], 4); 2703 do_ROUNDSS_001(True/*mem*/, &src, &dst); 2704 printf("m roundss_001 "); 2705 showV128(&src); 2706 printf(" "); 2707 showV128(&dst); 2708 printf(" %10f %10f", (double)vals[i], (double)*(float*)(&dst[0])); 2709 printf("\n"); 2710 2711 2712 randV128(&src); 2713 randV128(&dst); 2714 memcpy(&src[0], &vals[i], 4); 2715 do_ROUNDSS_010(False/*reg*/, &src, &dst); 2716 printf("r roundss_010 "); 2717 showV128(&src); 2718 printf(" "); 2719 showV128(&dst); 2720 printf(" %10f %10f", (double)vals[i], (double)*(float*)(&dst[0])); 2721 printf("\n"); 2722 2723 randV128(&src); 2724 randV128(&dst); 2725 memcpy(&src[0], &vals[i], 4); 2726 do_ROUNDSS_010(True/*mem*/, &src, &dst); 2727 printf("m roundss_010 "); 2728 showV128(&src); 2729 printf(" "); 2730 showV128(&dst); 2731 printf(" %10f %10f", (double)vals[i], (double)*(float*)(&dst[0])); 2732 printf("\n"); 2733 2734 2735 randV128(&src); 2736 randV128(&dst); 2737 memcpy(&src[0], &vals[i], 4); 2738 do_ROUNDSS_011(False/*reg*/, &src, &dst); 2739 printf("r roundss_011 "); 2740 showV128(&src); 2741 printf(" "); 2742 showV128(&dst); 2743 printf(" %10f %10f", (double)vals[i], (double)*(float*)(&dst[0])); 2744 printf("\n"); 2745 2746 randV128(&src); 2747 randV128(&dst); 2748 memcpy(&src[0], &vals[i], 4); 2749 do_ROUNDSS_011(True/*mem*/, &src, &dst); 2750 printf("m roundss_011 "); 2751 showV128(&src); 2752 printf(" "); 2753 showV128(&dst); 2754 printf(" %10f %10f", (double)vals[i], (double)*(float*)(&dst[0])); 2755 printf("\n"); 2756 } 2757} 2758 2759void test_ROUNDSS_w_mxcsr_rounding ( void ) 2760{ 2761 UInt rm; 2762 float vals[22]; 2763 Int i = 0; 2764 vals[i++] = 0.0; 2765 vals[i++] = -0.0; 2766 vals[i++] = mkPosInf(); 2767 vals[i++] = 
mkNegInf(); 2768 vals[i++] = mkPosNan(); 2769 vals[i++] = mkNegNan(); 2770 vals[i++] = -1.3; 2771 vals[i++] = -1.1; 2772 vals[i++] = -0.9; 2773 vals[i++] = -0.7; 2774 vals[i++] = -0.50001; 2775 vals[i++] = -0.49999; 2776 vals[i++] = -0.3; 2777 vals[i++] = -0.1; 2778 vals[i++] = 0.1; 2779 vals[i++] = 0.3; 2780 vals[i++] = 0.49999; 2781 vals[i++] = 0.50001; 2782 vals[i++] = 0.7; 2783 vals[i++] = 0.9; 2784 vals[i++] = 1.1; 2785 vals[i++] = 1.3; 2786 assert(i == 22); 2787 2788 rm = get_sse_roundingmode(); 2789 assert(rm == 0); // 0 == RN == default 2790 2791 for (i = 0; i < sizeof(vals)/sizeof(vals[0]); i++) { 2792 V128 src, dst; 2793 2794 for (rm = 0; rm <= 3; rm++) { 2795 set_sse_roundingmode(rm); 2796 2797 randV128(&src); 2798 randV128(&dst); 2799 memcpy(&src[0], &vals[i], 4); 2800 do_ROUNDSS_1XX(False/*reg*/, &src, &dst); 2801 printf("r (rm=%u) roundss_1XX ", rm); 2802 showV128(&src); 2803 printf(" "); 2804 showV128(&dst); 2805 printf(" %10f %10f", (double)vals[i], (double)*(float*)(&dst[0])); 2806 printf("\n"); 2807 2808 randV128(&src); 2809 randV128(&dst); 2810 memcpy(&src[0], &vals[i], 4); 2811 do_ROUNDSS_1XX(True/*mem*/, &src, &dst); 2812 printf("m (rm=%u) roundss_1XX ", rm); 2813 showV128(&src); 2814 printf(" "); 2815 showV128(&dst); 2816 printf(" %10f %10f", (double)vals[i], (double)*(float*)(&dst[0])); 2817 printf("\n"); 2818 } 2819 } 2820 2821 rm = get_sse_roundingmode(); 2822 assert(rm == 3); 2823 set_sse_roundingmode(0); 2824 rm = get_sse_roundingmode(); 2825 assert(rm == 0); // 0 == RN == default 2826} 2827 2828/* ------------ ROUNDPD ------------ */ 2829 2830void do_ROUNDPD_000 ( Bool mem, V128* src, /*OUT*/V128* dst ) 2831{ 2832 if (mem) { 2833 __asm__ __volatile__( 2834 "movupd (%1), %%xmm11" "\n\t" 2835 "roundpd $0, (%0), %%xmm11" "\n\t" 2836 "movupd %%xmm11, (%1)" "\n" 2837 : /*OUT*/ 2838 : /*IN*/ "r"(src), "r"(dst) 2839 : /*TRASH*/ "xmm11" 2840 ); 2841 } else { 2842 __asm__ __volatile__( 2843 "movupd (%1), %%xmm11" "\n\t" 2844 "movupd (%0), %%xmm2" 
"\n\t" 2845 "roundpd $0, %%xmm2, %%xmm11" "\n\t" 2846 "movupd %%xmm11, (%1)" "\n" 2847 : /*OUT*/ 2848 : /*IN*/ "r"(src), "r"(dst) 2849 : /*TRASH*/ "xmm11","xmm2" 2850 ); 2851 } 2852} 2853 2854void do_ROUNDPD_001 ( Bool mem, V128* src, /*OUT*/V128* dst ) 2855{ 2856 if (mem) { 2857 __asm__ __volatile__( 2858 "movupd (%1), %%xmm11" "\n\t" 2859 "roundpd $1, (%0), %%xmm11" "\n\t" 2860 "movupd %%xmm11, (%1)" "\n" 2861 : /*OUT*/ 2862 : /*IN*/ "r"(src), "r"(dst) 2863 : /*TRASH*/ "xmm11" 2864 ); 2865 } else { 2866 __asm__ __volatile__( 2867 "movupd (%1), %%xmm11" "\n\t" 2868 "movupd (%0), %%xmm2" "\n\t" 2869 "roundpd $1, %%xmm2, %%xmm11" "\n\t" 2870 "movupd %%xmm11, (%1)" "\n" 2871 : /*OUT*/ 2872 : /*IN*/ "r"(src), "r"(dst) 2873 : /*TRASH*/ "xmm11","xmm2" 2874 ); 2875 } 2876} 2877 2878void do_ROUNDPD_010 ( Bool mem, V128* src, /*OUT*/V128* dst ) 2879{ 2880 if (mem) { 2881 __asm__ __volatile__( 2882 "movupd (%1), %%xmm11" "\n\t" 2883 "roundpd $2, (%0), %%xmm11" "\n\t" 2884 "movupd %%xmm11, (%1)" "\n" 2885 : /*OUT*/ 2886 : /*IN*/ "r"(src), "r"(dst) 2887 : /*TRASH*/ "xmm11" 2888 ); 2889 } else { 2890 __asm__ __volatile__( 2891 "movupd (%1), %%xmm11" "\n\t" 2892 "movupd (%0), %%xmm2" "\n\t" 2893 "roundpd $2, %%xmm2, %%xmm11" "\n\t" 2894 "movupd %%xmm11, (%1)" "\n" 2895 : /*OUT*/ 2896 : /*IN*/ "r"(src), "r"(dst) 2897 : /*TRASH*/ "xmm11","xmm2" 2898 ); 2899 } 2900} 2901 2902void do_ROUNDPD_011 ( Bool mem, V128* src, /*OUT*/V128* dst ) 2903{ 2904 if (mem) { 2905 __asm__ __volatile__( 2906 "movupd (%1), %%xmm11" "\n\t" 2907 "roundpd $3, (%0), %%xmm11" "\n\t" 2908 "movupd %%xmm11, (%1)" "\n" 2909 : /*OUT*/ 2910 : /*IN*/ "r"(src), "r"(dst) 2911 : /*TRASH*/ "xmm11" 2912 ); 2913 } else { 2914 __asm__ __volatile__( 2915 "movupd (%1), %%xmm11" "\n\t" 2916 "movupd (%0), %%xmm2" "\n\t" 2917 "roundpd $3, %%xmm2, %%xmm11" "\n\t" 2918 "movupd %%xmm11, (%1)" "\n" 2919 : /*OUT*/ 2920 : /*IN*/ "r"(src), "r"(dst) 2921 : /*TRASH*/ "xmm11","xmm2" 2922 ); 2923 } 2924} 2925 2926void do_ROUNDPD_1XX ( 
Bool mem, V128* src, /*OUT*/V128* dst ) 2927{ 2928 if (mem) { 2929 __asm__ __volatile__( 2930 "movupd (%1), %%xmm11" "\n\t" 2931 "roundpd $4, (%0), %%xmm11" "\n\t" 2932 "movupd %%xmm11, (%1)" "\n" 2933 : /*OUT*/ 2934 : /*IN*/ "r"(src), "r"(dst) 2935 : /*TRASH*/ "xmm11" 2936 ); 2937 } else { 2938 __asm__ __volatile__( 2939 "movupd (%1), %%xmm11" "\n\t" 2940 "movupd (%0), %%xmm2" "\n\t" 2941 "roundpd $4, %%xmm2, %%xmm11" "\n\t" 2942 "movupd %%xmm11, (%1)" "\n" 2943 : /*OUT*/ 2944 : /*IN*/ "r"(src), "r"(dst) 2945 : /*TRASH*/ "xmm11","xmm2" 2946 ); 2947 } 2948} 2949 2950void test_ROUNDPD_w_immediate_rounding ( void ) 2951{ 2952 double vals[22]; 2953 Int i = 0; 2954 vals[i++] = 0.0; 2955 vals[i++] = -0.0; 2956 vals[i++] = mkPosInf(); 2957 vals[i++] = mkNegInf(); 2958 vals[i++] = mkPosNan(); 2959 vals[i++] = mkNegNan(); 2960 vals[i++] = -1.3; 2961 vals[i++] = -1.1; 2962 vals[i++] = -0.9; 2963 vals[i++] = -0.7; 2964 vals[i++] = -0.50001; 2965 vals[i++] = -0.49999; 2966 vals[i++] = -0.3; 2967 vals[i++] = -0.1; 2968 vals[i++] = 0.1; 2969 vals[i++] = 0.3; 2970 vals[i++] = 0.49999; 2971 vals[i++] = 0.50001; 2972 vals[i++] = 0.7; 2973 vals[i++] = 0.9; 2974 vals[i++] = 1.1; 2975 vals[i++] = 1.3; 2976 assert(i == 22); 2977 2978 for (i = 0; i < sizeof(vals)/sizeof(vals[0]); i++) { 2979 V128 src, dst; 2980 2981 randV128(&src); 2982 randV128(&dst); 2983 memcpy(&src[0], &vals[i], 8); 2984 memcpy(&src[8], &vals[(i+11)%22], 8); 2985 do_ROUNDPD_000(False/*reg*/, &src, &dst); 2986 printf("r roundpd_000 "); 2987 showV128(&src); 2988 printf(" "); 2989 showV128(&dst); 2990 printf(" %10f -> %10f", vals[i], *(double*)(&dst[0])); 2991 printf(" %10f -> %10f", vals[(i+11)%22], *(double*)(&dst[8])); 2992 printf("\n"); 2993 2994 randV128(&src); 2995 randV128(&dst); 2996 memcpy(&src[0], &vals[i], 8); 2997 memcpy(&src[8], &vals[(i+11)%22], 8); 2998 do_ROUNDPD_000(True/*mem*/, &src, &dst); 2999 printf("m roundpd_000 "); 3000 showV128(&src); 3001 printf(" "); 3002 showV128(&dst); 3003 printf(" %10f 
-> %10f", vals[i], *(double*)(&dst[0])); 3004 printf(" %10f -> %10f", vals[(i+11)%22], *(double*)(&dst[8])); 3005 printf("\n"); 3006 3007 3008 randV128(&src); 3009 randV128(&dst); 3010 memcpy(&src[0], &vals[i], 8); 3011 memcpy(&src[8], &vals[(i+11)%22], 8); 3012 do_ROUNDPD_001(False/*reg*/, &src, &dst); 3013 printf("r roundpd_001 "); 3014 showV128(&src); 3015 printf(" "); 3016 showV128(&dst); 3017 printf(" %10f -> %10f", vals[i], *(double*)(&dst[0])); 3018 printf(" %10f -> %10f", vals[(i+11)%22], *(double*)(&dst[8])); 3019 printf("\n"); 3020 3021 randV128(&src); 3022 randV128(&dst); 3023 memcpy(&src[0], &vals[i], 8); 3024 memcpy(&src[8], &vals[(i+11)%22], 8); 3025 do_ROUNDPD_001(True/*mem*/, &src, &dst); 3026 printf("m roundpd_001 "); 3027 showV128(&src); 3028 printf(" "); 3029 showV128(&dst); 3030 printf(" %10f -> %10f", vals[i], *(double*)(&dst[0])); 3031 printf(" %10f -> %10f", vals[(i+11)%22], *(double*)(&dst[8])); 3032 printf("\n"); 3033 3034 3035 randV128(&src); 3036 randV128(&dst); 3037 memcpy(&src[0], &vals[i], 8); 3038 memcpy(&src[8], &vals[(i+11)%22], 8); 3039 do_ROUNDPD_010(False/*reg*/, &src, &dst); 3040 printf("r roundpd_010 "); 3041 showV128(&src); 3042 printf(" "); 3043 showV128(&dst); 3044 printf(" %10f -> %10f", vals[i], *(double*)(&dst[0])); 3045 printf(" %10f -> %10f", vals[(i+11)%22], *(double*)(&dst[8])); 3046 printf("\n"); 3047 3048 randV128(&src); 3049 randV128(&dst); 3050 memcpy(&src[0], &vals[i], 8); 3051 memcpy(&src[8], &vals[(i+11)%22], 8); 3052 do_ROUNDPD_010(True/*mem*/, &src, &dst); 3053 printf("m roundpd_010 "); 3054 showV128(&src); 3055 printf(" "); 3056 showV128(&dst); 3057 printf(" %10f -> %10f", vals[i], *(double*)(&dst[0])); 3058 printf(" %10f -> %10f", vals[(i+11)%22], *(double*)(&dst[8])); 3059 printf("\n"); 3060 3061 3062 randV128(&src); 3063 randV128(&dst); 3064 memcpy(&src[0], &vals[i], 8); 3065 memcpy(&src[8], &vals[(i+11)%22], 8); 3066 do_ROUNDPD_011(False/*reg*/, &src, &dst); 3067 printf("r roundpd_011 "); 3068 
showV128(&src); 3069 printf(" "); 3070 showV128(&dst); 3071 printf(" %10f -> %10f", vals[i], *(double*)(&dst[0])); 3072 printf(" %10f -> %10f", vals[(i+11)%22], *(double*)(&dst[8])); 3073 printf("\n"); 3074 3075 randV128(&src); 3076 randV128(&dst); 3077 memcpy(&src[0], &vals[i], 8); 3078 memcpy(&src[8], &vals[(i+11)%22], 8); 3079 do_ROUNDPD_011(True/*mem*/, &src, &dst); 3080 printf("m roundpd_011 "); 3081 showV128(&src); 3082 printf(" "); 3083 showV128(&dst); 3084 printf(" %10f -> %10f", vals[i], *(double*)(&dst[0])); 3085 printf(" %10f -> %10f", vals[(i+11)%22], *(double*)(&dst[8])); 3086 printf("\n"); 3087 } 3088} 3089 3090void test_ROUNDPD_w_mxcsr_rounding ( void ) 3091{ 3092 UInt rm; 3093 double vals[22]; 3094 Int i = 0; 3095 vals[i++] = 0.0; 3096 vals[i++] = -0.0; 3097 vals[i++] = mkPosInf(); 3098 vals[i++] = mkNegInf(); 3099 vals[i++] = mkPosNan(); 3100 vals[i++] = mkNegNan(); 3101 vals[i++] = -1.3; 3102 vals[i++] = -1.1; 3103 vals[i++] = -0.9; 3104 vals[i++] = -0.7; 3105 vals[i++] = -0.50001; 3106 vals[i++] = -0.49999; 3107 vals[i++] = -0.3; 3108 vals[i++] = -0.1; 3109 vals[i++] = 0.1; 3110 vals[i++] = 0.3; 3111 vals[i++] = 0.49999; 3112 vals[i++] = 0.50001; 3113 vals[i++] = 0.7; 3114 vals[i++] = 0.9; 3115 vals[i++] = 1.1; 3116 vals[i++] = 1.3; 3117 assert(i == 22); 3118 3119 rm = get_sse_roundingmode(); 3120 assert(rm == 0); // 0 == RN == default 3121 3122 for (i = 0; i < sizeof(vals)/sizeof(vals[0]); i++) { 3123 V128 src, dst; 3124 3125 for (rm = 0; rm <= 3; rm++) { 3126 set_sse_roundingmode(rm); 3127 3128 randV128(&src); 3129 randV128(&dst); 3130 memcpy(&src[0], &vals[i], 8); 3131 memcpy(&src[8], &vals[(i+11)%22], 8); 3132 do_ROUNDPD_1XX(False/*reg*/, &src, &dst); 3133 printf("r (rm=%u) roundpd_1XX ", rm); 3134 showV128(&src); 3135 printf(" "); 3136 showV128(&dst); 3137 printf(" %10f -> %10f", vals[i], *(double*)(&dst[0])); 3138 printf(" %10f -> %10f", vals[(i+11)%22], *(double*)(&dst[8])); 3139 printf("\n"); 3140 3141 randV128(&src); 3142 randV128(&dst); 
3143 memcpy(&src[0], &vals[i], 8); 3144 memcpy(&src[8], &vals[(i+11)%22], 8); 3145 do_ROUNDPD_1XX(True/*mem*/, &src, &dst); 3146 printf("m (rm=%u) roundpd_1XX ", rm); 3147 showV128(&src); 3148 printf(" "); 3149 showV128(&dst); 3150 printf(" %10f -> %10f", vals[i], *(double*)(&dst[0])); 3151 printf(" %10f -> %10f", vals[(i+11)%22], *(double*)(&dst[8])); 3152 printf("\n"); 3153 } 3154 } 3155 3156 rm = get_sse_roundingmode(); 3157 assert(rm == 3); 3158 set_sse_roundingmode(0); 3159 rm = get_sse_roundingmode(); 3160 assert(rm == 0); // 0 == RN == default 3161} 3162 3163/* ------------ ROUNDPS ------------ */ 3164 3165void do_ROUNDPS_000 ( Bool mem, V128* src, /*OUT*/V128* dst ) 3166{ 3167 if (mem) { 3168 __asm__ __volatile__( 3169 "movupd (%1), %%xmm11" "\n\t" 3170 "roundps $0, (%0), %%xmm11" "\n\t" 3171 "movupd %%xmm11, (%1)" "\n" 3172 : /*OUT*/ 3173 : /*IN*/ "r"(src), "r"(dst) 3174 : /*TRASH*/ "xmm11" 3175 ); 3176 } else { 3177 __asm__ __volatile__( 3178 "movupd (%1), %%xmm11" "\n\t" 3179 "movupd (%0), %%xmm2" "\n\t" 3180 "roundps $0, %%xmm2, %%xmm11" "\n\t" 3181 "movupd %%xmm11, (%1)" "\n" 3182 : /*OUT*/ 3183 : /*IN*/ "r"(src), "r"(dst) 3184 : /*TRASH*/ "xmm11","xmm2" 3185 ); 3186 } 3187} 3188 3189void do_ROUNDPS_001 ( Bool mem, V128* src, /*OUT*/V128* dst ) 3190{ 3191 if (mem) { 3192 __asm__ __volatile__( 3193 "movupd (%1), %%xmm11" "\n\t" 3194 "roundps $1, (%0), %%xmm11" "\n\t" 3195 "movupd %%xmm11, (%1)" "\n" 3196 : /*OUT*/ 3197 : /*IN*/ "r"(src), "r"(dst) 3198 : /*TRASH*/ "xmm11" 3199 ); 3200 } else { 3201 __asm__ __volatile__( 3202 "movupd (%1), %%xmm11" "\n\t" 3203 "movupd (%0), %%xmm2" "\n\t" 3204 "roundps $1, %%xmm2, %%xmm11" "\n\t" 3205 "movupd %%xmm11, (%1)" "\n" 3206 : /*OUT*/ 3207 : /*IN*/ "r"(src), "r"(dst) 3208 : /*TRASH*/ "xmm11","xmm2" 3209 ); 3210 } 3211} 3212 3213void do_ROUNDPS_010 ( Bool mem, V128* src, /*OUT*/V128* dst ) 3214{ 3215 if (mem) { 3216 __asm__ __volatile__( 3217 "movupd (%1), %%xmm11" "\n\t" 3218 "roundps $2, (%0), %%xmm11" "\n\t" 
3219 "movupd %%xmm11, (%1)" "\n" 3220 : /*OUT*/ 3221 : /*IN*/ "r"(src), "r"(dst) 3222 : /*TRASH*/ "xmm11" 3223 ); 3224 } else { 3225 __asm__ __volatile__( 3226 "movupd (%1), %%xmm11" "\n\t" 3227 "movupd (%0), %%xmm2" "\n\t" 3228 "roundps $2, %%xmm2, %%xmm11" "\n\t" 3229 "movupd %%xmm11, (%1)" "\n" 3230 : /*OUT*/ 3231 : /*IN*/ "r"(src), "r"(dst) 3232 : /*TRASH*/ "xmm11","xmm2" 3233 ); 3234 } 3235} 3236 3237void do_ROUNDPS_011 ( Bool mem, V128* src, /*OUT*/V128* dst ) 3238{ 3239 if (mem) { 3240 __asm__ __volatile__( 3241 "movupd (%1), %%xmm11" "\n\t" 3242 "roundps $3, (%0), %%xmm11" "\n\t" 3243 "movupd %%xmm11, (%1)" "\n" 3244 : /*OUT*/ 3245 : /*IN*/ "r"(src), "r"(dst) 3246 : /*TRASH*/ "xmm11" 3247 ); 3248 } else { 3249 __asm__ __volatile__( 3250 "movupd (%1), %%xmm11" "\n\t" 3251 "movupd (%0), %%xmm2" "\n\t" 3252 "roundps $3, %%xmm2, %%xmm11" "\n\t" 3253 "movupd %%xmm11, (%1)" "\n" 3254 : /*OUT*/ 3255 : /*IN*/ "r"(src), "r"(dst) 3256 : /*TRASH*/ "xmm11","xmm2" 3257 ); 3258 } 3259} 3260 3261void do_ROUNDPS_1XX ( Bool mem, V128* src, /*OUT*/V128* dst ) 3262{ 3263 if (mem) { 3264 __asm__ __volatile__( 3265 "movupd (%1), %%xmm11" "\n\t" 3266 "roundps $4, (%0), %%xmm11" "\n\t" 3267 "movupd %%xmm11, (%1)" "\n" 3268 : /*OUT*/ 3269 : /*IN*/ "r"(src), "r"(dst) 3270 : /*TRASH*/ "xmm11" 3271 ); 3272 } else { 3273 __asm__ __volatile__( 3274 "movupd (%1), %%xmm11" "\n\t" 3275 "movupd (%0), %%xmm2" "\n\t" 3276 "roundps $4, %%xmm2, %%xmm11" "\n\t" 3277 "movupd %%xmm11, (%1)" "\n" 3278 : /*OUT*/ 3279 : /*IN*/ "r"(src), "r"(dst) 3280 : /*TRASH*/ "xmm11","xmm2" 3281 ); 3282 } 3283} 3284 3285void test_ROUNDPS_w_immediate_rounding ( void ) 3286{ 3287 float vals[22]; 3288 Int i = 0; 3289 vals[i++] = 0.0; 3290 vals[i++] = -0.0; 3291 vals[i++] = mkPosInf(); 3292 vals[i++] = mkNegInf(); 3293 vals[i++] = mkPosNan(); 3294 vals[i++] = mkNegNan(); 3295 vals[i++] = -1.3; 3296 vals[i++] = -1.1; 3297 vals[i++] = -0.9; 3298 vals[i++] = -0.7; 3299 vals[i++] = -0.50001; 3300 vals[i++] = -0.49999; 
3301 vals[i++] = -0.3; 3302 vals[i++] = -0.1; 3303 vals[i++] = 0.1; 3304 vals[i++] = 0.3; 3305 vals[i++] = 0.49999; 3306 vals[i++] = 0.50001; 3307 vals[i++] = 0.7; 3308 vals[i++] = 0.9; 3309 vals[i++] = 1.1; 3310 vals[i++] = 1.3; 3311 assert(i == 22); 3312 3313 for (i = 0; i < sizeof(vals)/sizeof(vals[0]); i++) { 3314 V128 src, dst; 3315 3316 randV128(&src); 3317 randV128(&dst); 3318 memcpy(&src[0], &vals[i], 4); 3319 memcpy(&src[4], &vals[(i+5)%22], 4); 3320 memcpy(&src[8], &vals[(i+11)%22], 4); 3321 memcpy(&src[12], &vals[(i+17)%22], 4); 3322 do_ROUNDPS_000(False/*reg*/, &src, &dst); 3323 printf("r roundps_000 "); 3324 showV128(&src); 3325 printf(" "); 3326 showV128(&dst); 3327 printf(" %9f:%9f", vals[i], (double)*(float*)(&dst[0])); 3328 printf(" %9f:%9f", vals[(i+5)%22], (double)*(float*)(&dst[4])); 3329 printf(" %9f:%9f", vals[(i+11)%22], (double)*(float*)(&dst[8])); 3330 printf(" %9f:%9f", vals[(i+17)%22], (double)*(float*)(&dst[12])); 3331 printf("\n"); 3332 3333 randV128(&src); 3334 randV128(&dst); 3335 memcpy(&src[0], &vals[i], 4); 3336 memcpy(&src[4], &vals[(i+5)%22], 4); 3337 memcpy(&src[8], &vals[(i+11)%22], 4); 3338 memcpy(&src[12], &vals[(i+17)%22], 4); 3339 do_ROUNDPS_000(True/*mem*/, &src, &dst); 3340 printf("m roundps_000 "); 3341 showV128(&src); 3342 printf(" "); 3343 showV128(&dst); 3344 printf(" %9f:%9f", vals[i], (double)*(float*)(&dst[0])); 3345 printf(" %9f:%9f", vals[(i+5)%22], (double)*(float*)(&dst[4])); 3346 printf(" %9f:%9f", vals[(i+11)%22], (double)*(float*)(&dst[8])); 3347 printf(" %9f:%9f", vals[(i+17)%22], (double)*(float*)(&dst[12])); 3348 printf("\n"); 3349 3350 3351 randV128(&src); 3352 randV128(&dst); 3353 memcpy(&src[0], &vals[i], 4); 3354 memcpy(&src[4], &vals[(i+5)%22], 4); 3355 memcpy(&src[8], &vals[(i+11)%22], 4); 3356 memcpy(&src[12], &vals[(i+17)%22], 4); 3357 do_ROUNDPS_001(False/*reg*/, &src, &dst); 3358 printf("r roundps_001 "); 3359 showV128(&src); 3360 printf(" "); 3361 showV128(&dst); 3362 printf(" %9f:%9f", 
vals[i], (double)*(float*)(&dst[0])); 3363 printf(" %9f:%9f", vals[(i+5)%22], (double)*(float*)(&dst[4])); 3364 printf(" %9f:%9f", vals[(i+11)%22], (double)*(float*)(&dst[8])); 3365 printf(" %9f:%9f", vals[(i+17)%22], (double)*(float*)(&dst[12])); 3366 printf("\n"); 3367 3368 randV128(&src); 3369 randV128(&dst); 3370 memcpy(&src[0], &vals[i], 4); 3371 memcpy(&src[4], &vals[(i+5)%22], 4); 3372 memcpy(&src[8], &vals[(i+11)%22], 4); 3373 memcpy(&src[12], &vals[(i+17)%22], 4); 3374 do_ROUNDPS_001(True/*mem*/, &src, &dst); 3375 printf("m roundps_001 "); 3376 showV128(&src); 3377 printf(" "); 3378 showV128(&dst); 3379 printf(" %9f:%9f", vals[i], (double)*(float*)(&dst[0])); 3380 printf(" %9f:%9f", vals[(i+5)%22], (double)*(float*)(&dst[4])); 3381 printf(" %9f:%9f", vals[(i+11)%22], (double)*(float*)(&dst[8])); 3382 printf(" %9f:%9f", vals[(i+17)%22], (double)*(float*)(&dst[12])); 3383 printf("\n"); 3384 3385 3386 randV128(&src); 3387 randV128(&dst); 3388 memcpy(&src[0], &vals[i], 4); 3389 memcpy(&src[4], &vals[(i+5)%22], 4); 3390 memcpy(&src[8], &vals[(i+11)%22], 4); 3391 memcpy(&src[12], &vals[(i+17)%22], 4); 3392 do_ROUNDPS_010(False/*reg*/, &src, &dst); 3393 printf("r roundps_010 "); 3394 showV128(&src); 3395 printf(" "); 3396 showV128(&dst); 3397 printf(" %9f:%9f", vals[i], (double)*(float*)(&dst[0])); 3398 printf(" %9f:%9f", vals[(i+5)%22], (double)*(float*)(&dst[4])); 3399 printf(" %9f:%9f", vals[(i+11)%22], (double)*(float*)(&dst[8])); 3400 printf(" %9f:%9f", vals[(i+17)%22], (double)*(float*)(&dst[12])); 3401 printf("\n"); 3402 3403 randV128(&src); 3404 randV128(&dst); 3405 memcpy(&src[0], &vals[i], 4); 3406 memcpy(&src[4], &vals[(i+5)%22], 4); 3407 memcpy(&src[8], &vals[(i+11)%22], 4); 3408 memcpy(&src[12], &vals[(i+17)%22], 4); 3409 do_ROUNDPS_010(True/*mem*/, &src, &dst); 3410 printf("m roundps_010 "); 3411 showV128(&src); 3412 printf(" "); 3413 showV128(&dst); 3414 printf(" %9f:%9f", vals[i], (double)*(float*)(&dst[0])); 3415 printf(" %9f:%9f", 
vals[(i+5)%22], (double)*(float*)(&dst[4])); 3416 printf(" %9f:%9f", vals[(i+11)%22], (double)*(float*)(&dst[8])); 3417 printf(" %9f:%9f", vals[(i+17)%22], (double)*(float*)(&dst[12])); 3418 printf("\n"); 3419 3420 3421 randV128(&src); 3422 randV128(&dst); 3423 memcpy(&src[0], &vals[i], 4); 3424 memcpy(&src[4], &vals[(i+5)%22], 4); 3425 memcpy(&src[8], &vals[(i+11)%22], 4); 3426 memcpy(&src[12], &vals[(i+17)%22], 4); 3427 do_ROUNDPS_011(False/*reg*/, &src, &dst); 3428 printf("r roundps_011 "); 3429 showV128(&src); 3430 printf(" "); 3431 showV128(&dst); 3432 printf(" %9f:%9f", vals[i], (double)*(float*)(&dst[0])); 3433 printf(" %9f:%9f", vals[(i+5)%22], (double)*(float*)(&dst[4])); 3434 printf(" %9f:%9f", vals[(i+11)%22], (double)*(float*)(&dst[8])); 3435 printf(" %9f:%9f", vals[(i+17)%22], (double)*(float*)(&dst[12])); 3436 printf("\n"); 3437 3438 randV128(&src); 3439 randV128(&dst); 3440 memcpy(&src[0], &vals[i], 4); 3441 memcpy(&src[4], &vals[(i+5)%22], 4); 3442 memcpy(&src[8], &vals[(i+11)%22], 4); 3443 memcpy(&src[12], &vals[(i+17)%22], 4); 3444 do_ROUNDPS_011(True/*mem*/, &src, &dst); 3445 printf("m roundps_011 "); 3446 showV128(&src); 3447 printf(" "); 3448 showV128(&dst); 3449 printf(" %9f:%9f", vals[i], (double)*(float*)(&dst[0])); 3450 printf(" %9f:%9f", vals[(i+5)%22], (double)*(float*)(&dst[4])); 3451 printf(" %9f:%9f", vals[(i+11)%22], (double)*(float*)(&dst[8])); 3452 printf(" %9f:%9f", vals[(i+17)%22], (double)*(float*)(&dst[12])); 3453 printf("\n"); 3454 } 3455} 3456 3457void test_ROUNDPS_w_mxcsr_rounding ( void ) 3458{ 3459 UInt rm; 3460 float vals[22]; 3461 Int i = 0; 3462 vals[i++] = 0.0; 3463 vals[i++] = -0.0; 3464 vals[i++] = mkPosInf(); 3465 vals[i++] = mkNegInf(); 3466 vals[i++] = mkPosNan(); 3467 vals[i++] = mkNegNan(); 3468 vals[i++] = -1.3; 3469 vals[i++] = -1.1; 3470 vals[i++] = -0.9; 3471 vals[i++] = -0.7; 3472 vals[i++] = -0.50001; 3473 vals[i++] = -0.49999; 3474 vals[i++] = -0.3; 3475 vals[i++] = -0.1; 3476 vals[i++] = 0.1; 3477 
vals[i++] = 0.3; 3478 vals[i++] = 0.49999; 3479 vals[i++] = 0.50001; 3480 vals[i++] = 0.7; 3481 vals[i++] = 0.9; 3482 vals[i++] = 1.1; 3483 vals[i++] = 1.3; 3484 assert(i == 22); 3485 3486 rm = get_sse_roundingmode(); 3487 assert(rm == 0); // 0 == RN == default 3488 3489 for (i = 0; i < sizeof(vals)/sizeof(vals[0]); i++) { 3490 V128 src, dst; 3491 3492 for (rm = 0; rm <= 3; rm++) { 3493 set_sse_roundingmode(rm); 3494 3495 randV128(&src); 3496 randV128(&dst); 3497 memcpy(&src[0], &vals[i], 4); 3498 memcpy(&src[4], &vals[(i+5)%22], 4); 3499 memcpy(&src[8], &vals[(i+11)%22], 4); 3500 memcpy(&src[12], &vals[(i+17)%22], 4); 3501 do_ROUNDPS_1XX(False/*reg*/, &src, &dst); 3502 printf("r (rm=%u) roundps_1XX ", rm); 3503 showV128(&src); 3504 printf(" "); 3505 showV128(&dst); 3506 printf(" %9f:%9f", vals[i], (double)*(float*)(&dst[0])); 3507 printf(" %9f:%9f", vals[(i+5)%22], (double)*(float*)(&dst[4])); 3508 printf(" %9f:%9f", vals[(i+11)%22], (double)*(float*)(&dst[8])); 3509 printf(" %9f:%9f", vals[(i+17)%22], (double)*(float*)(&dst[12])); 3510 printf("\n"); 3511 3512 randV128(&src); 3513 randV128(&dst); 3514 memcpy(&src[0], &vals[i], 4); 3515 memcpy(&src[4], &vals[(i+5)%22], 4); 3516 memcpy(&src[8], &vals[(i+11)%22], 4); 3517 memcpy(&src[12], &vals[(i+17)%22], 4); 3518 do_ROUNDPS_1XX(True/*mem*/, &src, &dst); 3519 printf("m (rm=%u) roundps_1XX ", rm); 3520 showV128(&src); 3521 printf(" "); 3522 showV128(&dst); 3523 printf(" %9f:%9f", vals[i], (double)*(float*)(&dst[0])); 3524 printf(" %9f:%9f", vals[(i+5)%22], (double)*(float*)(&dst[4])); 3525 printf(" %9f:%9f", vals[(i+11)%22], (double)*(float*)(&dst[8])); 3526 printf(" %9f:%9f", vals[(i+17)%22], (double)*(float*)(&dst[12])); 3527 printf("\n"); 3528 } 3529 } 3530 3531 rm = get_sse_roundingmode(); 3532 assert(rm == 3); 3533 set_sse_roundingmode(0); 3534 rm = get_sse_roundingmode(); 3535 assert(rm == 0); // 0 == RN == default 3536} 3537 3538/* ------------ PTEST ------------ */ 3539 3540void test_PTEST ( void ) 3541{ 3542 
const Int ntests = 8; 3543 V128 spec[ntests]; 3544 do64HLtoV128( &spec[0], 0x0000000000000000ULL, 0x0000000000000000ULL ); 3545 do64HLtoV128( &spec[1], 0x0000000000000000ULL, 0x0000000000000001ULL ); 3546 do64HLtoV128( &spec[2], 0x0000000000000001ULL, 0x0000000000000000ULL ); 3547 do64HLtoV128( &spec[3], 0x0000000000000001ULL, 0x0000000000000001ULL ); 3548 do64HLtoV128( &spec[4], 0xffffffffffffffffULL, 0xffffffffffffffffULL ); 3549 do64HLtoV128( &spec[5], 0xffffffffffffffffULL, 0xfffffffffffffffeULL ); 3550 do64HLtoV128( &spec[6], 0xfffffffffffffffeULL, 0xffffffffffffffffULL ); 3551 do64HLtoV128( &spec[7], 0xfffffffffffffffeULL, 0xfffffffffffffffeULL ); 3552 V128 block[2]; 3553 Int i, j; 3554 ULong flags; 3555 for (i = 0; i < ntests; i++) { 3556 for (j = 0; j < ntests; j++) { 3557 memcpy(&block[0], &spec[i], 16); 3558 memcpy(&block[1], &spec[j], 16); 3559 __asm__ __volatile__( 3560 "subq $256, %%rsp" "\n\t" 3561 "movupd 0(%1), %%xmm2" "\n\t" 3562 "ptest 16(%1), %%xmm2" "\n\t" 3563 "pushfq" "\n\t" 3564 "popq %0" "\n\t" 3565 "addq $256, %%rsp" "\n\t" 3566 : /*out*/"=r"(flags) : /*in*/ "r"(&block[0]) : 3567 "xmm2", "memory", "cc" 3568 ); 3569 printf("r ptest "); 3570 showV128(&block[0]); 3571 printf(" "); 3572 showV128(&block[1]); 3573 printf(" -> eflags %04x\n", (UInt)flags & 0x8D5); 3574 } 3575 } 3576} 3577 3578/* ------------ PBLENDVB ------------ */ 3579 3580void do_PBLENDVB ( Bool mem, V128* xmm0, V128* src, /*MOD*/V128* dst ) 3581{ 3582 if (mem) { 3583 __asm__ __volatile__( 3584 "movupd (%2), %%xmm0" "\n\t" 3585 "movupd (%1), %%xmm11" "\n\t" 3586 "pblendvb (%0), %%xmm11" "\n\t" 3587 "movupd %%xmm11, (%1)" "\n" 3588 : /*OUT*/ 3589 : /*IN*/ "r"(src), "r"(dst), "r"(xmm0) 3590 : /*TRASH*/ "xmm11","xmm0" 3591 ); 3592 } else { 3593 __asm__ __volatile__( 3594 "movupd (%2), %%xmm0" "\n\t" 3595 "movupd (%1), %%xmm11" "\n\t" 3596 "movupd (%0), %%xmm2" "\n\t" 3597 "pblendvb %%xmm2, %%xmm11" "\n\t" 3598 "movupd %%xmm11, (%1)" "\n" 3599 : /*OUT*/ 3600 : /*IN*/ "r"(src), 
"r"(dst), "r"(xmm0) 3601 : /*TRASH*/ "xmm11","xmm2","xmm0" 3602 ); 3603 } 3604} 3605 3606void test_PBLENDVB ( void ) 3607{ 3608 V128 xmm0, src, dst, t_xmm0, t_src, t_dst; 3609 Int i; 3610 for (i = 0; i < 10; i++) { 3611 randV128(&t_xmm0); 3612 randV128(&t_src); 3613 randV128(&t_dst); 3614 3615 memcpy(&xmm0, &t_xmm0, 16); 3616 memcpy(&src, &t_src, 16); 3617 memcpy(&dst, &t_dst, 16); 3618 do_PBLENDVB(False/*reg*/, &xmm0, &src, &dst); 3619 printf("r pblendvb "); 3620 showV128(&t_xmm0); 3621 printf(" "); 3622 showV128(&t_src); 3623 printf(" "); 3624 showV128(&t_dst); 3625 printf(" -> "); 3626 showV128(&dst); 3627 printf("\n"); 3628 3629 memcpy(&xmm0, &t_xmm0, 16); 3630 memcpy(&src, &t_src, 16); 3631 memcpy(&dst, &t_dst, 16); 3632 do_PBLENDVB(True/*mem*/, &xmm0, &src, &dst); 3633 printf("m pblendvb "); 3634 showV128(&t_xmm0); 3635 printf(" "); 3636 showV128(&t_src); 3637 printf(" "); 3638 showV128(&t_dst); 3639 printf(" -> "); 3640 showV128(&dst); 3641 printf("\n"); 3642 } 3643} 3644 3645/* ------------ BLENDVPD ------------ */ 3646 3647void do_BLENDVPD ( Bool mem, V128* xmm0, V128* src, /*MOD*/V128* dst ) 3648{ 3649 if (mem) { 3650 __asm__ __volatile__( 3651 "movupd (%2), %%xmm0" "\n\t" 3652 "movupd (%1), %%xmm11" "\n\t" 3653 "blendvpd (%0), %%xmm11" "\n\t" 3654 "movupd %%xmm11, (%1)" "\n" 3655 : /*OUT*/ 3656 : /*IN*/ "r"(src), "r"(dst), "r"(xmm0) 3657 : /*TRASH*/ "xmm11","xmm0" 3658 ); 3659 } else { 3660 __asm__ __volatile__( 3661 "movupd (%2), %%xmm0" "\n\t" 3662 "movupd (%1), %%xmm11" "\n\t" 3663 "movupd (%0), %%xmm2" "\n\t" 3664 "blendvpd %%xmm2, %%xmm11" "\n\t" 3665 "movupd %%xmm11, (%1)" "\n" 3666 : /*OUT*/ 3667 : /*IN*/ "r"(src), "r"(dst), "r"(xmm0) 3668 : /*TRASH*/ "xmm11","xmm2","xmm0" 3669 ); 3670 } 3671} 3672 3673void test_BLENDVPD ( void ) 3674{ 3675 V128 xmm0, src, dst, t_xmm0, t_src, t_dst; 3676 Int i; 3677 for (i = 0; i < 10; i++) { 3678 randV128(&t_xmm0); 3679 randV128(&t_src); 3680 randV128(&t_dst); 3681 3682 memcpy(&xmm0, &t_xmm0, 16); 3683 
memcpy(&src, &t_src, 16); 3684 memcpy(&dst, &t_dst, 16); 3685 do_BLENDVPD(False/*reg*/, &xmm0, &src, &dst); 3686 printf("r blendvpd "); 3687 showV128(&t_xmm0); 3688 printf(" "); 3689 showV128(&t_src); 3690 printf(" "); 3691 showV128(&t_dst); 3692 printf(" -> "); 3693 showV128(&dst); 3694 printf("\n"); 3695 3696 memcpy(&xmm0, &t_xmm0, 16); 3697 memcpy(&src, &t_src, 16); 3698 memcpy(&dst, &t_dst, 16); 3699 do_BLENDVPD(True/*mem*/, &xmm0, &src, &dst); 3700 printf("m blendvpd "); 3701 showV128(&t_xmm0); 3702 printf(" "); 3703 showV128(&t_src); 3704 printf(" "); 3705 showV128(&t_dst); 3706 printf(" -> "); 3707 showV128(&dst); 3708 printf("\n"); 3709 } 3710} 3711 3712/* ------------ BLENDVPS ------------ */ 3713 3714void do_BLENDVPS ( Bool mem, V128* xmm0, V128* src, /*MOD*/V128* dst ) 3715{ 3716 if (mem) { 3717 __asm__ __volatile__( 3718 "movupd (%2), %%xmm0" "\n\t" 3719 "movupd (%1), %%xmm11" "\n\t" 3720 "blendvps (%0), %%xmm11" "\n\t" 3721 "movupd %%xmm11, (%1)" "\n" 3722 : /*OUT*/ 3723 : /*IN*/ "r"(src), "r"(dst), "r"(xmm0) 3724 : /*TRASH*/ "xmm11","xmm0" 3725 ); 3726 } else { 3727 __asm__ __volatile__( 3728 "movupd (%2), %%xmm0" "\n\t" 3729 "movupd (%1), %%xmm11" "\n\t" 3730 "movupd (%0), %%xmm2" "\n\t" 3731 "blendvps %%xmm2, %%xmm11" "\n\t" 3732 "movupd %%xmm11, (%1)" "\n" 3733 : /*OUT*/ 3734 : /*IN*/ "r"(src), "r"(dst), "r"(xmm0) 3735 : /*TRASH*/ "xmm11","xmm2","xmm0" 3736 ); 3737 } 3738} 3739 3740void test_BLENDVPS ( void ) 3741{ 3742 V128 xmm0, src, dst, t_xmm0, t_src, t_dst; 3743 Int i; 3744 for (i = 0; i < 10; i++) { 3745 randV128(&t_xmm0); 3746 randV128(&t_src); 3747 randV128(&t_dst); 3748 3749 memcpy(&xmm0, &t_xmm0, 16); 3750 memcpy(&src, &t_src, 16); 3751 memcpy(&dst, &t_dst, 16); 3752 do_BLENDVPS(False/*reg*/, &xmm0, &src, &dst); 3753 printf("r blendvps "); 3754 showV128(&t_xmm0); 3755 printf(" "); 3756 showV128(&t_src); 3757 printf(" "); 3758 showV128(&t_dst); 3759 printf(" -> "); 3760 showV128(&dst); 3761 printf("\n"); 3762 3763 memcpy(&xmm0, &t_xmm0, 
16); 3764 memcpy(&src, &t_src, 16); 3765 memcpy(&dst, &t_dst, 16); 3766 do_BLENDVPS(True/*mem*/, &xmm0, &src, &dst); 3767 printf("m blendvps "); 3768 showV128(&t_xmm0); 3769 printf(" "); 3770 showV128(&t_src); 3771 printf(" "); 3772 showV128(&t_dst); 3773 printf(" -> "); 3774 showV128(&dst); 3775 printf("\n"); 3776 } 3777} 3778 3779void test_MOVNTDQA ( void ) 3780{ 3781 V128 src, dst; 3782 Int i; 3783 for (i = 0; i < 10; i++) { 3784 randV128(&src); 3785 /* make sure the load actually happens */ 3786 randV128(&dst); 3787 DO_m_r("movntdqa", src, dst); 3788 } 3789} 3790 3791/* ------------ main ------------ */ 3792 3793int main ( int argc, char** argv ) 3794{ 3795#if 1 3796 // ------ SSE 4.1 ------ 3797 test_BLENDPD(); // done Apr.01.2010 3798 test_BLENDPS(); // done Apr.02.2010 3799 test_PBLENDW(); 3800 test_PBLENDVB(); 3801 test_BLENDVPD(); 3802 test_BLENDVPS(); 3803 test_DPPD(); // done Apr.08.2010 3804 test_DPPS(); // done Apr.09.2010 3805 test_EXTRACTPS(); 3806 test_INSERTPS(); // done Apr.01.2010 3807 test_PCMPEQQ(); 3808 test_PEXTRB(); // done Apr.15.2010 3809 test_PEXTRD(); // done Apr.14.2010 3810 test_PEXTRQ(); // done Apr.14.2010 3811 test_PEXTRW(); // done Apr.14.2010 3812 test_PINSRQ(); // done Apr.16.2010 3813 test_PINSRD(); // todo 3814 test_PINSRW(); /* Umm, this is SSE2, not SSE4. Right? 
*/ 3815 test_PINSRB(); // todo 3816 test_PMAXSB(); 3817 test_PMAXSD(); // done Apr.09.2010 3818 test_PMAXUD(); // done Apr.16.2010 3819 test_PMAXUW(); 3820 test_PMINSB(); 3821 test_PMINSD(); // done Apr.09.2010 3822 test_PMINUD(); 3823 test_PMINUW(); 3824 test_PMOVSXBW(); // done Apr.02.2010 3825 test_PMOVSXBD(); // done Mar.30.2010 3826 test_PMOVSXBQ(); // done Mar.30.2010 3827 test_PMOVSXWD(); // done Mar.31.2010 3828 test_PMOVSXWQ(); // done Mar.31.2010 3829 test_PMOVSXDQ(); // done Mar.31.2010 3830 test_PMOVZXBW(); // done Mar.28.2010 3831 test_PMOVZXBD(); // done Mar.29.2010 3832 test_PMOVZXBQ(); // done Mar.29.2010 3833 test_PMOVZXWD(); // done Mar.28.2010 3834 test_PMOVZXWQ(); // done Mar.29.2010 3835 test_PMOVZXDQ(); // done Mar.29.2010 3836 test_POPCNTW(); 3837 test_POPCNTL(); 3838 test_POPCNTQ(); 3839 test_PMULDQ(); 3840 test_PMULLD(); 3841 test_PTEST(); 3842 test_ROUNDSD_w_immediate_rounding(); 3843 test_ROUNDSS_w_immediate_rounding(); 3844 test_ROUNDPD_w_immediate_rounding(); 3845 test_ROUNDPS_w_immediate_rounding(); 3846 test_ROUNDSD_w_mxcsr_rounding(); 3847 test_ROUNDSS_w_mxcsr_rounding(); 3848 test_ROUNDPD_w_mxcsr_rounding(); 3849 test_ROUNDPS_w_mxcsr_rounding(); 3850 // ------ SSE 4.2 ------ 3851 test_PCMPGTQ(); 3852 // CRC32B,Q 3853 test_PACKUSDW(); 3854 test_PHMINPOSUW(); 3855 test_MPSADBW(); 3856 test_MOVNTDQA(); /* not sure whether this is 4.1 or 4.2 */ 3857#else 3858 test_MPSADBW(); 3859#endif 3860 3861 return 0; 3862} 3863 3864