1/* Copyright (C) 2011 IBM 2 3 Author: Maynard Johnson <maynardj@us.ibm.com> 4 5 This program is free software; you can redistribute it and/or 6 modify it under the terms of the GNU General Public License as 7 published by the Free Software Foundation; either version 2 of the 8 License, or (at your option) any later version. 9 10 This program is distributed in the hope that it will be useful, but 11 WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program; if not, write to the Free Software 17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 18 02111-1307, USA. 19 20 The GNU General Public License is contained in the file COPYING. 21 */ 22 23#ifdef HAS_VSX 24 25#include <stdio.h> 26#include <stdint.h> 27#include <stdlib.h> 28#include <string.h> 29#include <malloc.h> 30#include <altivec.h> 31#include <math.h> 32#include <unistd.h> // getopt 33 34#ifndef __powerpc64__ 35typedef uint32_t HWord_t; 36#else 37typedef uint64_t HWord_t; 38#endif /* __powerpc64__ */ 39 40#ifdef VGP_ppc64le_linux 41#define isLE 1 42#else 43#define isLE 0 44#endif 45 46typedef unsigned char Bool; 47#define True 1 48#define False 0 49register HWord_t r14 __asm__ ("r14"); 50register HWord_t r15 __asm__ ("r15"); 51register HWord_t r16 __asm__ ("r16"); 52register HWord_t r17 __asm__ ("r17"); 53register double f14 __asm__ ("fr14"); 54register double f15 __asm__ ("fr15"); 55register double f16 __asm__ ("fr16"); 56register double f17 __asm__ ("fr17"); 57 58static volatile unsigned int div_flags, div_xer; 59 60#define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7" 61 62#define SET_CR(_arg) \ 63 __asm__ __volatile__ ("mtcr %0" : : "b"(_arg) : ALLCR ); 64 65#define SET_XER(_arg) \ 66 __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" ); 67 68#define GET_CR(_lval) \ 69 __asm__ __volatile__ ("mfcr %0" : "=b"(_lval) ) 70 71#define GET_XER(_lval) \ 72 __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) ) 73 74#define GET_CR_XER(_lval_cr,_lval_xer) \ 75 do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0) 76 77#define SET_CR_ZERO \ 78 SET_CR(0) 79 80#define SET_XER_ZERO \ 81 SET_XER(0) 82 83#define SET_CR_XER_ZERO \ 84 do { SET_CR_ZERO; SET_XER_ZERO; } while (0) 85 86#define SET_FPSCR_ZERO \ 87 do { double _d = 0.0; \ 88 __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \ 89 } while (0) 90 91 92typedef void (*test_func_t)(void); 93typedef struct test_table test_table_t; 94 95/* Defines for the instructiion groups, use bit field to identify */ 96#define SCALAR_DIV_INST 0x0001 97#define OTHER_INST 0x0002 98 99/* These functions below that construct a table of floating point 100 * values were lifted from none/tests/ppc32/jm-insns.c. 101 */ 102 103#if defined (DEBUG_ARGS_BUILD) 104#define AB_DPRINTF(fmt, args...) do { fprintf(stderr, fmt , ##args); } while (0) 105#else 106#define AB_DPRINTF(fmt, args...) do { } while (0) 107#endif 108 109static inline void register_farg (void *farg, 110 int s, uint16_t _exp, uint64_t mant) 111{ 112 uint64_t tmp; 113 114 tmp = ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant; 115 *(uint64_t *)farg = tmp; 116 AB_DPRINTF("%d %03x %013llx => %016llx %0e\n", 117 s, _exp, mant, *(uint64_t *)farg, *(double *)farg); 118} 119 120static inline void register_sp_farg (void *farg, 121 int s, uint16_t _exp, uint32_t mant) 122{ 123 uint32_t tmp; 124 tmp = ((uint32_t)s << 31) | ((uint32_t)_exp << 23) | mant; 125 *(uint32_t *)farg = tmp; 126} 127 128 129typedef struct fp_test_args { 130 int fra_idx; 131 int frb_idx; 132} fp_test_args_t; 133 134 135fp_test_args_t two_arg_fp_tests[] = { 136 {8, 8}, 137 {8, 14}, 138 {15, 16}, 139 {8, 5}, 140 {8, 4}, 141 {8, 7}, 142 {8, 9}, 143 {8, 11}, 144 {14, 8}, 145 {14, 14}, 146 {14, 6}, 147 {14, 5}, 148 {14, 4}, 149 {14, 7}, 150 {14, 9}, 151 {14, 11}, 152 {6, 8}, 153 {6, 14}, 154 {6, 6}, 155 {6, 5}, 156 {6, 4}, 157 {6, 7}, 158 {6, 9}, 159 {6, 11}, 160 {5, 8}, 161 {5, 14}, 162 {5, 6}, 163 {5, 5}, 164 {5, 4}, 165 {5, 7}, 166 {5, 9}, 167 {5, 11}, 168 {4, 8}, 169 {4, 14}, 170 {4, 6}, 171 {4, 5}, 172 {4, 1}, 173 {4, 7}, 174 {4, 9}, 175 {4, 11}, 176 {7, 8}, 177 {7, 14}, 178 {7, 6}, 179 {7, 5}, 180 {7, 4}, 181 {7, 7}, 182 {7, 9}, 183 {7, 11}, 184 {10, 8}, 185 {10, 14}, 186 {12, 6}, 187 {12, 5}, 188 {10, 4}, 189 {10, 7}, 190 {10, 9}, 191 {10, 11}, 192 {12, 8 }, 193 {12, 14}, 194 {12, 6}, 195 {15, 16}, 196 {15, 16}, 197 {9, 11}, 198 {11, 11}, 199 {11, 12}, 200 {16, 18}, 201 {17, 16}, 202 {19, 19}, 203 {19, 18} 204}; 205 206 207static int nb_special_fargs; 208static double * spec_fargs; 209static float * spec_sp_fargs; 210 211static void build_special_fargs_table(void) 212{ 213/* 214 Entry Sign Exp fraction Special value 215 0 0 3fd 0x8000000000000ULL Positive finite number 216 1 0 404 0xf000000000000ULL ... 217 2 0 001 0x8000000b77501ULL ... 218 3 0 7fe 0x800000000051bULL ... 219 4 0 012 0x3214569900000ULL ... 220 5 0 000 0x0000000000000ULL +0.0 (+zero) 221 6 1 000 0x0000000000000ULL -0.0 (-zero) 222 7 0 7ff 0x0000000000000ULL +infinity 223 8 1 7ff 0x0000000000000ULL -infinity 224 9 0 7ff 0x7FFFFFFFFFFFFULL +SNaN 225 10 1 7ff 0x7FFFFFFFFFFFFULL -SNaN 226 11 0 7ff 0x8000000000000ULL +QNaN 227 12 1 7ff 0x8000000000000ULL -QNaN 228 13 1 000 0x8340000078000ULL Denormalized val (zero exp and non-zero fraction) 229 14 1 40d 0x0650f5a07b353ULL Negative finite number 230 15 0 412 0x32585a9900000ULL A few more positive finite numbers 231 16 0 413 0x82511a2000000ULL ... 232 17 . . . . . . . . . . . . . . . . . . . . . . . 233 18 . . . . . . . . . . . . . . . . . . . . . . . 234 19 . . . . . . . . . . . . . . . . . . . . . . . 235*/ 236 237 uint64_t mant; 238 uint32_t mant_sp; 239 uint16_t _exp; 240 int s; 241 int j, i = 0; 242 243 if (spec_fargs) 244 return; 245 246 spec_fargs = malloc( 20 * sizeof(double) ); 247 spec_sp_fargs = malloc( 20 * sizeof(float) ); 248 249 // #0 250 s = 0; 251 _exp = 0x3fd; 252 mant = 0x8000000000000ULL; 253 register_farg(&spec_fargs[i++], s, _exp, mant); 254 255 // #1 256 s = 0; 257 _exp = 0x404; 258 mant = 0xf000000000000ULL; 259 register_farg(&spec_fargs[i++], s, _exp, mant); 260 261 // #2 262 s = 0; 263 _exp = 0x001; 264 mant = 0x8000000b77501ULL; 265 register_farg(&spec_fargs[i++], s, _exp, mant); 266 267 // #3 268 s = 0; 269 _exp = 0x7fe; 270 mant = 0x800000000051bULL; 271 register_farg(&spec_fargs[i++], s, _exp, mant); 272 273 // #4 274 s = 0; 275 _exp = 0x012; 276 mant = 0x3214569900000ULL; 277 register_farg(&spec_fargs[i++], s, _exp, mant); 278 279 280 /* Special values */ 281 /* +0.0 : 0 0x000 0x0000000000000 */ 282 // #5 283 s = 0; 284 _exp = 0x000; 285 mant = 0x0000000000000ULL; 286 register_farg(&spec_fargs[i++], s, _exp, mant); 287 288 /* -0.0 : 1 0x000 0x0000000000000 */ 289 // #6 290 s = 1; 291 _exp = 0x000; 292 mant = 0x0000000000000ULL; 293 register_farg(&spec_fargs[i++], s, _exp, mant); 294 295 /* +infinity : 0 0x7FF 0x0000000000000 */ 296 // #7 297 s = 0; 298 _exp = 0x7FF; 299 mant = 0x0000000000000ULL; 300 register_farg(&spec_fargs[i++], s, _exp, mant); 301 302 /* -infinity : 1 0x7FF 0x0000000000000 */ 303 // #8 304 s = 1; 305 _exp = 0x7FF; 306 mant = 0x0000000000000ULL; 307 register_farg(&spec_fargs[i++], s, _exp, mant); 308 309 /* 310 * This comment applies to values #9 and #10 below: 311 * When src is a SNaN, it's converted to a QNaN first before rounding to single-precision, 312 * so we can't just copy the double-precision value to the corresponding slot in the 313 * single-precision array (i.e., in the loop at the end of this function). Instead, we 314 * have to manually set the bits using register_sp_farg(). 315 */ 316 317 /* +SNaN : 0 0x7FF 0x7FFFFFFFFFFFF */ 318 // #9 319 s = 0; 320 _exp = 0x7FF; 321 mant = 0x7FFFFFFFFFFFFULL; 322 register_farg(&spec_fargs[i++], s, _exp, mant); 323 _exp = 0xff; 324 mant_sp = 0x3FFFFF; 325 register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp); 326 327 /* -SNaN : 1 0x7FF 0x7FFFFFFFFFFFF */ 328 // #10 329 s = 1; 330 _exp = 0x7FF; 331 mant = 0x7FFFFFFFFFFFFULL; 332 register_farg(&spec_fargs[i++], s, _exp, mant); 333 _exp = 0xff; 334 mant_sp = 0x3FFFFF; 335 register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp); 336 337 /* +QNaN : 0 0x7FF 0x8000000000000 */ 338 // #11 339 s = 0; 340 _exp = 0x7FF; 341 mant = 0x8000000000000ULL; 342 register_farg(&spec_fargs[i++], s, _exp, mant); 343 344 /* -QNaN : 1 0x7FF 0x8000000000000 */ 345 // #12 346 s = 1; 347 _exp = 0x7FF; 348 mant = 0x8000000000000ULL; 349 register_farg(&spec_fargs[i++], s, _exp, mant); 350 351 /* denormalized value */ 352 // #13 353 s = 1; 354 _exp = 0x000; 355 mant = 0x8340000078000ULL; 356 register_farg(&spec_fargs[i++], s, _exp, mant); 357 358 /* Negative finite number */ 359 // #14 360 s = 1; 361 _exp = 0x40d; 362 mant = 0x0650f5a07b353ULL; 363 register_farg(&spec_fargs[i++], s, _exp, mant); 364 365 /* A few positive finite numbers ... */ 366 // #15 367 s = 0; 368 _exp = 0x412; 369 mant = 0x32585a9900000ULL; 370 register_farg(&spec_fargs[i++], s, _exp, mant); 371 372 // #16 373 s = 0; 374 _exp = 0x413; 375 mant = 0x82511a2000000ULL; 376 register_farg(&spec_fargs[i++], s, _exp, mant); 377 378 // #17 379 s = 0; 380 _exp = 0x403; 381 mant = 0x12ef5a9300000ULL; 382 register_farg(&spec_fargs[i++], s, _exp, mant); 383 384 // #18 385 s = 0; 386 _exp = 0x405; 387 mant = 0x14bf5d2300000ULL; 388 register_farg(&spec_fargs[i++], s, _exp, mant); 389 390 // #19 391 s = 0; 392 _exp = 0x409; 393 mant = 0x76bf982440000ULL; 394 register_farg(&spec_fargs[i++], s, _exp, mant); 395 396 nb_special_fargs = i; 397 for (j = 0; j < i; j++) { 398 if (!(j == 9 || j == 10)) 399 spec_sp_fargs[j] = spec_fargs[j]; 400 } 401} 402 403 404struct test_table 405{ 406 test_func_t test_category; 407 char * name; 408 unsigned int test_group; 409}; 410 411/* Type of input for floating point operations.*/ 412typedef enum { 413 SINGLE_TEST, 414 DOUBLE_TEST 415} precision_type_t; 416 417typedef enum { 418 VX_SCALAR_CONV_TO_WORD, 419 VX_CONV_TO_SINGLE, 420 VX_CONV_TO_DOUBLE, 421 VX_ESTIMATE, 422 VX_DEFAULT 423} vx_fp_test_type; 424 425static vector unsigned int vec_out, vec_inA, vec_inB; 426 427/* This function is for checking the reciprocal and reciprocal square root 428 * estimate instructions. 429 */ 430Bool check_estimate(precision_type_t type, Bool is_rsqrte, int idx, int output_vec_idx) 431{ 432 /* Technically, the number of bits of precision for xvredp and xvrsqrtedp is 433 * 14 bits (14 = log2 16384). However, the VEX emulation of these instructions 434 * does an actual reciprocal calculation versus estimation, so the answer we get back from 435 * valgrind can easily differ from the estimate in the lower bits (within the 14 bits of 436 * precision) and the estimate may still be within expected tolerances. On top of that, 437 * we can't count on these estimates always being the same across implementations. 438 * For example, with the fre[s] instruction (which should be correct to within one part 439 * in 256 -- i.e., 8 bits of precision) . . . When approximating the value 1.0111_1111_1111, 440 * one implementation could return 1.0111_1111_0000 and another implementation could return 441 * 1.1000_0000_0000. Both estimates meet the 1/256 accuracy requirement, but share only a 442 * single bit in common. 443 * 444 * The upshot is we can't validate the VEX output for these instructions by comparing against 445 * stored bit patterns. We must check that the result is within expected tolerances. 446 */ 447 448 449 /* A mask to be used for validation as a last resort. 450 * Only use 12 bits of precision for reasons discussed above. 451 */ 452#define VSX_RECIP_ESTIMATE_MASK_DP 0xFFFFFF0000000000ULL 453#define VSX_RECIP_ESTIMATE_MASK_SP 0xFFFFFF00 454 455 Bool result = False; 456 Bool dp_test = type == DOUBLE_TEST; 457 double src_dp, res_dp; 458 float src_sp, res_sp; 459 src_dp = res_dp = 0; 460 src_sp = res_sp = 0; 461#define SRC (dp_test ? src_dp : src_sp) 462#define RES (dp_test ? res_dp : res_sp) 463 Bool src_is_negative = False; 464 Bool res_is_negative = False; 465 unsigned long long * dst_dp = NULL; 466 unsigned int * dst_sp = NULL; 467 if (dp_test) { 468 unsigned long long * src_dp_ull; 469 dst_dp = (unsigned long long *) &vec_out; 470 src_dp = spec_fargs[idx]; 471 src_dp_ull = (unsigned long long *) &src_dp; 472 src_is_negative = (*src_dp_ull & 0x8000000000000000ULL) ? True : False; 473 res_is_negative = (dst_dp[output_vec_idx] & 0x8000000000000000ULL) ? True : False; 474 memcpy(&res_dp, &dst_dp[output_vec_idx], 8); 475 } else { 476 unsigned int * src_sp_uint; 477 dst_sp = (unsigned int *) &vec_out; 478 src_sp = spec_sp_fargs[idx]; 479 src_sp_uint = (unsigned int *) &src_sp; 480 src_is_negative = (*src_sp_uint & 0x80000000) ? True : False; 481 res_is_negative = (dst_sp[output_vec_idx] & 0x80000000) ? True : False; 482 memcpy(&res_sp, &dst_sp[output_vec_idx], 4); 483 } 484 485 // Below are common rules for xvre{d|s}p and xvrsqrte{d|s}p 486 if (isnan(SRC)) 487 return isnan(RES); 488 if (fpclassify(SRC) == FP_ZERO) 489 return isinf(RES); 490 if (!src_is_negative && isinf(SRC)) 491 return !res_is_negative && (fpclassify(RES) == FP_ZERO); 492 if (is_rsqrte) { 493 if (src_is_negative) 494 return isnan(RES); 495 } else { 496 if (src_is_negative && isinf(SRC)) 497 return res_is_negative && (fpclassify(RES) == FP_ZERO); 498 } 499 if (dp_test) { 500 double calc_diff; 501 double real_diff; 502 double recip_divisor; 503 double div_result; 504 double calc_diff_tmp; 505 506 if (is_rsqrte) 507 recip_divisor = sqrt(src_dp); 508 else 509 recip_divisor = src_dp; 510 511 div_result = 1.0/recip_divisor; 512 calc_diff_tmp = recip_divisor * 16384.0; 513 if (isnormal(calc_diff_tmp)) { 514 calc_diff = fabs(1.0/calc_diff_tmp); 515 real_diff = fabs(res_dp - div_result); 516 result = ( ( res_dp == div_result ) 517 || ( real_diff <= calc_diff ) ); 518 } else { 519 /* Unable to compute theoretical difference, so we fall back to masking out 520 * un-precise bits. 521 */ 522 unsigned long long * div_result_dp = (unsigned long long *) &div_result; 523 result = (dst_dp[output_vec_idx] & VSX_RECIP_ESTIMATE_MASK_DP) == (*div_result_dp & VSX_RECIP_ESTIMATE_MASK_DP); 524 } 525 /* For debug use . . . 526 if (!result) { 527 unsigned long long * dv = &div_result; 528 unsigned long long * rd = &real_diff; 529 unsigned long long * cd = &calc_diff; 530 printf("\n\t {actual div_result: %016llx; real_diff: %016llx; calc_diff: %016llx}\n", 531 *dv, *rd, *cd); 532 } 533 */ 534 } else { // single precision test (only have xvrsqrtesp, since xvresp was implemented in stage 2) 535 float calc_diff; 536 float real_diff; 537 float div_result; 538 float calc_diff_tmp; 539 float recip_divisor = sqrt(src_sp); 540 541 div_result = 1.0/recip_divisor; 542 calc_diff_tmp = recip_divisor * 16384.0; 543 if (isnormal(calc_diff_tmp)) { 544 calc_diff = fabsf(1.0/calc_diff_tmp); 545 real_diff = fabsf(res_sp - div_result); 546 result = ( ( res_sp == div_result ) 547 || ( real_diff <= calc_diff ) ); 548 } else { 549 /* Unable to compute theoretical difference, so we fall back to masking out 550 * un-precise bits. 551 */ 552 unsigned int * div_result_sp = (unsigned int *) &div_result; 553 result = (dst_sp[output_vec_idx] & VSX_RECIP_ESTIMATE_MASK_SP) == (*div_result_sp & VSX_RECIP_ESTIMATE_MASK_SP); 554 } 555 /* For debug use . . . 556 if (!result) { 557 unsigned long long * dv = &div_result; 558 unsigned long long * rd = &real_diff; 559 unsigned long long * cd = &calc_diff; 560 printf("\n\t {actual div_result: %016llx; real_diff: %016llx; calc_diff: %016llx}\n", 561 *dv, *rd, *cd); 562 } 563 */ 564 } 565 return result; 566} 567 568typedef struct vx_fp_test 569{ 570 test_func_t test_func; 571 const char * name; 572 fp_test_args_t * targs; 573 int num_tests; 574 precision_type_t precision; 575 vx_fp_test_type type; 576 const char * op; 577} vx_fp_test_t; 578 579 580static Bool do_dot; 581 582static void test_xvredp(void) 583{ 584 __asm__ __volatile__ ("xvredp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 585} 586 587static void test_xsredp(void) 588{ 589 __asm__ __volatile__ ("xsredp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 590} 591 592static void test_xvrsqrtedp(void) 593{ 594 __asm__ __volatile__ ("xvrsqrtedp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 595} 596 597static void test_xsrsqrtedp(void) 598{ 599 __asm__ __volatile__ ("xsrsqrtedp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 600} 601 602static void test_xvrsqrtesp(void) 603{ 604 __asm__ __volatile__ ("xvrsqrtesp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 605} 606 607static void test_xstsqrtdp(void) 608{ 609 __asm__ __volatile__ ("xstsqrtdp cr1, %x0" : : "wa" (vec_inB)); 610} 611 612static void test_xvtsqrtdp(void) 613{ 614 __asm__ __volatile__ ("xvtsqrtdp cr1, %x0" : : "wa" (vec_inB)); 615} 616 617static void test_xvtsqrtsp(void) 618{ 619 __asm__ __volatile__ ("xvtsqrtsp cr1, %x0" : : "wa" (vec_inB)); 620} 621 622static void test_xvsqrtdp(void) 623{ 624 __asm__ __volatile__ ("xvsqrtdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 625} 626 627static void test_xvsqrtsp(void) 628{ 629 __asm__ __volatile__ ("xvsqrtsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 630} 631 632static void test_xvtdivdp(void) 633{ 634 __asm__ __volatile__ ("xvtdivdp cr1, %x0, %x1" : : "wa" (vec_inA), "wa" (vec_inB)); 635} 636 637static void test_xvtdivsp(void) 638{ 639 __asm__ __volatile__ ("xvtdivsp cr1, %x0, %x1" : : "wa" (vec_inA), "wa" (vec_inB)); 640} 641 642static void test_xscvdpsp(void) 643{ 644 __asm__ __volatile__ ("xscvdpsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 645} 646 647static void test_xscvdpuxws(void) 648{ 649 __asm__ __volatile__ ("xscvdpuxws %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 650} 651 652static void test_xscvspdp(void) 653{ 654 __asm__ __volatile__ ("xscvspdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 655} 656 657static void test_xvcvdpsp(void) 658{ 659 __asm__ __volatile__ ("xvcvdpsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 660} 661 662static void test_xvcvdpuxds(void) 663{ 664 __asm__ __volatile__ ("xvcvdpuxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 665} 666 667static void test_xvcvdpuxws(void) 668{ 669 __asm__ __volatile__ ("xvcvdpuxws %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 670} 671 672static void test_xvcvspdp(void) 673{ 674 __asm__ __volatile__ ("xvcvspdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 675} 676 677static void test_xvcvspsxds(void) 678{ 679 __asm__ __volatile__ ("xvcvspsxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 680} 681 682static void test_xvcvspuxds(void) 683{ 684 __asm__ __volatile__ ("xvcvspuxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 685} 686 687static void test_xvcvdpsxds(void) 688{ 689 __asm__ __volatile__ ("xvcvdpsxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 690} 691 692static void test_xvcvspuxws(void) 693{ 694 __asm__ __volatile__ ("xvcvspuxws %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 695} 696 697static void test_xvcvsxddp(void) 698{ 699 __asm__ __volatile__ ("xvcvsxddp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 700} 701 702static void test_xvcvuxddp(void) 703{ 704 __asm__ __volatile__ ("xvcvuxddp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 705} 706 707static void test_xvcvsxdsp(void) 708{ 709 __asm__ __volatile__ ("xvcvsxdsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 710} 711 712static void test_xvcvuxdsp(void) 713{ 714 __asm__ __volatile__ ("xvcvuxdsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 715} 716 717static void test_xvcvsxwdp(void) 718{ 719 __asm__ __volatile__ ("xvcvsxwdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 720} 721 722static void test_xvcvuxwdp(void) 723{ 724 __asm__ __volatile__ ("xvcvuxwdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 725} 726 727static void test_xvcvsxwsp(void) 728{ 729 __asm__ __volatile__ ("xvcvsxwsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 730} 731 732static void test_xvcvuxwsp(void) 733{ 734 __asm__ __volatile__ ("xvcvuxwsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 735} 736 737static void test_xsrdpic(void) 738{ 739 __asm__ __volatile__ ("xsrdpic %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 740} 741 742static void test_xsrdpiz(void) 743{ 744 __asm__ __volatile__ ("xsrdpiz %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 745} 746 747static void test_xsrdpi(void) 748{ 749 __asm__ __volatile__ ("xsrdpi %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 750} 751 752static void test_xvabsdp(void) 753{ 754 __asm__ __volatile__ ("xvabsdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 755} 756 757static void test_xvnabsdp(void) 758{ 759 __asm__ __volatile__ ("xvnabsdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 760} 761 762static void test_xvnegdp(void) 763{ 764 __asm__ __volatile__ ("xvnegdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 765} 766 767static void test_xvabssp(void) 768{ 769 __asm__ __volatile__ ("xvabssp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 770} 771 772static void test_xvnabssp(void) 773{ 774 __asm__ __volatile__ ("xvnabssp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 775} 776 777static void test_xvrdpi(void) 778{ 779 __asm__ __volatile__ ("xvrdpi %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 780} 781 782static void test_xvrdpic(void) 783{ 784 __asm__ __volatile__ ("xvrdpic %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 785} 786 787static void test_xvrdpim(void) 788{ 789 __asm__ __volatile__ ("xvrdpim %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 790} 791 792static void test_xvrdpip(void) 793{ 794 __asm__ __volatile__ ("xvrdpip %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 795} 796 797static void test_xvrdpiz(void) 798{ 799 __asm__ __volatile__ ("xvrdpiz %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 800} 801 802static void test_xvrspi(void) 803{ 804 __asm__ __volatile__ ("xvrspi %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 805} 806 807static void test_xvrspic(void) 808{ 809 __asm__ __volatile__ ("xvrspic %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 810} 811 812static void test_xvrspim(void) 813{ 814 __asm__ __volatile__ ("xvrspim %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 815} 816 817static void test_xvrspip(void) 818{ 819 __asm__ __volatile__ ("xvrspip %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 820} 821 822static void test_xvrspiz(void) 823{ 824 __asm__ __volatile__ ("xvrspiz %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 825} 826 827static vx_fp_test_t 828vsx_one_fp_arg_tests[] = { 829 { &test_xvredp, "xvredp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x"}, 830 { &test_xsredp, "xsredp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x"}, 831 { &test_xvrsqrtedp, "xvrsqrtedp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x-sqrt"}, 832 { &test_xsrsqrtedp, "xsrsqrtedp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x-sqrt"}, 833 { &test_xvrsqrtesp, "xvrsqrtesp", NULL, 18, SINGLE_TEST, VX_ESTIMATE, "1/x-sqrt"}, 834 { &test_xvsqrtdp, "xvsqrtdp", NULL, 18, DOUBLE_TEST, VX_DEFAULT, "sqrt"}, 835 { &test_xvsqrtsp, "xvsqrtsp", NULL, 18, SINGLE_TEST, VX_DEFAULT, "sqrt"}, 836 { &test_xscvdpsp, "xscvdpsp", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"}, 837 { &test_xscvdpuxws, "xscvdpuxws", NULL, 20, DOUBLE_TEST, VX_SCALAR_CONV_TO_WORD, "conv"}, 838 { &test_xscvspdp, "xscvspdp", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"}, 839 { &test_xvcvdpsp, "xvcvdpsp", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"}, 840 { &test_xvcvdpuxds, "xvcvdpuxds", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "conv"}, 841 { &test_xvcvdpuxws, "xvcvdpuxws", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"}, 842 { &test_xvcvspdp, "xvcvspdp", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"}, 843 { &test_xvcvspsxds, "xvcvspsxds", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"}, 844 { &test_xvcvdpsxds, "xvcvdpsxds", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "conv"}, 845 { &test_xvcvspuxds, "xvcvspuxds", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"}, 846 { &test_xvcvspuxws, "xvcvspuxws", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "conv"}, 847 { &test_xsrdpic, "xsrdpic", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"}, 848 { &test_xsrdpiz, "xsrdpiz", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"}, 849 { &test_xsrdpi, "xsrdpi", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"}, 850 { &test_xvabsdp, "xvabsdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "abs"}, 851 { &test_xvnabsdp, "xvnabsdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "nabs"}, 852 { &test_xvnegdp, "xvnegdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "neg"}, 853 { &test_xvabssp, "xvabssp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "abs"}, 854 { &test_xvnabssp, "xvnabssp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "nabs"}, 855 { &test_xvrdpi, "xvrdpi", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"}, 856 { &test_xvrdpic, "xvrdpic", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"}, 857 { &test_xvrdpim, "xvrdpim", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"}, 858 { &test_xvrdpip, "xvrdpip", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"}, 859 { &test_xvrdpiz, "xvrdpiz", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"}, 860 { &test_xvrspi, "xvrspi", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"}, 861 { &test_xvrspic, "xvrspic", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"}, 862 { &test_xvrspim, "xvrspim", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"}, 863 { &test_xvrspip, "xvrspip", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"}, 864 { &test_xvrspiz, "xvrspiz", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"}, 865 { NULL, NULL, NULL, 0, 0, 0, NULL} 866}; 867 868static vx_fp_test_t 869vx_tdivORtsqrt_tests[] = { 870 { &test_xstsqrtdp, "xstsqrtdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "test-sqrt"}, 871 { &test_xvtsqrtdp, "xvtsqrtdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "test-sqrt"}, 872 { &test_xvtsqrtsp, "xvtsqrtsp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "test-sqrt"}, 873 { &test_xvtdivdp, "xvtdivdp", two_arg_fp_tests, 68, DOUBLE_TEST, VX_DEFAULT, "test-div"}, 874 { &test_xvtdivsp, "xvtdivsp", two_arg_fp_tests, 68, SINGLE_TEST, VX_DEFAULT, "test-div"}, 875 { NULL, NULL, NULL, 0 , 0, 0, NULL} 876}; 877 878static unsigned long long doubleWord[] = { 0, 879 0xffffffff00000000LL, 880 0x00000000ffffffffLL, 881 0xffffffffffffffffLL, 882 0x89abcde123456789LL, 883 0x0102030405060708LL, 884 0x00000000a0b1c2d3LL, 885 0x1111222233334444LL 886}; 887 888static unsigned int singleWord[] = {0, 889 0xffff0000, 890 0x0000ffff, 891 0xffffffff, 892 0x89a73522, 893 0x01020304, 894 0x0000abcd, 895 0x11223344 896}; 897 898typedef struct vx_intToFp_test 899{ 900 test_func_t test_func; 901 const char * name; 902 void * targs; 903 int num_tests; 904 precision_type_t precision; 905 vx_fp_test_type type; 906} vx_intToFp_test_t; 907 908static vx_intToFp_test_t 909intToFp_tests[] = { 910 { test_xvcvsxddp, "xvcvsxddp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_DOUBLE }, 911 { test_xvcvuxddp, "xvcvuxddp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_DOUBLE }, 912 { test_xvcvsxdsp, "xvcvsxdsp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_SINGLE }, 913 { test_xvcvuxdsp, "xvcvuxdsp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_SINGLE }, 914 { test_xvcvsxwdp, "xvcvsxwdp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_DOUBLE }, 915 { test_xvcvuxwdp, "xvcvuxwdp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_DOUBLE }, 916 { test_xvcvsxwsp, "xvcvsxwsp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_SINGLE }, 917 { test_xvcvuxwsp, "xvcvuxwsp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_SINGLE }, 918 { NULL, NULL, NULL, 0, 0 } 919}; 920 921static Bool do_OE; 922typedef enum { 923 DIV_BASE = 1, 924 DIV_OE = 2, 925 DIV_DOT = 4, 926} div_type_t; 927/* Possible divde type combinations are: 928 * - base 929 * - base+dot 930 * - base+OE 931 * - base+OE+dot 932 */ 933#ifdef __powerpc64__ 934static void test_divdeu(void) 935{ 936 int divdeu_type = DIV_BASE; 937 if (do_OE) 938 divdeu_type |= DIV_OE; 939 if (do_dot) 940 divdeu_type |= DIV_DOT; 941 942 switch (divdeu_type) { 943 case 1: 944 SET_CR_XER_ZERO; 945 __asm__ __volatile__ ("divdeu %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 946 GET_CR_XER(div_flags, div_xer); 947 break; 948 case 3: 949 SET_CR_XER_ZERO; 950 __asm__ __volatile__ ("divdeuo %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 951 GET_CR_XER(div_flags, div_xer); 952 break; 953 case 5: 954 SET_CR_XER_ZERO; 955 __asm__ __volatile__ ("divdeu. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 956 GET_CR_XER(div_flags, div_xer); 957 break; 958 case 7: 959 SET_CR_XER_ZERO; 960 __asm__ __volatile__ ("divdeuo. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 961 GET_CR_XER(div_flags, div_xer); 962 break; 963 default: 964 fprintf(stderr, "Invalid divdeu type. Exiting\n"); 965 exit(1); 966 } 967} 968#endif 969 970static void test_divwe(void) 971{ 972 int divwe_type = DIV_BASE; 973 if (do_OE) 974 divwe_type |= DIV_OE; 975 if (do_dot) 976 divwe_type |= DIV_DOT; 977 978 switch (divwe_type) { 979 case 1: 980 SET_CR_XER_ZERO; 981 __asm__ __volatile__ ("divwe %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 982 GET_CR_XER(div_flags, div_xer); 983 break; 984 case 3: 985 SET_CR_XER_ZERO; 986 __asm__ __volatile__ ("divweo %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 987 GET_CR_XER(div_flags, div_xer); 988 break; 989 case 5: 990 SET_CR_XER_ZERO; 991 __asm__ __volatile__ ("divwe. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 992 GET_CR_XER(div_flags, div_xer); 993 break; 994 case 7: 995 SET_CR_XER_ZERO; 996 __asm__ __volatile__ ("divweo. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 997 GET_CR_XER(div_flags, div_xer); 998 break; 999 default: 1000 fprintf(stderr, "Invalid divweu type. Exiting\n"); 1001 exit(1); 1002 } 1003} 1004 1005 1006typedef struct simple_test { 1007 test_func_t test_func; 1008 char * name; 1009 precision_type_t precision; 1010} simple_test_t; 1011 1012 1013static void setup_sp_fp_args(fp_test_args_t * targs, Bool swap_inputs) 1014{ 1015 int a_idx, b_idx, i; 1016 void * inA, * inB; 1017 void * vec_src = swap_inputs ? &vec_out : &vec_inB; 1018 1019 for (i = 0; i < 4; i++) { 1020 a_idx = targs->fra_idx; 1021 b_idx = targs->frb_idx; 1022 inA = (void *)&spec_sp_fargs[a_idx]; 1023 inB = (void *)&spec_sp_fargs[b_idx]; 1024 // copy single precision FP into vector element i 1025 memcpy(((void *)&vec_inA) + (i * 4), inA, 4); 1026 memcpy(vec_src + (i * 4), inB, 4); 1027 targs++; 1028 } 1029} 1030 1031static void setup_dp_fp_args(fp_test_args_t * targs, Bool swap_inputs) 1032{ 1033 int a_idx, b_idx, i; 1034 void * inA, * inB; 1035 void * vec_src = swap_inputs ? (void *)&vec_out : (void *)&vec_inB; 1036 1037 for (i = 0; i < 2; i++) { 1038 a_idx = targs->fra_idx; 1039 b_idx = targs->frb_idx; 1040 inA = (void *)&spec_fargs[a_idx]; 1041 inB = (void *)&spec_fargs[b_idx]; 1042 // copy double precision FP into vector element i 1043 memcpy(((void *)&vec_inA) + (i * 8), inA, 8); 1044 memcpy(vec_src + (i * 8), inB, 8); 1045 targs++; 1046 } 1047} 1048 1049#define VX_NOT_CMP_OP 0xffffffff 1050static void print_vector_fp_result(unsigned int cc, vx_fp_test_t * test_group, int i, Bool print_vec_out) 1051{ 1052 int a_idx, b_idx, k; 1053 char * name = malloc(20); 1054 int dp = test_group->precision == DOUBLE_TEST ? 1 : 0; 1055 int loops = dp ? 2 : 4; 1056 fp_test_args_t * targs = &test_group->targs[i]; 1057 unsigned long long * frA_dp, * frB_dp, * dst_dp; 1058 unsigned int * frA_sp, *frB_sp, * dst_sp; 1059 strcpy(name, test_group->name); 1060 printf("#%d: %s%s ", dp? i/2 : i/4, name, (do_dot ? "." : "")); 1061 for (k = 0; k < loops; k++) { 1062 a_idx = targs->fra_idx; 1063 b_idx = targs->frb_idx; 1064 if (k) 1065 printf(" AND "); 1066 if (dp) { 1067 frA_dp = (unsigned long long *)&spec_fargs[a_idx]; 1068 frB_dp = (unsigned long long *)&spec_fargs[b_idx]; 1069 printf("%016llx %s %016llx", *frA_dp, test_group->op, *frB_dp); 1070 } else { 1071 frA_sp = (unsigned int *)&spec_sp_fargs[a_idx]; 1072 frB_sp = (unsigned int *)&spec_sp_fargs[b_idx]; 1073 printf("%08x %s %08x", *frA_sp, test_group->op, *frB_sp); 1074 } 1075 targs++; 1076 } 1077 if (cc != VX_NOT_CMP_OP) 1078 printf(" ? cc=%x", cc); 1079 1080 if (print_vec_out) { 1081 if (dp) { 1082 dst_dp = (unsigned long long *) &vec_out; 1083 printf(" => %016llx %016llx\n", dst_dp[0], dst_dp[1]); 1084 } else { 1085 dst_sp = (unsigned int *) &vec_out; 1086 printf(" => %08x %08x %08x %08x\n", dst_sp[0], dst_sp[1], dst_sp[2], dst_sp[3]); 1087 } 1088 } else { 1089 printf("\n"); 1090 } 1091 free(name); 1092} 1093 1094 1095 1096static void test_vsx_one_fp_arg(void) 1097{ 1098 test_func_t func; 1099 int k; 1100 k = 0; 1101 build_special_fargs_table(); 1102 1103 while ((func = vsx_one_fp_arg_tests[k].test_func)) { 1104 int idx, i; 1105 vx_fp_test_t test_group = vsx_one_fp_arg_tests[k]; 1106 Bool estimate = (test_group.type == VX_ESTIMATE); 1107 Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False; 1108 Bool is_sqrt = (strstr(test_group.name, "sqrt")) ? True : False; 1109 Bool is_scalar = (strstr(test_group.name, "xs")) ? True : False; 1110 Bool sparse_sp = False; 1111 int stride = dp ? 2 : 4; 1112 int loops = is_scalar ? 1 : stride; 1113 stride = is_scalar ? 1: stride; 1114 1115 /* For conversions of single to double, the 128-bit input register is sparsely populated: 1116 * |___ SP___|_Unused_|___SP___|__Unused__| // for vector op 1117 * or 1118 * |___ SP___|_Unused_|_Unused_|__Unused__| // for scalar op 1119 * 1120 * For the vector op case, we need to adjust stride from '4' to '2', since 1121 * we'll only be loading two values per loop into the input register. 1122 */ 1123 if (!dp && !is_scalar && test_group.type == VX_CONV_TO_DOUBLE) { 1124 sparse_sp = True; 1125 stride = 2; 1126 } 1127 1128 for (i = 0; i < test_group.num_tests; i+=stride) { 1129 unsigned int * pv; 1130 void * inB, * vecB_void_ptr = (void *)&vec_inB; 1131 1132 pv = (unsigned int *)&vec_out; 1133 // clear vec_out 1134 for (idx = 0; idx < 4; idx++, pv++) 1135 *pv = 0; 1136 1137 if (dp) { 1138 int j; 1139 unsigned long long * frB_dp, *dst_dp; 1140 for (j = 0; j < loops; j++) { 1141 inB = (void *)&spec_fargs[i + j]; 1142 // copy double precision FP into vector element i 1143 if (isLE && is_scalar) 1144 vecB_void_ptr += 8; 1145 memcpy(vecB_void_ptr + (j * 8), inB, 8); 1146 } 1147 // execute test insn 1148 (*func)(); 1149 dst_dp = (unsigned long long *) &vec_out; 1150 if (isLE && is_scalar) 1151 dst_dp++; 1152 printf("#%d: %s ", i/stride, test_group.name); 1153 for (j = 0; j < loops; j++) { 1154 if (j) 1155 printf("; "); 1156 frB_dp = (unsigned long long *)&spec_fargs[i + j]; 1157 printf("%s(%016llx)", test_group.op, *frB_dp); 1158 if (estimate) { 1159 Bool res = check_estimate(DOUBLE_TEST, is_sqrt, i + j, (isLE && is_scalar) ? 1: j); 1160 printf(" ==> %s)", res ? "PASS" : "FAIL"); 1161 /* For debugging . . . 1162 printf(" ==> %s (res=%016llx)", res ? "PASS" : "FAIL", dst_dp[j]); 1163 */ 1164 } else { 1165 vx_fp_test_type type = test_group.type; 1166 switch (type) { 1167 case VX_SCALAR_CONV_TO_WORD: 1168 printf(" = %016llx", dst_dp[j] & 0x00000000ffffffffULL); 1169 break; 1170 case VX_CONV_TO_SINGLE: 1171 printf(" = %016llx", dst_dp[j] & 0xffffffff00000000ULL); 1172 break; 1173 default: // For VX_CONV_TO_DOUBLE and non-convert instructions . . . 1174 printf(" = %016llx", dst_dp[j]); 1175 } 1176 } 1177 } 1178 printf("\n"); 1179 } else { 1180 int j; 1181 unsigned int * frB_sp, * dst_sp = NULL; 1182 unsigned long long * dst_dp = NULL; 1183 if (sparse_sp) 1184 loops = 2; 1185 for (j = 0; j < loops; j++) { 1186 inB = (void *)&spec_sp_fargs[i + j]; 1187 // copy single precision FP into vector element i 1188 if (sparse_sp) { 1189 if (isLE) 1190 memcpy(vecB_void_ptr + ((2 * j * 4) + 4), inB, 4); 1191 else 1192 memcpy(vecB_void_ptr + ((2 * j * 4) ), inB, 4); 1193 } else { 1194 if (isLE && is_scalar) 1195 vecB_void_ptr += 12; 1196 memcpy(vecB_void_ptr + (j * 4), inB, 4); 1197 } 1198 } 1199 // execute test insn 1200 (*func)(); 1201 if (test_group.type == VX_CONV_TO_DOUBLE) { 1202 dst_dp = (unsigned long long *) &vec_out; 1203 if (isLE && is_scalar) 1204 dst_dp++; 1205 } else { 1206 dst_sp = (unsigned int *) &vec_out; 1207 if (isLE && is_scalar) 1208 dst_sp += 3; 1209 } 1210 // print result 1211 printf("#%d: %s ", i/stride, test_group.name); 1212 for (j = 0; j < loops; j++) { 1213 if (j) 1214 printf("; "); 1215 frB_sp = (unsigned int *)&spec_sp_fargs[i + j]; 1216 printf("%s(%08x)", test_group.op, *frB_sp); 1217 if (estimate) { 1218 Bool res = check_estimate(SINGLE_TEST, is_sqrt, i + j, (isLE && is_scalar) ? 3 : j); 1219 printf(" ==> %s)", res ? "PASS" : "FAIL"); 1220 } else { 1221 if (test_group.type == VX_CONV_TO_DOUBLE) 1222 printf(" = %016llx", dst_dp[j]); 1223 else 1224 /* Special case: Current VEX implementation for fsqrts (single precision) 1225 * uses the same implementation as that used for double precision fsqrt. 1226 * However, I've found that for xvsqrtsp, the result from that implementation 1227 * may be off by the two LSBs. Generally, even this small inaccuracy can cause the 1228 * output to appear very different if you end up with a carry. But for the given 1229 * inputs in this testcase, we can simply mask out these bits. 1230 */ 1231 printf(" = %08x", is_sqrt ? (dst_sp[j] & 0xfffffffc) : dst_sp[j]); 1232 } 1233 } 1234 printf("\n"); 1235 } 1236 } 1237 k++; 1238 printf( "\n" ); 1239 } 1240} 1241 1242static void test_int_to_fp_convert(void) 1243{ 1244 test_func_t func; 1245 int k; 1246 k = 0; 1247 1248 while ((func = intToFp_tests[k].test_func)) { 1249 int idx, i; 1250 vx_intToFp_test_t test_group = intToFp_tests[k]; 1251 Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False; 1252 Bool sparse_sp = False; 1253 int stride = dp ? 2 : 4; 1254 int loops = stride; 1255 1256 /* For conversions of single to double, the 128-bit input register is sparsely populated: 1257 * |___ int___|_Unused_|___int___|__Unused__| // for vector op 1258 * or 1259 * We need to adjust stride from '4' to '2', since we'll only be loading 1260 * two values per loop into the input register. 1261 */ 1262 if (!dp && test_group.type == VX_CONV_TO_DOUBLE) { 1263 sparse_sp = True; 1264 stride = 2; 1265 } 1266 1267 for (i = 0; i < test_group.num_tests; i+=stride) { 1268 unsigned int * pv; 1269 void * inB; 1270 1271 pv = (unsigned int *)&vec_out; 1272 // clear vec_out 1273 for (idx = 0; idx < 4; idx++, pv++) 1274 *pv = 0; 1275 1276 if (dp) { 1277 int j; 1278 unsigned long long *dst_dw, * targs = test_group.targs; 1279 for (j = 0; j < loops; j++) { 1280 inB = (void *)&targs[i + j]; 1281 // copy doubleword into vector element i 1282 memcpy(((void *)&vec_inB) + (j * 8), inB, 8); 1283 } 1284 // execute test insn 1285 (*func)(); 1286 dst_dw = (unsigned long long *) &vec_out; 1287 printf("#%d: %s ", i/stride, test_group.name); 1288 for (j = 0; j < loops; j++) { 1289 if (j) 1290 printf("; "); 1291 printf("conv(%016llx)", targs[i + j]); 1292 1293 if (test_group.type == VX_CONV_TO_SINGLE) 1294 printf(" = %016llx", dst_dw[j] & 0xffffffff00000000ULL); 1295 else 1296 printf(" = %016llx", dst_dw[j]); 1297 } 1298 printf("\n"); 1299 } else { 1300 int j; 1301 unsigned int * dst_sp = NULL; 1302 unsigned int * targs = test_group.targs; 1303 unsigned long long * dst_dp = NULL; 1304 void * vecB_void_ptr = (void *)&vec_inB; 1305 if (sparse_sp) 1306 loops = 2; 1307 for (j = 0; j < loops; j++) { 1308 inB = (void *)&targs[i + j]; 1309 // copy single word into vector element i 1310 if (sparse_sp) { 1311 if (isLE) 1312 memcpy(vecB_void_ptr + ((2 * j * 4) + 4), inB, 4); 1313 else 1314 memcpy(vecB_void_ptr + ((2 * j * 4) ), inB, 4); 1315 } else { 1316 memcpy(vecB_void_ptr + (j * 4), inB, 4); 1317 } 1318 } 1319 // execute test insn 1320 (*func)(); 1321 if (test_group.type == VX_CONV_TO_DOUBLE) 1322 dst_dp = (unsigned long long *) &vec_out; 1323 else 1324 dst_sp = (unsigned int *) &vec_out; 1325 // print result 1326 printf("#%d: %s ", i/stride, test_group.name); 1327 for (j = 0; j < loops; j++) { 1328 if (j) 1329 printf("; "); 1330 printf("conv(%08x)", targs[i + j]); 1331 if (test_group.type == VX_CONV_TO_DOUBLE) 1332 printf(" = %016llx", dst_dp[j]); 1333 else 1334 printf(" = %08x", dst_sp[j]); 1335 } 1336 printf("\n"); 1337 } 1338 } 1339 k++; 1340 printf( "\n" ); 1341 } 1342} 1343 1344 1345 1346// The div doubleword test data 1347signed long long div_dw_tdata[13][2] = { 1348 { 4, -4 }, 1349 { 4, -3 }, 1350 { 4, 4 }, 1351 { 4, -5 }, 1352 { 3, 8 }, 1353 { 0x8000000000000000ULL, 0xa }, 1354 { 0x50c, -1 }, 1355 { 0x50c, -4096 }, 1356 { 0x1234fedc, 0x8000a873 }, 1357 { 0xabcd87651234fedcULL, 0xa123b893 }, 1358 { 0x123456789abdcULL, 0 }, 1359 { 0, 2 }, 1360 { 0x77, 0xa3499 } 1361}; 1362#define dw_tdata_len (sizeof(div_dw_tdata)/sizeof(signed long long)/2) 1363 1364// The div word test data 1365unsigned int div_w_tdata[6][2] = { 1366 { 0, 2 }, 1367 { 2, 0 }, 1368 { 0x7abc1234, 0xf0000000 }, 1369 { 0xfabc1234, 5 }, 1370 { 77, 66 }, 1371 { 5, 0xfabc1234 }, 1372}; 1373#define w_tdata_len (sizeof(div_w_tdata)/sizeof(unsigned int)/2) 1374 1375typedef struct div_ext_test 1376{ 1377 test_func_t test_func; 1378 const char *name; 1379 int num_tests; 1380 div_type_t div_type; 1381 precision_type_t precision; 1382} div_ext_test_t; 1383 1384static div_ext_test_t div_tests[] = { 1385#ifdef __powerpc64__ 1386 { &test_divdeu, "divdeu", dw_tdata_len, DIV_BASE, DOUBLE_TEST }, 1387 { &test_divdeu, "divdeuo", dw_tdata_len, DIV_OE, DOUBLE_TEST }, 1388#endif 1389 { &test_divwe, "divwe", w_tdata_len, DIV_BASE, SINGLE_TEST }, 1390 { &test_divwe, "divweo", w_tdata_len, DIV_OE, SINGLE_TEST }, 1391 { NULL, NULL, 0, 0, 0 } 1392}; 1393 1394static void test_div_extensions(void) 1395{ 1396 test_func_t func; 1397 int k; 1398 k = 0; 1399 1400 while ((func = div_tests[k].test_func)) { 1401 int i, repeat = 1; 1402 div_ext_test_t test_group = div_tests[k]; 1403 do_dot = False; 1404 1405again: 1406 for (i = 0; i < test_group.num_tests; i++) { 1407 unsigned int condreg; 1408 1409 if (test_group.div_type == DIV_OE) 1410 do_OE = True; 1411 else 1412 do_OE = False; 1413 1414 if (test_group.precision == DOUBLE_TEST) { 1415 r14 = div_dw_tdata[i][0]; 1416 r15 = div_dw_tdata[i][1]; 1417 } else { 1418 r14 = div_w_tdata[i][0]; 1419 r15 = div_w_tdata[i][1]; 1420 } 1421 // execute test insn 1422 (*func)(); 1423 condreg = (div_flags & 0xf0000000) >> 28; 1424 printf("#%d: %s%s: ", i, test_group.name, do_dot ? "." : ""); 1425 if (test_group.precision == DOUBLE_TEST) { 1426 printf("0x%016llx0000000000000000 / 0x%016llx = 0x%016llx;", 1427 div_dw_tdata[i][0], div_dw_tdata[i][1], (signed long long) r17); 1428 } else { 1429 printf("0x%08x00000000 / 0x%08x = 0x%08x;", 1430 div_w_tdata[i][0], div_w_tdata[i][1], (unsigned int) r17); 1431 } 1432 printf(" CR=%x; XER=%x\n", condreg, div_xer); 1433 } 1434 printf("\n"); 1435 if (repeat) { 1436 repeat = 0; 1437 do_dot = True; 1438 goto again; 1439 } 1440 k++; 1441 printf( "\n" ); 1442 } 1443} 1444 1445 1446static void test_vx_tdivORtsqrt(void) 1447{ 1448 test_func_t func; 1449 int k, crx; 1450 unsigned int flags; 1451 k = 0; 1452 do_dot = False; 1453 build_special_fargs_table(); 1454 1455 while ((func = vx_tdivORtsqrt_tests[k].test_func)) { 1456 int idx, i; 1457 vx_fp_test_t test_group = vx_tdivORtsqrt_tests[k]; 1458 Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False; 1459 Bool is_scalar = (strstr(test_group.name, "xs")) ? True : False; 1460 Bool two_args = test_group.targs ? True : False; 1461 int stride = dp ? 2 : 4; 1462 int loops = is_scalar ? 1 : stride; 1463 stride = is_scalar ? 1: stride; 1464 1465 for (i = 0; i < test_group.num_tests; i+=stride) { 1466 unsigned int * pv; 1467 void * inB, * vecB_void_ptr = (void *)&vec_inB; 1468 1469 pv = (unsigned int *)&vec_out; 1470 // clear vec_out 1471 for (idx = 0; idx < 4; idx++, pv++) 1472 *pv = 0; 1473 1474 if (dp) { 1475 int j; 1476 unsigned long long * frB_dp; 1477 if (two_args) { 1478 setup_dp_fp_args(&test_group.targs[i], False); 1479 } else { 1480 for (j = 0; j < loops; j++) { 1481 inB = (void *)&spec_fargs[i + j]; 1482 // copy double precision FP into vector element i 1483 if (isLE && is_scalar) 1484 vecB_void_ptr += 8; 1485 memcpy(vecB_void_ptr + (j * 8), inB, 8); 1486 } 1487 } 1488 // execute test insn 1489 // Must do set/get of CRs immediately before/after calling the asm func 1490 // to avoid CRs being modified by other instructions. 1491 SET_FPSCR_ZERO; 1492 SET_CR_XER_ZERO; 1493 (*func)(); 1494 GET_CR(flags); 1495 // assumes using CR1 1496 crx = (flags & 0x0f000000) >> 24; 1497 if (two_args) { 1498 print_vector_fp_result(crx, &test_group, i, False/*do not print vec_out*/); 1499 } else { 1500 printf("#%d: %s ", i/stride, test_group.name); 1501 for (j = 0; j < loops; j++) { 1502 if (j) 1503 printf("; "); 1504 frB_dp = (unsigned long long *)&spec_fargs[i + j]; 1505 printf("%s(%016llx)", test_group.op, *frB_dp); 1506 } 1507 printf( " ? %x (CRx)\n", crx); 1508 } 1509 } else { 1510 int j; 1511 unsigned int * frB_sp; 1512 if (two_args) { 1513 setup_sp_fp_args(&test_group.targs[i], False); 1514 } else { 1515 for (j = 0; j < loops; j++) { 1516 inB = (void *)&spec_sp_fargs[i + j]; 1517 // copy single precision FP into vector element i 1518 memcpy(((void *)&vec_inB) + (j * 4), inB, 4); 1519 } 1520 } 1521 // execute test insn 1522 SET_FPSCR_ZERO; 1523 SET_CR_XER_ZERO; 1524 (*func)(); 1525 GET_CR(flags); 1526 crx = (flags & 0x0f000000) >> 24; 1527 // print result 1528 if (two_args) { 1529 print_vector_fp_result(crx, &test_group, i, False/*do not print vec_out*/); 1530 } else { 1531 printf("#%d: %s ", i/stride, test_group.name); 1532 for (j = 0; j < loops; j++) { 1533 if (j) 1534 printf("; "); 1535 frB_sp = (unsigned int *)&spec_sp_fargs[i + j]; 1536 printf("%s(%08x)", test_group.op, *frB_sp); 1537 } 1538 printf( " ? %x (CRx)\n", crx); 1539 } 1540 } 1541 } 1542 k++; 1543 printf( "\n" ); 1544 } 1545} 1546 1547 1548static void test_ftsqrt(void) 1549{ 1550 int i, crx; 1551 unsigned int flags; 1552 unsigned long long * frbp; 1553 build_special_fargs_table(); 1554 1555 1556 for (i = 0; i < nb_special_fargs; i++) { 1557 f14 = spec_fargs[i]; 1558 frbp = (unsigned long long *)&spec_fargs[i]; 1559 SET_FPSCR_ZERO; 1560 SET_CR_XER_ZERO; 1561 __asm__ __volatile__ ("ftsqrt cr1, %0" : : "d" (f14)); 1562 GET_CR(flags); 1563 crx = (flags & 0x0f000000) >> 24; 1564 printf( "ftsqrt: %016llx ? %x (CRx)\n", *frbp, crx); 1565 } 1566 printf( "\n" ); 1567} 1568 1569static void 1570test_popcntw(void) 1571{ 1572#ifdef __powerpc64__ 1573 uint64_t res; 1574 unsigned long long src = 0x9182736405504536ULL; 1575 r14 = src; 1576 __asm__ __volatile__ ("popcntw %0, %1" : "=r" (res): "r" (r14)); 1577 printf("popcntw: 0x%llx => 0x%016llx\n", (unsigned long long)src, (unsigned long long)res); 1578#else 1579 uint32_t res; 1580 unsigned int src = 0x9182730E; 1581 r14 = src; 1582 __asm__ __volatile__ ("popcntw %0, %1" : "=r" (res): "r" (r14)); 1583 printf("popcntw: 0x%x => 0x%08x\n", src, (int)res); 1584#endif 1585 printf( "\n" ); 1586} 1587 1588 1589static test_table_t 1590 all_tests[] = 1591{ 1592 1593 { &test_vsx_one_fp_arg, 1594 "Test VSX vector and scalar single argument instructions", OTHER_INST } , 1595 { &test_int_to_fp_convert, 1596 "Test VSX vector integer to float conversion instructions", OTHER_INST }, 1597 { &test_div_extensions, 1598 "Test div extensions", SCALAR_DIV_INST }, 1599 { &test_ftsqrt, 1600 "Test ftsqrt instruction", OTHER_INST }, 1601 { &test_vx_tdivORtsqrt, 1602 "Test vector and scalar tdiv and tsqrt instructions", OTHER_INST }, 1603 { &test_popcntw, 1604 "Test popcntw instruction", OTHER_INST }, 1605 { NULL, NULL } 1606}; 1607#endif // HAS_VSX 1608 1609static void usage (void) 1610{ 1611 fprintf(stderr, 1612 "Usage: test_isa_3_0 [OPTIONS]\n" 1613 "\t-d: test scalar division instructions (default)\n" 1614 "\t-o: test non scalar division instructions (default)\n" 1615 "\t-A: test all instructions (default)\n" 1616 "\t-h: display this help and exit\n" 1617 ); 1618} 1619 1620int main(int argc, char **argv) 1621{ 1622#ifdef HAS_VSX 1623 1624 test_table_t aTest; 1625 test_func_t func; 1626 int c; 1627 int i = 0; 1628 unsigned int test_run_mask = 0; 1629 1630 /* NOTE, ISA 3.0 introduces the OV32 and CA32 bits in the FPSCR. These 1631 * bits are set on various arithimetic instructions. This means this 1632 * test generates different FPSCR output for pre ISA 3.0 versus ISA 3.0 1633 * hardware. The tests have been grouped so that the tests that generate 1634 * different results are in one test and the rest are in a different test. 1635 * this minimizes the size of the result expect files for the two cases. 1636 */ 1637 1638 while ((c = getopt(argc, argv, "doAh")) != -1) { 1639 switch (c) { 1640 case 'd': 1641 test_run_mask |= SCALAR_DIV_INST; 1642 break; 1643 case 'o': 1644 test_run_mask |= OTHER_INST; 1645 break; 1646 case 'A': 1647 test_run_mask = 0xFFFF; 1648 break; 1649 case 'h': 1650 usage(); 1651 return 0; 1652 1653 default: 1654 usage(); 1655 fprintf(stderr, "Unknown argument: '%c'\n", c); 1656 return 1; 1657 } 1658 } 1659 1660 while ((func = all_tests[i].test_category)) { 1661 aTest = all_tests[i]; 1662 1663 if(test_run_mask & aTest.test_group) { 1664 /* Test group specified on command line */ 1665 1666 printf( "%s\n", aTest.name ); 1667 (*func)(); 1668 } 1669 i++; 1670 } 1671 if (spec_fargs) 1672 free(spec_fargs); 1673 if (spec_sp_fargs) 1674 free(spec_sp_fargs); 1675 1676#endif // HAS _VSX 1677 1678 return 0; 1679} 1680