1/* Copyright (C) 2011 IBM 2 3 Author: Maynard Johnson <maynardj@us.ibm.com> 4 5 This program is free software; you can redistribute it and/or 6 modify it under the terms of the GNU General Public License as 7 published by the Free Software Foundation; either version 2 of the 8 License, or (at your option) any later version. 9 10 This program is distributed in the hope that it will be useful, but 11 WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program; if not, write to the Free Software 17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 18 02111-1307, USA. 19 20 The GNU General Public License is contained in the file COPYING. 21 */ 22 23#ifdef HAS_VSX 24 25#include <stdio.h> 26#include <stdint.h> 27#include <stdlib.h> 28#include <string.h> 29#include <malloc.h> 30#include <altivec.h> 31#include <math.h> 32#include <unistd.h> // getopt 33 34#ifndef __powerpc64__ 35typedef uint32_t HWord_t; 36#else 37typedef uint64_t HWord_t; 38#endif /* __powerpc64__ */ 39 40typedef unsigned char Bool; 41#define True 1 42#define False 0 43 44#ifdef VGP_ppc64le_linux 45#define isLE 1 46#else 47#define isLE 0 48#endif 49 50register HWord_t r14 __asm__ ("r14"); 51register HWord_t r15 __asm__ ("r15"); 52register HWord_t r16 __asm__ ("r16"); 53register HWord_t r17 __asm__ ("r17"); 54register double f14 __asm__ ("fr14"); 55register double f15 __asm__ ("fr15"); 56register double f16 __asm__ ("fr16"); 57register double f17 __asm__ ("fr17"); 58 59static volatile unsigned int div_flags, div_xer; 60 61#define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7" 62 63#define SET_CR(_arg) \ 64 __asm__ __volatile__ ("mtcr %0" : : "b"(_arg) : ALLCR ); 65 66#define SET_XER(_arg) \ 67 __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" ); 68 69#define GET_CR(_lval) \ 70 __asm__ __volatile__ ("mfcr %0" : "=b"(_lval) ) 71 72#define GET_XER(_lval) \ 73 __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) ) 74 75#define GET_CR_XER(_lval_cr,_lval_xer) \ 76 do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0) 77 78#define SET_CR_ZERO \ 79 SET_CR(0) 80 81#define SET_XER_ZERO \ 82 SET_XER(0) 83 84#define SET_CR_XER_ZERO \ 85 do { SET_CR_ZERO; SET_XER_ZERO; } while (0) 86 87#define SET_FPSCR_ZERO \ 88 do { double _d = 0.0; \ 89 __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \ 90 } while (0) 91 92 93typedef void (*test_func_t)(void); 94typedef struct test_table test_table_t; 95 96/* Defines for the instructiion groups, use bit field to identify */ 97#define SCALAR_DIV_INST 0x0001 98#define OTHER_INST 0x0002 99 100/* These functions below that construct a table of floating point 101 * values were lifted from none/tests/ppc32/jm-insns.c. 102 */ 103 104#if defined (DEBUG_ARGS_BUILD) 105#define AB_DPRINTF(fmt, args...) do { fprintf(stderr, fmt , ##args); } while (0) 106#else 107#define AB_DPRINTF(fmt, args...) do { } while (0) 108#endif 109 110static inline void register_farg (void *farg, 111 int s, uint16_t _exp, uint64_t mant) 112{ 113 uint64_t tmp; 114 115 tmp = ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant; 116 *(uint64_t *)farg = tmp; 117 AB_DPRINTF("%d %03x %013llx => %016llx %0e\n", 118 s, _exp, mant, *(uint64_t *)farg, *(double *)farg); 119} 120 121static inline void register_sp_farg (void *farg, 122 int s, uint16_t _exp, uint32_t mant) 123{ 124 uint32_t tmp; 125 tmp = ((uint32_t)s << 31) | ((uint32_t)_exp << 23) | mant; 126 *(uint32_t *)farg = tmp; 127} 128 129typedef struct fp_test_args { 130 int fra_idx; 131 int frb_idx; 132} fp_test_args_t; 133 134 135fp_test_args_t fp_cmp_tests[] = { 136 {8, 8}, 137 {8, 14}, 138 {8, 6}, 139 {8, 5}, 140 {8, 4}, 141 {8, 7}, 142 {8, 9}, 143 {8, 11}, 144 {14, 8}, 145 {14, 14}, 146 {14, 6}, 147 {14, 5}, 148 {14, 4}, 149 {14, 7}, 150 {14, 9}, 151 {14, 11}, 152 {6, 8}, 153 {6, 14}, 154 {6, 6}, 155 {6, 5}, 156 {6, 4}, 157 {6, 7}, 158 {6, 9}, 159 {6, 11}, 160 {5, 8}, 161 {5, 14}, 162 {5, 6}, 163 {5, 5}, 164 {5, 4}, 165 {5, 7}, 166 {5, 9}, 167 {5, 11}, 168 {4, 8}, 169 {4, 14}, 170 {4, 6}, 171 {4, 5}, 172 {4, 1}, 173 {4, 7}, 174 {4, 9}, 175 {4, 11}, 176 {7, 8}, 177 {7, 14}, 178 {7, 6}, 179 {7, 5}, 180 {7, 4}, 181 {7, 7}, 182 {7, 9}, 183 {7, 11}, 184 {10, 8}, 185 {10, 14}, 186 {10, 6}, 187 {10, 5}, 188 {10, 4}, 189 {10, 7}, 190 {10, 9}, 191 {10, 10}, 192 {12, 8}, 193 {12, 14}, 194 {12, 6}, 195 {12, 5}, 196 {1, 1}, 197 {2, 2}, 198 {3, 3}, 199 {4, 4}, 200}; 201 202 203fp_test_args_t two_arg_fp_tests[] = { 204 {8, 8}, 205 {8, 14}, 206 {15, 16}, 207 {8, 5}, 208 {8, 4}, 209 {8, 7}, 210 {8, 9}, 211 {8, 11}, 212 {14, 8}, 213 {14, 14}, 214 {14, 6}, 215 {14, 5}, 216 {14, 4}, 217 {14, 7}, 218 {14, 9}, 219 {14, 11}, 220 {6, 8}, 221 {6, 14}, 222 {6, 6}, 223 {6, 5}, 224 {6, 4}, 225 {6, 7}, 226 {6, 9}, 227 {6, 11}, 228 {5, 8}, 229 {5, 14}, 230 {5, 6}, 231 {5, 5}, 232 {5, 4}, 233 {5, 7}, 234 {5, 9}, 235 {5, 11}, 236 {4, 8}, 237 {4, 14}, 238 {4, 6}, 239 {4, 5}, 240 {4, 1}, 241 {4, 7}, 242 {4, 9}, 243 {4, 11}, 244 {7, 8}, 245 {7, 14}, 246 {7, 6}, 247 {7, 5}, 248 {7, 4}, 249 {7, 7}, 250 {7, 9}, 251 {7, 11}, 252 {10, 8}, 253 {10, 14}, 254 {12, 6}, 255 {12, 5}, 256 {10, 4}, 257 {10, 7}, 258 {10, 9}, 259 {10, 11}, 260 {12, 8 }, 261 {12, 14}, 262 {12, 6}, 263 {15, 16}, 264 {15, 16}, 265 {9, 11}, 266 {11, 11}, 267 {11, 12} 268}; 269 270 271static int nb_special_fargs; 272static double * spec_fargs; 273static float * spec_sp_fargs; 274 275static void build_special_fargs_table(void) 276{ 277/* 278 Entry Sign Exp fraction Special value 279 0 0 3fd 0x8000000000000ULL Positive finite number 280 1 0 404 0xf000000000000ULL ... 281 2 0 001 0x8000000b77501ULL ... 282 3 0 7fe 0x800000000051bULL ... 283 4 0 012 0x3214569900000ULL ... 284 5 0 000 0x0000000000000ULL +0.0 (+zero) 285 6 1 000 0x0000000000000ULL -0.0 (-zero) 286 7 0 7ff 0x0000000000000ULL +infinity 287 8 1 7ff 0x0000000000000ULL -infinity 288 9 0 7ff 0x7FFFFFFFFFFFFULL +SNaN 289 10 1 7ff 0x7FFFFFFFFFFFFULL -SNaN 290 11 0 7ff 0x8000000000000ULL +QNaN 291 12 1 7ff 0x8000000000000ULL -QNaN 292 13 1 000 0x8340000078000ULL Denormalized val (zero exp and non-zero fraction) 293 14 1 40d 0x0650f5a07b353ULL Negative finite number 294 15 0 412 0x32585a9900000ULL A couple more positive finite numbers 295 16 0 413 0x82511a2000000ULL ... 296*/ 297 298 uint64_t mant; 299 uint32_t mant_sp; 300 uint16_t _exp; 301 int s; 302 int j, i = 0; 303 304 if (spec_fargs) 305 return; 306 307 spec_fargs = malloc( 17 * sizeof(double) ); 308 spec_sp_fargs = malloc( 17 * sizeof(float) ); 309 310 // #0 311 s = 0; 312 _exp = 0x3fd; 313 mant = 0x8000000000000ULL; 314 register_farg(&spec_fargs[i++], s, _exp, mant); 315 316 // #1 317 s = 0; 318 _exp = 0x404; 319 mant = 0xf000000000000ULL; 320 register_farg(&spec_fargs[i++], s, _exp, mant); 321 322 /* None of the ftdiv tests succeed. 323 * FRA = value #0; FRB = value #1 324 * ea_ = -2; e_b = 5 325 * fl_flag || fg_flag || fe_flag = 100 326 */ 327 328 /************************************************* 329 * fe_flag tests 330 * 331 *************************************************/ 332 333 /* fe_flag <- 1 if FRA is a NaN 334 * FRA = value #9; FRB = value #1 335 * e_a = 1024; e_b = 5 336 * fl_flag || fg_flag || fe_flag = 101 337 */ 338 339 /* fe_flag <- 1 if FRB is a NaN 340 * FRA = value #1; FRB = value #12 341 * e_a = 5; e_b = 1024 342 * fl_flag || fg_flag || fe_flag = 101 343 */ 344 345 /* fe_flag <- 1 if e_b <= -1022 346 * FRA = value #0; FRB = value #2 347 * e_a = -2; e_b = -1022 348 * fl_flag || fg_flag || fe_flag = 101 349 * 350 */ 351 // #2 352 s = 0; 353 _exp = 0x001; 354 mant = 0x8000000b77501ULL; 355 register_farg(&spec_fargs[i++], s, _exp, mant); 356 357 /* fe_flag <- 1 if e_b >= 1021 358 * FRA = value #1; FRB = value #3 359 * e_a = 5; e_b = 1023 360 * fl_flag || fg_flag || fe_flag = 101 361 */ 362 // #3 363 s = 0; 364 _exp = 0x7fe; 365 mant = 0x800000000051bULL; 366 register_farg(&spec_fargs[i++], s, _exp, mant); 367 368 /* fe_flag <- 1 if FRA != 0 && e_a - e_b >= 1023 369 * Let FRA = value #3 and FRB be value #0. 370 * e_a = 1023; e_b = -2 371 * fl_flag || fg_flag || fe_flag = 101 372 */ 373 374 /* fe_flag <- 1 if FRA != 0 && e_a - e_b <= -1023 375 * Let FRA = value #0 above and FRB be value #3 above 376 * e_a = -2; e_b = 1023 377 * fl_flag || fg_flag || fe_flag = 101 378 */ 379 380 /* fe_flag <- 1 if FRA != 0 && e_a <= -970 381 * Let FRA = value #4 and FRB be value #0 382 * e_a = -1005; e_b = -2 383 * fl_flag || fg_flag || fe_flag = 101 384 */ 385 // #4 386 s = 0; 387 _exp = 0x012; 388 mant = 0x3214569900000ULL; 389 register_farg(&spec_fargs[i++], s, _exp, mant); 390 391 /************************************************* 392 * fg_flag tests 393 * 394 *************************************************/ 395 /* fg_flag <- 1 if FRA is an Infinity 396 * NOTE: FRA = Inf also sets fe_flag 397 * Do two tests, using values #7 and #8 (+/- Inf) for FRA. 398 * Test 1: 399 * Let FRA be value #7 and FRB be value #1 400 * e_a = 1024; e_b = 5 401 * fl_flag || fg_flag || fe_flag = 111 402 * 403 * Test 2: 404 * Let FRA be value #8 and FRB be value #1 405 * e_a = 1024; e_b = 5 406 * fl_flag || fg_flag || fe_flag = 111 407 * 408 */ 409 410 /* fg_flag <- 1 if FRB is an Infinity 411 * NOTE: FRB = Inf also sets fe_flag 412 * Let FRA be value #1 and FRB be value #7 413 * e_a = 5; e_b = 1024 414 * fl_flag || fg_flag || fe_flag = 111 415 */ 416 417 /* fg_flag <- 1 if FRB is denormalized 418 * NOTE: e_b < -1022 ==> fe_flag <- 1 419 * Let FRA be value #0 and FRB be value #13 420 * e_a = -2; e_b = -1023 421 * fl_flag || fg_flag || fe_flag = 111 422 */ 423 424 /* fg_flag <- 1 if FRB is +zero 425 * NOTE: FRA = Inf also sets fe_flag 426 * Let FRA = val #5; FRB = val #5 427 * ea_ = -1023; e_b = -1023 428 * fl_flag || fg_flag || fe_flag = 111 429 */ 430 431 /* fg_flag <- 1 if FRB is -zero 432 * NOTE: FRA = Inf also sets fe_flag 433 * Let FRA = val #5; FRB = val #6 434 * ea_ = -1023; e_b = -1023 435 * fl_flag || fg_flag || fe_flag = 111 436 */ 437 438 /* Special values */ 439 /* +0.0 : 0 0x000 0x0000000000000 */ 440 // #5 441 s = 0; 442 _exp = 0x000; 443 mant = 0x0000000000000ULL; 444 register_farg(&spec_fargs[i++], s, _exp, mant); 445 446 /* -0.0 : 1 0x000 0x0000000000000 */ 447 // #6 448 s = 1; 449 _exp = 0x000; 450 mant = 0x0000000000000ULL; 451 register_farg(&spec_fargs[i++], s, _exp, mant); 452 453 /* +infinity : 0 0x7FF 0x0000000000000 */ 454 // #7 455 s = 0; 456 _exp = 0x7FF; 457 mant = 0x0000000000000ULL; 458 register_farg(&spec_fargs[i++], s, _exp, mant); 459 460 /* -infinity : 1 0x7FF 0x0000000000000 */ 461 // #8 462 s = 1; 463 _exp = 0x7FF; 464 mant = 0x0000000000000ULL; 465 register_farg(&spec_fargs[i++], s, _exp, mant); 466 467 /* 468 * This comment applies to values #9 and #10 below: 469 * When src is a SNaN, it's converted to a QNaN first before rounding to single-precision, 470 * so we can't just copy the double-precision value to the corresponding slot in the 471 * single-precision array (i.e., in the loop at the end of this function). Instead, we 472 * have to manually set the bits using register_sp_farg(). 473 */ 474 475 /* +SNaN : 0 0x7FF 0x7FFFFFFFFFFFF */ 476 // #9 477 s = 0; 478 _exp = 0x7FF; 479 mant = 0x7FFFFFFFFFFFFULL; 480 register_farg(&spec_fargs[i++], s, _exp, mant); 481 _exp = 0xff; 482 mant_sp = 0x3FFFFF; 483 register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp); 484 485 /* -SNaN : 1 0x7FF 0x7FFFFFFFFFFFF */ 486 // #10 487 s = 1; 488 _exp = 0x7FF; 489 mant = 0x7FFFFFFFFFFFFULL; 490 register_farg(&spec_fargs[i++], s, _exp, mant); 491 _exp = 0xff; 492 mant_sp = 0x3FFFFF; 493 register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp); 494 495 /* +QNaN : 0 0x7FF 0x8000000000000 */ 496 // #11 497 s = 0; 498 _exp = 0x7FF; 499 mant = 0x8000000000000ULL; 500 register_farg(&spec_fargs[i++], s, _exp, mant); 501 502 /* -QNaN : 1 0x7FF 0x8000000000000 */ 503 // #12 504 s = 1; 505 _exp = 0x7FF; 506 mant = 0x8000000000000ULL; 507 register_farg(&spec_fargs[i++], s, _exp, mant); 508 509 /* denormalized value */ 510 // #13 511 s = 1; 512 _exp = 0x000; 513 mant = 0x8340000078000ULL; 514 register_farg(&spec_fargs[i++], s, _exp, mant); 515 516 /* Negative finite number */ 517 // #14 518 s = 1; 519 _exp = 0x40d; 520 mant = 0x0650f5a07b353ULL; 521 register_farg(&spec_fargs[i++], s, _exp, mant); 522 523 /* A couple positive finite numbers ... */ 524 // #15 525 s = 0; 526 _exp = 0x412; 527 mant = 0x32585a9900000ULL; 528 register_farg(&spec_fargs[i++], s, _exp, mant); 529 530 // #16 531 s = 0; 532 _exp = 0x413; 533 mant = 0x82511a2000000ULL; 534 register_farg(&spec_fargs[i++], s, _exp, mant); 535 536 nb_special_fargs = i; 537 for (j = 0; j < i; j++) { 538 if (!(j == 9 || j == 10)) 539 spec_sp_fargs[j] = spec_fargs[j]; 540 } 541} 542 543 544struct test_table 545{ 546 test_func_t test_category; 547 char * name; 548 unsigned int test_group; 549}; 550 551typedef enum { 552 SINGLE_TEST, 553 DOUBLE_TEST 554} precision_type_t; 555 556typedef enum { 557 VX_SCALAR_FP_NMSUB = 0, 558 // ALL VECTOR-TYPE OPS SHOULD BE ADDED AFTER THIS LINE 559 VX_VECTOR_FP_MULT_AND_OP2 = 10, 560 // and before this line 561 VX_BASIC_CMP = 30, 562 VX_CONV_WORD, 563 VX_DEFAULT 564} vx_fp_test_type; 565 566typedef struct vx_fp_test 567{ 568 test_func_t test_func; 569 const char * name; 570 fp_test_args_t * targs; 571 int num_tests; 572 precision_type_t precision; 573 vx_fp_test_type type; 574 const char * op; 575} vx_fp_test_t; 576 577static vector unsigned int vec_out, vec_inA, vec_inB, vec_inC; 578 579static Bool do_dot; 580static void test_xvcmpeqdp(void) 581{ 582 if (do_dot) 583 __asm__ __volatile__ ("xvcmpeqdp. %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 584 else 585 __asm__ __volatile__ ("xvcmpeqdp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 586} 587 588static void test_xvcmpgedp(void) 589{ 590 if (do_dot) 591 __asm__ __volatile__ ("xvcmpgedp. %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 592 else 593 __asm__ __volatile__ ("xvcmpgedp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 594} 595 596static void test_xvcmpgtdp(void) 597{ 598 if (do_dot) 599 __asm__ __volatile__ ("xvcmpgtdp. %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 600 else 601 __asm__ __volatile__ ("xvcmpgtdp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 602} 603 604static void test_xvcmpeqsp(void) 605{ 606 if (do_dot) 607 __asm__ __volatile__ ("xvcmpeqsp. %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 608 else 609 __asm__ __volatile__ ("xvcmpeqsp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 610} 611 612static void test_xvcmpgesp(void) 613{ 614 if (do_dot) 615 __asm__ __volatile__ ("xvcmpgesp. %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 616 else 617 __asm__ __volatile__ ("xvcmpgesp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 618} 619 620static void test_xvcmpgtsp(void) 621{ 622 if (do_dot) 623 __asm__ __volatile__ ("xvcmpgtsp. %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 624 else 625 __asm__ __volatile__ ("xvcmpgtsp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 626} 627 628static Bool do_aXp; 629static Bool do_dp; 630static void test_xsnmsub(void) 631{ 632 if (do_aXp) 633 __asm__ __volatile__ ("xsnmsubadp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 634 else 635 __asm__ __volatile__ ("xsnmsubmdp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 636} 637 638static void test_xvmadd(void) 639{ 640 if (do_aXp) 641 if (do_dp) 642 __asm__ __volatile__ ("xvmaddadp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 643 else 644 __asm__ __volatile__ ("xvmaddasp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 645 else 646 if (do_dp) 647 __asm__ __volatile__ ("xvmaddmdp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 648 else 649 __asm__ __volatile__ ("xvmaddmsp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 650} 651 652static void test_xvnmadd(void) 653{ 654 if (do_aXp) 655 if (do_dp) 656 __asm__ __volatile__ ("xvnmaddadp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 657 else 658 __asm__ __volatile__ ("xvnmaddasp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 659 else 660 if (do_dp) 661 __asm__ __volatile__ ("xvnmaddmdp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 662 else 663 __asm__ __volatile__ ("xvnmaddmsp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 664} 665 666static void test_xvnmsub(void) 667{ 668 if (do_aXp) 669 if (do_dp) 670 __asm__ __volatile__ ("xvnmsubadp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 671 else 672 __asm__ __volatile__ ("xvnmsubasp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 673 else 674 if (do_dp) 675 __asm__ __volatile__ ("xvnmsubmdp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 676 else 677 __asm__ __volatile__ ("xvnmsubmsp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 678} 679 680static void test_xvmsub(void) 681{ 682 if (do_aXp) 683 if (do_dp) 684 __asm__ __volatile__ ("xvmsubadp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 685 else 686 __asm__ __volatile__ ("xvmsubasp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 687 else 688 if (do_dp) 689 __asm__ __volatile__ ("xvmsubmdp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 690 else 691 __asm__ __volatile__ ("xvmsubmsp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 692} 693 694static void test_xssqrtdp(void) 695{ 696 __asm__ __volatile__ ("xssqrtdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 697} 698 699static void test_xsrdpim(void) 700{ 701 __asm__ __volatile__ ("xsrdpim %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 702} 703 704static void test_xsrdpip(void) 705{ 706 __asm__ __volatile__ ("xsrdpip %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 707} 708 709static void test_xstdivdp(void) 710{ 711 __asm__ __volatile__ ("xstdivdp 6, %x0, %x1" : : "wa" (vec_inA), "wa" (vec_inB)); 712} 713 714static void test_xsmaxdp(void) 715{ 716 __asm__ __volatile__ ("xsmaxdp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 717} 718 719static void test_xsmindp(void) 720{ 721 __asm__ __volatile__ ("xsmindp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 722} 723 724static void test_xvadddp(void) 725{ 726 __asm__ __volatile__ ("xvadddp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 727} 728 729static void test_xvaddsp(void) 730{ 731 __asm__ __volatile__ ("xvaddsp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 732} 733 734static void test_xvdivdp(void) 735{ 736 __asm__ __volatile__ ("xvdivdp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 737} 738 739static void test_xvdivsp(void) 740{ 741 __asm__ __volatile__ ("xvdivsp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 742} 743 744static void test_xvmuldp(void) 745{ 746 __asm__ __volatile__ ("xvmuldp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 747} 748 749static void test_xvmulsp(void) 750{ 751 __asm__ __volatile__ ("xvmulsp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 752} 753 754static void test_xvsubdp(void) 755{ 756 __asm__ __volatile__ ("xvsubdp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 757} 758 759static void test_xvmaxdp(void) 760{ 761 __asm__ __volatile__ ("xvmaxdp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 762} 763 764static void test_xvmindp(void) 765{ 766 __asm__ __volatile__ ("xvmindp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 767} 768 769static void test_xvmaxsp(void) 770{ 771 __asm__ __volatile__ ("xvmaxsp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 772} 773 774static void test_xvminsp(void) 775{ 776 __asm__ __volatile__ ("xvminsp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 777} 778 779static void test_xvsubsp(void) 780{ 781 __asm__ __volatile__ ("xvsubsp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 782} 783 784static void test_xvresp(void) 785{ 786 __asm__ __volatile__ ("xvresp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 787} 788 789static void test_xxsel(void) 790{ 791 unsigned long long * dst; 792 unsigned long long xa[] = { 0xa12bc37de56f9708ULL, 0x3894c1fddeadbeefULL}; 793 unsigned long long xb[] = { 0xfedc432124681235ULL, 0xf1e2d3c4e0057708ULL}; 794 unsigned long long xc[] = { 0xffffffff01020304ULL, 0x128934bd00000000ULL}; 795 796 memcpy(&vec_inA, xa, 16); 797 memcpy(&vec_inB, xb, 16); 798 memcpy(&vec_inC, xc, 16); 799 800 801 __asm__ __volatile__ ("xxsel %x0, %x1, %x2, %x3" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB),"wa" (vec_inC)); 802 dst = (unsigned long long *) &vec_out; 803 printf("xxsel %016llx,%016llx,%016llx => %016llx\n", xa[0], xb[0], xc[0], *dst); 804 dst++; 805 printf("xxsel %016llx,%016llx,%016llx => %016llx\n", xa[1], xb[1], xc[1], *dst); 806 printf("\n"); 807} 808 809static void test_xxspltw(void) 810{ 811 int uim; 812 unsigned long long * dst = NULL; 813 unsigned int xb[] = { 0xfedc4321, 0x24681235, 0xf1e2d3c4, 0xe0057708}; 814 int i; 815 void * vecB_ptr = &vec_inB; 816 if (isLE) { 817 for (i = 3; i >=0; i--) { 818 memcpy(vecB_ptr, &xb[i], 4); 819 vecB_ptr+=4; 820 } 821 } else { 822 for (i = 0; i < 4; i++) { 823 memcpy(vecB_ptr, &xb[i], 4); 824 vecB_ptr+=4; 825 } 826 } 827 828 for (uim = 0; uim < 4; uim++) { 829 switch (uim) { 830 case 0: 831 __asm__ __volatile__ ("xxspltw %x0, %x1, 0" : "=wa" (vec_out): "wa" (vec_inB)); 832 break; 833 case 1: 834 __asm__ __volatile__ ("xxspltw %x0, %x1, 1" : "=wa" (vec_out): "wa" (vec_inB)); 835 break; 836 case 2: 837 __asm__ __volatile__ ("xxspltw %x0, %x1, 2" : "=wa" (vec_out): "wa" (vec_inB)); 838 break; 839 case 3: 840 __asm__ __volatile__ ("xxspltw %x0, %x1, 3" : "=wa" (vec_out): "wa" (vec_inB)); 841 break; 842 } 843 dst = (unsigned long long *) &vec_out; 844 printf("xxspltw 0x%08x%08x%08x%08x %d=> 0x%016llx", xb[0], xb[1], 845 xb[2], xb[3], uim, *dst); 846 dst++; 847 printf("%016llx\n", *dst); 848 } 849 printf("\n"); 850} 851 852static void test_xscvdpsxws(void) 853{ 854 __asm__ __volatile__ ("xscvdpsxws %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 855} 856 857static void test_xscvdpuxds(void) 858{ 859 __asm__ __volatile__ ("xscvdpuxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 860} 861 862static void test_xvcpsgndp(void) 863{ 864 __asm__ __volatile__ ("xvcpsgndp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 865} 866 867static void test_xvcpsgnsp(void) 868{ 869 __asm__ __volatile__ ("xvcpsgnsp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 870} 871 872static void test_xvcvdpsxws(void) 873{ 874 __asm__ __volatile__ ("xvcvdpsxws %x0, %x1 " : "=wa" (vec_out): "wa" (vec_inB)); 875} 876 877static void test_xvcvspsxws(void) 878{ 879 __asm__ __volatile__ ("xvcvspsxws %x0, %x1 " : "=wa" (vec_out): "wa" (vec_inB)); 880} 881 882static vx_fp_test_t 883vx_vector_one_fp_arg_tests[] = { 884 { &test_xvresp, "xvresp", NULL, 16, SINGLE_TEST, VX_BASIC_CMP, "1/x"}, 885 { &test_xvcvdpsxws, "xvcvdpsxws", NULL, 16, DOUBLE_TEST, VX_CONV_WORD, "conv"}, 886 { &test_xvcvspsxws, "xvcvspsxws", NULL, 16, SINGLE_TEST, VX_CONV_WORD, "conv"}, 887 { NULL, NULL, NULL, 0 , 0, 0, NULL} 888}; 889 890static vx_fp_test_t 891vx_vector_fp_tests[] = { 892 { &test_xvcmpeqdp, "xvcmpeqdp", fp_cmp_tests, 64, DOUBLE_TEST, VX_BASIC_CMP, "eq"}, 893 { &test_xvcmpgedp, "xvcmpgedp", fp_cmp_tests, 64, DOUBLE_TEST, VX_BASIC_CMP, "ge"}, 894 { &test_xvcmpgtdp, "xvcmpgtdp", fp_cmp_tests, 64, DOUBLE_TEST, VX_BASIC_CMP, "gt"}, 895 { &test_xvcmpeqsp, "xvcmpeqsp", fp_cmp_tests, 64, SINGLE_TEST, VX_BASIC_CMP, "eq"}, 896 { &test_xvcmpgesp, "xvcmpgesp", fp_cmp_tests, 64, SINGLE_TEST, VX_BASIC_CMP, "ge"}, 897 { &test_xvcmpgtsp, "xvcmpgtsp", fp_cmp_tests, 64, SINGLE_TEST, VX_BASIC_CMP, "gt"}, 898 { &test_xvadddp, "xvadddp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "+" }, 899 { &test_xvaddsp, "xvaddsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "+" }, 900 { &test_xvdivdp, "xvdivdp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "/" }, 901 { &test_xvdivsp, "xvdivsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "/" }, 902 { &test_xvmuldp, "xvmuldp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "*" }, 903 { &test_xvmulsp, "xvmulsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "*" }, 904 { &test_xvsubdp, "xvsubdp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "-" }, 905 { &test_xvsubsp, "xvsubsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "-" }, 906 { &test_xvmaxdp, "xvmaxdp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "@max@" }, 907 { &test_xvmindp, "xvmindp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "@min@" }, 908 { &test_xvmaxsp, "xvmaxsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "@max@" }, 909 { &test_xvminsp, "xvminsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "@min@" }, 910 { &test_xvcpsgndp, "xvcpsgndp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "+-cp"}, 911 { &test_xvcpsgnsp, "xvcpsgnsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "+-cp"}, 912 { NULL, NULL, NULL, 0 , 0, 0, NULL} 913}; 914 915 916static vx_fp_test_t 917vx_aORm_fp_tests[] = { 918 { &test_xsnmsub, "xsnmsub", two_arg_fp_tests, 64, DOUBLE_TEST, VX_SCALAR_FP_NMSUB, "!*-"}, 919 { &test_xvmadd, "xvmadd", two_arg_fp_tests, 64, DOUBLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "*+"}, 920 { &test_xvmadd, "xvmadd", two_arg_fp_tests, 64, SINGLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "*+"}, 921 { &test_xvnmadd, "xvnmadd", two_arg_fp_tests, 64, DOUBLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "!*+"}, 922 { &test_xvnmadd, "xvnmadd", two_arg_fp_tests, 64, SINGLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "!*+"}, 923 { &test_xvmsub, "xvmsub", two_arg_fp_tests, 64, DOUBLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "*-"}, 924 { &test_xvmsub, "xvmsub", two_arg_fp_tests, 64, SINGLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "*-"}, 925 { &test_xvnmsub, "xvnmsub", two_arg_fp_tests, 64, DOUBLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "!*-"}, 926 { &test_xvnmsub, "xvnmsub", two_arg_fp_tests, 64, SINGLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "!*-"}, 927 { NULL, NULL, NULL, 0, 0, 0, NULL } 928}; 929 930static vx_fp_test_t 931vx_simple_scalar_fp_tests[] = { 932 { &test_xssqrtdp, "xssqrtdp", NULL, 17, DOUBLE_TEST, VX_DEFAULT, NULL}, 933 { &test_xsrdpim, "xsrdpim", NULL, 17, DOUBLE_TEST, VX_DEFAULT, NULL}, 934 { &test_xsrdpip, "xsrdpip", NULL, 17, DOUBLE_TEST, VX_DEFAULT, NULL}, 935 { &test_xstdivdp, "xstdivdp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, NULL}, 936 { &test_xsmaxdp, "xsmaxdp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, NULL}, 937 { &test_xsmindp, "xsmindp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, NULL}, 938 { &test_xscvdpsxws, "xscvdpsxws", NULL, 17, DOUBLE_TEST, VX_CONV_WORD, NULL}, 939 { &test_xscvdpuxds, "xscvdpuxds", NULL, 17, DOUBLE_TEST, VX_DEFAULT, NULL}, 940 { NULL, NULL, NULL, 0, 0, 0, NULL } 941}; 942 943 944#ifdef __powerpc64__ 945static void test_bpermd(void) 946{ 947 /* NOTE: Bit number is '0 . . . 63' 948 * 949 * Permuted bits are generated bit 0 -7 as follows: 950 * index = (r14)8*i:8*i+7 951 * perm[i] = (r15)index 952 * 953 * So, for i = 0, index is (r14)8*0:8*0+7, or (r14)0:7, which is the MSB 954 * byte of r14, 0x1b(27/base 10). This identifies bit 27 of r15, which is '1'. 955 * For i = 1, index is 0x2c, identifying bit 44 of r15, which is '1'. 956 * So the result of the first two iterations of i are: 957 * perm = 0b01xxxxxx 958 * 959 */ 960 r15 = 0xa12bc37de56f9708ULL; 961 r14 = 0x1b2c31f030000001ULL; 962 __asm__ __volatile__ ("bpermd %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 963 printf("bpermd: 0x%016llx : 0x%016llx => 0x%llx\n", (unsigned long long)r14, 964 (unsigned long long)r15, (unsigned long long)r17); 965 printf("\n"); 966} 967#endif 968 969static Bool do_OE; 970typedef enum { 971 DIV_BASE = 1, 972 DIV_OE = 2, 973 DIV_DOT = 4, 974} div_type_t; 975/* Possible divde type combinations are: 976 * - base 977 * - base+dot 978 * - base+OE 979 * - base+OE+dot 980 */ 981#ifdef __powerpc64__ 982static void test_divde(void) 983{ 984 int divde_type = DIV_BASE; 985 if (do_OE) 986 divde_type |= DIV_OE; 987 if (do_dot) 988 divde_type |= DIV_DOT; 989 990 switch (divde_type) { 991 case 1: 992 SET_CR_XER_ZERO; 993 __asm__ __volatile__ ("divde %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 994 GET_CR_XER(div_flags, div_xer); 995 break; 996 case 3: 997 SET_CR_XER_ZERO; 998 __asm__ __volatile__ ("divdeo %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 999 GET_CR_XER(div_flags, div_xer); 1000 break; 1001 case 5: 1002 SET_CR_XER_ZERO; 1003 __asm__ __volatile__ ("divde. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 1004 GET_CR_XER(div_flags, div_xer); 1005 break; 1006 case 7: 1007 SET_CR_XER_ZERO; 1008 __asm__ __volatile__ ("divdeo. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 1009 GET_CR_XER(div_flags, div_xer); 1010 break; 1011 default: 1012 fprintf(stderr, "Invalid divde type. Exiting\n"); 1013 exit(1); 1014 } 1015} 1016#endif 1017 1018static void test_divweu(void) 1019{ 1020 int divweu_type = DIV_BASE; 1021 if (do_OE) 1022 divweu_type |= DIV_OE; 1023 if (do_dot) 1024 divweu_type |= DIV_DOT; 1025 1026 switch (divweu_type) { 1027 case 1: 1028 SET_CR_XER_ZERO; 1029 __asm__ __volatile__ ("divweu %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 1030 GET_CR_XER(div_flags, div_xer); 1031 break; 1032 case 3: 1033 SET_CR_XER_ZERO; 1034 __asm__ __volatile__ ("divweuo %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 1035 GET_CR_XER(div_flags, div_xer); 1036 break; 1037 case 5: 1038 SET_CR_XER_ZERO; 1039 __asm__ __volatile__ ("divweu. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 1040 GET_CR_XER(div_flags, div_xer); 1041 break; 1042 case 7: 1043 SET_CR_XER_ZERO; 1044 __asm__ __volatile__ ("divweuo. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 1045 GET_CR_XER(div_flags, div_xer); 1046 break; 1047 default: 1048 fprintf(stderr, "Invalid divweu type. Exiting\n"); 1049 exit(1); 1050 } 1051} 1052 1053static void test_fctiduz(void) 1054{ 1055 if (do_dot) 1056 __asm__ __volatile__ ("fctiduz. %0, %1" : "=d" (f17) : "d" (f14)); 1057 else 1058 __asm__ __volatile__ ("fctiduz %0, %1" : "=d" (f17) : "d" (f14)); 1059} 1060 1061static void test_fctidu(void) 1062{ 1063 if (do_dot) 1064 __asm__ __volatile__ ("fctidu. %0, %1" : "=d" (f17) : "d" (f14)); 1065 else 1066 __asm__ __volatile__ ("fctidu %0, %1" : "=d" (f17) : "d" (f14)); 1067} 1068 1069static void test_fctiwuz(void) 1070{ 1071 if (do_dot) 1072 __asm__ __volatile__ ("fctiwuz. %0, %1" : "=d" (f17) : "d" (f14)); 1073 else 1074 __asm__ __volatile__ ("fctiwuz %0, %1" : "=d" (f17) : "d" (f14)); 1075} 1076 1077static void test_fctiwu(void) 1078{ 1079 if (do_dot) 1080 __asm__ __volatile__ ("fctiwu. %0, %1" : "=d" (f17) : "d" (f14)); 1081 else 1082 __asm__ __volatile__ ("fctiwu %0, %1" : "=d" (f17) : "d" (f14)); 1083} 1084 1085typedef struct simple_test { 1086 test_func_t test_func; 1087 char * name; 1088 precision_type_t precision; 1089} simple_test_t; 1090 1091static simple_test_t fct_tests[] = { 1092 { &test_fctiduz, "fctiduz", DOUBLE_TEST }, 1093 { &test_fctidu, "fctidu", DOUBLE_TEST }, 1094 { &test_fctiwuz, "fctiwuz", SINGLE_TEST }, 1095 { &test_fctiwu, "fctiwu", SINGLE_TEST }, 1096 { NULL, NULL } 1097}; 1098 1099static void setup_sp_fp_args(fp_test_args_t * targs, Bool swap_inputs) 1100{ 1101 int a_idx, b_idx, i; 1102 void * inA, * inB; 1103 void * vec_src = swap_inputs ? &vec_out : &vec_inB; 1104 1105 for (i = 0; i < 4; i++) { 1106 a_idx = targs->fra_idx; 1107 b_idx = targs->frb_idx; 1108 inA = (void *)&spec_sp_fargs[a_idx]; 1109 inB = (void *)&spec_sp_fargs[b_idx]; 1110 // copy single precision FP into vector element i 1111 memcpy(((void *)&vec_inA) + (i * 4), inA, 4); 1112 memcpy(vec_src + (i * 4), inB, 4); 1113 targs++; 1114 } 1115} 1116 1117static void setup_dp_fp_args(fp_test_args_t * targs, Bool swap_inputs) 1118{ 1119 int a_idx, b_idx, i; 1120 void * inA, * inB; 1121 void * vec_src = swap_inputs ? (void *)&vec_out : (void *)&vec_inB; 1122 1123 for (i = 0; i < 2; i++) { 1124 a_idx = targs->fra_idx; 1125 b_idx = targs->frb_idx; 1126 inA = (void *)&spec_fargs[a_idx]; 1127 inB = (void *)&spec_fargs[b_idx]; 1128 // copy double precision FP into vector element i 1129 memcpy(((void *)&vec_inA) + (i * 8), inA, 8); 1130 memcpy(vec_src + (i * 8), inB, 8); 1131 targs++; 1132 } 1133} 1134 1135#define VX_NOT_CMP_OP 0xffffffff 1136static void print_vector_fp_result(unsigned int cc, vx_fp_test_t * test_group, int i) 1137{ 1138 int a_idx, b_idx, k; 1139 char * name = malloc(20); 1140 int dp = test_group->precision == DOUBLE_TEST ? 1 : 0; 1141 int loops = dp ? 2 : 4; 1142 fp_test_args_t * targs = &test_group->targs[i]; 1143 unsigned long long * frA_dp, * frB_dp, * dst_dp; 1144 unsigned int * frA_sp, *frB_sp, * dst_sp; 1145 strcpy(name, test_group->name); 1146 printf("#%d: %s%s ", dp? i/2 : i/4, name, (do_dot ? "." : "")); 1147 for (k = 0; k < loops; k++) { 1148 a_idx = targs->fra_idx; 1149 b_idx = targs->frb_idx; 1150 if (k) 1151 printf(" AND "); 1152 if (dp) { 1153 frA_dp = (unsigned long long *)&spec_fargs[a_idx]; 1154 frB_dp = (unsigned long long *)&spec_fargs[b_idx]; 1155 printf("%016llx %s %016llx", *frA_dp, test_group->op, *frB_dp); 1156 } else { 1157 frA_sp = (unsigned int *)&spec_sp_fargs[a_idx]; 1158 frB_sp = (unsigned int *)&spec_sp_fargs[b_idx]; 1159 printf("%08x %s %08x", *frA_sp, test_group->op, *frB_sp); 1160 } 1161 targs++; 1162 } 1163 if (cc != VX_NOT_CMP_OP) 1164 printf(" ? cc=%x", cc); 1165 1166 if (dp) { 1167 dst_dp = (unsigned long long *) &vec_out; 1168 printf(" => %016llx %016llx\n", dst_dp[0], dst_dp[1]); 1169 } else { 1170 dst_sp = (unsigned int *) &vec_out; 1171 printf(" => %08x %08x %08x %08x\n", dst_sp[0], dst_sp[1], dst_sp[2], dst_sp[3]); 1172 } 1173 free(name); 1174} 1175 1176 1177static void print_vx_aORm_fp_result(unsigned long long * XT_arg, unsigned long long * XB_arg, 1178 vx_fp_test_t * test_group, int i) 1179{ 1180 int a_idx, k; 1181 char * name = malloc(20); 1182 int dp = test_group->precision == DOUBLE_TEST ? 1 : 0; 1183 int loops = dp ? 2 : 4; 1184 fp_test_args_t * targs = &test_group->targs[i]; 1185 unsigned long long frA_dp, * dst_dp; 1186 unsigned int frA_sp, * dst_sp; 1187 1188 strcpy(name, test_group->name); 1189 if (do_aXp) 1190 if (dp) 1191 strcat(name, "adp"); 1192 else 1193 strcat(name, "asp"); 1194 else 1195 if (dp) 1196 strcat(name, "mdp"); 1197 else 1198 strcat(name, "msp"); 1199 1200 printf("#%d: %s ", dp? i/2 : i/4, name); 1201 for (k = 0; k < loops; k++) { 1202 a_idx = targs->fra_idx; 1203 if (k) 1204 printf(" AND "); 1205 if (dp) { 1206 frA_dp = *((unsigned long long *)&spec_fargs[a_idx]); 1207 printf("%s(%016llx,%016llx,%016llx)", test_group->op, XT_arg[k], frA_dp, XB_arg[k]); 1208 } else { 1209 unsigned int * xt_sp = (unsigned int *)XT_arg; 1210 unsigned int * xb_sp = (unsigned int *)XB_arg; 1211 frA_sp = *((unsigned int *)&spec_sp_fargs[a_idx]); 1212 printf("%s(%08x,%08x,%08x)", test_group->op, xt_sp[k], frA_sp, xb_sp[k]); 1213 } 1214 targs++; 1215 } 1216 1217 if (dp) { 1218 dst_dp = (unsigned long long *) &vec_out; 1219 printf(" => %016llx %016llx\n", dst_dp[0], dst_dp[1]); 1220 } else { 1221 dst_sp = (unsigned int *) &vec_out; 1222 printf(" => %08x %08x %08x %08x\n", dst_sp[0], dst_sp[1], dst_sp[2], dst_sp[3]); 1223 } 1224 free(name); 1225} 1226 1227/* This function currently only supports double precision input arguments. */ 1228static void test_vx_simple_scalar_fp_ops(void) 1229{ 1230 test_func_t func; 1231 int k = 0; 1232 1233 build_special_fargs_table(); 1234 while ((func = vx_simple_scalar_fp_tests[k].test_func)) { 1235 unsigned long long * frap, * frbp, * dst; 1236 unsigned int * pv; 1237 int idx; 1238 vx_fp_test_t test_group = vx_simple_scalar_fp_tests[k]; 1239 Bool convToWord = (test_group.type == VX_CONV_WORD); 1240 if (test_group.precision != DOUBLE_TEST) { 1241 fprintf(stderr, "Unsupported single precision for scalar op in test_vx_aORm_fp_ops\n"); 1242 exit(1); 1243 } 1244 pv = (unsigned int *)&vec_out; 1245 // clear vec_out 1246 for (idx = 0; idx < 4; idx++, pv++) 1247 *pv = 0; 1248 1249 /* If num_tests is exactly equal to nb_special_fargs, this implies the 1250 * instruction being tested only requires one floating point argument 1251 * (e.g. xssqrtdp). 1252 */ 1253 if (test_group.num_tests == nb_special_fargs && !test_group.targs) { 1254 void * inB, * vec_void_ptr = (void *)&vec_inB; 1255 int i; 1256 if (isLE) 1257 vec_void_ptr += 8; 1258 for (i = 0; i < nb_special_fargs; i++) { 1259 inB = (void *)&spec_fargs[i]; 1260 frbp = (unsigned long long *)&spec_fargs[i]; 1261 memcpy(vec_void_ptr, inB, 8); 1262 (*func)(); 1263 dst = (unsigned long long *) &vec_out; 1264 if (isLE) 1265 dst++; 1266 printf("#%d: %s %016llx => %016llx\n", i, test_group.name, *frbp, 1267 convToWord ? (*dst & 0x00000000ffffffffULL) : *dst); 1268 } 1269 } else { 1270 void * inA, * inB, * vecA_void_ptr, * vecB_void_ptr; 1271 unsigned int condreg, flags; 1272 int isTdiv = (strstr(test_group.name, "xstdivdp") != NULL) ? 1 : 0; 1273 int i; 1274 if (isLE) { 1275 vecA_void_ptr = (void *)&vec_inA + 8; 1276 vecB_void_ptr = (void *)&vec_inB + 8; 1277 } else { 1278 vecA_void_ptr = (void *)&vec_inA; 1279 vecB_void_ptr = (void *)&vec_inB; 1280 } 1281 for (i = 0; i < test_group.num_tests; i++) { 1282 fp_test_args_t aTest = test_group.targs[i]; 1283 inA = (void *)&spec_fargs[aTest.fra_idx]; 1284 inB = (void *)&spec_fargs[aTest.frb_idx]; 1285 frap = (unsigned long long *)&spec_fargs[aTest.fra_idx]; 1286 frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx]; 1287 // Only need to copy one doubleword into each vector's element 0 1288 memcpy(vecA_void_ptr, inA, 8); 1289 memcpy(vecB_void_ptr, inB, 8); 1290 SET_FPSCR_ZERO; 1291 SET_CR_XER_ZERO; 1292 (*func)(); 1293 GET_CR(flags); 1294 if (isTdiv) { 1295 condreg = (flags & 0x000000f0) >> 4; 1296 printf("#%d: %s %016llx,%016llx => cr %x\n", i, test_group.name, *frap, *frbp, condreg); 1297 } else { 1298 dst = (unsigned long long *) &vec_out; 1299 if (isLE) 1300 dst++; 1301 printf("#%d: %s %016llx,%016llx => %016llx\n", i, test_group.name, 1302 *frap, *frbp, *dst); 1303 } 1304 } 1305 } 1306 printf( "\n" ); 1307 k++; 1308 } 1309} 1310 1311static void test_vx_aORm_fp_ops(void) 1312{ 1313 /* These ops need a third src argument, which is stored in element 0 of 1314 * VSX[XT] -- i.e., vec_out. For the xs<ZZZ>m{d|s}p cases, VSX[XT] holds 1315 * src3 and VSX[XB] holds src2; for the xs<ZZZ>a{d|s}p cases, VSX[XT] holds 1316 * src2 and VSX[XB] holds src3. The fp_test_args_t that holds the test 1317 * data (input args, result) contain only two inputs, so I arbitrarily 1318 * choose some spec_fargs elements for the third source argument. 1319 * Note that that by using the same input data for a given pair of 1320 * a{d|s}p/m{d|s}p-type instructions (by swapping the src2 and src3 1321 * arguments), the expected result should be the same. 1322 */ 1323 1324 test_func_t func; 1325 int k; 1326 char * test_name = (char *)malloc(20); 1327 k = 0; 1328 do_dot = False; 1329 1330 build_special_fargs_table(); 1331 while ((func = vx_aORm_fp_tests[k].test_func)) { 1332 int i, stride; 1333 Bool repeat = False; 1334 Bool scalar = False; 1335 unsigned long long * frap, * frbp, * dst; 1336 vx_fp_test_t test_group = vx_aORm_fp_tests[k]; 1337 vx_fp_test_type test_type = test_group.type; 1338 do_dp = test_group.precision == DOUBLE_TEST ? True : False; 1339 frap = frbp = NULL; 1340 1341 if (test_type < VX_VECTOR_FP_MULT_AND_OP2) { 1342 scalar = True; 1343 strcpy(test_name, test_group.name); 1344 if (!repeat) { 1345 repeat = 1; 1346 stride = 1; 1347 // Only support double precision scalar ops in this function 1348 if (do_dp) { 1349 strcat(test_name, "adp"); 1350 } else { 1351 fprintf(stderr, "Unsupported single precision for scalar op in test_vx_aORm_fp_ops\n"); 1352 exit(1); 1353 } 1354 do_aXp = True; 1355 } 1356 } else if (test_type < VX_BASIC_CMP) { 1357 // Then it must be a VX_VECTOR_xxx type 1358 stride = do_dp ? 2 : 4; 1359 if (!repeat) { 1360 // No need to work up the testcase name here, since that will be done in 1361 // the print_vx_aORm_fp_result() function we'll call for vector-type ops. 1362 repeat = 1; 1363 do_aXp = True; 1364 } 1365 } else { 1366 printf("ERROR: Invalid VX FP test type %d\n", test_type); 1367 exit(1); 1368 } 1369 1370again: 1371 for (i = 0; i < test_group.num_tests; i+=stride) { 1372 void * inA, * inB; 1373 int m, fp_idx[4]; 1374 unsigned long long vsr_XT[2]; 1375 unsigned long long vsr_XB[2]; 1376 fp_test_args_t aTest = test_group.targs[i]; 1377 for (m = 0; m < stride; m++) 1378 fp_idx[m] = i % (nb_special_fargs - stride) + m; 1379 1380 /* When repeat == True, we're on the first time through of one of the VX_FP_SMx 1381 * test types, meaning we're testing a xs<ZZZ>adp case, thus we have to swap 1382 * inputs as described above: 1383 * src2 <= VSX[XT] 1384 * src3 <= VSX[XB] 1385 */ 1386 if (scalar) { 1387#ifdef VGP_ppc64le_linux 1388#define VECTOR_ADDR(_v) ((void *)&_v) + 8 1389#else 1390#define VECTOR_ADDR(_v) ((void *)&_v) 1391#endif 1392 // For scalar op, only need to copy one doubleword into each vector's element 0 1393 inA = (void *)&spec_fargs[aTest.fra_idx]; 1394 inB = (void *)&spec_fargs[aTest.frb_idx]; 1395 frap = (unsigned long long *)&spec_fargs[aTest.fra_idx]; 1396 memcpy(VECTOR_ADDR(vec_inA), inA, 8); 1397 if (repeat) { 1398 memcpy(VECTOR_ADDR(vec_out), inB, 8); // src2 1399 memcpy(VECTOR_ADDR(vec_inB), &spec_fargs[fp_idx[0]], 8); //src3 1400 frbp = (unsigned long long *)&spec_fargs[fp_idx[0]]; 1401 } else { 1402 frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx]; 1403 memcpy(VECTOR_ADDR(vec_inB), inB, 8); // src2 1404 memcpy(VECTOR_ADDR(vec_out), &spec_fargs[fp_idx[0]], 8); //src3 1405 } 1406 memcpy(vsr_XT, VECTOR_ADDR(vec_out), 8); 1407 } else { 1408 int j, loops = do_dp ? 2 : 4; 1409 size_t len = do_dp ? 8 : 4; 1410 void * vec_src = repeat ? (void *)&vec_inB : (void *)&vec_out; 1411 for (j = 0; j < loops; j++) { 1412 if (do_dp) 1413 memcpy(vec_src + (j * len), &spec_fargs[fp_idx[j]], len); 1414 else 1415 memcpy(vec_src + (j * len), &spec_sp_fargs[fp_idx[j]], len); 1416 } 1417 if (do_dp) 1418 setup_dp_fp_args(&test_group.targs[i], repeat); 1419 else 1420 setup_sp_fp_args(&test_group.targs[i], repeat); 1421 1422 memcpy(vsr_XT, &vec_out, 16); 1423 memcpy(vsr_XB, &vec_inB, 16); 1424 } 1425 1426 (*func)(); 1427 dst = (unsigned long long *) &vec_out; 1428 if (isLE) 1429 dst++; 1430 if (test_type < VX_VECTOR_FP_MULT_AND_OP2) 1431 printf( "#%d: %s %s(%016llx,%016llx,%016llx) = %016llx\n", i, 1432 test_name, test_group.op, vsr_XT[0], *frap, *frbp, *dst ); 1433 else 1434 print_vx_aORm_fp_result(vsr_XT, vsr_XB, &test_group, i); 1435 } 1436 printf( "\n" ); 1437 1438 if (repeat) { 1439 repeat = 0; 1440 if (test_type < VX_VECTOR_FP_MULT_AND_OP2) { 1441 strcpy(test_name, test_group.name); 1442 strcat(test_name, "mdp"); 1443 } 1444 do_aXp = False; 1445 goto again; 1446 } 1447 k++; 1448 } 1449 printf( "\n" ); 1450 free(test_name); 1451} 1452 1453static void test_vx_vector_one_fp_arg(void) 1454{ 1455 test_func_t func; 1456 int k; 1457 k = 0; 1458 build_special_fargs_table(); 1459 1460 while ((func = vx_vector_one_fp_arg_tests[k].test_func)) { 1461 int idx, i; 1462 vx_fp_test_t test_group = vx_vector_one_fp_arg_tests[k]; 1463 Bool convToWord = (test_group.type == VX_CONV_WORD); 1464 Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False; 1465 Bool xvrespTest = (strstr(test_group.name , "xvresp") != NULL) ? True: False; 1466 int stride = dp ? 2 : 4; 1467 1468 for (i = 0; i < test_group.num_tests; i+=stride) { 1469 unsigned int * pv; 1470 void * inB; 1471 1472 pv = (unsigned int *)&vec_out; 1473 // clear vec_out 1474 for (idx = 0; idx < 4; idx++, pv++) 1475 *pv = 0; 1476 1477 if (dp) { 1478 int j; 1479 unsigned long long * frB_dp, *dst_dp; 1480 for (j = 0; j < 2; j++) { 1481 inB = (void *)&spec_fargs[i + j]; 1482 // copy double precision FP into vector element i 1483 memcpy(((void *)&vec_inB) + (j * 8), inB, 8); 1484 } 1485 // execute test insn 1486 (*func)(); 1487 dst_dp = (unsigned long long *) &vec_out; 1488 printf("#%d: %s ", i/2, test_group.name); 1489 for (j = 0; j < 2; j++) { 1490 if (j) 1491 printf("; "); 1492 frB_dp = (unsigned long long *)&spec_fargs[i + j]; 1493 printf("%s(%016llx)", test_group.op, *frB_dp); 1494 printf(" = %016llx", convToWord ? (dst_dp[j] & 0x00000000ffffffffULL) : dst_dp[j]); 1495 } 1496 printf("\n"); 1497 } else { 1498 int j; 1499 unsigned int * frB_sp, * dst_sp; 1500 1501 for (j = 0; j < 4; j++) { 1502 inB = (void *)&spec_sp_fargs[i + j]; 1503 // copy single precision FP into vector element i 1504 memcpy(((void *)&vec_inB) + (j * 4), inB, 4); 1505 } 1506 // execute test insn 1507 (*func)(); 1508 dst_sp = (unsigned int *) &vec_out; 1509 // print result 1510 printf("#%d: %s ", i/4, test_group.name); 1511 for (j = 0; j < 4; j++) { 1512 if (j) 1513 printf("; "); 1514 frB_sp = (unsigned int *)&spec_sp_fargs[i + j]; 1515 printf("%s(%08x)", test_group.op, *frB_sp); 1516 if (xvrespTest) { 1517 float calc_diff = fabs(spec_sp_fargs[i + j]/256); 1518 float sp_res; 1519 memcpy(&sp_res, &dst_sp[j], 4); 1520 float div_result = 1/spec_sp_fargs[i + j]; 1521 float real_diff = fabs(sp_res - div_result); 1522 printf( " ==> %s", 1523 ( ( sp_res == div_result ) 1524 || ( isnan(sp_res) && isnan(div_result) ) 1525 || ( real_diff <= calc_diff ) ) ? "PASS" 1526 : "FAIL"); 1527 } else { 1528 printf(" = %08x", dst_sp[j]); 1529 } 1530 } 1531 printf("\n"); 1532 } 1533 } 1534 k++; 1535 printf( "\n" ); 1536 } 1537 1538} 1539 1540/* This function assumes the instruction being tested requires two args. */ 1541static void test_vx_vector_fp_ops(void) 1542{ 1543 test_func_t func; 1544 int k; 1545 k = 0; 1546 build_special_fargs_table(); 1547 1548 while ((func = vx_vector_fp_tests[k].test_func)) { 1549 int idx, i, repeat = 1; 1550 vx_fp_test_t test_group = vx_vector_fp_tests[k]; 1551 int stride = test_group.precision == DOUBLE_TEST ? 2 : 4; 1552 do_dot = False; 1553 1554again: 1555 for (i = 0; i < test_group.num_tests; i+=stride) { 1556 unsigned int * pv, condreg; 1557 unsigned int flags; 1558 1559 pv = (unsigned int *)&vec_out; 1560 if (test_group.precision == DOUBLE_TEST) 1561 setup_dp_fp_args(&test_group.targs[i], False); 1562 else 1563 setup_sp_fp_args(&test_group.targs[i], False); 1564 1565 // clear vec_out 1566 for (idx = 0; idx < 4; idx++, pv++) 1567 *pv = 0; 1568 1569 // execute test insn 1570 SET_FPSCR_ZERO; 1571 SET_CR_XER_ZERO; 1572 (*func)(); 1573 GET_CR(flags); 1574 if (test_group.type == VX_BASIC_CMP) { 1575 condreg = (flags & 0x000000f0) >> 4; 1576 } else { 1577 condreg = VX_NOT_CMP_OP; 1578 } 1579 print_vector_fp_result(condreg, &test_group, i); 1580 } 1581 printf("\n"); 1582 if (repeat && test_group.type == VX_BASIC_CMP) { 1583 repeat = 0; 1584 do_dot = True; 1585 goto again; 1586 } 1587 k++; 1588 printf( "\n" ); 1589 } 1590} 1591 1592 1593// The div doubleword test data 1594signed long long div_dw_tdata[13][2] = { 1595 { 4, -4 }, 1596 { 4, -3 }, 1597 { 4, 4 }, 1598 { 4, -5 }, 1599 { 3, 8 }, 1600 { 0x8000000000000000ULL, 0xa }, 1601 { 0x50c, -1 }, 1602 { 0x50c, -4096 }, 1603 { 0x1234fedc, 0x8000a873 }, 1604 { 0xabcd87651234fedcULL, 0xa123b893 }, 1605 { 0x123456789abdcULL, 0 }, 1606 { 0, 2 }, 1607 { 0x77, 0xa3499 } 1608}; 1609#define dw_tdata_len (sizeof(div_dw_tdata)/sizeof(signed long long)/2) 1610 1611// The div word test data 1612unsigned int div_w_tdata[6][2] = { 1613 { 0, 2 }, 1614 { 2, 0 }, 1615 { 0x7abc1234, 0xf0000000 }, 1616 { 0xfabc1234, 5 }, 1617 { 77, 66 }, 1618 { 5, 0xfabc1234 }, 1619}; 1620#define w_tdata_len (sizeof(div_w_tdata)/sizeof(unsigned int)/2) 1621 1622typedef struct div_ext_test 1623{ 1624 test_func_t test_func; 1625 const char *name; 1626 int num_tests; 1627 div_type_t div_type; 1628 precision_type_t precision; 1629} div_ext_test_t; 1630 1631static div_ext_test_t div_tests[] = { 1632#ifdef __powerpc64__ 1633 { &test_divde, "divde", dw_tdata_len, DIV_BASE, DOUBLE_TEST }, 1634 { &test_divde, "divdeo", dw_tdata_len, DIV_OE, DOUBLE_TEST }, 1635#endif 1636 { &test_divweu, "divweu", w_tdata_len, DIV_BASE, SINGLE_TEST }, 1637 { &test_divweu, "divweuo", w_tdata_len, DIV_OE, SINGLE_TEST }, 1638 { NULL, NULL, 0, 0, 0 } 1639}; 1640 1641static void test_div_extensions(void) 1642{ 1643 test_func_t func; 1644 int k; 1645 k = 0; 1646 1647 while ((func = div_tests[k].test_func)) { 1648 int i, repeat = 1; 1649 div_ext_test_t test_group = div_tests[k]; 1650 do_dot = False; 1651 1652again: 1653 for (i = 0; i < test_group.num_tests; i++) { 1654 unsigned int condreg; 1655 1656 if (test_group.div_type == DIV_OE) 1657 do_OE = True; 1658 else 1659 do_OE = False; 1660 1661 if (test_group.precision == DOUBLE_TEST) { 1662 r14 = div_dw_tdata[i][0]; 1663 r15 = div_dw_tdata[i][1]; 1664 } else { 1665 r14 = div_w_tdata[i][0]; 1666 r15 = div_w_tdata[i][1]; 1667 } 1668 // execute test insn 1669 (*func)(); 1670 condreg = (div_flags & 0xf0000000) >> 28; 1671 printf("#%d: %s%s: ", i, test_group.name, do_dot ? "." : ""); 1672 if (test_group.precision == DOUBLE_TEST) { 1673 printf("0x%016llx / 0x%016llx = 0x%016llx;", 1674 div_dw_tdata[i][0], div_dw_tdata[i][1], (signed long long) r17); 1675 } else { 1676 printf("0x%08x / 0x%08x = 0x%08x;", 1677 div_w_tdata[i][0], div_w_tdata[i][1], (unsigned int) r17); 1678 } 1679 printf(" CR=%x; XER=%x\n", condreg, div_xer); 1680 } 1681 printf("\n"); 1682 if (repeat) { 1683 repeat = 0; 1684 do_dot = True; 1685 goto again; 1686 } 1687 k++; 1688 printf( "\n" ); 1689 } 1690 1691} 1692 1693static void test_fct_ops(void) 1694{ 1695 test_func_t func; 1696 int k; 1697 k = 0; 1698 1699 while ((func = fct_tests[k].test_func)) { 1700 int i, repeat = 1; 1701 simple_test_t test_group = fct_tests[k]; 1702 do_dot = False; 1703 1704again: 1705 for (i = 0; i < nb_special_fargs; i++) { 1706 double result; 1707#define SINGLE_MASK 0x00000000FFFFFFFFULL 1708 1709 f14 = spec_fargs[i]; 1710 // execute test insn 1711 SET_FPSCR_ZERO; 1712 (*func)(); 1713 result = f17; 1714 printf("#%d: %s%s: ", i, test_group.name, do_dot ? "." : ""); 1715 printf("0x%016llx (%e) ==> 0x%016llx\n", 1716 *((unsigned long long *)(&spec_fargs[i])), spec_fargs[i], 1717 test_group.precision == SINGLE_TEST ? (SINGLE_MASK & 1718 *((unsigned long long *)(&result))) : 1719 *((unsigned long long *)(&result))); 1720 } 1721 printf("\n"); 1722 if (repeat) { 1723 repeat = 0; 1724 do_dot = True; 1725 goto again; 1726 } 1727 k++; 1728 printf( "\n" ); 1729 } 1730} 1731 1732#ifdef __powerpc64__ 1733void test_stdbrx(void) 1734{ 1735 unsigned long long store, val = 0xdeadbacf12345678ULL; 1736 printf("stdbrx: 0x%llx ==> ", val); 1737 r17 = (HWord_t)val; 1738 r14 = (HWord_t)&store; 1739 __asm__ __volatile__ ("stdbrx %0, 0, %1" : : "r"(r17), "r"(r14)); 1740 printf("0x%llx\n", store); 1741 printf( "\n" ); 1742} 1743#endif 1744 1745static test_table_t 1746 all_tests[] = 1747{ 1748 { &test_vx_vector_one_fp_arg, 1749 "Test VSX vector single arg instructions", OTHER_INST }, 1750 { &test_vx_vector_fp_ops, 1751 "Test VSX floating point compare and basic arithmetic instructions", OTHER_INST }, 1752#ifdef __powerpc64__ 1753 { &test_bpermd, 1754 "Test bit permute double", OTHER_INST }, 1755#endif 1756 { &test_xxsel, 1757 "Test xxsel instruction", OTHER_INST }, 1758 { &test_xxspltw, 1759 "Test xxspltw instruction", OTHER_INST }, 1760 { &test_div_extensions, 1761 "Test div extensions", SCALAR_DIV_INST }, 1762 { &test_fct_ops, 1763 "Test floating point convert [word | doubleword] unsigned, with round toward zero", OTHER_INST }, 1764#ifdef __powerpc64__ 1765 { &test_stdbrx, 1766 "Test stdbrx instruction", OTHER_INST }, 1767#endif 1768 { &test_vx_aORm_fp_ops, 1769 "Test floating point arithmetic instructions -- with a{d|s}p or m{d|s}p", OTHER_INST }, 1770 { &test_vx_simple_scalar_fp_ops, 1771 "Test scalar floating point arithmetic instructions", OTHER_INST }, 1772 { NULL, NULL } 1773}; 1774#endif // HAS_VSX 1775 1776static void usage (void) 1777{ 1778 fprintf(stderr, 1779 "Usage: test_isa_3_0 [OPTIONS]\n" 1780 "\t-d: test scalar division instructions (default)\n" 1781 "\t-o: test non scalar division instructions (default)\n" 1782 "\t-A: test all instructions (default)\n" 1783 "\t-h: display this help and exit\n" 1784 ); 1785} 1786 1787int main(int argc, char *argv[]) 1788{ 1789#ifdef HAS_VSX 1790 1791 test_table_t aTest; 1792 test_func_t func; 1793 int i = 0; 1794 int c; 1795 unsigned int test_run_mask = 0; 1796 1797 /* NOTE, ISA 3.0 introduces the OV32 and CA32 bits in the FPSCR. These 1798 * bits are set on various arithimetic instructions. This means this 1799 * test generates different FPSCR output for pre ISA 3.0 versus ISA 3.0 1800 * hardware. The tests have been grouped so that the tests that generate 1801 * different results are in one test and the rest are in a different test. 1802 * this minimizes the size of the result expect files for the two cases. 1803 */ 1804 1805 while ((c = getopt(argc, argv, "doAh")) != -1) { 1806 switch (c) { 1807 case 'd': 1808 test_run_mask |= SCALAR_DIV_INST; 1809 break; 1810 case 'o': 1811 test_run_mask |= OTHER_INST; 1812 break; 1813 case 'A': 1814 test_run_mask = 0xFFFF; 1815 break; 1816 case 'h': 1817 usage(); 1818 return 0; 1819 1820 default: 1821 usage(); 1822 fprintf(stderr, "Unknown argument: '%c'\n", c); 1823 return 1; 1824 } 1825 } 1826 1827 while ((func = all_tests[i].test_category)) { 1828 aTest = all_tests[i]; 1829 if(test_run_mask & aTest.test_group) { 1830 /* Test group specified on command line */ 1831 1832 printf( "%s\n", aTest.name ); 1833 (*func)(); 1834 } 1835 i++; 1836 } 1837 if (spec_fargs) 1838 free(spec_fargs); 1839 if (spec_sp_fargs) 1840 free(spec_sp_fargs); 1841 1842#endif // HAS _VSX 1843 1844 return 0; 1845} 1846