1/* Copyright (C) 2011 IBM 2 3 Author: Maynard Johnson <maynardj@us.ibm.com> 4 5 This program is free software; you can redistribute it and/or 6 modify it under the terms of the GNU General Public License as 7 published by the Free Software Foundation; either version 2 of the 8 License, or (at your option) any later version. 9 10 This program is distributed in the hope that it will be useful, but 11 WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program; if not, write to the Free Software 17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 18 02111-1307, USA. 19 20 The GNU General Public License is contained in the file COPYING. 21 */ 22 23#ifdef HAS_VSX 24 25#include <stdio.h> 26#include <stdint.h> 27#include <stdlib.h> 28#include <string.h> 29#include <malloc.h> 30#include <altivec.h> 31#include <math.h> 32 33#ifndef __powerpc64__ 34typedef uint32_t HWord_t; 35#else 36typedef uint64_t HWord_t; 37#endif /* __powerpc64__ */ 38 39typedef unsigned char Bool; 40#define True 1 41#define False 0 42register HWord_t r14 __asm__ ("r14"); 43register HWord_t r15 __asm__ ("r15"); 44register HWord_t r16 __asm__ ("r16"); 45register HWord_t r17 __asm__ ("r17"); 46register double f14 __asm__ ("fr14"); 47register double f15 __asm__ ("fr15"); 48register double f16 __asm__ ("fr16"); 49register double f17 __asm__ ("fr17"); 50 51static volatile unsigned int div_flags, div_xer; 52 53#define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7" 54 55#define SET_CR(_arg) \ 56 __asm__ __volatile__ ("mtcr %0" : : "b"(_arg) : ALLCR ); 57 58#define SET_XER(_arg) \ 59 __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" ); 60 61#define GET_CR(_lval) \ 62 __asm__ __volatile__ ("mfcr %0" : "=b"(_lval) ) 63 64#define GET_XER(_lval) \ 65 __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) ) 66 67#define GET_CR_XER(_lval_cr,_lval_xer) \ 68 do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0) 69 70#define SET_CR_ZERO \ 71 SET_CR(0) 72 73#define SET_XER_ZERO \ 74 SET_XER(0) 75 76#define SET_CR_XER_ZERO \ 77 do { SET_CR_ZERO; SET_XER_ZERO; } while (0) 78 79#define SET_FPSCR_ZERO \ 80 do { double _d = 0.0; \ 81 __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \ 82 } while (0) 83 84 85typedef void (*test_func_t)(void); 86typedef struct test_table test_table_t; 87 88 89/* These functions below that construct a table of floating point 90 * values were lifted from none/tests/ppc32/jm-insns.c. 91 */ 92 93#if defined (DEBUG_ARGS_BUILD) 94#define AB_DPRINTF(fmt, args...) do { fprintf(stderr, fmt , ##args); } while (0) 95#else 96#define AB_DPRINTF(fmt, args...) do { } while (0) 97#endif 98 99static inline void register_farg (void *farg, 100 int s, uint16_t _exp, uint64_t mant) 101{ 102 uint64_t tmp; 103 104 tmp = ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant; 105 *(uint64_t *)farg = tmp; 106 AB_DPRINTF("%d %03x %013llx => %016llx %0e\n", 107 s, _exp, mant, *(uint64_t *)farg, *(double *)farg); 108} 109 110 111typedef struct fp_test_args { 112 int fra_idx; 113 int frb_idx; 114} fp_test_args_t; 115 116 117fp_test_args_t fp_cmp_tests[] = { 118 {8, 8}, 119 {8, 14}, 120 {8, 6}, 121 {8, 5}, 122 {8, 4}, 123 {8, 7}, 124 {8, 9}, 125 {8, 11}, 126 {14, 8}, 127 {14, 14}, 128 {14, 6}, 129 {14, 5}, 130 {14, 4}, 131 {14, 7}, 132 {14, 9}, 133 {14, 11}, 134 {6, 8}, 135 {6, 14}, 136 {6, 6}, 137 {6, 5}, 138 {6, 4}, 139 {6, 7}, 140 {6, 9}, 141 {6, 11}, 142 {5, 8}, 143 {5, 14}, 144 {5, 6}, 145 {5, 5}, 146 {5, 4}, 147 {5, 7}, 148 {5, 9}, 149 {5, 11}, 150 {4, 8}, 151 {4, 14}, 152 {4, 6}, 153 {4, 5}, 154 {4, 1}, 155 {4, 7}, 156 {4, 9}, 157 {4, 11}, 158 {7, 8}, 159 {7, 14}, 160 {7, 6}, 161 {7, 5}, 162 {7, 4}, 163 {7, 7}, 164 {7, 9}, 165 {7, 11}, 166 {10, 8}, 167 {10, 14}, 168 {10, 6}, 169 {10, 5}, 170 {10, 4}, 171 {10, 7}, 172 {10, 9}, 173 {10, 10}, 174 {12, 8}, 175 {12, 14}, 176 {12, 6}, 177 {12, 5}, 178 {1, 1}, 179 {2, 2}, 180 {3, 3}, 181 {4, 4}, 182}; 183 184 185fp_test_args_t two_arg_fp_tests[] = { 186 {8, 8}, 187 {8, 14}, 188 {15, 16}, 189 {8, 5}, 190 {8, 4}, 191 {8, 7}, 192 {8, 9}, 193 {8, 11}, 194 {14, 8}, 195 {14, 14}, 196 {14, 6}, 197 {14, 5}, 198 {14, 4}, 199 {14, 7}, 200 {14, 9}, 201 {14, 11}, 202 {6, 8}, 203 {6, 14}, 204 {6, 6}, 205 {6, 5}, 206 {6, 4}, 207 {6, 7}, 208 {6, 9}, 209 {6, 11}, 210 {5, 8}, 211 {5, 14}, 212 {5, 6}, 213 {5, 5}, 214 {5, 4}, 215 {5, 7}, 216 {5, 9}, 217 {5, 11}, 218 {4, 8}, 219 {4, 14}, 220 {4, 6}, 221 {4, 5}, 222 {4, 1}, 223 {4, 7}, 224 {4, 9}, 225 {4, 11}, 226 {7, 8}, 227 {7, 14}, 228 {7, 6}, 229 {7, 5}, 230 {7, 4}, 231 {7, 7}, 232 {7, 9}, 233 {7, 11}, 234 {10, 8}, 235 {10, 14}, 236 {12, 6}, 237 {12, 5}, 238 {10, 4}, 239 {10, 7}, 240 {10, 9}, 241 {10, 11}, 242 {12, 8 }, 243 {12, 14}, 244 {12, 6}, 245 {15, 16}, 246 {15, 16}, 247 {9, 11}, 248 {11, 11}, 249 {11, 12} 250}; 251 252 253static int nb_special_fargs; 254static double * spec_fargs; 255static float * spec_sp_fargs; 256 257static void build_special_fargs_table(void) 258{ 259/* 260 Entry Sign Exp fraction Special value 261 0 0 3fd 0x8000000000000ULL Positive finite number 262 1 0 404 0xf000000000000ULL ... 263 2 0 001 0x8000000b77501ULL ... 264 3 0 7fe 0x800000000051bULL ... 265 4 0 012 0x3214569900000ULL ... 266 5 0 000 0x0000000000000ULL +0.0 (+zero) 267 6 1 000 0x0000000000000ULL -0.0 (-zero) 268 7 0 7ff 0x0000000000000ULL +infinity 269 8 1 7ff 0x0000000000000ULL -infinity 270 9 0 7ff 0x7FFFFFFFFFFFFULL +SNaN 271 10 1 7ff 0x7FFFFFFFFFFFFULL -SNaN 272 11 0 7ff 0x8000000000000ULL +QNaN 273 12 1 7ff 0x8000000000000ULL -QNaN 274 13 1 000 0x8340000078000ULL Denormalized val (zero exp and non-zero fraction) 275 14 1 40d 0x0650f5a07b353ULL Negative finite number 276 15 0 412 0x32585a9900000ULL A couple more positive finite numbers 277 16 0 413 0x82511a2000000ULL ... 278*/ 279 280 uint64_t mant; 281 uint16_t _exp; 282 int s; 283 int j, i = 0; 284 285 if (spec_fargs) 286 return; 287 288 spec_fargs = malloc( 17 * sizeof(double) ); 289 spec_sp_fargs = malloc( 17 * sizeof(float) ); 290 291 // #0 292 s = 0; 293 _exp = 0x3fd; 294 mant = 0x8000000000000ULL; 295 register_farg(&spec_fargs[i++], s, _exp, mant); 296 297 // #1 298 s = 0; 299 _exp = 0x404; 300 mant = 0xf000000000000ULL; 301 register_farg(&spec_fargs[i++], s, _exp, mant); 302 303 /* None of the ftdiv tests succeed. 304 * FRA = value #0; FRB = value #1 305 * ea_ = -2; e_b = 5 306 * fl_flag || fg_flag || fe_flag = 100 307 */ 308 309 /************************************************* 310 * fe_flag tests 311 * 312 *************************************************/ 313 314 /* fe_flag <- 1 if FRA is a NaN 315 * FRA = value #9; FRB = value #1 316 * e_a = 1024; e_b = 5 317 * fl_flag || fg_flag || fe_flag = 101 318 */ 319 320 /* fe_flag <- 1 if FRB is a NaN 321 * FRA = value #1; FRB = value #12 322 * e_a = 5; e_b = 1024 323 * fl_flag || fg_flag || fe_flag = 101 324 */ 325 326 /* fe_flag <- 1 if e_b <= -1022 327 * FRA = value #0; FRB = value #2 328 * e_a = -2; e_b = -1022 329 * fl_flag || fg_flag || fe_flag = 101 330 * 331 */ 332 // #2 333 s = 0; 334 _exp = 0x001; 335 mant = 0x8000000b77501ULL; 336 register_farg(&spec_fargs[i++], s, _exp, mant); 337 338 /* fe_flag <- 1 if e_b >= 1021 339 * FRA = value #1; FRB = value #3 340 * e_a = 5; e_b = 1023 341 * fl_flag || fg_flag || fe_flag = 101 342 */ 343 // #3 344 s = 0; 345 _exp = 0x7fe; 346 mant = 0x800000000051bULL; 347 register_farg(&spec_fargs[i++], s, _exp, mant); 348 349 /* fe_flag <- 1 if FRA != 0 && e_a - e_b >= 1023 350 * Let FRA = value #3 and FRB be value #0. 351 * e_a = 1023; e_b = -2 352 * fl_flag || fg_flag || fe_flag = 101 353 */ 354 355 /* fe_flag <- 1 if FRA != 0 && e_a - e_b <= -1023 356 * Let FRA = value #0 above and FRB be value #3 above 357 * e_a = -2; e_b = 1023 358 * fl_flag || fg_flag || fe_flag = 101 359 */ 360 361 /* fe_flag <- 1 if FRA != 0 && e_a <= -970 362 * Let FRA = value #4 and FRB be value #0 363 * e_a = -1005; e_b = -2 364 * fl_flag || fg_flag || fe_flag = 101 365 */ 366 // #4 367 s = 0; 368 _exp = 0x012; 369 mant = 0x3214569900000ULL; 370 register_farg(&spec_fargs[i++], s, _exp, mant); 371 372 /************************************************* 373 * fg_flag tests 374 * 375 *************************************************/ 376 /* fg_flag <- 1 if FRA is an Infinity 377 * NOTE: FRA = Inf also sets fe_flag 378 * Do two tests, using values #7 and #8 (+/- Inf) for FRA. 379 * Test 1: 380 * Let FRA be value #7 and FRB be value #1 381 * e_a = 1024; e_b = 5 382 * fl_flag || fg_flag || fe_flag = 111 383 * 384 * Test 2: 385 * Let FRA be value #8 and FRB be value #1 386 * e_a = 1024; e_b = 5 387 * fl_flag || fg_flag || fe_flag = 111 388 * 389 */ 390 391 /* fg_flag <- 1 if FRB is an Infinity 392 * NOTE: FRB = Inf also sets fe_flag 393 * Let FRA be value #1 and FRB be value #7 394 * e_a = 5; e_b = 1024 395 * fl_flag || fg_flag || fe_flag = 111 396 */ 397 398 /* fg_flag <- 1 if FRB is denormalized 399 * NOTE: e_b < -1022 ==> fe_flag <- 1 400 * Let FRA be value #0 and FRB be value #13 401 * e_a = -2; e_b = -1023 402 * fl_flag || fg_flag || fe_flag = 111 403 */ 404 405 /* fg_flag <- 1 if FRB is +zero 406 * NOTE: FRA = Inf also sets fe_flag 407 * Let FRA = val #5; FRB = val #5 408 * ea_ = -1023; e_b = -1023 409 * fl_flag || fg_flag || fe_flag = 111 410 */ 411 412 /* fg_flag <- 1 if FRB is -zero 413 * NOTE: FRA = Inf also sets fe_flag 414 * Let FRA = val #5; FRB = val #6 415 * ea_ = -1023; e_b = -1023 416 * fl_flag || fg_flag || fe_flag = 111 417 */ 418 419 /* Special values */ 420 /* +0.0 : 0 0x000 0x0000000000000 */ 421 // #5 422 s = 0; 423 _exp = 0x000; 424 mant = 0x0000000000000ULL; 425 register_farg(&spec_fargs[i++], s, _exp, mant); 426 427 /* -0.0 : 1 0x000 0x0000000000000 */ 428 // #6 429 s = 1; 430 _exp = 0x000; 431 mant = 0x0000000000000ULL; 432 register_farg(&spec_fargs[i++], s, _exp, mant); 433 434 /* +infinity : 0 0x7FF 0x0000000000000 */ 435 // #7 436 s = 0; 437 _exp = 0x7FF; 438 mant = 0x0000000000000ULL; 439 register_farg(&spec_fargs[i++], s, _exp, mant); 440 441 /* -infinity : 1 0x7FF 0x0000000000000 */ 442 // #8 443 s = 1; 444 _exp = 0x7FF; 445 mant = 0x0000000000000ULL; 446 register_farg(&spec_fargs[i++], s, _exp, mant); 447 448 /* +QNaN : 0 0x7FF 0x7FFFFFFFFFFFF */ 449 // #9 450 s = 0; 451 _exp = 0x7FF; 452 mant = 0x7FFFFFFFFFFFFULL; 453 register_farg(&spec_fargs[i++], s, _exp, mant); 454 455 /* -QNaN : 1 0x7FF 0x7FFFFFFFFFFFF */ 456 // #10 457 s = 1; 458 _exp = 0x7FF; 459 mant = 0x7FFFFFFFFFFFFULL; 460 register_farg(&spec_fargs[i++], s, _exp, mant); 461 462 /* +SNaN : 0 0x7FF 0x8000000000000 */ 463 // #11 464 s = 0; 465 _exp = 0x7FF; 466 mant = 0x8000000000000ULL; 467 register_farg(&spec_fargs[i++], s, _exp, mant); 468 469 /* -SNaN : 1 0x7FF 0x8000000000000 */ 470 // #12 471 s = 1; 472 _exp = 0x7FF; 473 mant = 0x8000000000000ULL; 474 register_farg(&spec_fargs[i++], s, _exp, mant); 475 476 /* denormalized value */ 477 // #13 478 s = 1; 479 _exp = 0x000; 480 mant = 0x8340000078000ULL; 481 register_farg(&spec_fargs[i++], s, _exp, mant); 482 483 /* Negative finite number */ 484 // #14 485 s = 1; 486 _exp = 0x40d; 487 mant = 0x0650f5a07b353ULL; 488 register_farg(&spec_fargs[i++], s, _exp, mant); 489 490 /* A couple positive finite numbers ... */ 491 // #15 492 s = 0; 493 _exp = 0x412; 494 mant = 0x32585a9900000ULL; 495 register_farg(&spec_fargs[i++], s, _exp, mant); 496 497 // #16 498 s = 0; 499 _exp = 0x413; 500 mant = 0x82511a2000000ULL; 501 register_farg(&spec_fargs[i++], s, _exp, mant); 502 503 nb_special_fargs = i; 504 for (j = 0; j < i; j++) { 505 spec_sp_fargs[j] = spec_fargs[j]; 506 } 507} 508 509 510struct test_table 511{ 512 test_func_t test_category; 513 char * name; 514}; 515 516typedef enum { 517 SINGLE_TEST, 518 DOUBLE_TEST 519} precision_type_t; 520 521typedef enum { 522 VX_SCALAR_FP_NMSUB = 0, 523 // ALL VECTOR-TYPE OPS SHOULD BE ADDED AFTER THIS LINE 524 VX_VECTOR_FP_MULT_AND_OP2 = 10, 525 // and before this line 526 VX_BASIC_CMP = 30, 527 VX_CONV_WORD, 528 VX_DEFAULT 529} vx_fp_test_type; 530 531typedef struct vx_fp_test 532{ 533 test_func_t test_func; 534 const char * name; 535 fp_test_args_t * targs; 536 int num_tests; 537 precision_type_t precision; 538 vx_fp_test_type type; 539 const char * op; 540} vx_fp_test_t; 541 542static vector unsigned int vec_out, vec_inA, vec_inB, vec_inC; 543 544static Bool do_dot; 545static void test_xvcmpeqdp(void) 546{ 547 if (do_dot) 548 __asm__ __volatile__ ("xvcmpeqdp. %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 549 else 550 __asm__ __volatile__ ("xvcmpeqdp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 551} 552 553static void test_xvcmpgedp(void) 554{ 555 if (do_dot) 556 __asm__ __volatile__ ("xvcmpgedp. %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 557 else 558 __asm__ __volatile__ ("xvcmpgedp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 559} 560 561static void test_xvcmpgtdp(void) 562{ 563 if (do_dot) 564 __asm__ __volatile__ ("xvcmpgtdp. %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 565 else 566 __asm__ __volatile__ ("xvcmpgtdp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 567} 568 569static void test_xvcmpeqsp(void) 570{ 571 if (do_dot) 572 __asm__ __volatile__ ("xvcmpeqsp. %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 573 else 574 __asm__ __volatile__ ("xvcmpeqsp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 575} 576 577static void test_xvcmpgesp(void) 578{ 579 if (do_dot) 580 __asm__ __volatile__ ("xvcmpgesp. %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 581 else 582 __asm__ __volatile__ ("xvcmpgesp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 583} 584 585static void test_xvcmpgtsp(void) 586{ 587 if (do_dot) 588 __asm__ __volatile__ ("xvcmpgtsp. %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 589 else 590 __asm__ __volatile__ ("xvcmpgtsp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 591} 592 593static Bool do_aXp; 594static Bool do_dp; 595static void test_xsnmsub(void) 596{ 597 if (do_aXp) 598 __asm__ __volatile__ ("xsnmsubadp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 599 else 600 __asm__ __volatile__ ("xsnmsubmdp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 601} 602 603static void test_xvmadd(void) 604{ 605 if (do_aXp) 606 if (do_dp) 607 __asm__ __volatile__ ("xvmaddadp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 608 else 609 __asm__ __volatile__ ("xvmaddasp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 610 else 611 if (do_dp) 612 __asm__ __volatile__ ("xvmaddmdp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 613 else 614 __asm__ __volatile__ ("xvmaddmsp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 615} 616 617static void test_xvnmadd(void) 618{ 619 if (do_aXp) 620 if (do_dp) 621 __asm__ __volatile__ ("xvnmaddadp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 622 else 623 __asm__ __volatile__ ("xvnmaddasp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 624 else 625 if (do_dp) 626 __asm__ __volatile__ ("xvnmaddmdp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 627 else 628 __asm__ __volatile__ ("xvnmaddmsp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 629} 630 631static void test_xvnmsub(void) 632{ 633 if (do_aXp) 634 if (do_dp) 635 __asm__ __volatile__ ("xvnmsubadp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 636 else 637 __asm__ __volatile__ ("xvnmsubasp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 638 else 639 if (do_dp) 640 __asm__ __volatile__ ("xvnmsubmdp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 641 else 642 __asm__ __volatile__ ("xvnmsubmsp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 643} 644 645static void test_xvmsub(void) 646{ 647 if (do_aXp) 648 if (do_dp) 649 __asm__ __volatile__ ("xvmsubadp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 650 else 651 __asm__ __volatile__ ("xvmsubasp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 652 else 653 if (do_dp) 654 __asm__ __volatile__ ("xvmsubmdp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 655 else 656 __asm__ __volatile__ ("xvmsubmsp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 657} 658 659static void test_xssqrtdp(void) 660{ 661 __asm__ __volatile__ ("xssqrtdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 662} 663 664static void test_xsrdpim(void) 665{ 666 __asm__ __volatile__ ("xsrdpim %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 667} 668 669static void test_xsrdpip(void) 670{ 671 __asm__ __volatile__ ("xsrdpip %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 672} 673 674static void test_xstdivdp(void) 675{ 676 __asm__ __volatile__ ("xstdivdp 6, %x0, %x1" : : "wa" (vec_inA), "wa" (vec_inB)); 677} 678 679static void test_xsmaxdp(void) 680{ 681 __asm__ __volatile__ ("xsmaxdp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 682} 683 684static void test_xsmindp(void) 685{ 686 __asm__ __volatile__ ("xsmindp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 687} 688 689static void test_xvadddp(void) 690{ 691 __asm__ __volatile__ ("xvadddp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 692} 693 694static void test_xvaddsp(void) 695{ 696 __asm__ __volatile__ ("xvaddsp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 697} 698 699static void test_xvdivdp(void) 700{ 701 __asm__ __volatile__ ("xvdivdp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 702} 703 704static void test_xvdivsp(void) 705{ 706 __asm__ __volatile__ ("xvdivsp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 707} 708 709static void test_xvmuldp(void) 710{ 711 __asm__ __volatile__ ("xvmuldp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 712} 713 714static void test_xvmulsp(void) 715{ 716 __asm__ __volatile__ ("xvmulsp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 717} 718 719static void test_xvsubdp(void) 720{ 721 __asm__ __volatile__ ("xvsubdp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 722} 723 724static void test_xvmaxdp(void) 725{ 726 __asm__ __volatile__ ("xvmaxdp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 727} 728 729static void test_xvmindp(void) 730{ 731 __asm__ __volatile__ ("xvmindp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 732} 733 734static void test_xvmaxsp(void) 735{ 736 __asm__ __volatile__ ("xvmaxsp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 737} 738 739static void test_xvminsp(void) 740{ 741 __asm__ __volatile__ ("xvminsp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 742} 743 744static void test_xvsubsp(void) 745{ 746 __asm__ __volatile__ ("xvsubsp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 747} 748 749static void test_xvresp(void) 750{ 751 __asm__ __volatile__ ("xvresp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 752} 753 754static void test_xxsel(void) 755{ 756 unsigned long long * dst; 757 unsigned long long xa[] = { 0xa12bc37de56f9708ULL, 0x3894c1fddeadbeefULL}; 758 unsigned long long xb[] = { 0xfedc432124681235ULL, 0xf1e2d3c4e0057708ULL}; 759 unsigned long long xc[] = { 0xffffffff01020304ULL, 0x128934bd00000000ULL}; 760 761 memcpy(&vec_inA, xa, 16); 762 memcpy(&vec_inB, xb, 16); 763 memcpy(&vec_inC, xc, 16); 764 765 766 __asm__ __volatile__ ("xxsel %x0, %x1, %x2, %x3" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB),"wa" (vec_inC)); 767 dst = (unsigned long long *) &vec_out; 768 printf("xxsel %016llx,%016llx,%016llx => %016llx\n", xa[0], xb[0], xc[0], *dst); 769 dst++; 770 printf("xxsel %016llx,%016llx,%016llx => %016llx\n", xa[1], xb[1], xc[1], *dst); 771 printf("\n"); 772} 773 774static void test_xxspltw(void) 775{ 776 int uim; 777 unsigned long long * dst = NULL; 778 unsigned long long xb[] = { 0xfedc432124681235ULL, 0xf1e2d3c4e0057708ULL}; 779 memcpy(&vec_inB, xb, 16); 780 781 for (uim = 0; uim < 4; uim++) { 782 switch (uim) { 783 case 0: 784 __asm__ __volatile__ ("xxspltw %x0, %x1, 0" : "=wa" (vec_out): "wa" (vec_inB)); 785 break; 786 case 1: 787 __asm__ __volatile__ ("xxspltw %x0, %x1, 1" : "=wa" (vec_out): "wa" (vec_inB)); 788 break; 789 case 2: 790 __asm__ __volatile__ ("xxspltw %x0, %x1, 2" : "=wa" (vec_out): "wa" (vec_inB)); 791 break; 792 case 3: 793 __asm__ __volatile__ ("xxspltw %x0, %x1, 3" : "=wa" (vec_out): "wa" (vec_inB)); 794 break; 795 } 796 dst = (unsigned long long *) &vec_out; 797 printf("xxspltw 0x%016llx%016llx %d=> 0x%016llx", xb[0], xb[1], uim, *dst); 798 dst++; 799 printf("%016llx\n", *dst); 800 } 801 printf("\n"); 802} 803 804static void test_xscvdpsxws(void) 805{ 806 __asm__ __volatile__ ("xscvdpsxws %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 807} 808 809static void test_xscvdpuxds(void) 810{ 811 __asm__ __volatile__ ("xscvdpuxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 812} 813 814static void test_xvcpsgndp(void) 815{ 816 __asm__ __volatile__ ("xvcpsgndp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 817} 818 819static void test_xvcpsgnsp(void) 820{ 821 __asm__ __volatile__ ("xvcpsgnsp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 822} 823 824static void test_xvcvdpsxws(void) 825{ 826 __asm__ __volatile__ ("xvcvdpsxws %x0, %x1 " : "=wa" (vec_out): "wa" (vec_inB)); 827} 828 829static void test_xvcvspsxws(void) 830{ 831 __asm__ __volatile__ ("xvcvspsxws %x0, %x1 " : "=wa" (vec_out): "wa" (vec_inB)); 832} 833 834static vx_fp_test_t 835vx_vector_one_fp_arg_tests[] = { 836 { &test_xvresp, "xvresp", NULL, 16, SINGLE_TEST, VX_BASIC_CMP, "1/x"}, 837 { &test_xvcvdpsxws, "xvcvdpsxws", NULL, 16, DOUBLE_TEST, VX_CONV_WORD, "conv"}, 838 { &test_xvcvspsxws, "xvcvspsxws", NULL, 16, SINGLE_TEST, VX_CONV_WORD, "conv"}, 839 { NULL, NULL, NULL, 0 , 0, 0, NULL} 840}; 841 842static vx_fp_test_t 843vx_vector_fp_tests[] = { 844 { &test_xvcmpeqdp, "xvcmpeqdp", fp_cmp_tests, 64, DOUBLE_TEST, VX_BASIC_CMP, "eq"}, 845 { &test_xvcmpgedp, "xvcmpgedp", fp_cmp_tests, 64, DOUBLE_TEST, VX_BASIC_CMP, "ge"}, 846 { &test_xvcmpgtdp, "xvcmpgtdp", fp_cmp_tests, 64, DOUBLE_TEST, VX_BASIC_CMP, "gt"}, 847 { &test_xvcmpeqsp, "xvcmpeqsp", fp_cmp_tests, 64, SINGLE_TEST, VX_BASIC_CMP, "eq"}, 848 { &test_xvcmpgesp, "xvcmpgesp", fp_cmp_tests, 64, SINGLE_TEST, VX_BASIC_CMP, "ge"}, 849 { &test_xvcmpgtsp, "xvcmpgtsp", fp_cmp_tests, 64, SINGLE_TEST, VX_BASIC_CMP, "gt"}, 850 { &test_xvadddp, "xvadddp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "+" }, 851 { &test_xvaddsp, "xvaddsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "+" }, 852 { &test_xvdivdp, "xvdivdp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "/" }, 853 { &test_xvdivsp, "xvdivsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "/" }, 854 { &test_xvmuldp, "xvmuldp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "*" }, 855 { &test_xvmulsp, "xvmulsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "*" }, 856 { &test_xvsubdp, "xvsubdp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "-" }, 857 { &test_xvsubsp, "xvsubsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "-" }, 858 { &test_xvmaxdp, "xvmaxdp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "@max@" }, 859 { &test_xvmindp, "xvmindp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "@min@" }, 860 { &test_xvmaxsp, "xvmaxsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "@max@" }, 861 { &test_xvminsp, "xvminsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "@min@" }, 862 { &test_xvcpsgndp, "xvcpsgndp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "+-cp"}, 863 { &test_xvcpsgnsp, "xvcpsgnsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "+-cp"}, 864 { NULL, NULL, NULL, 0 , 0, 0, NULL} 865}; 866 867 868static vx_fp_test_t 869vx_aORm_fp_tests[] = { 870 { &test_xsnmsub, "xsnmsub", two_arg_fp_tests, 64, DOUBLE_TEST, VX_SCALAR_FP_NMSUB, "!*-"}, 871 { &test_xvmadd, "xvmadd", two_arg_fp_tests, 64, DOUBLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "*+"}, 872 { &test_xvmadd, "xvmadd", two_arg_fp_tests, 64, SINGLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "*+"}, 873 { &test_xvnmadd, "xvnmadd", two_arg_fp_tests, 64, DOUBLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "!*+"}, 874 { &test_xvnmadd, "xvnmadd", two_arg_fp_tests, 64, SINGLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "!*+"}, 875 { &test_xvmsub, "xvmsub", two_arg_fp_tests, 64, DOUBLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "*-"}, 876 { &test_xvmsub, "xvmsub", two_arg_fp_tests, 64, SINGLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "*-"}, 877 { &test_xvnmsub, "xvnmsub", two_arg_fp_tests, 64, DOUBLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "!*-"}, 878 { &test_xvnmsub, "xvnmsub", two_arg_fp_tests, 64, SINGLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "!*-"}, 879 { NULL, NULL, NULL, 0, 0, 0, NULL } 880}; 881 882static vx_fp_test_t 883vx_simple_scalar_fp_tests[] = { 884 { &test_xssqrtdp, "xssqrtdp", NULL, 17, DOUBLE_TEST, VX_DEFAULT, NULL}, 885 { &test_xsrdpim, "xsrdpim", NULL, 17, DOUBLE_TEST, VX_DEFAULT, NULL}, 886 { &test_xsrdpip, "xsrdpip", NULL, 17, DOUBLE_TEST, VX_DEFAULT, NULL}, 887 { &test_xstdivdp, "xstdivdp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, NULL}, 888 { &test_xsmaxdp, "xsmaxdp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, NULL}, 889 { &test_xsmindp, "xsmindp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, NULL}, 890 { &test_xscvdpsxws, "xscvdpsxws", NULL, 17, DOUBLE_TEST, VX_CONV_WORD, NULL}, 891 { &test_xscvdpuxds, "xscvdpuxds", NULL, 17, DOUBLE_TEST, VX_DEFAULT, NULL}, 892 { NULL, NULL, NULL, 0, 0, 0, NULL } 893}; 894 895 896#ifdef __powerpc64__ 897static void test_bpermd(void) 898{ 899 /* NOTE: Bit number is '0 . . . 63' 900 * 901 * Permuted bits are generated bit 0 -7 as follows: 902 * index = (r14)8*i:8*i+7 903 * perm[i] = (r15)index 904 * 905 * So, for i = 0, index is (r14)8*0:8*0+7, or (r14)0:7, which is the MSB 906 * byte of r14, 0x1b(27/base 10). This identifies bit 27 of r15, which is '1'. 907 * For i = 1, index is 0x2c, identifying bit 44 of r15, which is '1'. 908 * So the result of the first two iterations of i are: 909 * perm = 0b01xxxxxx 910 * 911 */ 912 r15 = 0xa12bc37de56f9708ULL; 913 r14 = 0x1b2c31f030000001ULL; 914 __asm__ __volatile__ ("bpermd %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 915 printf("bpermd: 0x%016llx : 0x%016llx => 0x%llx\n", (unsigned long long)r14, 916 (unsigned long long)r15, (unsigned long long)r17); 917 printf("\n"); 918} 919#endif 920 921static Bool do_OE; 922typedef enum { 923 DIV_BASE = 1, 924 DIV_OE = 2, 925 DIV_DOT = 4, 926} div_type_t; 927/* Possible divde type combinations are: 928 * - base 929 * - base+dot 930 * - base+OE 931 * - base+OE+dot 932 */ 933#ifdef __powerpc64__ 934static void test_divde(void) 935{ 936 int divde_type = DIV_BASE; 937 if (do_OE) 938 divde_type |= DIV_OE; 939 if (do_dot) 940 divde_type |= DIV_DOT; 941 942 switch (divde_type) { 943 case 1: 944 SET_CR_XER_ZERO; 945 __asm__ __volatile__ ("divde %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 946 GET_CR_XER(div_flags, div_xer); 947 break; 948 case 3: 949 SET_CR_XER_ZERO; 950 __asm__ __volatile__ ("divdeo %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 951 GET_CR_XER(div_flags, div_xer); 952 break; 953 case 5: 954 SET_CR_XER_ZERO; 955 __asm__ __volatile__ ("divde. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 956 GET_CR_XER(div_flags, div_xer); 957 break; 958 case 7: 959 SET_CR_XER_ZERO; 960 __asm__ __volatile__ ("divdeo. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 961 GET_CR_XER(div_flags, div_xer); 962 break; 963 default: 964 fprintf(stderr, "Invalid divde type. Exiting\n"); 965 exit(1); 966 } 967} 968#endif 969 970static void test_divweu(void) 971{ 972 int divweu_type = DIV_BASE; 973 if (do_OE) 974 divweu_type |= DIV_OE; 975 if (do_dot) 976 divweu_type |= DIV_DOT; 977 978 switch (divweu_type) { 979 case 1: 980 SET_CR_XER_ZERO; 981 __asm__ __volatile__ ("divweu %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 982 GET_CR_XER(div_flags, div_xer); 983 break; 984 case 3: 985 SET_CR_XER_ZERO; 986 __asm__ __volatile__ ("divweuo %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 987 GET_CR_XER(div_flags, div_xer); 988 break; 989 case 5: 990 SET_CR_XER_ZERO; 991 __asm__ __volatile__ ("divweu. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 992 GET_CR_XER(div_flags, div_xer); 993 break; 994 case 7: 995 SET_CR_XER_ZERO; 996 __asm__ __volatile__ ("divweuo. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 997 GET_CR_XER(div_flags, div_xer); 998 break; 999 default: 1000 fprintf(stderr, "Invalid divweu type. Exiting\n"); 1001 exit(1); 1002 } 1003} 1004 1005static void test_fctiduz(void) 1006{ 1007 if (do_dot) 1008 __asm__ __volatile__ ("fctiduz. %0, %1" : "=d" (f17) : "d" (f14)); 1009 else 1010 __asm__ __volatile__ ("fctiduz %0, %1" : "=d" (f17) : "d" (f14)); 1011} 1012 1013static void test_fctidu(void) 1014{ 1015 if (do_dot) 1016 __asm__ __volatile__ ("fctidu. %0, %1" : "=d" (f17) : "d" (f14)); 1017 else 1018 __asm__ __volatile__ ("fctidu %0, %1" : "=d" (f17) : "d" (f14)); 1019} 1020 1021static void test_fctiwuz(void) 1022{ 1023 if (do_dot) 1024 __asm__ __volatile__ ("fctiwuz. %0, %1" : "=d" (f17) : "d" (f14)); 1025 else 1026 __asm__ __volatile__ ("fctiwuz %0, %1" : "=d" (f17) : "d" (f14)); 1027} 1028 1029static void test_fctiwu(void) 1030{ 1031 if (do_dot) 1032 __asm__ __volatile__ ("fctiwu. %0, %1" : "=d" (f17) : "d" (f14)); 1033 else 1034 __asm__ __volatile__ ("fctiwu %0, %1" : "=d" (f17) : "d" (f14)); 1035} 1036 1037typedef struct simple_test { 1038 test_func_t test_func; 1039 char * name; 1040 precision_type_t precision; 1041} simple_test_t; 1042 1043static simple_test_t fct_tests[] = { 1044 { &test_fctiduz, "fctiduz", DOUBLE_TEST }, 1045 { &test_fctidu, "fctidu", DOUBLE_TEST }, 1046 { &test_fctiwuz, "fctiwuz", SINGLE_TEST }, 1047 { &test_fctiwu, "fctiwu", SINGLE_TEST }, 1048 { NULL, NULL } 1049}; 1050 1051static void setup_sp_fp_args(fp_test_args_t * targs, Bool swap_inputs) 1052{ 1053 int a_idx, b_idx, i; 1054 void * inA, * inB; 1055 void * vec_src = swap_inputs ? &vec_out : &vec_inB; 1056 1057 for (i = 0; i < 4; i++) { 1058 a_idx = targs->fra_idx; 1059 b_idx = targs->frb_idx; 1060 inA = (void *)&spec_sp_fargs[a_idx]; 1061 inB = (void *)&spec_sp_fargs[b_idx]; 1062 // copy single precision FP into vector element i 1063 memcpy(((void *)&vec_inA) + (i * 4), inA, 4); 1064 memcpy(vec_src + (i * 4), inB, 4); 1065 targs++; 1066 } 1067} 1068 1069static void setup_dp_fp_args(fp_test_args_t * targs, Bool swap_inputs) 1070{ 1071 int a_idx, b_idx, i; 1072 void * inA, * inB; 1073 void * vec_src = swap_inputs ? (void *)&vec_out : (void *)&vec_inB; 1074 1075 for (i = 0; i < 2; i++) { 1076 a_idx = targs->fra_idx; 1077 b_idx = targs->frb_idx; 1078 inA = (void *)&spec_fargs[a_idx]; 1079 inB = (void *)&spec_fargs[b_idx]; 1080 // copy double precision FP into vector element i 1081 memcpy(((void *)&vec_inA) + (i * 8), inA, 8); 1082 memcpy(vec_src + (i * 8), inB, 8); 1083 targs++; 1084 } 1085} 1086 1087#define VX_NOT_CMP_OP 0xffffffff 1088static void print_vector_fp_result(unsigned int cc, vx_fp_test_t * test_group, int i) 1089{ 1090 int a_idx, b_idx, k; 1091 char * name = malloc(20); 1092 int dp = test_group->precision == DOUBLE_TEST ? 1 : 0; 1093 int loops = dp ? 2 : 4; 1094 fp_test_args_t * targs = &test_group->targs[i]; 1095 unsigned long long * frA_dp, * frB_dp, * dst_dp; 1096 unsigned int * frA_sp, *frB_sp, * dst_sp; 1097 strcpy(name, test_group->name); 1098 printf("#%d: %s%s ", dp? i/2 : i/4, name, (do_dot ? "." : "")); 1099 for (k = 0; k < loops; k++) { 1100 a_idx = targs->fra_idx; 1101 b_idx = targs->frb_idx; 1102 if (k) 1103 printf(" AND "); 1104 if (dp) { 1105 frA_dp = (unsigned long long *)&spec_fargs[a_idx]; 1106 frB_dp = (unsigned long long *)&spec_fargs[b_idx]; 1107 printf("%016llx %s %016llx", *frA_dp, test_group->op, *frB_dp); 1108 } else { 1109 frA_sp = (unsigned int *)&spec_sp_fargs[a_idx]; 1110 frB_sp = (unsigned int *)&spec_sp_fargs[b_idx]; 1111 printf("%08x %s %08x", *frA_sp, test_group->op, *frB_sp); 1112 } 1113 targs++; 1114 } 1115 if (cc != VX_NOT_CMP_OP) 1116 printf(" ? cc=%x", cc); 1117 1118 if (dp) { 1119 dst_dp = (unsigned long long *) &vec_out; 1120 printf(" => %016llx %016llx\n", dst_dp[0], dst_dp[1]); 1121 } else { 1122 dst_sp = (unsigned int *) &vec_out; 1123 printf(" => %08x %08x %08x %08x\n", dst_sp[0], dst_sp[1], dst_sp[2], dst_sp[3]); 1124 } 1125 free(name); 1126} 1127 1128 1129static void print_vx_aORm_fp_result(unsigned long long * XT_arg, unsigned long long * XB_arg, 1130 vx_fp_test_t * test_group, int i) 1131{ 1132 int a_idx, k; 1133 char * name = malloc(20); 1134 int dp = test_group->precision == DOUBLE_TEST ? 1 : 0; 1135 int loops = dp ? 2 : 4; 1136 fp_test_args_t * targs = &test_group->targs[i]; 1137 unsigned long long frA_dp, * dst_dp; 1138 unsigned int frA_sp, * dst_sp; 1139 1140 strcpy(name, test_group->name); 1141 if (do_aXp) 1142 if (dp) 1143 strcat(name, "adp"); 1144 else 1145 strcat(name, "asp"); 1146 else 1147 if (dp) 1148 strcat(name, "mdp"); 1149 else 1150 strcat(name, "msp"); 1151 1152 printf("#%d: %s ", dp? i/2 : i/4, name); 1153 for (k = 0; k < loops; k++) { 1154 a_idx = targs->fra_idx; 1155 if (k) 1156 printf(" AND "); 1157 if (dp) { 1158 frA_dp = *((unsigned long long *)&spec_fargs[a_idx]); 1159 printf("%s(%016llx,%016llx,%016llx)", test_group->op, XT_arg[k], frA_dp, XB_arg[k]); 1160 } else { 1161 unsigned int * xt_sp = (unsigned int *)XT_arg; 1162 unsigned int * xb_sp = (unsigned int *)XB_arg; 1163 frA_sp = *((unsigned int *)&spec_sp_fargs[a_idx]); 1164 printf("%s(%08x,%08x,%08x)", test_group->op, xt_sp[k], frA_sp, xb_sp[k]); 1165 } 1166 targs++; 1167 } 1168 1169 if (dp) { 1170 dst_dp = (unsigned long long *) &vec_out; 1171 printf(" => %016llx %016llx\n", dst_dp[0], dst_dp[1]); 1172 } else { 1173 dst_sp = (unsigned int *) &vec_out; 1174 printf(" => %08x %08x %08x %08x\n", dst_sp[0], dst_sp[1], dst_sp[2], dst_sp[3]); 1175 } 1176 free(name); 1177} 1178 1179/* This function currently only supports double precision input arguments. */ 1180static void test_vx_simple_scalar_fp_ops(void) 1181{ 1182 test_func_t func; 1183 int k = 0; 1184 1185 build_special_fargs_table(); 1186 while ((func = vx_simple_scalar_fp_tests[k].test_func)) { 1187 unsigned long long * frap, * frbp, * dst; 1188 unsigned int * pv; 1189 int idx; 1190 vx_fp_test_t test_group = vx_simple_scalar_fp_tests[k]; 1191 Bool convToWord = (test_group.type == VX_CONV_WORD); 1192 if (test_group.precision != DOUBLE_TEST) { 1193 fprintf(stderr, "Unsupported single precision for scalar op in test_vx_aORm_fp_ops\n"); 1194 exit(1); 1195 } 1196 pv = (unsigned int *)&vec_out; 1197 // clear vec_out 1198 for (idx = 0; idx < 4; idx++, pv++) 1199 *pv = 0; 1200 1201 /* If num_tests is exactly equal to nb_special_fargs, this implies the 1202 * instruction being tested only requires one floating point argument 1203 * (e.g. xssqrtdp). 1204 */ 1205 if (test_group.num_tests == nb_special_fargs && !test_group.targs) { 1206 void * inB; 1207 int i; 1208 for (i = 0; i < nb_special_fargs; i++) { 1209 inB = (void *)&spec_fargs[i]; 1210 frbp = (unsigned long long *)&spec_fargs[i]; 1211 memcpy(&vec_inB, inB, 8); 1212 (*func)(); 1213 dst = (unsigned long long *) &vec_out; 1214 printf("#%d: %s %016llx => %016llx\n", i, test_group.name, *frbp, 1215 convToWord ? (*dst & 0x00000000ffffffffULL) : *dst); 1216 } 1217 } else { 1218 void * inA, * inB; 1219 unsigned int condreg, flags; 1220 int isTdiv = (strstr(test_group.name, "xstdivdp") != NULL) ? 1 : 0; 1221 int i; 1222 for (i = 0; i < test_group.num_tests; i++) { 1223 fp_test_args_t aTest = test_group.targs[i]; 1224 inA = (void *)&spec_fargs[aTest.fra_idx]; 1225 inB = (void *)&spec_fargs[aTest.frb_idx]; 1226 frap = (unsigned long long *)&spec_fargs[aTest.fra_idx]; 1227 frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx]; 1228 // Only need to copy one doubleword into each vector's element 0 1229 memcpy(&vec_inA, inA, 8); 1230 memcpy(&vec_inB, inB, 8); 1231 SET_FPSCR_ZERO; 1232 SET_CR_XER_ZERO; 1233 (*func)(); 1234 GET_CR(flags); 1235 if (isTdiv) { 1236 condreg = (flags & 0x000000f0) >> 4; 1237 printf("#%d: %s %016llx,%016llx => cr %x\n", i, test_group.name, *frap, *frbp, condreg); 1238 } else { 1239 dst = (unsigned long long *) &vec_out; 1240 printf("#%d: %s %016llx,%016llx => %016llx\n", i, test_group.name, 1241 *frap, *frbp, *dst); 1242 } 1243 } 1244 } 1245 printf( "\n" ); 1246 k++; 1247 } 1248} 1249 1250static void test_vx_aORm_fp_ops(void) 1251{ 1252 /* These ops need a third src argument, which is stored in element 0 of 1253 * VSX[XT] -- i.e., vec_out. For the xs<ZZZ>m{d|s}p cases, VSX[XT] holds 1254 * src3 and VSX[XB] holds src2; for the xs<ZZZ>a{d|s}p cases, VSX[XT] holds 1255 * src2 and VSX[XB] holds src3. The fp_test_args_t that holds the test 1256 * data (input args, result) contain only two inputs, so I arbitrarily 1257 * choose some spec_fargs elements for the third source argument. 1258 * Note that that by using the same input data for a given pair of 1259 * a{d|s}p/m{d|s}p-type instructions (by swapping the src2 and src3 1260 * arguments), the expected result should be the same. 1261 */ 1262 1263 test_func_t func; 1264 int k; 1265 char * test_name = (char *)malloc(20); 1266 k = 0; 1267 do_dot = False; 1268 1269 build_special_fargs_table(); 1270 while ((func = vx_aORm_fp_tests[k].test_func)) { 1271 int i, stride; 1272 Bool repeat = False; 1273 Bool scalar = False; 1274 unsigned long long * frap, * frbp, * dst; 1275 vx_fp_test_t test_group = vx_aORm_fp_tests[k]; 1276 vx_fp_test_type test_type = test_group.type; 1277 do_dp = test_group.precision == DOUBLE_TEST ? True : False; 1278 frap = frbp = NULL; 1279 1280 if (test_type < VX_VECTOR_FP_MULT_AND_OP2) { 1281 scalar = True; 1282 strcpy(test_name, test_group.name); 1283 if (!repeat) { 1284 repeat = 1; 1285 stride = 1; 1286 // Only support double precision scalar ops in this function 1287 if (do_dp) { 1288 strcat(test_name, "adp"); 1289 } else { 1290 fprintf(stderr, "Unsupported single precision for scalar op in test_vx_aORm_fp_ops\n"); 1291 exit(1); 1292 } 1293 do_aXp = True; 1294 } 1295 } else if (test_type < VX_BASIC_CMP) { 1296 // Then it must be a VX_VECTOR_xxx type 1297 stride = do_dp ? 2 : 4; 1298 if (!repeat) { 1299 // No need to work up the testcase name here, since that will be done in 1300 // the print_vx_aORm_fp_result() function we'll call for vector-type ops. 1301 repeat = 1; 1302 do_aXp = True; 1303 } 1304 } else { 1305 printf("ERROR: Invalid VX FP test type %d\n", test_type); 1306 exit(1); 1307 } 1308 1309again: 1310 for (i = 0; i < test_group.num_tests; i+=stride) { 1311 void * inA, * inB; 1312 int m, fp_idx[4]; 1313 unsigned long long vsr_XT[2]; 1314 unsigned long long vsr_XB[2]; 1315 fp_test_args_t aTest = test_group.targs[i]; 1316 for (m = 0; m < stride; m++) 1317 fp_idx[m] = i % (nb_special_fargs - stride) + m; 1318 1319 /* When repeat == True, we're on the first time through of one of the VX_FP_SMx 1320 * test types, meaning we're testing a xs<ZZZ>adp case, thus we have to swap 1321 * inputs as described above: 1322 * src2 <= VSX[XT] 1323 * src3 <= VSX[XB] 1324 */ 1325 if (scalar) { 1326 // For scalar op, only need to copy one doubleword into each vector's element 0 1327 inA = (void *)&spec_fargs[aTest.fra_idx]; 1328 inB = (void *)&spec_fargs[aTest.frb_idx]; 1329 frap = (unsigned long long *)&spec_fargs[aTest.fra_idx]; 1330 memcpy(&vec_inA, inA, 8); 1331 if (repeat) { 1332 memcpy(&vec_out, inB, 8); // src2 1333 memcpy(&vec_inB, &spec_fargs[fp_idx[0]], 8); //src3 1334 frbp = (unsigned long long *)&spec_fargs[fp_idx[0]]; 1335 } else { 1336 frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx]; 1337 memcpy(&vec_inB, inB, 8); // src2 1338 memcpy(&vec_out, &spec_fargs[fp_idx[0]], 8); //src3 1339 } 1340 memcpy(vsr_XT, &vec_out, 8); 1341 } else { 1342 int j, loops = do_dp ? 2 : 4; 1343 size_t len = do_dp ? 8 : 4; 1344 void * vec_src = repeat ? (void *)&vec_inB : (void *)&vec_out; 1345 for (j = 0; j < loops; j++) { 1346 if (do_dp) 1347 memcpy(vec_src + (j * len), &spec_fargs[fp_idx[j]], len); 1348 else 1349 memcpy(vec_src + (j * len), &spec_sp_fargs[fp_idx[j]], len); 1350 } 1351 if (do_dp) 1352 setup_dp_fp_args(&test_group.targs[i], repeat); 1353 else 1354 setup_sp_fp_args(&test_group.targs[i], repeat); 1355 1356 memcpy(vsr_XT, &vec_out, 16); 1357 memcpy(vsr_XB, &vec_inB, 16); 1358 } 1359 1360 (*func)(); 1361 dst = (unsigned long long *) &vec_out; 1362 if (test_type < VX_VECTOR_FP_MULT_AND_OP2) 1363 printf( "#%d: %s %s(%016llx,%016llx,%016llx) = %016llx\n", i, 1364 test_name, test_group.op, vsr_XT[0], *frap, *frbp, *dst ); 1365 else 1366 print_vx_aORm_fp_result(vsr_XT, vsr_XB, &test_group, i); 1367 } 1368 printf( "\n" ); 1369 1370 if (repeat) { 1371 repeat = 0; 1372 if (test_type < VX_VECTOR_FP_MULT_AND_OP2) { 1373 strcpy(test_name, test_group.name); 1374 strcat(test_name, "mdp"); 1375 } 1376 do_aXp = False; 1377 goto again; 1378 } 1379 k++; 1380 } 1381 printf( "\n" ); 1382 free(test_name); 1383} 1384 1385static void test_vx_vector_one_fp_arg(void) 1386{ 1387 test_func_t func; 1388 int k; 1389 k = 0; 1390 build_special_fargs_table(); 1391 1392 while ((func = vx_vector_one_fp_arg_tests[k].test_func)) { 1393 int idx, i; 1394 vx_fp_test_t test_group = vx_vector_one_fp_arg_tests[k]; 1395 Bool convToWord = (test_group.type == VX_CONV_WORD); 1396 Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False; 1397 Bool xvrespTest = (strstr(test_group.name , "xvresp") != NULL) ? True: False; 1398 int stride = dp ? 2 : 4; 1399 1400 for (i = 0; i < test_group.num_tests; i+=stride) { 1401 unsigned int * pv; 1402 void * inB; 1403 1404 pv = (unsigned int *)&vec_out; 1405 // clear vec_out 1406 for (idx = 0; idx < 4; idx++, pv++) 1407 *pv = 0; 1408 1409 if (dp) { 1410 int j; 1411 unsigned long long * frB_dp, *dst_dp; 1412 for (j = 0; j < 2; j++) { 1413 inB = (void *)&spec_fargs[i + j]; 1414 // copy double precision FP into vector element i 1415 memcpy(((void *)&vec_inB) + (j * 8), inB, 8); 1416 } 1417 // execute test insn 1418 (*func)(); 1419 dst_dp = (unsigned long long *) &vec_out; 1420 printf("#%d: %s ", i/2, test_group.name); 1421 for (j = 0; j < 2; j++) { 1422 if (j) 1423 printf("; "); 1424 frB_dp = (unsigned long long *)&spec_fargs[i + j]; 1425 printf("%s(%016llx)", test_group.op, *frB_dp); 1426 printf(" = %016llx", convToWord ? (dst_dp[j] & 0x00000000ffffffffULL) : dst_dp[j]); 1427 } 1428 printf("\n"); 1429 } else { 1430 int j; 1431 unsigned int * frB_sp, * dst_sp; 1432 1433 for (j = 0; j < 4; j++) { 1434 inB = (void *)&spec_sp_fargs[i + j]; 1435 // copy single precision FP into vector element i 1436 memcpy(((void *)&vec_inB) + (j * 4), inB, 4); 1437 } 1438 // execute test insn 1439 (*func)(); 1440 dst_sp = (unsigned int *) &vec_out; 1441 // print result 1442 printf("#%d: %s ", i/4, test_group.name); 1443 for (j = 0; j < 4; j++) { 1444 if (j) 1445 printf("; "); 1446 frB_sp = (unsigned int *)&spec_sp_fargs[i + j]; 1447 printf("%s(%08x)", test_group.op, *frB_sp); 1448 if (xvrespTest) { 1449 float calc_diff = fabs(spec_sp_fargs[i + j]/256); 1450 float sp_res; 1451 memcpy(&sp_res, &dst_sp[j], 4); 1452 float div_result = 1/spec_sp_fargs[i + j]; 1453 float real_diff = fabs(sp_res - div_result); 1454 printf( " ==> %s", 1455 ( ( sp_res == div_result ) 1456 || ( isnan(sp_res) && isnan(div_result) ) 1457 || ( real_diff <= calc_diff ) ) ? "PASS" 1458 : "FAIL"); 1459 } else { 1460 printf(" = %08x", dst_sp[j]); 1461 } 1462 } 1463 printf("\n"); 1464 } 1465 } 1466 k++; 1467 printf( "\n" ); 1468 } 1469 1470} 1471 1472/* This function assumes the instruction being tested requires two args. */ 1473static void test_vx_vector_fp_ops(void) 1474{ 1475 test_func_t func; 1476 int k; 1477 k = 0; 1478 build_special_fargs_table(); 1479 1480 while ((func = vx_vector_fp_tests[k].test_func)) { 1481 int idx, i, repeat = 1; 1482 vx_fp_test_t test_group = vx_vector_fp_tests[k]; 1483 int stride = test_group.precision == DOUBLE_TEST ? 2 : 4; 1484 do_dot = False; 1485 1486again: 1487 for (i = 0; i < test_group.num_tests; i+=stride) { 1488 unsigned int * pv, condreg; 1489 unsigned int flags; 1490 1491 pv = (unsigned int *)&vec_out; 1492 if (test_group.precision == DOUBLE_TEST) 1493 setup_dp_fp_args(&test_group.targs[i], False); 1494 else 1495 setup_sp_fp_args(&test_group.targs[i], False); 1496 1497 // clear vec_out 1498 for (idx = 0; idx < 4; idx++, pv++) 1499 *pv = 0; 1500 1501 // execute test insn 1502 SET_FPSCR_ZERO; 1503 SET_CR_XER_ZERO; 1504 (*func)(); 1505 GET_CR(flags); 1506 if (test_group.type == VX_BASIC_CMP) { 1507 condreg = (flags & 0x000000f0) >> 4; 1508 } else { 1509 condreg = VX_NOT_CMP_OP; 1510 } 1511 print_vector_fp_result(condreg, &test_group, i); 1512 } 1513 printf("\n"); 1514 if (repeat && test_group.type == VX_BASIC_CMP) { 1515 repeat = 0; 1516 do_dot = True; 1517 goto again; 1518 } 1519 k++; 1520 printf( "\n" ); 1521 } 1522} 1523 1524 1525// The div doubleword test data 1526signed long long div_dw_tdata[13][2] = { 1527 { 4, -4 }, 1528 { 4, -3 }, 1529 { 4, 4 }, 1530 { 4, -5 }, 1531 { 3, 8 }, 1532 { 0x8000000000000000ULL, 0xa }, 1533 { 0x50c, -1 }, 1534 { 0x50c, -4096 }, 1535 { 0x1234fedc, 0x8000a873 }, 1536 { 0xabcd87651234fedcULL, 0xa123b893 }, 1537 { 0x123456789abdcULL, 0 }, 1538 { 0, 2 }, 1539 { 0x77, 0xa3499 } 1540}; 1541#define dw_tdata_len (sizeof(div_dw_tdata)/sizeof(signed long long)/2) 1542 1543// The div word test data 1544unsigned int div_w_tdata[6][2] = { 1545 { 0, 2 }, 1546 { 2, 0 }, 1547 { 0x7abc1234, 0xf0000000 }, 1548 { 0xfabc1234, 5 }, 1549 { 77, 66 }, 1550 { 5, 0xfabc1234 }, 1551}; 1552#define w_tdata_len (sizeof(div_w_tdata)/sizeof(unsigned int)/2) 1553 1554typedef struct div_ext_test 1555{ 1556 test_func_t test_func; 1557 const char *name; 1558 int num_tests; 1559 div_type_t div_type; 1560 precision_type_t precision; 1561} div_ext_test_t; 1562 1563static div_ext_test_t div_tests[] = { 1564#ifdef __powerpc64__ 1565 { &test_divde, "divde", dw_tdata_len, DIV_BASE, DOUBLE_TEST }, 1566 { &test_divde, "divdeo", dw_tdata_len, DIV_OE, DOUBLE_TEST }, 1567#endif 1568 { &test_divweu, "divweu", w_tdata_len, DIV_BASE, SINGLE_TEST }, 1569 { &test_divweu, "divweuo", w_tdata_len, DIV_OE, SINGLE_TEST }, 1570 { NULL, NULL, 0, 0, 0 } 1571}; 1572 1573static void test_div_extensions(void) 1574{ 1575 test_func_t func; 1576 int k; 1577 k = 0; 1578 1579 while ((func = div_tests[k].test_func)) { 1580 int i, repeat = 1; 1581 div_ext_test_t test_group = div_tests[k]; 1582 do_dot = False; 1583 1584again: 1585 for (i = 0; i < test_group.num_tests; i++) { 1586 unsigned int condreg; 1587 1588 if (test_group.div_type == DIV_OE) 1589 do_OE = True; 1590 else 1591 do_OE = False; 1592 1593 if (test_group.precision == DOUBLE_TEST) { 1594 r14 = div_dw_tdata[i][0]; 1595 r15 = div_dw_tdata[i][1]; 1596 } else { 1597 r14 = div_w_tdata[i][0]; 1598 r15 = div_w_tdata[i][1]; 1599 } 1600 // execute test insn 1601 (*func)(); 1602 condreg = (div_flags & 0xf0000000) >> 28; 1603 printf("#%d: %s%s: ", i, test_group.name, do_dot ? "." : ""); 1604 if (test_group.precision == DOUBLE_TEST) { 1605 printf("0x%016llx / 0x%016llx = 0x%016llx;", 1606 div_dw_tdata[i][0], div_dw_tdata[i][1], (signed long long) r17); 1607 } else { 1608 printf("0x%08x / 0x%08x = 0x%08x;", 1609 div_w_tdata[i][0], div_w_tdata[i][1], (unsigned int) r17); 1610 } 1611 printf(" CR=%x; XER=%x\n", condreg, div_xer); 1612 } 1613 printf("\n"); 1614 if (repeat) { 1615 repeat = 0; 1616 do_dot = True; 1617 goto again; 1618 } 1619 k++; 1620 printf( "\n" ); 1621 } 1622 1623} 1624 1625static void test_fct_ops(void) 1626{ 1627 test_func_t func; 1628 int k; 1629 k = 0; 1630 1631 while ((func = fct_tests[k].test_func)) { 1632 int i, repeat = 1; 1633 simple_test_t test_group = fct_tests[k]; 1634 do_dot = False; 1635 1636again: 1637 for (i = 0; i < nb_special_fargs; i++) { 1638 double result; 1639#define SINGLE_MASK 0x00000000FFFFFFFFULL 1640 1641 f14 = spec_fargs[i]; 1642 // execute test insn 1643 SET_FPSCR_ZERO; 1644 (*func)(); 1645 result = f17; 1646 printf("#%d: %s%s: ", i, test_group.name, do_dot ? "." : ""); 1647 printf("0x%016llx (%e) ==> 0x%016llx\n", 1648 *((unsigned long long *)(&spec_fargs[i])), spec_fargs[i], 1649 test_group.precision == SINGLE_TEST ? (SINGLE_MASK & 1650 *((unsigned long long *)(&result))) : 1651 *((unsigned long long *)(&result))); 1652 } 1653 printf("\n"); 1654 if (repeat) { 1655 repeat = 0; 1656 do_dot = True; 1657 goto again; 1658 } 1659 k++; 1660 printf( "\n" ); 1661 } 1662} 1663 1664#ifdef __powerpc64__ 1665void test_stdbrx(void) 1666{ 1667 unsigned long long store, val = 0xdeadbacf12345678ULL; 1668 printf("stdbrx: 0x%llx ==> ", val); 1669 r17 = (HWord_t)val; 1670 r14 = (HWord_t)&store; 1671 __asm__ __volatile__ ("stdbrx %0, 0, %1" : : "r"(r17), "r"(r14)); 1672 printf("0x%llx\n", store); 1673 printf( "\n" ); 1674} 1675#endif 1676 1677static test_table_t 1678 all_tests[] = 1679{ 1680 { &test_vx_vector_one_fp_arg, 1681 "Test VSX vector single arg instructions"}, 1682 { &test_vx_vector_fp_ops, 1683 "Test VSX floating point compare and basic arithmetic instructions" }, 1684#ifdef __powerpc64__ 1685 { &test_bpermd, 1686 "Test bit permute double"}, 1687#endif 1688 { &test_xxsel, 1689 "Test xxsel instruction" }, 1690 { &test_xxspltw, 1691 "Test xxspltw instruction" }, 1692 { &test_div_extensions, 1693 "Test div extensions" }, 1694 { &test_fct_ops, 1695 "Test floating point convert [word | doubleword] unsigned, with round toward zero" }, 1696#ifdef __powerpc64__ 1697 { &test_stdbrx, 1698 "Test stdbrx instruction"}, 1699#endif 1700 { &test_vx_aORm_fp_ops, 1701 "Test floating point arithmetic instructions -- with a{d|s}p or m{d|s}p"}, 1702 { &test_vx_simple_scalar_fp_ops, 1703 "Test scalar floating point arithmetic instructions"}, 1704 { NULL, NULL } 1705}; 1706#endif // HAS_VSX 1707 1708int main(int argc, char *argv[]) 1709{ 1710#ifdef HAS_VSX 1711 1712 test_table_t aTest; 1713 test_func_t func; 1714 int i = 0; 1715 1716 while ((func = all_tests[i].test_category)) { 1717 aTest = all_tests[i]; 1718 printf( "%s\n", aTest.name ); 1719 (*func)(); 1720 i++; 1721 } 1722 if (spec_fargs) 1723 free(spec_fargs); 1724 if (spec_sp_fargs) 1725 free(spec_sp_fargs); 1726 1727#endif // HAS _VSX 1728 1729 return 0; 1730} 1731