lp_test_blend.c revision 966d28cb2e5e090d8f591810f331df0d05b06271
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 29/** 30 * @file 31 * Unit tests for blend LLVM IR generation 32 * 33 * @author Jose Fonseca <jfonseca@vmware.com> 34 * 35 * Blend computation code derived from code written by 36 * @author Brian Paul <brian@vmware.com> 37 */ 38 39 40#include "gallivm/lp_bld_type.h" 41#include "gallivm/lp_bld_debug.h" 42#include "lp_bld_blend.h" 43#include "lp_test.h" 44 45 46enum vector_mode 47{ 48 AoS = 0, 49 SoA = 1 50}; 51 52 53typedef void (*blend_test_ptr_t)(const void *src, const void *dst, const void *con, void *res); 54 55/** cast wrapper */ 56static blend_test_ptr_t 57voidptr_to_blend_test_ptr_t(void *p) 58{ 59 union { 60 void *v; 61 blend_test_ptr_t f; 62 } u; 63 u.v = p; 64 return u.f; 65} 66 67 68 69void 70write_tsv_header(FILE *fp) 71{ 72 fprintf(fp, 73 "result\t" 74 "cycles_per_channel\t" 75 "mode\t" 76 "type\t" 77 "sep_func\t" 78 "sep_src_factor\t" 79 "sep_dst_factor\t" 80 "rgb_func\t" 81 "rgb_src_factor\t" 82 "rgb_dst_factor\t" 83 "alpha_func\t" 84 "alpha_src_factor\t" 85 "alpha_dst_factor\n"); 86 87 fflush(fp); 88} 89 90 91static void 92write_tsv_row(FILE *fp, 93 const struct pipe_blend_state *blend, 94 enum vector_mode mode, 95 struct lp_type type, 96 double cycles, 97 boolean success) 98{ 99 fprintf(fp, "%s\t", success ? "pass" : "fail"); 100 101 if (mode == AoS) { 102 fprintf(fp, "%.1f\t", cycles / type.length); 103 fprintf(fp, "aos\t"); 104 } 105 106 if (mode == SoA) { 107 fprintf(fp, "%.1f\t", cycles / (4 * type.length)); 108 fprintf(fp, "soa\t"); 109 } 110 111 fprintf(fp, "%s%u%sx%u\t", 112 type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")), 113 type.width, 114 type.norm ? "n" : "", 115 type.length); 116 117 fprintf(fp, 118 "%s\t%s\t%s\t", 119 blend->rt[0].rgb_func != blend->rt[0].alpha_func ? "true" : "false", 120 blend->rt[0].rgb_src_factor != blend->rt[0].alpha_src_factor ? "true" : "false", 121 blend->rt[0].rgb_dst_factor != blend->rt[0].alpha_dst_factor ? "true" : "false"); 122 123 fprintf(fp, 124 "%s\t%s\t%s\t%s\t%s\t%s\n", 125 util_dump_blend_func(blend->rt[0].rgb_func, TRUE), 126 util_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE), 127 util_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE), 128 util_dump_blend_func(blend->rt[0].alpha_func, TRUE), 129 util_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE), 130 util_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE)); 131 132 fflush(fp); 133} 134 135 136static void 137dump_blend_type(FILE *fp, 138 const struct pipe_blend_state *blend, 139 enum vector_mode mode, 140 struct lp_type type) 141{ 142 fprintf(fp, "%s", mode ? "soa" : "aos"); 143 144 fprintf(fp, " type=%s%u%sx%u", 145 type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")), 146 type.width, 147 type.norm ? "n" : "", 148 type.length); 149 150 fprintf(fp, 151 " %s=%s %s=%s %s=%s %s=%s %s=%s %s=%s", 152 "rgb_func", util_dump_blend_func(blend->rt[0].rgb_func, TRUE), 153 "rgb_src_factor", util_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE), 154 "rgb_dst_factor", util_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE), 155 "alpha_func", util_dump_blend_func(blend->rt[0].alpha_func, TRUE), 156 "alpha_src_factor", util_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE), 157 "alpha_dst_factor", util_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE)); 158 159 fprintf(fp, " ...\n"); 160 fflush(fp); 161} 162 163 164static LLVMValueRef 165add_blend_test(LLVMModuleRef module, 166 const struct pipe_blend_state *blend, 167 enum vector_mode mode, 168 struct lp_type type) 169{ 170 LLVMTypeRef vec_type; 171 LLVMTypeRef args[4]; 172 LLVMValueRef func; 173 LLVMValueRef src_ptr; 174 LLVMValueRef dst_ptr; 175 LLVMValueRef const_ptr; 176 LLVMValueRef res_ptr; 177 LLVMBasicBlockRef block; 178 LLVMBuilderRef builder; 179 const unsigned rt = 0; 180 181 vec_type = lp_build_vec_type(type); 182 183 args[3] = args[2] = args[1] = args[0] = LLVMPointerType(vec_type, 0); 184 func = LLVMAddFunction(module, "test", LLVMFunctionType(LLVMVoidType(), args, 4, 0)); 185 LLVMSetFunctionCallConv(func, LLVMCCallConv); 186 src_ptr = LLVMGetParam(func, 0); 187 dst_ptr = LLVMGetParam(func, 1); 188 const_ptr = LLVMGetParam(func, 2); 189 res_ptr = LLVMGetParam(func, 3); 190 191 block = LLVMAppendBasicBlock(func, "entry"); 192 builder = LLVMCreateBuilder(); 193 LLVMPositionBuilderAtEnd(builder, block); 194 195 if (mode == AoS) { 196 LLVMValueRef src; 197 LLVMValueRef dst; 198 LLVMValueRef con; 199 LLVMValueRef res; 200 201 src = LLVMBuildLoad(builder, src_ptr, "src"); 202 dst = LLVMBuildLoad(builder, dst_ptr, "dst"); 203 con = LLVMBuildLoad(builder, const_ptr, "const"); 204 205 res = lp_build_blend_aos(builder, blend, type, rt, src, dst, con, 3); 206 207 lp_build_name(res, "res"); 208 209 LLVMBuildStore(builder, res, res_ptr); 210 } 211 212 if (mode == SoA) { 213 LLVMValueRef src[4]; 214 LLVMValueRef dst[4]; 215 LLVMValueRef con[4]; 216 LLVMValueRef res[4]; 217 unsigned i; 218 219 for(i = 0; i < 4; ++i) { 220 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); 221 src[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, src_ptr, &index, 1, ""), ""); 222 dst[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), ""); 223 con[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), ""); 224 lp_build_name(src[i], "src.%c", "rgba"[i]); 225 lp_build_name(con[i], "con.%c", "rgba"[i]); 226 lp_build_name(dst[i], "dst.%c", "rgba"[i]); 227 } 228 229 lp_build_blend_soa(builder, blend, type, rt, src, dst, con, res); 230 231 for(i = 0; i < 4; ++i) { 232 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); 233 lp_build_name(res[i], "res.%c", "rgba"[i]); 234 LLVMBuildStore(builder, res[i], LLVMBuildGEP(builder, res_ptr, &index, 1, "")); 235 } 236 } 237 238 LLVMBuildRetVoid(builder);; 239 240 LLVMDisposeBuilder(builder); 241 return func; 242} 243 244 245/** Add and limit result to ceiling of 1.0 */ 246#define ADD_SAT(R, A, B) \ 247do { \ 248 R = (A) + (B); if (R > 1.0f) R = 1.0f; \ 249} while (0) 250 251/** Subtract and limit result to floor of 0.0 */ 252#define SUB_SAT(R, A, B) \ 253do { \ 254 R = (A) - (B); if (R < 0.0f) R = 0.0f; \ 255} while (0) 256 257 258static void 259compute_blend_ref_term(unsigned rgb_factor, 260 unsigned alpha_factor, 261 const double *factor, 262 const double *src, 263 const double *dst, 264 const double *con, 265 double *term) 266{ 267 double temp; 268 269 switch (rgb_factor) { 270 case PIPE_BLENDFACTOR_ONE: 271 term[0] = factor[0]; /* R */ 272 term[1] = factor[1]; /* G */ 273 term[2] = factor[2]; /* B */ 274 break; 275 case PIPE_BLENDFACTOR_SRC_COLOR: 276 term[0] = factor[0] * src[0]; /* R */ 277 term[1] = factor[1] * src[1]; /* G */ 278 term[2] = factor[2] * src[2]; /* B */ 279 break; 280 case PIPE_BLENDFACTOR_SRC_ALPHA: 281 term[0] = factor[0] * src[3]; /* R */ 282 term[1] = factor[1] * src[3]; /* G */ 283 term[2] = factor[2] * src[3]; /* B */ 284 break; 285 case PIPE_BLENDFACTOR_DST_COLOR: 286 term[0] = factor[0] * dst[0]; /* R */ 287 term[1] = factor[1] * dst[1]; /* G */ 288 term[2] = factor[2] * dst[2]; /* B */ 289 break; 290 case PIPE_BLENDFACTOR_DST_ALPHA: 291 term[0] = factor[0] * dst[3]; /* R */ 292 term[1] = factor[1] * dst[3]; /* G */ 293 term[2] = factor[2] * dst[3]; /* B */ 294 break; 295 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 296 temp = MIN2(src[3], 1.0f - dst[3]); 297 term[0] = factor[0] * temp; /* R */ 298 term[1] = factor[1] * temp; /* G */ 299 term[2] = factor[2] * temp; /* B */ 300 break; 301 case PIPE_BLENDFACTOR_CONST_COLOR: 302 term[0] = factor[0] * con[0]; /* R */ 303 term[1] = factor[1] * con[1]; /* G */ 304 term[2] = factor[2] * con[2]; /* B */ 305 break; 306 case PIPE_BLENDFACTOR_CONST_ALPHA: 307 term[0] = factor[0] * con[3]; /* R */ 308 term[1] = factor[1] * con[3]; /* G */ 309 term[2] = factor[2] * con[3]; /* B */ 310 break; 311 case PIPE_BLENDFACTOR_SRC1_COLOR: 312 assert(0); /* to do */ 313 break; 314 case PIPE_BLENDFACTOR_SRC1_ALPHA: 315 assert(0); /* to do */ 316 break; 317 case PIPE_BLENDFACTOR_ZERO: 318 term[0] = 0.0f; /* R */ 319 term[1] = 0.0f; /* G */ 320 term[2] = 0.0f; /* B */ 321 break; 322 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 323 term[0] = factor[0] * (1.0f - src[0]); /* R */ 324 term[1] = factor[1] * (1.0f - src[1]); /* G */ 325 term[2] = factor[2] * (1.0f - src[2]); /* B */ 326 break; 327 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 328 term[0] = factor[0] * (1.0f - src[3]); /* R */ 329 term[1] = factor[1] * (1.0f - src[3]); /* G */ 330 term[2] = factor[2] * (1.0f - src[3]); /* B */ 331 break; 332 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 333 term[0] = factor[0] * (1.0f - dst[3]); /* R */ 334 term[1] = factor[1] * (1.0f - dst[3]); /* G */ 335 term[2] = factor[2] * (1.0f - dst[3]); /* B */ 336 break; 337 case PIPE_BLENDFACTOR_INV_DST_COLOR: 338 term[0] = factor[0] * (1.0f - dst[0]); /* R */ 339 term[1] = factor[1] * (1.0f - dst[1]); /* G */ 340 term[2] = factor[2] * (1.0f - dst[2]); /* B */ 341 break; 342 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 343 term[0] = factor[0] * (1.0f - con[0]); /* R */ 344 term[1] = factor[1] * (1.0f - con[1]); /* G */ 345 term[2] = factor[2] * (1.0f - con[2]); /* B */ 346 break; 347 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 348 term[0] = factor[0] * (1.0f - con[3]); /* R */ 349 term[1] = factor[1] * (1.0f - con[3]); /* G */ 350 term[2] = factor[2] * (1.0f - con[3]); /* B */ 351 break; 352 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 353 assert(0); /* to do */ 354 break; 355 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 356 assert(0); /* to do */ 357 break; 358 default: 359 assert(0); 360 } 361 362 /* 363 * Compute src/first term A 364 */ 365 switch (alpha_factor) { 366 case PIPE_BLENDFACTOR_ONE: 367 term[3] = factor[3]; /* A */ 368 break; 369 case PIPE_BLENDFACTOR_SRC_COLOR: 370 case PIPE_BLENDFACTOR_SRC_ALPHA: 371 term[3] = factor[3] * src[3]; /* A */ 372 break; 373 case PIPE_BLENDFACTOR_DST_COLOR: 374 case PIPE_BLENDFACTOR_DST_ALPHA: 375 term[3] = factor[3] * dst[3]; /* A */ 376 break; 377 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 378 term[3] = src[3]; /* A */ 379 break; 380 case PIPE_BLENDFACTOR_CONST_COLOR: 381 case PIPE_BLENDFACTOR_CONST_ALPHA: 382 term[3] = factor[3] * con[3]; /* A */ 383 break; 384 case PIPE_BLENDFACTOR_ZERO: 385 term[3] = 0.0f; /* A */ 386 break; 387 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 388 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 389 term[3] = factor[3] * (1.0f - src[3]); /* A */ 390 break; 391 case PIPE_BLENDFACTOR_INV_DST_COLOR: 392 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 393 term[3] = factor[3] * (1.0f - dst[3]); /* A */ 394 break; 395 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 396 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 397 term[3] = factor[3] * (1.0f - con[3]); 398 break; 399 default: 400 assert(0); 401 } 402} 403 404 405static void 406compute_blend_ref(const struct pipe_blend_state *blend, 407 const double *src, 408 const double *dst, 409 const double *con, 410 double *res) 411{ 412 double src_term[4]; 413 double dst_term[4]; 414 415 compute_blend_ref_term(blend->rt[0].rgb_src_factor, blend->rt[0].alpha_src_factor, 416 src, src, dst, con, src_term); 417 compute_blend_ref_term(blend->rt[0].rgb_dst_factor, blend->rt[0].alpha_dst_factor, 418 dst, src, dst, con, dst_term); 419 420 /* 421 * Combine RGB terms 422 */ 423 switch (blend->rt[0].rgb_func) { 424 case PIPE_BLEND_ADD: 425 ADD_SAT(res[0], src_term[0], dst_term[0]); /* R */ 426 ADD_SAT(res[1], src_term[1], dst_term[1]); /* G */ 427 ADD_SAT(res[2], src_term[2], dst_term[2]); /* B */ 428 break; 429 case PIPE_BLEND_SUBTRACT: 430 SUB_SAT(res[0], src_term[0], dst_term[0]); /* R */ 431 SUB_SAT(res[1], src_term[1], dst_term[1]); /* G */ 432 SUB_SAT(res[2], src_term[2], dst_term[2]); /* B */ 433 break; 434 case PIPE_BLEND_REVERSE_SUBTRACT: 435 SUB_SAT(res[0], dst_term[0], src_term[0]); /* R */ 436 SUB_SAT(res[1], dst_term[1], src_term[1]); /* G */ 437 SUB_SAT(res[2], dst_term[2], src_term[2]); /* B */ 438 break; 439 case PIPE_BLEND_MIN: 440 res[0] = MIN2(src_term[0], dst_term[0]); /* R */ 441 res[1] = MIN2(src_term[1], dst_term[1]); /* G */ 442 res[2] = MIN2(src_term[2], dst_term[2]); /* B */ 443 break; 444 case PIPE_BLEND_MAX: 445 res[0] = MAX2(src_term[0], dst_term[0]); /* R */ 446 res[1] = MAX2(src_term[1], dst_term[1]); /* G */ 447 res[2] = MAX2(src_term[2], dst_term[2]); /* B */ 448 break; 449 default: 450 assert(0); 451 } 452 453 /* 454 * Combine A terms 455 */ 456 switch (blend->rt[0].alpha_func) { 457 case PIPE_BLEND_ADD: 458 ADD_SAT(res[3], src_term[3], dst_term[3]); /* A */ 459 break; 460 case PIPE_BLEND_SUBTRACT: 461 SUB_SAT(res[3], src_term[3], dst_term[3]); /* A */ 462 break; 463 case PIPE_BLEND_REVERSE_SUBTRACT: 464 SUB_SAT(res[3], dst_term[3], src_term[3]); /* A */ 465 break; 466 case PIPE_BLEND_MIN: 467 res[3] = MIN2(src_term[3], dst_term[3]); /* A */ 468 break; 469 case PIPE_BLEND_MAX: 470 res[3] = MAX2(src_term[3], dst_term[3]); /* A */ 471 break; 472 default: 473 assert(0); 474 } 475} 476 477 478PIPE_ALIGN_STACK 479static boolean 480test_one(unsigned verbose, 481 FILE *fp, 482 const struct pipe_blend_state *blend, 483 enum vector_mode mode, 484 struct lp_type type) 485{ 486 LLVMModuleRef module = NULL; 487 LLVMValueRef func = NULL; 488 LLVMExecutionEngineRef engine = NULL; 489 LLVMModuleProviderRef provider = NULL; 490 LLVMPassManagerRef pass = NULL; 491 char *error = NULL; 492 blend_test_ptr_t blend_test_ptr; 493 boolean success; 494 const unsigned n = LP_TEST_NUM_SAMPLES; 495 int64_t cycles[LP_TEST_NUM_SAMPLES]; 496 double cycles_avg = 0.0; 497 unsigned i, j; 498 void *code; 499 500 if(verbose >= 1) 501 dump_blend_type(stdout, blend, mode, type); 502 503 module = LLVMModuleCreateWithName("test"); 504 505 func = add_blend_test(module, blend, mode, type); 506 507 if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) { 508 LLVMDumpModule(module); 509 abort(); 510 } 511 LLVMDisposeMessage(error); 512 513 provider = LLVMCreateModuleProviderForExistingModule(module); 514 if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) { 515 if(verbose < 1) 516 dump_blend_type(stderr, blend, mode, type); 517 fprintf(stderr, "%s\n", error); 518 LLVMDisposeMessage(error); 519 abort(); 520 } 521 522#if 0 523 pass = LLVMCreatePassManager(); 524 LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass); 525 /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, 526 * but there are more on SVN. */ 527 LLVMAddConstantPropagationPass(pass); 528 LLVMAddInstructionCombiningPass(pass); 529 LLVMAddPromoteMemoryToRegisterPass(pass); 530 LLVMAddGVNPass(pass); 531 LLVMAddCFGSimplificationPass(pass); 532 LLVMRunPassManager(pass, module); 533#else 534 (void)pass; 535#endif 536 537 if(verbose >= 2) 538 LLVMDumpModule(module); 539 540 code = LLVMGetPointerToGlobal(engine, func); 541 blend_test_ptr = voidptr_to_blend_test_ptr_t(code); 542 543 if(verbose >= 2) 544 lp_disassemble(code); 545 546 success = TRUE; 547 for(i = 0; i < n && success; ++i) { 548 if(mode == AoS) { 549 PIPE_ALIGN_VAR(16) uint8_t src[LP_NATIVE_VECTOR_WIDTH/8]; 550 PIPE_ALIGN_VAR(16) uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8]; 551 PIPE_ALIGN_VAR(16) uint8_t con[LP_NATIVE_VECTOR_WIDTH/8]; 552 PIPE_ALIGN_VAR(16) uint8_t res[LP_NATIVE_VECTOR_WIDTH/8]; 553 PIPE_ALIGN_VAR(16) uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8]; 554 int64_t start_counter = 0; 555 int64_t end_counter = 0; 556 557 random_vec(type, src); 558 random_vec(type, dst); 559 random_vec(type, con); 560 561 { 562 double fsrc[LP_MAX_VECTOR_LENGTH]; 563 double fdst[LP_MAX_VECTOR_LENGTH]; 564 double fcon[LP_MAX_VECTOR_LENGTH]; 565 double fref[LP_MAX_VECTOR_LENGTH]; 566 567 read_vec(type, src, fsrc); 568 read_vec(type, dst, fdst); 569 read_vec(type, con, fcon); 570 571 for(j = 0; j < type.length; j += 4) 572 compute_blend_ref(blend, fsrc + j, fdst + j, fcon + j, fref + j); 573 574 write_vec(type, ref, fref); 575 } 576 577 start_counter = rdtsc(); 578 blend_test_ptr(src, dst, con, res); 579 end_counter = rdtsc(); 580 581 cycles[i] = end_counter - start_counter; 582 583 if(!compare_vec(type, res, ref)) { 584 success = FALSE; 585 586 if(verbose < 1) 587 dump_blend_type(stderr, blend, mode, type); 588 fprintf(stderr, "MISMATCH\n"); 589 590 fprintf(stderr, " Src: "); 591 dump_vec(stderr, type, src); 592 fprintf(stderr, "\n"); 593 594 fprintf(stderr, " Dst: "); 595 dump_vec(stderr, type, dst); 596 fprintf(stderr, "\n"); 597 598 fprintf(stderr, " Con: "); 599 dump_vec(stderr, type, con); 600 fprintf(stderr, "\n"); 601 602 fprintf(stderr, " Res: "); 603 dump_vec(stderr, type, res); 604 fprintf(stderr, "\n"); 605 606 fprintf(stderr, " Ref: "); 607 dump_vec(stderr, type, ref); 608 fprintf(stderr, "\n"); 609 } 610 } 611 612 if(mode == SoA) { 613 const unsigned stride = type.length*type.width/8; 614 PIPE_ALIGN_VAR(16) uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8]; 615 PIPE_ALIGN_VAR(16) uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8]; 616 PIPE_ALIGN_VAR(16) uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8]; 617 PIPE_ALIGN_VAR(16) uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8]; 618 PIPE_ALIGN_VAR(16) uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8]; 619 int64_t start_counter = 0; 620 int64_t end_counter = 0; 621 boolean mismatch; 622 623 for(j = 0; j < 4; ++j) { 624 random_vec(type, src + j*stride); 625 random_vec(type, dst + j*stride); 626 random_vec(type, con + j*stride); 627 } 628 629 { 630 double fsrc[4]; 631 double fdst[4]; 632 double fcon[4]; 633 double fref[4]; 634 unsigned k; 635 636 for(k = 0; k < type.length; ++k) { 637 for(j = 0; j < 4; ++j) { 638 fsrc[j] = read_elem(type, src + j*stride, k); 639 fdst[j] = read_elem(type, dst + j*stride, k); 640 fcon[j] = read_elem(type, con + j*stride, k); 641 } 642 643 compute_blend_ref(blend, fsrc, fdst, fcon, fref); 644 645 for(j = 0; j < 4; ++j) 646 write_elem(type, ref + j*stride, k, fref[j]); 647 } 648 } 649 650 start_counter = rdtsc(); 651 blend_test_ptr(src, dst, con, res); 652 end_counter = rdtsc(); 653 654 cycles[i] = end_counter - start_counter; 655 656 mismatch = FALSE; 657 for (j = 0; j < 4; ++j) 658 if(!compare_vec(type, res + j*stride, ref + j*stride)) 659 mismatch = TRUE; 660 661 if (mismatch) { 662 success = FALSE; 663 664 if(verbose < 1) 665 dump_blend_type(stderr, blend, mode, type); 666 fprintf(stderr, "MISMATCH\n"); 667 for(j = 0; j < 4; ++j) { 668 char channel = "RGBA"[j]; 669 fprintf(stderr, " Src%c: ", channel); 670 dump_vec(stderr, type, src + j*stride); 671 fprintf(stderr, "\n"); 672 673 fprintf(stderr, " Dst%c: ", channel); 674 dump_vec(stderr, type, dst + j*stride); 675 fprintf(stderr, "\n"); 676 677 fprintf(stderr, " Con%c: ", channel); 678 dump_vec(stderr, type, con + j*stride); 679 fprintf(stderr, "\n"); 680 681 fprintf(stderr, " Res%c: ", channel); 682 dump_vec(stderr, type, res + j*stride); 683 fprintf(stderr, "\n"); 684 685 fprintf(stderr, " Ref%c: ", channel); 686 dump_vec(stderr, type, ref + j*stride); 687 fprintf(stderr, "\n"); 688 } 689 } 690 } 691 } 692 693 /* 694 * Unfortunately the output of cycle counter is not very reliable as it comes 695 * -- sometimes we get outliers (due IRQs perhaps?) which are 696 * better removed to avoid random or biased data. 697 */ 698 { 699 double sum = 0.0, sum2 = 0.0; 700 double avg, std; 701 unsigned m; 702 703 for(i = 0; i < n; ++i) { 704 sum += cycles[i]; 705 sum2 += cycles[i]*cycles[i]; 706 } 707 708 avg = sum/n; 709 std = sqrtf((sum2 - n*avg*avg)/n); 710 711 m = 0; 712 sum = 0.0; 713 for(i = 0; i < n; ++i) { 714 if(fabs(cycles[i] - avg) <= 4.0*std) { 715 sum += cycles[i]; 716 ++m; 717 } 718 } 719 720 cycles_avg = sum/m; 721 722 } 723 724 if(fp) 725 write_tsv_row(fp, blend, mode, type, cycles_avg, success); 726 727 if (!success) { 728 if(verbose < 2) 729 LLVMDumpModule(module); 730 LLVMWriteBitcodeToFile(module, "blend.bc"); 731 fprintf(stderr, "blend.bc written\n"); 732 fprintf(stderr, "Invoke as \"llc -o - blend.bc\"\n"); 733 abort(); 734 } 735 736 LLVMFreeMachineCodeForFunction(engine, func); 737 738 LLVMDisposeExecutionEngine(engine); 739 if(pass) 740 LLVMDisposePassManager(pass); 741 742 return success; 743} 744 745 746const unsigned 747blend_factors[] = { 748 PIPE_BLENDFACTOR_ZERO, 749 PIPE_BLENDFACTOR_ONE, 750 PIPE_BLENDFACTOR_SRC_COLOR, 751 PIPE_BLENDFACTOR_SRC_ALPHA, 752 PIPE_BLENDFACTOR_DST_COLOR, 753 PIPE_BLENDFACTOR_DST_ALPHA, 754 PIPE_BLENDFACTOR_CONST_COLOR, 755 PIPE_BLENDFACTOR_CONST_ALPHA, 756#if 0 757 PIPE_BLENDFACTOR_SRC1_COLOR, 758 PIPE_BLENDFACTOR_SRC1_ALPHA, 759#endif 760 PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE, 761 PIPE_BLENDFACTOR_INV_SRC_COLOR, 762 PIPE_BLENDFACTOR_INV_SRC_ALPHA, 763 PIPE_BLENDFACTOR_INV_DST_COLOR, 764 PIPE_BLENDFACTOR_INV_DST_ALPHA, 765 PIPE_BLENDFACTOR_INV_CONST_COLOR, 766 PIPE_BLENDFACTOR_INV_CONST_ALPHA, 767#if 0 768 PIPE_BLENDFACTOR_INV_SRC1_COLOR, 769 PIPE_BLENDFACTOR_INV_SRC1_ALPHA, 770#endif 771}; 772 773 774const unsigned 775blend_funcs[] = { 776 PIPE_BLEND_ADD, 777 PIPE_BLEND_SUBTRACT, 778 PIPE_BLEND_REVERSE_SUBTRACT, 779 PIPE_BLEND_MIN, 780 PIPE_BLEND_MAX 781}; 782 783 784const struct lp_type blend_types[] = { 785 /* float, fixed, sign, norm, width, len */ 786 { TRUE, FALSE, FALSE, TRUE, 32, 4 }, /* f32 x 4 */ 787 { FALSE, FALSE, FALSE, TRUE, 8, 16 }, /* u8n x 16 */ 788}; 789 790 791const unsigned num_funcs = sizeof(blend_funcs)/sizeof(blend_funcs[0]); 792const unsigned num_factors = sizeof(blend_factors)/sizeof(blend_factors[0]); 793const unsigned num_types = sizeof(blend_types)/sizeof(blend_types[0]); 794 795 796boolean 797test_all(unsigned verbose, FILE *fp) 798{ 799 const unsigned *rgb_func; 800 const unsigned *rgb_src_factor; 801 const unsigned *rgb_dst_factor; 802 const unsigned *alpha_func; 803 const unsigned *alpha_src_factor; 804 const unsigned *alpha_dst_factor; 805 struct pipe_blend_state blend; 806 enum vector_mode mode; 807 const struct lp_type *type; 808 bool success = TRUE; 809 810 for(rgb_func = blend_funcs; rgb_func < &blend_funcs[num_funcs]; ++rgb_func) { 811 for(alpha_func = blend_funcs; alpha_func < &blend_funcs[num_funcs]; ++alpha_func) { 812 for(rgb_src_factor = blend_factors; rgb_src_factor < &blend_factors[num_factors]; ++rgb_src_factor) { 813 for(rgb_dst_factor = blend_factors; rgb_dst_factor <= rgb_src_factor; ++rgb_dst_factor) { 814 for(alpha_src_factor = blend_factors; alpha_src_factor < &blend_factors[num_factors]; ++alpha_src_factor) { 815 for(alpha_dst_factor = blend_factors; alpha_dst_factor <= alpha_src_factor; ++alpha_dst_factor) { 816 for(mode = 0; mode < 2; ++mode) { 817 for(type = blend_types; type < &blend_types[num_types]; ++type) { 818 819 if(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 820 *alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) 821 continue; 822 823 memset(&blend, 0, sizeof blend); 824 blend.rt[0].blend_enable = 1; 825 blend.rt[0].rgb_func = *rgb_func; 826 blend.rt[0].rgb_src_factor = *rgb_src_factor; 827 blend.rt[0].rgb_dst_factor = *rgb_dst_factor; 828 blend.rt[0].alpha_func = *alpha_func; 829 blend.rt[0].alpha_src_factor = *alpha_src_factor; 830 blend.rt[0].alpha_dst_factor = *alpha_dst_factor; 831 blend.rt[0].colormask = PIPE_MASK_RGBA; 832 833 if(!test_one(verbose, fp, &blend, mode, *type)) 834 success = FALSE; 835 836 } 837 } 838 } 839 } 840 } 841 } 842 } 843 } 844 845 return success; 846} 847 848 849boolean 850test_some(unsigned verbose, FILE *fp, unsigned long n) 851{ 852 const unsigned *rgb_func; 853 const unsigned *rgb_src_factor; 854 const unsigned *rgb_dst_factor; 855 const unsigned *alpha_func; 856 const unsigned *alpha_src_factor; 857 const unsigned *alpha_dst_factor; 858 struct pipe_blend_state blend; 859 enum vector_mode mode; 860 const struct lp_type *type; 861 unsigned long i; 862 bool success = TRUE; 863 864 for(i = 0; i < n; ++i) { 865 rgb_func = &blend_funcs[rand() % num_funcs]; 866 alpha_func = &blend_funcs[rand() % num_funcs]; 867 rgb_src_factor = &blend_factors[rand() % num_factors]; 868 alpha_src_factor = &blend_factors[rand() % num_factors]; 869 870 do { 871 rgb_dst_factor = &blend_factors[rand() % num_factors]; 872 } while(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE); 873 874 do { 875 alpha_dst_factor = &blend_factors[rand() % num_factors]; 876 } while(*alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE); 877 878 mode = rand() & 1; 879 880 type = &blend_types[rand() % num_types]; 881 882 memset(&blend, 0, sizeof blend); 883 blend.rt[0].blend_enable = 1; 884 blend.rt[0].rgb_func = *rgb_func; 885 blend.rt[0].rgb_src_factor = *rgb_src_factor; 886 blend.rt[0].rgb_dst_factor = *rgb_dst_factor; 887 blend.rt[0].alpha_func = *alpha_func; 888 blend.rt[0].alpha_src_factor = *alpha_src_factor; 889 blend.rt[0].alpha_dst_factor = *alpha_dst_factor; 890 blend.rt[0].colormask = PIPE_MASK_RGBA; 891 892 if(!test_one(verbose, fp, &blend, mode, *type)) 893 success = FALSE; 894 } 895 896 return success; 897} 898