/* lp_test_blend.c revision 99e28d4ee3ce995845d9290b735b1fbe1b96886d */
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
25 * 26 **************************************************************************/ 27 28 29/** 30 * @file 31 * Unit tests for blend LLVM IR generation 32 * 33 * @author Jose Fonseca <jfonseca@vmware.com> 34 * 35 * Blend computation code derived from code written by 36 * @author Brian Paul <brian@vmware.com> 37 */ 38 39 40#include "lp_bld_type.h" 41#include "lp_bld_arit.h" 42#include "lp_bld_blend.h" 43#include "lp_bld_debug.h" 44#include "lp_test.h" 45 46 47enum vector_mode 48{ 49 AoS = 0, 50 SoA = 1 51}; 52 53 54typedef void (*blend_test_ptr_t)(const void *src, const void *dst, const void *con, void *res); 55 56 57void 58write_tsv_header(FILE *fp) 59{ 60 fprintf(fp, 61 "result\t" 62 "cycles_per_channel\t" 63 "mode\t" 64 "type\t" 65 "sep_func\t" 66 "sep_src_factor\t" 67 "sep_dst_factor\t" 68 "rgb_func\t" 69 "rgb_src_factor\t" 70 "rgb_dst_factor\t" 71 "alpha_func\t" 72 "alpha_src_factor\t" 73 "alpha_dst_factor\n"); 74 75 fflush(fp); 76} 77 78 79static void 80write_tsv_row(FILE *fp, 81 const struct pipe_blend_state *blend, 82 enum vector_mode mode, 83 struct lp_type type, 84 double cycles, 85 boolean success) 86{ 87 fprintf(fp, "%s\t", success ? "pass" : "fail"); 88 89 if (mode == AoS) { 90 fprintf(fp, "%.1f\t", cycles / type.length); 91 fprintf(fp, "aos\t"); 92 } 93 94 if (mode == SoA) { 95 fprintf(fp, "%.1f\t", cycles / (4 * type.length)); 96 fprintf(fp, "soa\t"); 97 } 98 99 fprintf(fp, "%s%u%sx%u\t", 100 type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")), 101 type.width, 102 type.norm ? "n" : "", 103 type.length); 104 105 fprintf(fp, 106 "%s\t%s\t%s\t", 107 blend->rt[0].rgb_func != blend->rt[0].alpha_func ? "true" : "false", 108 blend->rt[0].rgb_src_factor != blend->rt[0].alpha_src_factor ? "true" : "false", 109 blend->rt[0].rgb_dst_factor != blend->rt[0].alpha_dst_factor ? 
"true" : "false"); 110 111 fprintf(fp, 112 "%s\t%s\t%s\t%s\t%s\t%s\n", 113 debug_dump_blend_func(blend->rt[0].rgb_func, TRUE), 114 debug_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE), 115 debug_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE), 116 debug_dump_blend_func(blend->rt[0].alpha_func, TRUE), 117 debug_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE), 118 debug_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE)); 119 120 fflush(fp); 121} 122 123 124static void 125dump_blend_type(FILE *fp, 126 const struct pipe_blend_state *blend, 127 enum vector_mode mode, 128 struct lp_type type) 129{ 130 fprintf(fp, "%s", mode ? "soa" : "aos"); 131 132 fprintf(fp, " type=%s%u%sx%u", 133 type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")), 134 type.width, 135 type.norm ? "n" : "", 136 type.length); 137 138 fprintf(fp, 139 " %s=%s %s=%s %s=%s %s=%s %s=%s %s=%s", 140 "rgb_func", debug_dump_blend_func(blend->rt[0].rgb_func, TRUE), 141 "rgb_src_factor", debug_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE), 142 "rgb_dst_factor", debug_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE), 143 "alpha_func", debug_dump_blend_func(blend->rt[0].alpha_func, TRUE), 144 "alpha_src_factor", debug_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE), 145 "alpha_dst_factor", debug_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE)); 146 147 fprintf(fp, " ...\n"); 148 fflush(fp); 149} 150 151 152static LLVMValueRef 153add_blend_test(LLVMModuleRef module, 154 const struct pipe_blend_state *blend, 155 enum vector_mode mode, 156 struct lp_type type) 157{ 158 LLVMTypeRef ret_type; 159 LLVMTypeRef vec_type; 160 LLVMTypeRef args[4]; 161 LLVMValueRef func; 162 LLVMValueRef src_ptr; 163 LLVMValueRef dst_ptr; 164 LLVMValueRef const_ptr; 165 LLVMValueRef res_ptr; 166 LLVMBasicBlockRef block; 167 LLVMBuilderRef builder; 168 169 ret_type = LLVMInt64Type(); 170 vec_type = lp_build_vec_type(type); 171 172 args[3] = args[2] = args[1] = args[0] = 
LLVMPointerType(vec_type, 0); 173 func = LLVMAddFunction(module, "test", LLVMFunctionType(LLVMVoidType(), args, 4, 0)); 174 LLVMSetFunctionCallConv(func, LLVMCCallConv); 175 src_ptr = LLVMGetParam(func, 0); 176 dst_ptr = LLVMGetParam(func, 1); 177 const_ptr = LLVMGetParam(func, 2); 178 res_ptr = LLVMGetParam(func, 3); 179 180 block = LLVMAppendBasicBlock(func, "entry"); 181 builder = LLVMCreateBuilder(); 182 LLVMPositionBuilderAtEnd(builder, block); 183 184 if (mode == AoS) { 185 LLVMValueRef src; 186 LLVMValueRef dst; 187 LLVMValueRef con; 188 LLVMValueRef res; 189 190 src = LLVMBuildLoad(builder, src_ptr, "src"); 191 dst = LLVMBuildLoad(builder, dst_ptr, "dst"); 192 con = LLVMBuildLoad(builder, const_ptr, "const"); 193 194 res = lp_build_blend_aos(builder, blend, type, src, dst, con, 3); 195 196 lp_build_name(res, "res"); 197 198 LLVMBuildStore(builder, res, res_ptr); 199 } 200 201 if (mode == SoA) { 202 LLVMValueRef src[4]; 203 LLVMValueRef dst[4]; 204 LLVMValueRef con[4]; 205 LLVMValueRef res[4]; 206 unsigned i; 207 208 for(i = 0; i < 4; ++i) { 209 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); 210 src[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, src_ptr, &index, 1, ""), ""); 211 dst[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), ""); 212 con[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), ""); 213 lp_build_name(src[i], "src.%c", "rgba"[i]); 214 lp_build_name(con[i], "con.%c", "rgba"[i]); 215 lp_build_name(dst[i], "dst.%c", "rgba"[i]); 216 } 217 218 lp_build_blend_soa(builder, blend, type, src, dst, con, res); 219 220 for(i = 0; i < 4; ++i) { 221 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); 222 lp_build_name(res[i], "res.%c", "rgba"[i]); 223 LLVMBuildStore(builder, res[i], LLVMBuildGEP(builder, res_ptr, &index, 1, "")); 224 } 225 } 226 227 LLVMBuildRetVoid(builder);; 228 229 LLVMDisposeBuilder(builder); 230 return func; 231} 232 233 234/** Add and limit result to ceiling of 
1.0 */ 235#define ADD_SAT(R, A, B) \ 236do { \ 237 R = (A) + (B); if (R > 1.0f) R = 1.0f; \ 238} while (0) 239 240/** Subtract and limit result to floor of 0.0 */ 241#define SUB_SAT(R, A, B) \ 242do { \ 243 R = (A) - (B); if (R < 0.0f) R = 0.0f; \ 244} while (0) 245 246 247static void 248compute_blend_ref_term(unsigned rgb_factor, 249 unsigned alpha_factor, 250 const double *factor, 251 const double *src, 252 const double *dst, 253 const double *con, 254 double *term) 255{ 256 double temp; 257 258 switch (rgb_factor) { 259 case PIPE_BLENDFACTOR_ONE: 260 term[0] = factor[0]; /* R */ 261 term[1] = factor[1]; /* G */ 262 term[2] = factor[2]; /* B */ 263 break; 264 case PIPE_BLENDFACTOR_SRC_COLOR: 265 term[0] = factor[0] * src[0]; /* R */ 266 term[1] = factor[1] * src[1]; /* G */ 267 term[2] = factor[2] * src[2]; /* B */ 268 break; 269 case PIPE_BLENDFACTOR_SRC_ALPHA: 270 term[0] = factor[0] * src[3]; /* R */ 271 term[1] = factor[1] * src[3]; /* G */ 272 term[2] = factor[2] * src[3]; /* B */ 273 break; 274 case PIPE_BLENDFACTOR_DST_COLOR: 275 term[0] = factor[0] * dst[0]; /* R */ 276 term[1] = factor[1] * dst[1]; /* G */ 277 term[2] = factor[2] * dst[2]; /* B */ 278 break; 279 case PIPE_BLENDFACTOR_DST_ALPHA: 280 term[0] = factor[0] * dst[3]; /* R */ 281 term[1] = factor[1] * dst[3]; /* G */ 282 term[2] = factor[2] * dst[3]; /* B */ 283 break; 284 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 285 temp = MIN2(src[3], 1.0f - dst[3]); 286 term[0] = factor[0] * temp; /* R */ 287 term[1] = factor[1] * temp; /* G */ 288 term[2] = factor[2] * temp; /* B */ 289 break; 290 case PIPE_BLENDFACTOR_CONST_COLOR: 291 term[0] = factor[0] * con[0]; /* R */ 292 term[1] = factor[1] * con[1]; /* G */ 293 term[2] = factor[2] * con[2]; /* B */ 294 break; 295 case PIPE_BLENDFACTOR_CONST_ALPHA: 296 term[0] = factor[0] * con[3]; /* R */ 297 term[1] = factor[1] * con[3]; /* G */ 298 term[2] = factor[2] * con[3]; /* B */ 299 break; 300 case PIPE_BLENDFACTOR_SRC1_COLOR: 301 assert(0); /* to do */ 302 
break; 303 case PIPE_BLENDFACTOR_SRC1_ALPHA: 304 assert(0); /* to do */ 305 break; 306 case PIPE_BLENDFACTOR_ZERO: 307 term[0] = 0.0f; /* R */ 308 term[1] = 0.0f; /* G */ 309 term[2] = 0.0f; /* B */ 310 break; 311 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 312 term[0] = factor[0] * (1.0f - src[0]); /* R */ 313 term[1] = factor[1] * (1.0f - src[1]); /* G */ 314 term[2] = factor[2] * (1.0f - src[2]); /* B */ 315 break; 316 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 317 term[0] = factor[0] * (1.0f - src[3]); /* R */ 318 term[1] = factor[1] * (1.0f - src[3]); /* G */ 319 term[2] = factor[2] * (1.0f - src[3]); /* B */ 320 break; 321 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 322 term[0] = factor[0] * (1.0f - dst[3]); /* R */ 323 term[1] = factor[1] * (1.0f - dst[3]); /* G */ 324 term[2] = factor[2] * (1.0f - dst[3]); /* B */ 325 break; 326 case PIPE_BLENDFACTOR_INV_DST_COLOR: 327 term[0] = factor[0] * (1.0f - dst[0]); /* R */ 328 term[1] = factor[1] * (1.0f - dst[1]); /* G */ 329 term[2] = factor[2] * (1.0f - dst[2]); /* B */ 330 break; 331 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 332 term[0] = factor[0] * (1.0f - con[0]); /* R */ 333 term[1] = factor[1] * (1.0f - con[1]); /* G */ 334 term[2] = factor[2] * (1.0f - con[2]); /* B */ 335 break; 336 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 337 term[0] = factor[0] * (1.0f - con[3]); /* R */ 338 term[1] = factor[1] * (1.0f - con[3]); /* G */ 339 term[2] = factor[2] * (1.0f - con[3]); /* B */ 340 break; 341 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 342 assert(0); /* to do */ 343 break; 344 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 345 assert(0); /* to do */ 346 break; 347 default: 348 assert(0); 349 } 350 351 /* 352 * Compute src/first term A 353 */ 354 switch (alpha_factor) { 355 case PIPE_BLENDFACTOR_ONE: 356 term[3] = factor[3]; /* A */ 357 break; 358 case PIPE_BLENDFACTOR_SRC_COLOR: 359 case PIPE_BLENDFACTOR_SRC_ALPHA: 360 term[3] = factor[3] * src[3]; /* A */ 361 break; 362 case PIPE_BLENDFACTOR_DST_COLOR: 363 case PIPE_BLENDFACTOR_DST_ALPHA: 364 
term[3] = factor[3] * dst[3]; /* A */ 365 break; 366 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 367 term[3] = src[3]; /* A */ 368 break; 369 case PIPE_BLENDFACTOR_CONST_COLOR: 370 case PIPE_BLENDFACTOR_CONST_ALPHA: 371 term[3] = factor[3] * con[3]; /* A */ 372 break; 373 case PIPE_BLENDFACTOR_ZERO: 374 term[3] = 0.0f; /* A */ 375 break; 376 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 377 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 378 term[3] = factor[3] * (1.0f - src[3]); /* A */ 379 break; 380 case PIPE_BLENDFACTOR_INV_DST_COLOR: 381 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 382 term[3] = factor[3] * (1.0f - dst[3]); /* A */ 383 break; 384 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 385 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 386 term[3] = factor[3] * (1.0f - con[3]); 387 break; 388 default: 389 assert(0); 390 } 391} 392 393 394static void 395compute_blend_ref(const struct pipe_blend_state *blend, 396 const double *src, 397 const double *dst, 398 const double *con, 399 double *res) 400{ 401 double src_term[4]; 402 double dst_term[4]; 403 404 compute_blend_ref_term(blend->rt[0].rgb_src_factor, blend->rt[0].alpha_src_factor, 405 src, src, dst, con, src_term); 406 compute_blend_ref_term(blend->rt[0].rgb_dst_factor, blend->rt[0].alpha_dst_factor, 407 dst, src, dst, con, dst_term); 408 409 /* 410 * Combine RGB terms 411 */ 412 switch (blend->rt[0].rgb_func) { 413 case PIPE_BLEND_ADD: 414 ADD_SAT(res[0], src_term[0], dst_term[0]); /* R */ 415 ADD_SAT(res[1], src_term[1], dst_term[1]); /* G */ 416 ADD_SAT(res[2], src_term[2], dst_term[2]); /* B */ 417 break; 418 case PIPE_BLEND_SUBTRACT: 419 SUB_SAT(res[0], src_term[0], dst_term[0]); /* R */ 420 SUB_SAT(res[1], src_term[1], dst_term[1]); /* G */ 421 SUB_SAT(res[2], src_term[2], dst_term[2]); /* B */ 422 break; 423 case PIPE_BLEND_REVERSE_SUBTRACT: 424 SUB_SAT(res[0], dst_term[0], src_term[0]); /* R */ 425 SUB_SAT(res[1], dst_term[1], src_term[1]); /* G */ 426 SUB_SAT(res[2], dst_term[2], src_term[2]); /* B */ 427 break; 428 case PIPE_BLEND_MIN: 
429 res[0] = MIN2(src_term[0], dst_term[0]); /* R */ 430 res[1] = MIN2(src_term[1], dst_term[1]); /* G */ 431 res[2] = MIN2(src_term[2], dst_term[2]); /* B */ 432 break; 433 case PIPE_BLEND_MAX: 434 res[0] = MAX2(src_term[0], dst_term[0]); /* R */ 435 res[1] = MAX2(src_term[1], dst_term[1]); /* G */ 436 res[2] = MAX2(src_term[2], dst_term[2]); /* B */ 437 break; 438 default: 439 assert(0); 440 } 441 442 /* 443 * Combine A terms 444 */ 445 switch (blend->rt[0].alpha_func) { 446 case PIPE_BLEND_ADD: 447 ADD_SAT(res[3], src_term[3], dst_term[3]); /* A */ 448 break; 449 case PIPE_BLEND_SUBTRACT: 450 SUB_SAT(res[3], src_term[3], dst_term[3]); /* A */ 451 break; 452 case PIPE_BLEND_REVERSE_SUBTRACT: 453 SUB_SAT(res[3], dst_term[3], src_term[3]); /* A */ 454 break; 455 case PIPE_BLEND_MIN: 456 res[3] = MIN2(src_term[3], dst_term[3]); /* A */ 457 break; 458 case PIPE_BLEND_MAX: 459 res[3] = MAX2(src_term[3], dst_term[3]); /* A */ 460 break; 461 default: 462 assert(0); 463 } 464} 465 466 467PIPE_ALIGN_STACK 468static boolean 469test_one(unsigned verbose, 470 FILE *fp, 471 const struct pipe_blend_state *blend, 472 enum vector_mode mode, 473 struct lp_type type) 474{ 475 LLVMModuleRef module = NULL; 476 LLVMValueRef func = NULL; 477 LLVMExecutionEngineRef engine = NULL; 478 LLVMModuleProviderRef provider = NULL; 479 LLVMPassManagerRef pass = NULL; 480 char *error = NULL; 481 blend_test_ptr_t blend_test_ptr; 482 boolean success; 483 const unsigned n = LP_TEST_NUM_SAMPLES; 484 int64_t cycles[LP_TEST_NUM_SAMPLES]; 485 double cycles_avg = 0.0; 486 unsigned i, j; 487 488 if(verbose >= 1) 489 dump_blend_type(stdout, blend, mode, type); 490 491 module = LLVMModuleCreateWithName("test"); 492 493 func = add_blend_test(module, blend, mode, type); 494 495 if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) { 496 LLVMDumpModule(module); 497 abort(); 498 } 499 LLVMDisposeMessage(error); 500 501 provider = LLVMCreateModuleProviderForExistingModule(module); 502 if 
(LLVMCreateJITCompiler(&engine, provider, 1, &error)) { 503 if(verbose < 1) 504 dump_blend_type(stderr, blend, mode, type); 505 fprintf(stderr, "%s\n", error); 506 LLVMDisposeMessage(error); 507 abort(); 508 } 509 510#if 0 511 pass = LLVMCreatePassManager(); 512 LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass); 513 /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, 514 * but there are more on SVN. */ 515 LLVMAddConstantPropagationPass(pass); 516 LLVMAddInstructionCombiningPass(pass); 517 LLVMAddPromoteMemoryToRegisterPass(pass); 518 LLVMAddGVNPass(pass); 519 LLVMAddCFGSimplificationPass(pass); 520 LLVMRunPassManager(pass, module); 521#else 522 (void)pass; 523#endif 524 525 if(verbose >= 2) 526 LLVMDumpModule(module); 527 528 blend_test_ptr = (blend_test_ptr_t)LLVMGetPointerToGlobal(engine, func); 529 530 if(verbose >= 2) 531 lp_disassemble(blend_test_ptr); 532 533 success = TRUE; 534 for(i = 0; i < n && success; ++i) { 535 if(mode == AoS) { 536 PIPE_ALIGN_VAR(16) uint8_t src[LP_NATIVE_VECTOR_WIDTH/8]; 537 PIPE_ALIGN_VAR(16) uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8]; 538 PIPE_ALIGN_VAR(16) uint8_t con[LP_NATIVE_VECTOR_WIDTH/8]; 539 PIPE_ALIGN_VAR(16) uint8_t res[LP_NATIVE_VECTOR_WIDTH/8]; 540 PIPE_ALIGN_VAR(16) uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8]; 541 int64_t start_counter = 0; 542 int64_t end_counter = 0; 543 544 random_vec(type, src); 545 random_vec(type, dst); 546 random_vec(type, con); 547 548 { 549 double fsrc[LP_MAX_VECTOR_LENGTH]; 550 double fdst[LP_MAX_VECTOR_LENGTH]; 551 double fcon[LP_MAX_VECTOR_LENGTH]; 552 double fref[LP_MAX_VECTOR_LENGTH]; 553 554 read_vec(type, src, fsrc); 555 read_vec(type, dst, fdst); 556 read_vec(type, con, fcon); 557 558 for(j = 0; j < type.length; j += 4) 559 compute_blend_ref(blend, fsrc + j, fdst + j, fcon + j, fref + j); 560 561 write_vec(type, ref, fref); 562 } 563 564 start_counter = rdtsc(); 565 blend_test_ptr(src, dst, con, res); 566 end_counter = rdtsc(); 567 568 cycles[i] = end_counter 
- start_counter; 569 570 if(!compare_vec(type, res, ref)) { 571 success = FALSE; 572 573 if(verbose < 1) 574 dump_blend_type(stderr, blend, mode, type); 575 fprintf(stderr, "MISMATCH\n"); 576 577 fprintf(stderr, " Src: "); 578 dump_vec(stderr, type, src); 579 fprintf(stderr, "\n"); 580 581 fprintf(stderr, " Dst: "); 582 dump_vec(stderr, type, dst); 583 fprintf(stderr, "\n"); 584 585 fprintf(stderr, " Con: "); 586 dump_vec(stderr, type, con); 587 fprintf(stderr, "\n"); 588 589 fprintf(stderr, " Res: "); 590 dump_vec(stderr, type, res); 591 fprintf(stderr, "\n"); 592 593 fprintf(stderr, " Ref: "); 594 dump_vec(stderr, type, ref); 595 fprintf(stderr, "\n"); 596 } 597 } 598 599 if(mode == SoA) { 600 const unsigned stride = type.length*type.width/8; 601 PIPE_ALIGN_VAR(16) uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8]; 602 PIPE_ALIGN_VAR(16) uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8]; 603 PIPE_ALIGN_VAR(16) uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8]; 604 PIPE_ALIGN_VAR(16) uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8]; 605 PIPE_ALIGN_VAR(16) uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8]; 606 int64_t start_counter = 0; 607 int64_t end_counter = 0; 608 boolean mismatch; 609 610 for(j = 0; j < 4; ++j) { 611 random_vec(type, src + j*stride); 612 random_vec(type, dst + j*stride); 613 random_vec(type, con + j*stride); 614 } 615 616 { 617 double fsrc[4]; 618 double fdst[4]; 619 double fcon[4]; 620 double fref[4]; 621 unsigned k; 622 623 for(k = 0; k < type.length; ++k) { 624 for(j = 0; j < 4; ++j) { 625 fsrc[j] = read_elem(type, src + j*stride, k); 626 fdst[j] = read_elem(type, dst + j*stride, k); 627 fcon[j] = read_elem(type, con + j*stride, k); 628 } 629 630 compute_blend_ref(blend, fsrc, fdst, fcon, fref); 631 632 for(j = 0; j < 4; ++j) 633 write_elem(type, ref + j*stride, k, fref[j]); 634 } 635 } 636 637 start_counter = rdtsc(); 638 blend_test_ptr(src, dst, con, res); 639 end_counter = rdtsc(); 640 641 cycles[i] = end_counter - start_counter; 642 643 mismatch = FALSE; 644 for (j = 0; j < 4; ++j) 
645 if(!compare_vec(type, res + j*stride, ref + j*stride)) 646 mismatch = TRUE; 647 648 if (mismatch) { 649 success = FALSE; 650 651 if(verbose < 1) 652 dump_blend_type(stderr, blend, mode, type); 653 fprintf(stderr, "MISMATCH\n"); 654 for(j = 0; j < 4; ++j) { 655 char channel = "RGBA"[j]; 656 fprintf(stderr, " Src%c: ", channel); 657 dump_vec(stderr, type, src + j*stride); 658 fprintf(stderr, "\n"); 659 660 fprintf(stderr, " Dst%c: ", channel); 661 dump_vec(stderr, type, dst + j*stride); 662 fprintf(stderr, "\n"); 663 664 fprintf(stderr, " Con%c: ", channel); 665 dump_vec(stderr, type, con + j*stride); 666 fprintf(stderr, "\n"); 667 668 fprintf(stderr, " Res%c: ", channel); 669 dump_vec(stderr, type, res + j*stride); 670 fprintf(stderr, "\n"); 671 672 fprintf(stderr, " Ref%c: ", channel); 673 dump_vec(stderr, type, ref + j*stride); 674 fprintf(stderr, "\n"); 675 } 676 } 677 } 678 } 679 680 /* 681 * Unfortunately the output of cycle counter is not very reliable as it comes 682 * -- sometimes we get outliers (due IRQs perhaps?) which are 683 * better removed to avoid random or biased data. 
684 */ 685 { 686 double sum = 0.0, sum2 = 0.0; 687 double avg, std; 688 unsigned m; 689 690 for(i = 0; i < n; ++i) { 691 sum += cycles[i]; 692 sum2 += cycles[i]*cycles[i]; 693 } 694 695 avg = sum/n; 696 std = sqrtf((sum2 - n*avg*avg)/n); 697 698 m = 0; 699 sum = 0.0; 700 for(i = 0; i < n; ++i) { 701 if(fabs(cycles[i] - avg) <= 4.0*std) { 702 sum += cycles[i]; 703 ++m; 704 } 705 } 706 707 cycles_avg = sum/m; 708 709 } 710 711 if(fp) 712 write_tsv_row(fp, blend, mode, type, cycles_avg, success); 713 714 if (!success) { 715 if(verbose < 2) 716 LLVMDumpModule(module); 717 LLVMWriteBitcodeToFile(module, "blend.bc"); 718 fprintf(stderr, "blend.bc written\n"); 719 fprintf(stderr, "Invoke as \"llc -o - blend.bc\"\n"); 720 abort(); 721 } 722 723 LLVMFreeMachineCodeForFunction(engine, func); 724 725 LLVMDisposeExecutionEngine(engine); 726 if(pass) 727 LLVMDisposePassManager(pass); 728 729 return success; 730} 731 732 733const unsigned 734blend_factors[] = { 735 PIPE_BLENDFACTOR_ZERO, 736 PIPE_BLENDFACTOR_ONE, 737 PIPE_BLENDFACTOR_SRC_COLOR, 738 PIPE_BLENDFACTOR_SRC_ALPHA, 739 PIPE_BLENDFACTOR_DST_COLOR, 740 PIPE_BLENDFACTOR_DST_ALPHA, 741 PIPE_BLENDFACTOR_CONST_COLOR, 742 PIPE_BLENDFACTOR_CONST_ALPHA, 743#if 0 744 PIPE_BLENDFACTOR_SRC1_COLOR, 745 PIPE_BLENDFACTOR_SRC1_ALPHA, 746#endif 747 PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE, 748 PIPE_BLENDFACTOR_INV_SRC_COLOR, 749 PIPE_BLENDFACTOR_INV_SRC_ALPHA, 750 PIPE_BLENDFACTOR_INV_DST_COLOR, 751 PIPE_BLENDFACTOR_INV_DST_ALPHA, 752 PIPE_BLENDFACTOR_INV_CONST_COLOR, 753 PIPE_BLENDFACTOR_INV_CONST_ALPHA, 754#if 0 755 PIPE_BLENDFACTOR_INV_SRC1_COLOR, 756 PIPE_BLENDFACTOR_INV_SRC1_ALPHA, 757#endif 758}; 759 760 761const unsigned 762blend_funcs[] = { 763 PIPE_BLEND_ADD, 764 PIPE_BLEND_SUBTRACT, 765 PIPE_BLEND_REVERSE_SUBTRACT, 766 PIPE_BLEND_MIN, 767 PIPE_BLEND_MAX 768}; 769 770 771const struct lp_type blend_types[] = { 772 /* float, fixed, sign, norm, width, len */ 773 { TRUE, FALSE, FALSE, TRUE, 32, 4 }, /* f32 x 4 */ 774 { FALSE, FALSE, 
FALSE, TRUE, 8, 16 }, /* u8n x 16 */ 775}; 776 777 778const unsigned num_funcs = sizeof(blend_funcs)/sizeof(blend_funcs[0]); 779const unsigned num_factors = sizeof(blend_factors)/sizeof(blend_factors[0]); 780const unsigned num_types = sizeof(blend_types)/sizeof(blend_types[0]); 781 782 783boolean 784test_all(unsigned verbose, FILE *fp) 785{ 786 const unsigned *rgb_func; 787 const unsigned *rgb_src_factor; 788 const unsigned *rgb_dst_factor; 789 const unsigned *alpha_func; 790 const unsigned *alpha_src_factor; 791 const unsigned *alpha_dst_factor; 792 struct pipe_blend_state blend; 793 enum vector_mode mode; 794 const struct lp_type *type; 795 bool success = TRUE; 796 797 for(rgb_func = blend_funcs; rgb_func < &blend_funcs[num_funcs]; ++rgb_func) { 798 for(alpha_func = blend_funcs; alpha_func < &blend_funcs[num_funcs]; ++alpha_func) { 799 for(rgb_src_factor = blend_factors; rgb_src_factor < &blend_factors[num_factors]; ++rgb_src_factor) { 800 for(rgb_dst_factor = blend_factors; rgb_dst_factor <= rgb_src_factor; ++rgb_dst_factor) { 801 for(alpha_src_factor = blend_factors; alpha_src_factor < &blend_factors[num_factors]; ++alpha_src_factor) { 802 for(alpha_dst_factor = blend_factors; alpha_dst_factor <= alpha_src_factor; ++alpha_dst_factor) { 803 for(mode = 0; mode < 2; ++mode) { 804 for(type = blend_types; type < &blend_types[num_types]; ++type) { 805 806 if(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 807 *alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) 808 continue; 809 810 memset(&blend, 0, sizeof blend); 811 blend.rt[0].blend_enable = 1; 812 blend.rt[0].rgb_func = *rgb_func; 813 blend.rt[0].rgb_src_factor = *rgb_src_factor; 814 blend.rt[0].rgb_dst_factor = *rgb_dst_factor; 815 blend.rt[0].alpha_func = *alpha_func; 816 blend.rt[0].alpha_src_factor = *alpha_src_factor; 817 blend.rt[0].alpha_dst_factor = *alpha_dst_factor; 818 blend.rt[0].colormask = PIPE_MASK_RGBA; 819 820 if(!test_one(verbose, fp, &blend, mode, *type)) 821 success = FALSE; 
822 823 } 824 } 825 } 826 } 827 } 828 } 829 } 830 } 831 832 return success; 833} 834 835 836boolean 837test_some(unsigned verbose, FILE *fp, unsigned long n) 838{ 839 const unsigned *rgb_func; 840 const unsigned *rgb_src_factor; 841 const unsigned *rgb_dst_factor; 842 const unsigned *alpha_func; 843 const unsigned *alpha_src_factor; 844 const unsigned *alpha_dst_factor; 845 struct pipe_blend_state blend; 846 enum vector_mode mode; 847 const struct lp_type *type; 848 unsigned long i; 849 bool success = TRUE; 850 851 for(i = 0; i < n; ++i) { 852 rgb_func = &blend_funcs[rand() % num_funcs]; 853 alpha_func = &blend_funcs[rand() % num_funcs]; 854 rgb_src_factor = &blend_factors[rand() % num_factors]; 855 alpha_src_factor = &blend_factors[rand() % num_factors]; 856 857 do { 858 rgb_dst_factor = &blend_factors[rand() % num_factors]; 859 } while(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE); 860 861 do { 862 alpha_dst_factor = &blend_factors[rand() % num_factors]; 863 } while(*alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE); 864 865 mode = rand() & 1; 866 867 type = &blend_types[rand() % num_types]; 868 869 memset(&blend, 0, sizeof blend); 870 blend.rt[0].blend_enable = 1; 871 blend.rt[0].rgb_func = *rgb_func; 872 blend.rt[0].rgb_src_factor = *rgb_src_factor; 873 blend.rt[0].rgb_dst_factor = *rgb_dst_factor; 874 blend.rt[0].alpha_func = *alpha_func; 875 blend.rt[0].alpha_src_factor = *alpha_src_factor; 876 blend.rt[0].alpha_dst_factor = *alpha_dst_factor; 877 blend.rt[0].colormask = PIPE_MASK_RGBA; 878 879 if(!test_one(verbose, fp, &blend, mode, *type)) 880 success = FALSE; 881 } 882 883 return success; 884} 885