lp_test_blend.c revision b544ab72994a7eda1e8c17fa217213ff3713dd99
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 29/** 30 * @file 31 * Unit tests for blend LLVM IR generation 32 * 33 * @author Jose Fonseca <jfonseca@vmware.com> 34 * 35 * Blend computation code derived from code written by 36 * @author Brian Paul <brian@vmware.com> 37 */ 38 39 40#include "lp_bld_type.h" 41#include "lp_bld_arit.h" 42#include "lp_bld_blend.h" 43#include "lp_bld_debug.h" 44#include "lp_test.h" 45 46 47enum vector_mode 48{ 49 AoS = 0, 50 SoA = 1 51}; 52 53 54typedef void (*blend_test_ptr_t)(const void *src, const void *dst, const void *con, void *res); 55 56 57void 58write_tsv_header(FILE *fp) 59{ 60 fprintf(fp, 61 "result\t" 62 "cycles_per_channel\t" 63 "mode\t" 64 "type\t" 65 "sep_func\t" 66 "sep_src_factor\t" 67 "sep_dst_factor\t" 68 "rgb_func\t" 69 "rgb_src_factor\t" 70 "rgb_dst_factor\t" 71 "alpha_func\t" 72 "alpha_src_factor\t" 73 "alpha_dst_factor\n"); 74 75 fflush(fp); 76} 77 78 79static void 80write_tsv_row(FILE *fp, 81 const struct pipe_blend_state *blend, 82 enum vector_mode mode, 83 struct lp_type type, 84 double cycles, 85 boolean success) 86{ 87 fprintf(fp, "%s\t", success ? "pass" : "fail"); 88 89 if (mode == AoS) { 90 fprintf(fp, "%.1f\t", cycles / type.length); 91 fprintf(fp, "aos\t"); 92 } 93 94 if (mode == SoA) { 95 fprintf(fp, "%.1f\t", cycles / (4 * type.length)); 96 fprintf(fp, "soa\t"); 97 } 98 99 fprintf(fp, "%s%u%sx%u\t", 100 type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")), 101 type.width, 102 type.norm ? "n" : "", 103 type.length); 104 105 fprintf(fp, 106 "%s\t%s\t%s\t", 107 blend->rgb_func != blend->alpha_func ? "true" : "false", 108 blend->rgb_src_factor != blend->alpha_src_factor ? "true" : "false", 109 blend->rgb_dst_factor != blend->alpha_dst_factor ? "true" : "false"); 110 111 fprintf(fp, 112 "%s\t%s\t%s\t%s\t%s\t%s\n", 113 debug_dump_blend_func(blend->rgb_func, TRUE), 114 debug_dump_blend_factor(blend->rgb_src_factor, TRUE), 115 debug_dump_blend_factor(blend->rgb_dst_factor, TRUE), 116 debug_dump_blend_func(blend->alpha_func, TRUE), 117 debug_dump_blend_factor(blend->alpha_src_factor, TRUE), 118 debug_dump_blend_factor(blend->alpha_dst_factor, TRUE)); 119 120 fflush(fp); 121} 122 123 124static void 125dump_blend_type(FILE *fp, 126 const struct pipe_blend_state *blend, 127 enum vector_mode mode, 128 struct lp_type type) 129{ 130 fprintf(fp, "%s", mode ? "soa" : "aos"); 131 132 fprintf(fp, " type=%s%u%sx%u", 133 type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")), 134 type.width, 135 type.norm ? "n" : "", 136 type.length); 137 138 fprintf(fp, 139 " %s=%s %s=%s %s=%s %s=%s %s=%s %s=%s", 140 "rgb_func", debug_dump_blend_func(blend->rgb_func, TRUE), 141 "rgb_src_factor", debug_dump_blend_factor(blend->rgb_src_factor, TRUE), 142 "rgb_dst_factor", debug_dump_blend_factor(blend->rgb_dst_factor, TRUE), 143 "alpha_func", debug_dump_blend_func(blend->alpha_func, TRUE), 144 "alpha_src_factor", debug_dump_blend_factor(blend->alpha_src_factor, TRUE), 145 "alpha_dst_factor", debug_dump_blend_factor(blend->alpha_dst_factor, TRUE)); 146 147 fprintf(fp, " ...\n"); 148 fflush(fp); 149} 150 151 152static LLVMValueRef 153add_blend_test(LLVMModuleRef module, 154 const struct pipe_blend_state *blend, 155 enum vector_mode mode, 156 struct lp_type type) 157{ 158 LLVMTypeRef ret_type; 159 LLVMTypeRef vec_type; 160 LLVMTypeRef args[4]; 161 LLVMValueRef func; 162 LLVMValueRef src_ptr; 163 LLVMValueRef dst_ptr; 164 LLVMValueRef const_ptr; 165 LLVMValueRef res_ptr; 166 LLVMBasicBlockRef block; 167 LLVMBuilderRef builder; 168 169 ret_type = LLVMInt64Type(); 170 vec_type = lp_build_vec_type(type); 171 172 args[3] = args[2] = args[1] = args[0] = LLVMPointerType(vec_type, 0); 173 func = LLVMAddFunction(module, "test", LLVMFunctionType(LLVMVoidType(), args, 4, 0)); 174 LLVMSetFunctionCallConv(func, LLVMCCallConv); 175 src_ptr = LLVMGetParam(func, 0); 176 dst_ptr = LLVMGetParam(func, 1); 177 const_ptr = LLVMGetParam(func, 2); 178 res_ptr = LLVMGetParam(func, 3); 179 180 block = LLVMAppendBasicBlock(func, "entry"); 181 builder = LLVMCreateBuilder(); 182 LLVMPositionBuilderAtEnd(builder, block); 183 184 if (mode == AoS) { 185 LLVMValueRef src; 186 LLVMValueRef dst; 187 LLVMValueRef con; 188 LLVMValueRef res; 189 190 src = LLVMBuildLoad(builder, src_ptr, "src"); 191 dst = LLVMBuildLoad(builder, dst_ptr, "dst"); 192 con = LLVMBuildLoad(builder, const_ptr, "const"); 193 194 res = lp_build_blend_aos(builder, blend, type, src, dst, con, 3); 195 196 lp_build_name(res, "res"); 197 198 LLVMBuildStore(builder, res, res_ptr); 199 } 200 201 if (mode == SoA) { 202 LLVMValueRef src[4]; 203 LLVMValueRef dst[4]; 204 LLVMValueRef con[4]; 205 LLVMValueRef res[4]; 206 unsigned i; 207 208 for(i = 0; i < 4; ++i) { 209 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); 210 src[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, src_ptr, &index, 1, ""), ""); 211 dst[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), ""); 212 con[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), ""); 213 lp_build_name(src[i], "src.%c", "rgba"[i]); 214 lp_build_name(con[i], "con.%c", "rgba"[i]); 215 lp_build_name(dst[i], "dst.%c", "rgba"[i]); 216 } 217 218 lp_build_blend_soa(builder, blend, type, src, dst, con, res); 219 220 for(i = 0; i < 4; ++i) { 221 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); 222 lp_build_name(res[i], "res.%c", "rgba"[i]); 223 LLVMBuildStore(builder, res[i], LLVMBuildGEP(builder, res_ptr, &index, 1, "")); 224 } 225 } 226 227 LLVMBuildRetVoid(builder);; 228 229 LLVMDisposeBuilder(builder); 230 return func; 231} 232 233 234/** Add and limit result to ceiling of 1.0 */ 235#define ADD_SAT(R, A, B) \ 236do { \ 237 R = (A) + (B); if (R > 1.0f) R = 1.0f; \ 238} while (0) 239 240/** Subtract and limit result to floor of 0.0 */ 241#define SUB_SAT(R, A, B) \ 242do { \ 243 R = (A) - (B); if (R < 0.0f) R = 0.0f; \ 244} while (0) 245 246 247static void 248compute_blend_ref_term(unsigned rgb_factor, 249 unsigned alpha_factor, 250 const double *factor, 251 const double *src, 252 const double *dst, 253 const double *con, 254 double *term) 255{ 256 double temp; 257 258 switch (rgb_factor) { 259 case PIPE_BLENDFACTOR_ONE: 260 term[0] = factor[0]; /* R */ 261 term[1] = factor[1]; /* G */ 262 term[2] = factor[2]; /* B */ 263 break; 264 case PIPE_BLENDFACTOR_SRC_COLOR: 265 term[0] = factor[0] * src[0]; /* R */ 266 term[1] = factor[1] * src[1]; /* G */ 267 term[2] = factor[2] * src[2]; /* B */ 268 break; 269 case PIPE_BLENDFACTOR_SRC_ALPHA: 270 term[0] = factor[0] * src[3]; /* R */ 271 term[1] = factor[1] * src[3]; /* G */ 272 term[2] = factor[2] * src[3]; /* B */ 273 break; 274 case PIPE_BLENDFACTOR_DST_COLOR: 275 term[0] = factor[0] * dst[0]; /* R */ 276 term[1] = factor[1] * dst[1]; /* G */ 277 term[2] = factor[2] * dst[2]; /* B */ 278 break; 279 case PIPE_BLENDFACTOR_DST_ALPHA: 280 term[0] = factor[0] * dst[3]; /* R */ 281 term[1] = factor[1] * dst[3]; /* G */ 282 term[2] = factor[2] * dst[3]; /* B */ 283 break; 284 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 285 temp = MIN2(src[3], 1.0f - dst[3]); 286 term[0] = factor[0] * temp; /* R */ 287 term[1] = factor[1] * temp; /* G */ 288 term[2] = factor[2] * temp; /* B */ 289 break; 290 case PIPE_BLENDFACTOR_CONST_COLOR: 291 term[0] = factor[0] * con[0]; /* R */ 292 term[1] = factor[1] * con[1]; /* G */ 293 term[2] = factor[2] * con[2]; /* B */ 294 break; 295 case PIPE_BLENDFACTOR_CONST_ALPHA: 296 term[0] = factor[0] * con[3]; /* R */ 297 term[1] = factor[1] * con[3]; /* G */ 298 term[2] = factor[2] * con[3]; /* B */ 299 break; 300 case PIPE_BLENDFACTOR_SRC1_COLOR: 301 assert(0); /* to do */ 302 break; 303 case PIPE_BLENDFACTOR_SRC1_ALPHA: 304 assert(0); /* to do */ 305 break; 306 case PIPE_BLENDFACTOR_ZERO: 307 term[0] = 0.0f; /* R */ 308 term[1] = 0.0f; /* G */ 309 term[2] = 0.0f; /* B */ 310 break; 311 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 312 term[0] = factor[0] * (1.0f - src[0]); /* R */ 313 term[1] = factor[1] * (1.0f - src[1]); /* G */ 314 term[2] = factor[2] * (1.0f - src[2]); /* B */ 315 break; 316 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 317 term[0] = factor[0] * (1.0f - src[3]); /* R */ 318 term[1] = factor[1] * (1.0f - src[3]); /* G */ 319 term[2] = factor[2] * (1.0f - src[3]); /* B */ 320 break; 321 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 322 term[0] = factor[0] * (1.0f - dst[3]); /* R */ 323 term[1] = factor[1] * (1.0f - dst[3]); /* G */ 324 term[2] = factor[2] * (1.0f - dst[3]); /* B */ 325 break; 326 case PIPE_BLENDFACTOR_INV_DST_COLOR: 327 term[0] = factor[0] * (1.0f - dst[0]); /* R */ 328 term[1] = factor[1] * (1.0f - dst[1]); /* G */ 329 term[2] = factor[2] * (1.0f - dst[2]); /* B */ 330 break; 331 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 332 term[0] = factor[0] * (1.0f - con[0]); /* R */ 333 term[1] = factor[1] * (1.0f - con[1]); /* G */ 334 term[2] = factor[2] * (1.0f - con[2]); /* B */ 335 break; 336 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 337 term[0] = factor[0] * (1.0f - con[3]); /* R */ 338 term[1] = factor[1] * (1.0f - con[3]); /* G */ 339 term[2] = factor[2] * (1.0f - con[3]); /* B */ 340 break; 341 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 342 assert(0); /* to do */ 343 break; 344 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 345 assert(0); /* to do */ 346 break; 347 default: 348 assert(0); 349 } 350 351 /* 352 * Compute src/first term A 353 */ 354 switch (alpha_factor) { 355 case PIPE_BLENDFACTOR_ONE: 356 term[3] = factor[3]; /* A */ 357 break; 358 case PIPE_BLENDFACTOR_SRC_COLOR: 359 case PIPE_BLENDFACTOR_SRC_ALPHA: 360 term[3] = factor[3] * src[3]; /* A */ 361 break; 362 case PIPE_BLENDFACTOR_DST_COLOR: 363 case PIPE_BLENDFACTOR_DST_ALPHA: 364 term[3] = factor[3] * dst[3]; /* A */ 365 break; 366 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 367 term[3] = src[3]; /* A */ 368 break; 369 case PIPE_BLENDFACTOR_CONST_COLOR: 370 case PIPE_BLENDFACTOR_CONST_ALPHA: 371 term[3] = factor[3] * con[3]; /* A */ 372 break; 373 case PIPE_BLENDFACTOR_ZERO: 374 term[3] = 0.0f; /* A */ 375 break; 376 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 377 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 378 term[3] = factor[3] * (1.0f - src[3]); /* A */ 379 break; 380 case PIPE_BLENDFACTOR_INV_DST_COLOR: 381 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 382 term[3] = factor[3] * (1.0f - dst[3]); /* A */ 383 break; 384 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 385 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 386 term[3] = factor[3] * (1.0f - con[3]); 387 break; 388 default: 389 assert(0); 390 } 391} 392 393 394static void 395compute_blend_ref(const struct pipe_blend_state *blend, 396 const double *src, 397 const double *dst, 398 const double *con, 399 double *res) 400{ 401 double src_term[4]; 402 double dst_term[4]; 403 404 compute_blend_ref_term(blend->rgb_src_factor, blend->alpha_src_factor, src, src, dst, con, src_term); 405 compute_blend_ref_term(blend->rgb_dst_factor, blend->alpha_dst_factor, dst, src, dst, con, dst_term); 406 407 /* 408 * Combine RGB terms 409 */ 410 switch (blend->rgb_func) { 411 case PIPE_BLEND_ADD: 412 ADD_SAT(res[0], src_term[0], dst_term[0]); /* R */ 413 ADD_SAT(res[1], src_term[1], dst_term[1]); /* G */ 414 ADD_SAT(res[2], src_term[2], dst_term[2]); /* B */ 415 break; 416 case PIPE_BLEND_SUBTRACT: 417 SUB_SAT(res[0], src_term[0], dst_term[0]); /* R */ 418 SUB_SAT(res[1], src_term[1], dst_term[1]); /* G */ 419 SUB_SAT(res[2], src_term[2], dst_term[2]); /* B */ 420 break; 421 case PIPE_BLEND_REVERSE_SUBTRACT: 422 SUB_SAT(res[0], dst_term[0], src_term[0]); /* R */ 423 SUB_SAT(res[1], dst_term[1], src_term[1]); /* G */ 424 SUB_SAT(res[2], dst_term[2], src_term[2]); /* B */ 425 break; 426 case PIPE_BLEND_MIN: 427 res[0] = MIN2(src_term[0], dst_term[0]); /* R */ 428 res[1] = MIN2(src_term[1], dst_term[1]); /* G */ 429 res[2] = MIN2(src_term[2], dst_term[2]); /* B */ 430 break; 431 case PIPE_BLEND_MAX: 432 res[0] = MAX2(src_term[0], dst_term[0]); /* R */ 433 res[1] = MAX2(src_term[1], dst_term[1]); /* G */ 434 res[2] = MAX2(src_term[2], dst_term[2]); /* B */ 435 break; 436 default: 437 assert(0); 438 } 439 440 /* 441 * Combine A terms 442 */ 443 switch (blend->alpha_func) { 444 case PIPE_BLEND_ADD: 445 ADD_SAT(res[3], src_term[3], dst_term[3]); /* A */ 446 break; 447 case PIPE_BLEND_SUBTRACT: 448 SUB_SAT(res[3], src_term[3], dst_term[3]); /* A */ 449 break; 450 case PIPE_BLEND_REVERSE_SUBTRACT: 451 SUB_SAT(res[3], dst_term[3], src_term[3]); /* A */ 452 break; 453 case PIPE_BLEND_MIN: 454 res[3] = MIN2(src_term[3], dst_term[3]); /* A */ 455 break; 456 case PIPE_BLEND_MAX: 457 res[3] = MAX2(src_term[3], dst_term[3]); /* A */ 458 break; 459 default: 460 assert(0); 461 } 462} 463 464 465static boolean 466test_one(unsigned verbose, 467 FILE *fp, 468 const struct pipe_blend_state *blend, 469 enum vector_mode mode, 470 struct lp_type type) 471{ 472 LLVMModuleRef module = NULL; 473 LLVMValueRef func = NULL; 474 LLVMExecutionEngineRef engine = NULL; 475 LLVMModuleProviderRef provider = NULL; 476 LLVMPassManagerRef pass = NULL; 477 char *error = NULL; 478 blend_test_ptr_t blend_test_ptr; 479 boolean success; 480 const unsigned n = LP_TEST_NUM_SAMPLES; 481 int64_t cycles[LP_TEST_NUM_SAMPLES]; 482 double cycles_avg = 0.0; 483 unsigned i, j; 484 485 if(verbose >= 1) 486 dump_blend_type(stdout, blend, mode, type); 487 488 module = LLVMModuleCreateWithName("test"); 489 490 func = add_blend_test(module, blend, mode, type); 491 492 if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) { 493 LLVMDumpModule(module); 494 abort(); 495 } 496 LLVMDisposeMessage(error); 497 498 provider = LLVMCreateModuleProviderForExistingModule(module); 499 if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) { 500 if(verbose < 1) 501 dump_blend_type(stderr, blend, mode, type); 502 fprintf(stderr, "%s\n", error); 503 LLVMDisposeMessage(error); 504 abort(); 505 } 506 507#if 0 508 pass = LLVMCreatePassManager(); 509 LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass); 510 /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, 511 * but there are more on SVN. */ 512 LLVMAddConstantPropagationPass(pass); 513 LLVMAddInstructionCombiningPass(pass); 514 LLVMAddPromoteMemoryToRegisterPass(pass); 515 LLVMAddGVNPass(pass); 516 LLVMAddCFGSimplificationPass(pass); 517 LLVMRunPassManager(pass, module); 518#else 519 (void)pass; 520#endif 521 522 if(verbose >= 2) 523 LLVMDumpModule(module); 524 525 blend_test_ptr = (blend_test_ptr_t)LLVMGetPointerToGlobal(engine, func); 526 527 if(verbose >= 2) 528 lp_disassemble(blend_test_ptr); 529 530 success = TRUE; 531 for(i = 0; i < n && success; ++i) { 532 if(mode == AoS) { 533 uint8_t src[LP_NATIVE_VECTOR_WIDTH/8]; 534 uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8]; 535 uint8_t con[LP_NATIVE_VECTOR_WIDTH/8]; 536 uint8_t res[LP_NATIVE_VECTOR_WIDTH/8]; 537 uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8]; 538 int64_t start_counter = 0; 539 int64_t end_counter = 0; 540 541 random_vec(type, src); 542 random_vec(type, dst); 543 random_vec(type, con); 544 545 { 546 double fsrc[LP_MAX_VECTOR_LENGTH]; 547 double fdst[LP_MAX_VECTOR_LENGTH]; 548 double fcon[LP_MAX_VECTOR_LENGTH]; 549 double fref[LP_MAX_VECTOR_LENGTH]; 550 551 read_vec(type, src, fsrc); 552 read_vec(type, dst, fdst); 553 read_vec(type, con, fcon); 554 555 for(j = 0; j < type.length; j += 4) 556 compute_blend_ref(blend, fsrc + j, fdst + j, fcon + j, fref + j); 557 558 write_vec(type, ref, fref); 559 } 560 561 start_counter = rdtsc(); 562 blend_test_ptr(src, dst, con, res); 563 end_counter = rdtsc(); 564 565 cycles[i] = end_counter - start_counter; 566 567 if(!compare_vec(type, res, ref)) { 568 success = FALSE; 569 570 if(verbose < 1) 571 dump_blend_type(stderr, blend, mode, type); 572 fprintf(stderr, "MISMATCH\n"); 573 574 fprintf(stderr, " Src: "); 575 dump_vec(stderr, type, src); 576 fprintf(stderr, "\n"); 577 578 fprintf(stderr, " Dst: "); 579 dump_vec(stderr, type, dst); 580 fprintf(stderr, "\n"); 581 582 fprintf(stderr, " Con: "); 583 dump_vec(stderr, type, con); 584 fprintf(stderr, "\n"); 585 586 fprintf(stderr, " Res: "); 587 dump_vec(stderr, type, res); 588 fprintf(stderr, "\n"); 589 590 fprintf(stderr, " Ref: "); 591 dump_vec(stderr, type, ref); 592 fprintf(stderr, "\n"); 593 } 594 } 595 596 if(mode == SoA) { 597 const unsigned stride = type.length*type.width/8; 598 uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8]; 599 uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8]; 600 uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8]; 601 uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8]; 602 uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8]; 603 int64_t start_counter = 0; 604 int64_t end_counter = 0; 605 boolean mismatch; 606 607 for(j = 0; j < 4; ++j) { 608 random_vec(type, src + j*stride); 609 random_vec(type, dst + j*stride); 610 random_vec(type, con + j*stride); 611 } 612 613 { 614 double fsrc[4]; 615 double fdst[4]; 616 double fcon[4]; 617 double fref[4]; 618 unsigned k; 619 620 for(k = 0; k < type.length; ++k) { 621 for(j = 0; j < 4; ++j) { 622 fsrc[j] = read_elem(type, src + j*stride, k); 623 fdst[j] = read_elem(type, dst + j*stride, k); 624 fcon[j] = read_elem(type, con + j*stride, k); 625 } 626 627 compute_blend_ref(blend, fsrc, fdst, fcon, fref); 628 629 for(j = 0; j < 4; ++j) 630 write_elem(type, ref + j*stride, k, fref[j]); 631 } 632 } 633 634 start_counter = rdtsc(); 635 blend_test_ptr(src, dst, con, res); 636 end_counter = rdtsc(); 637 638 cycles[i] = end_counter - start_counter; 639 640 mismatch = FALSE; 641 for (j = 0; j < 4; ++j) 642 if(!compare_vec(type, res + j*stride, ref + j*stride)) 643 mismatch = TRUE; 644 645 if (mismatch) { 646 success = FALSE; 647 648 if(verbose < 1) 649 dump_blend_type(stderr, blend, mode, type); 650 fprintf(stderr, "MISMATCH\n"); 651 for(j = 0; j < 4; ++j) { 652 char channel = "RGBA"[j]; 653 fprintf(stderr, " Src%c: ", channel); 654 dump_vec(stderr, type, src + j*stride); 655 fprintf(stderr, "\n"); 656 657 fprintf(stderr, " Dst%c: ", channel); 658 dump_vec(stderr, type, dst + j*stride); 659 fprintf(stderr, "\n"); 660 661 fprintf(stderr, " Con%c: ", channel); 662 dump_vec(stderr, type, con + j*stride); 663 fprintf(stderr, "\n"); 664 665 fprintf(stderr, " Res%c: ", channel); 666 dump_vec(stderr, type, res + j*stride); 667 fprintf(stderr, "\n"); 668 669 fprintf(stderr, " Ref%c: ", channel); 670 dump_vec(stderr, type, ref + j*stride); 671 fprintf(stderr, "\n"); 672 } 673 } 674 } 675 } 676 677 /* 678 * Unfortunately the output of cycle counter is not very reliable as it comes 679 * -- sometimes we get outliers (due IRQs perhaps?) which are 680 * better removed to avoid random or biased data. 681 */ 682 { 683 double sum = 0.0, sum2 = 0.0; 684 double avg, std; 685 unsigned m; 686 687 for(i = 0; i < n; ++i) { 688 sum += cycles[i]; 689 sum2 += cycles[i]*cycles[i]; 690 } 691 692 avg = sum/n; 693 std = sqrtf((sum2 - n*avg*avg)/n); 694 695 m = 0; 696 sum = 0.0; 697 for(i = 0; i < n; ++i) { 698 if(fabs(cycles[i] - avg) <= 4.0*std) { 699 sum += cycles[i]; 700 ++m; 701 } 702 } 703 704 cycles_avg = sum/m; 705 706 } 707 708 if(fp) 709 write_tsv_row(fp, blend, mode, type, cycles_avg, success); 710 711 if (!success) { 712 if(verbose < 2) 713 LLVMDumpModule(module); 714 LLVMWriteBitcodeToFile(module, "blend.bc"); 715 fprintf(stderr, "blend.bc written\n"); 716 fprintf(stderr, "Invoke as \"llc -o - blend.bc\"\n"); 717 abort(); 718 } 719 720 LLVMFreeMachineCodeForFunction(engine, func); 721 722 LLVMDisposeExecutionEngine(engine); 723 if(pass) 724 LLVMDisposePassManager(pass); 725 726 return success; 727} 728 729 730const unsigned 731blend_factors[] = { 732 PIPE_BLENDFACTOR_ZERO, 733 PIPE_BLENDFACTOR_ONE, 734 PIPE_BLENDFACTOR_SRC_COLOR, 735 PIPE_BLENDFACTOR_SRC_ALPHA, 736 PIPE_BLENDFACTOR_DST_COLOR, 737 PIPE_BLENDFACTOR_DST_ALPHA, 738 PIPE_BLENDFACTOR_CONST_COLOR, 739 PIPE_BLENDFACTOR_CONST_ALPHA, 740#if 0 741 PIPE_BLENDFACTOR_SRC1_COLOR, 742 PIPE_BLENDFACTOR_SRC1_ALPHA, 743#endif 744 PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE, 745 PIPE_BLENDFACTOR_INV_SRC_COLOR, 746 PIPE_BLENDFACTOR_INV_SRC_ALPHA, 747 PIPE_BLENDFACTOR_INV_DST_COLOR, 748 PIPE_BLENDFACTOR_INV_DST_ALPHA, 749 PIPE_BLENDFACTOR_INV_CONST_COLOR, 750 PIPE_BLENDFACTOR_INV_CONST_ALPHA, 751#if 0 752 PIPE_BLENDFACTOR_INV_SRC1_COLOR, 753 PIPE_BLENDFACTOR_INV_SRC1_ALPHA, 754#endif 755}; 756 757 758const unsigned 759blend_funcs[] = { 760 PIPE_BLEND_ADD, 761 PIPE_BLEND_SUBTRACT, 762 PIPE_BLEND_REVERSE_SUBTRACT, 763 PIPE_BLEND_MIN, 764 PIPE_BLEND_MAX 765}; 766 767 768const struct lp_type blend_types[] = { 769 /* float, fixed, sign, norm, width, len */ 770 { TRUE, FALSE, FALSE, TRUE, 32, 4 }, /* f32 x 4 */ 771 { FALSE, FALSE, FALSE, TRUE, 8, 16 }, /* u8n x 16 */ 772}; 773 774 775const unsigned num_funcs = sizeof(blend_funcs)/sizeof(blend_funcs[0]); 776const unsigned num_factors = sizeof(blend_factors)/sizeof(blend_factors[0]); 777const unsigned num_types = sizeof(blend_types)/sizeof(blend_types[0]); 778 779 780boolean 781test_all(unsigned verbose, FILE *fp) 782{ 783 const unsigned *rgb_func; 784 const unsigned *rgb_src_factor; 785 const unsigned *rgb_dst_factor; 786 const unsigned *alpha_func; 787 const unsigned *alpha_src_factor; 788 const unsigned *alpha_dst_factor; 789 struct pipe_blend_state blend; 790 enum vector_mode mode; 791 const struct lp_type *type; 792 bool success = TRUE; 793 794 for(rgb_func = blend_funcs; rgb_func < &blend_funcs[num_funcs]; ++rgb_func) { 795 for(alpha_func = blend_funcs; alpha_func < &blend_funcs[num_funcs]; ++alpha_func) { 796 for(rgb_src_factor = blend_factors; rgb_src_factor < &blend_factors[num_factors]; ++rgb_src_factor) { 797 for(rgb_dst_factor = blend_factors; rgb_dst_factor <= rgb_src_factor; ++rgb_dst_factor) { 798 for(alpha_src_factor = blend_factors; alpha_src_factor < &blend_factors[num_factors]; ++alpha_src_factor) { 799 for(alpha_dst_factor = blend_factors; alpha_dst_factor <= alpha_src_factor; ++alpha_dst_factor) { 800 for(mode = 0; mode < 2; ++mode) { 801 for(type = blend_types; type < &blend_types[num_types]; ++type) { 802 803 if(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 804 *alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) 805 continue; 806 807 memset(&blend, 0, sizeof blend); 808 blend.blend_enable = 1; 809 blend.rgb_func = *rgb_func; 810 blend.rgb_src_factor = *rgb_src_factor; 811 blend.rgb_dst_factor = *rgb_dst_factor; 812 blend.alpha_func = *alpha_func; 813 blend.alpha_src_factor = *alpha_src_factor; 814 blend.alpha_dst_factor = *alpha_dst_factor; 815 blend.colormask = PIPE_MASK_RGBA; 816 817 if(!test_one(verbose, fp, &blend, mode, *type)) 818 success = FALSE; 819 820 } 821 } 822 } 823 } 824 } 825 } 826 } 827 } 828 829 return success; 830} 831 832 833boolean 834test_some(unsigned verbose, FILE *fp, unsigned long n) 835{ 836 const unsigned *rgb_func; 837 const unsigned *rgb_src_factor; 838 const unsigned *rgb_dst_factor; 839 const unsigned *alpha_func; 840 const unsigned *alpha_src_factor; 841 const unsigned *alpha_dst_factor; 842 struct pipe_blend_state blend; 843 enum vector_mode mode; 844 const struct lp_type *type; 845 unsigned long i; 846 bool success = TRUE; 847 848 for(i = 0; i < n; ++i) { 849 rgb_func = &blend_funcs[rand() % num_funcs]; 850 alpha_func = &blend_funcs[rand() % num_funcs]; 851 rgb_src_factor = &blend_factors[rand() % num_factors]; 852 alpha_src_factor = &blend_factors[rand() % num_factors]; 853 854 do { 855 rgb_dst_factor = &blend_factors[rand() % num_factors]; 856 } while(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE); 857 858 do { 859 alpha_dst_factor = &blend_factors[rand() % num_factors]; 860 } while(*alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE); 861 862 mode = rand() & 1; 863 864 type = &blend_types[rand() % num_types]; 865 866 memset(&blend, 0, sizeof blend); 867 blend.blend_enable = 1; 868 blend.rgb_func = *rgb_func; 869 blend.rgb_src_factor = *rgb_src_factor; 870 blend.rgb_dst_factor = *rgb_dst_factor; 871 blend.alpha_func = *alpha_func; 872 blend.alpha_src_factor = *alpha_src_factor; 873 blend.alpha_dst_factor = *alpha_dst_factor; 874 blend.colormask = PIPE_MASK_RGBA; 875 876 if(!test_one(verbose, fp, &blend, mode, *type)) 877 success = FALSE; 878 } 879 880 return success; 881} 882