lp_test_blend.c revision 86bfe974b880dc2cbf40b91ba0fde34e8a9c756e
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 29/** 30 * @file 31 * Unit tests for blend LLVM IR generation 32 * 33 * @author Jose Fonseca <jfonseca@vmware.com> 34 * 35 * Blend computation code derived from code written by 36 * @author Brian Paul <brian@vmware.com> 37 */ 38 39 40#include "lp_bld_type.h" 41#include "lp_bld_arit.h" 42#include "lp_bld_blend.h" 43#include "lp_bld_debug.h" 44#include "lp_test.h" 45 46 47enum vector_mode 48{ 49 AoS = 0, 50 SoA = 1 51}; 52 53 54typedef void (*blend_test_ptr_t)(const void *src, const void *dst, const void *con, void *res); 55 56 57void 58write_tsv_header(FILE *fp) 59{ 60 fprintf(fp, 61 "result\t" 62 "cycles_per_channel\t" 63 "mode\t" 64 "type\t" 65 "sep_func\t" 66 "sep_src_factor\t" 67 "sep_dst_factor\t" 68 "rgb_func\t" 69 "rgb_src_factor\t" 70 "rgb_dst_factor\t" 71 "alpha_func\t" 72 "alpha_src_factor\t" 73 "alpha_dst_factor\n"); 74 75 fflush(fp); 76} 77 78 79static void 80write_tsv_row(FILE *fp, 81 const struct pipe_blend_state *blend, 82 enum vector_mode mode, 83 struct lp_type type, 84 double cycles, 85 boolean success) 86{ 87 fprintf(fp, "%s\t", success ? "pass" : "fail"); 88 89 if (mode == AoS) { 90 fprintf(fp, "%.1f\t", cycles / type.length); 91 fprintf(fp, "aos\t"); 92 } 93 94 if (mode == SoA) { 95 fprintf(fp, "%.1f\t", cycles / (4 * type.length)); 96 fprintf(fp, "soa\t"); 97 } 98 99 fprintf(fp, "%s%u%sx%u\t", 100 type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")), 101 type.width, 102 type.norm ? "n" : "", 103 type.length); 104 105 fprintf(fp, 106 "%s\t%s\t%s\t", 107 blend->rgb_func != blend->alpha_func ? "true" : "false", 108 blend->rgb_src_factor != blend->alpha_src_factor ? "true" : "false", 109 blend->rgb_dst_factor != blend->alpha_dst_factor ? "true" : "false"); 110 111 fprintf(fp, 112 "%s\t%s\t%s\t%s\t%s\t%s\n", 113 debug_dump_blend_func(blend->rgb_func, TRUE), 114 debug_dump_blend_factor(blend->rgb_src_factor, TRUE), 115 debug_dump_blend_factor(blend->rgb_dst_factor, TRUE), 116 debug_dump_blend_func(blend->alpha_func, TRUE), 117 debug_dump_blend_factor(blend->alpha_src_factor, TRUE), 118 debug_dump_blend_factor(blend->alpha_dst_factor, TRUE)); 119 120 fflush(fp); 121} 122 123 124static void 125dump_blend_type(FILE *fp, 126 const struct pipe_blend_state *blend, 127 enum vector_mode mode, 128 struct lp_type type) 129{ 130 fprintf(fp, "%s", mode ? "soa" : "aos"); 131 132 fprintf(fp, " type=%s%u%sx%u", 133 type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")), 134 type.width, 135 type.norm ? "n" : "", 136 type.length); 137 138 fprintf(fp, 139 " %s=%s %s=%s %s=%s %s=%s %s=%s %s=%s", 140 "rgb_func", debug_dump_blend_func(blend->rgb_func, TRUE), 141 "rgb_src_factor", debug_dump_blend_factor(blend->rgb_src_factor, TRUE), 142 "rgb_dst_factor", debug_dump_blend_factor(blend->rgb_dst_factor, TRUE), 143 "alpha_func", debug_dump_blend_func(blend->alpha_func, TRUE), 144 "alpha_src_factor", debug_dump_blend_factor(blend->alpha_src_factor, TRUE), 145 "alpha_dst_factor", debug_dump_blend_factor(blend->alpha_dst_factor, TRUE)); 146 147 fprintf(fp, " ...\n"); 148 fflush(fp); 149} 150 151 152static LLVMValueRef 153add_blend_test(LLVMModuleRef module, 154 const struct pipe_blend_state *blend, 155 enum vector_mode mode, 156 struct lp_type type) 157{ 158 LLVMTypeRef ret_type; 159 LLVMTypeRef vec_type; 160 LLVMTypeRef args[4]; 161 LLVMValueRef func; 162 LLVMValueRef src_ptr; 163 LLVMValueRef dst_ptr; 164 LLVMValueRef const_ptr; 165 LLVMValueRef res_ptr; 166 LLVMBasicBlockRef block; 167 LLVMBuilderRef builder; 168 169 ret_type = LLVMInt64Type(); 170 vec_type = lp_build_vec_type(type); 171 172 args[3] = args[2] = args[1] = args[0] = LLVMPointerType(vec_type, 0); 173 func = LLVMAddFunction(module, "test", LLVMFunctionType(LLVMVoidType(), args, 4, 0)); 174 LLVMSetFunctionCallConv(func, LLVMCCallConv); 175 src_ptr = LLVMGetParam(func, 0); 176 dst_ptr = LLVMGetParam(func, 1); 177 const_ptr = LLVMGetParam(func, 2); 178 res_ptr = LLVMGetParam(func, 3); 179 180 block = LLVMAppendBasicBlock(func, "entry"); 181 builder = LLVMCreateBuilder(); 182 LLVMPositionBuilderAtEnd(builder, block); 183 184 if (mode == AoS) { 185 LLVMValueRef src; 186 LLVMValueRef dst; 187 LLVMValueRef con; 188 LLVMValueRef res; 189 190 src = LLVMBuildLoad(builder, src_ptr, "src"); 191 dst = LLVMBuildLoad(builder, dst_ptr, "dst"); 192 con = LLVMBuildLoad(builder, const_ptr, "const"); 193 194 res = lp_build_blend_aos(builder, blend, type, src, dst, con, 3); 195 196 lp_build_name(res, "res"); 197 198 LLVMBuildStore(builder, res, res_ptr); 199 } 200 201 if (mode == SoA) { 202 LLVMValueRef src[4]; 203 LLVMValueRef dst[4]; 204 LLVMValueRef con[4]; 205 LLVMValueRef res[4]; 206 unsigned i; 207 208 for(i = 0; i < 4; ++i) { 209 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); 210 src[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, src_ptr, &index, 1, ""), ""); 211 dst[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), ""); 212 con[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), ""); 213 lp_build_name(src[i], "src.%c", "rgba"[i]); 214 lp_build_name(con[i], "con.%c", "rgba"[i]); 215 lp_build_name(dst[i], "dst.%c", "rgba"[i]); 216 } 217 218 lp_build_blend_soa(builder, blend, type, src, dst, con, res); 219 220 for(i = 0; i < 4; ++i) { 221 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); 222 lp_build_name(res[i], "res.%c", "rgba"[i]); 223 LLVMBuildStore(builder, res[i], LLVMBuildGEP(builder, res_ptr, &index, 1, "")); 224 } 225 } 226 227 LLVMBuildRetVoid(builder);; 228 229 LLVMDisposeBuilder(builder); 230 return func; 231} 232 233 234/** Add and limit result to ceiling of 1.0 */ 235#define ADD_SAT(R, A, B) \ 236do { \ 237 R = (A) + (B); if (R > 1.0f) R = 1.0f; \ 238} while (0) 239 240/** Subtract and limit result to floor of 0.0 */ 241#define SUB_SAT(R, A, B) \ 242do { \ 243 R = (A) - (B); if (R < 0.0f) R = 0.0f; \ 244} while (0) 245 246 247static void 248compute_blend_ref_term(unsigned rgb_factor, 249 unsigned alpha_factor, 250 const double *factor, 251 const double *src, 252 const double *dst, 253 const double *con, 254 double *term) 255{ 256 double temp; 257 258 switch (rgb_factor) { 259 case PIPE_BLENDFACTOR_ONE: 260 term[0] = factor[0]; /* R */ 261 term[1] = factor[1]; /* G */ 262 term[2] = factor[2]; /* B */ 263 break; 264 case PIPE_BLENDFACTOR_SRC_COLOR: 265 term[0] = factor[0] * src[0]; /* R */ 266 term[1] = factor[1] * src[1]; /* G */ 267 term[2] = factor[2] * src[2]; /* B */ 268 break; 269 case PIPE_BLENDFACTOR_SRC_ALPHA: 270 term[0] = factor[0] * src[3]; /* R */ 271 term[1] = factor[1] * src[3]; /* G */ 272 term[2] = factor[2] * src[3]; /* B */ 273 break; 274 case PIPE_BLENDFACTOR_DST_COLOR: 275 term[0] = factor[0] * dst[0]; /* R */ 276 term[1] = factor[1] * dst[1]; /* G */ 277 term[2] = factor[2] * dst[2]; /* B */ 278 break; 279 case PIPE_BLENDFACTOR_DST_ALPHA: 280 term[0] = factor[0] * dst[3]; /* R */ 281 term[1] = factor[1] * dst[3]; /* G */ 282 term[2] = factor[2] * dst[3]; /* B */ 283 break; 284 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 285 temp = MIN2(src[3], 1.0f - dst[3]); 286 term[0] = factor[0] * temp; /* R */ 287 term[1] = factor[1] * temp; /* G */ 288 term[2] = factor[2] * temp; /* B */ 289 break; 290 case PIPE_BLENDFACTOR_CONST_COLOR: 291 term[0] = factor[0] * con[0]; /* R */ 292 term[1] = factor[1] * con[1]; /* G */ 293 term[2] = factor[2] * con[2]; /* B */ 294 break; 295 case PIPE_BLENDFACTOR_CONST_ALPHA: 296 term[0] = factor[0] * con[3]; /* R */ 297 term[1] = factor[1] * con[3]; /* G */ 298 term[2] = factor[2] * con[3]; /* B */ 299 break; 300 case PIPE_BLENDFACTOR_SRC1_COLOR: 301 assert(0); /* to do */ 302 break; 303 case PIPE_BLENDFACTOR_SRC1_ALPHA: 304 assert(0); /* to do */ 305 break; 306 case PIPE_BLENDFACTOR_ZERO: 307 term[0] = 0.0f; /* R */ 308 term[1] = 0.0f; /* G */ 309 term[2] = 0.0f; /* B */ 310 break; 311 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 312 term[0] = factor[0] * (1.0f - src[0]); /* R */ 313 term[1] = factor[1] * (1.0f - src[1]); /* G */ 314 term[2] = factor[2] * (1.0f - src[2]); /* B */ 315 break; 316 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 317 term[0] = factor[0] * (1.0f - src[3]); /* R */ 318 term[1] = factor[1] * (1.0f - src[3]); /* G */ 319 term[2] = factor[2] * (1.0f - src[3]); /* B */ 320 break; 321 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 322 term[0] = factor[0] * (1.0f - dst[3]); /* R */ 323 term[1] = factor[1] * (1.0f - dst[3]); /* G */ 324 term[2] = factor[2] * (1.0f - dst[3]); /* B */ 325 break; 326 case PIPE_BLENDFACTOR_INV_DST_COLOR: 327 term[0] = factor[0] * (1.0f - dst[0]); /* R */ 328 term[1] = factor[1] * (1.0f - dst[1]); /* G */ 329 term[2] = factor[2] * (1.0f - dst[2]); /* B */ 330 break; 331 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 332 term[0] = factor[0] * (1.0f - con[0]); /* R */ 333 term[1] = factor[1] * (1.0f - con[1]); /* G */ 334 term[2] = factor[2] * (1.0f - con[2]); /* B */ 335 break; 336 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 337 term[0] = factor[0] * (1.0f - con[3]); /* R */ 338 term[1] = factor[1] * (1.0f - con[3]); /* G */ 339 term[2] = factor[2] * (1.0f - con[3]); /* B */ 340 break; 341 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 342 assert(0); /* to do */ 343 break; 344 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 345 assert(0); /* to do */ 346 break; 347 default: 348 assert(0); 349 } 350 351 /* 352 * Compute src/first term A 353 */ 354 switch (alpha_factor) { 355 case PIPE_BLENDFACTOR_ONE: 356 term[3] = factor[3]; /* A */ 357 break; 358 case PIPE_BLENDFACTOR_SRC_COLOR: 359 case PIPE_BLENDFACTOR_SRC_ALPHA: 360 term[3] = factor[3] * src[3]; /* A */ 361 break; 362 case PIPE_BLENDFACTOR_DST_COLOR: 363 case PIPE_BLENDFACTOR_DST_ALPHA: 364 term[3] = factor[3] * dst[3]; /* A */ 365 break; 366 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 367 term[3] = src[3]; /* A */ 368 break; 369 case PIPE_BLENDFACTOR_CONST_COLOR: 370 case PIPE_BLENDFACTOR_CONST_ALPHA: 371 term[3] = factor[3] * con[3]; /* A */ 372 break; 373 case PIPE_BLENDFACTOR_ZERO: 374 term[3] = 0.0f; /* A */ 375 break; 376 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 377 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 378 term[3] = factor[3] * (1.0f - src[3]); /* A */ 379 break; 380 case PIPE_BLENDFACTOR_INV_DST_COLOR: 381 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 382 term[3] = factor[3] * (1.0f - dst[3]); /* A */ 383 break; 384 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 385 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 386 term[3] = factor[3] * (1.0f - con[3]); 387 break; 388 default: 389 assert(0); 390 } 391} 392 393 394static void 395compute_blend_ref(const struct pipe_blend_state *blend, 396 const double *src, 397 const double *dst, 398 const double *con, 399 double *res) 400{ 401 double src_term[4]; 402 double dst_term[4]; 403 404 compute_blend_ref_term(blend->rgb_src_factor, blend->alpha_src_factor, src, src, dst, con, src_term); 405 compute_blend_ref_term(blend->rgb_dst_factor, blend->alpha_dst_factor, dst, src, dst, con, dst_term); 406 407 /* 408 * Combine RGB terms 409 */ 410 switch (blend->rgb_func) { 411 case PIPE_BLEND_ADD: 412 ADD_SAT(res[0], src_term[0], dst_term[0]); /* R */ 413 ADD_SAT(res[1], src_term[1], dst_term[1]); /* G */ 414 ADD_SAT(res[2], src_term[2], dst_term[2]); /* B */ 415 break; 416 case PIPE_BLEND_SUBTRACT: 417 SUB_SAT(res[0], src_term[0], dst_term[0]); /* R */ 418 SUB_SAT(res[1], src_term[1], dst_term[1]); /* G */ 419 SUB_SAT(res[2], src_term[2], dst_term[2]); /* B */ 420 break; 421 case PIPE_BLEND_REVERSE_SUBTRACT: 422 SUB_SAT(res[0], dst_term[0], src_term[0]); /* R */ 423 SUB_SAT(res[1], dst_term[1], src_term[1]); /* G */ 424 SUB_SAT(res[2], dst_term[2], src_term[2]); /* B */ 425 break; 426 case PIPE_BLEND_MIN: 427 res[0] = MIN2(src_term[0], dst_term[0]); /* R */ 428 res[1] = MIN2(src_term[1], dst_term[1]); /* G */ 429 res[2] = MIN2(src_term[2], dst_term[2]); /* B */ 430 break; 431 case PIPE_BLEND_MAX: 432 res[0] = MAX2(src_term[0], dst_term[0]); /* R */ 433 res[1] = MAX2(src_term[1], dst_term[1]); /* G */ 434 res[2] = MAX2(src_term[2], dst_term[2]); /* B */ 435 break; 436 default: 437 assert(0); 438 } 439 440 /* 441 * Combine A terms 442 */ 443 switch (blend->alpha_func) { 444 case PIPE_BLEND_ADD: 445 ADD_SAT(res[3], src_term[3], dst_term[3]); /* A */ 446 break; 447 case PIPE_BLEND_SUBTRACT: 448 SUB_SAT(res[3], src_term[3], dst_term[3]); /* A */ 449 break; 450 case PIPE_BLEND_REVERSE_SUBTRACT: 451 SUB_SAT(res[3], dst_term[3], src_term[3]); /* A */ 452 break; 453 case PIPE_BLEND_MIN: 454 res[3] = MIN2(src_term[3], dst_term[3]); /* A */ 455 break; 456 case PIPE_BLEND_MAX: 457 res[3] = MAX2(src_term[3], dst_term[3]); /* A */ 458 break; 459 default: 460 assert(0); 461 } 462} 463 464 465ALIGN_STACK 466static boolean 467test_one(unsigned verbose, 468 FILE *fp, 469 const struct pipe_blend_state *blend, 470 enum vector_mode mode, 471 struct lp_type type) 472{ 473 LLVMModuleRef module = NULL; 474 LLVMValueRef func = NULL; 475 LLVMExecutionEngineRef engine = NULL; 476 LLVMModuleProviderRef provider = NULL; 477 LLVMPassManagerRef pass = NULL; 478 char *error = NULL; 479 blend_test_ptr_t blend_test_ptr; 480 boolean success; 481 const unsigned n = LP_TEST_NUM_SAMPLES; 482 int64_t cycles[LP_TEST_NUM_SAMPLES]; 483 double cycles_avg = 0.0; 484 unsigned i, j; 485 486 if(verbose >= 1) 487 dump_blend_type(stdout, blend, mode, type); 488 489 module = LLVMModuleCreateWithName("test"); 490 491 func = add_blend_test(module, blend, mode, type); 492 493 if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) { 494 LLVMDumpModule(module); 495 abort(); 496 } 497 LLVMDisposeMessage(error); 498 499 provider = LLVMCreateModuleProviderForExistingModule(module); 500 if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) { 501 if(verbose < 1) 502 dump_blend_type(stderr, blend, mode, type); 503 fprintf(stderr, "%s\n", error); 504 LLVMDisposeMessage(error); 505 abort(); 506 } 507 508#if 0 509 pass = LLVMCreatePassManager(); 510 LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass); 511 /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, 512 * but there are more on SVN. */ 513 LLVMAddConstantPropagationPass(pass); 514 LLVMAddInstructionCombiningPass(pass); 515 LLVMAddPromoteMemoryToRegisterPass(pass); 516 LLVMAddGVNPass(pass); 517 LLVMAddCFGSimplificationPass(pass); 518 LLVMRunPassManager(pass, module); 519#else 520 (void)pass; 521#endif 522 523 if(verbose >= 2) 524 LLVMDumpModule(module); 525 526 blend_test_ptr = (blend_test_ptr_t)LLVMGetPointerToGlobal(engine, func); 527 528 if(verbose >= 2) 529 lp_disassemble(blend_test_ptr); 530 531 success = TRUE; 532 for(i = 0; i < n && success; ++i) { 533 if(mode == AoS) { 534 PIPE_ALIGN_VAR(16, uint8_t src[LP_NATIVE_VECTOR_WIDTH/8]); 535 PIPE_ALIGN_VAR(16, uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8]); 536 PIPE_ALIGN_VAR(16, uint8_t con[LP_NATIVE_VECTOR_WIDTH/8]); 537 PIPE_ALIGN_VAR(16, uint8_t res[LP_NATIVE_VECTOR_WIDTH/8]); 538 PIPE_ALIGN_VAR(16, uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8]); 539 int64_t start_counter = 0; 540 int64_t end_counter = 0; 541 542 random_vec(type, src); 543 random_vec(type, dst); 544 random_vec(type, con); 545 546 { 547 double fsrc[LP_MAX_VECTOR_LENGTH]; 548 double fdst[LP_MAX_VECTOR_LENGTH]; 549 double fcon[LP_MAX_VECTOR_LENGTH]; 550 double fref[LP_MAX_VECTOR_LENGTH]; 551 552 read_vec(type, src, fsrc); 553 read_vec(type, dst, fdst); 554 read_vec(type, con, fcon); 555 556 for(j = 0; j < type.length; j += 4) 557 compute_blend_ref(blend, fsrc + j, fdst + j, fcon + j, fref + j); 558 559 write_vec(type, ref, fref); 560 } 561 562 start_counter = rdtsc(); 563 blend_test_ptr(src, dst, con, res); 564 end_counter = rdtsc(); 565 566 cycles[i] = end_counter - start_counter; 567 568 if(!compare_vec(type, res, ref)) { 569 success = FALSE; 570 571 if(verbose < 1) 572 dump_blend_type(stderr, blend, mode, type); 573 fprintf(stderr, "MISMATCH\n"); 574 575 fprintf(stderr, " Src: "); 576 dump_vec(stderr, type, src); 577 fprintf(stderr, "\n"); 578 579 fprintf(stderr, " Dst: "); 580 dump_vec(stderr, type, dst); 581 fprintf(stderr, "\n"); 582 583 fprintf(stderr, " Con: "); 584 dump_vec(stderr, type, con); 585 fprintf(stderr, "\n"); 586 587 fprintf(stderr, " Res: "); 588 dump_vec(stderr, type, res); 589 fprintf(stderr, "\n"); 590 591 fprintf(stderr, " Ref: "); 592 dump_vec(stderr, type, ref); 593 fprintf(stderr, "\n"); 594 } 595 } 596 597 if(mode == SoA) { 598 const unsigned stride = type.length*type.width/8; 599 PIPE_ALIGN_VAR(16, uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8]); 600 PIPE_ALIGN_VAR(16, uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8]); 601 PIPE_ALIGN_VAR(16, uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8]); 602 PIPE_ALIGN_VAR(16, uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8]); 603 PIPE_ALIGN_VAR(16, uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8]); 604 int64_t start_counter = 0; 605 int64_t end_counter = 0; 606 boolean mismatch; 607 608 for(j = 0; j < 4; ++j) { 609 random_vec(type, src + j*stride); 610 random_vec(type, dst + j*stride); 611 random_vec(type, con + j*stride); 612 } 613 614 { 615 double fsrc[4]; 616 double fdst[4]; 617 double fcon[4]; 618 double fref[4]; 619 unsigned k; 620 621 for(k = 0; k < type.length; ++k) { 622 for(j = 0; j < 4; ++j) { 623 fsrc[j] = read_elem(type, src + j*stride, k); 624 fdst[j] = read_elem(type, dst + j*stride, k); 625 fcon[j] = read_elem(type, con + j*stride, k); 626 } 627 628 compute_blend_ref(blend, fsrc, fdst, fcon, fref); 629 630 for(j = 0; j < 4; ++j) 631 write_elem(type, ref + j*stride, k, fref[j]); 632 } 633 } 634 635 start_counter = rdtsc(); 636 blend_test_ptr(src, dst, con, res); 637 end_counter = rdtsc(); 638 639 cycles[i] = end_counter - start_counter; 640 641 mismatch = FALSE; 642 for (j = 0; j < 4; ++j) 643 if(!compare_vec(type, res + j*stride, ref + j*stride)) 644 mismatch = TRUE; 645 646 if (mismatch) { 647 success = FALSE; 648 649 if(verbose < 1) 650 dump_blend_type(stderr, blend, mode, type); 651 fprintf(stderr, "MISMATCH\n"); 652 for(j = 0; j < 4; ++j) { 653 char channel = "RGBA"[j]; 654 fprintf(stderr, " Src%c: ", channel); 655 dump_vec(stderr, type, src + j*stride); 656 fprintf(stderr, "\n"); 657 658 fprintf(stderr, " Dst%c: ", channel); 659 dump_vec(stderr, type, dst + j*stride); 660 fprintf(stderr, "\n"); 661 662 fprintf(stderr, " Con%c: ", channel); 663 dump_vec(stderr, type, con + j*stride); 664 fprintf(stderr, "\n"); 665 666 fprintf(stderr, " Res%c: ", channel); 667 dump_vec(stderr, type, res + j*stride); 668 fprintf(stderr, "\n"); 669 670 fprintf(stderr, " Ref%c: ", channel); 671 dump_vec(stderr, type, ref + j*stride); 672 fprintf(stderr, "\n"); 673 } 674 } 675 } 676 } 677 678 /* 679 * Unfortunately the output of cycle counter is not very reliable as it comes 680 * -- sometimes we get outliers (due IRQs perhaps?) which are 681 * better removed to avoid random or biased data. 682 */ 683 { 684 double sum = 0.0, sum2 = 0.0; 685 double avg, std; 686 unsigned m; 687 688 for(i = 0; i < n; ++i) { 689 sum += cycles[i]; 690 sum2 += cycles[i]*cycles[i]; 691 } 692 693 avg = sum/n; 694 std = sqrtf((sum2 - n*avg*avg)/n); 695 696 m = 0; 697 sum = 0.0; 698 for(i = 0; i < n; ++i) { 699 if(fabs(cycles[i] - avg) <= 4.0*std) { 700 sum += cycles[i]; 701 ++m; 702 } 703 } 704 705 cycles_avg = sum/m; 706 707 } 708 709 if(fp) 710 write_tsv_row(fp, blend, mode, type, cycles_avg, success); 711 712 if (!success) { 713 if(verbose < 2) 714 LLVMDumpModule(module); 715 LLVMWriteBitcodeToFile(module, "blend.bc"); 716 fprintf(stderr, "blend.bc written\n"); 717 fprintf(stderr, "Invoke as \"llc -o - blend.bc\"\n"); 718 abort(); 719 } 720 721 LLVMFreeMachineCodeForFunction(engine, func); 722 723 LLVMDisposeExecutionEngine(engine); 724 if(pass) 725 LLVMDisposePassManager(pass); 726 727 return success; 728} 729 730 731const unsigned 732blend_factors[] = { 733 PIPE_BLENDFACTOR_ZERO, 734 PIPE_BLENDFACTOR_ONE, 735 PIPE_BLENDFACTOR_SRC_COLOR, 736 PIPE_BLENDFACTOR_SRC_ALPHA, 737 PIPE_BLENDFACTOR_DST_COLOR, 738 PIPE_BLENDFACTOR_DST_ALPHA, 739 PIPE_BLENDFACTOR_CONST_COLOR, 740 PIPE_BLENDFACTOR_CONST_ALPHA, 741#if 0 742 PIPE_BLENDFACTOR_SRC1_COLOR, 743 PIPE_BLENDFACTOR_SRC1_ALPHA, 744#endif 745 PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE, 746 PIPE_BLENDFACTOR_INV_SRC_COLOR, 747 PIPE_BLENDFACTOR_INV_SRC_ALPHA, 748 PIPE_BLENDFACTOR_INV_DST_COLOR, 749 PIPE_BLENDFACTOR_INV_DST_ALPHA, 750 PIPE_BLENDFACTOR_INV_CONST_COLOR, 751 PIPE_BLENDFACTOR_INV_CONST_ALPHA, 752#if 0 753 PIPE_BLENDFACTOR_INV_SRC1_COLOR, 754 PIPE_BLENDFACTOR_INV_SRC1_ALPHA, 755#endif 756}; 757 758 759const unsigned 760blend_funcs[] = { 761 PIPE_BLEND_ADD, 762 PIPE_BLEND_SUBTRACT, 763 PIPE_BLEND_REVERSE_SUBTRACT, 764 PIPE_BLEND_MIN, 765 PIPE_BLEND_MAX 766}; 767 768 769const struct lp_type blend_types[] = { 770 /* float, fixed, sign, norm, width, len */ 771 { TRUE, FALSE, FALSE, TRUE, 32, 4 }, /* f32 x 4 */ 772 { FALSE, FALSE, FALSE, TRUE, 8, 16 }, /* u8n x 16 */ 773}; 774 775 776const unsigned num_funcs = sizeof(blend_funcs)/sizeof(blend_funcs[0]); 777const unsigned num_factors = sizeof(blend_factors)/sizeof(blend_factors[0]); 778const unsigned num_types = sizeof(blend_types)/sizeof(blend_types[0]); 779 780 781boolean 782test_all(unsigned verbose, FILE *fp) 783{ 784 const unsigned *rgb_func; 785 const unsigned *rgb_src_factor; 786 const unsigned *rgb_dst_factor; 787 const unsigned *alpha_func; 788 const unsigned *alpha_src_factor; 789 const unsigned *alpha_dst_factor; 790 struct pipe_blend_state blend; 791 enum vector_mode mode; 792 const struct lp_type *type; 793 bool success = TRUE; 794 795 for(rgb_func = blend_funcs; rgb_func < &blend_funcs[num_funcs]; ++rgb_func) { 796 for(alpha_func = blend_funcs; alpha_func < &blend_funcs[num_funcs]; ++alpha_func) { 797 for(rgb_src_factor = blend_factors; rgb_src_factor < &blend_factors[num_factors]; ++rgb_src_factor) { 798 for(rgb_dst_factor = blend_factors; rgb_dst_factor <= rgb_src_factor; ++rgb_dst_factor) { 799 for(alpha_src_factor = blend_factors; alpha_src_factor < &blend_factors[num_factors]; ++alpha_src_factor) { 800 for(alpha_dst_factor = blend_factors; alpha_dst_factor <= alpha_src_factor; ++alpha_dst_factor) { 801 for(mode = 0; mode < 2; ++mode) { 802 for(type = blend_types; type < &blend_types[num_types]; ++type) { 803 804 if(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 805 *alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) 806 continue; 807 808 memset(&blend, 0, sizeof blend); 809 blend.blend_enable = 1; 810 blend.rgb_func = *rgb_func; 811 blend.rgb_src_factor = *rgb_src_factor; 812 blend.rgb_dst_factor = *rgb_dst_factor; 813 blend.alpha_func = *alpha_func; 814 blend.alpha_src_factor = *alpha_src_factor; 815 blend.alpha_dst_factor = *alpha_dst_factor; 816 blend.colormask = PIPE_MASK_RGBA; 817 818 if(!test_one(verbose, fp, &blend, mode, *type)) 819 success = FALSE; 820 821 } 822 } 823 } 824 } 825 } 826 } 827 } 828 } 829 830 return success; 831} 832 833 834boolean 835test_some(unsigned verbose, FILE *fp, unsigned long n) 836{ 837 const unsigned *rgb_func; 838 const unsigned *rgb_src_factor; 839 const unsigned *rgb_dst_factor; 840 const unsigned *alpha_func; 841 const unsigned *alpha_src_factor; 842 const unsigned *alpha_dst_factor; 843 struct pipe_blend_state blend; 844 enum vector_mode mode; 845 const struct lp_type *type; 846 unsigned long i; 847 bool success = TRUE; 848 849 for(i = 0; i < n; ++i) { 850 rgb_func = &blend_funcs[rand() % num_funcs]; 851 alpha_func = &blend_funcs[rand() % num_funcs]; 852 rgb_src_factor = &blend_factors[rand() % num_factors]; 853 alpha_src_factor = &blend_factors[rand() % num_factors]; 854 855 do { 856 rgb_dst_factor = &blend_factors[rand() % num_factors]; 857 } while(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE); 858 859 do { 860 alpha_dst_factor = &blend_factors[rand() % num_factors]; 861 } while(*alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE); 862 863 mode = rand() & 1; 864 865 type = &blend_types[rand() % num_types]; 866 867 memset(&blend, 0, sizeof blend); 868 blend.blend_enable = 1; 869 blend.rgb_func = *rgb_func; 870 blend.rgb_src_factor = *rgb_src_factor; 871 blend.rgb_dst_factor = *rgb_dst_factor; 872 blend.alpha_func = *alpha_func; 873 blend.alpha_src_factor = *alpha_src_factor; 874 blend.alpha_dst_factor = *alpha_dst_factor; 875 blend.colormask = PIPE_MASK_RGBA; 876 877 if(!test_one(verbose, fp, &blend, mode, *type)) 878 success = FALSE; 879 } 880 881 return success; 882} 883