lp_test_blend.c revision 99e28d4ee3ce995845d9290b735b1fbe1b96886d
1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29/**
30 * @file
31 * Unit tests for blend LLVM IR generation
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Blend computation code derived from code written by
36 * @author Brian Paul <brian@vmware.com>
37 */
38
39
#include <math.h>

#include "lp_bld_type.h"
#include "lp_bld_arit.h"
#include "lp_bld_blend.h"
#include "lp_bld_debug.h"
#include "lp_test.h"
45
46
/**
 * Selects which blend code path a test exercises.
 *
 * The numeric values are relied upon: dump_blend_type() uses the mode as a
 * truth value and test_all() iterates over the modes as the integers 0 and 1.
 */
enum vector_mode {
   AoS = 0,   /**< one vector per operand, blended with lp_build_blend_aos() */
   SoA = 1    /**< four vectors per operand (r, g, b, a), blended with lp_build_blend_soa() */
};
52
53
/**
 * Signature of the JIT-compiled blend function under test: it reads the
 * source, destination and constant colour buffers and writes the result.
 */
typedef void
(*blend_test_ptr_t)(const void *src,
                    const void *dst,
                    const void *con,
                    void *res);
55
56
/**
 * Write the column-title line of the tab-separated results table.
 *
 * The column order matches the fields emitted by write_tsv_row().
 *
 * @param fp  stream that receives the header; it is flushed before returning
 */
void
write_tsv_header(FILE *fp)
{
   static const char header[] =
      "result\t"
      "cycles_per_channel\t"
      "mode\t"
      "type\t"
      "sep_func\t"
      "sep_src_factor\t"
      "sep_dst_factor\t"
      "rgb_func\t"
      "rgb_src_factor\t"
      "rgb_dst_factor\t"
      "alpha_func\t"
      "alpha_src_factor\t"
      "alpha_dst_factor\n";

   fputs(header, fp);
   fflush(fp);
}
77
78
79static void
80write_tsv_row(FILE *fp,
81              const struct pipe_blend_state *blend,
82              enum vector_mode mode,
83              struct lp_type type,
84              double cycles,
85              boolean success)
86{
87   fprintf(fp, "%s\t", success ? "pass" : "fail");
88
89   if (mode == AoS) {
90      fprintf(fp, "%.1f\t", cycles / type.length);
91      fprintf(fp, "aos\t");
92   }
93
94   if (mode == SoA) {
95      fprintf(fp, "%.1f\t", cycles / (4 * type.length));
96      fprintf(fp, "soa\t");
97   }
98
99   fprintf(fp, "%s%u%sx%u\t",
100           type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
101           type.width,
102           type.norm ? "n" : "",
103           type.length);
104
105   fprintf(fp,
106           "%s\t%s\t%s\t",
107           blend->rt[0].rgb_func != blend->rt[0].alpha_func ? "true" : "false",
108           blend->rt[0].rgb_src_factor != blend->rt[0].alpha_src_factor ? "true" : "false",
109           blend->rt[0].rgb_dst_factor != blend->rt[0].alpha_dst_factor ? "true" : "false");
110
111   fprintf(fp,
112           "%s\t%s\t%s\t%s\t%s\t%s\n",
113           debug_dump_blend_func(blend->rt[0].rgb_func, TRUE),
114           debug_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE),
115           debug_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE),
116           debug_dump_blend_func(blend->rt[0].alpha_func, TRUE),
117           debug_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE),
118           debug_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE));
119
120   fflush(fp);
121}
122
123
124static void
125dump_blend_type(FILE *fp,
126                const struct pipe_blend_state *blend,
127                enum vector_mode mode,
128                struct lp_type type)
129{
130   fprintf(fp, "%s", mode ? "soa" : "aos");
131
132   fprintf(fp, " type=%s%u%sx%u",
133           type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
134           type.width,
135           type.norm ? "n" : "",
136           type.length);
137
138   fprintf(fp,
139           " %s=%s %s=%s %s=%s %s=%s %s=%s %s=%s",
140           "rgb_func",         debug_dump_blend_func(blend->rt[0].rgb_func, TRUE),
141           "rgb_src_factor",   debug_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE),
142           "rgb_dst_factor",   debug_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE),
143           "alpha_func",       debug_dump_blend_func(blend->rt[0].alpha_func, TRUE),
144           "alpha_src_factor", debug_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE),
145           "alpha_dst_factor", debug_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE));
146
147   fprintf(fp, " ...\n");
148   fflush(fp);
149}
150
151
152static LLVMValueRef
153add_blend_test(LLVMModuleRef module,
154               const struct pipe_blend_state *blend,
155               enum vector_mode mode,
156               struct lp_type type)
157{
158   LLVMTypeRef ret_type;
159   LLVMTypeRef vec_type;
160   LLVMTypeRef args[4];
161   LLVMValueRef func;
162   LLVMValueRef src_ptr;
163   LLVMValueRef dst_ptr;
164   LLVMValueRef const_ptr;
165   LLVMValueRef res_ptr;
166   LLVMBasicBlockRef block;
167   LLVMBuilderRef builder;
168
169   ret_type = LLVMInt64Type();
170   vec_type = lp_build_vec_type(type);
171
172   args[3] = args[2] = args[1] = args[0] = LLVMPointerType(vec_type, 0);
173   func = LLVMAddFunction(module, "test", LLVMFunctionType(LLVMVoidType(), args, 4, 0));
174   LLVMSetFunctionCallConv(func, LLVMCCallConv);
175   src_ptr = LLVMGetParam(func, 0);
176   dst_ptr = LLVMGetParam(func, 1);
177   const_ptr = LLVMGetParam(func, 2);
178   res_ptr = LLVMGetParam(func, 3);
179
180   block = LLVMAppendBasicBlock(func, "entry");
181   builder = LLVMCreateBuilder();
182   LLVMPositionBuilderAtEnd(builder, block);
183
184   if (mode == AoS) {
185      LLVMValueRef src;
186      LLVMValueRef dst;
187      LLVMValueRef con;
188      LLVMValueRef res;
189
190      src = LLVMBuildLoad(builder, src_ptr, "src");
191      dst = LLVMBuildLoad(builder, dst_ptr, "dst");
192      con = LLVMBuildLoad(builder, const_ptr, "const");
193
194      res = lp_build_blend_aos(builder, blend, type, src, dst, con, 3);
195
196      lp_build_name(res, "res");
197
198      LLVMBuildStore(builder, res, res_ptr);
199   }
200
201   if (mode == SoA) {
202      LLVMValueRef src[4];
203      LLVMValueRef dst[4];
204      LLVMValueRef con[4];
205      LLVMValueRef res[4];
206      unsigned i;
207
208      for(i = 0; i < 4; ++i) {
209         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
210         src[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, src_ptr, &index, 1, ""), "");
211         dst[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), "");
212         con[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), "");
213         lp_build_name(src[i], "src.%c", "rgba"[i]);
214         lp_build_name(con[i], "con.%c", "rgba"[i]);
215         lp_build_name(dst[i], "dst.%c", "rgba"[i]);
216      }
217
218      lp_build_blend_soa(builder, blend, type, src, dst, con, res);
219
220      for(i = 0; i < 4; ++i) {
221         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
222         lp_build_name(res[i], "res.%c", "rgba"[i]);
223         LLVMBuildStore(builder, res[i], LLVMBuildGEP(builder, res_ptr, &index, 1, ""));
224      }
225   }
226
227   LLVMBuildRetVoid(builder);;
228
229   LLVMDisposeBuilder(builder);
230   return func;
231}
232
233
/**
 * Store A + B in R, clamping the result to at most 1.0.
 *
 * R must be an lvalue; it is evaluated more than once.
 */
#define ADD_SAT(R, A, B) \
do { \
   (R) = (A) + (B); \
   if ((R) > 1.0f) { \
      (R) = 1.0f; \
   } \
} while (0)
239
/**
 * Store A - B in R, clamping the result to at least 0.0.
 *
 * R must be an lvalue; it is evaluated more than once.
 */
#define SUB_SAT(R, A, B) \
do { \
   (R) = (A) - (B); \
   if ((R) < 0.0f) { \
      (R) = 0.0f; \
   } \
} while (0)
245
246
247static void
248compute_blend_ref_term(unsigned rgb_factor,
249                       unsigned alpha_factor,
250                       const double *factor,
251                       const double *src,
252                       const double *dst,
253                       const double *con,
254                       double *term)
255{
256   double temp;
257
258   switch (rgb_factor) {
259   case PIPE_BLENDFACTOR_ONE:
260      term[0] = factor[0]; /* R */
261      term[1] = factor[1]; /* G */
262      term[2] = factor[2]; /* B */
263      break;
264   case PIPE_BLENDFACTOR_SRC_COLOR:
265      term[0] = factor[0] * src[0]; /* R */
266      term[1] = factor[1] * src[1]; /* G */
267      term[2] = factor[2] * src[2]; /* B */
268      break;
269   case PIPE_BLENDFACTOR_SRC_ALPHA:
270      term[0] = factor[0] * src[3]; /* R */
271      term[1] = factor[1] * src[3]; /* G */
272      term[2] = factor[2] * src[3]; /* B */
273      break;
274   case PIPE_BLENDFACTOR_DST_COLOR:
275      term[0] = factor[0] * dst[0]; /* R */
276      term[1] = factor[1] * dst[1]; /* G */
277      term[2] = factor[2] * dst[2]; /* B */
278      break;
279   case PIPE_BLENDFACTOR_DST_ALPHA:
280      term[0] = factor[0] * dst[3]; /* R */
281      term[1] = factor[1] * dst[3]; /* G */
282      term[2] = factor[2] * dst[3]; /* B */
283      break;
284   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
285      temp = MIN2(src[3], 1.0f - dst[3]);
286      term[0] = factor[0] * temp; /* R */
287      term[1] = factor[1] * temp; /* G */
288      term[2] = factor[2] * temp; /* B */
289      break;
290   case PIPE_BLENDFACTOR_CONST_COLOR:
291      term[0] = factor[0] * con[0]; /* R */
292      term[1] = factor[1] * con[1]; /* G */
293      term[2] = factor[2] * con[2]; /* B */
294      break;
295   case PIPE_BLENDFACTOR_CONST_ALPHA:
296      term[0] = factor[0] * con[3]; /* R */
297      term[1] = factor[1] * con[3]; /* G */
298      term[2] = factor[2] * con[3]; /* B */
299      break;
300   case PIPE_BLENDFACTOR_SRC1_COLOR:
301      assert(0); /* to do */
302      break;
303   case PIPE_BLENDFACTOR_SRC1_ALPHA:
304      assert(0); /* to do */
305      break;
306   case PIPE_BLENDFACTOR_ZERO:
307      term[0] = 0.0f; /* R */
308      term[1] = 0.0f; /* G */
309      term[2] = 0.0f; /* B */
310      break;
311   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
312      term[0] = factor[0] * (1.0f - src[0]); /* R */
313      term[1] = factor[1] * (1.0f - src[1]); /* G */
314      term[2] = factor[2] * (1.0f - src[2]); /* B */
315      break;
316   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
317      term[0] = factor[0] * (1.0f - src[3]); /* R */
318      term[1] = factor[1] * (1.0f - src[3]); /* G */
319      term[2] = factor[2] * (1.0f - src[3]); /* B */
320      break;
321   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
322      term[0] = factor[0] * (1.0f - dst[3]); /* R */
323      term[1] = factor[1] * (1.0f - dst[3]); /* G */
324      term[2] = factor[2] * (1.0f - dst[3]); /* B */
325      break;
326   case PIPE_BLENDFACTOR_INV_DST_COLOR:
327      term[0] = factor[0] * (1.0f - dst[0]); /* R */
328      term[1] = factor[1] * (1.0f - dst[1]); /* G */
329      term[2] = factor[2] * (1.0f - dst[2]); /* B */
330      break;
331   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
332      term[0] = factor[0] * (1.0f - con[0]); /* R */
333      term[1] = factor[1] * (1.0f - con[1]); /* G */
334      term[2] = factor[2] * (1.0f - con[2]); /* B */
335      break;
336   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
337      term[0] = factor[0] * (1.0f - con[3]); /* R */
338      term[1] = factor[1] * (1.0f - con[3]); /* G */
339      term[2] = factor[2] * (1.0f - con[3]); /* B */
340      break;
341   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
342      assert(0); /* to do */
343      break;
344   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
345      assert(0); /* to do */
346      break;
347   default:
348      assert(0);
349   }
350
351   /*
352    * Compute src/first term A
353    */
354   switch (alpha_factor) {
355   case PIPE_BLENDFACTOR_ONE:
356      term[3] = factor[3]; /* A */
357      break;
358   case PIPE_BLENDFACTOR_SRC_COLOR:
359   case PIPE_BLENDFACTOR_SRC_ALPHA:
360      term[3] = factor[3] * src[3]; /* A */
361      break;
362   case PIPE_BLENDFACTOR_DST_COLOR:
363   case PIPE_BLENDFACTOR_DST_ALPHA:
364      term[3] = factor[3] * dst[3]; /* A */
365      break;
366   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
367      term[3] = src[3]; /* A */
368      break;
369   case PIPE_BLENDFACTOR_CONST_COLOR:
370   case PIPE_BLENDFACTOR_CONST_ALPHA:
371      term[3] = factor[3] * con[3]; /* A */
372      break;
373   case PIPE_BLENDFACTOR_ZERO:
374      term[3] = 0.0f; /* A */
375      break;
376   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
377   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
378      term[3] = factor[3] * (1.0f - src[3]); /* A */
379      break;
380   case PIPE_BLENDFACTOR_INV_DST_COLOR:
381   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
382      term[3] = factor[3] * (1.0f - dst[3]); /* A */
383      break;
384   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
385   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
386      term[3] = factor[3] * (1.0f - con[3]);
387      break;
388   default:
389      assert(0);
390   }
391}
392
393
394static void
395compute_blend_ref(const struct pipe_blend_state *blend,
396                  const double *src,
397                  const double *dst,
398                  const double *con,
399                  double *res)
400{
401   double src_term[4];
402   double dst_term[4];
403
404   compute_blend_ref_term(blend->rt[0].rgb_src_factor, blend->rt[0].alpha_src_factor,
405                          src, src, dst, con, src_term);
406   compute_blend_ref_term(blend->rt[0].rgb_dst_factor, blend->rt[0].alpha_dst_factor,
407                          dst, src, dst, con, dst_term);
408
409   /*
410    * Combine RGB terms
411    */
412   switch (blend->rt[0].rgb_func) {
413   case PIPE_BLEND_ADD:
414      ADD_SAT(res[0], src_term[0], dst_term[0]); /* R */
415      ADD_SAT(res[1], src_term[1], dst_term[1]); /* G */
416      ADD_SAT(res[2], src_term[2], dst_term[2]); /* B */
417      break;
418   case PIPE_BLEND_SUBTRACT:
419      SUB_SAT(res[0], src_term[0], dst_term[0]); /* R */
420      SUB_SAT(res[1], src_term[1], dst_term[1]); /* G */
421      SUB_SAT(res[2], src_term[2], dst_term[2]); /* B */
422      break;
423   case PIPE_BLEND_REVERSE_SUBTRACT:
424      SUB_SAT(res[0], dst_term[0], src_term[0]); /* R */
425      SUB_SAT(res[1], dst_term[1], src_term[1]); /* G */
426      SUB_SAT(res[2], dst_term[2], src_term[2]); /* B */
427      break;
428   case PIPE_BLEND_MIN:
429      res[0] = MIN2(src_term[0], dst_term[0]); /* R */
430      res[1] = MIN2(src_term[1], dst_term[1]); /* G */
431      res[2] = MIN2(src_term[2], dst_term[2]); /* B */
432      break;
433   case PIPE_BLEND_MAX:
434      res[0] = MAX2(src_term[0], dst_term[0]); /* R */
435      res[1] = MAX2(src_term[1], dst_term[1]); /* G */
436      res[2] = MAX2(src_term[2], dst_term[2]); /* B */
437      break;
438   default:
439      assert(0);
440   }
441
442   /*
443    * Combine A terms
444    */
445   switch (blend->rt[0].alpha_func) {
446   case PIPE_BLEND_ADD:
447      ADD_SAT(res[3], src_term[3], dst_term[3]); /* A */
448      break;
449   case PIPE_BLEND_SUBTRACT:
450      SUB_SAT(res[3], src_term[3], dst_term[3]); /* A */
451      break;
452   case PIPE_BLEND_REVERSE_SUBTRACT:
453      SUB_SAT(res[3], dst_term[3], src_term[3]); /* A */
454      break;
455   case PIPE_BLEND_MIN:
456      res[3] = MIN2(src_term[3], dst_term[3]); /* A */
457      break;
458   case PIPE_BLEND_MAX:
459      res[3] = MAX2(src_term[3], dst_term[3]); /* A */
460      break;
461   default:
462      assert(0);
463   }
464}
465
466
467PIPE_ALIGN_STACK
468static boolean
469test_one(unsigned verbose,
470         FILE *fp,
471         const struct pipe_blend_state *blend,
472         enum vector_mode mode,
473         struct lp_type type)
474{
475   LLVMModuleRef module = NULL;
476   LLVMValueRef func = NULL;
477   LLVMExecutionEngineRef engine = NULL;
478   LLVMModuleProviderRef provider = NULL;
479   LLVMPassManagerRef pass = NULL;
480   char *error = NULL;
481   blend_test_ptr_t blend_test_ptr;
482   boolean success;
483   const unsigned n = LP_TEST_NUM_SAMPLES;
484   int64_t cycles[LP_TEST_NUM_SAMPLES];
485   double cycles_avg = 0.0;
486   unsigned i, j;
487
488   if(verbose >= 1)
489      dump_blend_type(stdout, blend, mode, type);
490
491   module = LLVMModuleCreateWithName("test");
492
493   func = add_blend_test(module, blend, mode, type);
494
495   if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
496      LLVMDumpModule(module);
497      abort();
498   }
499   LLVMDisposeMessage(error);
500
501   provider = LLVMCreateModuleProviderForExistingModule(module);
502   if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) {
503      if(verbose < 1)
504         dump_blend_type(stderr, blend, mode, type);
505      fprintf(stderr, "%s\n", error);
506      LLVMDisposeMessage(error);
507      abort();
508   }
509
510#if 0
511   pass = LLVMCreatePassManager();
512   LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass);
513   /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
514    * but there are more on SVN. */
515   LLVMAddConstantPropagationPass(pass);
516   LLVMAddInstructionCombiningPass(pass);
517   LLVMAddPromoteMemoryToRegisterPass(pass);
518   LLVMAddGVNPass(pass);
519   LLVMAddCFGSimplificationPass(pass);
520   LLVMRunPassManager(pass, module);
521#else
522   (void)pass;
523#endif
524
525   if(verbose >= 2)
526      LLVMDumpModule(module);
527
528   blend_test_ptr = (blend_test_ptr_t)LLVMGetPointerToGlobal(engine, func);
529
530   if(verbose >= 2)
531      lp_disassemble(blend_test_ptr);
532
533   success = TRUE;
534   for(i = 0; i < n && success; ++i) {
535      if(mode == AoS) {
536         PIPE_ALIGN_VAR(16) uint8_t src[LP_NATIVE_VECTOR_WIDTH/8];
537         PIPE_ALIGN_VAR(16) uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8];
538         PIPE_ALIGN_VAR(16) uint8_t con[LP_NATIVE_VECTOR_WIDTH/8];
539         PIPE_ALIGN_VAR(16) uint8_t res[LP_NATIVE_VECTOR_WIDTH/8];
540         PIPE_ALIGN_VAR(16) uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8];
541         int64_t start_counter = 0;
542         int64_t end_counter = 0;
543
544         random_vec(type, src);
545         random_vec(type, dst);
546         random_vec(type, con);
547
548         {
549            double fsrc[LP_MAX_VECTOR_LENGTH];
550            double fdst[LP_MAX_VECTOR_LENGTH];
551            double fcon[LP_MAX_VECTOR_LENGTH];
552            double fref[LP_MAX_VECTOR_LENGTH];
553
554            read_vec(type, src, fsrc);
555            read_vec(type, dst, fdst);
556            read_vec(type, con, fcon);
557
558            for(j = 0; j < type.length; j += 4)
559               compute_blend_ref(blend, fsrc + j, fdst + j, fcon + j, fref + j);
560
561            write_vec(type, ref, fref);
562         }
563
564         start_counter = rdtsc();
565         blend_test_ptr(src, dst, con, res);
566         end_counter = rdtsc();
567
568         cycles[i] = end_counter - start_counter;
569
570         if(!compare_vec(type, res, ref)) {
571            success = FALSE;
572
573            if(verbose < 1)
574               dump_blend_type(stderr, blend, mode, type);
575            fprintf(stderr, "MISMATCH\n");
576
577            fprintf(stderr, "  Src: ");
578            dump_vec(stderr, type, src);
579            fprintf(stderr, "\n");
580
581            fprintf(stderr, "  Dst: ");
582            dump_vec(stderr, type, dst);
583            fprintf(stderr, "\n");
584
585            fprintf(stderr, "  Con: ");
586            dump_vec(stderr, type, con);
587            fprintf(stderr, "\n");
588
589            fprintf(stderr, "  Res: ");
590            dump_vec(stderr, type, res);
591            fprintf(stderr, "\n");
592
593            fprintf(stderr, "  Ref: ");
594            dump_vec(stderr, type, ref);
595            fprintf(stderr, "\n");
596         }
597      }
598
599      if(mode == SoA) {
600         const unsigned stride = type.length*type.width/8;
601         PIPE_ALIGN_VAR(16) uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8];
602         PIPE_ALIGN_VAR(16) uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8];
603         PIPE_ALIGN_VAR(16) uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8];
604         PIPE_ALIGN_VAR(16) uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8];
605         PIPE_ALIGN_VAR(16) uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8];
606         int64_t start_counter = 0;
607         int64_t end_counter = 0;
608         boolean mismatch;
609
610         for(j = 0; j < 4; ++j) {
611            random_vec(type, src + j*stride);
612            random_vec(type, dst + j*stride);
613            random_vec(type, con + j*stride);
614         }
615
616         {
617            double fsrc[4];
618            double fdst[4];
619            double fcon[4];
620            double fref[4];
621            unsigned k;
622
623            for(k = 0; k < type.length; ++k) {
624               for(j = 0; j < 4; ++j) {
625                  fsrc[j] = read_elem(type, src + j*stride, k);
626                  fdst[j] = read_elem(type, dst + j*stride, k);
627                  fcon[j] = read_elem(type, con + j*stride, k);
628               }
629
630               compute_blend_ref(blend, fsrc, fdst, fcon, fref);
631
632               for(j = 0; j < 4; ++j)
633                  write_elem(type, ref + j*stride, k, fref[j]);
634            }
635         }
636
637         start_counter = rdtsc();
638         blend_test_ptr(src, dst, con, res);
639         end_counter = rdtsc();
640
641         cycles[i] = end_counter - start_counter;
642
643         mismatch = FALSE;
644         for (j = 0; j < 4; ++j)
645            if(!compare_vec(type, res + j*stride, ref + j*stride))
646               mismatch = TRUE;
647
648         if (mismatch) {
649            success = FALSE;
650
651            if(verbose < 1)
652               dump_blend_type(stderr, blend, mode, type);
653            fprintf(stderr, "MISMATCH\n");
654            for(j = 0; j < 4; ++j) {
655               char channel = "RGBA"[j];
656               fprintf(stderr, "  Src%c: ", channel);
657               dump_vec(stderr, type, src + j*stride);
658               fprintf(stderr, "\n");
659
660               fprintf(stderr, "  Dst%c: ", channel);
661               dump_vec(stderr, type, dst + j*stride);
662               fprintf(stderr, "\n");
663
664               fprintf(stderr, "  Con%c: ", channel);
665               dump_vec(stderr, type, con + j*stride);
666               fprintf(stderr, "\n");
667
668               fprintf(stderr, "  Res%c: ", channel);
669               dump_vec(stderr, type, res + j*stride);
670               fprintf(stderr, "\n");
671
672               fprintf(stderr, "  Ref%c: ", channel);
673               dump_vec(stderr, type, ref + j*stride);
674               fprintf(stderr, "\n");
675            }
676         }
677      }
678   }
679
680   /*
681    * Unfortunately the output of cycle counter is not very reliable as it comes
682    * -- sometimes we get outliers (due IRQs perhaps?) which are
683    * better removed to avoid random or biased data.
684    */
685   {
686      double sum = 0.0, sum2 = 0.0;
687      double avg, std;
688      unsigned m;
689
690      for(i = 0; i < n; ++i) {
691         sum += cycles[i];
692         sum2 += cycles[i]*cycles[i];
693      }
694
695      avg = sum/n;
696      std = sqrtf((sum2 - n*avg*avg)/n);
697
698      m = 0;
699      sum = 0.0;
700      for(i = 0; i < n; ++i) {
701         if(fabs(cycles[i] - avg) <= 4.0*std) {
702            sum += cycles[i];
703            ++m;
704         }
705      }
706
707      cycles_avg = sum/m;
708
709   }
710
711   if(fp)
712      write_tsv_row(fp, blend, mode, type, cycles_avg, success);
713
714   if (!success) {
715      if(verbose < 2)
716         LLVMDumpModule(module);
717      LLVMWriteBitcodeToFile(module, "blend.bc");
718      fprintf(stderr, "blend.bc written\n");
719      fprintf(stderr, "Invoke as \"llc -o - blend.bc\"\n");
720      abort();
721   }
722
723   LLVMFreeMachineCodeForFunction(engine, func);
724
725   LLVMDisposeExecutionEngine(engine);
726   if(pass)
727      LLVMDisposePassManager(pass);
728
729   return success;
730}
731
732
733const unsigned
734blend_factors[] = {
735   PIPE_BLENDFACTOR_ZERO,
736   PIPE_BLENDFACTOR_ONE,
737   PIPE_BLENDFACTOR_SRC_COLOR,
738   PIPE_BLENDFACTOR_SRC_ALPHA,
739   PIPE_BLENDFACTOR_DST_COLOR,
740   PIPE_BLENDFACTOR_DST_ALPHA,
741   PIPE_BLENDFACTOR_CONST_COLOR,
742   PIPE_BLENDFACTOR_CONST_ALPHA,
743#if 0
744   PIPE_BLENDFACTOR_SRC1_COLOR,
745   PIPE_BLENDFACTOR_SRC1_ALPHA,
746#endif
747   PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE,
748   PIPE_BLENDFACTOR_INV_SRC_COLOR,
749   PIPE_BLENDFACTOR_INV_SRC_ALPHA,
750   PIPE_BLENDFACTOR_INV_DST_COLOR,
751   PIPE_BLENDFACTOR_INV_DST_ALPHA,
752   PIPE_BLENDFACTOR_INV_CONST_COLOR,
753   PIPE_BLENDFACTOR_INV_CONST_ALPHA,
754#if 0
755   PIPE_BLENDFACTOR_INV_SRC1_COLOR,
756   PIPE_BLENDFACTOR_INV_SRC1_ALPHA,
757#endif
758};
759
760
761const unsigned
762blend_funcs[] = {
763   PIPE_BLEND_ADD,
764   PIPE_BLEND_SUBTRACT,
765   PIPE_BLEND_REVERSE_SUBTRACT,
766   PIPE_BLEND_MIN,
767   PIPE_BLEND_MAX
768};
769
770
771const struct lp_type blend_types[] = {
772   /* float, fixed,  sign,  norm, width, len */
773   {   TRUE, FALSE, FALSE,  TRUE,    32,   4 }, /* f32 x 4 */
774   {  FALSE, FALSE, FALSE,  TRUE,     8,  16 }, /* u8n x 16 */
775};
776
777
778const unsigned num_funcs = sizeof(blend_funcs)/sizeof(blend_funcs[0]);
779const unsigned num_factors = sizeof(blend_factors)/sizeof(blend_factors[0]);
780const unsigned num_types = sizeof(blend_types)/sizeof(blend_types[0]);
781
782
783boolean
784test_all(unsigned verbose, FILE *fp)
785{
786   const unsigned *rgb_func;
787   const unsigned *rgb_src_factor;
788   const unsigned *rgb_dst_factor;
789   const unsigned *alpha_func;
790   const unsigned *alpha_src_factor;
791   const unsigned *alpha_dst_factor;
792   struct pipe_blend_state blend;
793   enum vector_mode mode;
794   const struct lp_type *type;
795   bool success = TRUE;
796
797   for(rgb_func = blend_funcs; rgb_func < &blend_funcs[num_funcs]; ++rgb_func) {
798      for(alpha_func = blend_funcs; alpha_func < &blend_funcs[num_funcs]; ++alpha_func) {
799         for(rgb_src_factor = blend_factors; rgb_src_factor < &blend_factors[num_factors]; ++rgb_src_factor) {
800            for(rgb_dst_factor = blend_factors; rgb_dst_factor <= rgb_src_factor; ++rgb_dst_factor) {
801               for(alpha_src_factor = blend_factors; alpha_src_factor < &blend_factors[num_factors]; ++alpha_src_factor) {
802                  for(alpha_dst_factor = blend_factors; alpha_dst_factor <= alpha_src_factor; ++alpha_dst_factor) {
803                     for(mode = 0; mode < 2; ++mode) {
804                        for(type = blend_types; type < &blend_types[num_types]; ++type) {
805
806                           if(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
807                              *alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)
808                              continue;
809
810                           memset(&blend, 0, sizeof blend);
811                           blend.rt[0].blend_enable      = 1;
812                           blend.rt[0].rgb_func          = *rgb_func;
813                           blend.rt[0].rgb_src_factor    = *rgb_src_factor;
814                           blend.rt[0].rgb_dst_factor    = *rgb_dst_factor;
815                           blend.rt[0].alpha_func        = *alpha_func;
816                           blend.rt[0].alpha_src_factor  = *alpha_src_factor;
817                           blend.rt[0].alpha_dst_factor  = *alpha_dst_factor;
818                           blend.rt[0].colormask         = PIPE_MASK_RGBA;
819
820                           if(!test_one(verbose, fp, &blend, mode, *type))
821                             success = FALSE;
822
823                        }
824                     }
825                  }
826               }
827            }
828         }
829      }
830   }
831
832   return success;
833}
834
835
836boolean
837test_some(unsigned verbose, FILE *fp, unsigned long n)
838{
839   const unsigned *rgb_func;
840   const unsigned *rgb_src_factor;
841   const unsigned *rgb_dst_factor;
842   const unsigned *alpha_func;
843   const unsigned *alpha_src_factor;
844   const unsigned *alpha_dst_factor;
845   struct pipe_blend_state blend;
846   enum vector_mode mode;
847   const struct lp_type *type;
848   unsigned long i;
849   bool success = TRUE;
850
851   for(i = 0; i < n; ++i) {
852      rgb_func = &blend_funcs[rand() % num_funcs];
853      alpha_func = &blend_funcs[rand() % num_funcs];
854      rgb_src_factor = &blend_factors[rand() % num_factors];
855      alpha_src_factor = &blend_factors[rand() % num_factors];
856
857      do {
858         rgb_dst_factor = &blend_factors[rand() % num_factors];
859      } while(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
860
861      do {
862         alpha_dst_factor = &blend_factors[rand() % num_factors];
863      } while(*alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
864
865      mode = rand() & 1;
866
867      type = &blend_types[rand() % num_types];
868
869      memset(&blend, 0, sizeof blend);
870      blend.rt[0].blend_enable      = 1;
871      blend.rt[0].rgb_func          = *rgb_func;
872      blend.rt[0].rgb_src_factor    = *rgb_src_factor;
873      blend.rt[0].rgb_dst_factor    = *rgb_dst_factor;
874      blend.rt[0].alpha_func        = *alpha_func;
875      blend.rt[0].alpha_src_factor  = *alpha_src_factor;
876      blend.rt[0].alpha_dst_factor  = *alpha_dst_factor;
877      blend.rt[0].colormask         = PIPE_MASK_RGBA;
878
879      if(!test_one(verbose, fp, &blend, mode, *type))
880        success = FALSE;
881   }
882
883   return success;
884}
885