1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29/**
30 * @file
31 * Unit tests for blend LLVM IR generation
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Blend computation code derived from code written by
36 * @author Brian Paul <brian@vmware.com>
37 */
38
39#include "util/u_memory.h"
40
41#include "gallivm/lp_bld_init.h"
42#include "gallivm/lp_bld_type.h"
43#include "gallivm/lp_bld_debug.h"
44#include "lp_bld_blend.h"
45#include "lp_test.h"
46
47
/**
 * Data layout of the vectors handed to the generated blend function.
 *
 * The numeric values matter: this file iterates over the modes as the
 * integers 0 and 1 and also picks a mode from a single random bit.
 */
enum vector_mode
{
   /* One vector holds groups of four consecutive R,G,B,A channels. */
   AoS = 0,

   /* Four vectors per operand, one for each of the R,G,B,A channels. */
   SoA = 1
};
53
54
/**
 * Entry point of the JIT-compiled blend test: reads the source, destination
 * and constant colour vectors and stores the blended result through @p res.
 */
typedef void
(*blend_test_ptr_t)(const void *src,
                    const void *dst,
                    const void *con,
                    void *res);
56
57
/**
 * Write the column-title line of the tab-separated results table.
 *
 * The titles are separated by tabs, the line ends with a newline, and the
 * stream is flushed afterwards.
 *
 * @param fp  stream receiving the header line
 */
void
write_tsv_header(FILE *fp)
{
   static const char *const columns[] = {
      "result",
      "cycles_per_channel",
      "mode",
      "type",
      "sep_func",
      "sep_src_factor",
      "sep_dst_factor",
      "rgb_func",
      "rgb_src_factor",
      "rgb_dst_factor",
      "alpha_func",
      "alpha_src_factor",
      "alpha_dst_factor"
   };
   const unsigned num_columns = sizeof columns / sizeof columns[0];
   unsigned col;

   for (col = 0; col < num_columns; ++col) {
      fputs(columns[col], fp);
      /* tab between titles, newline after the last one */
      fputc(col + 1 < num_columns ? '\t' : '\n', fp);
   }

   fflush(fp);
}
78
79
80static void
81write_tsv_row(FILE *fp,
82              const struct pipe_blend_state *blend,
83              enum vector_mode mode,
84              struct lp_type type,
85              double cycles,
86              boolean success)
87{
88   fprintf(fp, "%s\t", success ? "pass" : "fail");
89
90   if (mode == AoS) {
91      fprintf(fp, "%.1f\t", cycles / type.length);
92      fprintf(fp, "aos\t");
93   }
94
95   if (mode == SoA) {
96      fprintf(fp, "%.1f\t", cycles / (4 * type.length));
97      fprintf(fp, "soa\t");
98   }
99
100   fprintf(fp, "%s%u%sx%u\t",
101           type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
102           type.width,
103           type.norm ? "n" : "",
104           type.length);
105
106   fprintf(fp,
107           "%s\t%s\t%s\t",
108           blend->rt[0].rgb_func != blend->rt[0].alpha_func ? "true" : "false",
109           blend->rt[0].rgb_src_factor != blend->rt[0].alpha_src_factor ? "true" : "false",
110           blend->rt[0].rgb_dst_factor != blend->rt[0].alpha_dst_factor ? "true" : "false");
111
112   fprintf(fp,
113           "%s\t%s\t%s\t%s\t%s\t%s\n",
114           util_dump_blend_func(blend->rt[0].rgb_func, TRUE),
115           util_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE),
116           util_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE),
117           util_dump_blend_func(blend->rt[0].alpha_func, TRUE),
118           util_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE),
119           util_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE));
120
121   fflush(fp);
122}
123
124
125static void
126dump_blend_type(FILE *fp,
127                const struct pipe_blend_state *blend,
128                enum vector_mode mode,
129                struct lp_type type)
130{
131   fprintf(fp, "%s", mode ? "soa" : "aos");
132
133   fprintf(fp, " type=%s%u%sx%u",
134           type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
135           type.width,
136           type.norm ? "n" : "",
137           type.length);
138
139   fprintf(fp,
140           " %s=%s %s=%s %s=%s %s=%s %s=%s %s=%s",
141           "rgb_func",         util_dump_blend_func(blend->rt[0].rgb_func, TRUE),
142           "rgb_src_factor",   util_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE),
143           "rgb_dst_factor",   util_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE),
144           "alpha_func",       util_dump_blend_func(blend->rt[0].alpha_func, TRUE),
145           "alpha_src_factor", util_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE),
146           "alpha_dst_factor", util_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE));
147
148   fprintf(fp, " ...\n");
149   fflush(fp);
150}
151
152
153static LLVMValueRef
154add_blend_test(struct gallivm_state *gallivm,
155               const struct pipe_blend_state *blend,
156               enum vector_mode mode,
157               struct lp_type type)
158{
159   LLVMModuleRef module = gallivm->module;
160   LLVMContextRef context = gallivm->context;
161   LLVMTypeRef vec_type;
162   LLVMTypeRef args[4];
163   LLVMValueRef func;
164   LLVMValueRef src_ptr;
165   LLVMValueRef dst_ptr;
166   LLVMValueRef const_ptr;
167   LLVMValueRef res_ptr;
168   LLVMBasicBlockRef block;
169   LLVMBuilderRef builder;
170   const enum pipe_format format = PIPE_FORMAT_R8G8B8A8_UNORM;
171   const unsigned rt = 0;
172   const unsigned char swizzle[4] = { 0, 1, 2, 3 };
173
174   vec_type = lp_build_vec_type(gallivm, type);
175
176   args[3] = args[2] = args[1] = args[0] = LLVMPointerType(vec_type, 0);
177   func = LLVMAddFunction(module, "test", LLVMFunctionType(LLVMVoidTypeInContext(context), args, 4, 0));
178   LLVMSetFunctionCallConv(func, LLVMCCallConv);
179   src_ptr = LLVMGetParam(func, 0);
180   dst_ptr = LLVMGetParam(func, 1);
181   const_ptr = LLVMGetParam(func, 2);
182   res_ptr = LLVMGetParam(func, 3);
183
184   block = LLVMAppendBasicBlockInContext(context, func, "entry");
185   builder = gallivm->builder;
186   LLVMPositionBuilderAtEnd(builder, block);
187
188   if (mode == AoS) {
189      LLVMValueRef src;
190      LLVMValueRef dst;
191      LLVMValueRef con;
192      LLVMValueRef res;
193
194      src = LLVMBuildLoad(builder, src_ptr, "src");
195      dst = LLVMBuildLoad(builder, dst_ptr, "dst");
196      con = LLVMBuildLoad(builder, const_ptr, "const");
197
198      res = lp_build_blend_aos(gallivm, blend, &format, type, rt, src, dst, NULL, con, swizzle);
199
200      lp_build_name(res, "res");
201
202      LLVMBuildStore(builder, res, res_ptr);
203   }
204
205   if (mode == SoA) {
206      LLVMValueRef src[4];
207      LLVMValueRef dst[4];
208      LLVMValueRef con[4];
209      LLVMValueRef res[4];
210      unsigned i;
211
212      for(i = 0; i < 4; ++i) {
213         LLVMValueRef index = LLVMConstInt(LLVMInt32TypeInContext(context), i, 0);
214         src[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, src_ptr, &index, 1, ""), "");
215         dst[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), "");
216         con[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), "");
217         lp_build_name(src[i], "src.%c", "rgba"[i]);
218         lp_build_name(con[i], "con.%c", "rgba"[i]);
219         lp_build_name(dst[i], "dst.%c", "rgba"[i]);
220      }
221
222      lp_build_blend_soa(gallivm, blend, type, rt, src, dst, con, res);
223
224      for(i = 0; i < 4; ++i) {
225         LLVMValueRef index = LLVMConstInt(LLVMInt32TypeInContext(context), i, 0);
226         lp_build_name(res[i], "res.%c", "rgba"[i]);
227         LLVMBuildStore(builder, res[i], LLVMBuildGEP(builder, res_ptr, &index, 1, ""));
228      }
229   }
230
231   LLVMBuildRetVoid(builder);;
232
233   return func;
234}
235
236
237static void
238compute_blend_ref_term(unsigned rgb_factor,
239                       unsigned alpha_factor,
240                       const double *factor,
241                       const double *src,
242                       const double *dst,
243                       const double *con,
244                       double *term)
245{
246   double temp;
247
248   switch (rgb_factor) {
249   case PIPE_BLENDFACTOR_ONE:
250      term[0] = factor[0]; /* R */
251      term[1] = factor[1]; /* G */
252      term[2] = factor[2]; /* B */
253      break;
254   case PIPE_BLENDFACTOR_SRC_COLOR:
255      term[0] = factor[0] * src[0]; /* R */
256      term[1] = factor[1] * src[1]; /* G */
257      term[2] = factor[2] * src[2]; /* B */
258      break;
259   case PIPE_BLENDFACTOR_SRC_ALPHA:
260      term[0] = factor[0] * src[3]; /* R */
261      term[1] = factor[1] * src[3]; /* G */
262      term[2] = factor[2] * src[3]; /* B */
263      break;
264   case PIPE_BLENDFACTOR_DST_COLOR:
265      term[0] = factor[0] * dst[0]; /* R */
266      term[1] = factor[1] * dst[1]; /* G */
267      term[2] = factor[2] * dst[2]; /* B */
268      break;
269   case PIPE_BLENDFACTOR_DST_ALPHA:
270      term[0] = factor[0] * dst[3]; /* R */
271      term[1] = factor[1] * dst[3]; /* G */
272      term[2] = factor[2] * dst[3]; /* B */
273      break;
274   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
275      temp = MIN2(src[3], 1.0f - dst[3]);
276      term[0] = factor[0] * temp; /* R */
277      term[1] = factor[1] * temp; /* G */
278      term[2] = factor[2] * temp; /* B */
279      break;
280   case PIPE_BLENDFACTOR_CONST_COLOR:
281      term[0] = factor[0] * con[0]; /* R */
282      term[1] = factor[1] * con[1]; /* G */
283      term[2] = factor[2] * con[2]; /* B */
284      break;
285   case PIPE_BLENDFACTOR_CONST_ALPHA:
286      term[0] = factor[0] * con[3]; /* R */
287      term[1] = factor[1] * con[3]; /* G */
288      term[2] = factor[2] * con[3]; /* B */
289      break;
290   case PIPE_BLENDFACTOR_SRC1_COLOR:
291      assert(0); /* to do */
292      break;
293   case PIPE_BLENDFACTOR_SRC1_ALPHA:
294      assert(0); /* to do */
295      break;
296   case PIPE_BLENDFACTOR_ZERO:
297      term[0] = 0.0f; /* R */
298      term[1] = 0.0f; /* G */
299      term[2] = 0.0f; /* B */
300      break;
301   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
302      term[0] = factor[0] * (1.0f - src[0]); /* R */
303      term[1] = factor[1] * (1.0f - src[1]); /* G */
304      term[2] = factor[2] * (1.0f - src[2]); /* B */
305      break;
306   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
307      term[0] = factor[0] * (1.0f - src[3]); /* R */
308      term[1] = factor[1] * (1.0f - src[3]); /* G */
309      term[2] = factor[2] * (1.0f - src[3]); /* B */
310      break;
311   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
312      term[0] = factor[0] * (1.0f - dst[3]); /* R */
313      term[1] = factor[1] * (1.0f - dst[3]); /* G */
314      term[2] = factor[2] * (1.0f - dst[3]); /* B */
315      break;
316   case PIPE_BLENDFACTOR_INV_DST_COLOR:
317      term[0] = factor[0] * (1.0f - dst[0]); /* R */
318      term[1] = factor[1] * (1.0f - dst[1]); /* G */
319      term[2] = factor[2] * (1.0f - dst[2]); /* B */
320      break;
321   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
322      term[0] = factor[0] * (1.0f - con[0]); /* R */
323      term[1] = factor[1] * (1.0f - con[1]); /* G */
324      term[2] = factor[2] * (1.0f - con[2]); /* B */
325      break;
326   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
327      term[0] = factor[0] * (1.0f - con[3]); /* R */
328      term[1] = factor[1] * (1.0f - con[3]); /* G */
329      term[2] = factor[2] * (1.0f - con[3]); /* B */
330      break;
331   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
332      assert(0); /* to do */
333      break;
334   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
335      assert(0); /* to do */
336      break;
337   default:
338      assert(0);
339   }
340
341   /*
342    * Compute src/first term A
343    */
344   switch (alpha_factor) {
345   case PIPE_BLENDFACTOR_ONE:
346      term[3] = factor[3]; /* A */
347      break;
348   case PIPE_BLENDFACTOR_SRC_COLOR:
349   case PIPE_BLENDFACTOR_SRC_ALPHA:
350      term[3] = factor[3] * src[3]; /* A */
351      break;
352   case PIPE_BLENDFACTOR_DST_COLOR:
353   case PIPE_BLENDFACTOR_DST_ALPHA:
354      term[3] = factor[3] * dst[3]; /* A */
355      break;
356   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
357      term[3] = src[3]; /* A */
358      break;
359   case PIPE_BLENDFACTOR_CONST_COLOR:
360   case PIPE_BLENDFACTOR_CONST_ALPHA:
361      term[3] = factor[3] * con[3]; /* A */
362      break;
363   case PIPE_BLENDFACTOR_ZERO:
364      term[3] = 0.0f; /* A */
365      break;
366   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
367   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
368      term[3] = factor[3] * (1.0f - src[3]); /* A */
369      break;
370   case PIPE_BLENDFACTOR_INV_DST_COLOR:
371   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
372      term[3] = factor[3] * (1.0f - dst[3]); /* A */
373      break;
374   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
375   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
376      term[3] = factor[3] * (1.0f - con[3]);
377      break;
378   default:
379      assert(0);
380   }
381}
382
383
384static void
385compute_blend_ref(const struct pipe_blend_state *blend,
386                  const double *src,
387                  const double *dst,
388                  const double *con,
389                  double *res)
390{
391   double src_term[4];
392   double dst_term[4];
393
394   compute_blend_ref_term(blend->rt[0].rgb_src_factor, blend->rt[0].alpha_src_factor,
395                          src, src, dst, con, src_term);
396   compute_blend_ref_term(blend->rt[0].rgb_dst_factor, blend->rt[0].alpha_dst_factor,
397                          dst, src, dst, con, dst_term);
398
399   /*
400    * Combine RGB terms
401    */
402   switch (blend->rt[0].rgb_func) {
403   case PIPE_BLEND_ADD:
404      res[0] = src_term[0] + dst_term[0]; /* R */
405      res[1] = src_term[1] + dst_term[1]; /* G */
406      res[2] = src_term[2] + dst_term[2]; /* B */
407      break;
408   case PIPE_BLEND_SUBTRACT:
409      res[0] = src_term[0] - dst_term[0]; /* R */
410      res[1] = src_term[1] - dst_term[1]; /* G */
411      res[2] = src_term[2] - dst_term[2]; /* B */
412      break;
413   case PIPE_BLEND_REVERSE_SUBTRACT:
414      res[0] = dst_term[0] - src_term[0]; /* R */
415      res[1] = dst_term[1] - src_term[1]; /* G */
416      res[2] = dst_term[2] - src_term[2]; /* B */
417      break;
418   case PIPE_BLEND_MIN:
419      res[0] = MIN2(src_term[0], dst_term[0]); /* R */
420      res[1] = MIN2(src_term[1], dst_term[1]); /* G */
421      res[2] = MIN2(src_term[2], dst_term[2]); /* B */
422      break;
423   case PIPE_BLEND_MAX:
424      res[0] = MAX2(src_term[0], dst_term[0]); /* R */
425      res[1] = MAX2(src_term[1], dst_term[1]); /* G */
426      res[2] = MAX2(src_term[2], dst_term[2]); /* B */
427      break;
428   default:
429      assert(0);
430   }
431
432   /*
433    * Combine A terms
434    */
435   switch (blend->rt[0].alpha_func) {
436   case PIPE_BLEND_ADD:
437      res[3] = src_term[3] + dst_term[3]; /* A */
438      break;
439   case PIPE_BLEND_SUBTRACT:
440      res[3] = src_term[3] - dst_term[3]; /* A */
441      break;
442   case PIPE_BLEND_REVERSE_SUBTRACT:
443      res[3] = dst_term[3] - src_term[3]; /* A */
444      break;
445   case PIPE_BLEND_MIN:
446      res[3] = MIN2(src_term[3], dst_term[3]); /* A */
447      break;
448   case PIPE_BLEND_MAX:
449      res[3] = MAX2(src_term[3], dst_term[3]); /* A */
450      break;
451   default:
452      assert(0);
453   }
454}
455
456
457PIPE_ALIGN_STACK
458static boolean
459test_one(unsigned verbose,
460         FILE *fp,
461         const struct pipe_blend_state *blend,
462         enum vector_mode mode,
463         struct lp_type type)
464{
465   struct gallivm_state *gallivm;
466   LLVMValueRef func = NULL;
467   blend_test_ptr_t blend_test_ptr;
468   boolean success;
469   const unsigned n = LP_TEST_NUM_SAMPLES;
470   int64_t cycles[LP_TEST_NUM_SAMPLES];
471   double cycles_avg = 0.0;
472   unsigned i, j;
473   const unsigned stride = lp_type_width(type)/8;
474
475   if(verbose >= 1)
476      dump_blend_type(stdout, blend, mode, type);
477
478   gallivm = gallivm_create();
479
480   func = add_blend_test(gallivm, blend, mode, type);
481
482   gallivm_compile_module(gallivm);
483
484   blend_test_ptr = (blend_test_ptr_t)gallivm_jit_function(gallivm, func);
485
486   success = TRUE;
487   if(mode == AoS) {
488      uint8_t *src, *dst, *con, *res, *ref;
489      src = align_malloc(stride, stride);
490      dst = align_malloc(stride, stride);
491      con = align_malloc(stride, stride);
492      res = align_malloc(stride, stride);
493      ref = align_malloc(stride, stride);
494
495      for(i = 0; i < n && success; ++i) {
496         int64_t start_counter = 0;
497         int64_t end_counter = 0;
498
499         random_vec(type, src);
500         random_vec(type, dst);
501         random_vec(type, con);
502
503         {
504            double fsrc[LP_MAX_VECTOR_LENGTH];
505            double fdst[LP_MAX_VECTOR_LENGTH];
506            double fcon[LP_MAX_VECTOR_LENGTH];
507            double fref[LP_MAX_VECTOR_LENGTH];
508
509            read_vec(type, src, fsrc);
510            read_vec(type, dst, fdst);
511            read_vec(type, con, fcon);
512
513            for(j = 0; j < type.length; j += 4)
514               compute_blend_ref(blend, fsrc + j, fdst + j, fcon + j, fref + j);
515
516            write_vec(type, ref, fref);
517         }
518
519         start_counter = rdtsc();
520         blend_test_ptr(src, dst, con, res);
521         end_counter = rdtsc();
522
523         cycles[i] = end_counter - start_counter;
524
525         if(!compare_vec(type, res, ref)) {
526            success = FALSE;
527
528            if(verbose < 1)
529               dump_blend_type(stderr, blend, mode, type);
530            fprintf(stderr, "MISMATCH\n");
531
532            fprintf(stderr, "  Src: ");
533            dump_vec(stderr, type, src);
534            fprintf(stderr, "\n");
535
536            fprintf(stderr, "  Dst: ");
537            dump_vec(stderr, type, dst);
538            fprintf(stderr, "\n");
539
540            fprintf(stderr, "  Con: ");
541            dump_vec(stderr, type, con);
542            fprintf(stderr, "\n");
543
544            fprintf(stderr, "  Res: ");
545            dump_vec(stderr, type, res);
546            fprintf(stderr, "\n");
547
548            fprintf(stderr, "  Ref: ");
549            dump_vec(stderr, type, ref);
550            fprintf(stderr, "\n");
551         }
552      }
553      align_free(src);
554      align_free(dst);
555      align_free(con);
556      align_free(res);
557      align_free(ref);
558   }
559   else if(mode == SoA) {
560      uint8_t *src, *dst, *con, *res, *ref;
561      src = align_malloc(4*stride, stride);
562      dst = align_malloc(4*stride, stride);
563      con = align_malloc(4*stride, stride);
564      res = align_malloc(4*stride, stride);
565      ref = align_malloc(4*stride, stride);
566
567      for(i = 0; i < n && success; ++i) {
568         int64_t start_counter = 0;
569         int64_t end_counter = 0;
570         boolean mismatch;
571
572         for(j = 0; j < 4; ++j) {
573            random_vec(type, src + j*stride);
574            random_vec(type, dst + j*stride);
575            random_vec(type, con + j*stride);
576         }
577
578         {
579            double fsrc[4];
580            double fdst[4];
581            double fcon[4];
582            double fref[4];
583            unsigned k;
584
585            for(k = 0; k < type.length; ++k) {
586               for(j = 0; j < 4; ++j) {
587                  fsrc[j] = read_elem(type, src + j*stride, k);
588                  fdst[j] = read_elem(type, dst + j*stride, k);
589                  fcon[j] = read_elem(type, con + j*stride, k);
590               }
591
592               compute_blend_ref(blend, fsrc, fdst, fcon, fref);
593
594               for(j = 0; j < 4; ++j)
595                  write_elem(type, ref + j*stride, k, fref[j]);
596            }
597         }
598
599         start_counter = rdtsc();
600         blend_test_ptr(src, dst, con, res);
601         end_counter = rdtsc();
602
603         cycles[i] = end_counter - start_counter;
604
605         mismatch = FALSE;
606         for (j = 0; j < 4; ++j)
607            if(!compare_vec(type, res + j*stride, ref + j*stride))
608               mismatch = TRUE;
609
610         if (mismatch) {
611            success = FALSE;
612
613            if(verbose < 1)
614               dump_blend_type(stderr, blend, mode, type);
615            fprintf(stderr, "MISMATCH\n");
616            for(j = 0; j < 4; ++j) {
617               char channel = "RGBA"[j];
618               fprintf(stderr, "  Src%c: ", channel);
619               dump_vec(stderr, type, src + j*stride);
620               fprintf(stderr, "\n");
621
622               fprintf(stderr, "  Dst%c: ", channel);
623               dump_vec(stderr, type, dst + j*stride);
624               fprintf(stderr, "\n");
625
626               fprintf(stderr, "  Con%c: ", channel);
627               dump_vec(stderr, type, con + j*stride);
628               fprintf(stderr, "\n");
629
630               fprintf(stderr, "  Res%c: ", channel);
631               dump_vec(stderr, type, res + j*stride);
632               fprintf(stderr, "\n");
633
634               fprintf(stderr, "  Ref%c: ", channel);
635               dump_vec(stderr, type, ref + j*stride);
636               fprintf(stderr, "\n");
637
638               fprintf(stderr, "\n");
639            }
640         }
641      }
642      align_free(src);
643      align_free(dst);
644      align_free(con);
645      align_free(res);
646      align_free(ref);
647   }
648
649   /*
650    * Unfortunately the output of cycle counter is not very reliable as it comes
651    * -- sometimes we get outliers (due IRQs perhaps?) which are
652    * better removed to avoid random or biased data.
653    */
654   {
655      double sum = 0.0, sum2 = 0.0;
656      double avg, std;
657      unsigned m;
658
659      for(i = 0; i < n; ++i) {
660         sum += cycles[i];
661         sum2 += cycles[i]*cycles[i];
662      }
663
664      avg = sum/n;
665      std = sqrtf((sum2 - n*avg*avg)/n);
666
667      m = 0;
668      sum = 0.0;
669      for(i = 0; i < n; ++i) {
670         if(fabs(cycles[i] - avg) <= 4.0*std) {
671            sum += cycles[i];
672            ++m;
673         }
674      }
675
676      cycles_avg = sum/m;
677
678   }
679
680   if(fp)
681      write_tsv_row(fp, blend, mode, type, cycles_avg, success);
682
683   gallivm_free_function(gallivm, func, blend_test_ptr);
684
685   gallivm_destroy(gallivm);
686
687   return success;
688}
689
690
691const unsigned
692blend_factors[] = {
693   PIPE_BLENDFACTOR_ZERO,
694   PIPE_BLENDFACTOR_ONE,
695   PIPE_BLENDFACTOR_SRC_COLOR,
696   PIPE_BLENDFACTOR_SRC_ALPHA,
697   PIPE_BLENDFACTOR_DST_COLOR,
698   PIPE_BLENDFACTOR_DST_ALPHA,
699   PIPE_BLENDFACTOR_CONST_COLOR,
700   PIPE_BLENDFACTOR_CONST_ALPHA,
701#if 0
702   PIPE_BLENDFACTOR_SRC1_COLOR,
703   PIPE_BLENDFACTOR_SRC1_ALPHA,
704#endif
705   PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE,
706   PIPE_BLENDFACTOR_INV_SRC_COLOR,
707   PIPE_BLENDFACTOR_INV_SRC_ALPHA,
708   PIPE_BLENDFACTOR_INV_DST_COLOR,
709   PIPE_BLENDFACTOR_INV_DST_ALPHA,
710   PIPE_BLENDFACTOR_INV_CONST_COLOR,
711   PIPE_BLENDFACTOR_INV_CONST_ALPHA,
712#if 0
713   PIPE_BLENDFACTOR_INV_SRC1_COLOR,
714   PIPE_BLENDFACTOR_INV_SRC1_ALPHA,
715#endif
716};
717
718
719const unsigned
720blend_funcs[] = {
721   PIPE_BLEND_ADD,
722   PIPE_BLEND_SUBTRACT,
723   PIPE_BLEND_REVERSE_SUBTRACT,
724   PIPE_BLEND_MIN,
725   PIPE_BLEND_MAX
726};
727
728
729const struct lp_type blend_types[] = {
730   /* float, fixed,  sign,  norm, width, len */
731   {   TRUE, FALSE,  TRUE, FALSE,    32,   4 }, /* f32 x 4 */
732   {  FALSE, FALSE, FALSE,  TRUE,     8,  16 }, /* u8n x 16 */
733};
734
735
736const unsigned num_funcs = sizeof(blend_funcs)/sizeof(blend_funcs[0]);
737const unsigned num_factors = sizeof(blend_factors)/sizeof(blend_factors[0]);
738const unsigned num_types = sizeof(blend_types)/sizeof(blend_types[0]);
739
740
741boolean
742test_all(unsigned verbose, FILE *fp)
743{
744   const unsigned *rgb_func;
745   const unsigned *rgb_src_factor;
746   const unsigned *rgb_dst_factor;
747   const unsigned *alpha_func;
748   const unsigned *alpha_src_factor;
749   const unsigned *alpha_dst_factor;
750   struct pipe_blend_state blend;
751   enum vector_mode mode;
752   const struct lp_type *type;
753   boolean success = TRUE;
754
755   for(rgb_func = blend_funcs; rgb_func < &blend_funcs[num_funcs]; ++rgb_func) {
756      for(alpha_func = blend_funcs; alpha_func < &blend_funcs[num_funcs]; ++alpha_func) {
757         for(rgb_src_factor = blend_factors; rgb_src_factor < &blend_factors[num_factors]; ++rgb_src_factor) {
758            for(rgb_dst_factor = blend_factors; rgb_dst_factor <= rgb_src_factor; ++rgb_dst_factor) {
759               for(alpha_src_factor = blend_factors; alpha_src_factor < &blend_factors[num_factors]; ++alpha_src_factor) {
760                  for(alpha_dst_factor = blend_factors; alpha_dst_factor <= alpha_src_factor; ++alpha_dst_factor) {
761                     for(mode = 0; mode < 2; ++mode) {
762                        for(type = blend_types; type < &blend_types[num_types]; ++type) {
763
764                           if(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
765                              *alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)
766                              continue;
767
768                           memset(&blend, 0, sizeof blend);
769                           blend.rt[0].blend_enable      = 1;
770                           blend.rt[0].rgb_func          = *rgb_func;
771                           blend.rt[0].rgb_src_factor    = *rgb_src_factor;
772                           blend.rt[0].rgb_dst_factor    = *rgb_dst_factor;
773                           blend.rt[0].alpha_func        = *alpha_func;
774                           blend.rt[0].alpha_src_factor  = *alpha_src_factor;
775                           blend.rt[0].alpha_dst_factor  = *alpha_dst_factor;
776                           blend.rt[0].colormask         = PIPE_MASK_RGBA;
777
778                           if(!test_one(verbose, fp, &blend, mode, *type))
779                             success = FALSE;
780
781                        }
782                     }
783                  }
784               }
785            }
786         }
787      }
788   }
789
790   return success;
791}
792
793
794boolean
795test_some(unsigned verbose, FILE *fp,
796          unsigned long n)
797{
798   const unsigned *rgb_func;
799   const unsigned *rgb_src_factor;
800   const unsigned *rgb_dst_factor;
801   const unsigned *alpha_func;
802   const unsigned *alpha_src_factor;
803   const unsigned *alpha_dst_factor;
804   struct pipe_blend_state blend;
805   enum vector_mode mode;
806   const struct lp_type *type;
807   unsigned long i;
808   boolean success = TRUE;
809
810   for(i = 0; i < n; ++i) {
811      rgb_func = &blend_funcs[rand() % num_funcs];
812      alpha_func = &blend_funcs[rand() % num_funcs];
813      rgb_src_factor = &blend_factors[rand() % num_factors];
814      alpha_src_factor = &blend_factors[rand() % num_factors];
815
816      do {
817         rgb_dst_factor = &blend_factors[rand() % num_factors];
818      } while(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
819
820      do {
821         alpha_dst_factor = &blend_factors[rand() % num_factors];
822      } while(*alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
823
824      mode = rand() & 1;
825
826      type = &blend_types[rand() % num_types];
827
828      memset(&blend, 0, sizeof blend);
829      blend.rt[0].blend_enable      = 1;
830      blend.rt[0].rgb_func          = *rgb_func;
831      blend.rt[0].rgb_src_factor    = *rgb_src_factor;
832      blend.rt[0].rgb_dst_factor    = *rgb_dst_factor;
833      blend.rt[0].alpha_func        = *alpha_func;
834      blend.rt[0].alpha_src_factor  = *alpha_src_factor;
835      blend.rt[0].alpha_dst_factor  = *alpha_dst_factor;
836      blend.rt[0].colormask         = PIPE_MASK_RGBA;
837
838      if(!test_one(verbose, fp, &blend, mode, *type))
839        success = FALSE;
840   }
841
842   return success;
843}
844
845
846boolean
847test_single(unsigned verbose, FILE *fp)
848{
849   printf("no test_single()");
850   return TRUE;
851}
852