lp_test_blend.c revision 0b0f4628d6fb8276a9f1c336a785a838b602bca8
1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29/**
30 * @file
31 * Unit tests for blend LLVM IR generation
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Blend computation code derived from code written by
36 * @author Brian Paul <brian@vmware.com>
37 */
38
39
#include <math.h>

#include "gallivm/lp_bld_init.h"
#include "gallivm/lp_bld_type.h"
#include "gallivm/lp_bld_debug.h"
#include "lp_bld_blend.h"
#include "lp_test.h"
45
46
/**
 * Data layout used by a generated blend test function.
 *
 * The numeric values are relied upon in this file: the modes are iterated
 * as the integer range [0, 2), picked with "rand() & 1", and tested for
 * truthiness when printed.
 */
enum vector_mode
{
   /** One vector per operand (source, destination, constant, result). */
   AoS = 0,

   /** Four consecutive vectors per operand, one for each of R, G, B, A. */
   SoA = 1
};
52
53
/**
 * Signature of the generated blend test function: it reads the source,
 * destination and constant color operands and writes the blended result.
 */
typedef void (*blend_test_ptr_t)(const void *src, const void *dst, const void *con, void *res);

/**
 * Reinterpret an object pointer as a blend test function pointer.
 *
 * The conversion goes through a union rather than a direct cast between
 * object and function pointer types.
 */
static blend_test_ptr_t
voidptr_to_blend_test_ptr_t(void *p)
{
   union {
      void *object;
      blend_test_ptr_t function;
   } pun;

   pun.object = p;

   return pun.function;
}
67
68
69
/**
 * Write the column header line of the tab-separated results file.
 *
 * The columns match, in order, the fields emitted by write_tsv_row().
 * The stream is flushed before returning.
 */
void
write_tsv_header(FILE *fp)
{
   static const char header[] =
      "result\t"
      "cycles_per_channel\t"
      "mode\t"
      "type\t"
      "sep_func\t"
      "sep_src_factor\t"
      "sep_dst_factor\t"
      "rgb_func\t"
      "rgb_src_factor\t"
      "rgb_dst_factor\t"
      "alpha_func\t"
      "alpha_src_factor\t"
      "alpha_dst_factor\n";

   fputs(header, fp);
   fflush(fp);
}
90
91
92static void
93write_tsv_row(FILE *fp,
94              const struct pipe_blend_state *blend,
95              enum vector_mode mode,
96              struct lp_type type,
97              double cycles,
98              boolean success)
99{
100   fprintf(fp, "%s\t", success ? "pass" : "fail");
101
102   if (mode == AoS) {
103      fprintf(fp, "%.1f\t", cycles / type.length);
104      fprintf(fp, "aos\t");
105   }
106
107   if (mode == SoA) {
108      fprintf(fp, "%.1f\t", cycles / (4 * type.length));
109      fprintf(fp, "soa\t");
110   }
111
112   fprintf(fp, "%s%u%sx%u\t",
113           type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
114           type.width,
115           type.norm ? "n" : "",
116           type.length);
117
118   fprintf(fp,
119           "%s\t%s\t%s\t",
120           blend->rt[0].rgb_func != blend->rt[0].alpha_func ? "true" : "false",
121           blend->rt[0].rgb_src_factor != blend->rt[0].alpha_src_factor ? "true" : "false",
122           blend->rt[0].rgb_dst_factor != blend->rt[0].alpha_dst_factor ? "true" : "false");
123
124   fprintf(fp,
125           "%s\t%s\t%s\t%s\t%s\t%s\n",
126           util_dump_blend_func(blend->rt[0].rgb_func, TRUE),
127           util_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE),
128           util_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE),
129           util_dump_blend_func(blend->rt[0].alpha_func, TRUE),
130           util_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE),
131           util_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE));
132
133   fflush(fp);
134}
135
136
137static void
138dump_blend_type(FILE *fp,
139                const struct pipe_blend_state *blend,
140                enum vector_mode mode,
141                struct lp_type type)
142{
143   fprintf(fp, "%s", mode ? "soa" : "aos");
144
145   fprintf(fp, " type=%s%u%sx%u",
146           type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
147           type.width,
148           type.norm ? "n" : "",
149           type.length);
150
151   fprintf(fp,
152           " %s=%s %s=%s %s=%s %s=%s %s=%s %s=%s",
153           "rgb_func",         util_dump_blend_func(blend->rt[0].rgb_func, TRUE),
154           "rgb_src_factor",   util_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE),
155           "rgb_dst_factor",   util_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE),
156           "alpha_func",       util_dump_blend_func(blend->rt[0].alpha_func, TRUE),
157           "alpha_src_factor", util_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE),
158           "alpha_dst_factor", util_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE));
159
160   fprintf(fp, " ...\n");
161   fflush(fp);
162}
163
164
165static LLVMValueRef
166add_blend_test(struct gallivm_state *gallivm,
167               const struct pipe_blend_state *blend,
168               enum vector_mode mode,
169               struct lp_type type)
170{
171   LLVMModuleRef module = gallivm->module;
172   LLVMContextRef context = gallivm->context;
173   LLVMTypeRef vec_type;
174   LLVMTypeRef args[4];
175   LLVMValueRef func;
176   LLVMValueRef src_ptr;
177   LLVMValueRef dst_ptr;
178   LLVMValueRef const_ptr;
179   LLVMValueRef res_ptr;
180   LLVMBasicBlockRef block;
181   LLVMBuilderRef builder;
182   const enum pipe_format format = PIPE_FORMAT_R8G8B8A8_UNORM;
183   const unsigned rt = 0;
184   const unsigned char swizzle[4] = { 0, 1, 2, 3 };
185
186   vec_type = lp_build_vec_type(gallivm, type);
187
188   args[3] = args[2] = args[1] = args[0] = LLVMPointerType(vec_type, 0);
189   func = LLVMAddFunction(module, "test", LLVMFunctionType(LLVMVoidTypeInContext(context), args, 4, 0));
190   LLVMSetFunctionCallConv(func, LLVMCCallConv);
191   src_ptr = LLVMGetParam(func, 0);
192   dst_ptr = LLVMGetParam(func, 1);
193   const_ptr = LLVMGetParam(func, 2);
194   res_ptr = LLVMGetParam(func, 3);
195
196   block = LLVMAppendBasicBlockInContext(context, func, "entry");
197   builder = gallivm->builder;
198   LLVMPositionBuilderAtEnd(builder, block);
199
200   if (mode == AoS) {
201      LLVMValueRef src;
202      LLVMValueRef dst;
203      LLVMValueRef con;
204      LLVMValueRef res;
205
206      src = LLVMBuildLoad(builder, src_ptr, "src");
207      dst = LLVMBuildLoad(builder, dst_ptr, "dst");
208      con = LLVMBuildLoad(builder, const_ptr, "const");
209
210      res = lp_build_blend_aos(gallivm, blend, &format, type, rt, src, dst, con, swizzle);
211
212      lp_build_name(res, "res");
213
214      LLVMBuildStore(builder, res, res_ptr);
215   }
216
217   if (mode == SoA) {
218      LLVMValueRef src[4];
219      LLVMValueRef dst[4];
220      LLVMValueRef con[4];
221      LLVMValueRef res[4];
222      unsigned i;
223
224      for(i = 0; i < 4; ++i) {
225         LLVMValueRef index = LLVMConstInt(LLVMInt32TypeInContext(context), i, 0);
226         src[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, src_ptr, &index, 1, ""), "");
227         dst[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), "");
228         con[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), "");
229         lp_build_name(src[i], "src.%c", "rgba"[i]);
230         lp_build_name(con[i], "con.%c", "rgba"[i]);
231         lp_build_name(dst[i], "dst.%c", "rgba"[i]);
232      }
233
234      lp_build_blend_soa(gallivm, blend, type, rt, src, dst, con, res);
235
236      for(i = 0; i < 4; ++i) {
237         LLVMValueRef index = LLVMConstInt(LLVMInt32TypeInContext(context), i, 0);
238         lp_build_name(res[i], "res.%c", "rgba"[i]);
239         LLVMBuildStore(builder, res[i], LLVMBuildGEP(builder, res_ptr, &index, 1, ""));
240      }
241   }
242
243   LLVMBuildRetVoid(builder);;
244
245   return func;
246}
247
248
249static void
250compute_blend_ref_term(unsigned rgb_factor,
251                       unsigned alpha_factor,
252                       const double *factor,
253                       const double *src,
254                       const double *dst,
255                       const double *con,
256                       double *term)
257{
258   double temp;
259
260   switch (rgb_factor) {
261   case PIPE_BLENDFACTOR_ONE:
262      term[0] = factor[0]; /* R */
263      term[1] = factor[1]; /* G */
264      term[2] = factor[2]; /* B */
265      break;
266   case PIPE_BLENDFACTOR_SRC_COLOR:
267      term[0] = factor[0] * src[0]; /* R */
268      term[1] = factor[1] * src[1]; /* G */
269      term[2] = factor[2] * src[2]; /* B */
270      break;
271   case PIPE_BLENDFACTOR_SRC_ALPHA:
272      term[0] = factor[0] * src[3]; /* R */
273      term[1] = factor[1] * src[3]; /* G */
274      term[2] = factor[2] * src[3]; /* B */
275      break;
276   case PIPE_BLENDFACTOR_DST_COLOR:
277      term[0] = factor[0] * dst[0]; /* R */
278      term[1] = factor[1] * dst[1]; /* G */
279      term[2] = factor[2] * dst[2]; /* B */
280      break;
281   case PIPE_BLENDFACTOR_DST_ALPHA:
282      term[0] = factor[0] * dst[3]; /* R */
283      term[1] = factor[1] * dst[3]; /* G */
284      term[2] = factor[2] * dst[3]; /* B */
285      break;
286   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
287      temp = MIN2(src[3], 1.0f - dst[3]);
288      term[0] = factor[0] * temp; /* R */
289      term[1] = factor[1] * temp; /* G */
290      term[2] = factor[2] * temp; /* B */
291      break;
292   case PIPE_BLENDFACTOR_CONST_COLOR:
293      term[0] = factor[0] * con[0]; /* R */
294      term[1] = factor[1] * con[1]; /* G */
295      term[2] = factor[2] * con[2]; /* B */
296      break;
297   case PIPE_BLENDFACTOR_CONST_ALPHA:
298      term[0] = factor[0] * con[3]; /* R */
299      term[1] = factor[1] * con[3]; /* G */
300      term[2] = factor[2] * con[3]; /* B */
301      break;
302   case PIPE_BLENDFACTOR_SRC1_COLOR:
303      assert(0); /* to do */
304      break;
305   case PIPE_BLENDFACTOR_SRC1_ALPHA:
306      assert(0); /* to do */
307      break;
308   case PIPE_BLENDFACTOR_ZERO:
309      term[0] = 0.0f; /* R */
310      term[1] = 0.0f; /* G */
311      term[2] = 0.0f; /* B */
312      break;
313   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
314      term[0] = factor[0] * (1.0f - src[0]); /* R */
315      term[1] = factor[1] * (1.0f - src[1]); /* G */
316      term[2] = factor[2] * (1.0f - src[2]); /* B */
317      break;
318   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
319      term[0] = factor[0] * (1.0f - src[3]); /* R */
320      term[1] = factor[1] * (1.0f - src[3]); /* G */
321      term[2] = factor[2] * (1.0f - src[3]); /* B */
322      break;
323   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
324      term[0] = factor[0] * (1.0f - dst[3]); /* R */
325      term[1] = factor[1] * (1.0f - dst[3]); /* G */
326      term[2] = factor[2] * (1.0f - dst[3]); /* B */
327      break;
328   case PIPE_BLENDFACTOR_INV_DST_COLOR:
329      term[0] = factor[0] * (1.0f - dst[0]); /* R */
330      term[1] = factor[1] * (1.0f - dst[1]); /* G */
331      term[2] = factor[2] * (1.0f - dst[2]); /* B */
332      break;
333   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
334      term[0] = factor[0] * (1.0f - con[0]); /* R */
335      term[1] = factor[1] * (1.0f - con[1]); /* G */
336      term[2] = factor[2] * (1.0f - con[2]); /* B */
337      break;
338   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
339      term[0] = factor[0] * (1.0f - con[3]); /* R */
340      term[1] = factor[1] * (1.0f - con[3]); /* G */
341      term[2] = factor[2] * (1.0f - con[3]); /* B */
342      break;
343   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
344      assert(0); /* to do */
345      break;
346   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
347      assert(0); /* to do */
348      break;
349   default:
350      assert(0);
351   }
352
353   /*
354    * Compute src/first term A
355    */
356   switch (alpha_factor) {
357   case PIPE_BLENDFACTOR_ONE:
358      term[3] = factor[3]; /* A */
359      break;
360   case PIPE_BLENDFACTOR_SRC_COLOR:
361   case PIPE_BLENDFACTOR_SRC_ALPHA:
362      term[3] = factor[3] * src[3]; /* A */
363      break;
364   case PIPE_BLENDFACTOR_DST_COLOR:
365   case PIPE_BLENDFACTOR_DST_ALPHA:
366      term[3] = factor[3] * dst[3]; /* A */
367      break;
368   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
369      term[3] = src[3]; /* A */
370      break;
371   case PIPE_BLENDFACTOR_CONST_COLOR:
372   case PIPE_BLENDFACTOR_CONST_ALPHA:
373      term[3] = factor[3] * con[3]; /* A */
374      break;
375   case PIPE_BLENDFACTOR_ZERO:
376      term[3] = 0.0f; /* A */
377      break;
378   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
379   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
380      term[3] = factor[3] * (1.0f - src[3]); /* A */
381      break;
382   case PIPE_BLENDFACTOR_INV_DST_COLOR:
383   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
384      term[3] = factor[3] * (1.0f - dst[3]); /* A */
385      break;
386   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
387   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
388      term[3] = factor[3] * (1.0f - con[3]);
389      break;
390   default:
391      assert(0);
392   }
393}
394
395
396static void
397compute_blend_ref(const struct pipe_blend_state *blend,
398                  const double *src,
399                  const double *dst,
400                  const double *con,
401                  double *res)
402{
403   double src_term[4];
404   double dst_term[4];
405
406   compute_blend_ref_term(blend->rt[0].rgb_src_factor, blend->rt[0].alpha_src_factor,
407                          src, src, dst, con, src_term);
408   compute_blend_ref_term(blend->rt[0].rgb_dst_factor, blend->rt[0].alpha_dst_factor,
409                          dst, src, dst, con, dst_term);
410
411   /*
412    * Combine RGB terms
413    */
414   switch (blend->rt[0].rgb_func) {
415   case PIPE_BLEND_ADD:
416      res[0] = src_term[0] + dst_term[0]; /* R */
417      res[1] = src_term[1] + dst_term[1]; /* G */
418      res[2] = src_term[2] + dst_term[2]; /* B */
419      break;
420   case PIPE_BLEND_SUBTRACT:
421      res[0] = src_term[0] - dst_term[0]; /* R */
422      res[1] = src_term[1] - dst_term[1]; /* G */
423      res[2] = src_term[2] - dst_term[2]; /* B */
424      break;
425   case PIPE_BLEND_REVERSE_SUBTRACT:
426      res[0] = dst_term[0] - src_term[0]; /* R */
427      res[1] = dst_term[1] - src_term[1]; /* G */
428      res[2] = dst_term[2] - src_term[2]; /* B */
429      break;
430   case PIPE_BLEND_MIN:
431      res[0] = MIN2(src_term[0], dst_term[0]); /* R */
432      res[1] = MIN2(src_term[1], dst_term[1]); /* G */
433      res[2] = MIN2(src_term[2], dst_term[2]); /* B */
434      break;
435   case PIPE_BLEND_MAX:
436      res[0] = MAX2(src_term[0], dst_term[0]); /* R */
437      res[1] = MAX2(src_term[1], dst_term[1]); /* G */
438      res[2] = MAX2(src_term[2], dst_term[2]); /* B */
439      break;
440   default:
441      assert(0);
442   }
443
444   /*
445    * Combine A terms
446    */
447   switch (blend->rt[0].alpha_func) {
448   case PIPE_BLEND_ADD:
449      res[3] = src_term[3] + dst_term[3]; /* A */
450      break;
451   case PIPE_BLEND_SUBTRACT:
452      res[3] = src_term[3] - dst_term[3]; /* A */
453      break;
454   case PIPE_BLEND_REVERSE_SUBTRACT:
455      res[3] = dst_term[3] - src_term[3]; /* A */
456      break;
457   case PIPE_BLEND_MIN:
458      res[3] = MIN2(src_term[3], dst_term[3]); /* A */
459      break;
460   case PIPE_BLEND_MAX:
461      res[3] = MAX2(src_term[3], dst_term[3]); /* A */
462      break;
463   default:
464      assert(0);
465   }
466}
467
468
469PIPE_ALIGN_STACK
470static boolean
471test_one(struct gallivm_state *gallivm,
472         unsigned verbose,
473         FILE *fp,
474         const struct pipe_blend_state *blend,
475         enum vector_mode mode,
476         struct lp_type type)
477{
478   LLVMModuleRef module = gallivm->module;
479   LLVMValueRef func = NULL;
480   LLVMExecutionEngineRef engine = gallivm->engine;
481   char *error = NULL;
482   blend_test_ptr_t blend_test_ptr;
483   boolean success;
484   const unsigned n = LP_TEST_NUM_SAMPLES;
485   int64_t cycles[LP_TEST_NUM_SAMPLES];
486   double cycles_avg = 0.0;
487   unsigned i, j;
488   void *code;
489
490   if(verbose >= 1)
491      dump_blend_type(stdout, blend, mode, type);
492
493   func = add_blend_test(gallivm, blend, mode, type);
494
495   if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
496      LLVMDumpModule(module);
497      abort();
498   }
499   LLVMDisposeMessage(error);
500
501   code = LLVMGetPointerToGlobal(engine, func);
502   blend_test_ptr = voidptr_to_blend_test_ptr_t(code);
503
504   if(verbose >= 2)
505      lp_disassemble(code);
506
507   success = TRUE;
508   for(i = 0; i < n && success; ++i) {
509      if(mode == AoS) {
510         PIPE_ALIGN_VAR(16) uint8_t src[LP_NATIVE_VECTOR_WIDTH/8];
511         PIPE_ALIGN_VAR(16) uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8];
512         PIPE_ALIGN_VAR(16) uint8_t con[LP_NATIVE_VECTOR_WIDTH/8];
513         PIPE_ALIGN_VAR(16) uint8_t res[LP_NATIVE_VECTOR_WIDTH/8];
514         PIPE_ALIGN_VAR(16) uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8];
515         int64_t start_counter = 0;
516         int64_t end_counter = 0;
517
518         random_vec(type, src);
519         random_vec(type, dst);
520         random_vec(type, con);
521
522         {
523            double fsrc[LP_MAX_VECTOR_LENGTH];
524            double fdst[LP_MAX_VECTOR_LENGTH];
525            double fcon[LP_MAX_VECTOR_LENGTH];
526            double fref[LP_MAX_VECTOR_LENGTH];
527
528            read_vec(type, src, fsrc);
529            read_vec(type, dst, fdst);
530            read_vec(type, con, fcon);
531
532            for(j = 0; j < type.length; j += 4)
533               compute_blend_ref(blend, fsrc + j, fdst + j, fcon + j, fref + j);
534
535            write_vec(type, ref, fref);
536         }
537
538         start_counter = rdtsc();
539         blend_test_ptr(src, dst, con, res);
540         end_counter = rdtsc();
541
542         cycles[i] = end_counter - start_counter;
543
544         if(!compare_vec(type, res, ref)) {
545            success = FALSE;
546
547            if(verbose < 1)
548               dump_blend_type(stderr, blend, mode, type);
549            fprintf(stderr, "MISMATCH\n");
550
551            fprintf(stderr, "  Src: ");
552            dump_vec(stderr, type, src);
553            fprintf(stderr, "\n");
554
555            fprintf(stderr, "  Dst: ");
556            dump_vec(stderr, type, dst);
557            fprintf(stderr, "\n");
558
559            fprintf(stderr, "  Con: ");
560            dump_vec(stderr, type, con);
561            fprintf(stderr, "\n");
562
563            fprintf(stderr, "  Res: ");
564            dump_vec(stderr, type, res);
565            fprintf(stderr, "\n");
566
567            fprintf(stderr, "  Ref: ");
568            dump_vec(stderr, type, ref);
569            fprintf(stderr, "\n");
570         }
571      }
572
573      if(mode == SoA) {
574         const unsigned stride = type.length*type.width/8;
575         PIPE_ALIGN_VAR(16) uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8];
576         PIPE_ALIGN_VAR(16) uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8];
577         PIPE_ALIGN_VAR(16) uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8];
578         PIPE_ALIGN_VAR(16) uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8];
579         PIPE_ALIGN_VAR(16) uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8];
580         int64_t start_counter = 0;
581         int64_t end_counter = 0;
582         boolean mismatch;
583
584         for(j = 0; j < 4; ++j) {
585            random_vec(type, src + j*stride);
586            random_vec(type, dst + j*stride);
587            random_vec(type, con + j*stride);
588         }
589
590         {
591            double fsrc[4];
592            double fdst[4];
593            double fcon[4];
594            double fref[4];
595            unsigned k;
596
597            for(k = 0; k < type.length; ++k) {
598               for(j = 0; j < 4; ++j) {
599                  fsrc[j] = read_elem(type, src + j*stride, k);
600                  fdst[j] = read_elem(type, dst + j*stride, k);
601                  fcon[j] = read_elem(type, con + j*stride, k);
602               }
603
604               compute_blend_ref(blend, fsrc, fdst, fcon, fref);
605
606               for(j = 0; j < 4; ++j)
607                  write_elem(type, ref + j*stride, k, fref[j]);
608            }
609         }
610
611         start_counter = rdtsc();
612         blend_test_ptr(src, dst, con, res);
613         end_counter = rdtsc();
614
615         cycles[i] = end_counter - start_counter;
616
617         mismatch = FALSE;
618         for (j = 0; j < 4; ++j)
619            if(!compare_vec(type, res + j*stride, ref + j*stride))
620               mismatch = TRUE;
621
622         if (mismatch) {
623            success = FALSE;
624
625            if(verbose < 1)
626               dump_blend_type(stderr, blend, mode, type);
627            fprintf(stderr, "MISMATCH\n");
628            for(j = 0; j < 4; ++j) {
629               char channel = "RGBA"[j];
630               fprintf(stderr, "  Src%c: ", channel);
631               dump_vec(stderr, type, src + j*stride);
632               fprintf(stderr, "\n");
633
634               fprintf(stderr, "  Dst%c: ", channel);
635               dump_vec(stderr, type, dst + j*stride);
636               fprintf(stderr, "\n");
637
638               fprintf(stderr, "  Con%c: ", channel);
639               dump_vec(stderr, type, con + j*stride);
640               fprintf(stderr, "\n");
641
642               fprintf(stderr, "  Res%c: ", channel);
643               dump_vec(stderr, type, res + j*stride);
644               fprintf(stderr, "\n");
645
646               fprintf(stderr, "  Ref%c: ", channel);
647               dump_vec(stderr, type, ref + j*stride);
648               fprintf(stderr, "\n");
649
650               fprintf(stderr, "\n");
651            }
652         }
653      }
654   }
655
656   /*
657    * Unfortunately the output of cycle counter is not very reliable as it comes
658    * -- sometimes we get outliers (due IRQs perhaps?) which are
659    * better removed to avoid random or biased data.
660    */
661   {
662      double sum = 0.0, sum2 = 0.0;
663      double avg, std;
664      unsigned m;
665
666      for(i = 0; i < n; ++i) {
667         sum += cycles[i];
668         sum2 += cycles[i]*cycles[i];
669      }
670
671      avg = sum/n;
672      std = sqrtf((sum2 - n*avg*avg)/n);
673
674      m = 0;
675      sum = 0.0;
676      for(i = 0; i < n; ++i) {
677         if(fabs(cycles[i] - avg) <= 4.0*std) {
678            sum += cycles[i];
679            ++m;
680         }
681      }
682
683      cycles_avg = sum/m;
684
685   }
686
687   if(fp)
688      write_tsv_row(fp, blend, mode, type, cycles_avg, success);
689
690   if (!success) {
691      if(verbose < 2)
692         LLVMDumpModule(module);
693      LLVMWriteBitcodeToFile(module, "blend.bc");
694      fprintf(stderr, "blend.bc written\n");
695      fprintf(stderr, "Invoke as \"llc -o - blend.bc\"\n");
696      abort();
697   }
698
699   LLVMFreeMachineCodeForFunction(engine, func);
700
701   return success;
702}
703
704
705const unsigned
706blend_factors[] = {
707   PIPE_BLENDFACTOR_ZERO,
708   PIPE_BLENDFACTOR_ONE,
709   PIPE_BLENDFACTOR_SRC_COLOR,
710   PIPE_BLENDFACTOR_SRC_ALPHA,
711   PIPE_BLENDFACTOR_DST_COLOR,
712   PIPE_BLENDFACTOR_DST_ALPHA,
713   PIPE_BLENDFACTOR_CONST_COLOR,
714   PIPE_BLENDFACTOR_CONST_ALPHA,
715#if 0
716   PIPE_BLENDFACTOR_SRC1_COLOR,
717   PIPE_BLENDFACTOR_SRC1_ALPHA,
718#endif
719   PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE,
720   PIPE_BLENDFACTOR_INV_SRC_COLOR,
721   PIPE_BLENDFACTOR_INV_SRC_ALPHA,
722   PIPE_BLENDFACTOR_INV_DST_COLOR,
723   PIPE_BLENDFACTOR_INV_DST_ALPHA,
724   PIPE_BLENDFACTOR_INV_CONST_COLOR,
725   PIPE_BLENDFACTOR_INV_CONST_ALPHA,
726#if 0
727   PIPE_BLENDFACTOR_INV_SRC1_COLOR,
728   PIPE_BLENDFACTOR_INV_SRC1_ALPHA,
729#endif
730};
731
732
733const unsigned
734blend_funcs[] = {
735   PIPE_BLEND_ADD,
736   PIPE_BLEND_SUBTRACT,
737   PIPE_BLEND_REVERSE_SUBTRACT,
738   PIPE_BLEND_MIN,
739   PIPE_BLEND_MAX
740};
741
742
743const struct lp_type blend_types[] = {
744   /* float, fixed,  sign,  norm, width, len */
745   {   TRUE, FALSE,  TRUE, FALSE,    32,   4 }, /* f32 x 4 */
746   {  FALSE, FALSE, FALSE,  TRUE,     8,  16 }, /* u8n x 16 */
747};
748
749
750const unsigned num_funcs = sizeof(blend_funcs)/sizeof(blend_funcs[0]);
751const unsigned num_factors = sizeof(blend_factors)/sizeof(blend_factors[0]);
752const unsigned num_types = sizeof(blend_types)/sizeof(blend_types[0]);
753
754
755boolean
756test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
757{
758   const unsigned *rgb_func;
759   const unsigned *rgb_src_factor;
760   const unsigned *rgb_dst_factor;
761   const unsigned *alpha_func;
762   const unsigned *alpha_src_factor;
763   const unsigned *alpha_dst_factor;
764   struct pipe_blend_state blend;
765   enum vector_mode mode;
766   const struct lp_type *type;
767   boolean success = TRUE;
768
769   for(rgb_func = blend_funcs; rgb_func < &blend_funcs[num_funcs]; ++rgb_func) {
770      for(alpha_func = blend_funcs; alpha_func < &blend_funcs[num_funcs]; ++alpha_func) {
771         for(rgb_src_factor = blend_factors; rgb_src_factor < &blend_factors[num_factors]; ++rgb_src_factor) {
772            for(rgb_dst_factor = blend_factors; rgb_dst_factor <= rgb_src_factor; ++rgb_dst_factor) {
773               for(alpha_src_factor = blend_factors; alpha_src_factor < &blend_factors[num_factors]; ++alpha_src_factor) {
774                  for(alpha_dst_factor = blend_factors; alpha_dst_factor <= alpha_src_factor; ++alpha_dst_factor) {
775                     for(mode = 0; mode < 2; ++mode) {
776                        for(type = blend_types; type < &blend_types[num_types]; ++type) {
777
778                           if(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
779                              *alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)
780                              continue;
781
782                           memset(&blend, 0, sizeof blend);
783                           blend.rt[0].blend_enable      = 1;
784                           blend.rt[0].rgb_func          = *rgb_func;
785                           blend.rt[0].rgb_src_factor    = *rgb_src_factor;
786                           blend.rt[0].rgb_dst_factor    = *rgb_dst_factor;
787                           blend.rt[0].alpha_func        = *alpha_func;
788                           blend.rt[0].alpha_src_factor  = *alpha_src_factor;
789                           blend.rt[0].alpha_dst_factor  = *alpha_dst_factor;
790                           blend.rt[0].colormask         = PIPE_MASK_RGBA;
791
792                           if(!test_one(gallivm, verbose, fp, &blend, mode, *type))
793                             success = FALSE;
794
795                        }
796                     }
797                  }
798               }
799            }
800         }
801      }
802   }
803
804   return success;
805}
806
807
808boolean
809test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp,
810          unsigned long n)
811{
812   const unsigned *rgb_func;
813   const unsigned *rgb_src_factor;
814   const unsigned *rgb_dst_factor;
815   const unsigned *alpha_func;
816   const unsigned *alpha_src_factor;
817   const unsigned *alpha_dst_factor;
818   struct pipe_blend_state blend;
819   enum vector_mode mode;
820   const struct lp_type *type;
821   unsigned long i;
822   boolean success = TRUE;
823
824   for(i = 0; i < n; ++i) {
825      rgb_func = &blend_funcs[rand() % num_funcs];
826      alpha_func = &blend_funcs[rand() % num_funcs];
827      rgb_src_factor = &blend_factors[rand() % num_factors];
828      alpha_src_factor = &blend_factors[rand() % num_factors];
829
830      do {
831         rgb_dst_factor = &blend_factors[rand() % num_factors];
832      } while(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
833
834      do {
835         alpha_dst_factor = &blend_factors[rand() % num_factors];
836      } while(*alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
837
838      mode = rand() & 1;
839
840      type = &blend_types[rand() % num_types];
841
842      memset(&blend, 0, sizeof blend);
843      blend.rt[0].blend_enable      = 1;
844      blend.rt[0].rgb_func          = *rgb_func;
845      blend.rt[0].rgb_src_factor    = *rgb_src_factor;
846      blend.rt[0].rgb_dst_factor    = *rgb_dst_factor;
847      blend.rt[0].alpha_func        = *alpha_func;
848      blend.rt[0].alpha_src_factor  = *alpha_src_factor;
849      blend.rt[0].alpha_dst_factor  = *alpha_dst_factor;
850      blend.rt[0].colormask         = PIPE_MASK_RGBA;
851
852      if(!test_one(gallivm, verbose, fp, &blend, mode, *type))
853        success = FALSE;
854   }
855
856   return success;
857}
858
859
860boolean
861test_single(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
862{
863   printf("no test_single()");
864   return TRUE;
865}
866