lp_test_blend.c revision 966d28cb2e5e090d8f591810f331df0d05b06271
1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29/**
30 * @file
31 * Unit tests for blend LLVM IR generation
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Blend computation code derived from code written by
36 * @author Brian Paul <brian@vmware.com>
37 */
38
39
40#include "gallivm/lp_bld_type.h"
41#include "gallivm/lp_bld_debug.h"
42#include "lp_bld_blend.h"
43#include "lp_test.h"
44
45
/**
 * Vector layout exercised by a blend test.
 *
 * The numeric values matter: test_all() iterates the modes as the integers
 * 0 and 1, test_some() picks one with "rand() & 1", and dump_blend_type()
 * treats any non-zero mode as SoA.
 */
enum vector_mode
{
   AoS = 0,  /* add_blend_test() loads one vector per operand and calls lp_build_blend_aos() */
   SoA = 1   /* add_blend_test() loads four vectors per operand and calls lp_build_blend_soa() */
};
51
52
/**
 * Signature of the generated blend test function: three input operands
 * (source, destination, blend constant) and one output buffer.
 */
typedef void (*blend_test_ptr_t)(const void *src,
                                 const void *dst,
                                 const void *con,
                                 void *res);


/**
 * Reinterpret an object pointer as a blend test function pointer.
 *
 * The conversion goes through a union instead of a cast, so no direct
 * object-pointer to function-pointer cast appears in the code.
 */
static blend_test_ptr_t
voidptr_to_blend_test_ptr_t(void *p)
{
   union {
      void *object;
      blend_test_ptr_t function;
   } pun;

   pun.object = p;

   return pun.function;
}
66
67
68
/**
 * Write the tab-separated column header line matching the rows emitted by
 * write_tsv_row(), then flush the stream.
 */
void
write_tsv_header(FILE *fp)
{
   static const char *const columns[] = {
      "result",
      "cycles_per_channel",
      "mode",
      "type",
      "sep_func",
      "sep_src_factor",
      "sep_dst_factor",
      "rgb_func",
      "rgb_src_factor",
      "rgb_dst_factor",
      "alpha_func",
      "alpha_src_factor",
      "alpha_dst_factor"
   };
   const unsigned num_columns = sizeof columns / sizeof columns[0];
   unsigned col;

   for (col = 0; col < num_columns; ++col) {
      fputs(columns[col], fp);
      /* Tab between columns, newline after the last one. */
      fputc(col + 1 < num_columns ? '\t' : '\n', fp);
   }

   fflush(fp);
}
89
90
91static void
92write_tsv_row(FILE *fp,
93              const struct pipe_blend_state *blend,
94              enum vector_mode mode,
95              struct lp_type type,
96              double cycles,
97              boolean success)
98{
99   fprintf(fp, "%s\t", success ? "pass" : "fail");
100
101   if (mode == AoS) {
102      fprintf(fp, "%.1f\t", cycles / type.length);
103      fprintf(fp, "aos\t");
104   }
105
106   if (mode == SoA) {
107      fprintf(fp, "%.1f\t", cycles / (4 * type.length));
108      fprintf(fp, "soa\t");
109   }
110
111   fprintf(fp, "%s%u%sx%u\t",
112           type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
113           type.width,
114           type.norm ? "n" : "",
115           type.length);
116
117   fprintf(fp,
118           "%s\t%s\t%s\t",
119           blend->rt[0].rgb_func != blend->rt[0].alpha_func ? "true" : "false",
120           blend->rt[0].rgb_src_factor != blend->rt[0].alpha_src_factor ? "true" : "false",
121           blend->rt[0].rgb_dst_factor != blend->rt[0].alpha_dst_factor ? "true" : "false");
122
123   fprintf(fp,
124           "%s\t%s\t%s\t%s\t%s\t%s\n",
125           util_dump_blend_func(blend->rt[0].rgb_func, TRUE),
126           util_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE),
127           util_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE),
128           util_dump_blend_func(blend->rt[0].alpha_func, TRUE),
129           util_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE),
130           util_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE));
131
132   fflush(fp);
133}
134
135
136static void
137dump_blend_type(FILE *fp,
138                const struct pipe_blend_state *blend,
139                enum vector_mode mode,
140                struct lp_type type)
141{
142   fprintf(fp, "%s", mode ? "soa" : "aos");
143
144   fprintf(fp, " type=%s%u%sx%u",
145           type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
146           type.width,
147           type.norm ? "n" : "",
148           type.length);
149
150   fprintf(fp,
151           " %s=%s %s=%s %s=%s %s=%s %s=%s %s=%s",
152           "rgb_func",         util_dump_blend_func(blend->rt[0].rgb_func, TRUE),
153           "rgb_src_factor",   util_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE),
154           "rgb_dst_factor",   util_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE),
155           "alpha_func",       util_dump_blend_func(blend->rt[0].alpha_func, TRUE),
156           "alpha_src_factor", util_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE),
157           "alpha_dst_factor", util_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE));
158
159   fprintf(fp, " ...\n");
160   fflush(fp);
161}
162
163
164static LLVMValueRef
165add_blend_test(LLVMModuleRef module,
166               const struct pipe_blend_state *blend,
167               enum vector_mode mode,
168               struct lp_type type)
169{
170   LLVMTypeRef vec_type;
171   LLVMTypeRef args[4];
172   LLVMValueRef func;
173   LLVMValueRef src_ptr;
174   LLVMValueRef dst_ptr;
175   LLVMValueRef const_ptr;
176   LLVMValueRef res_ptr;
177   LLVMBasicBlockRef block;
178   LLVMBuilderRef builder;
179   const unsigned rt = 0;
180
181   vec_type = lp_build_vec_type(type);
182
183   args[3] = args[2] = args[1] = args[0] = LLVMPointerType(vec_type, 0);
184   func = LLVMAddFunction(module, "test", LLVMFunctionType(LLVMVoidType(), args, 4, 0));
185   LLVMSetFunctionCallConv(func, LLVMCCallConv);
186   src_ptr = LLVMGetParam(func, 0);
187   dst_ptr = LLVMGetParam(func, 1);
188   const_ptr = LLVMGetParam(func, 2);
189   res_ptr = LLVMGetParam(func, 3);
190
191   block = LLVMAppendBasicBlock(func, "entry");
192   builder = LLVMCreateBuilder();
193   LLVMPositionBuilderAtEnd(builder, block);
194
195   if (mode == AoS) {
196      LLVMValueRef src;
197      LLVMValueRef dst;
198      LLVMValueRef con;
199      LLVMValueRef res;
200
201      src = LLVMBuildLoad(builder, src_ptr, "src");
202      dst = LLVMBuildLoad(builder, dst_ptr, "dst");
203      con = LLVMBuildLoad(builder, const_ptr, "const");
204
205      res = lp_build_blend_aos(builder, blend, type, rt, src, dst, con, 3);
206
207      lp_build_name(res, "res");
208
209      LLVMBuildStore(builder, res, res_ptr);
210   }
211
212   if (mode == SoA) {
213      LLVMValueRef src[4];
214      LLVMValueRef dst[4];
215      LLVMValueRef con[4];
216      LLVMValueRef res[4];
217      unsigned i;
218
219      for(i = 0; i < 4; ++i) {
220         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
221         src[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, src_ptr, &index, 1, ""), "");
222         dst[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), "");
223         con[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), "");
224         lp_build_name(src[i], "src.%c", "rgba"[i]);
225         lp_build_name(con[i], "con.%c", "rgba"[i]);
226         lp_build_name(dst[i], "dst.%c", "rgba"[i]);
227      }
228
229      lp_build_blend_soa(builder, blend, type, rt, src, dst, con, res);
230
231      for(i = 0; i < 4; ++i) {
232         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
233         lp_build_name(res[i], "res.%c", "rgba"[i]);
234         LLVMBuildStore(builder, res[i], LLVMBuildGEP(builder, res_ptr, &index, 1, ""));
235      }
236   }
237
238   LLVMBuildRetVoid(builder);;
239
240   LLVMDisposeBuilder(builder);
241   return func;
242}
243
244
/** R = A + B, clamped from above at 1.0 (no lower clamp is applied) */
#define ADD_SAT(R, A, B) \
do { \
   R = (A) + (B); \
   R = (R > 1.0f) ? 1.0f : R; \
} while (0)

/** R = A - B, clamped from below at 0.0 (no upper clamp is applied) */
#define SUB_SAT(R, A, B) \
do { \
   R = (A) - (B); \
   R = (R < 0.0f) ? 0.0f : R; \
} while (0)
256
257
258static void
259compute_blend_ref_term(unsigned rgb_factor,
260                       unsigned alpha_factor,
261                       const double *factor,
262                       const double *src,
263                       const double *dst,
264                       const double *con,
265                       double *term)
266{
267   double temp;
268
269   switch (rgb_factor) {
270   case PIPE_BLENDFACTOR_ONE:
271      term[0] = factor[0]; /* R */
272      term[1] = factor[1]; /* G */
273      term[2] = factor[2]; /* B */
274      break;
275   case PIPE_BLENDFACTOR_SRC_COLOR:
276      term[0] = factor[0] * src[0]; /* R */
277      term[1] = factor[1] * src[1]; /* G */
278      term[2] = factor[2] * src[2]; /* B */
279      break;
280   case PIPE_BLENDFACTOR_SRC_ALPHA:
281      term[0] = factor[0] * src[3]; /* R */
282      term[1] = factor[1] * src[3]; /* G */
283      term[2] = factor[2] * src[3]; /* B */
284      break;
285   case PIPE_BLENDFACTOR_DST_COLOR:
286      term[0] = factor[0] * dst[0]; /* R */
287      term[1] = factor[1] * dst[1]; /* G */
288      term[2] = factor[2] * dst[2]; /* B */
289      break;
290   case PIPE_BLENDFACTOR_DST_ALPHA:
291      term[0] = factor[0] * dst[3]; /* R */
292      term[1] = factor[1] * dst[3]; /* G */
293      term[2] = factor[2] * dst[3]; /* B */
294      break;
295   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
296      temp = MIN2(src[3], 1.0f - dst[3]);
297      term[0] = factor[0] * temp; /* R */
298      term[1] = factor[1] * temp; /* G */
299      term[2] = factor[2] * temp; /* B */
300      break;
301   case PIPE_BLENDFACTOR_CONST_COLOR:
302      term[0] = factor[0] * con[0]; /* R */
303      term[1] = factor[1] * con[1]; /* G */
304      term[2] = factor[2] * con[2]; /* B */
305      break;
306   case PIPE_BLENDFACTOR_CONST_ALPHA:
307      term[0] = factor[0] * con[3]; /* R */
308      term[1] = factor[1] * con[3]; /* G */
309      term[2] = factor[2] * con[3]; /* B */
310      break;
311   case PIPE_BLENDFACTOR_SRC1_COLOR:
312      assert(0); /* to do */
313      break;
314   case PIPE_BLENDFACTOR_SRC1_ALPHA:
315      assert(0); /* to do */
316      break;
317   case PIPE_BLENDFACTOR_ZERO:
318      term[0] = 0.0f; /* R */
319      term[1] = 0.0f; /* G */
320      term[2] = 0.0f; /* B */
321      break;
322   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
323      term[0] = factor[0] * (1.0f - src[0]); /* R */
324      term[1] = factor[1] * (1.0f - src[1]); /* G */
325      term[2] = factor[2] * (1.0f - src[2]); /* B */
326      break;
327   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
328      term[0] = factor[0] * (1.0f - src[3]); /* R */
329      term[1] = factor[1] * (1.0f - src[3]); /* G */
330      term[2] = factor[2] * (1.0f - src[3]); /* B */
331      break;
332   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
333      term[0] = factor[0] * (1.0f - dst[3]); /* R */
334      term[1] = factor[1] * (1.0f - dst[3]); /* G */
335      term[2] = factor[2] * (1.0f - dst[3]); /* B */
336      break;
337   case PIPE_BLENDFACTOR_INV_DST_COLOR:
338      term[0] = factor[0] * (1.0f - dst[0]); /* R */
339      term[1] = factor[1] * (1.0f - dst[1]); /* G */
340      term[2] = factor[2] * (1.0f - dst[2]); /* B */
341      break;
342   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
343      term[0] = factor[0] * (1.0f - con[0]); /* R */
344      term[1] = factor[1] * (1.0f - con[1]); /* G */
345      term[2] = factor[2] * (1.0f - con[2]); /* B */
346      break;
347   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
348      term[0] = factor[0] * (1.0f - con[3]); /* R */
349      term[1] = factor[1] * (1.0f - con[3]); /* G */
350      term[2] = factor[2] * (1.0f - con[3]); /* B */
351      break;
352   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
353      assert(0); /* to do */
354      break;
355   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
356      assert(0); /* to do */
357      break;
358   default:
359      assert(0);
360   }
361
362   /*
363    * Compute src/first term A
364    */
365   switch (alpha_factor) {
366   case PIPE_BLENDFACTOR_ONE:
367      term[3] = factor[3]; /* A */
368      break;
369   case PIPE_BLENDFACTOR_SRC_COLOR:
370   case PIPE_BLENDFACTOR_SRC_ALPHA:
371      term[3] = factor[3] * src[3]; /* A */
372      break;
373   case PIPE_BLENDFACTOR_DST_COLOR:
374   case PIPE_BLENDFACTOR_DST_ALPHA:
375      term[3] = factor[3] * dst[3]; /* A */
376      break;
377   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
378      term[3] = src[3]; /* A */
379      break;
380   case PIPE_BLENDFACTOR_CONST_COLOR:
381   case PIPE_BLENDFACTOR_CONST_ALPHA:
382      term[3] = factor[3] * con[3]; /* A */
383      break;
384   case PIPE_BLENDFACTOR_ZERO:
385      term[3] = 0.0f; /* A */
386      break;
387   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
388   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
389      term[3] = factor[3] * (1.0f - src[3]); /* A */
390      break;
391   case PIPE_BLENDFACTOR_INV_DST_COLOR:
392   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
393      term[3] = factor[3] * (1.0f - dst[3]); /* A */
394      break;
395   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
396   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
397      term[3] = factor[3] * (1.0f - con[3]);
398      break;
399   default:
400      assert(0);
401   }
402}
403
404
405static void
406compute_blend_ref(const struct pipe_blend_state *blend,
407                  const double *src,
408                  const double *dst,
409                  const double *con,
410                  double *res)
411{
412   double src_term[4];
413   double dst_term[4];
414
415   compute_blend_ref_term(blend->rt[0].rgb_src_factor, blend->rt[0].alpha_src_factor,
416                          src, src, dst, con, src_term);
417   compute_blend_ref_term(blend->rt[0].rgb_dst_factor, blend->rt[0].alpha_dst_factor,
418                          dst, src, dst, con, dst_term);
419
420   /*
421    * Combine RGB terms
422    */
423   switch (blend->rt[0].rgb_func) {
424   case PIPE_BLEND_ADD:
425      ADD_SAT(res[0], src_term[0], dst_term[0]); /* R */
426      ADD_SAT(res[1], src_term[1], dst_term[1]); /* G */
427      ADD_SAT(res[2], src_term[2], dst_term[2]); /* B */
428      break;
429   case PIPE_BLEND_SUBTRACT:
430      SUB_SAT(res[0], src_term[0], dst_term[0]); /* R */
431      SUB_SAT(res[1], src_term[1], dst_term[1]); /* G */
432      SUB_SAT(res[2], src_term[2], dst_term[2]); /* B */
433      break;
434   case PIPE_BLEND_REVERSE_SUBTRACT:
435      SUB_SAT(res[0], dst_term[0], src_term[0]); /* R */
436      SUB_SAT(res[1], dst_term[1], src_term[1]); /* G */
437      SUB_SAT(res[2], dst_term[2], src_term[2]); /* B */
438      break;
439   case PIPE_BLEND_MIN:
440      res[0] = MIN2(src_term[0], dst_term[0]); /* R */
441      res[1] = MIN2(src_term[1], dst_term[1]); /* G */
442      res[2] = MIN2(src_term[2], dst_term[2]); /* B */
443      break;
444   case PIPE_BLEND_MAX:
445      res[0] = MAX2(src_term[0], dst_term[0]); /* R */
446      res[1] = MAX2(src_term[1], dst_term[1]); /* G */
447      res[2] = MAX2(src_term[2], dst_term[2]); /* B */
448      break;
449   default:
450      assert(0);
451   }
452
453   /*
454    * Combine A terms
455    */
456   switch (blend->rt[0].alpha_func) {
457   case PIPE_BLEND_ADD:
458      ADD_SAT(res[3], src_term[3], dst_term[3]); /* A */
459      break;
460   case PIPE_BLEND_SUBTRACT:
461      SUB_SAT(res[3], src_term[3], dst_term[3]); /* A */
462      break;
463   case PIPE_BLEND_REVERSE_SUBTRACT:
464      SUB_SAT(res[3], dst_term[3], src_term[3]); /* A */
465      break;
466   case PIPE_BLEND_MIN:
467      res[3] = MIN2(src_term[3], dst_term[3]); /* A */
468      break;
469   case PIPE_BLEND_MAX:
470      res[3] = MAX2(src_term[3], dst_term[3]); /* A */
471      break;
472   default:
473      assert(0);
474   }
475}
476
477
478PIPE_ALIGN_STACK
479static boolean
480test_one(unsigned verbose,
481         FILE *fp,
482         const struct pipe_blend_state *blend,
483         enum vector_mode mode,
484         struct lp_type type)
485{
486   LLVMModuleRef module = NULL;
487   LLVMValueRef func = NULL;
488   LLVMExecutionEngineRef engine = NULL;
489   LLVMModuleProviderRef provider = NULL;
490   LLVMPassManagerRef pass = NULL;
491   char *error = NULL;
492   blend_test_ptr_t blend_test_ptr;
493   boolean success;
494   const unsigned n = LP_TEST_NUM_SAMPLES;
495   int64_t cycles[LP_TEST_NUM_SAMPLES];
496   double cycles_avg = 0.0;
497   unsigned i, j;
498   void *code;
499
500   if(verbose >= 1)
501      dump_blend_type(stdout, blend, mode, type);
502
503   module = LLVMModuleCreateWithName("test");
504
505   func = add_blend_test(module, blend, mode, type);
506
507   if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
508      LLVMDumpModule(module);
509      abort();
510   }
511   LLVMDisposeMessage(error);
512
513   provider = LLVMCreateModuleProviderForExistingModule(module);
514   if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) {
515      if(verbose < 1)
516         dump_blend_type(stderr, blend, mode, type);
517      fprintf(stderr, "%s\n", error);
518      LLVMDisposeMessage(error);
519      abort();
520   }
521
522#if 0
523   pass = LLVMCreatePassManager();
524   LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass);
525   /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
526    * but there are more on SVN. */
527   LLVMAddConstantPropagationPass(pass);
528   LLVMAddInstructionCombiningPass(pass);
529   LLVMAddPromoteMemoryToRegisterPass(pass);
530   LLVMAddGVNPass(pass);
531   LLVMAddCFGSimplificationPass(pass);
532   LLVMRunPassManager(pass, module);
533#else
534   (void)pass;
535#endif
536
537   if(verbose >= 2)
538      LLVMDumpModule(module);
539
540   code = LLVMGetPointerToGlobal(engine, func);
541   blend_test_ptr = voidptr_to_blend_test_ptr_t(code);
542
543   if(verbose >= 2)
544      lp_disassemble(code);
545
546   success = TRUE;
547   for(i = 0; i < n && success; ++i) {
548      if(mode == AoS) {
549         PIPE_ALIGN_VAR(16) uint8_t src[LP_NATIVE_VECTOR_WIDTH/8];
550         PIPE_ALIGN_VAR(16) uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8];
551         PIPE_ALIGN_VAR(16) uint8_t con[LP_NATIVE_VECTOR_WIDTH/8];
552         PIPE_ALIGN_VAR(16) uint8_t res[LP_NATIVE_VECTOR_WIDTH/8];
553         PIPE_ALIGN_VAR(16) uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8];
554         int64_t start_counter = 0;
555         int64_t end_counter = 0;
556
557         random_vec(type, src);
558         random_vec(type, dst);
559         random_vec(type, con);
560
561         {
562            double fsrc[LP_MAX_VECTOR_LENGTH];
563            double fdst[LP_MAX_VECTOR_LENGTH];
564            double fcon[LP_MAX_VECTOR_LENGTH];
565            double fref[LP_MAX_VECTOR_LENGTH];
566
567            read_vec(type, src, fsrc);
568            read_vec(type, dst, fdst);
569            read_vec(type, con, fcon);
570
571            for(j = 0; j < type.length; j += 4)
572               compute_blend_ref(blend, fsrc + j, fdst + j, fcon + j, fref + j);
573
574            write_vec(type, ref, fref);
575         }
576
577         start_counter = rdtsc();
578         blend_test_ptr(src, dst, con, res);
579         end_counter = rdtsc();
580
581         cycles[i] = end_counter - start_counter;
582
583         if(!compare_vec(type, res, ref)) {
584            success = FALSE;
585
586            if(verbose < 1)
587               dump_blend_type(stderr, blend, mode, type);
588            fprintf(stderr, "MISMATCH\n");
589
590            fprintf(stderr, "  Src: ");
591            dump_vec(stderr, type, src);
592            fprintf(stderr, "\n");
593
594            fprintf(stderr, "  Dst: ");
595            dump_vec(stderr, type, dst);
596            fprintf(stderr, "\n");
597
598            fprintf(stderr, "  Con: ");
599            dump_vec(stderr, type, con);
600            fprintf(stderr, "\n");
601
602            fprintf(stderr, "  Res: ");
603            dump_vec(stderr, type, res);
604            fprintf(stderr, "\n");
605
606            fprintf(stderr, "  Ref: ");
607            dump_vec(stderr, type, ref);
608            fprintf(stderr, "\n");
609         }
610      }
611
612      if(mode == SoA) {
613         const unsigned stride = type.length*type.width/8;
614         PIPE_ALIGN_VAR(16) uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8];
615         PIPE_ALIGN_VAR(16) uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8];
616         PIPE_ALIGN_VAR(16) uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8];
617         PIPE_ALIGN_VAR(16) uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8];
618         PIPE_ALIGN_VAR(16) uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8];
619         int64_t start_counter = 0;
620         int64_t end_counter = 0;
621         boolean mismatch;
622
623         for(j = 0; j < 4; ++j) {
624            random_vec(type, src + j*stride);
625            random_vec(type, dst + j*stride);
626            random_vec(type, con + j*stride);
627         }
628
629         {
630            double fsrc[4];
631            double fdst[4];
632            double fcon[4];
633            double fref[4];
634            unsigned k;
635
636            for(k = 0; k < type.length; ++k) {
637               for(j = 0; j < 4; ++j) {
638                  fsrc[j] = read_elem(type, src + j*stride, k);
639                  fdst[j] = read_elem(type, dst + j*stride, k);
640                  fcon[j] = read_elem(type, con + j*stride, k);
641               }
642
643               compute_blend_ref(blend, fsrc, fdst, fcon, fref);
644
645               for(j = 0; j < 4; ++j)
646                  write_elem(type, ref + j*stride, k, fref[j]);
647            }
648         }
649
650         start_counter = rdtsc();
651         blend_test_ptr(src, dst, con, res);
652         end_counter = rdtsc();
653
654         cycles[i] = end_counter - start_counter;
655
656         mismatch = FALSE;
657         for (j = 0; j < 4; ++j)
658            if(!compare_vec(type, res + j*stride, ref + j*stride))
659               mismatch = TRUE;
660
661         if (mismatch) {
662            success = FALSE;
663
664            if(verbose < 1)
665               dump_blend_type(stderr, blend, mode, type);
666            fprintf(stderr, "MISMATCH\n");
667            for(j = 0; j < 4; ++j) {
668               char channel = "RGBA"[j];
669               fprintf(stderr, "  Src%c: ", channel);
670               dump_vec(stderr, type, src + j*stride);
671               fprintf(stderr, "\n");
672
673               fprintf(stderr, "  Dst%c: ", channel);
674               dump_vec(stderr, type, dst + j*stride);
675               fprintf(stderr, "\n");
676
677               fprintf(stderr, "  Con%c: ", channel);
678               dump_vec(stderr, type, con + j*stride);
679               fprintf(stderr, "\n");
680
681               fprintf(stderr, "  Res%c: ", channel);
682               dump_vec(stderr, type, res + j*stride);
683               fprintf(stderr, "\n");
684
685               fprintf(stderr, "  Ref%c: ", channel);
686               dump_vec(stderr, type, ref + j*stride);
687               fprintf(stderr, "\n");
688            }
689         }
690      }
691   }
692
693   /*
694    * Unfortunately the output of cycle counter is not very reliable as it comes
695    * -- sometimes we get outliers (due IRQs perhaps?) which are
696    * better removed to avoid random or biased data.
697    */
698   {
699      double sum = 0.0, sum2 = 0.0;
700      double avg, std;
701      unsigned m;
702
703      for(i = 0; i < n; ++i) {
704         sum += cycles[i];
705         sum2 += cycles[i]*cycles[i];
706      }
707
708      avg = sum/n;
709      std = sqrtf((sum2 - n*avg*avg)/n);
710
711      m = 0;
712      sum = 0.0;
713      for(i = 0; i < n; ++i) {
714         if(fabs(cycles[i] - avg) <= 4.0*std) {
715            sum += cycles[i];
716            ++m;
717         }
718      }
719
720      cycles_avg = sum/m;
721
722   }
723
724   if(fp)
725      write_tsv_row(fp, blend, mode, type, cycles_avg, success);
726
727   if (!success) {
728      if(verbose < 2)
729         LLVMDumpModule(module);
730      LLVMWriteBitcodeToFile(module, "blend.bc");
731      fprintf(stderr, "blend.bc written\n");
732      fprintf(stderr, "Invoke as \"llc -o - blend.bc\"\n");
733      abort();
734   }
735
736   LLVMFreeMachineCodeForFunction(engine, func);
737
738   LLVMDisposeExecutionEngine(engine);
739   if(pass)
740      LLVMDisposePassManager(pass);
741
742   return success;
743}
744
745
/**
 * Blend factors exercised by test_all() and test_some().
 *
 * The SRC1 (dual source) factors are compiled out; compute_blend_ref_term()
 * has no reference implementation for them (it asserts with a "to do"
 * note), which is presumably why they are disabled here.
 */
const unsigned
blend_factors[] = {
   PIPE_BLENDFACTOR_ZERO,
   PIPE_BLENDFACTOR_ONE,
   PIPE_BLENDFACTOR_SRC_COLOR,
   PIPE_BLENDFACTOR_SRC_ALPHA,
   PIPE_BLENDFACTOR_DST_COLOR,
   PIPE_BLENDFACTOR_DST_ALPHA,
   PIPE_BLENDFACTOR_CONST_COLOR,
   PIPE_BLENDFACTOR_CONST_ALPHA,
#if 0
   PIPE_BLENDFACTOR_SRC1_COLOR,
   PIPE_BLENDFACTOR_SRC1_ALPHA,
#endif
   /* Only used as a source factor: both test drivers skip it as a
    * destination factor. */
   PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE,
   PIPE_BLENDFACTOR_INV_SRC_COLOR,
   PIPE_BLENDFACTOR_INV_SRC_ALPHA,
   PIPE_BLENDFACTOR_INV_DST_COLOR,
   PIPE_BLENDFACTOR_INV_DST_ALPHA,
   PIPE_BLENDFACTOR_INV_CONST_COLOR,
   PIPE_BLENDFACTOR_INV_CONST_ALPHA,
#if 0
   PIPE_BLENDFACTOR_INV_SRC1_COLOR,
   PIPE_BLENDFACTOR_INV_SRC1_ALPHA,
#endif
};
772
773
/**
 * Blend functions exercised by test_all() and test_some(); each of them is
 * handled by compute_blend_ref() for both the RGB and the alpha channels.
 */
const unsigned
blend_funcs[] = {
   PIPE_BLEND_ADD,
   PIPE_BLEND_SUBTRACT,
   PIPE_BLEND_REVERSE_SUBTRACT,
   PIPE_BLEND_MIN,
   PIPE_BLEND_MAX
};
782
783
/**
 * Vector types exercised by test_all() and test_some().
 *
 * Both entries are 128 bits wide (32 x 4 and 8 x 16).  The initialisers are
 * positional, in the field order given by the column comment below.
 */
const struct lp_type blend_types[] = {
   /* float, fixed,  sign,  norm, width, len */
   {   TRUE, FALSE, FALSE,  TRUE,    32,   4 }, /* f32 x 4 */
   {  FALSE, FALSE, FALSE,  TRUE,     8,  16 }, /* u8n x 16 */
};
789
790
791const unsigned num_funcs = sizeof(blend_funcs)/sizeof(blend_funcs[0]);
792const unsigned num_factors = sizeof(blend_factors)/sizeof(blend_factors[0]);
793const unsigned num_types = sizeof(blend_types)/sizeof(blend_types[0]);
794
795
796boolean
797test_all(unsigned verbose, FILE *fp)
798{
799   const unsigned *rgb_func;
800   const unsigned *rgb_src_factor;
801   const unsigned *rgb_dst_factor;
802   const unsigned *alpha_func;
803   const unsigned *alpha_src_factor;
804   const unsigned *alpha_dst_factor;
805   struct pipe_blend_state blend;
806   enum vector_mode mode;
807   const struct lp_type *type;
808   bool success = TRUE;
809
810   for(rgb_func = blend_funcs; rgb_func < &blend_funcs[num_funcs]; ++rgb_func) {
811      for(alpha_func = blend_funcs; alpha_func < &blend_funcs[num_funcs]; ++alpha_func) {
812         for(rgb_src_factor = blend_factors; rgb_src_factor < &blend_factors[num_factors]; ++rgb_src_factor) {
813            for(rgb_dst_factor = blend_factors; rgb_dst_factor <= rgb_src_factor; ++rgb_dst_factor) {
814               for(alpha_src_factor = blend_factors; alpha_src_factor < &blend_factors[num_factors]; ++alpha_src_factor) {
815                  for(alpha_dst_factor = blend_factors; alpha_dst_factor <= alpha_src_factor; ++alpha_dst_factor) {
816                     for(mode = 0; mode < 2; ++mode) {
817                        for(type = blend_types; type < &blend_types[num_types]; ++type) {
818
819                           if(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
820                              *alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)
821                              continue;
822
823                           memset(&blend, 0, sizeof blend);
824                           blend.rt[0].blend_enable      = 1;
825                           blend.rt[0].rgb_func          = *rgb_func;
826                           blend.rt[0].rgb_src_factor    = *rgb_src_factor;
827                           blend.rt[0].rgb_dst_factor    = *rgb_dst_factor;
828                           blend.rt[0].alpha_func        = *alpha_func;
829                           blend.rt[0].alpha_src_factor  = *alpha_src_factor;
830                           blend.rt[0].alpha_dst_factor  = *alpha_dst_factor;
831                           blend.rt[0].colormask         = PIPE_MASK_RGBA;
832
833                           if(!test_one(verbose, fp, &blend, mode, *type))
834                             success = FALSE;
835
836                        }
837                     }
838                  }
839               }
840            }
841         }
842      }
843   }
844
845   return success;
846}
847
848
849boolean
850test_some(unsigned verbose, FILE *fp, unsigned long n)
851{
852   const unsigned *rgb_func;
853   const unsigned *rgb_src_factor;
854   const unsigned *rgb_dst_factor;
855   const unsigned *alpha_func;
856   const unsigned *alpha_src_factor;
857   const unsigned *alpha_dst_factor;
858   struct pipe_blend_state blend;
859   enum vector_mode mode;
860   const struct lp_type *type;
861   unsigned long i;
862   bool success = TRUE;
863
864   for(i = 0; i < n; ++i) {
865      rgb_func = &blend_funcs[rand() % num_funcs];
866      alpha_func = &blend_funcs[rand() % num_funcs];
867      rgb_src_factor = &blend_factors[rand() % num_factors];
868      alpha_src_factor = &blend_factors[rand() % num_factors];
869
870      do {
871         rgb_dst_factor = &blend_factors[rand() % num_factors];
872      } while(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
873
874      do {
875         alpha_dst_factor = &blend_factors[rand() % num_factors];
876      } while(*alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
877
878      mode = rand() & 1;
879
880      type = &blend_types[rand() % num_types];
881
882      memset(&blend, 0, sizeof blend);
883      blend.rt[0].blend_enable      = 1;
884      blend.rt[0].rgb_func          = *rgb_func;
885      blend.rt[0].rgb_src_factor    = *rgb_src_factor;
886      blend.rt[0].rgb_dst_factor    = *rgb_dst_factor;
887      blend.rt[0].alpha_func        = *alpha_func;
888      blend.rt[0].alpha_src_factor  = *alpha_src_factor;
889      blend.rt[0].alpha_dst_factor  = *alpha_dst_factor;
890      blend.rt[0].colormask         = PIPE_MASK_RGBA;
891
892      if(!test_one(verbose, fp, &blend, mode, *type))
893        success = FALSE;
894   }
895
896   return success;
897}
898