lowlevel-blt-bench.c revision 1176bdada62cabc6ec4b0308a930e83b679d5d36
1/*
2 * Copyright © 2009 Nokia Corporation
3 * Copyright © 2010 Movial Creative Technologies Oy
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include <stdio.h>
26#include <stdlib.h>
27#include <string.h>
28#include "utils.h"
29
30#define SOLID_FLAG 1
31#define CA_FLAG    2
32
33#define L1CACHE_SIZE (8 * 1024)
34#define L2CACHE_SIZE (128 * 1024)
35
/* This is applied to both L1 and L2 tests - alternatively, you could
 * parameterise bench_L or split it into two functions. It could be
 * read at runtime on some architectures, but it only really matters
 * that it's a number that's an integer divisor of both cacheline
 * lengths, and further, it only really matters for caches that don't
 * do allocate-on-write. */
42#define CACHELINE_LENGTH (32) /* bytes */
43
44#define WIDTH  1920
45#define HEIGHT 1080
46#define BUFSIZE (WIDTH * HEIGHT * 4)
47#define XWIDTH 256
48#define XHEIGHT 256
49#define TILEWIDTH 32
50#define TINYWIDTH 8
51
52#define EXCLUDE_OVERHEAD 1
53
/* Shared pixel buffers.  bench_composite() wraps them in pixman images
 * and bench_memcpy() copies between dst and src.  They are not allocated
 * in this part of the file (presumably main() does that - confirm).
 */
uint32_t *src;
uint32_t *mask;
uint32_t *dst;

/* Divisor for the "M" percentage printed by bench_composite().
 * NOTE(review): presumably filled in from bench_memcpy() before any
 * test runs; the assignment is not visible here.
 */
double bandwidth = 0.0;
59
60double
61bench_memcpy ()
62{
63    int64_t n = 0, total;
64    double  t1, t2;
65    int     x = 0;
66
67    t1 = gettime ();
68    while (1)
69    {
70	memcpy (dst, src, BUFSIZE - 64);
71	memcpy (src, dst, BUFSIZE - 64);
72	n += 4 * (BUFSIZE - 64);
73	t2 = gettime ();
74	if (t2 - t1 > 0.5)
75	    break;
76    }
77    n = total = n * 5;
78    t1 = gettime ();
79    while (n > 0)
80    {
81	if (++x >= 64)
82	    x = 0;
83	memcpy ((char *)dst + 1, (char *)src + x, BUFSIZE - 64);
84	memcpy ((char *)src + 1, (char *)dst + x, BUFSIZE - 64);
85	n -= 4 * (BUFSIZE - 64);
86    }
87    t2 = gettime ();
88    return (double)total / (t2 - t1);
89}
90
91static pixman_bool_t use_scaling = FALSE;
92static pixman_filter_t filter = PIXMAN_FILTER_NEAREST;
93
94/* nearly 1x scale factor */
95static pixman_transform_t m =
96{
97    {
98        { pixman_fixed_1 + 1, 0,              0              },
99        { 0,                  pixman_fixed_1, 0              },
100        { 0,                  0,              pixman_fixed_1 }
101    }
102};
103
104static void
105pixman_image_composite_wrapper (pixman_implementation_t *impl,
106				pixman_composite_info_t *info)
107{
108    if (use_scaling)
109    {
110        pixman_image_set_filter (info->src_image, filter, NULL, 0);
111        pixman_image_set_transform(info->src_image, &m);
112    }
113    pixman_image_composite (info->op,
114			    info->src_image, info->mask_image, info->dest_image,
115			    info->src_x, info->src_y,
116			    info->mask_x, info->mask_y,
117			    info->dest_x, info->dest_y,
118			    info->width, info->height);
119}
120
121static void
122pixman_image_composite_empty (pixman_implementation_t *impl,
123			      pixman_composite_info_t *info)
124{
125    if (use_scaling)
126    {
127        pixman_image_set_filter (info->src_image, filter, NULL, 0);
128        pixman_image_set_transform(info->src_image, &m);
129    }
130    pixman_image_composite (info->op,
131			    info->src_image, info->mask_image, info->dest_image,
132			    0, 0, 0, 0, 0, 0, 1, 1);
133}
134
135static inline void
136call_func (pixman_composite_func_t func,
137	   pixman_op_t             op,
138	   pixman_image_t *        src_image,
139	   pixman_image_t *        mask_image,
140	   pixman_image_t *        dest_image,
141	   int32_t		   src_x,
142	   int32_t		   src_y,
143	   int32_t                 mask_x,
144	   int32_t                 mask_y,
145	   int32_t                 dest_x,
146	   int32_t                 dest_y,
147	   int32_t                 width,
148	   int32_t                 height)
149{
150    pixman_composite_info_t info;
151
152    info.op = op;
153    info.src_image = src_image;
154    info.mask_image = mask_image;
155    info.dest_image = dest_image;
156    info.src_x = src_x;
157    info.src_y = src_y;
158    info.mask_x = mask_x;
159    info.mask_y = mask_y;
160    info.dest_x = dest_x;
161    info.dest_y = dest_y;
162    info.width = width;
163    info.height = height;
164
165    func (0, &info);
166}
167
168void
169noinline
170bench_L  (pixman_op_t              op,
171          pixman_image_t *         src_img,
172          pixman_image_t *         mask_img,
173          pixman_image_t *         dst_img,
174          int64_t                  n,
175          pixman_composite_func_t  func,
176          int                      width,
177          int                      lines_count)
178{
179    int64_t      i, j, k;
180    int          x = 0;
181    int          q = 0;
182    volatile int qx;
183
184    for (i = 0; i < n; i++)
185    {
186        /* For caches without allocate-on-write, we need to force the
187         * destination buffer back into the cache on each iteration,
188         * otherwise if they are evicted during the test, they remain
189         * uncached. This doesn't matter for tests which read the
190         * destination buffer, or for caches that do allocate-on-write,
191         * but in those cases this loop just adds constant time, which
192         * should be successfully cancelled out.
193         */
194        for (j = 0; j < lines_count; j++)
195        {
196            for (k = 0; k < width + 62; k += CACHELINE_LENGTH / sizeof *dst)
197            {
198                q += dst[j * WIDTH + k];
199            }
200            q += dst[j * WIDTH + width + 62];
201        }
202	if (++x >= 64)
203	    x = 0;
204	call_func (func, op, src_img, mask_img, dst_img, x, 0, x, 0, 63 - x, 0, width, lines_count);
205    }
206    qx = q;
207}
208
209void
210noinline
211bench_M (pixman_op_t              op,
212         pixman_image_t *         src_img,
213         pixman_image_t *         mask_img,
214         pixman_image_t *         dst_img,
215         int64_t                  n,
216         pixman_composite_func_t  func)
217{
218    int64_t i;
219    int     x = 0;
220
221    for (i = 0; i < n; i++)
222    {
223	if (++x >= 64)
224	    x = 0;
225	call_func (func, op, src_img, mask_img, dst_img, x, 0, x, 0, 1, 0, WIDTH - 64, HEIGHT);
226    }
227}
228
229double
230noinline
231bench_HT (pixman_op_t              op,
232          pixman_image_t *         src_img,
233          pixman_image_t *         mask_img,
234          pixman_image_t *         dst_img,
235          int64_t                  n,
236          pixman_composite_func_t  func)
237{
238    double  pix_cnt = 0;
239    int     x = 0;
240    int     y = 0;
241    int64_t i;
242
243    srand (0);
244    for (i = 0; i < n; i++)
245    {
246	int w = (rand () % (TILEWIDTH * 2)) + 1;
247	int h = (rand () % (TILEWIDTH * 2)) + 1;
248	if (x + w > WIDTH)
249	{
250	    x = 0;
251	    y += TILEWIDTH * 2;
252	}
253	if (y + h > HEIGHT)
254	{
255	    y = 0;
256	}
257	call_func (func, op, src_img, mask_img, dst_img, x, y, x, y, x, y, w, h);
258	x += w;
259	pix_cnt += w * h;
260    }
261    return pix_cnt;
262}
263
264double
265noinline
266bench_VT (pixman_op_t              op,
267          pixman_image_t *         src_img,
268          pixman_image_t *         mask_img,
269          pixman_image_t *         dst_img,
270          int64_t                  n,
271          pixman_composite_func_t  func)
272{
273    double  pix_cnt = 0;
274    int     x = 0;
275    int     y = 0;
276    int64_t i;
277
278    srand (0);
279    for (i = 0; i < n; i++)
280    {
281	int w = (rand () % (TILEWIDTH * 2)) + 1;
282	int h = (rand () % (TILEWIDTH * 2)) + 1;
283	if (y + h > HEIGHT)
284	{
285	    y = 0;
286	    x += TILEWIDTH * 2;
287	}
288	if (x + w > WIDTH)
289	{
290	    x = 0;
291	}
292	call_func (func, op, src_img, mask_img, dst_img, x, y, x, y, x, y, w, h);
293	y += h;
294	pix_cnt += w * h;
295    }
296    return pix_cnt;
297}
298
299double
300noinline
301bench_R (pixman_op_t              op,
302         pixman_image_t *         src_img,
303         pixman_image_t *         mask_img,
304         pixman_image_t *         dst_img,
305         int64_t                  n,
306         pixman_composite_func_t  func,
307         int                      maxw,
308         int                      maxh)
309{
310    double  pix_cnt = 0;
311    int64_t i;
312
313    if (maxw <= TILEWIDTH * 2 || maxh <= TILEWIDTH * 2)
314    {
315	printf("error: maxw <= TILEWIDTH * 2 || maxh <= TILEWIDTH * 2\n");
316        return 0;
317    }
318
319    srand (0);
320    for (i = 0; i < n; i++)
321    {
322	int w = (rand () % (TILEWIDTH * 2)) + 1;
323	int h = (rand () % (TILEWIDTH * 2)) + 1;
324	int sx = rand () % (maxw - TILEWIDTH * 2);
325	int sy = rand () % (maxh - TILEWIDTH * 2);
326	int dx = rand () % (maxw - TILEWIDTH * 2);
327	int dy = rand () % (maxh - TILEWIDTH * 2);
328	call_func (func, op, src_img, mask_img, dst_img, sx, sy, sx, sy, dx, dy, w, h);
329	pix_cnt += w * h;
330    }
331    return pix_cnt;
332}
333
334double
335noinline
336bench_RT (pixman_op_t              op,
337          pixman_image_t *         src_img,
338          pixman_image_t *         mask_img,
339          pixman_image_t *         dst_img,
340          int64_t                  n,
341          pixman_composite_func_t  func,
342          int                      maxw,
343          int                      maxh)
344{
345    double  pix_cnt = 0;
346    int64_t i;
347
348    if (maxw <= TINYWIDTH * 2 || maxh <= TINYWIDTH * 2)
349    {
350	printf("error: maxw <= TINYWIDTH * 2 || maxh <= TINYWIDTH * 2\n");
351        return 0;
352    }
353
354    srand (0);
355    for (i = 0; i < n; i++)
356    {
357	int w = (rand () % (TINYWIDTH * 2)) + 1;
358	int h = (rand () % (TINYWIDTH * 2)) + 1;
359	int sx = rand () % (maxw - TINYWIDTH * 2);
360	int sy = rand () % (maxh - TINYWIDTH * 2);
361	int dx = rand () % (maxw - TINYWIDTH * 2);
362	int dy = rand () % (maxh - TINYWIDTH * 2);
363	call_func (func, op, src_img, mask_img, dst_img, sx, sy, sx, sy, dx, dy, w, h);
364	pix_cnt += w * h;
365    }
366    return pix_cnt;
367}
368
369void
370bench_composite (char * testname,
371                 int    src_fmt,
372                 int    src_flags,
373                 int    op,
374                 int    mask_fmt,
375                 int    mask_flags,
376                 int    dst_fmt,
377                 double npix)
378{
379    pixman_image_t *                src_img;
380    pixman_image_t *                dst_img;
381    pixman_image_t *                mask_img;
382    pixman_image_t *                xsrc_img;
383    pixman_image_t *                xdst_img;
384    pixman_image_t *                xmask_img;
385    double                          t1, t2, t3, pix_cnt;
386    int64_t                         n, l1test_width, nlines;
387    double                             bytes_per_pix = 0;
388    pixman_bool_t                   bench_pixbuf = FALSE;
389
390    pixman_composite_func_t func = pixman_image_composite_wrapper;
391
392    if (!(src_flags & SOLID_FLAG))
393    {
394        bytes_per_pix += (src_fmt >> 24) / 8.0;
395        src_img = pixman_image_create_bits (src_fmt,
396                                            WIDTH, HEIGHT,
397                                            src,
398                                            WIDTH * 4);
399        xsrc_img = pixman_image_create_bits (src_fmt,
400                                             XWIDTH, XHEIGHT,
401                                             src,
402                                             XWIDTH * 4);
403    }
404    else
405    {
406        src_img = pixman_image_create_bits (src_fmt,
407                                            1, 1,
408                                            src,
409                                            4);
410        xsrc_img = pixman_image_create_bits (src_fmt,
411                                             1, 1,
412                                             src,
413                                             4);
414        pixman_image_set_repeat (src_img, PIXMAN_REPEAT_NORMAL);
415        pixman_image_set_repeat (xsrc_img, PIXMAN_REPEAT_NORMAL);
416    }
417
418    bytes_per_pix += (dst_fmt >> 24) / 8.0;
419    dst_img = pixman_image_create_bits (dst_fmt,
420                                        WIDTH, HEIGHT,
421                                        dst,
422                                        WIDTH * 4);
423
424    mask_img = NULL;
425    xmask_img = NULL;
426    if (strcmp (testname, "pixbuf") == 0 || strcmp (testname, "rpixbuf") == 0)
427    {
428        bench_pixbuf = TRUE;
429    }
430    if (!(mask_flags & SOLID_FLAG) && mask_fmt != PIXMAN_null)
431    {
432        bytes_per_pix += (mask_fmt >> 24) / ((op == PIXMAN_OP_SRC) ? 8.0 : 4.0);
433        mask_img = pixman_image_create_bits (mask_fmt,
434                                             WIDTH, HEIGHT,
435                                             bench_pixbuf ? src : mask,
436                                             WIDTH * 4);
437        xmask_img = pixman_image_create_bits (mask_fmt,
438                                             XWIDTH, XHEIGHT,
439                                             bench_pixbuf ? src : mask,
440                                             XWIDTH * 4);
441    }
442    else if (mask_fmt != PIXMAN_null)
443    {
444        mask_img = pixman_image_create_bits (mask_fmt,
445                                             1, 1,
446                                             mask,
447                                             4);
448        xmask_img = pixman_image_create_bits (mask_fmt,
449                                             1, 1,
450                                             mask,
451                                             4 * 4);
452       pixman_image_set_repeat (mask_img, PIXMAN_REPEAT_NORMAL);
453       pixman_image_set_repeat (xmask_img, PIXMAN_REPEAT_NORMAL);
454    }
455    if ((mask_flags & CA_FLAG) && mask_fmt != PIXMAN_null)
456    {
457       pixman_image_set_component_alpha (mask_img, 1);
458    }
459    xdst_img = pixman_image_create_bits (dst_fmt,
460                                         XWIDTH, XHEIGHT,
461                                         dst,
462                                         XWIDTH * 4);
463
464
465    printf ("%24s %c", testname, func != pixman_image_composite_wrapper ?
466            '-' : '=');
467
468    memcpy (dst, src, BUFSIZE);
469    memcpy (src, dst, BUFSIZE);
470
471    l1test_width = L1CACHE_SIZE / 8 - 64;
472    if (l1test_width < 1)
473	l1test_width = 1;
474    if (l1test_width > WIDTH - 64)
475	l1test_width = WIDTH - 64;
476    n = 1 + npix / (l1test_width * 8);
477    t1 = gettime ();
478#if EXCLUDE_OVERHEAD
479    bench_L (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty, l1test_width, 1);
480#endif
481    t2 = gettime ();
482    bench_L (op, src_img, mask_img, dst_img, n, func, l1test_width, 1);
483    t3 = gettime ();
484    printf ("  L1:%7.2f", (double)n * l1test_width * 1 /
485            ((t3 - t2) - (t2 - t1)) / 1000000.);
486    fflush (stdout);
487
488    memcpy (dst, src, BUFSIZE);
489    memcpy (src, dst, BUFSIZE);
490
491    nlines = (L2CACHE_SIZE / l1test_width) /
492	((PIXMAN_FORMAT_BPP(src_fmt) + PIXMAN_FORMAT_BPP(dst_fmt)) / 8);
493    if (nlines < 1)
494	nlines = 1;
495    n = 1 + npix / (l1test_width * nlines);
496    t1 = gettime ();
497#if EXCLUDE_OVERHEAD
498    bench_L (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty, l1test_width, nlines);
499#endif
500    t2 = gettime ();
501    bench_L (op, src_img, mask_img, dst_img, n, func, l1test_width, nlines);
502    t3 = gettime ();
503    printf ("  L2:%7.2f", (double)n * l1test_width * nlines /
504            ((t3 - t2) - (t2 - t1)) / 1000000.);
505    fflush (stdout);
506
507    memcpy (dst, src, BUFSIZE);
508    memcpy (src, dst, BUFSIZE);
509
510    n = 1 + npix / (WIDTH * HEIGHT);
511    t1 = gettime ();
512#if EXCLUDE_OVERHEAD
513    bench_M (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty);
514#endif
515    t2 = gettime ();
516    bench_M (op, src_img, mask_img, dst_img, n, func);
517    t3 = gettime ();
518    printf ("  M:%6.2f (%6.2f%%)",
519        ((double)n * (WIDTH - 64) * HEIGHT / ((t3 - t2) - (t2 - t1))) / 1000000.,
520        ((double)n * (WIDTH - 64) * HEIGHT / ((t3 - t2) - (t2 - t1)) * bytes_per_pix) * (100.0 / bandwidth) );
521    fflush (stdout);
522
523    memcpy (dst, src, BUFSIZE);
524    memcpy (src, dst, BUFSIZE);
525
526    n = 1 + npix / (8 * TILEWIDTH * TILEWIDTH);
527    t1 = gettime ();
528#if EXCLUDE_OVERHEAD
529    pix_cnt = bench_HT (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty);
530#endif
531    t2 = gettime ();
532    pix_cnt = bench_HT (op, src_img, mask_img, dst_img, n, func);
533    t3 = gettime ();
534    printf ("  HT:%6.2f", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000.);
535    fflush (stdout);
536
537    memcpy (dst, src, BUFSIZE);
538    memcpy (src, dst, BUFSIZE);
539
540    n = 1 + npix / (8 * TILEWIDTH * TILEWIDTH);
541    t1 = gettime ();
542#if EXCLUDE_OVERHEAD
543    pix_cnt = bench_VT (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty);
544#endif
545    t2 = gettime ();
546    pix_cnt = bench_VT (op, src_img, mask_img, dst_img, n, func);
547    t3 = gettime ();
548    printf ("  VT:%6.2f", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000.);
549    fflush (stdout);
550
551    memcpy (dst, src, BUFSIZE);
552    memcpy (src, dst, BUFSIZE);
553
554    n = 1 + npix / (8 * TILEWIDTH * TILEWIDTH);
555    t1 = gettime ();
556#if EXCLUDE_OVERHEAD
557    pix_cnt = bench_R (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty, WIDTH, HEIGHT);
558#endif
559    t2 = gettime ();
560    pix_cnt = bench_R (op, src_img, mask_img, dst_img, n, func, WIDTH, HEIGHT);
561    t3 = gettime ();
562    printf ("  R:%6.2f", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000.);
563    fflush (stdout);
564
565    memcpy (dst, src, BUFSIZE);
566    memcpy (src, dst, BUFSIZE);
567
568    n = 1 + npix / (16 * TINYWIDTH * TINYWIDTH);
569    t1 = gettime ();
570#if EXCLUDE_OVERHEAD
571    pix_cnt = bench_RT (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty, WIDTH, HEIGHT);
572#endif
573    t2 = gettime ();
574    pix_cnt = bench_RT (op, src_img, mask_img, dst_img, n, func, WIDTH, HEIGHT);
575    t3 = gettime ();
576    printf ("  RT:%6.2f (%4.0fKops/s)\n", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000., (double) n / ((t3 - t2) * 1000));
577
578    if (mask_img) {
579	pixman_image_unref (mask_img);
580	pixman_image_unref (xmask_img);
581    }
582    pixman_image_unref (src_img);
583    pixman_image_unref (dst_img);
584    pixman_image_unref (xsrc_img);
585    pixman_image_unref (xdst_img);
586}
587
588#define PIXMAN_OP_OUT_REV (PIXMAN_OP_OUT_REVERSE)
589
590struct
591{
592    char *testname;
593    int   src_fmt;
594    int   src_flags;
595    int   op;
596    int   mask_fmt;
597    int   mask_flags;
598    int   dst_fmt;
599}
600tests_tbl[] =
601{
602    { "add_8_8_8",             PIXMAN_a8,          0, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_a8 },
603    { "add_n_8_8",             PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_a8 },
604    { "add_n_8_8888",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_a8r8g8b8 },
605    { "add_n_8_x888",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_x8r8g8b8 },
606    { "add_n_8_0565",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_r5g6b5 },
607    { "add_n_8_1555",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_a1r5g5b5 },
608    { "add_n_8_4444",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_a4r4g4b4 },
609    { "add_n_8_2222",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_a2r2g2b2 },
610    { "add_n_8_2x10",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_x2r10g10b10 },
611    { "add_n_8_2a10",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_a2r10g10b10 },
612    { "add_n_8",               PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a8 },
613    { "add_n_8888",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
614    { "add_n_x888",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
615    { "add_n_0565",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
616    { "add_n_1555",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a1r5g5b5 },
617    { "add_n_4444",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a4r4g4b4 },
618    { "add_n_2222",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a2r2g2b2 },
619    { "add_n_2x10",            PIXMAN_a2r10g10b10, 1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_x2r10g10b10 },
620    { "add_n_2a10",            PIXMAN_a2r10g10b10, 1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a2r10g10b10 },
621    { "add_8_8",               PIXMAN_a8,          0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a8 },
622    { "add_x888_x888",         PIXMAN_x8r8g8b8,    0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
623    { "add_8888_8888",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
624    { "add_8888_0565",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
625    { "add_8888_1555",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a1r5g5b5 },
626    { "add_8888_4444",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a4r4g4b4 },
627    { "add_8888_2222",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a2r2g2b2 },
628    { "add_0565_0565",         PIXMAN_r5g6b5,      0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
629    { "add_1555_1555",         PIXMAN_a1r5g5b5,    0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a1r5g5b5 },
630    { "add_0565_2x10",         PIXMAN_r5g6b5,      0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_x2r10g10b10 },
631    { "add_2a10_2a10",         PIXMAN_a2r10g10b10, 0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a2r10g10b10 },
632    { "in_n_8_8",              PIXMAN_a8r8g8b8,    1, PIXMAN_OP_IN,      PIXMAN_a8,       0, PIXMAN_a8 },
633    { "in_8_8",                PIXMAN_a8,          0, PIXMAN_OP_IN,      PIXMAN_null,     0, PIXMAN_a8 },
634    { "src_n_2222",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a2r2g2b2 },
635    { "src_n_0565",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
636    { "src_n_1555",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a1r5g5b5 },
637    { "src_n_4444",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a4r4g4b4 },
638    { "src_n_x888",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
639    { "src_n_8888",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
640    { "src_n_2x10",            PIXMAN_a2r10g10b10, 1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_x2r10g10b10 },
641    { "src_n_2a10",            PIXMAN_a2r10g10b10, 1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a2r10g10b10 },
642    { "src_8888_0565",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
643    { "src_0565_8888",         PIXMAN_r5g6b5,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
644    { "src_8888_4444",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a4r4g4b4 },
645    { "src_8888_2222",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a2r2g2b2 },
646    { "src_8888_2x10",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_x2r10g10b10 },
647    { "src_8888_2a10",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a2r10g10b10 },
648    { "src_0888_0565",         PIXMAN_r8g8b8,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
649    { "src_0888_8888",         PIXMAN_r8g8b8,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
650    { "src_0888_x888",         PIXMAN_r8g8b8,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
651    { "src_0888_8888_rev",     PIXMAN_b8g8r8,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
652    { "src_0888_0565_rev",     PIXMAN_b8g8r8,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
653    { "src_x888_x888",         PIXMAN_x8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
654    { "src_x888_8888",         PIXMAN_x8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
655    { "src_8888_8888",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
656    { "src_0565_0565",         PIXMAN_r5g6b5,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
657    { "src_1555_0565",         PIXMAN_a1r5g5b5,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
658    { "src_0565_1555",         PIXMAN_r5g6b5,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a1r5g5b5 },
659    { "src_8_8",               PIXMAN_a8,          0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a8 },
660    { "src_n_8",               PIXMAN_a8,          1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a8 },
661    { "src_n_8_0565",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_r5g6b5 },
662    { "src_n_8_1555",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a1r5g5b5 },
663    { "src_n_8_4444",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a4r4g4b4 },
664    { "src_n_8_2222",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a2r2g2b2 },
665    { "src_n_8_x888",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_x8r8g8b8 },
666    { "src_n_8_8888",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a8r8g8b8 },
667    { "src_n_8_2x10",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_x2r10g10b10 },
668    { "src_n_8_2a10",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a2r10g10b10 },
669    { "src_8888_8_0565",       PIXMAN_a8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_r5g6b5 },
670    { "src_0888_8_0565",       PIXMAN_r8g8b8,      0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_r5g6b5 },
671    { "src_0888_8_8888",       PIXMAN_r8g8b8,      0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a8r8g8b8 },
672    { "src_0888_8_x888",       PIXMAN_r8g8b8,      0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_x8r8g8b8 },
673    { "src_x888_8_x888",       PIXMAN_x8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_x8r8g8b8 },
674    { "src_x888_8_8888",       PIXMAN_x8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a8r8g8b8 },
675    { "src_0565_8_0565",       PIXMAN_r5g6b5,      0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_r5g6b5 },
676    { "src_1555_8_0565",       PIXMAN_a1r5g5b5,    0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_r5g6b5 },
677    { "src_0565_8_1555",       PIXMAN_r5g6b5,      0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a1r5g5b5 },
678    { "over_n_x888",           PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
679    { "over_n_8888",           PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
680    { "over_n_0565",           PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_null,     0, PIXMAN_r5g6b5 },
681    { "over_n_1555",           PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_null,     0, PIXMAN_a1r5g5b5 },
682    { "over_8888_0565",        PIXMAN_a8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_null,     0, PIXMAN_r5g6b5 },
683    { "over_8888_8888",        PIXMAN_a8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
684    { "over_8888_x888",        PIXMAN_a8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
685    { "over_x888_8_0565",      PIXMAN_x8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_r5g6b5 },
686    { "over_x888_8_8888",      PIXMAN_x8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_a8r8g8b8 },
687    { "over_n_8_0565",         PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_r5g6b5 },
688    { "over_n_8_1555",         PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_a1r5g5b5 },
689    { "over_n_8_4444",         PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_a4r4g4b4 },
690    { "over_n_8_2222",         PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_a2r2g2b2 },
691    { "over_n_8_x888",         PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_x8r8g8b8 },
692    { "over_n_8_8888",         PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_a8r8g8b8 },
693    { "over_n_8_2x10",         PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_x2r10g10b10 },
694    { "over_n_8_2a10",         PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_a2r10g10b10 },
695    { "over_n_8888_8888_ca",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8r8g8b8, 2, PIXMAN_a8r8g8b8 },
696    { "over_n_8888_x888_ca",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8r8g8b8, 2, PIXMAN_x8r8g8b8 },
697    { "over_n_8888_0565_ca",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8r8g8b8, 2, PIXMAN_r5g6b5 },
698    { "over_n_8888_1555_ca",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8r8g8b8, 2, PIXMAN_a1r5g5b5 },
699    { "over_n_8888_4444_ca",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8r8g8b8, 2, PIXMAN_a4r4g4b4 },
700    { "over_n_8888_2222_ca",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8r8g8b8, 2, PIXMAN_a2r2g2b2 },
701    { "over_n_8888_2x10_ca",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8r8g8b8, 2, PIXMAN_x2r10g10b10 },
702    { "over_n_8888_2a10_ca",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8r8g8b8, 2, PIXMAN_a2r10g10b10 },
703    { "over_8888_n_8888",      PIXMAN_a8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_a8,       1, PIXMAN_a8r8g8b8 },
704    { "over_8888_n_x888",      PIXMAN_a8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_a8,       1, PIXMAN_x8r8g8b8 },
705    { "over_8888_n_0565",      PIXMAN_a8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_a8,       1, PIXMAN_r5g6b5 },
706    { "over_8888_n_1555",      PIXMAN_a8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_a8,       1, PIXMAN_a1r5g5b5 },
707    { "over_x888_n_8888",      PIXMAN_x8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_a8,       1, PIXMAN_a8r8g8b8 },
708    { "outrev_n_8_0565",       PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8,       0, PIXMAN_r5g6b5 },
709    { "outrev_n_8_1555",       PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8,       0, PIXMAN_a1r5g5b5 },
710    { "outrev_n_8_x888",       PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8,       0, PIXMAN_x8r8g8b8 },
711    { "outrev_n_8_8888",       PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8,       0, PIXMAN_a8r8g8b8 },
712    { "outrev_n_8888_0565_ca", PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_r5g6b5 },
713    { "outrev_n_8888_1555_ca", PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_a1r5g5b5 },
714    { "outrev_n_8888_x888_ca", PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_x8r8g8b8 },
715    { "outrev_n_8888_8888_ca", PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_a8r8g8b8 },
716    { "over_reverse_n_8888",   PIXMAN_a8r8g8b8,    0, PIXMAN_OP_OVER_REVERSE, PIXMAN_null, 0, PIXMAN_a8r8g8b8 },
717    { "pixbuf",                PIXMAN_x8b8g8r8,    0, PIXMAN_OP_SRC,     PIXMAN_a8b8g8r8, 0, PIXMAN_a8r8g8b8 },
718    { "rpixbuf",               PIXMAN_x8b8g8r8,    0, PIXMAN_OP_SRC,     PIXMAN_a8b8g8r8, 0, PIXMAN_a8b8g8r8 },
719};
720
721int
722main (int argc, char *argv[])
723{
724    double x;
725    int i;
726    const char *pattern = NULL;
727    for (i = 1; i < argc; i++)
728    {
729	if (argv[i][0] == '-')
730	{
731	    if (strchr (argv[i] + 1, 'b'))
732	    {
733		use_scaling = TRUE;
734		filter = PIXMAN_FILTER_BILINEAR;
735	    }
736	    else if (strchr (argv[i] + 1, 'n'))
737	    {
738		use_scaling = TRUE;
739		filter = PIXMAN_FILTER_NEAREST;
740	    }
741	}
742	else
743	{
744	    pattern = argv[i];
745	}
746    }
747
748    if (!pattern)
749    {
750	printf ("Usage: lowlevel-blt-bench [-b] [-n] pattern\n");
751	printf ("  -n : benchmark nearest scaling\n");
752	printf ("  -b : benchmark bilinear scaling\n");
753	return 1;
754    }
755
756    src = aligned_malloc (4096, BUFSIZE * 3);
757    memset (src, 0xCC, BUFSIZE * 3);
758    dst = src + (BUFSIZE / 4);
759    mask = dst + (BUFSIZE / 4);
760
761    printf ("Benchmark for a set of most commonly used functions\n");
762    printf ("---\n");
763    printf ("All results are presented in millions of pixels per second\n");
764    printf ("L1  - small Xx1 rectangle (fitting L1 cache), always blitted at the same\n");
765    printf ("      memory location with small drift in horizontal direction\n");
766    printf ("L2  - small XxY rectangle (fitting L2 cache), always blitted at the same\n");
767    printf ("      memory location with small drift in horizontal direction\n");
768    printf ("M   - large %dx%d rectangle, always blitted at the same\n",
769            WIDTH - 64, HEIGHT);
770    printf ("      memory location with small drift in horizontal direction\n");
771    printf ("HT  - random rectangles with %dx%d average size are copied from\n",
772            TILEWIDTH, TILEWIDTH);
773    printf ("      one %dx%d buffer to another, traversing from left to right\n",
774            WIDTH, HEIGHT);
775    printf ("      and from top to bottom\n");
776    printf ("VT  - random rectangles with %dx%d average size are copied from\n",
777            TILEWIDTH, TILEWIDTH);
778    printf ("      one %dx%d buffer to another, traversing from top to bottom\n",
779            WIDTH, HEIGHT);
780    printf ("      and from left to right\n");
781    printf ("R   - random rectangles with %dx%d average size are copied from\n",
782            TILEWIDTH, TILEWIDTH);
783    printf ("      random locations of one %dx%d buffer to another\n",
784            WIDTH, HEIGHT);
785    printf ("RT  - as R, but %dx%d average sized rectangles are copied\n",
786            TINYWIDTH, TINYWIDTH);
787    printf ("---\n");
788    bandwidth = x = bench_memcpy ();
789    printf ("reference memcpy speed = %.1fMB/s (%.1fMP/s for 32bpp fills)\n",
790            x / 1000000., x / 4000000);
791    if (use_scaling)
792    {
793	printf ("---\n");
794	if (filter == PIXMAN_FILTER_BILINEAR)
795	    printf ("BILINEAR scaling\n");
796	else if (filter == PIXMAN_FILTER_NEAREST)
797	    printf ("NEAREST scaling\n");
798	else
799	    printf ("UNKNOWN scaling\n");
800    }
801    printf ("---\n");
802
803    for (i = 0; i < ARRAY_LENGTH (tests_tbl); i++)
804    {
805	if (strcmp (pattern, "all") == 0 || strcmp (tests_tbl[i].testname, pattern) == 0)
806	{
807	    bench_composite (tests_tbl[i].testname,
808			     tests_tbl[i].src_fmt,
809			     tests_tbl[i].src_flags,
810			     tests_tbl[i].op,
811			     tests_tbl[i].mask_fmt,
812			     tests_tbl[i].mask_flags,
813			     tests_tbl[i].dst_fmt,
814			     bandwidth/8);
815	}
816    }
817
818    free (src);
819    return 0;
820}
821