1/* 2 * Copyright © 2009 Nokia Corporation 3 * Copyright © 2010 Movial Creative Technologies Oy 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 22 * DEALINGS IN THE SOFTWARE. 23 */ 24 25#include <stdio.h> 26#include <stdlib.h> 27#include <string.h> 28#include "utils.h" 29 30#define SOLID_FLAG 1 31#define CA_FLAG 2 32 33#define L1CACHE_SIZE (8 * 1024) 34#define L2CACHE_SIZE (128 * 1024) 35 36/* This is applied to both L1 and L2 tests - alternatively, you could 37 * parameterise bench_L or split it into two functions. It could be 38 * read at runtime on some architectures, but it only really matters 39 * that it's a number that's an integer divisor of both cacheline 40 * lengths, and further, it only really matters for caches that don't 41 * do allocate0on-write. */ 42#define CACHELINE_LENGTH (32) /* bytes */ 43 44#define WIDTH 1920 45#define HEIGHT 1080 46#define BUFSIZE (WIDTH * HEIGHT * 4) 47#define XWIDTH 256 48#define XHEIGHT 256 49#define TILEWIDTH 32 50#define TINYWIDTH 8 51 52#define EXCLUDE_OVERHEAD 1 53 54uint32_t *dst; 55uint32_t *src; 56uint32_t *mask; 57 58double bandwidth = 0; 59 60double 61bench_memcpy () 62{ 63 int64_t n = 0, total; 64 double t1, t2; 65 int x = 0; 66 67 t1 = gettime (); 68 while (1) 69 { 70 memcpy (dst, src, BUFSIZE - 64); 71 memcpy (src, dst, BUFSIZE - 64); 72 n += 4 * (BUFSIZE - 64); 73 t2 = gettime (); 74 if (t2 - t1 > 0.5) 75 break; 76 } 77 n = total = n * 5; 78 t1 = gettime (); 79 while (n > 0) 80 { 81 if (++x >= 64) 82 x = 0; 83 memcpy ((char *)dst + 1, (char *)src + x, BUFSIZE - 64); 84 memcpy ((char *)src + 1, (char *)dst + x, BUFSIZE - 64); 85 n -= 4 * (BUFSIZE - 64); 86 } 87 t2 = gettime (); 88 return (double)total / (t2 - t1); 89} 90 91static pixman_bool_t use_scaling = FALSE; 92static pixman_filter_t filter = PIXMAN_FILTER_NEAREST; 93 94/* nearly 1x scale factor */ 95static pixman_transform_t m = 96{ 97 { 98 { pixman_fixed_1 + 1, 0, 0 }, 99 { 0, pixman_fixed_1, 0 }, 100 { 0, 0, pixman_fixed_1 } 101 } 102}; 103 104static void 105pixman_image_composite_wrapper (pixman_implementation_t *impl, 106 pixman_composite_info_t *info) 107{ 108 if (use_scaling) 109 { 110 pixman_image_set_filter (info->src_image, filter, NULL, 0); 111 pixman_image_set_transform(info->src_image, &m); 112 } 113 pixman_image_composite (info->op, 114 info->src_image, info->mask_image, info->dest_image, 115 info->src_x, info->src_y, 116 info->mask_x, info->mask_y, 117 info->dest_x, info->dest_y, 118 info->width, info->height); 119} 120 121static void 122pixman_image_composite_empty (pixman_implementation_t *impl, 123 pixman_composite_info_t *info) 124{ 125 if (use_scaling) 126 { 127 pixman_image_set_filter (info->src_image, filter, NULL, 0); 128 pixman_image_set_transform(info->src_image, &m); 129 } 130 pixman_image_composite (info->op, 131 info->src_image, info->mask_image, info->dest_image, 132 0, 0, 0, 0, 0, 0, 1, 1); 133} 134 135static inline void 136call_func (pixman_composite_func_t func, 137 pixman_op_t op, 138 pixman_image_t * src_image, 139 pixman_image_t * mask_image, 140 pixman_image_t * dest_image, 141 int32_t src_x, 142 int32_t src_y, 143 int32_t mask_x, 144 int32_t mask_y, 145 int32_t dest_x, 146 int32_t dest_y, 147 int32_t width, 148 int32_t height) 149{ 150 pixman_composite_info_t info; 151 152 info.op = op; 153 info.src_image = src_image; 154 info.mask_image = mask_image; 155 info.dest_image = dest_image; 156 info.src_x = src_x; 157 info.src_y = src_y; 158 info.mask_x = mask_x; 159 info.mask_y = mask_y; 160 info.dest_x = dest_x; 161 info.dest_y = dest_y; 162 info.width = width; 163 info.height = height; 164 165 func (0, &info); 166} 167 168void 169noinline 170bench_L (pixman_op_t op, 171 pixman_image_t * src_img, 172 pixman_image_t * mask_img, 173 pixman_image_t * dst_img, 174 int64_t n, 175 pixman_composite_func_t func, 176 int width, 177 int lines_count) 178{ 179 int64_t i, j, k; 180 int x = 0; 181 int q = 0; 182 volatile int qx; 183 184 for (i = 0; i < n; i++) 185 { 186 /* For caches without allocate-on-write, we need to force the 187 * destination buffer back into the cache on each iteration, 188 * otherwise if they are evicted during the test, they remain 189 * uncached. This doesn't matter for tests which read the 190 * destination buffer, or for caches that do allocate-on-write, 191 * but in those cases this loop just adds constant time, which 192 * should be successfully cancelled out. 193 */ 194 for (j = 0; j < lines_count; j++) 195 { 196 for (k = 0; k < width + 62; k += CACHELINE_LENGTH / sizeof *dst) 197 { 198 q += dst[j * WIDTH + k]; 199 } 200 q += dst[j * WIDTH + width + 62]; 201 } 202 if (++x >= 64) 203 x = 0; 204 call_func (func, op, src_img, mask_img, dst_img, x, 0, x, 0, 63 - x, 0, width, lines_count); 205 } 206 qx = q; 207} 208 209void 210noinline 211bench_M (pixman_op_t op, 212 pixman_image_t * src_img, 213 pixman_image_t * mask_img, 214 pixman_image_t * dst_img, 215 int64_t n, 216 pixman_composite_func_t func) 217{ 218 int64_t i; 219 int x = 0; 220 221 for (i = 0; i < n; i++) 222 { 223 if (++x >= 64) 224 x = 0; 225 call_func (func, op, src_img, mask_img, dst_img, x, 0, x, 0, 1, 0, WIDTH - 64, HEIGHT); 226 } 227} 228 229double 230noinline 231bench_HT (pixman_op_t op, 232 pixman_image_t * src_img, 233 pixman_image_t * mask_img, 234 pixman_image_t * dst_img, 235 int64_t n, 236 pixman_composite_func_t func) 237{ 238 double pix_cnt = 0; 239 int x = 0; 240 int y = 0; 241 int64_t i; 242 243 srand (0); 244 for (i = 0; i < n; i++) 245 { 246 int w = (rand () % (TILEWIDTH * 2)) + 1; 247 int h = (rand () % (TILEWIDTH * 2)) + 1; 248 if (x + w > WIDTH) 249 { 250 x = 0; 251 y += TILEWIDTH * 2; 252 } 253 if (y + h > HEIGHT) 254 { 255 y = 0; 256 } 257 call_func (func, op, src_img, mask_img, dst_img, x, y, x, y, x, y, w, h); 258 x += w; 259 pix_cnt += w * h; 260 } 261 return pix_cnt; 262} 263 264double 265noinline 266bench_VT (pixman_op_t op, 267 pixman_image_t * src_img, 268 pixman_image_t * mask_img, 269 pixman_image_t * dst_img, 270 int64_t n, 271 pixman_composite_func_t func) 272{ 273 double pix_cnt = 0; 274 int x = 0; 275 int y = 0; 276 int64_t i; 277 278 srand (0); 279 for (i = 0; i < n; i++) 280 { 281 int w = (rand () % (TILEWIDTH * 2)) + 1; 282 int h = (rand () % (TILEWIDTH * 2)) + 1; 283 if (y + h > HEIGHT) 284 { 285 y = 0; 286 x += TILEWIDTH * 2; 287 } 288 if (x + w > WIDTH) 289 { 290 x = 0; 291 } 292 call_func (func, op, src_img, mask_img, dst_img, x, y, x, y, x, y, w, h); 293 y += h; 294 pix_cnt += w * h; 295 } 296 return pix_cnt; 297} 298 299double 300noinline 301bench_R (pixman_op_t op, 302 pixman_image_t * src_img, 303 pixman_image_t * mask_img, 304 pixman_image_t * dst_img, 305 int64_t n, 306 pixman_composite_func_t func, 307 int maxw, 308 int maxh) 309{ 310 double pix_cnt = 0; 311 int64_t i; 312 313 if (maxw <= TILEWIDTH * 2 || maxh <= TILEWIDTH * 2) 314 { 315 printf("error: maxw <= TILEWIDTH * 2 || maxh <= TILEWIDTH * 2\n"); 316 return 0; 317 } 318 319 srand (0); 320 for (i = 0; i < n; i++) 321 { 322 int w = (rand () % (TILEWIDTH * 2)) + 1; 323 int h = (rand () % (TILEWIDTH * 2)) + 1; 324 int sx = rand () % (maxw - TILEWIDTH * 2); 325 int sy = rand () % (maxh - TILEWIDTH * 2); 326 int dx = rand () % (maxw - TILEWIDTH * 2); 327 int dy = rand () % (maxh - TILEWIDTH * 2); 328 call_func (func, op, src_img, mask_img, dst_img, sx, sy, sx, sy, dx, dy, w, h); 329 pix_cnt += w * h; 330 } 331 return pix_cnt; 332} 333 334double 335noinline 336bench_RT (pixman_op_t op, 337 pixman_image_t * src_img, 338 pixman_image_t * mask_img, 339 pixman_image_t * dst_img, 340 int64_t n, 341 pixman_composite_func_t func, 342 int maxw, 343 int maxh) 344{ 345 double pix_cnt = 0; 346 int64_t i; 347 348 if (maxw <= TINYWIDTH * 2 || maxh <= TINYWIDTH * 2) 349 { 350 printf("error: maxw <= TINYWIDTH * 2 || maxh <= TINYWIDTH * 2\n"); 351 return 0; 352 } 353 354 srand (0); 355 for (i = 0; i < n; i++) 356 { 357 int w = (rand () % (TINYWIDTH * 2)) + 1; 358 int h = (rand () % (TINYWIDTH * 2)) + 1; 359 int sx = rand () % (maxw - TINYWIDTH * 2); 360 int sy = rand () % (maxh - TINYWIDTH * 2); 361 int dx = rand () % (maxw - TINYWIDTH * 2); 362 int dy = rand () % (maxh - TINYWIDTH * 2); 363 call_func (func, op, src_img, mask_img, dst_img, sx, sy, sx, sy, dx, dy, w, h); 364 pix_cnt += w * h; 365 } 366 return pix_cnt; 367} 368 369void 370bench_composite (char * testname, 371 int src_fmt, 372 int src_flags, 373 int op, 374 int mask_fmt, 375 int mask_flags, 376 int dst_fmt, 377 double npix) 378{ 379 pixman_image_t * src_img; 380 pixman_image_t * dst_img; 381 pixman_image_t * mask_img; 382 pixman_image_t * xsrc_img; 383 pixman_image_t * xdst_img; 384 pixman_image_t * xmask_img; 385 double t1, t2, t3, pix_cnt; 386 int64_t n, l1test_width, nlines; 387 double bytes_per_pix = 0; 388 pixman_bool_t bench_pixbuf = FALSE; 389 390 pixman_composite_func_t func = pixman_image_composite_wrapper; 391 392 if (!(src_flags & SOLID_FLAG)) 393 { 394 bytes_per_pix += (src_fmt >> 24) / 8.0; 395 src_img = pixman_image_create_bits (src_fmt, 396 WIDTH, HEIGHT, 397 src, 398 WIDTH * 4); 399 xsrc_img = pixman_image_create_bits (src_fmt, 400 XWIDTH, XHEIGHT, 401 src, 402 XWIDTH * 4); 403 } 404 else 405 { 406 src_img = pixman_image_create_bits (src_fmt, 407 1, 1, 408 src, 409 4); 410 xsrc_img = pixman_image_create_bits (src_fmt, 411 1, 1, 412 src, 413 4); 414 pixman_image_set_repeat (src_img, PIXMAN_REPEAT_NORMAL); 415 pixman_image_set_repeat (xsrc_img, PIXMAN_REPEAT_NORMAL); 416 } 417 418 bytes_per_pix += (dst_fmt >> 24) / 8.0; 419 dst_img = pixman_image_create_bits (dst_fmt, 420 WIDTH, HEIGHT, 421 dst, 422 WIDTH * 4); 423 424 mask_img = NULL; 425 xmask_img = NULL; 426 if (strcmp (testname, "pixbuf") == 0 || strcmp (testname, "rpixbuf") == 0) 427 { 428 bench_pixbuf = TRUE; 429 } 430 if (!(mask_flags & SOLID_FLAG) && mask_fmt != PIXMAN_null) 431 { 432 bytes_per_pix += (mask_fmt >> 24) / ((op == PIXMAN_OP_SRC) ? 8.0 : 4.0); 433 mask_img = pixman_image_create_bits (mask_fmt, 434 WIDTH, HEIGHT, 435 bench_pixbuf ? src : mask, 436 WIDTH * 4); 437 xmask_img = pixman_image_create_bits (mask_fmt, 438 XWIDTH, XHEIGHT, 439 bench_pixbuf ? src : mask, 440 XWIDTH * 4); 441 } 442 else if (mask_fmt != PIXMAN_null) 443 { 444 mask_img = pixman_image_create_bits (mask_fmt, 445 1, 1, 446 mask, 447 4); 448 xmask_img = pixman_image_create_bits (mask_fmt, 449 1, 1, 450 mask, 451 4 * 4); 452 pixman_image_set_repeat (mask_img, PIXMAN_REPEAT_NORMAL); 453 pixman_image_set_repeat (xmask_img, PIXMAN_REPEAT_NORMAL); 454 } 455 if ((mask_flags & CA_FLAG) && mask_fmt != PIXMAN_null) 456 { 457 pixman_image_set_component_alpha (mask_img, 1); 458 } 459 xdst_img = pixman_image_create_bits (dst_fmt, 460 XWIDTH, XHEIGHT, 461 dst, 462 XWIDTH * 4); 463 464 465 printf ("%24s %c", testname, func != pixman_image_composite_wrapper ? 466 '-' : '='); 467 468 memcpy (dst, src, BUFSIZE); 469 memcpy (src, dst, BUFSIZE); 470 471 l1test_width = L1CACHE_SIZE / 8 - 64; 472 if (l1test_width < 1) 473 l1test_width = 1; 474 if (l1test_width > WIDTH - 64) 475 l1test_width = WIDTH - 64; 476 n = 1 + npix / (l1test_width * 8); 477 t1 = gettime (); 478#if EXCLUDE_OVERHEAD 479 bench_L (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty, l1test_width, 1); 480#endif 481 t2 = gettime (); 482 bench_L (op, src_img, mask_img, dst_img, n, func, l1test_width, 1); 483 t3 = gettime (); 484 printf (" L1:%7.2f", (double)n * l1test_width * 1 / 485 ((t3 - t2) - (t2 - t1)) / 1000000.); 486 fflush (stdout); 487 488 memcpy (dst, src, BUFSIZE); 489 memcpy (src, dst, BUFSIZE); 490 491 nlines = (L2CACHE_SIZE / l1test_width) / 492 ((PIXMAN_FORMAT_BPP(src_fmt) + PIXMAN_FORMAT_BPP(dst_fmt)) / 8); 493 if (nlines < 1) 494 nlines = 1; 495 n = 1 + npix / (l1test_width * nlines); 496 t1 = gettime (); 497#if EXCLUDE_OVERHEAD 498 bench_L (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty, l1test_width, nlines); 499#endif 500 t2 = gettime (); 501 bench_L (op, src_img, mask_img, dst_img, n, func, l1test_width, nlines); 502 t3 = gettime (); 503 printf (" L2:%7.2f", (double)n * l1test_width * nlines / 504 ((t3 - t2) - (t2 - t1)) / 1000000.); 505 fflush (stdout); 506 507 memcpy (dst, src, BUFSIZE); 508 memcpy (src, dst, BUFSIZE); 509 510 n = 1 + npix / (WIDTH * HEIGHT); 511 t1 = gettime (); 512#if EXCLUDE_OVERHEAD 513 bench_M (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty); 514#endif 515 t2 = gettime (); 516 bench_M (op, src_img, mask_img, dst_img, n, func); 517 t3 = gettime (); 518 printf (" M:%6.2f (%6.2f%%)", 519 ((double)n * (WIDTH - 64) * HEIGHT / ((t3 - t2) - (t2 - t1))) / 1000000., 520 ((double)n * (WIDTH - 64) * HEIGHT / ((t3 - t2) - (t2 - t1)) * bytes_per_pix) * (100.0 / bandwidth) ); 521 fflush (stdout); 522 523 memcpy (dst, src, BUFSIZE); 524 memcpy (src, dst, BUFSIZE); 525 526 n = 1 + npix / (8 * TILEWIDTH * TILEWIDTH); 527 t1 = gettime (); 528#if EXCLUDE_OVERHEAD 529 pix_cnt = bench_HT (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty); 530#endif 531 t2 = gettime (); 532 pix_cnt = bench_HT (op, src_img, mask_img, dst_img, n, func); 533 t3 = gettime (); 534 printf (" HT:%6.2f", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000.); 535 fflush (stdout); 536 537 memcpy (dst, src, BUFSIZE); 538 memcpy (src, dst, BUFSIZE); 539 540 n = 1 + npix / (8 * TILEWIDTH * TILEWIDTH); 541 t1 = gettime (); 542#if EXCLUDE_OVERHEAD 543 pix_cnt = bench_VT (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty); 544#endif 545 t2 = gettime (); 546 pix_cnt = bench_VT (op, src_img, mask_img, dst_img, n, func); 547 t3 = gettime (); 548 printf (" VT:%6.2f", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000.); 549 fflush (stdout); 550 551 memcpy (dst, src, BUFSIZE); 552 memcpy (src, dst, BUFSIZE); 553 554 n = 1 + npix / (8 * TILEWIDTH * TILEWIDTH); 555 t1 = gettime (); 556#if EXCLUDE_OVERHEAD 557 pix_cnt = bench_R (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty, WIDTH, HEIGHT); 558#endif 559 t2 = gettime (); 560 pix_cnt = bench_R (op, src_img, mask_img, dst_img, n, func, WIDTH, HEIGHT); 561 t3 = gettime (); 562 printf (" R:%6.2f", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000.); 563 fflush (stdout); 564 565 memcpy (dst, src, BUFSIZE); 566 memcpy (src, dst, BUFSIZE); 567 568 n = 1 + npix / (16 * TINYWIDTH * TINYWIDTH); 569 t1 = gettime (); 570#if EXCLUDE_OVERHEAD 571 pix_cnt = bench_RT (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty, WIDTH, HEIGHT); 572#endif 573 t2 = gettime (); 574 pix_cnt = bench_RT (op, src_img, mask_img, dst_img, n, func, WIDTH, HEIGHT); 575 t3 = gettime (); 576 printf (" RT:%6.2f (%4.0fKops/s)\n", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000., (double) n / ((t3 - t2) * 1000)); 577 578 if (mask_img) { 579 pixman_image_unref (mask_img); 580 pixman_image_unref (xmask_img); 581 } 582 pixman_image_unref (src_img); 583 pixman_image_unref (dst_img); 584 pixman_image_unref (xsrc_img); 585 pixman_image_unref (xdst_img); 586} 587 588#define PIXMAN_OP_OUT_REV (PIXMAN_OP_OUT_REVERSE) 589 590struct 591{ 592 char *testname; 593 int src_fmt; 594 int src_flags; 595 int op; 596 int mask_fmt; 597 int mask_flags; 598 int dst_fmt; 599} 600tests_tbl[] = 601{ 602 { "add_8_8_8", PIXMAN_a8, 0, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_a8 }, 603 { "add_n_8_8", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_a8 }, 604 { "add_n_8_8888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_a8r8g8b8 }, 605 { "add_n_8_x888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_x8r8g8b8 }, 606 { "add_n_8_0565", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_r5g6b5 }, 607 { "add_n_8_1555", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_a1r5g5b5 }, 608 { "add_n_8_4444", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_a4r4g4b4 }, 609 { "add_n_8_2222", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_a2r2g2b2 }, 610 { "add_n_8_2x10", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_x2r10g10b10 }, 611 { "add_n_8_2a10", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_a2r10g10b10 }, 612 { "add_n_8", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a8 }, 613 { "add_n_8888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a8r8g8b8 }, 614 { "add_n_x888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_x8r8g8b8 }, 615 { "add_n_0565", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_r5g6b5 }, 616 { "add_n_1555", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a1r5g5b5 }, 617 { "add_n_4444", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a4r4g4b4 }, 618 { "add_n_2222", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a2r2g2b2 }, 619 { "add_n_2x10", PIXMAN_a2r10g10b10, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_x2r10g10b10 }, 620 { "add_n_2a10", PIXMAN_a2r10g10b10, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a2r10g10b10 }, 621 { "add_8_8", PIXMAN_a8, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a8 }, 622 { "add_x888_x888", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_x8r8g8b8 }, 623 { "add_8888_8888", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a8r8g8b8 }, 624 { "add_8888_0565", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_r5g6b5 }, 625 { "add_8888_1555", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a1r5g5b5 }, 626 { "add_8888_4444", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a4r4g4b4 }, 627 { "add_8888_2222", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a2r2g2b2 }, 628 { "add_0565_0565", PIXMAN_r5g6b5, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_r5g6b5 }, 629 { "add_1555_1555", PIXMAN_a1r5g5b5, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a1r5g5b5 }, 630 { "add_0565_2x10", PIXMAN_r5g6b5, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_x2r10g10b10 }, 631 { "add_2a10_2a10", PIXMAN_a2r10g10b10, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a2r10g10b10 }, 632 { "in_n_8_8", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_IN, PIXMAN_a8, 0, PIXMAN_a8 }, 633 { "in_8_8", PIXMAN_a8, 0, PIXMAN_OP_IN, PIXMAN_null, 0, PIXMAN_a8 }, 634 { "src_n_2222", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a2r2g2b2 }, 635 { "src_n_0565", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 }, 636 { "src_n_1555", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a1r5g5b5 }, 637 { "src_n_4444", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a4r4g4b4 }, 638 { "src_n_x888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_x8r8g8b8 }, 639 { "src_n_8888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8r8g8b8 }, 640 { "src_n_2x10", PIXMAN_a2r10g10b10, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_x2r10g10b10 }, 641 { "src_n_2a10", PIXMAN_a2r10g10b10, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a2r10g10b10 }, 642 { "src_8888_0565", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 }, 643 { "src_0565_8888", PIXMAN_r5g6b5, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8r8g8b8 }, 644 { "src_8888_4444", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a4r4g4b4 }, 645 { "src_8888_2222", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a2r2g2b2 }, 646 { "src_8888_2x10", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_x2r10g10b10 }, 647 { "src_8888_2a10", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a2r10g10b10 }, 648 { "src_0888_0565", PIXMAN_r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 }, 649 { "src_0888_8888", PIXMAN_r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8r8g8b8 }, 650 { "src_0888_x888", PIXMAN_r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_x8r8g8b8 }, 651 { "src_0888_8888_rev", PIXMAN_b8g8r8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_x8r8g8b8 }, 652 { "src_0888_0565_rev", PIXMAN_b8g8r8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 }, 653 { "src_x888_x888", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_x8r8g8b8 }, 654 { "src_x888_8888", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8r8g8b8 }, 655 { "src_8888_8888", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8r8g8b8 }, 656 { "src_0565_0565", PIXMAN_r5g6b5, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 }, 657 { "src_1555_0565", PIXMAN_a1r5g5b5, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 }, 658 { "src_0565_1555", PIXMAN_r5g6b5, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a1r5g5b5 }, 659 { "src_8_8", PIXMAN_a8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8 }, 660 { "src_n_8", PIXMAN_a8, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8 }, 661 { "src_n_8_0565", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_r5g6b5 }, 662 { "src_n_8_1555", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a1r5g5b5 }, 663 { "src_n_8_4444", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a4r4g4b4 }, 664 { "src_n_8_2222", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a2r2g2b2 }, 665 { "src_n_8_x888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_x8r8g8b8 }, 666 { "src_n_8_8888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a8r8g8b8 }, 667 { "src_n_8_2x10", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_x2r10g10b10 }, 668 { "src_n_8_2a10", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a2r10g10b10 }, 669 { "src_8888_8_0565", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_r5g6b5 }, 670 { "src_0888_8_0565", PIXMAN_r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_r5g6b5 }, 671 { "src_0888_8_8888", PIXMAN_r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a8r8g8b8 }, 672 { "src_0888_8_x888", PIXMAN_r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_x8r8g8b8 }, 673 { "src_x888_8_x888", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_x8r8g8b8 }, 674 { "src_x888_8_8888", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a8r8g8b8 }, 675 { "src_0565_8_0565", PIXMAN_r5g6b5, 0, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_r5g6b5 }, 676 { "src_1555_8_0565", PIXMAN_a1r5g5b5, 0, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_r5g6b5 }, 677 { "src_0565_8_1555", PIXMAN_r5g6b5, 0, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a1r5g5b5 }, 678 { "over_n_x888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_null, 0, PIXMAN_x8r8g8b8 }, 679 { "over_n_8888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_null, 0, PIXMAN_a8r8g8b8 }, 680 { "over_n_0565", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_null, 0, PIXMAN_r5g6b5 }, 681 { "over_n_1555", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_null, 0, PIXMAN_a1r5g5b5 }, 682 { "over_8888_0565", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_null, 0, PIXMAN_r5g6b5 }, 683 { "over_8888_8888", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_null, 0, PIXMAN_a8r8g8b8 }, 684 { "over_8888_x888", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_null, 0, PIXMAN_x8r8g8b8 }, 685 { "over_x888_8_0565", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_a8, 0, PIXMAN_r5g6b5 }, 686 { "over_x888_8_8888", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_a8, 0, PIXMAN_a8r8g8b8 }, 687 { "over_n_8_0565", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8, 0, PIXMAN_r5g6b5 }, 688 { "over_n_8_1555", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8, 0, PIXMAN_a1r5g5b5 }, 689 { "over_n_8_4444", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8, 0, PIXMAN_a4r4g4b4 }, 690 { "over_n_8_2222", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8, 0, PIXMAN_a2r2g2b2 }, 691 { "over_n_8_x888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8, 0, PIXMAN_x8r8g8b8 }, 692 { "over_n_8_8888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8, 0, PIXMAN_a8r8g8b8 }, 693 { "over_n_8_2x10", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8, 0, PIXMAN_x2r10g10b10 }, 694 { "over_n_8_2a10", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8, 0, PIXMAN_a2r10g10b10 }, 695 { "over_n_8888_8888_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, 2, PIXMAN_a8r8g8b8 }, 696 { "over_n_8888_x888_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, 2, PIXMAN_x8r8g8b8 }, 697 { "over_n_8888_0565_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, 2, PIXMAN_r5g6b5 }, 698 { "over_n_8888_1555_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, 2, PIXMAN_a1r5g5b5 }, 699 { "over_n_8888_4444_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, 2, PIXMAN_a4r4g4b4 }, 700 { "over_n_8888_2222_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, 2, PIXMAN_a2r2g2b2 }, 701 { "over_n_8888_2x10_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, 2, PIXMAN_x2r10g10b10 }, 702 { "over_n_8888_2a10_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, 2, PIXMAN_a2r10g10b10 }, 703 { "over_8888_n_8888", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_a8, 1, PIXMAN_a8r8g8b8 }, 704 { "over_8888_n_x888", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_a8, 1, PIXMAN_x8r8g8b8 }, 705 { "over_8888_n_0565", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_a8, 1, PIXMAN_r5g6b5 }, 706 { "over_8888_n_1555", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_a8, 1, PIXMAN_a1r5g5b5 }, 707 { "over_x888_n_8888", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_a8, 1, PIXMAN_a8r8g8b8 }, 708 { "outrev_n_8_0565", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8, 0, PIXMAN_r5g6b5 }, 709 { "outrev_n_8_1555", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8, 0, PIXMAN_a1r5g5b5 }, 710 { "outrev_n_8_x888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8, 0, PIXMAN_x8r8g8b8 }, 711 { "outrev_n_8_8888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8, 0, PIXMAN_a8r8g8b8 }, 712 { "outrev_n_8888_0565_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_r5g6b5 }, 713 { "outrev_n_8888_1555_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_a1r5g5b5 }, 714 { "outrev_n_8888_x888_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_x8r8g8b8 }, 715 { "outrev_n_8888_8888_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_a8r8g8b8 }, 716 { "over_reverse_n_8888", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_OVER_REVERSE, PIXMAN_null, 0, PIXMAN_a8r8g8b8 }, 717 { "pixbuf", PIXMAN_x8b8g8r8, 0, PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, 0, PIXMAN_a8r8g8b8 }, 718 { "rpixbuf", PIXMAN_x8b8g8r8, 0, PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, 0, PIXMAN_a8b8g8r8 }, 719}; 720 721int 722main (int argc, char *argv[]) 723{ 724 double x; 725 int i; 726 const char *pattern = NULL; 727 for (i = 1; i < argc; i++) 728 { 729 if (argv[i][0] == '-') 730 { 731 if (strchr (argv[i] + 1, 'b')) 732 { 733 use_scaling = TRUE; 734 filter = PIXMAN_FILTER_BILINEAR; 735 } 736 else if (strchr (argv[i] + 1, 'n')) 737 { 738 use_scaling = TRUE; 739 filter = PIXMAN_FILTER_NEAREST; 740 } 741 } 742 else 743 { 744 pattern = argv[i]; 745 } 746 } 747 748 if (!pattern) 749 { 750 printf ("Usage: lowlevel-blt-bench [-b] [-n] pattern\n"); 751 printf (" -n : benchmark nearest scaling\n"); 752 printf (" -b : benchmark bilinear scaling\n"); 753 return 1; 754 } 755 756 src = aligned_malloc (4096, BUFSIZE * 3); 757 memset (src, 0xCC, BUFSIZE * 3); 758 dst = src + (BUFSIZE / 4); 759 mask = dst + (BUFSIZE / 4); 760 761 printf ("Benchmark for a set of most commonly used functions\n"); 762 printf ("---\n"); 763 printf ("All results are presented in millions of pixels per second\n"); 764 printf ("L1 - small Xx1 rectangle (fitting L1 cache), always blitted at the same\n"); 765 printf (" memory location with small drift in horizontal direction\n"); 766 printf ("L2 - small XxY rectangle (fitting L2 cache), always blitted at the same\n"); 767 printf (" memory location with small drift in horizontal direction\n"); 768 printf ("M - large %dx%d rectangle, always blitted at the same\n", 769 WIDTH - 64, HEIGHT); 770 printf (" memory location with small drift in horizontal direction\n"); 771 printf ("HT - random rectangles with %dx%d average size are copied from\n", 772 TILEWIDTH, TILEWIDTH); 773 printf (" one %dx%d buffer to another, traversing from left to right\n", 774 WIDTH, HEIGHT); 775 printf (" and from top to bottom\n"); 776 printf ("VT - random rectangles with %dx%d average size are copied from\n", 777 TILEWIDTH, TILEWIDTH); 778 printf (" one %dx%d buffer to another, traversing from top to bottom\n", 779 WIDTH, HEIGHT); 780 printf (" and from left to right\n"); 781 printf ("R - random rectangles with %dx%d average size are copied from\n", 782 TILEWIDTH, TILEWIDTH); 783 printf (" random locations of one %dx%d buffer to another\n", 784 WIDTH, HEIGHT); 785 printf ("RT - as R, but %dx%d average sized rectangles are copied\n", 786 TINYWIDTH, TINYWIDTH); 787 printf ("---\n"); 788 bandwidth = x = bench_memcpy (); 789 printf ("reference memcpy speed = %.1fMB/s (%.1fMP/s for 32bpp fills)\n", 790 x / 1000000., x / 4000000); 791 if (use_scaling) 792 { 793 printf ("---\n"); 794 if (filter == PIXMAN_FILTER_BILINEAR) 795 printf ("BILINEAR scaling\n"); 796 else if (filter == PIXMAN_FILTER_NEAREST) 797 printf ("NEAREST scaling\n"); 798 else 799 printf ("UNKNOWN scaling\n"); 800 } 801 printf ("---\n"); 802 803 for (i = 0; i < ARRAY_LENGTH (tests_tbl); i++) 804 { 805 if (strcmp (pattern, "all") == 0 || strcmp (tests_tbl[i].testname, pattern) == 0) 806 { 807 bench_composite (tests_tbl[i].testname, 808 tests_tbl[i].src_fmt, 809 tests_tbl[i].src_flags, 810 tests_tbl[i].op, 811 tests_tbl[i].mask_fmt, 812 tests_tbl[i].mask_flags, 813 tests_tbl[i].dst_fmt, 814 bandwidth/8); 815 } 816 } 817 818 free (src); 819 return 0; 820} 821