1
2/* intel_init.c - SSE2 optimized filter functions
3 *
4 * Copyright (c) 2016 Google, Inc.
5 * Written by Mike Klein and Matt Sarett
6 * Derived from arm/arm_init.c, which was
7 * Copyright (c) 2014,2016 Glenn Randers-Pehrson
8 *
9 * Last changed in libpng 1.6.22 [May 26, 2016]
10 *
11 * This code is released under the libpng license.
12 * For conditions of distribution and use, see the disclaimer
13 * and license in png.h
14 */
15
16#include "../../pngpriv.h"
17
18#ifdef PNG_READ_SUPPORTED
19#if PNG_INTEL_SSE_IMPLEMENTATION > 0
20
21void
22png_init_filter_functions_sse2(png_structp pp, unsigned int bpp)
23{
24   /* The techniques used to implement each of these filters in SSE operate on
25    * one pixel at a time.
26    * So they generally speed up 3bpp images about 3x, 4bpp images about 4x.
27    * They can scale up to 6 and 8 bpp images and down to 2 bpp images,
28    * but they'd not likely have any benefit for 1bpp images.
29    * Most of these can be implemented using only MMX and 64-bit registers,
30    * but they end up a bit slower than using the equally-ubiquitous SSE2.
31   */
32   png_debug(1, "in png_init_filter_functions_sse2");
33   if (bpp == 3)
34   {
35      pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_sse2;
36      pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_sse2;
37      pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
38         png_read_filter_row_paeth3_sse2;
39   }
40   else if (bpp == 4)
41   {
42      pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_sse2;
43      pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_sse2;
44      pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
45          png_read_filter_row_paeth4_sse2;
46   }
47
48   /* No need optimize PNG_FILTER_VALUE_UP.  The compiler should
49    * autovectorize.
50    */
51}
52
53#endif /* PNG_INTEL_SSE_IMPLEMENTATION > 0 */
54#endif /* PNG_READ_SUPPORTED */
55