arm_init.c revision b50c217251b086440efcdb273c22f86a06c80cba
1
2/* arm_init.c - NEON optimised filter functions
3 *
4 * Copyright (c) 2013 Glenn Randers-Pehrson
5 * Written by Mans Rullgard, 2011.
6 * Last changed in libpng 1.5.17 [July 18, 2013]
7 *
8 * This code is released under the libpng license.
9 * For conditions of distribution and use, see the disclaimer
10 * and license in png.h
11 */
12/* Below, after checking __linux__, various non-C90 POSIX 1003.1 functions are
13 * called.
14 */
15#define _POSIX_SOURCE 1
16
17#include "../pngpriv.h"
18
19#ifdef PNG_READ_SUPPORTED
20#if PNG_ARM_NEON_OPT > 0
21#ifdef PNG_ARM_NEON_CHECK_SUPPORTED /* Do run-time checks */
22#include <signal.h> /* for sig_atomic_t */
23
24#ifdef __ANDROID__
/* Linux provides access to information about CPU capabilities via
 * /proc/self/auxv, however Android blocks this while still claiming to be
 * Linux.  The Android NDK, however, provides appropriate support.
28 *
29 * Documentation: http://www.kandroid.org/ndk/docs/CPU-ARM-NEON.html
30 */
31#include <cpu-features.h>
32
33static int
34png_have_neon(png_structp png_ptr)
35{
36   /* This is a whole lot easier than the mess below, however it is probably
37    * implemented as below, therefore it is better to cache the result (these
38    * function calls may be slow!)
39    */
40   PNG_UNUSED(png_ptr)
41   return android_getCpuFamily() == ANDROID_CPU_FAMILY_ARM &&
42      (android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON) != 0;
43}
44#elif defined(__linux__)
45/* The generic __linux__ implementation requires reading /proc/self/auxv and
46 * looking at each element for one that records NEON capabilities.
47 */
48#include <unistd.h> /* for POSIX 1003.1 */
49#include <errno.h>  /* for EINTR */
50
51#include <sys/types.h>
52#include <sys/stat.h>
53#include <fcntl.h>
54#include <elf.h>
55#include <asm/hwcap.h>
56
57/* A read call may be interrupted, in which case it returns -1 and sets errno to
58 * EINTR if nothing was done, otherwise (if something was done) a partial read
59 * may result.
60 */
61static size_t
62safe_read(png_structp png_ptr, int fd, void *buffer_in, size_t nbytes)
63{
64   size_t ntotal = 0;
65   char *buffer = png_voidcast(char*, buffer_in);
66
67   while (nbytes > 0)
68   {
69      unsigned int nread;
70      int iread;
71
72      /* Passing nread > INT_MAX to read is implementation defined in POSIX
73       * 1003.1, therefore despite the unsigned argument portable code must
74       * limit the value to INT_MAX!
75       */
76      if (nbytes > INT_MAX)
77         nread = INT_MAX;
78
79      else
80         nread = (unsigned int)/*SAFE*/nbytes;
81
82      iread = read(fd, buffer, nread);
83
84      if (iread == -1)
85      {
86         /* This is the devil in the details, a read can terminate early with 0
87          * bytes read because of EINTR, yet it still returns -1 otherwise end
88          * of file cannot be distinguished.
89          */
90         if (errno != EINTR)
91         {
92            png_warning(png_ptr, "/proc read failed");
93            return 0; /* I.e. a permanent failure */
94         }
95      }
96
97      else if (iread < 0)
98      {
99         /* Not a valid 'read' result: */
100         png_warning(png_ptr, "OS /proc read bug");
101         return 0;
102      }
103
104      else if (iread > 0)
105      {
106         /* Continue reading until a permanent failure, or EOF */
107         buffer += iread;
108         nbytes -= (unsigned int)/*SAFE*/iread;
109         ntotal += (unsigned int)/*SAFE*/iread;
110      }
111
112      else
113         return ntotal;
114   }
115
116   return ntotal; /* nbytes == 0 */
117}
118
119static int
120png_have_neon(png_structp png_ptr)
121{
122   int fd = open("/proc/self/auxv", O_RDONLY);
123   Elf32_auxv_t aux;
124
125   /* Failsafe: failure to open means no NEON */
126   if (fd == -1)
127   {
128      png_warning(png_ptr, "/proc/self/auxv open failed");
129      return 0;
130   }
131
132   while (safe_read(png_ptr, fd, &aux, sizeof aux) == sizeof aux)
133   {
134      if (aux.a_type == AT_HWCAP && (aux.a_un.a_val & HWCAP_NEON) != 0)
135      {
136         close(fd);
137         return 1;
138      }
139   }
140
141   close(fd);
142   return 0;
143}
144#else
145   /* We don't know how to do a run-time check on this system */
146#  error "no support for run-time ARM NEON checks"
147#endif /* OS checks */
148#endif /* PNG_ARM_NEON_CHECK_SUPPORTED */
149
150#ifndef PNG_ALIGNED_MEMORY_SUPPORTED
151#  error "ALIGNED_MEMORY is required; set: -DPNG_ALIGNED_MEMORY_SUPPORTED"
152#endif
153
154void
155png_init_filter_functions_neon(png_structp pp, unsigned int bpp)
156{
157#ifdef PNG_ARM_NEON_API_SUPPORTED
158   switch ((pp->options >> PNG_ARM_NEON) & 3)
159   {
160      case PNG_OPTION_UNSET:
161         /* Allow the run-time check to execute if it has been enabled -
162          * thus both API and CHECK can be turned on.  If it isn't supported
163          * this case will fall through to the 'default' below, which just
164          * returns.
165          */
166#endif /* PNG_ARM_NEON_API_SUPPORTED */
167#ifdef PNG_ARM_NEON_CHECK_SUPPORTED
168         {
169            static volatile sig_atomic_t no_neon = -1; /* not checked */
170
171            if (no_neon < 0)
172               no_neon = !png_have_neon(pp);
173
174            if (no_neon)
175               return;
176         }
177#ifdef PNG_ARM_NEON_API_SUPPORTED
178         break;
179#endif
180#endif /* PNG_ARM_NEON_CHECK_SUPPORTED */
181#ifdef PNG_ARM_NEON_API_SUPPORTED
182      case PNG_OPTION_ON:
183         /* Option turned on */
184         break;
185
186      default: /* OFF or INVALID */
187         return;
188   }
189#endif
190
191   /* IMPORTANT: any new external functions used here must be declared using
192    * PNG_INTERNAL_FUNCTION in ../pngpriv.h.  This is required so that the
193    * 'prefix' option to configure works:
194    *
195    *    ./configure --with-libpng-prefix=foobar_
196    *
197    * Verify you have got this right by running the above command, doing a build
198    * and examining pngprefix.h; it must contain a #define for every external
199    * function you add.  (Notice that this happens automatically for the
200    * initialization function.)
201    */
202   pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_neon;
203
204   if (bpp == 3)
205   {
206      pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_neon;
207      pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_neon;
208      pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
209         png_read_filter_row_paeth3_neon;
210   }
211
212   else if (bpp == 4)
213   {
214      pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_neon;
215      pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_neon;
216      pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
217          png_read_filter_row_paeth4_neon;
218   }
219}
220#endif /* PNG_ARM_NEON_OPT > 0 */
221#endif /* PNG_READ_SUPPORTED */
222