arm_init.c revision b50c217251b086440efcdb273c22f86a06c80cba
1 2/* arm_init.c - NEON optimised filter functions 3 * 4 * Copyright (c) 2013 Glenn Randers-Pehrson 5 * Written by Mans Rullgard, 2011. 6 * Last changed in libpng 1.5.17 [July 18, 2013] 7 * 8 * This code is released under the libpng license. 9 * For conditions of distribution and use, see the disclaimer 10 * and license in png.h 11 */ 12/* Below, after checking __linux__, various non-C90 POSIX 1003.1 functions are 13 * called. 14 */ 15#define _POSIX_SOURCE 1 16 17#include "../pngpriv.h" 18 19#ifdef PNG_READ_SUPPORTED 20#if PNG_ARM_NEON_OPT > 0 21#ifdef PNG_ARM_NEON_CHECK_SUPPORTED /* Do run-time checks */ 22#include <signal.h> /* for sig_atomic_t */ 23 24#ifdef __ANDROID__ 25/* Linux provides access to information about CPU capabilites via 26 * /proc/self/auxv, however Android blocks this while still claiming to be 27 * Linux. The Andoid NDK, however, provides appropriate support. 28 * 29 * Documentation: http://www.kandroid.org/ndk/docs/CPU-ARM-NEON.html 30 */ 31#include <cpu-features.h> 32 33static int 34png_have_neon(png_structp png_ptr) 35{ 36 /* This is a whole lot easier than the mess below, however it is probably 37 * implemented as below, therefore it is better to cache the result (these 38 * function calls may be slow!) 39 */ 40 PNG_UNUSED(png_ptr) 41 return android_getCpuFamily() == ANDROID_CPU_FAMILY_ARM && 42 (android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON) != 0; 43} 44#elif defined(__linux__) 45/* The generic __linux__ implementation requires reading /proc/self/auxv and 46 * looking at each element for one that records NEON capabilities. 47 */ 48#include <unistd.h> /* for POSIX 1003.1 */ 49#include <errno.h> /* for EINTR */ 50 51#include <sys/types.h> 52#include <sys/stat.h> 53#include <fcntl.h> 54#include <elf.h> 55#include <asm/hwcap.h> 56 57/* A read call may be interrupted, in which case it returns -1 and sets errno to 58 * EINTR if nothing was done, otherwise (if something was done) a partial read 59 * may result. 60 */ 61static size_t 62safe_read(png_structp png_ptr, int fd, void *buffer_in, size_t nbytes) 63{ 64 size_t ntotal = 0; 65 char *buffer = png_voidcast(char*, buffer_in); 66 67 while (nbytes > 0) 68 { 69 unsigned int nread; 70 int iread; 71 72 /* Passing nread > INT_MAX to read is implementation defined in POSIX 73 * 1003.1, therefore despite the unsigned argument portable code must 74 * limit the value to INT_MAX! 75 */ 76 if (nbytes > INT_MAX) 77 nread = INT_MAX; 78 79 else 80 nread = (unsigned int)/*SAFE*/nbytes; 81 82 iread = read(fd, buffer, nread); 83 84 if (iread == -1) 85 { 86 /* This is the devil in the details, a read can terminate early with 0 87 * bytes read because of EINTR, yet it still returns -1 otherwise end 88 * of file cannot be distinguished. 89 */ 90 if (errno != EINTR) 91 { 92 png_warning(png_ptr, "/proc read failed"); 93 return 0; /* I.e. a permanent failure */ 94 } 95 } 96 97 else if (iread < 0) 98 { 99 /* Not a valid 'read' result: */ 100 png_warning(png_ptr, "OS /proc read bug"); 101 return 0; 102 } 103 104 else if (iread > 0) 105 { 106 /* Continue reading until a permanent failure, or EOF */ 107 buffer += iread; 108 nbytes -= (unsigned int)/*SAFE*/iread; 109 ntotal += (unsigned int)/*SAFE*/iread; 110 } 111 112 else 113 return ntotal; 114 } 115 116 return ntotal; /* nbytes == 0 */ 117} 118 119static int 120png_have_neon(png_structp png_ptr) 121{ 122 int fd = open("/proc/self/auxv", O_RDONLY); 123 Elf32_auxv_t aux; 124 125 /* Failsafe: failure to open means no NEON */ 126 if (fd == -1) 127 { 128 png_warning(png_ptr, "/proc/self/auxv open failed"); 129 return 0; 130 } 131 132 while (safe_read(png_ptr, fd, &aux, sizeof aux) == sizeof aux) 133 { 134 if (aux.a_type == AT_HWCAP && (aux.a_un.a_val & HWCAP_NEON) != 0) 135 { 136 close(fd); 137 return 1; 138 } 139 } 140 141 close(fd); 142 return 0; 143} 144#else 145 /* We don't know how to do a run-time check on this system */ 146# error "no support for run-time ARM NEON checks" 147#endif /* OS checks */ 148#endif /* PNG_ARM_NEON_CHECK_SUPPORTED */ 149 150#ifndef PNG_ALIGNED_MEMORY_SUPPORTED 151# error "ALIGNED_MEMORY is required; set: -DPNG_ALIGNED_MEMORY_SUPPORTED" 152#endif 153 154void 155png_init_filter_functions_neon(png_structp pp, unsigned int bpp) 156{ 157#ifdef PNG_ARM_NEON_API_SUPPORTED 158 switch ((pp->options >> PNG_ARM_NEON) & 3) 159 { 160 case PNG_OPTION_UNSET: 161 /* Allow the run-time check to execute if it has been enabled - 162 * thus both API and CHECK can be turned on. If it isn't supported 163 * this case will fall through to the 'default' below, which just 164 * returns. 165 */ 166#endif /* PNG_ARM_NEON_API_SUPPORTED */ 167#ifdef PNG_ARM_NEON_CHECK_SUPPORTED 168 { 169 static volatile sig_atomic_t no_neon = -1; /* not checked */ 170 171 if (no_neon < 0) 172 no_neon = !png_have_neon(pp); 173 174 if (no_neon) 175 return; 176 } 177#ifdef PNG_ARM_NEON_API_SUPPORTED 178 break; 179#endif 180#endif /* PNG_ARM_NEON_CHECK_SUPPORTED */ 181#ifdef PNG_ARM_NEON_API_SUPPORTED 182 case PNG_OPTION_ON: 183 /* Option turned on */ 184 break; 185 186 default: /* OFF or INVALID */ 187 return; 188 } 189#endif 190 191 /* IMPORTANT: any new external functions used here must be declared using 192 * PNG_INTERNAL_FUNCTION in ../pngpriv.h. This is required so that the 193 * 'prefix' option to configure works: 194 * 195 * ./configure --with-libpng-prefix=foobar_ 196 * 197 * Verify you have got this right by running the above command, doing a build 198 * and examining pngprefix.h; it must contain a #define for every external 199 * function you add. (Notice that this happens automatically for the 200 * initialization function.) 201 */ 202 pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_neon; 203 204 if (bpp == 3) 205 { 206 pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_neon; 207 pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_neon; 208 pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = 209 png_read_filter_row_paeth3_neon; 210 } 211 212 else if (bpp == 4) 213 { 214 pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_neon; 215 pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_neon; 216 pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = 217 png_read_filter_row_paeth4_neon; 218 } 219} 220#endif /* PNG_ARM_NEON_OPT > 0 */ 221#endif /* PNG_READ_SUPPORTED */ 222