/* u_math.h revision b8de75d53760fb359d10d6f4794f28097210cef4 */
/**************************************************************************
 *
 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/


/**
 * Math utilities and approximations for common math functions.
 * Reduced precision is usually acceptable in shaders...
 *
 * "fast" is used in the names of functions which are low-precision,
 * or at least lower-precision than the normal C lib functions.
 */
35674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen */ 36674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen 37674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen 38674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen#ifndef U_MATH_H 39674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen#define U_MATH_H 40674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen 41674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen 42674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen#include "pipe/p_compiler.h" 43674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen#include "util/u_debug.h" 44674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen 45674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen 46674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen#ifdef __cplusplus 47674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogenextern "C" { 48674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen#endif 49674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen 50674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen 51674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen#if defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) 52674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen__inline double ceil(double val) 53674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen{ 54674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen double ceil_val; 55674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen 56674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen if ((val - (long) val) == 0) { 57674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen ceil_val = val; 58674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen } 59674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen else { 60674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen if (val > 0) { 61674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen ceil_val = (long) val + 1; 62674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen } 63674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen 
else { 64674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen ceil_val = (long) val; 65674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen } 66674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen } 67674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen 68674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen return ceil_val; 69674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen} 70674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen 71674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen#ifndef PIPE_SUBSYSTEM_WINDOWS_CE_OGL 72674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen__inline double floor(double val) 73674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen{ 74674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen double floor_val; 75674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen 76674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen if ((val - (long) val) == 0) { 77674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen floor_val = val; 78674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen } 79674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen else { 80674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen if (val > 0) { 81674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen floor_val = (long) val; 82674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen } 83674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen else { 84674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen floor_val = (long) val - 1; 85674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen } 86674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen } 87674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen 88674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen return floor_val; 89674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen} 90674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen#endif 91674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen 92674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen#pragma 
function(pow) 93674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen__inline double __cdecl pow(double val, double exponent) 94674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen{ 95674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen /* XXX */ 96674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen assert(0); 97674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen return 0; 98674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen} 99674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen 100674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen#pragma function(log) 101674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen__inline double __cdecl log(double val) 102674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen{ 103674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen /* XXX */ 104674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen assert(0); 105674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen return 0; 106674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen} 107674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen 108674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen#pragma function(atan2) 109674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen__inline double __cdecl atan2(double val) 110674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen{ 111674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen /* XXX */ 112674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen assert(0); 113674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen return 0; 114674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen} 115674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen#else 116674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen#include <math.h> 117674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen#include <stdarg.h> 118674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen#endif 119674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen 120674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen 
#ifndef M_SQRT2
#define M_SQRT2 1.41421356237309504880
#endif


#if defined(_MSC_VER)

/*
 * Old MSVC C compilers (and Windows CE) lack the float variants of the
 * libm functions; provide them as thin wrappers over the double versions.
 * The && clause is parenthesised to make the intended grouping explicit.
 */
#if (_MSC_VER < 1400 && !defined(__cplusplus)) || defined(PIPE_SUBSYSTEM_WINDOWS_CE)

static INLINE float cosf( float f )
{
   return (float) cos( (double) f );
}

static INLINE float sinf( float f )
{
   return (float) sin( (double) f );
}

static INLINE float ceilf( float f )
{
   return (float) ceil( (double) f );
}

static INLINE float floorf( float f )
{
   return (float) floor( (double) f );
}

static INLINE float powf( float f, float g )
{
   return (float) pow( (double) f, (double) g );
}

static INLINE float sqrtf( float f )
{
   return (float) sqrt( (double) f );
}

static INLINE float fabsf( float f )
{
   return (float) fabs( (double) f );
}

static INLINE float logf( float f )
{
   return (float) log( (double) f );
}

#else
/* Work-around an extra semi-colon in VS 2005 logf definition */
#ifdef logf
#undef logf
#define logf(x) ((float)log((double)(x)))
#endif /* logf */

#define isfinite(x) _finite((double)(x))
#define isnan(x) _isnan((double)(x))
#endif /* (_MSC_VER < 1400 && !defined(__cplusplus)) || PIPE_SUBSYSTEM_WINDOWS_CE */

/**
 * Base-2 logarithm, computed as ln(x) / ln(2).
 */
static INLINE double log2( double x )
{
   /* 1 / ln(2), carried to full double precision. */
   const double invln2 = 1.4426950408889634074;
   return log( x ) * invln2;
}

/**
 * Round to the nearest integer value, halfway cases away from zero.
 */
static INLINE double
round(double x)
{
   return x >= 0.0 ? floor(x + 0.5) : ceil(x - 0.5);
}

/**
 * Single precision variant of round().
 */
static INLINE float
roundf(float x)
{
   return x >= 0.0f ? floorf(x + 0.5f) : ceilf(x - 0.5f);
}

#endif /* _MSC_VER */





/*
 * Lookup table used by util_fast_exp2().  The table itself is defined
 * outside this header; POW2_TABLE_OFFSET is the index of its midpoint and
 * POW2_TABLE_SCALE the number of entries per unit of exponent.
 */
#define POW2_TABLE_SIZE_LOG2 9
#define POW2_TABLE_SIZE (1 << POW2_TABLE_SIZE_LOG2)
#define POW2_TABLE_OFFSET (POW2_TABLE_SIZE/2)
#define POW2_TABLE_SCALE ((float)(POW2_TABLE_SIZE/2))
extern float pow2_table[POW2_TABLE_SIZE];


/**
 * Set up the math module.  Call this once before relying on any other
 * function declared in this header.  The definition is not part of this
 * header.
 */
extern void util_init_math(void);


/**
 * Overlay of a 32-bit float with signed and unsigned 32-bit integers,
 * used throughout this header to inspect or build IEEE-754 bit patterns.
 */
union fi {
   float f;       /* value viewed as a float */
   int32_t i;     /* same bits, signed */
   uint32_t ui;   /* same bits, unsigned */
};


235674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen */ 236674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogenstatic INLINE float 237674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogenutil_fast_exp2(float x) 238674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen{ 239674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen int32_t ipart; 240674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen float fpart, mpart; 241674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen union fi epart; 242674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen 243674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen if(x > 129.00000f) 244674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen return 3.402823466e+38f; 245674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen 246674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen if (x < -126.99999f) 247674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen return 0.0f; 248674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen 249674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen ipart = (int32_t) x; 250674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen fpart = x - (float) ipart; 251674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen 252674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen /* same as 253674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen * epart.f = (float) (1 << ipart) 254674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen * but faster and without integer overflow for ipart > 31 255674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen */ 256674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen epart.i = (ipart + 127 ) << 23; 257674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen 258674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen mpart = pow2_table[POW2_TABLE_OFFSET + (int)(fpart * POW2_TABLE_SCALE)]; 259674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen 260674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen return epart.f * mpart; 
261674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen} 262674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen 263674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen 264674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen/** 265674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen * Fast approximation to exp(x). 266674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen */ 267674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogenstatic INLINE float 268674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogenutil_fast_exp(float x) 269674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen{ 270674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen const float k = 1.44269f; /* = log2(e) */ 271674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen return util_fast_exp2(k * x); 272674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen} 273674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen 274674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen 275674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen#define LOG2_TABLE_SIZE_LOG2 16 276674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen#define LOG2_TABLE_SCALE (1 << LOG2_TABLE_SIZE_LOG2) 277674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen#define LOG2_TABLE_SIZE (LOG2_TABLE_SCALE + 1) 278674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogenextern float log2_table[LOG2_TABLE_SIZE]; 279674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen 280674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen 281674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen/** 282674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen * Fast approximation to log2(x). 
283674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen */ 284674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogenstatic INLINE float 285674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogenutil_fast_log2(float x) 286674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen{ 287674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen union fi num; 288674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen float epart, mpart; 289674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen num.f = x; 290674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen epart = (float)(((num.i & 0x7f800000) >> 23) - 127); 291674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen /* mpart = log2_table[mantissa*LOG2_TABLE_SCALE + 0.5] */ 292674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen mpart = log2_table[((num.i & 0x007fffff) + (1 << (22 - LOG2_TABLE_SIZE_LOG2))) >> (23 - LOG2_TABLE_SIZE_LOG2)]; 293674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen return epart + mpart; 294674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen} 295674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen 296674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen 297674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen/** 298674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen * Fast approximation to x^y. 299674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen */ 300674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogenstatic INLINE float 301674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogenutil_fast_pow(float x, float y) 302674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen{ 303674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen return util_fast_exp2(util_fast_log2(x) * y); 304674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen} 305674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen 306674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen/* Note that this counts zero as a power of two. 
307674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen */ 308674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogenstatic INLINE boolean 309674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogenutil_is_power_of_two( unsigned v ) 310674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen{ 311674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen return (v & (v-1)) == 0; 312674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen} 313674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen 314674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen 315674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen/** 316674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen * Floor(x), returned as int. 317674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen */ 318674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogenstatic INLINE int 319674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogenutil_ifloor(float f) 320674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen{ 321674060f01e9090cd21b3c5656cc3204912ad17a6Jon Boekenoogen int ai, bi; 322 double af, bf; 323 union fi u; 324 af = (3 << 22) + 0.5 + (double) f; 325 bf = (3 << 22) + 0.5 - (double) f; 326 u.f = (float) af; ai = u.i; 327 u.f = (float) bf; bi = u.i; 328 return (ai - bi) >> 1; 329} 330 331 332/** 333 * Round float to nearest int. 334 */ 335static INLINE int 336util_iround(float f) 337{ 338#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86) 339 int r; 340 __asm__ ("fistpl %0" : "=m" (r) : "t" (f) : "st"); 341 return r; 342#elif defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86) 343 int r; 344 _asm { 345 fld f 346 fistp r 347 } 348 return r; 349#else 350 if (f >= 0.0f) 351 return (int) (f + 0.5f); 352 else 353 return (int) (f - 0.5f); 354#endif 355} 356 357 358/** 359 * Approximate floating point comparison 360 */ 361static INLINE boolean 362util_is_approx(float a, float b, float tol) 363{ 364 return fabs(b - a) <= tol; 365} 366 367 368/** 369 * Test if x is NaN or +/- infinity. 
370 */ 371static INLINE boolean 372util_is_inf_or_nan(float x) 373{ 374 union fi tmp; 375 tmp.f = x; 376 return !(int)((unsigned int)((tmp.i & 0x7fffffff)-0x7f800000) >> 31); 377} 378 379 380/** 381 * Find first bit set in word. Least significant bit is 1. 382 * Return 0 if no bits set. 383 */ 384#if defined(_MSC_VER) && _MSC_VER >= 1300 && (_M_IX86 || _M_AMD64 || _M_IA64) 385unsigned char _BitScanForward(unsigned long* Index, unsigned long Mask); 386#pragma intrinsic(_BitScanForward) 387static INLINE 388unsigned long ffs( unsigned long u ) 389{ 390 unsigned long i; 391 if (_BitScanForward(&i, u)) 392 return i + 1; 393 else 394 return 0; 395} 396#elif defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86) 397static INLINE 398unsigned ffs( unsigned u ) 399{ 400 unsigned i; 401 402 if (u == 0) { 403 return 0; 404 } 405 406 __asm bsf eax, [u] 407 __asm inc eax 408 __asm mov [i], eax 409 410 return i; 411} 412#elif defined(__MINGW32__) 413#define ffs __builtin_ffs 414#endif 415 416#ifdef __MINGW32__ 417#define ffs __builtin_ffs 418#endif 419 420 421/* Could also binary search for the highest bit. 422 */ 423static INLINE unsigned 424util_unsigned_logbase2(unsigned n) 425{ 426 unsigned log2 = 0; 427 while (n >>= 1) 428 ++log2; 429 return log2; 430} 431 432 433/** 434 * Return float bits. 435 */ 436static INLINE unsigned 437fui( float f ) 438{ 439 union fi fi; 440 fi.f = f; 441 return fi.ui; 442} 443 444 445/** 446 * Convert ubyte to float in [0, 1]. 447 * XXX a 256-entry lookup table would be slightly faster. 448 */ 449static INLINE float 450ubyte_to_float(ubyte ub) 451{ 452 return (float) ub * (1.0f / 255.0f); 453} 454 455 456/** 457 * Convert float in [0,1] to ubyte in [0,255] with clamping. 
458 */ 459static INLINE ubyte 460float_to_ubyte(float f) 461{ 462 const int ieee_0996 = 0x3f7f0000; /* 0.996 or so */ 463 union fi tmp; 464 465 tmp.f = f; 466 if (tmp.i < 0) { 467 return (ubyte) 0; 468 } 469 else if (tmp.i >= ieee_0996) { 470 return (ubyte) 255; 471 } 472 else { 473 tmp.f = tmp.f * (255.0f/256.0f) + 32768.0f; 474 return (ubyte) tmp.i; 475 } 476} 477 478static INLINE float 479byte_to_float_tex(int8_t b) 480{ 481 return (b == -128) ? -1.0F : b * 1.0F / 127.0F; 482} 483 484static INLINE int8_t 485float_to_byte_tex(float f) 486{ 487 return (int8_t) (127.0F * f); 488} 489 490/** 491 * Calc log base 2 492 */ 493static INLINE unsigned 494util_logbase2(unsigned n) 495{ 496 unsigned log2 = 0; 497 while (n >>= 1) 498 ++log2; 499 return log2; 500} 501 502 503/** 504 * Returns the smallest power of two >= x 505 */ 506static INLINE unsigned 507util_next_power_of_two(unsigned x) 508{ 509 unsigned i; 510 511 if (x == 0) 512 return 1; 513 514 --x; 515 516 for (i = 1; i < sizeof(unsigned) * 8; i <<= 1) 517 x |= x >> i; 518 519 return x + 1; 520} 521 522 523/** 524 * Return number of bits set in n. 525 */ 526static INLINE unsigned 527util_bitcount(unsigned n) 528{ 529#if defined(PIPE_CC_GCC) 530 return __builtin_popcount(n); 531#else 532 /* K&R classic bitcount. 533 * 534 * For each iteration, clear the LSB from the bitfield. 535 * Requires only one iteration per set bit, instead of 536 * one iteration per bit less than highest set bit. 537 */ 538 unsigned bits = 0; 539 for (bits; n; bits++) { 540 n &= n - 1; 541 } 542 return bits; 543#endif 544} 545 546 547/** 548 * Reverse byte order of a 32 bit word. 549 */ 550static INLINE uint32_t 551util_bswap32(uint32_t n) 552{ 553#if defined(PIPE_CC_GCC) && (PIPE_CC_GCC_VERSION >= 403) 554 return __builtin_bswap32(n); 555#else 556 return (n >> 24) | 557 ((n >> 8) & 0x0000ff00) | 558 ((n << 8) & 0x00ff0000) | 559 (n << 24); 560#endif 561} 562 563 564/** 565 * Reverse byte order of a 16 bit word. 
566 */ 567static INLINE uint16_t 568util_bswap16(uint16_t n) 569{ 570 return (n >> 8) | 571 (n << 8); 572} 573 574 575/** 576 * Clamp X to [MIN, MAX]. 577 * This is a macro to allow float, int, uint, etc. types. 578 */ 579#define CLAMP( X, MIN, MAX ) ( (X)<(MIN) ? (MIN) : ((X)>(MAX) ? (MAX) : (X)) ) 580 581#define MIN2( A, B ) ( (A)<(B) ? (A) : (B) ) 582#define MAX2( A, B ) ( (A)>(B) ? (A) : (B) ) 583 584#define MIN3( A, B, C ) ((A) < (B) ? MIN2(A, C) : MIN2(B, C)) 585#define MAX3( A, B, C ) ((A) > (B) ? MAX2(A, C) : MAX2(B, C)) 586 587#define MIN4( A, B, C, D ) ((A) < (B) ? MIN3(A, C, D) : MIN3(B, C, D)) 588#define MAX4( A, B, C, D ) ((A) > (B) ? MAX3(A, C, D) : MAX3(B, C, D)) 589 590 591/** 592 * Align a value, only works pot alignemnts. 593 */ 594static INLINE int 595align(int value, int alignment) 596{ 597 return (value + alignment - 1) & ~(alignment - 1); 598} 599 600/** 601 * Works like align but on npot alignments. 602 */ 603static INLINE size_t 604util_align_npot(size_t value, size_t alignment) 605{ 606 if (value % alignment) 607 return value + (alignment - (value % alignment)); 608 return value; 609} 610 611static INLINE unsigned 612u_minify(unsigned value, unsigned levels) 613{ 614 return MAX2(1, value >> levels); 615} 616 617#ifndef COPY_4V 618#define COPY_4V( DST, SRC ) \ 619do { \ 620 (DST)[0] = (SRC)[0]; \ 621 (DST)[1] = (SRC)[1]; \ 622 (DST)[2] = (SRC)[2]; \ 623 (DST)[3] = (SRC)[3]; \ 624} while (0) 625#endif 626 627 628#ifndef COPY_4FV 629#define COPY_4FV( DST, SRC ) COPY_4V(DST, SRC) 630#endif 631 632 633#ifndef ASSIGN_4V 634#define ASSIGN_4V( DST, V0, V1, V2, V3 ) \ 635do { \ 636 (DST)[0] = (V0); \ 637 (DST)[1] = (V1); \ 638 (DST)[2] = (V2); \ 639 (DST)[3] = (V3); \ 640} while (0) 641#endif 642 643 644static INLINE uint32_t util_unsigned_fixed(float value, unsigned frac_bits) 645{ 646 return value < 0 ? 
0 : (uint32_t)(value * (1<<frac_bits)); 647} 648 649static INLINE int32_t util_signed_fixed(float value, unsigned frac_bits) 650{ 651 return (int32_t)(value * (1<<frac_bits)); 652} 653 654 655 656#ifdef __cplusplus 657} 658#endif 659 660#endif /* U_MATH_H */ 661