1/* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#ifndef bbs_MATH_EM_H 18#define bbs_MATH_EM_H 19 20/** 21 * This files contains mathematical functions. 22 */ 23 24/* ---- includes ----------------------------------------------------------- */ 25 26#include "b_BasicEm/Basic.h" 27#ifdef HW_TMS320C5x 28#include "Dsplib.h" 29#endif 30 31/* ---- related objects --------------------------------------------------- */ 32 33/* ---- typedefs ----------------------------------------------------------- */ 34 35/* ---- constants ---------------------------------------------------------- */ 36 37/* ---- macros ------------------------------------------------------------- */ 38 39/** computes the maximum of two variables */ 40#define bbs_max( val1A, val2A ) ( ( val1A ) > ( val2A ) ? ( val1A ) : ( val2A ) ) 41 42/** computes the minimum of two variables */ 43#define bbs_min( val1A, val2A ) ( ( val1A ) < ( val2A ) ? ( val1A ) : ( val2A ) ) 44 45/** computes the absolute value */ 46#define bbs_abs( valA ) ( ( valA ) > 0 ? ( valA ) : -( valA ) ) 47 48/* ---- external functions ------------------------------------------------- */ 49 50/** 51 * Computes square root from 32 bit value. 52 * The return value 'r' is the largest possible integer that 53 * satisfies r * r <= valA. 54 * This behavior is identical with (uint16)sqrt( valA ). 55 * C6201: 162 cycles 56 */ 57uint16 bbs_sqrt32( uint32 valA ); 58 59/** 60 * Computes square root from 16 bit value. 61 * The return value 'r' is the largest possible integer that 62 * satisfies r * r <= valA. 63 * This behavior is identical with (uint8)sqrt( valA ). 64 */ 65uint8 bbs_sqrt16( uint16 valA ); 66 67/** Sqrt approximation */ 68uint16 bbs_fastSqrt32( uint32 valA ); 69 70/** sqrt(1/x) approximation 71 * return format 1.31 72 */ 73uint32 bbs_invSqrt32( uint32 valA ); 74 75/** 1/x approximation 76 * return format 2.30 77 */ 78int32 bbs_inv32( int32 valA ); 79 80/** Returns integer log2 of valA 81 * C6201: 24 cycles 82 */ 83uint32 bbs_intLog2( uint32 valA ); 84 85/** 86 * Returns (2^x) - 1 for a value range of [0,1[ 87 * Format of valA: 0.32 88 * Format of return value: 0.32 89 */ 90uint32 bbs_pow2M1( uint32 valA ); 91 92/** 93 * Returns (2^x) for a value range of [-16,16[ 94 * Format of valA: 5.27 95 * Format of return value: 16.16 96 */ 97uint32 bbs_pow2( int32 valA ); 98 99 100/** 101 * Returns (e^x) for a value range of [-11.0903,11.0903] 102 * If valA is smaller than -11.0903, the function returns 0 103 * If valA is larger than 11.0903, the function returns ( 2^32 - 1 ) / ( 2^16 ) 104 * Format of valA: 5.27 105 * Format of return value: 16.16 106 * C6201: 72 cycles 107 */ 108uint32 bbs_exp( int32 valA ); 109 110/** saturates a signed 32 bit value to signed 16 bit */ 111int16 bbs_satS16( int32 valA ); 112 113/** 114 * Returns the value after rounding to the nearest integer. 115 */ 116/* int32 bbs_round( int32 valA, int32 bbpA ); */ 117 118/** 119 * Computes the dot product of vec1A with vec2A, both of size sizeA. 120 * (no overflow handling, slow for sizeA < 32 ) 121 */ 122int32 bbs_dotProductInt16( const int16* vec1A, const int16* vec2A, uint32 sizeA ); 123 124/** Fermi function ( 1.0 / ( 1.0 + exp( -valA ) ) ) 125 * Format valA: 16.16 126 * Format return: 2.30 127 */ 128int32 bbs_fermi( int32 valA ); 129 130/** reduces uint32 to N bits; if it has already <= N bits, nothing happens */ 131void bbs_uint32ReduceToNBits( uint32* argPtrA, int32* bbpPtrA, uint32 nBitsA ); 132 133/** reduces int32 to N bits; if it has already <= N bits, nothing happens */ 134void bbs_int32ReduceToNBits( int32* argPtrA, int32* bbpPtrA, uint32 nBitsA ); 135 136/** converts a number with source bbp to a 32 bit number with dst bbp; 137 * applies appropriate shifting, rounding and saturation to minimize overflow-damage 138 */ 139uint32 bbs_convertU32( uint32 srcA, int32 srcBbpA, int32 dstBbpA ); 140 141/** converts a number with source bbp to a 32 bit number with dst bbp; 142 * applies appropriate shifting, rounding and saturation to minimize overflow-damage 143 */ 144int32 bbs_convertS32( int32 srcA, int32 srcBbpA, int32 dstBbpA ); 145 146/** vector power return val = sum(xA_i^2), input 1.15, output 1.30 */ 147int32 bbs_vecPowerFlt16( const int16 *xA, int16 nxA ); 148 149/** returns floating point squared norm of 32 bit vector (maximum accuracy - overflow-safe); 150 * Function is slow 151 * returned square norm = man * 2^exp 152 * The returned exponent is always even 153 */ 154void bbs_vecSqrNorm32( const int32* vecA, uint32 sizeA, uint32* manPtrA, uint32* expPtrA ); 155 156/** returns floating point squared norm of 16 bit vector (maximum accuracy - overflow-safe); 157 * returned square norm = man * 2^exp 158 * The returned exponent is always even 159 */ 160void bbs_vecSqrNorm16( const int16* vecA, uint32 sizeA, uint32* manPtrA, uint32* expPtrA ); 161 162/** returns the norm of a 16 bit vector; 163 * overflow-safe when sizeA < 65535 164 */ 165uint32 bbs_vecNorm16( const int16* vecA, uint32 sizeA ); 166 167/** multiplies two unsigned 32 bit values and returns product decomposed to mantisse and exponent 168 * maximum accuracy - overflow-safe 169 * exponent is always >= 0 170 */ 171void bbs_mulU32( uint32 v1A, uint32 v2A, uint32* manPtrA, int32* expPtrA ); 172 173/** multiplies two signed 32 bit values and returns product decomposed to mantisse and exponent 174 * maximum accuracy - overflow-safe 175 * exponent is always >= 0 176 */ 177void bbs_mulS32( int32 v1A, int32 v2A, int32* manPtrA, int32* expPtrA ); 178 179/** matrix multiply rA = x1A * x2A, input/output 1.15, no overflow protection, in-place not allowed */ 180void bbs_matMultiplyFlt16( const int16 *x1A, int16 row1A, int16 col1A, 181 const int16 *x2A, int16 col2A, int16 *rA ); 182 183/** matrix multiply rA = x1A * transposed( x2A ), input/output 1.15, no overflow protection, in-place not allowed */ 184void bbs_matMultiplyTranspFlt16( const int16 *x1A, int16 row1A, int16 col1A, 185 const int16 *x2A, int16 row2A, int16 *rA ); 186 187/* 188#ifdef mtrans 189#define bbs_matTrans mtrans 190#else 191uint16 bbs_matTrans( int16 *xA, int16 rowA, int16 colA, int16 *rA ); 192#endif 193 194#ifdef atan2_16 195#define bbs_vecPhase atan2_16 196#else 197uint16 bbs_vecPhase( int16* reA, int16* imA, int16* phaseA, uint16 sizeA ); 198#endif 199*/ 200 201#endif /* bbs_MATH_EM_H */ 202 203