1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/* ---- includes ----------------------------------------------------------- */
18
19#include "b_TensorEm/Flt16Vec.h"
20#include "b_BasicEm/Memory.h"
21#include "b_BasicEm/Math.h"
22#include "b_BasicEm/Functions.h"
23
24/* ------------------------------------------------------------------------- */
25
26/* ========================================================================= */
27/*                                                                           */
28/* ---- \ghd{ auxiliary functions } ---------------------------------------- */
29/*                                                                           */
30/* ========================================================================= */
31
32/* ------------------------------------------------------------------------- */
33
34/* ========================================================================= */
35/*                                                                           */
36/* ---- \ghd{ constructor / destructor } ----------------------------------- */
37/*                                                                           */
38/* ========================================================================= */
39
40/* ------------------------------------------------------------------------- */
41
42void bts_Flt16Vec_init( struct bbs_Context* cpA,
43						struct bts_Flt16Vec* ptrA )
44{
45	bbs_Int16Arr_init( cpA, &ptrA->arrE );
46	ptrA->expE = 0;
47}
48
49/* ------------------------------------------------------------------------- */
50
51void bts_Flt16Vec_exit( struct bbs_Context* cpA,
52						struct bts_Flt16Vec* ptrA )
53{
54	bbs_Int16Arr_exit( cpA, &ptrA->arrE );
55	ptrA->expE = 0;
56}
57
58/* ------------------------------------------------------------------------- */
59
60/* ========================================================================= */
61/*                                                                           */
62/* ---- \ghd{ operators } -------------------------------------------------- */
63/*                                                                           */
64/* ========================================================================= */
65
66/* ------------------------------------------------------------------------- */
67
68void bts_Flt16Vec_copy( struct bbs_Context* cpA,
69						struct bts_Flt16Vec* ptrA,
70						const struct bts_Flt16Vec* srcPtrA )
71{
72	bbs_Int16Arr_copy( cpA, &ptrA->arrE, &srcPtrA->arrE );
73	ptrA->expE = srcPtrA->expE;
74}
75
76/* ------------------------------------------------------------------------- */
77
78flag bts_Flt16Vec_equal( struct bbs_Context* cpA,
79						 const struct bts_Flt16Vec* ptrA,
80						 const struct bts_Flt16Vec* srcPtrA )
81{
82	if( !bbs_Int16Arr_equal( cpA, &ptrA->arrE, &srcPtrA->arrE ) ) return FALSE;
83	if( ptrA->expE != srcPtrA->expE ) return FALSE;
84	return TRUE;
85}
86
87/* ------------------------------------------------------------------------- */
88
89/* ========================================================================= */
90/*                                                                           */
91/* ---- \ghd{ query functions } -------------------------------------------- */
92/*                                                                           */
93/* ========================================================================= */
94
95/* ------------------------------------------------------------------------- */
96
97int16 bts_Flt16Vec_avg( struct bbs_Context* cpA, const struct bts_Flt16Vec* ptrA )
98{
99	uint16 iL;
100	uint16 sizeL = ptrA->arrE.sizeE;
101	int32 sumL = 0;
102	const int16* srcL = ptrA->arrE.arrPtrE;
103	for( iL = 0; iL < sizeL; iL++ )
104	{
105		sumL += srcL[ iL ];
106	}
107	return sumL / ( int32 )sizeL;
108}
109
110/* ------------------------------------------------------------------------- */
111
112uint32 bts_Flt16Vec_norm( struct bbs_Context* cpA, const struct bts_Flt16Vec* ptrA )
113{
114	return bbs_vecNorm16( ptrA->arrE.arrPtrE, ptrA->arrE.sizeE );
115}
116
117/* ------------------------------------------------------------------------- */
118
119uint16 bts_Flt16Vec_maxAbs( struct bbs_Context* cpA, const struct bts_Flt16Vec* ptrA )
120{
121	uint16 iL;
122	uint16 sizeL = ptrA->arrE.sizeE;
123	uint16 maxL = 0;
124	const int16* srcL = ptrA->arrE.arrPtrE;
125	for( iL = 0; iL < sizeL; iL++ )
126	{
127		uint16 vL = srcL[ iL ] > 0 ? srcL[ iL ] : -srcL[ iL ];
128		maxL = vL > maxL ? vL : maxL;
129	}
130	return maxL;
131}
132
133/* ------------------------------------------------------------------------- */
134
135/* ========================================================================= */
136/*                                                                           */
137/* ---- \ghd{ modify functions } ------------------------------------------- */
138/*                                                                           */
139/* ========================================================================= */
140
141/* ------------------------------------------------------------------------- */
142
143void bts_Flt16Vec_create( struct bbs_Context* cpA,
144						  struct bts_Flt16Vec* ptrA,
145						  uint32 sizeA,
146						  struct bbs_MemSeg* mspA )
147{
148	bbs_Int16Arr_create( cpA, &ptrA->arrE, sizeA, mspA );
149}
150
151/* ------------------------------------------------------------------------- */
152
153void bts_Flt16Vec_size( struct bbs_Context* cpA,
154						struct bts_Flt16Vec* ptrA,
155						uint32 sizeA )
156{
157	bbs_Int16Arr_size( cpA, &ptrA->arrE, sizeA );
158}
159
160/* ------------------------------------------------------------------------- */
161
162/* ========================================================================= */
163/*                                                                           */
164/* ---- \ghd{ I/O } -------------------------------------------------------- */
165/*                                                                           */
166/* ========================================================================= */
167
168/* ------------------------------------------------------------------------- */
169
170uint32 bts_Flt16Vec_memSize( struct bbs_Context* cpA,
171							  const struct bts_Flt16Vec *ptrA )
172{
173	return  bbs_SIZEOF16( uint32 ) /* mem size */
174		+ bbs_Int16Arr_memSize( cpA, &ptrA->arrE )
175		+ bbs_SIZEOF16( ptrA->expE );
176}
177
178/* ------------------------------------------------------------------------- */
179
180uint32 bts_Flt16Vec_memWrite( struct bbs_Context* cpA,
181							   const struct bts_Flt16Vec* ptrA,
182							   uint16* memPtrA )
183{
184	uint32 memSizeL = bts_Flt16Vec_memSize( cpA, ptrA );
185	memPtrA += bbs_memWrite32( &memSizeL, memPtrA );
186	memPtrA += bbs_Int16Arr_memWrite( cpA, &ptrA->arrE, memPtrA );
187	memPtrA += bbs_memWrite16( &ptrA->expE, memPtrA );
188	return memSizeL;
189}
190
191/* ------------------------------------------------------------------------- */
192
193uint32 bts_Flt16Vec_memRead( struct bbs_Context* cpA,
194							  struct bts_Flt16Vec* ptrA,
195							  const uint16* memPtrA,
196							  struct bbs_MemSeg* mspA )
197{
198	uint32 memSizeL;
199	if( bbs_Context_error( cpA ) ) return 0;
200	memPtrA += bbs_memRead32( &memSizeL, memPtrA );
201	memPtrA += bbs_Int16Arr_memRead( cpA, &ptrA->arrE, memPtrA, mspA );
202	memPtrA += bbs_memRead16( &ptrA->expE, memPtrA );
203
204	if( memSizeL != bts_Flt16Vec_memSize( cpA, ptrA ) )
205	{
206		bbs_ERR0( bbs_ERR_CORRUPT_DATA, "uint32 bts_Flt16Vec_memRead( const struct bts_Flt16Vec* ptrA, const void* memPtrA ):\n"
207                   "size mismatch" );
208		return 0;
209	}
210	return memSizeL;
211}
212
213/* ------------------------------------------------------------------------- */
214
215/* ========================================================================= */
216/*                                                                           */
217/* ---- \ghd{ exec functions } --------------------------------------------- */
218/*                                                                           */
219/* ========================================================================= */
220
221/* ------------------------------------------------------------------------- */
222
223void bts_Flt16Vec_maximizeMantisse( struct bbs_Context* cpA, struct bts_Flt16Vec* ptrA )
224{
225    uint32 maxAbsL = bts_Flt16Vec_maxAbs( cpA, ptrA );
226	int16 shlL = 0;
227
228	if( maxAbsL == 0 ) return; /* cannot maximize 0 */
229
230	while( maxAbsL < 0x4000 )
231	{
232		shlL++;
233		maxAbsL <<= 1;
234	}
235
236	if( shlL > 0 )
237	{
238		uint32 iL;
239		uint32 sizeL = ptrA->arrE.sizeE;
240		int16* dstL = ptrA->arrE.arrPtrE;
241		for( iL = 0; iL < sizeL; iL++ ) dstL[ iL ] <<= shlL;
242		ptrA->expE -= shlL;
243	}
244}
245
246/* ------------------------------------------------------------------------- */
247
248uint32 bts_Flt16Vec_maximizeAbsValue( struct bbs_Context* cpA, struct bts_Flt16Vec* ptrA )
249{
250    int32 maxAbsL = bts_Flt16Vec_maxAbs( cpA, ptrA );
251	int32 fL;
252	if( maxAbsL == 0 ) return 0; /* vector is zero */
253
254	fL = ( int32 )0x7FFF0000 / maxAbsL;
255
256	{
257		uint32 iL;
258		uint32 sizeL = ptrA->arrE.sizeE;
259		int16* dstL = ptrA->arrE.arrPtrE;
260		for( iL = 0; iL < sizeL; iL++ ) dstL[ iL ] = ( ( int32 )dstL[ iL ] * fL + 32768 ) >> 16;
261	}
262
263	return fL;
264}
265
266/* ------------------------------------------------------------------------- */
267
268void bts_Flt16Vec_zeroAverage( struct bbs_Context* cpA, struct bts_Flt16Vec* ptrA )
269{
270	uint16 iL;
271	uint16 sizeL = ptrA->arrE.sizeE;
272	int16* dstL = ptrA->arrE.arrPtrE;
273	int16 avgL = bts_Flt16Vec_avg( cpA, ptrA );
274	for( iL = 0; iL < sizeL; iL++ ) dstL[ iL ] -= avgL;
275}
276
277/* ------------------------------------------------------------------------- */
278
279void bts_Flt16Vec_normalize( struct bbs_Context* cpA, struct bts_Flt16Vec* ptrA )
280{
281	uint32 normL = bts_Flt16Vec_norm( cpA, ptrA );
282
283	if( normL == 0 )
284	{
285		/* vector is zero - do nothing */
286		return;
287	}
288	else
289	{
290		int16* dstL = ptrA->arrE.arrPtrE;
291		uint16 iL;
292		uint16 sizeL = ptrA->arrE.sizeE;
293	    int16 expL = 0;
294		int32 fL;
295
296		/* let norm occupy 17 bits */
297		if( ( normL & 0xFFFE0000 ) != 0 )
298		{
299			while( ( ( normL >> -expL ) & 0xFFFE0000 ) != 0 ) expL--;
300			normL >>= -expL;
301		}
302		else
303		{
304			while( ( ( normL <<  expL ) & 0xFFFF0000 ) == 0 ) expL++;
305			normL <<=  expL;
306		}
307
308		/* fL is positive and occupies only 16 bits - a product with int16 fits in int32 */
309		fL = ( uint32 )0xFFFFFFFF / normL;
310
311		/* multiply with factor */
312		for( iL = 0; iL < sizeL; iL++ ) dstL[ iL ] = ( ( ( ( int32 )dstL[ iL ] * fL ) >> 15 ) + 1 ) >> 1;
313
314		/* set exponent */
315		ptrA->expE = expL - 16;
316	}
317/*
318	{
319		uint32 testNormL = bts_Flt16Vec_norm( cpA, ptrA );
320		printf( "test norm %f\n", ( float )testNormL / ( 1 << -ptrA->expE ) );
321	}
322*/
323}
324
325/* ------------------------------------------------------------------------- */
326
327void bts_Flt16Vec_setZero( struct bbs_Context* cpA, struct bts_Flt16Vec* ptrA )
328{
329	bbs_Int16Arr_fill( cpA, &ptrA->arrE, 0 );
330	ptrA->expE = 0;
331}
332
333/* ------------------------------------------------------------------------- */
334
335void bts_Flt16Vec_mul( struct bbs_Context* cpA, struct bts_Flt16Vec* ptrA, int32 valA, int16 expA )
336{
337	int32 valL = valA;
338	int16 expL = expA;
339
340	if( valL == 0 )
341	{
342		bts_Flt16Vec_setZero( cpA, ptrA );
343		return;
344	}
345	else
346	{
347		uint32 iL;
348		uint32 sizeL = ptrA->arrE.sizeE;
349		int16* dstL = ptrA->arrE.arrPtrE;
350
351		/* adjust valL to maximum 16 bit accuracy  */
352		uint32 absValL = valL > 0 ? valL : -valL;
353		if( ( absValL & 0xFFFF8000 ) != 0 )
354		{
355			int32 shrL = 0;
356			while( ( absValL & 0xFFFF8000 ) != 0 )
357			{
358				absValL >>= 1;
359				shrL++;
360			}
361
362			if( shrL > 0 )
363			{
364				valL = ( ( valL >> ( shrL - 1 ) ) + 1 ) >> 1;
365				expL += shrL;
366				if( valL >= 0x08000 ) valL = 0x07FFF; /* saturate */
367			}
368		}
369		else
370		{
371			int32 shlL = 0;
372			while( ( absValL & 0xFFFFC000 ) == 0 )
373			{
374				absValL <<= 1;
375				shlL++;
376			}
377
378			valL <<= shlL;
379			expL -= shlL;
380		}
381
382		for( iL = 0; iL < sizeL; iL++ )
383		{
384			dstL[ iL ] = ( ( ( ( int32 )dstL[ iL ] * valL ) >> 15 ) + 1 ) >> 1;
385		}
386		ptrA->expE += expL + 16;
387	}
388}
389
390/* ------------------------------------------------------------------------- */
391
392void bts_Flt16Vec_dotPtrd( struct bbs_Context* cpA, struct bts_Flt16Vec* vp1A, struct bts_Flt16Vec* vp2A, int32* manPtrA, int32* expPtrA )
393{
394	bbs_DEF_fNameL( "void bts_Flt16Vec_dotPtrd( struct bbs_Context* cpA, struct bts_Flt16Vec* vp1A, struct bts_Flt16Vec* vp2A, int32* matPtrA, int32* expPtrA )" )
395	uint16 iL;
396	uint16 sizeL = vp1A->arrE.sizeE;
397	const int16* arr1L = vp1A->arrE.arrPtrE;
398	const int16* arr2L = vp2A->arrE.arrPtrE;
399	int16 shrm1L = -1; /* shift minus 1 */
400	int32 sumL;
401
402	if( vp1A->arrE.sizeE != vp2A->arrE.sizeE )
403	{
404		bbs_ERROR1( "%s:\nVectors have different size", fNameL );
405		return;
406	}
407
408	sumL = 0;
409	/* shrm1L == -1 */
410	for( iL = 0; iL < sizeL; iL++ )
411	{
412		sumL += ( int32 )arr1L[ iL ] * ( int32 )arr2L[ iL ];
413		if( ( ( ( sumL > 0 ) ? sumL : -sumL ) & 0xC0000000 ) != 0 ) break;
414	}
415
416	if( iL < sizeL )
417	{
418		/* danger of overflow: increase shift; adjust sum */
419		shrm1L++;
420		sumL = ( ( sumL >> 1 ) + 1 ) >> 1;
421
422		/* shrm1L == 0 */
423		for( iL = 0; iL < sizeL; iL++ )
424		{
425			sumL += ( int32 )( ( arr1L[ iL ] + 1 ) >> 1 ) * ( int32 )( ( arr2L[ iL ] + 1 ) >> 1 );
426			if( ( ( ( sumL > 0 ) ? sumL : -sumL ) & 0xC0000000 ) != 0 ) break;
427		}
428
429		for( iL = 0; iL < sizeL; iL++ )
430		{
431			if( ( ( ( sumL > 0 ) ? sumL : -sumL ) & 0xC0000000 ) != 0 )
432			{
433				/* danger of overflow: increase shift; adjust sum */
434				shrm1L++;
435				sumL = ( ( sumL >> 1 ) + 1 ) >> 1;
436			}
437
438			sumL += ( int32 )( ( ( arr1L[ iL ] >> shrm1L ) + 1 ) >> 1 ) * ( int32 )( ( ( arr2L[ iL ] >> shrm1L ) + 1 ) >> 1 );
439		}
440	}
441
442	if( manPtrA != NULL ) *manPtrA = sumL;
443	if( expPtrA != NULL ) *expPtrA = vp1A->expE + vp2A->expE + ( ( shrm1L + 1 ) << 1 );
444}
445
446/* ------------------------------------------------------------------------- */
447
448void bts_Flt16Vec_append( struct bbs_Context* cpA, struct bts_Flt16Vec* ptrA, struct bts_Flt16Vec* srcPtrA )
449{
450	if( ptrA->arrE.sizeE == 0 )
451	{
452		bts_Flt16Vec_copy( cpA, ptrA, srcPtrA );
453	}
454	else
455	{
456		uint32 idxL = ptrA->arrE.sizeE;
457		bts_Flt16Vec_size( cpA, ptrA, idxL + srcPtrA->arrE.sizeE );
458
459		/* copy data */
460		bbs_memcpy16( ptrA->arrE.arrPtrE + idxL, srcPtrA->arrE.arrPtrE, srcPtrA->arrE.sizeE );
461
462		/* equalize exponent */
463		if( ptrA->expE > srcPtrA->expE )
464		{
465			uint32 iL;
466			uint32 sizeL = srcPtrA->arrE.sizeE;
467			uint32 shrL = ptrA->expE - srcPtrA->expE;
468			int16* dstL = ptrA->arrE.arrPtrE + idxL;
469			for( iL = 0; iL < sizeL; iL++ ) dstL[ iL ] = ( ( dstL[ iL ] >> ( shrL - 1 ) ) + 1 ) >> 1;
470		}
471		else if( ptrA->expE < srcPtrA->expE )
472		{
473			uint32 iL;
474			uint32 sizeL = idxL;
475			uint32 shrL = srcPtrA->expE - ptrA->expE;
476			int16* dstL = ptrA->arrE.arrPtrE;
477			for( iL = 0; iL < sizeL; iL++ ) dstL[ iL ] = ( ( dstL[ iL ] >> ( shrL - 1 ) ) + 1 ) >> 1;
478			ptrA->expE = srcPtrA->expE;
479		}
480	}
481}
482
483/* ------------------------------------------------------------------------- */
484
485/* ========================================================================= */
486
487