1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/* ---- includes ----------------------------------------------------------- */
18
19#include "b_TensorEm/CompactMat.h"
20#include "b_TensorEm/Functions.h"
21#include "b_BasicEm/Math.h"
22#include "b_BasicEm/Functions.h"
23#include "b_BasicEm/Memory.h"
24
25/* ------------------------------------------------------------------------- */
26
27/* ========================================================================= */
28/*                                                                           */
29/* ---- \ghd{ auxiliary functions } ---------------------------------------- */
30/*                                                                           */
31/* ========================================================================= */
32
33/* ------------------------------------------------------------------------- */
34
35/** Returns dot product of inVec with indexed row
36    The result is a floating point expresstion:
37		upper 16 bit: signed value
38		lower 16 bit: signed exponent
39 */
40int32 bts_CompactMat_fltDotPrdRow( struct bbs_Context* cpA,
41								   struct bts_CompactMat* ptrA,
42							       const int16* inVecA,
43							       uint32 inNormBitsA,
44							       uint32 rowA )
45{
46	const int16* rowPtrL = ptrA->cpsArrE.arrPtrE + ptrA->wordsPerRowE * rowA;
47
48	/* extract row-header info */
49	uint32 offsL = *rowPtrL++;
50	uint32 sizeL = *rowPtrL++;
51	int32 factorManL = *rowPtrL++;
52	int32 factorExpL = *rowPtrL++;
53	uint32 rowNormBitsL = *rowPtrL++;
54
55	/* consider possible overflow */
56	uint16 overflowBitsL = ( inNormBitsA + rowNormBitsL >= 31 ) ? inNormBitsA + rowNormBitsL - 31 : 0;
57
58	const int16* inPtrL = inVecA + offsL;
59
60	count_t iL;
61	int32 sumL = 0;
62
63	if( overflowBitsL == 0 ) /* raw dot product fits in int32 */
64	{
65		switch( ptrA->bitsPerValueE )
66		{
67			case 16:
68			{
69				for( iL = sizeL; iL > 0; iL-- ) sumL += ( ( int32 )*rowPtrL++ * ( int32 )*inPtrL++ );
70			}
71			break;
72
73			#ifndef HW_TMS320C5x /* platforms that don't have int8 must use the 'default' implementation */
74
75			case 8:
76			{
77				const uint16* dpL = ( uint16* )rowPtrL;
78				for( iL = sizeL; iL >= 8; iL -= 8 )
79				{
80					sumL += ( ( int8 )  dpL[ 0 ]         * ( int32 )inPtrL[ 0 ] );
81					sumL += ( ( int8 )( dpL[ 0 ] >>  8 ) * ( int32 )inPtrL[ 1 ] );
82					sumL += ( ( int8 )  dpL[ 1 ]         * ( int32 )inPtrL[ 2 ] );
83					sumL += ( ( int8 )( dpL[ 1 ] >>  8 ) * ( int32 )inPtrL[ 3 ] );
84					sumL += ( ( int8 )  dpL[ 2 ]         * ( int32 )inPtrL[ 4 ] );
85					sumL += ( ( int8 )( dpL[ 2 ] >>  8 ) * ( int32 )inPtrL[ 5 ] );
86					sumL += ( ( int8 )  dpL[ 3 ]         * ( int32 )inPtrL[ 6 ] );
87					sumL += ( ( int8 )( dpL[ 3 ] >>  8 ) * ( int32 )inPtrL[ 7 ] );
88					dpL += 4;
89					inPtrL += 8;
90				}
91				for( ; iL >= 2; iL -= 2 )
92				{
93					sumL += ( ( int8 )  *dpL         * ( int32 )inPtrL[ 0 ] );
94					sumL += ( ( int8 )( *dpL >>  8 ) * ( int32 )inPtrL[ 1 ] );
95					dpL++;
96					inPtrL += 2;
97				}
98				if( iL > 0 )
99				{
100					sumL += ( ( int8 )*dpL++ * ( int32 )inPtrL[ 0 ] );
101				}
102			}
103			break;
104
105			case 6:
106			{
107				const uint16* dpL = ( uint16* )rowPtrL;
108				for( iL = sizeL; iL >= 8; iL -= 8 )
109				{
110					int32 lSumL = 0;
111					lSumL += ( ( int8 )     ( dpL[ 0 ] <<  2 )                                  * ( int32 )inPtrL[ 0 ] );
112					lSumL += ( ( int8 ) (   ( dpL[ 0 ] >>  4 )                       & 0x00FC ) * ( int32 )inPtrL[ 1 ] );
113					lSumL += ( ( int8 ) ( ( ( dpL[ 0 ] >> 10 ) | ( dpL[ 1 ] << 6 ) ) & 0x00FC ) * ( int32 )inPtrL[ 2 ] );
114					lSumL += ( ( int8 ) (   ( dpL[ 1 ]       )                       & 0x00FC ) * ( int32 )inPtrL[ 3 ] );
115					lSumL += ( ( int8 ) (   ( dpL[ 1 ] >>  6 )                       & 0x00FC ) * ( int32 )inPtrL[ 4 ] );
116					lSumL += ( ( int8 ) ( ( ( dpL[ 1 ] >> 12 ) | ( dpL[ 2 ] << 4 ) ) & 0x00FC ) * ( int32 )inPtrL[ 5 ] );
117					lSumL += ( ( int8 ) (   ( dpL[ 2 ] >>  2 )                       & 0x00FC ) * ( int32 )inPtrL[ 6 ] );
118					lSumL += ( ( int8 ) (   ( dpL[ 2 ] >>  8 )                       & 0x00FC ) * ( int32 )inPtrL[ 7 ] );
119					sumL += ( lSumL >> 2 );
120					dpL += 3;
121					inPtrL += 8;
122				}
123
124				{
125					int32 lSumL = 0;
126					if( iL > 0 ) lSumL += ( ( int8 )     ( dpL[ 0 ] <<  2 )                                  * ( int32 )inPtrL[ 0 ] );
127					if( iL > 1 ) lSumL += ( ( int8 ) (   ( dpL[ 0 ] >>  4 )                       & 0x00FC ) * ( int32 )inPtrL[ 1 ] );
128					if( iL > 2 ) lSumL += ( ( int8 ) ( ( ( dpL[ 0 ] >> 10 ) | ( dpL[ 1 ] << 6 ) ) & 0x00FC ) * ( int32 )inPtrL[ 2 ] );
129					if( iL > 3 ) lSumL += ( ( int8 ) (   ( dpL[ 1 ]       )                       & 0x00FC ) * ( int32 )inPtrL[ 3 ] );
130					if( iL > 4 ) lSumL += ( ( int8 ) (   ( dpL[ 1 ] >>  6 )                       & 0x00FC ) * ( int32 )inPtrL[ 4 ] );
131					if( iL > 5 ) lSumL += ( ( int8 ) ( ( ( dpL[ 1 ] >> 12 ) | ( dpL[ 2 ] << 4 ) ) & 0x00FC ) * ( int32 )inPtrL[ 5 ] );
132					if( iL > 6 ) lSumL += ( ( int8 ) (   ( dpL[ 2 ] >>  2 )                       & 0x00FC ) * ( int32 )inPtrL[ 6 ] );
133					sumL += ( lSumL >> 2 );
134				}
135			}
136			break;
137
138			case 5:
139			{
140				const uint16* dpL = ( uint16* )rowPtrL;
141				for( iL = sizeL; iL >= 16; iL -= 16 )
142				{
143					int32 lSumL = 0;
144					lSumL += ( ( int8 )     ( dpL[ 0 ] <<  3 )                                  * ( int32 )inPtrL[  0 ] );
145					lSumL += ( ( int8 ) (   ( dpL[ 0 ] >>  2 )                       & 0x00F8 ) * ( int32 )inPtrL[  1 ] );
146					lSumL += ( ( int8 ) (   ( dpL[ 0 ] >>  7 )                       & 0x00F8 ) * ( int32 )inPtrL[  2 ] );
147					lSumL += ( ( int8 ) ( ( ( dpL[ 0 ] >> 12 ) | ( dpL[ 1 ] << 4 ) ) & 0x00F8 ) * ( int32 )inPtrL[  3 ] );
148					lSumL += ( ( int8 ) (   ( dpL[ 1 ] >>  1 )                       & 0x00F8 ) * ( int32 )inPtrL[  4 ] );
149					lSumL += ( ( int8 ) (   ( dpL[ 1 ] >>  6 )                       & 0x00F8 ) * ( int32 )inPtrL[  5 ] );
150					lSumL += ( ( int8 ) ( ( ( dpL[ 1 ] >> 11 ) | ( dpL[ 2 ] << 5 ) ) & 0x00F8 ) * ( int32 )inPtrL[  6 ] );
151					lSumL += ( ( int8 ) (   ( dpL[ 2 ]       )                       & 0x00F8 ) * ( int32 )inPtrL[  7 ] );
152					lSumL += ( ( int8 ) (   ( dpL[ 2 ] >>  5 )                       & 0x00F8 ) * ( int32 )inPtrL[  8 ] );
153					lSumL += ( ( int8 ) ( ( ( dpL[ 2 ] >> 10 ) | ( dpL[ 3 ] << 6 ) ) & 0x00F8 ) * ( int32 )inPtrL[  9 ] );
154					lSumL += ( ( int8 ) (   ( dpL[ 3 ] <<  1 )                       & 0x00F8 ) * ( int32 )inPtrL[ 10 ] );
155					lSumL += ( ( int8 ) (   ( dpL[ 3 ] >>  4 )                       & 0x00F8 ) * ( int32 )inPtrL[ 11 ] );
156					lSumL += ( ( int8 ) ( ( ( dpL[ 3 ] >>  9 ) | ( dpL[ 4 ] << 7 ) ) & 0x00F8 ) * ( int32 )inPtrL[ 12 ] );
157					lSumL += ( ( int8 ) (   ( dpL[ 4 ] <<  2 )                       & 0x00F8 ) * ( int32 )inPtrL[ 13 ] );
158					lSumL += ( ( int8 ) (   ( dpL[ 4 ] >>  3 )                       & 0x00F8 ) * ( int32 )inPtrL[ 14 ] );
159					lSumL += ( ( int8 ) (   ( dpL[ 4 ] >>  8 )                       & 0x00F8 ) * ( int32 )inPtrL[ 15 ] );
160					sumL += ( lSumL >> 3 );
161					dpL += 5;
162					inPtrL += 16;
163				}
164
165				{
166					int32 lSumL = 0;
167					if( iL >  0 ) lSumL += ( ( int8 )     ( dpL[ 0 ] <<  3 )                                  * ( int32 )inPtrL[  0 ] );
168					if( iL >  1 ) lSumL += ( ( int8 ) (   ( dpL[ 0 ] >>  2 )                       & 0x00F8 ) * ( int32 )inPtrL[  1 ] );
169					if( iL >  2 ) lSumL += ( ( int8 ) (   ( dpL[ 0 ] >>  7 )                       & 0x00F8 ) * ( int32 )inPtrL[  2 ] );
170					if( iL >  3 ) lSumL += ( ( int8 ) ( ( ( dpL[ 0 ] >> 12 ) | ( dpL[ 1 ] << 4 ) ) & 0x00F8 ) * ( int32 )inPtrL[  3 ] );
171					if( iL >  4 ) lSumL += ( ( int8 ) (   ( dpL[ 1 ] >>  1 )                       & 0x00F8 ) * ( int32 )inPtrL[  4 ] );
172					if( iL >  5 ) lSumL += ( ( int8 ) (   ( dpL[ 1 ] >>  6 )                       & 0x00F8 ) * ( int32 )inPtrL[  5 ] );
173					if( iL >  6 ) lSumL += ( ( int8 ) ( ( ( dpL[ 1 ] >> 11 ) | ( dpL[ 2 ] << 5 ) ) & 0x00F8 ) * ( int32 )inPtrL[  6 ] );
174					if( iL >  7 ) lSumL += ( ( int8 ) (   ( dpL[ 2 ]       )                       & 0x00F8 ) * ( int32 )inPtrL[  7 ] );
175					if( iL >  8 ) lSumL += ( ( int8 ) (   ( dpL[ 2 ] >>  5 )                       & 0x00F8 ) * ( int32 )inPtrL[  8 ] );
176					if( iL >  9 ) lSumL += ( ( int8 ) ( ( ( dpL[ 2 ] >> 10 ) | ( dpL[ 3 ] << 6 ) ) & 0x00F8 ) * ( int32 )inPtrL[  9 ] );
177					if( iL > 10 ) lSumL += ( ( int8 ) (   ( dpL[ 3 ] <<  1 )                       & 0x00F8 ) * ( int32 )inPtrL[ 10 ] );
178					if( iL > 11 ) lSumL += ( ( int8 ) (   ( dpL[ 3 ] >>  4 )                       & 0x00F8 ) * ( int32 )inPtrL[ 11 ] );
179					if( iL > 12 ) lSumL += ( ( int8 ) ( ( ( dpL[ 3 ] >>  9 ) | ( dpL[ 4 ] << 7 ) ) & 0x00F8 ) * ( int32 )inPtrL[ 12 ] );
180					if( iL > 13 ) lSumL += ( ( int8 ) (   ( dpL[ 4 ] <<  2 )                       & 0x00F8 ) * ( int32 )inPtrL[ 13 ] );
181					if( iL > 14 ) lSumL += ( ( int8 ) (   ( dpL[ 4 ] >>  3 )                       & 0x00F8 ) * ( int32 )inPtrL[ 14 ] );
182					sumL += ( lSumL >> 3 );
183				}
184			}
185			break;
186
187			case 4:
188			{
189				for( iL = sizeL; iL >= 4; iL -= 4 )
190				{
191					uint16 v1L = *rowPtrL++;
192					int32 lSumL = 0;
193					lSumL += ( ( int8 )( ( v1L << 4 )        ) * ( int32 )inPtrL[ 0 ] );
194					lSumL += ( ( int8 )( ( v1L      ) & 0xF0 ) * ( int32 )inPtrL[ 1 ] );
195					lSumL += ( ( int8 )( ( v1L >> 4 ) & 0xF0 ) * ( int32 )inPtrL[ 2 ] );
196					lSumL += ( ( int8 )( ( v1L >> 8 ) & 0xF0 ) * ( int32 )inPtrL[ 3 ] );
197					inPtrL += 4;
198					sumL += ( lSumL >> 4 );
199				}
200				{
201					uint16 v1L = *rowPtrL++;
202					int32 lSumL = 0;
203					if( iL-- > 0 ) lSumL += ( ( int8 )( ( v1L << 4 )        ) * ( int32 )inPtrL[ 0 ] );
204					if( iL-- > 0 ) lSumL += ( ( int8 )( ( v1L      ) & 0xF0 ) * ( int32 )inPtrL[ 1 ] );
205					if( iL-- > 0 ) lSumL += ( ( int8 )( ( v1L >> 4 ) & 0xF0 ) * ( int32 )inPtrL[ 2 ] );
206					sumL += ( lSumL >> 4 );
207				}
208			}
209			break;
210
211			#endif /*ifndef HW_TMS320C5x*/
212
213			/* The default case can process all bit sizes including those that are explicitly encoded above
214			 * Use the default for all bit sizes when the platform cannot handle the int8 data type (e.g. HW_TMS320C5x)
215			 */
216			default:
217			{
218				uint32 bfL = ( ( uint32 )*rowPtrL++ ) << 16;
219				uint32 bitsL = ptrA->bitsPerValueE;
220				uint16 adjL = 16 - bitsL;
221				uint32 mkL = ( ( 1 << bitsL ) - 1 ) << adjL;
222				uint32 srL = bitsL;
223				for( iL = 0; iL < sizeL; iL++ )
224				{
225					if( srL > 16 )
226					{
227						bfL = ( ( ( uint32 )*rowPtrL++ ) << 16 ) | ( bfL >> 16 );
228						srL -= 16;
229					}
230					sumL += ( ( int16 )( ( bfL >> srL ) & mkL ) * ( int32 )inPtrL[ iL ] ) >> adjL;
231					srL += bitsL;
232				}
233			}
234		}
235	}
236	else /* raw dot product does not fit in int32 */
237	{
238		int32 roundL = 1 << ( overflowBitsL - 1 );
239		switch( ptrA->bitsPerValueE )
240		{
241			case 16:
242			{
243				for( iL = sizeL; iL > 0; iL-- ) sumL += ( ( ( int32 )*rowPtrL++ * ( int32 )*inPtrL++ ) + roundL ) >> overflowBitsL;
244			}
245			break;
246
247			case 8:
248			{
249				for( iL = sizeL; iL >= 2; iL -= 2 )
250				{
251					uint16 v1L = *rowPtrL++;
252					int32 lSumL =   ( ( int8 )  v1L         * ( int32 )inPtrL[ 0 ] )
253						          + ( ( int8 )( v1L >>  8 ) * ( int32 )inPtrL[ 1 ] );
254					sumL += ( lSumL + roundL ) >> overflowBitsL;
255					inPtrL += 2;
256				}
257				if( iL > 0 )
258				{
259					sumL += ( ( ( int8 )*rowPtrL++ * ( int32 )inPtrL[ 0 ] ) + roundL ) >> overflowBitsL;
260				}
261			}
262			break;
263
264			case 4:
265			{
266				for( iL = sizeL; iL >= 4; iL -= 4 )
267				{
268					uint16 v1L = *rowPtrL++;
269					int32 lSumL = 0;
270					lSumL += ( ( int8 )( ( v1L << 4 )        ) * ( int32 )inPtrL[ 0 ] );
271					lSumL += ( ( int8 )( ( v1L      ) & 0xF0 ) * ( int32 )inPtrL[ 1 ] );
272					lSumL += ( ( int8 )( ( v1L >> 4 ) & 0xF0 ) * ( int32 )inPtrL[ 2 ] );
273					lSumL += ( ( int8 )( ( v1L >> 8 ) & 0xF0 ) * ( int32 )inPtrL[ 3 ] );
274					inPtrL += 4;
275					sumL += ( ( lSumL >> 4 ) + roundL ) >> overflowBitsL;
276				}
277				{
278					uint16 v1L = *rowPtrL++;
279					int32 lSumL = 0;
280					if( iL-- > 0 ) lSumL += ( ( int8 )( ( v1L << 4 )        ) * ( int32 )inPtrL[ 0 ] );
281					if( iL-- > 0 ) lSumL += ( ( int8 )( ( v1L      ) & 0xF0 ) * ( int32 )inPtrL[ 1 ] );
282					if( iL-- > 0 ) lSumL += ( ( int8 )( ( v1L >> 4 ) & 0xF0 ) * ( int32 )inPtrL[ 2 ] );
283					sumL += ( ( lSumL >> 4 ) + roundL ) >> overflowBitsL;
284				}
285			}
286			break;
287
288			default:
289			{
290				uint32 bfL = ( ( uint32 )*rowPtrL++ ) << 16;
291				uint32 bitsL = ptrA->bitsPerValueE;
292				uint16 adjL = 16 - bitsL;
293				uint32 mkL = ( ( 1 << bitsL ) - 1 ) << adjL;
294				uint32 srL = bitsL;
295				int32 lRoundL = roundL << adjL;
296				int32 lAdjL = overflowBitsL + adjL;
297				for( iL = 0; iL < sizeL; iL++ )
298				{
299					if( srL > 16 )
300					{
301						bfL = ( ( ( uint32 )*rowPtrL++ ) << 16 ) | ( bfL >> 16 );
302						srL -= 16;
303					}
304					sumL += ( ( int16 )( ( bfL >> srL ) & mkL ) * ( int32 )inPtrL[ iL ] + lRoundL ) >> lAdjL;
305					srL += bitsL;
306				}
307			}
308		}
309	}
310
311	/* compute result */
312	{
313		int32 resultManL;
314		int32 resultExpL;
315		int32 resultLogL;
316		bbs_mulS32( sumL, factorManL, &resultManL, &resultExpL );
317		resultExpL += factorExpL + overflowBitsL;
318		resultLogL = bbs_intLog2( resultManL > 0 ? resultManL : -resultManL );
319		if( resultLogL < 30 )
320		{
321			resultManL <<= 30 - resultLogL;
322			resultExpL  -= 30 - resultLogL;
323		}
324
325		resultManL = ( ( resultManL >> 15 ) + 1 ) >> 1;
326		resultExpL = resultExpL + 16;
327
328		return ( ( resultManL & 0x0000FFFF ) << 16 ) | ( resultExpL & 0x0000FFFF );
329	}
330}
331
332/* ------------------------------------------------------------------------- */
333
334/* ========================================================================= */
335/*                                                                           */
336/* ---- \ghd{ constructor / destructor } ----------------------------------- */
337/*                                                                           */
338/* ========================================================================= */
339
340/* ------------------------------------------------------------------------- */
341
342void bts_CompactMat_init( struct bbs_Context* cpA,
343					      struct bts_CompactMat* ptrA )
344{
345	ptrA->widthE = 0;
346	ptrA->heightE = 0;
347	ptrA->bitsPerValueE = 0;
348	ptrA->wordsPerRowE = 0;
349	ptrA->maxRowBitsE = 0;
350	bbs_Int16Arr_init( cpA, &ptrA->cpsArrE );
351	bbs_Int16Arr_init( cpA, &ptrA->expArrE );
352
353}
354
355/* ------------------------------------------------------------------------- */
356
357void bts_CompactMat_exit( struct bbs_Context* cpA,
358					    struct bts_CompactMat* ptrA )
359{
360	ptrA->widthE = 0;
361	ptrA->heightE = 0;
362	ptrA->bitsPerValueE = 0;
363	ptrA->wordsPerRowE = 0;
364	ptrA->maxRowBitsE = 0;
365	bbs_Int16Arr_exit( cpA, &ptrA->cpsArrE );
366	bbs_Int16Arr_exit( cpA, &ptrA->expArrE );
367}
368/* ------------------------------------------------------------------------- */
369
370/* ========================================================================= */
371/*                                                                           */
372/* ---- \ghd{ operators } -------------------------------------------------- */
373/*                                                                           */
374/* ========================================================================= */
375
376/* ------------------------------------------------------------------------- */
377
378/* ========================================================================= */
379/*                                                                           */
380/* ---- \ghd{ query functions } -------------------------------------------- */
381/*                                                                           */
382/* ========================================================================= */
383
384/* ------------------------------------------------------------------------- */
385
386/* ========================================================================= */
387/*                                                                           */
388/* ---- \ghd{ modify functions } ------------------------------------------- */
389/*                                                                           */
390/* ========================================================================= */
391
392/* ------------------------------------------------------------------------- */
393
394void bts_CompactMat_create( struct bbs_Context* cpA,
395						    struct bts_CompactMat* ptrA,
396						    uint32 widthA,
397						    uint32 heightA,
398						    uint32 bitsA,
399							uint32 maxRowSizeA,
400				            struct bbs_MemSeg* mspA )
401{
402	if( bbs_Context_error( cpA ) ) return;
403	if( bitsA < 2 || bitsA > 16 )
404	{
405		bbs_ERROR0( "bts_CompactMat_create:\nbitsA must be between 2 and 16" );
406		return;
407	}
408
409	ptrA->widthE = widthA;
410	ptrA->heightE = heightA;
411	ptrA->bitsPerValueE = bitsA;
412	ptrA->wordsPerRowE = 6 /*header + 1*/ + ( ( maxRowSizeA * bitsA ) / ( 8 * sizeof( short ) ) );
413	ptrA->maxRowBitsE = 0;
414	if( ( ptrA->wordsPerRowE & 1 ) != 0 ) ptrA->wordsPerRowE++;
415	bbs_Int16Arr_create( cpA, &ptrA->cpsArrE, heightA * ptrA->wordsPerRowE, mspA );
416	bbs_Int16Arr_fill( cpA, &ptrA->cpsArrE, 0 );
417	bbs_Int16Arr_create( cpA, &ptrA->expArrE, ptrA->heightE, mspA );
418	bbs_Int16Arr_fill( cpA, &ptrA->expArrE, 0 );
419}
420
421/* ------------------------------------------------------------------------- */
422
423void bts_CompactMat_copy( struct bbs_Context* cpA,
424					      struct bts_CompactMat* ptrA,
425						  const struct bts_CompactMat* srcPtrA )
426{
427	ptrA->widthE = srcPtrA->widthE;
428	ptrA->heightE = srcPtrA->heightE;
429	ptrA->bitsPerValueE = srcPtrA->bitsPerValueE;
430	ptrA->wordsPerRowE = srcPtrA->wordsPerRowE;
431	ptrA->maxRowBitsE = srcPtrA->maxRowBitsE;
432	bbs_Int16Arr_copy( cpA, &ptrA->cpsArrE, &srcPtrA->cpsArrE );
433	bbs_Int16Arr_size( cpA, &ptrA->expArrE, ptrA->heightE );
434}
435
436/* ------------------------------------------------------------------------- */
437
438/* ========================================================================= */
439/*                                                                           */
440/* ---- \ghd{ I/O } -------------------------------------------------------- */
441/*                                                                           */
442/* ========================================================================= */
443
444/* ------------------------------------------------------------------------- */
445
446uint32 bts_CompactMat_memSize( struct bbs_Context* cpA,
447							 const struct bts_CompactMat *ptrA )
448{
449	return  bbs_SIZEOF16( uint32 )
450		  + bbs_SIZEOF16( uint32 ) /* version */
451		  + bbs_SIZEOF16( ptrA->widthE )
452		  + bbs_SIZEOF16( ptrA->heightE )
453		  + bbs_SIZEOF16( ptrA->bitsPerValueE )
454		  + bbs_SIZEOF16( ptrA->wordsPerRowE )
455		  + bbs_SIZEOF16( ptrA->maxRowBitsE )
456		  + bbs_Int16Arr_memSize( cpA, &ptrA->cpsArrE );
457}
458
459/* ------------------------------------------------------------------------- */
460
461uint32 bts_CompactMat_memWrite( struct bbs_Context* cpA,
462							  const struct bts_CompactMat* ptrA,
463							  uint16* memPtrA )
464{
465	uint32 memSizeL = bts_CompactMat_memSize( cpA, ptrA );
466	memPtrA += bbs_memWrite32( &memSizeL, memPtrA );
467	memPtrA += bbs_memWriteUInt32( bts_COMPACT_MAT_VERSION, memPtrA );
468	memPtrA += bbs_memWrite32( &ptrA->widthE, memPtrA );
469	memPtrA += bbs_memWrite32( &ptrA->heightE, memPtrA );
470	memPtrA += bbs_memWrite32( &ptrA->bitsPerValueE, memPtrA );
471	memPtrA += bbs_memWrite32( &ptrA->wordsPerRowE, memPtrA );
472	memPtrA += bbs_memWrite32( &ptrA->maxRowBitsE, memPtrA );
473	memPtrA += bbs_Int16Arr_memWrite( cpA, &ptrA->cpsArrE, memPtrA );
474	return memSizeL;
475}
476
477/* ------------------------------------------------------------------------- */
478
479uint32 bts_CompactMat_memRead( struct bbs_Context* cpA,
480							 struct bts_CompactMat* ptrA,
481							 const uint16* memPtrA,
482				             struct bbs_MemSeg* mspA )
483{
484	uint32 memSizeL, versionL;
485	if( bbs_Context_error( cpA ) ) return 0;
486	memPtrA += bbs_memRead32( &memSizeL, memPtrA );
487	memPtrA += bbs_memReadVersion32( cpA, &versionL, bts_COMPACT_MAT_VERSION, memPtrA );
488	memPtrA += bbs_memRead32( &ptrA->widthE, memPtrA );
489	memPtrA += bbs_memRead32( &ptrA->heightE, memPtrA );
490	memPtrA += bbs_memRead32( &ptrA->bitsPerValueE, memPtrA );
491	memPtrA += bbs_memRead32( &ptrA->wordsPerRowE, memPtrA );
492	memPtrA += bbs_memRead32( &ptrA->maxRowBitsE, memPtrA );
493	memPtrA += bbs_Int16Arr_memRead( cpA, &ptrA->cpsArrE, memPtrA, mspA );
494
495	if( memSizeL != bts_CompactMat_memSize( cpA, ptrA ) )
496	{
497		bbs_ERR0( bbs_ERR_CORRUPT_DATA, "uint32 bts_CompactMat_memRead( const struct bts_CompactMat* ptrA, const void* memPtrA ):\n"
498                  "size mismatch" );
499	}
500
501	bbs_Int16Arr_create( cpA, &ptrA->expArrE, ptrA->heightE, mspA );
502	bbs_Int16Arr_fill( cpA, &ptrA->expArrE, 0 );
503
504	return memSizeL;
505}
506
507/* ------------------------------------------------------------------------- */
508
509/* ========================================================================= */
510/*                                                                           */
511/* ---- \ghd{ exec functions } --------------------------------------------- */
512/*                                                                           */
513/* ========================================================================= */
514
515/* ------------------------------------------------------------------------- */
516
517void bts_CompactMat_map( struct bbs_Context* cpA,
518						 const struct bts_CompactMat* ptrA,
519						 const int16* inVecA,
520						 int16* outVecA,
521						 int16* outExpPtrA )
522{
523	uint32 inNormBitsL = bbs_intLog2( bbs_vecNorm16( inVecA, ptrA->widthE ) ) + 1;
524	uint32 iL;
525
526	int16* expArrL = ( ( struct bts_CompactMat* )ptrA )->expArrE.arrPtrE;
527	int16 maxExpL = -32767;
528
529	for( iL = 0; iL < ptrA->heightE; iL++ )
530	{
531		int32 fltL = bts_CompactMat_fltDotPrdRow( cpA, ( struct bts_CompactMat* )ptrA, inVecA, inNormBitsL, iL );
532		outVecA[ iL ] = fltL >> 16;
533		expArrL[ iL ] = fltL & 0x0000FFFF;
534
535		maxExpL = ( expArrL[ iL ] > maxExpL ) ? expArrL[ iL ] : maxExpL;
536	}
537
538	if( outExpPtrA != NULL ) *outExpPtrA = maxExpL;
539
540	for( iL = 0; iL < ptrA->heightE; iL++ )
541	{
542		int32 shrL = maxExpL - expArrL[ iL ];
543		if( shrL > 0 )
544		{
545			outVecA[ iL ] = ( ( outVecA[ iL ] >> ( shrL - 1 ) ) + 1 ) >> 1;
546		}
547	}
548}
549
550/* ------------------------------------------------------------------------- */
551
552/* ========================================================================= */
553
554