1/*
2 ** Copyright 2003-2010, VisualOn, Inc.
3 **
4 ** Licensed under the Apache License, Version 2.0 (the "License");
5 ** you may not use this file except in compliance with the License.
6 ** You may obtain a copy of the License at
7 **
8 **     http://www.apache.org/licenses/LICENSE-2.0
9 **
10 ** Unless required by applicable law or agreed to in writing, software
11 ** distributed under the License is distributed on an "AS IS" BASIS,
12 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 ** See the License for the specific language governing permissions and
14 ** limitations under the License.
15 */
16/*******************************************************************************
17	File:		transform.c
18
	Content:	MDCT Transform functions
20
21*******************************************************************************/
22
23#include "basic_op.h"
24#include "psy_const.h"
25#include "transform.h"
26#include "aac_rom.h"
27
28
/* Half the difference between the long and short frame lengths.
 * Transform_Real uses it in the START_WINDOW/STOP_WINDOW cases both as the
 * number of samples copied without windowing and as the offset at which the
 * short window slope is applied. */
#define LS_TRANS ((FRAME_LEN_LONG-FRAME_LEN_SHORT)/2) /* 448 */
/* Fixed-point multiplier handed to MULHIGH in Radix8First. */
#define SQRT1_2 0x5a82799a	/* sqrt(1/2) in Q31 */
/*
 * swap2(p0, p1): exchange two adjacent ints starting at lvalue p0 with the
 * two adjacent ints starting at lvalue p1 (one re/im pair each).
 *
 * The expansion is a single statement (do { } while (0)), keeps its own
 * temporaries and evaluates each argument exactly once, so it is safe under
 * an unbraced if/else and needs no helper variables in the caller.
 */
#define swap2(p0,p1) \
	do {	\
		int *swapA_ = &(p0);	\
		int *swapB_ = &(p1);	\
		int swapRe_ = swapA_[0];	\
		int swapIm_ = swapA_[1];	\
		swapA_[0] = swapB_[0];	\
		swapA_[1] = swapB_[1];	\
		swapB_[0] = swapRe_;	\
		swapB_[1] = swapIm_;	\
	} while (0)

/*********************************************************************************
*
* function name: Shuffle
* description:  Reorders the re/im pairs of buf in place, driven by a byte
*               table, before the FFT butterflies run.
*
*   buf     interleaved data; treated as two halves, buf[0..num-1] and
*           buf[num..2*num-1]
*   num     offset (in ints) of the second half
*   bitTab  two zero-terminated lists stored back to back:
*             1) byte pairs (i, j), ended by a byte i == 0: four pair swaps
*                between the 4-int groups i and j of both halves;
*             2) single bytes i, ended by 0: one swap between the first half
*                (ints 4*i+2, 4*i+3) and the second half (ints 4*i, 4*i+1).
*                Index 0 is always processed first and is not stored.
*
*   No bounds checking is done; the table must match the buffer size.
*
**********************************************************************************/
static void Shuffle(int *buf, int num, const unsigned char* bitTab)
{
	int *part0 = buf;
	int *part1 = buf + num;
	int i, j;

	/* list 1: cross swaps between group i and group j */
	while ((i = *bitTab++) != 0) {
		j = *bitTab++;

		swap2(part0[4*i+0], part0[4*j+0]);
		swap2(part0[4*i+2], part1[4*j+0]);
		swap2(part1[4*i+0], part0[4*j+2]);
		swap2(part1[4*i+2], part1[4*j+2]);
	}

	/* list 2: i is 0 on entry, so group 0 is handled before the first
	 * table byte of this list is read */
	do {
		swap2(part0[4*i+2], part1[4*i+0]);
	} while ((i = *bitTab++) != 0);
}
64
65#if !defined(ARMV5E) && !defined(ARMV7Neon)
66
/*****************************************************************************
*
* function name: Radix4First
* description:  First FFT pass: an in-place 4-point butterfly on every group
*               of four interleaved re/im pairs.
*
*   buf   interleaved data, 8 ints (4 re/im pairs) per group
*   num   number of groups to process
*
*   No scaling is applied in this pass.
*
**********************************************************************************/
static void Radix4First(int *buf, int num)
{
	int *grp = buf;
	int left = num;

	while (left != 0)
	{
		/* 2-point sums/differences of pairs (0,1) and (2,3) */
		const int sumLoRe = grp[0] + grp[2];
		const int sumLoIm = grp[1] + grp[3];
		const int difLoRe = grp[0] - grp[2];
		const int difLoIm = grp[1] - grp[3];
		const int sumHiRe = grp[4] + grp[6];
		const int sumHiIm = grp[5] + grp[7];
		const int difHiRe = grp[4] - grp[6];
		const int difHiIm = grp[5] - grp[7];

		/* combine; every input was read above, so the store order is free */
		grp[0] = sumLoRe + sumHiRe;
		grp[1] = sumLoIm + sumHiIm;
		grp[2] = difLoRe + difHiIm;
		grp[3] = difLoIm - difHiRe;
		grp[4] = sumLoRe - sumHiRe;
		grp[5] = sumLoIm - sumHiIm;
		grp[6] = difLoRe - difHiIm;
		grp[7] = difLoIm + difHiRe;

		grp += 8;
		left--;
	}
}
101
102/*****************************************************************************
103*
104* function name: Radix8First
105* description:  Radix 8 point prepared function for fft
106*
107**********************************************************************************/
108static void Radix8First(int *buf, int num)
109{
110   int r0, r1, r2, r3;
111   int i0, i1, i2, i3;
112   int r4, r5, r6, r7;
113   int i4, i5, i6, i7;
114   int t0, t1, t2, t3;
115
116	for ( ; num != 0; num--)
117	{
118		r0 = buf[0] + buf[2];
119		i0 = buf[1] + buf[3];
120		r1 = buf[0] - buf[2];
121		i1 = buf[1] - buf[3];
122		r2 = buf[4] + buf[6];
123		i2 = buf[5] + buf[7];
124		r3 = buf[4] - buf[6];
125		i3 = buf[5] - buf[7];
126
127		r4 = (r0 + r2) >> 1;
128		i4 = (i0 + i2) >> 1;
129		r5 = (r0 - r2) >> 1;
130		i5 = (i0 - i2) >> 1;
131		r6 = (r1 - i3) >> 1;
132		i6 = (i1 + r3) >> 1;
133		r7 = (r1 + i3) >> 1;
134		i7 = (i1 - r3) >> 1;
135
136		r0 = buf[ 8] + buf[10];
137		i0 = buf[ 9] + buf[11];
138		r1 = buf[ 8] - buf[10];
139		i1 = buf[ 9] - buf[11];
140		r2 = buf[12] + buf[14];
141		i2 = buf[13] + buf[15];
142		r3 = buf[12] - buf[14];
143		i3 = buf[13] - buf[15];
144
145		t0 = (r0 + r2) >> 1;
146		t1 = (i0 + i2) >> 1;
147		t2 = (r0 - r2) >> 1;
148		t3 = (i0 - i2) >> 1;
149
150		buf[ 0] = r4 + t0;
151		buf[ 1] = i4 + t1;
152		buf[ 8] = r4 - t0;
153		buf[ 9] = i4 - t1;
154		buf[ 4] = r5 + t3;
155		buf[ 5] = i5 - t2;
156		buf[12] = r5 - t3;
157		buf[13] = i5 + t2;
158
159		r0 = r1 - i3;
160		i0 = i1 + r3;
161		r2 = r1 + i3;
162		i2 = i1 - r3;
163
164		t0 = MULHIGH(SQRT1_2, r0 - i0);
165		t1 = MULHIGH(SQRT1_2, r0 + i0);
166		t2 = MULHIGH(SQRT1_2, r2 - i2);
167		t3 = MULHIGH(SQRT1_2, r2 + i2);
168
169		buf[ 6] = r6 - t0;
170		buf[ 7] = i6 - t1;
171		buf[14] = r6 + t0;
172		buf[15] = i6 + t1;
173		buf[ 2] = r7 + t3;
174		buf[ 3] = i7 - t2;
175		buf[10] = r7 - t3;
176		buf[11] = i7 + t2;
177
178		buf += 16;
179	}
180}
181
/*****************************************************************************
*
* function name: Radix4FFT
* description:  Remaining radix-4 FFT stages, computed in place.
*
*   buf      interleaved re/im data
*   num      group count of the preceding pass; each stage here runs on a
*            quarter as many groups as the one before it (num >> 2 first)
*   bgn      butterflies per group in the first stage; multiplied by 4 after
*            every stage
*   twidTab  twiddle table: three (cos, sin) pairs per butterfly, i.e.
*            6 ints per butterfly and 6*bgn ints per stage
*
*   In each butterfly the first leg is scaled by >> 2 and the other three
*   legs are rotated by their twiddles with MULHIGH (from the project
*   headers).
*
**********************************************************************************/
static void Radix4FFT(int *buf, int num, int bgn, int *twidTab)
{
	int groups;						/* groups in the current stage */
	int perGroup = bgn;				/* butterflies per group in the current stage */
	const int *stageTw = twidTab;	/* twiddles of the current stage */

	for (groups = num >> 2; groups != 0; groups >>= 2)
	{
		const int stride = 2 * perGroup;	/* ints between two legs of a butterfly */
		int *base = buf;
		int g;

		for (g = groups; g != 0; g--)
		{
			/* the four legs of the first butterfly in this group */
			int *pa = base;
			int *pb = pa + stride;
			int *pc = pb + stride;
			int *pd = pc + stride;
			const int *tw = stageTw;
			int k;

			for (k = perGroup; k != 0; k--)
			{
				int c, s, xr, xi;
				int ar, ai, br, bi, cr, ci, dr, di;
				int loRe, loIm, hiRe, hiIm;
				int sumRe, sumIm, rotRe, rotIm;

				/* leg a: only scaled */
				ar = pa[0] >> 2;
				ai = pa[1] >> 2;

				/* leg b rotated by the first twiddle */
				xr = pb[0];
				xi = pb[1];
				c = tw[0];
				s = tw[1];
				br = MULHIGH(c, xr) + MULHIGH(s, xi);		/* cos*br + sin*bi */
				bi = MULHIGH(c, xi) - MULHIGH(s, xr);		/* cos*bi - sin*br */

				/* leg c rotated by the second twiddle */
				xr = pc[0];
				xi = pc[1];
				c = tw[2];
				s = tw[3];
				cr = MULHIGH(c, xr) + MULHIGH(s, xi);		/* cos*cr + sin*ci */
				ci = MULHIGH(c, xi) - MULHIGH(s, xr);		/* cos*ci - sin*cr */

				/* leg d rotated by the third twiddle */
				xr = pd[0];
				xi = pd[1];
				c = tw[4];
				s = tw[5];
				dr = MULHIGH(c, xr) + MULHIGH(s, xi);		/* cos*dr + sin*di */
				di = MULHIGH(c, xi) - MULHIGH(s, xr);		/* cos*di - sin*dr */

				/* combine a with b, and c with d */
				loRe = ar - br;
				loIm = ai - bi;
				hiRe = ar + br;
				hiIm = ai + bi;

				sumRe = cr + dr;
				rotRe = di - ci;
				rotIm = cr - dr;
				sumIm = di + ci;

				/* all four legs were read above; store last leg first */
				pd[0] = loRe + rotRe;
				pd[1] = loIm + rotIm;

				pc[0] = hiRe - sumRe;
				pc[1] = hiIm - sumIm;

				pb[0] = loRe - rotRe;
				pb[1] = loIm - rotIm;

				pa[0] = hiRe + sumRe;
				pa[1] = hiIm + sumIm;

				/* next butterfly of the group, next three twiddles */
				pa += 2;
				pb += 2;
				pc += 2;
				pd += 2;
				tw += 6;
			}
			base += 4 * stride;
		}
		stageTw += 3 * stride;
		perGroup <<= 2;
	}
}
272
/*********************************************************************************
*
* function name: PreMDCT
* description:  Rotation applied in place to the windowed data before the FFT.
*
*   buf0   data buffer of num ints
*   num    buffer length; num >> 2 iterations are run
*   csptr  table read sequentially, four ints (cosa, sina, cosb, sinb) per
*          iteration
*
*   Each iteration takes two ints from the front and two from the back of the
*   buffer. The first front value is paired with the last back value, and the
*   second back value with the second front value. One rotated pair is written
*   to the front, the other to the back, and both cursors then move two ints
*   towards the middle.
*
**********************************************************************************/
static void PreMDCT(int *buf0, int num, const int *csptr)
{
	int *head = buf0;
	int *tail = buf0 + num - 1;
	const int *tw = csptr;
	int left = num >> 2;
	int cosa, sina, cosb, sinb;
	int frontRe, frontIm, backRe, backIm;

	while (left != 0)
	{
		cosa = tw[0];
		sina = tw[1];
		cosb = tw[2];
		sinb = tw[3];
		tw += 4;

		/* read all four inputs before anything is overwritten */
		frontRe = head[0];
		backIm  = head[1];
		backRe  = tail[-1];
		frontIm = tail[0];

		head[0] = MULHIGH(cosa, frontRe) + MULHIGH(sina, frontIm);
		head[1] = MULHIGH(cosa, frontIm) - MULHIGH(sina, frontRe);

		tail[0]  = MULHIGH(cosb, backIm) - MULHIGH(sinb, backRe);
		tail[-1] = MULHIGH(cosb, backRe) + MULHIGH(sinb, backIm);

		head += 2;
		tail -= 2;
		left--;
	}
}
307
/*********************************************************************************
*
* function name: PostMDCT
* description:  Rotation applied in place to the FFT output to obtain the
*               MDCT coefficients.
*
*   buf0   data buffer of num ints
*   num    buffer length; num >> 2 iterations are run
*   csptr  table read sequentially, four ints (cosa, sina, cosb, sinb) per
*          iteration
*
*   Each iteration rotates the front pair with (cosa, sina) and the back pair
*   with (cosb, sinb). Each rotation sends one result to the front and one to
*   the back of the buffer, then both cursors move two ints towards the middle.
*
**********************************************************************************/
static void PostMDCT(int *buf0, int num, const int *csptr)
{
	int *head = buf0;
	int *tail = buf0 + num - 1;
	const int *tw = csptr;
	int left = num >> 2;
	int cosa, sina, cosb, sinb;
	int frontRe, frontIm, backRe, backIm;

	while (left != 0)
	{
		cosa = tw[0];
		sina = tw[1];
		cosb = tw[2];
		sinb = tw[3];
		tw += 4;

		/* read all four inputs before anything is overwritten */
		frontRe = head[0];
		frontIm = head[1];
		backIm  = tail[0];
		backRe  = tail[-1];

		/* store order kept: front, back, front, back */
		head[0]  = MULHIGH(cosa, frontRe) + MULHIGH(sina, frontIm);
		tail[0]  = MULHIGH(sina, frontRe) - MULHIGH(cosa, frontIm);

		head[1]  = MULHIGH(sinb, backRe) - MULHIGH(cosb, backIm);
		tail[-1] = MULHIGH(cosb, backRe) + MULHIGH(sinb, backIm);

		head += 2;
		tail -= 2;
		left--;
	}
}
342#else
/* When ARMV5E or ARMV7Neon is defined the C versions of these five routines
 * are compiled out, so only their prototypes are declared here (with external
 * linkage) and the definitions must be supplied by another object file.
 * NOTE(review): presumably hand-written ARM assembly - confirm in the build. */
void Radix4First(int *buf, int num);
void Radix8First(int *buf, int num);
void Radix4FFT(int *buf, int num, int bgn, int *twidTab);
void PreMDCT(int *buf0, int num, const int *csptr);
void PostMDCT(int *buf0, int num, const int *csptr);
348#endif
349
350
351/**********************************************************************************
352*
353* function name: Mdct_Long
354* description:  the long block mdct, include long_start block, end_long block
355*
356**********************************************************************************/
357void Mdct_Long(int *buf)
358{
359	PreMDCT(buf, 1024, cossintab + 128);
360
361	Shuffle(buf, 512, bitrevTab + 17);
362	Radix8First(buf, 512 >> 3);
363	Radix4FFT(buf, 512 >> 3, 8, (int *)twidTab512);
364
365	PostMDCT(buf, 1024, cossintab + 128);
366}
367
368
369/**********************************************************************************
370*
371* function name: Mdct_Short
372* description:  the short block mdct
373*
374**********************************************************************************/
375void Mdct_Short(int *buf)
376{
377	PreMDCT(buf, 128, cossintab);
378
379	Shuffle(buf, 64, bitrevTab);
380	Radix4First(buf, 64 >> 2);
381	Radix4FFT(buf, 64 >> 2, 4, (int *)twidTab64);
382
383	PostMDCT(buf, 128, cossintab);
384}
385
386
387/*****************************************************************************
388*
389* function name: shiftMdctDelayBuffer
390* description:    the mdct delay buffer has a size of 1600,
391*  so the calculation of LONG,STOP must be  spilt in two
392*  passes with 1024 samples and a mid shift,
393*  the SHORT transforms can be completed in the delay buffer,
394*  and afterwards a shift
395*
396**********************************************************************************/
397static void shiftMdctDelayBuffer(Word16 *mdctDelayBuffer, /*! start of mdct delay buffer */
398								 Word16 *timeSignal,      /*! pointer to new time signal samples, interleaved */
399								 Word16 chIncrement       /*! number of channels */
400								 )
401{
402	Word32 i;
403	Word16 *srBuf = mdctDelayBuffer;
404	Word16 *dsBuf = mdctDelayBuffer+FRAME_LEN_LONG;
405
406	for(i = 0; i < BLOCK_SWITCHING_OFFSET-FRAME_LEN_LONG; i+= 8)
407	{
408		*srBuf++ = *dsBuf++;	 *srBuf++ = *dsBuf++;
409		*srBuf++ = *dsBuf++;	 *srBuf++ = *dsBuf++;
410		*srBuf++ = *dsBuf++;	 *srBuf++ = *dsBuf++;
411		*srBuf++ = *dsBuf++;	 *srBuf++ = *dsBuf++;
412	}
413
414	srBuf = mdctDelayBuffer + BLOCK_SWITCHING_OFFSET-FRAME_LEN_LONG;
415	dsBuf = timeSignal;
416
417	for(i=0; i<FRAME_LEN_LONG; i+=8)
418	{
419		*srBuf++ = *dsBuf; dsBuf += chIncrement;
420		*srBuf++ = *dsBuf; dsBuf += chIncrement;
421		*srBuf++ = *dsBuf; dsBuf += chIncrement;
422		*srBuf++ = *dsBuf; dsBuf += chIncrement;
423		*srBuf++ = *dsBuf; dsBuf += chIncrement;
424		*srBuf++ = *dsBuf; dsBuf += chIncrement;
425		*srBuf++ = *dsBuf; dsBuf += chIncrement;
426		*srBuf++ = *dsBuf; dsBuf += chIncrement;
427	}
428}
429
430
431/*****************************************************************************
432*
433* function name: getScalefactorOfShortVectorStride
434* description:  Calculate max possible scale factor for input vector of shorts
435* returns:      Maximum scale factor
436*
437**********************************************************************************/
438static Word16 getScalefactorOfShortVectorStride(const Word16 *vector, /*!< Pointer to input vector */
439												Word16 len,           /*!< Length of input vector */
440												Word16 stride)        /*!< Stride of input vector */
441{
442	Word16 maxVal = 0;
443	Word16 absVal;
444	Word16 i;
445
446	for(i=0; i<len; i++){
447		absVal = abs_s(vector[i*stride]);
448		maxVal |= absVal;
449	}
450
451	return( maxVal ? norm_s(maxVal) : 15);
452}
453
454
455/*****************************************************************************
456*
457* function name: Transform_Real
458* description:  Calculate transform filter for input vector of shorts
459* returns:      TRUE if success
460*
461**********************************************************************************/
462void Transform_Real(Word16 *mdctDelayBuffer,
463                    Word16 *timeSignal,
464                    Word16 chIncrement,
465                    Word32 *realOut,
466                    Word16 *mdctScale,
467                    Word16 blockType
468                    )
469{
470	Word32 i,w;
471	Word32 timeSignalSample;
472	Word32 ws1,ws2;
473	Word16 *dctIn0, *dctIn1;
474	Word32 *outData0, *outData1;
475	Word32 *winPtr;
476
477	Word32 delayBufferSf,timeSignalSf,minSf;
478	Word32 headRoom=0;
479
480	switch(blockType){
481
482
483	case LONG_WINDOW:
484		/*
485		we access BLOCK_SWITCHING_OFFSET (1600 ) delay buffer samples + 448 new timeSignal samples
486		and get the biggest scale factor for next calculate more precise
487		*/
488		delayBufferSf = getScalefactorOfShortVectorStride(mdctDelayBuffer,BLOCK_SWITCHING_OFFSET,1);
489		timeSignalSf  = getScalefactorOfShortVectorStride(timeSignal,2*FRAME_LEN_LONG-BLOCK_SWITCHING_OFFSET,chIncrement);
490		minSf = min(delayBufferSf,timeSignalSf);
491		minSf = min(minSf,14);
492
493		dctIn0 = mdctDelayBuffer;
494		dctIn1 = mdctDelayBuffer + FRAME_LEN_LONG - 1;
495		outData0 = realOut + FRAME_LEN_LONG/2;
496
497		/* add windows and pre add for mdct to last buffer*/
498		winPtr = (int *)LongWindowKBD;
499		for(i=0;i<FRAME_LEN_LONG/2;i++){
500			timeSignalSample = (*dctIn0++) << minSf;
501			ws1 = timeSignalSample * (*winPtr >> 16);
502			timeSignalSample = (*dctIn1--) << minSf;
503			ws2 = timeSignalSample * (*winPtr & 0xffff);
504			winPtr ++;
505			/* shift 2 to avoid overflow next */
506			*outData0++ = (ws1 >> 2) - (ws2 >> 2);
507		}
508
509		shiftMdctDelayBuffer(mdctDelayBuffer,timeSignal,chIncrement);
510
511		/* add windows and pre add for mdct to new buffer*/
512		dctIn0 = mdctDelayBuffer;
513		dctIn1 = mdctDelayBuffer + FRAME_LEN_LONG - 1;
514		outData0 = realOut + FRAME_LEN_LONG/2 - 1;
515		winPtr = (int *)LongWindowKBD;
516		for(i=0;i<FRAME_LEN_LONG/2;i++){
517			timeSignalSample = (*dctIn0++) << minSf;
518			ws1 = timeSignalSample * (*winPtr & 0xffff);
519			timeSignalSample = (*dctIn1--) << minSf;
520			ws2 = timeSignalSample * (*winPtr >> 16);
521			winPtr++;
522			/* shift 2 to avoid overflow next */
523			*outData0-- = -((ws1 >> 2) + (ws2 >> 2));
524		}
525
526		Mdct_Long(realOut);
527		/* update scale factor */
528		minSf = 14 - minSf;
529		*mdctScale=minSf;
530		break;
531
532	case START_WINDOW:
533		/*
534		we access BLOCK_SWITCHING_OFFSET (1600 ) delay buffer samples + no timeSignal samples
535		and get the biggest scale factor for next calculate more precise
536		*/
537		minSf = getScalefactorOfShortVectorStride(mdctDelayBuffer,BLOCK_SWITCHING_OFFSET,1);
538		minSf = min(minSf,14);
539
540		dctIn0 = mdctDelayBuffer;
541		dctIn1 = mdctDelayBuffer + FRAME_LEN_LONG - 1;
542		outData0 = realOut + FRAME_LEN_LONG/2;
543		winPtr = (int *)LongWindowKBD;
544
545		/* add windows and pre add for mdct to last buffer*/
546		for(i=0;i<FRAME_LEN_LONG/2;i++){
547			timeSignalSample = (*dctIn0++) << minSf;
548			ws1 = timeSignalSample * (*winPtr >> 16);
549			timeSignalSample = (*dctIn1--) << minSf;
550			ws2 = timeSignalSample * (*winPtr & 0xffff);
551			winPtr ++;
552			*outData0++ = (ws1 >> 2) - (ws2 >> 2);  /* shift 2 to avoid overflow next */
553		}
554
555		shiftMdctDelayBuffer(mdctDelayBuffer,timeSignal,chIncrement);
556
557		outData0 = realOut + FRAME_LEN_LONG/2 - 1;
558		for(i=0;i<LS_TRANS;i++){
559			*outData0-- = -mdctDelayBuffer[i] << (15 - 2 + minSf);
560		}
561
562		/* add windows and pre add for mdct to new buffer*/
563		dctIn0 = mdctDelayBuffer + LS_TRANS;
564		dctIn1 = mdctDelayBuffer + FRAME_LEN_LONG - 1 - LS_TRANS;
565		outData0 = realOut + FRAME_LEN_LONG/2 - 1 -LS_TRANS;
566		winPtr = (int *)ShortWindowSine;
567		for(i=0;i<FRAME_LEN_SHORT/2;i++){
568			timeSignalSample= (*dctIn0++) << minSf;
569			ws1 = timeSignalSample * (*winPtr & 0xffff);
570			timeSignalSample= (*dctIn1--) << minSf;
571			ws2 = timeSignalSample * (*winPtr >> 16);
572			winPtr++;
573			*outData0-- =  -((ws1 >> 2) + (ws2 >> 2));  /* shift 2 to avoid overflow next */
574		}
575
576		Mdct_Long(realOut);
577		/* update scale factor */
578		minSf = 14 - minSf;
579		*mdctScale= minSf;
580		break;
581
582	case STOP_WINDOW:
583		/*
584		we access BLOCK_SWITCHING_OFFSET-LS_TRANS (1600-448 ) delay buffer samples + 448 new timeSignal samples
585		and get the biggest scale factor for next calculate more precise
586		*/
587		delayBufferSf = getScalefactorOfShortVectorStride(mdctDelayBuffer+LS_TRANS,BLOCK_SWITCHING_OFFSET-LS_TRANS,1);
588		timeSignalSf  = getScalefactorOfShortVectorStride(timeSignal,2*FRAME_LEN_LONG-BLOCK_SWITCHING_OFFSET,chIncrement);
589		minSf = min(delayBufferSf,timeSignalSf);
590		minSf = min(minSf,13);
591
592		outData0 = realOut + FRAME_LEN_LONG/2;
593		dctIn1 = mdctDelayBuffer + FRAME_LEN_LONG - 1;
594		for(i=0;i<LS_TRANS;i++){
595			*outData0++ = -(*dctIn1--) << (15 - 2 + minSf);
596		}
597
598		/* add windows and pre add for mdct to last buffer*/
599		dctIn0 = mdctDelayBuffer + LS_TRANS;
600		dctIn1 = mdctDelayBuffer + FRAME_LEN_LONG - 1 - LS_TRANS;
601		outData0 = realOut + FRAME_LEN_LONG/2 + LS_TRANS;
602		winPtr = (int *)ShortWindowSine;
603		for(i=0;i<FRAME_LEN_SHORT/2;i++){
604			timeSignalSample = (*dctIn0++) << minSf;
605			ws1 = timeSignalSample * (*winPtr >> 16);
606			timeSignalSample= (*dctIn1--) << minSf;
607			ws2 = timeSignalSample * (*winPtr & 0xffff);
608			winPtr++;
609			*outData0++ = (ws1 >> 2) - (ws2 >> 2);  /* shift 2 to avoid overflow next */
610		}
611
612		shiftMdctDelayBuffer(mdctDelayBuffer,timeSignal,chIncrement);
613
614		/* add windows and pre add for mdct to new buffer*/
615		dctIn0 = mdctDelayBuffer;
616		dctIn1 = mdctDelayBuffer + FRAME_LEN_LONG - 1;
617		outData0 = realOut + FRAME_LEN_LONG/2 - 1;
618		winPtr = (int *)LongWindowKBD;
619		for(i=0;i<FRAME_LEN_LONG/2;i++){
620			timeSignalSample= (*dctIn0++) << minSf;
621			ws1 = timeSignalSample *(*winPtr & 0xffff);
622			timeSignalSample= (*dctIn1--) << minSf;
623			ws2 = timeSignalSample * (*winPtr >> 16);
624			*outData0-- =  -((ws1 >> 2) + (ws2 >> 2));  /* shift 2 to avoid overflow next */
625			winPtr++;
626		}
627
628		Mdct_Long(realOut);
629		minSf = 14 - minSf;
630		*mdctScale= minSf; /* update scale factor */
631		break;
632
633	case SHORT_WINDOW:
634		/*
635		we access BLOCK_SWITCHING_OFFSET (1600 ) delay buffer samples + no new timeSignal samples
636		and get the biggest scale factor for next calculate more precise
637		*/
638		minSf = getScalefactorOfShortVectorStride(mdctDelayBuffer+TRANSFORM_OFFSET_SHORT,9*FRAME_LEN_SHORT,1);
639		minSf = min(minSf,10);
640
641
642		for(w=0;w<TRANS_FAC;w++){
643			dctIn0 = mdctDelayBuffer+w*FRAME_LEN_SHORT+TRANSFORM_OFFSET_SHORT;
644			dctIn1 = mdctDelayBuffer+w*FRAME_LEN_SHORT+TRANSFORM_OFFSET_SHORT + FRAME_LEN_SHORT-1;
645			outData0 = realOut + FRAME_LEN_SHORT/2;
646			outData1 = realOut + FRAME_LEN_SHORT/2 - 1;
647
648			winPtr = (int *)ShortWindowSine;
649			for(i=0;i<FRAME_LEN_SHORT/2;i++){
650				timeSignalSample= *dctIn0 << minSf;
651				ws1 = timeSignalSample * (*winPtr >> 16);
652				timeSignalSample= *dctIn1 << minSf;
653				ws2 = timeSignalSample * (*winPtr & 0xffff);
654				*outData0++ = (ws1 >> 2) - (ws2 >> 2);  /* shift 2 to avoid overflow next */
655
656				timeSignalSample= *(dctIn0 + FRAME_LEN_SHORT) << minSf;
657				ws1 = timeSignalSample * (*winPtr & 0xffff);
658				timeSignalSample= *(dctIn1 + FRAME_LEN_SHORT) << minSf;
659				ws2 = timeSignalSample * (*winPtr >> 16);
660				*outData1-- =  -((ws1 >> 2) + (ws2 >> 2));  /* shift 2 to avoid overflow next */
661
662				winPtr++;
663				dctIn0++;
664				dctIn1--;
665			}
666
667			Mdct_Short(realOut);
668			realOut += FRAME_LEN_SHORT;
669		}
670
671		minSf = 11 - minSf;
672		*mdctScale = minSf; /* update scale factor */
673
674		shiftMdctDelayBuffer(mdctDelayBuffer,timeSignal,chIncrement);
675		break;
676  }
677}
678
679