1/* ------------------------------------------------------------------
2 * Copyright (C) 1998-2009 PacketVideo
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 * express or implied.
14 * See the License for the specific language governing permissions
15 * and limitations under the License.
16 * -------------------------------------------------------------------
17 */
18/*
19------------------------------------------------------------------------------
20 INPUT AND OUTPUT DEFINITIONS
21
22 Inputs:
23    [input_variable_name] = [description of the input to module, its type
24                 definition, and length (when applicable)]
25
26 Local Stores/Buffers/Pointers Needed:
27    [local_store_name] = [description of the local store, its type
28                  definition, and length (when applicable)]
29    [local_buffer_name] = [description of the local buffer, its type
30                   definition, and length (when applicable)]
31    [local_ptr_name] = [description of the local pointer, its type
32                definition, and length (when applicable)]
33
34 Global Stores/Buffers/Pointers Needed:
35    [global_store_name] = [description of the global store, its type
36                   definition, and length (when applicable)]
37    [global_buffer_name] = [description of the global buffer, its type
38                definition, and length (when applicable)]
39    [global_ptr_name] = [description of the global pointer, its type
40                 definition, and length (when applicable)]
41
42 Outputs:
43    [return_variable_name] = [description of data/pointer returned
44                  by module, its type definition, and length
45                  (when applicable)]
46
47 Pointers and Buffers Modified:
48    [variable_bfr_ptr] points to the [describe where the
49      variable_bfr_ptr points to, its type definition, and length
50      (when applicable)]
51    [variable_bfr] contents are [describe the new contents of
52      variable_bfr]
53
54 Local Stores Modified:
55    [local_store_name] = [describe new contents, its type
56                  definition, and length (when applicable)]
57
58 Global Stores Modified:
59    [global_store_name] = [describe new contents, its type
60                   definition, and length (when applicable)]
61
62------------------------------------------------------------------------------
63 FUNCTION DESCRIPTION
64
65------------------------------------------------------------------------------
66 REQUIREMENTS
67
68------------------------------------------------------------------------------
69 REFERENCES
70
71------------------------------------------------------------------------------
72 PSEUDO-CODE
73
74------------------------------------------------------------------------------
75 RESOURCES USED
   When the code is written for a specific target processor,
     the resources used should be documented below.
78
79 STACK USAGE: [stack count for this module] + [variable to represent
80          stack usage for each subroutine called]
81
82     where: [stack usage variable] = stack usage for [subroutine
83         name] (see [filename].ext)
84
85 DATA MEMORY USED: x words
86
87 PROGRAM MEMORY USED: x words
88
89 CLOCK CYCLES: [cycle count equation for this module] + [variable
90           used to represent cycle count for each subroutine
91           called]
92
93     where: [cycle count variable] = cycle count for [subroutine
94        name] (see [filename].ext)
95
96------------------------------------------------------------------------------
97*/
98
99/*----------------------------------------------------------------------------
100; INCLUDES
101----------------------------------------------------------------------------*/
#include <string.h>

#include "mp4dec_lib.h"
#include "idct.h"
#include "motion_comp.h"
105
106#define OSCL_DISABLE_WARNING_CONV_POSSIBLE_LOSS_OF_DATA
107/*----------------------------------------------------------------------------
108; MACROS
109; Define module specific macros here
110----------------------------------------------------------------------------*/
111
112/*----------------------------------------------------------------------------
113; DEFINES
114; Include all pre-processor statements here. Include conditional
115; compile variables also.
116----------------------------------------------------------------------------*/
117
118/*----------------------------------------------------------------------------
119; LOCAL FUNCTION DEFINITIONS
120; Function Prototype declaration
121----------------------------------------------------------------------------*/
122/* private prototypes */
123static void idctrow(int16 *blk, uint8 *pred, uint8 *dst, int width);
124static void idctrow_intra(int16 *blk, PIXEL *, int width);
125static void idctcol(int16 *blk);
126
127#ifdef FAST_IDCT
// Mapping from nz_coefs (and the row/column bitmaps) to the IDCT functions
// to be used.
//
// ARM4 does not allow global data that is not constant, and an array of
// function pointers cannot be considered an array of constants (the actual
// addresses are only known when the DLL is loaded). So, instead of arrays
// of function pointers, we store here arrays of row or column numbers and
// then call the IDCT function corresponding to that row/column number.
//
// NOTE(review): the tables that follow ARE const arrays of function
// pointers, so the paragraph above appears to describe a different
// revision of this code -- confirm and update or remove.
137
138
139static void (*const idctcolVCA[10][4])(int16*) =
140{
141    {&idctcol1, &idctcol0, &idctcol0, &idctcol0},
142    {&idctcol1, &idctcol1, &idctcol0, &idctcol0},
143    {&idctcol2, &idctcol1, &idctcol0, &idctcol0},
144    {&idctcol3, &idctcol1, &idctcol0, &idctcol0},
145    {&idctcol3, &idctcol2, &idctcol0, &idctcol0},
146    {&idctcol3, &idctcol2, &idctcol1, &idctcol0},
147    {&idctcol3, &idctcol2, &idctcol1, &idctcol1},
148    {&idctcol3, &idctcol2, &idctcol2, &idctcol1},
149    {&idctcol3, &idctcol3, &idctcol2, &idctcol1},
150    {&idctcol4, &idctcol3, &idctcol2, &idctcol1}
151};
152
153
154static void (*const idctrowVCA[10])(int16*, uint8*, uint8*, int) =
155{
156    &idctrow1,
157    &idctrow2,
158    &idctrow2,
159    &idctrow2,
160    &idctrow2,
161    &idctrow3,
162    &idctrow4,
163    &idctrow4,
164    &idctrow4,
165    &idctrow4
166};
167
168
169static void (*const idctcolVCA2[16])(int16*) =
170{
171    &idctcol0, &idctcol4, &idctcol3, &idctcol4,
172    &idctcol2, &idctcol4, &idctcol3, &idctcol4,
173    &idctcol1, &idctcol4, &idctcol3, &idctcol4,
174    &idctcol2, &idctcol4, &idctcol3, &idctcol4
175};
176
177static void (*const idctrowVCA2[8])(int16*, uint8*, uint8*, int) =
178{
179    &idctrow1, &idctrow4, &idctrow3, &idctrow4,
180    &idctrow2, &idctrow4, &idctrow3, &idctrow4
181};
182
183static void (*const idctrowVCA_intra[10])(int16*, PIXEL *, int) =
184{
185    &idctrow1_intra,
186    &idctrow2_intra,
187    &idctrow2_intra,
188    &idctrow2_intra,
189    &idctrow2_intra,
190    &idctrow3_intra,
191    &idctrow4_intra,
192    &idctrow4_intra,
193    &idctrow4_intra,
194    &idctrow4_intra
195};
196
197static void (*const idctrowVCA2_intra[8])(int16*, PIXEL *, int) =
198{
199    &idctrow1_intra, &idctrow4_intra, &idctrow3_intra, &idctrow4_intra,
200    &idctrow2_intra, &idctrow4_intra, &idctrow3_intra, &idctrow4_intra
201};
202#endif
203
204/*----------------------------------------------------------------------------
205; LOCAL STORE/BUFFER/POINTER DEFINITIONS
206; Variable declaration - defined here and used outside this module
207----------------------------------------------------------------------------*/
208
209/*----------------------------------------------------------------------------
210; EXTERNAL FUNCTION REFERENCES
211; Declare functions defined elsewhere and referenced in this module
212----------------------------------------------------------------------------*/
213
214/*----------------------------------------------------------------------------
215; EXTERNAL GLOBAL STORE/BUFFER/POINTER REFERENCES
216; Declare variables used in this module but defined elsewhere
217----------------------------------------------------------------------------*/
218
219/*----------------------------------------------------------------------------
220; FUNCTION CODE
221----------------------------------------------------------------------------*/
222void MBlockIDCT(VideoDecData *video)
223{
224    Vop *currVop = video->currVop;
225    MacroBlock *mblock = video->mblock;
226    PIXEL *c_comp;
227    PIXEL *cu_comp;
228    PIXEL *cv_comp;
229    int x_pos = video->mbnum_col;
230    int y_pos = video->mbnum_row;
231    int width, width_uv;
232    int32 offset;
233    width = video->width;
234    width_uv = width >> 1;
235    offset = (int32)(y_pos << 4) * width + (x_pos << 4);
236
237    c_comp  = currVop->yChan + offset;
238    cu_comp = currVop->uChan + (offset >> 2) + (x_pos << 2);
239    cv_comp = currVop->vChan + (offset >> 2) + (x_pos << 2);
240
241    BlockIDCT_intra(mblock, c_comp, 0, width);
242    BlockIDCT_intra(mblock, c_comp + 8, 1, width);
243    BlockIDCT_intra(mblock, c_comp + (width << 3), 2, width);
244    BlockIDCT_intra(mblock, c_comp + (width << 3) + 8, 3, width);
245    BlockIDCT_intra(mblock, cu_comp, 4, width_uv);
246    BlockIDCT_intra(mblock, cv_comp, 5, width_uv);
247}
248
249
250void BlockIDCT_intra(
251    MacroBlock *mblock, PIXEL *c_comp, int comp, int width)
252{
253    /*----------------------------------------------------------------------------
254    ; Define all local variables
255    ----------------------------------------------------------------------------*/
256    int16 *coeff_in = mblock->block[comp];
257#ifdef INTEGER_IDCT
258#ifdef FAST_IDCT  /* VCA IDCT using nzcoefs and bitmaps*/
259    int i, bmapr;
260    int nz_coefs = mblock->no_coeff[comp];
261    uint8 *bitmapcol = mblock->bitmapcol[comp];
262    uint8 bitmaprow = mblock->bitmaprow[comp];
263
264    /*----------------------------------------------------------------------------
265    ; Function body here
266    ----------------------------------------------------------------------------*/
267    if (nz_coefs <= 10)
268    {
269        bmapr = (nz_coefs - 1);
270
271        (*(idctcolVCA[bmapr]))(coeff_in);
272        (*(idctcolVCA[bmapr][1]))(coeff_in + 1);
273        (*(idctcolVCA[bmapr][2]))(coeff_in + 2);
274        (*(idctcolVCA[bmapr][3]))(coeff_in + 3);
275
276        (*idctrowVCA_intra[nz_coefs-1])(coeff_in, c_comp, width);
277    }
278    else
279    {
280        i = 8;
281        while (i--)
282        {
283            bmapr = (int)bitmapcol[i];
284            if (bmapr)
285            {
286                if ((bmapr&0xf) == 0)         /*  07/18/01 */
287                {
288                    (*(idctcolVCA2[bmapr>>4]))(coeff_in + i);
289                }
290                else
291                {
292                    idctcol(coeff_in + i);
293                }
294            }
295        }
296        if ((bitmapcol[4] | bitmapcol[5] | bitmapcol[6] | bitmapcol[7]) == 0)
297        {
298            bitmaprow >>= 4;
299            (*(idctrowVCA2_intra[(int)bitmaprow]))(coeff_in, c_comp, width);
300        }
301        else
302        {
303            idctrow_intra(coeff_in, c_comp, width);
304        }
305    }
306#else
307    void idct_intra(int *block, uint8 *comp, int width);
308    idct_intra(coeff_in, c_comp, width);
309#endif
310#else
311    void idctref_intra(int *block, uint8 *comp, int width);
312    idctref_intra(coeff_in, c_comp, width);
313#endif
314
315
316    /*----------------------------------------------------------------------------
317    ; Return nothing or data or data pointer
318    ----------------------------------------------------------------------------*/
319    return;
320}
321
322/*  08/04/05, no residue, just copy from pred to output */
323void Copy_Blk_to_Vop(uint8 *dst, uint8 *pred, int width)
324{
325    /* copy 4 bytes at a time */
326    width -= 4;
327    *((uint32*)dst) = *((uint32*)pred);
328    *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
329    *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
330    *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
331    *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
332    *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
333    *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
334    *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
335    *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
336    *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
337    *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
338    *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
339    *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
340    *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
341    *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
342    *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
343
344    return ;
345}
346
347/*  08/04/05 compute IDCT and add prediction at the end  */
348void BlockIDCT(
349    uint8 *dst,  /* destination */
350    uint8 *pred, /* prediction block, pitch 16 */
351    int16   *coeff_in,  /* DCT data, size 64 */
352    int width, /* width of dst */
353    int nz_coefs,
354    uint8 *bitmapcol,
355    uint8 bitmaprow
356)
357{
358#ifdef INTEGER_IDCT
359#ifdef FAST_IDCT  /* VCA IDCT using nzcoefs and bitmaps*/
360    int i, bmapr;
361    /*----------------------------------------------------------------------------
362    ; Function body here
363    ----------------------------------------------------------------------------*/
364    if (nz_coefs <= 10)
365    {
366        bmapr = (nz_coefs - 1);
367        (*(idctcolVCA[bmapr]))(coeff_in);
368        (*(idctcolVCA[bmapr][1]))(coeff_in + 1);
369        (*(idctcolVCA[bmapr][2]))(coeff_in + 2);
370        (*(idctcolVCA[bmapr][3]))(coeff_in + 3);
371
372        (*idctrowVCA[nz_coefs-1])(coeff_in, pred, dst, width);
373        return ;
374    }
375    else
376    {
377        i = 8;
378
379        while (i--)
380        {
381            bmapr = (int)bitmapcol[i];
382            if (bmapr)
383            {
384                if ((bmapr&0xf) == 0)         /*  07/18/01 */
385                {
386                    (*(idctcolVCA2[bmapr>>4]))(coeff_in + i);
387                }
388                else
389                {
390                    idctcol(coeff_in + i);
391                }
392            }
393        }
394        if ((bitmapcol[4] | bitmapcol[5] | bitmapcol[6] | bitmapcol[7]) == 0)
395        {
396            (*(idctrowVCA2[bitmaprow>>4]))(coeff_in, pred, dst, width);
397        }
398        else
399        {
400            idctrow(coeff_in, pred, dst, width);
401        }
402        return ;
403    }
404#else // FAST_IDCT
405    void idct(int *block, uint8 *pred, uint8 *dst, int width);
406    idct(coeff_in, pred, dst, width);
407    return;
408#endif // FAST_IDCT
409#else // INTEGER_IDCT
410    void idctref(int *block, uint8 *pred, uint8 *dst, int width);
411    idctref(coeff_in, pred, dst, width);
412    return;
413#endif // INTEGER_IDCT
414
415}
416/*----------------------------------------------------------------------------
417;  End Function: block_idct
418----------------------------------------------------------------------------*/
419
420
421/****************************************************************************/
422
423/*
424------------------------------------------------------------------------------
425 FUNCTION NAME: idctrow
426------------------------------------------------------------------------------
427 INPUT AND OUTPUT DEFINITIONS FOR idctrow
428
429 Inputs:
430    [input_variable_name] = [description of the input to module, its type
431                 definition, and length (when applicable)]
432
433 Local Stores/Buffers/Pointers Needed:
434    [local_store_name] = [description of the local store, its type
435                  definition, and length (when applicable)]
436    [local_buffer_name] = [description of the local buffer, its type
437                   definition, and length (when applicable)]
438    [local_ptr_name] = [description of the local pointer, its type
439                definition, and length (when applicable)]
440
441 Global Stores/Buffers/Pointers Needed:
442    [global_store_name] = [description of the global store, its type
443                   definition, and length (when applicable)]
444    [global_buffer_name] = [description of the global buffer, its type
445                definition, and length (when applicable)]
446    [global_ptr_name] = [description of the global pointer, its type
447                 definition, and length (when applicable)]
448
449 Outputs:
450    [return_variable_name] = [description of data/pointer returned
451                  by module, its type definition, and length
452                  (when applicable)]
453
454 Pointers and Buffers Modified:
455    [variable_bfr_ptr] points to the [describe where the
456      variable_bfr_ptr points to, its type definition, and length
457      (when applicable)]
458    [variable_bfr] contents are [describe the new contents of
459      variable_bfr]
460
461 Local Stores Modified:
462    [local_store_name] = [describe new contents, its type
463                  definition, and length (when applicable)]
464
465 Global Stores Modified:
466    [global_store_name] = [describe new contents, its type
467                   definition, and length (when applicable)]
468
469------------------------------------------------------------------------------
470 FUNCTION DESCRIPTION FOR idctrow
471
472------------------------------------------------------------------------------
473 REQUIREMENTS FOR idctrow
474
475------------------------------------------------------------------------------
476 REFERENCES FOR idctrow
477
478------------------------------------------------------------------------------
479 PSEUDO-CODE FOR idctrow
480
481------------------------------------------------------------------------------
482 RESOURCES USED FOR idctrow
   When the code is written for a specific target processor,
     the resources used should be documented below.
485
486 STACK USAGE: [stack count for this module] + [variable to represent
487          stack usage for each subroutine called]
488
489     where: [stack usage variable] = stack usage for [subroutine
490         name] (see [filename].ext)
491
492 DATA MEMORY USED: x words
493
494 PROGRAM MEMORY USED: x words
495
496 CLOCK CYCLES: [cycle count equation for this module] + [variable
497           used to represent cycle count for each subroutine
498           called]
499
500     where: [cycle count variable] = cycle count for [subroutine
501        name] (see [filename].ext)
502
503------------------------------------------------------------------------------
504*/
505
506/*----------------------------------------------------------------------------
507; Function Code FOR idctrow
508----------------------------------------------------------------------------*/
509void idctrow(
510    int16 *blk, uint8 *pred, uint8 *dst, int width
511)
512{
513    /*----------------------------------------------------------------------------
514    ; Define all local variables
515    ----------------------------------------------------------------------------*/
516    int32 x0, x1, x2, x3, x4, x5, x6, x7, x8;
517    int i = 8;
518    uint32 pred_word, dst_word;
519    int res, res2;
520
521    /*----------------------------------------------------------------------------
522    ; Function body here
523    ----------------------------------------------------------------------------*/
524    /* row (horizontal) IDCT
525    *
526    * 7                       pi         1 dst[k] = sum c[l] * src[l] * cos( -- *
527    * ( k + - ) * l ) l=0                      8          2
528    *
529    * where: c[0]    = 128 c[1..7] = 128*sqrt(2) */
530
531    /* preset the offset, such that we can take advantage pre-offset addressing mode   */
532    width -= 4;
533    dst -= width;
534    pred -= 12;
535    blk -= 8;
536
537    while (i--)
538    {
539        x1 = (int32)blk[12] << 8;
540        blk[12] = 0;
541        x2 = blk[14];
542        blk[14] = 0;
543        x3 = blk[10];
544        blk[10] = 0;
545        x4 = blk[9];
546        blk[9] = 0;
547        x5 = blk[15];
548        blk[15] = 0;
549        x6 = blk[13];
550        blk[13] = 0;
551        x7 = blk[11];
552        blk[11] = 0;
553        x0 = ((*(blk += 8)) << 8) + 8192;
554        blk[0] = 0;   /* for proper rounding in the fourth stage */
555
556        /* first stage */
557        x8 = W7 * (x4 + x5) + 4;
558        x4 = (x8 + (W1 - W7) * x4) >> 3;
559        x5 = (x8 - (W1 + W7) * x5) >> 3;
560        x8 = W3 * (x6 + x7) + 4;
561        x6 = (x8 - (W3 - W5) * x6) >> 3;
562        x7 = (x8 - (W3 + W5) * x7) >> 3;
563
564        /* second stage */
565        x8 = x0 + x1;
566        x0 -= x1;
567        x1 = W6 * (x3 + x2) + 4;
568        x2 = (x1 - (W2 + W6) * x2) >> 3;
569        x3 = (x1 + (W2 - W6) * x3) >> 3;
570        x1 = x4 + x6;
571        x4 -= x6;
572        x6 = x5 + x7;
573        x5 -= x7;
574
575        /* third stage */
576        x7 = x8 + x3;
577        x8 -= x3;
578        x3 = x0 + x2;
579        x0 -= x2;
580        x2 = (181 * (x4 + x5) + 128) >> 8;
581        x4 = (181 * (x4 - x5) + 128) >> 8;
582
583        /* fourth stage */
584        pred_word = *((uint32*)(pred += 12)); /* read 4 bytes from pred */
585
586        res = (x7 + x1) >> 14;
587        ADD_AND_CLIP1(res);
588        res2 = (x3 + x2) >> 14;
589        ADD_AND_CLIP2(res2);
590        dst_word = (res2 << 8) | res;
591        res = (x0 + x4) >> 14;
592        ADD_AND_CLIP3(res);
593        dst_word |= (res << 16);
594        res = (x8 + x6) >> 14;
595        ADD_AND_CLIP4(res);
596        dst_word |= (res << 24);
597        *((uint32*)(dst += width)) = dst_word; /* save 4 bytes to dst */
598
599        pred_word = *((uint32*)(pred += 4)); /* read 4 bytes from pred */
600
601        res = (x8 - x6) >> 14;
602        ADD_AND_CLIP1(res);
603        res2 = (x0 - x4) >> 14;
604        ADD_AND_CLIP2(res2);
605        dst_word = (res2 << 8) | res;
606        res = (x3 - x2) >> 14;
607        ADD_AND_CLIP3(res);
608        dst_word |= (res << 16);
609        res = (x7 - x1) >> 14;
610        ADD_AND_CLIP4(res);
611        dst_word |= (res << 24);
612        *((uint32*)(dst += 4)) = dst_word; /* save 4 bytes to dst */
613    }
614    /*----------------------------------------------------------------------------
615    ; Return nothing or data or data pointer
616    ----------------------------------------------------------------------------*/
617    return;
618}
619
620void idctrow_intra(
621    int16 *blk, PIXEL *comp, int width
622)
623{
624    /*----------------------------------------------------------------------------
625    ; Define all local variables
626    ----------------------------------------------------------------------------*/
627    int32 x0, x1, x2, x3, x4, x5, x6, x7, x8, temp;
628    int i = 8;
629    int offset = width;
630    int32 word;
631
632    /*----------------------------------------------------------------------------
633    ; Function body here
634    ----------------------------------------------------------------------------*/
635    /* row (horizontal) IDCT
636    *
637    * 7                       pi         1 dst[k] = sum c[l] * src[l] * cos( -- *
638    * ( k + - ) * l ) l=0                      8          2
639    *
640    * where: c[0]    = 128 c[1..7] = 128*sqrt(2) */
641    while (i--)
642    {
643        x1 = (int32)blk[4] << 8;
644        blk[4] = 0;
645        x2 = blk[6];
646        blk[6] = 0;
647        x3 = blk[2];
648        blk[2] = 0;
649        x4 = blk[1];
650        blk[1] = 0;
651        x5 = blk[7];
652        blk[7] = 0;
653        x6 = blk[5];
654        blk[5] = 0;
655        x7 = blk[3];
656        blk[3] = 0;
657#ifndef FAST_IDCT
658        /* shortcut */  /* covered by idctrow1  01/9/2001 */
659        if (!(x1 | x2 | x3 | x4 | x5 | x6 | x7))
660        {
661            blk[0] = blk[1] = blk[2] = blk[3] = blk[4] = blk[5] = blk[6] = blk[7] = (blk[0] + 32) >> 6;
662            return;
663        }
664#endif
665        x0 = ((int32)blk[0] << 8) + 8192;
666        blk[0] = 0;  /* for proper rounding in the fourth stage */
667
668        /* first stage */
669        x8 = W7 * (x4 + x5) + 4;
670        x4 = (x8 + (W1 - W7) * x4) >> 3;
671        x5 = (x8 - (W1 + W7) * x5) >> 3;
672        x8 = W3 * (x6 + x7) + 4;
673        x6 = (x8 - (W3 - W5) * x6) >> 3;
674        x7 = (x8 - (W3 + W5) * x7) >> 3;
675
676        /* second stage */
677        x8 = x0 + x1;
678        x0 -= x1;
679        x1 = W6 * (x3 + x2) + 4;
680        x2 = (x1 - (W2 + W6) * x2) >> 3;
681        x3 = (x1 + (W2 - W6) * x3) >> 3;
682        x1 = x4 + x6;
683        x4 -= x6;
684        x6 = x5 + x7;
685        x5 -= x7;
686
687        /* third stage */
688        x7 = x8 + x3;
689        x8 -= x3;
690        x3 = x0 + x2;
691        x0 -= x2;
692        x2 = (181 * (x4 + x5) + 128) >> 8;
693        x4 = (181 * (x4 - x5) + 128) >> 8;
694
695        /* fourth stage */
696        word = ((x7 + x1) >> 14);
697        CLIP_RESULT(word)
698
699        temp = ((x3 + x2) >> 14);
700        CLIP_RESULT(temp)
701        word = word | (temp << 8);
702
703        temp = ((x0 + x4) >> 14);
704        CLIP_RESULT(temp)
705        word = word | (temp << 16);
706
707        temp = ((x8 + x6) >> 14);
708        CLIP_RESULT(temp)
709        word = word | (temp << 24);
710        *((int32*)(comp)) = word;
711
712        word = ((x8 - x6) >> 14);
713        CLIP_RESULT(word)
714
715        temp = ((x0 - x4) >> 14);
716        CLIP_RESULT(temp)
717        word = word | (temp << 8);
718
719        temp = ((x3 - x2) >> 14);
720        CLIP_RESULT(temp)
721        word = word | (temp << 16);
722
723        temp = ((x7 - x1) >> 14);
724        CLIP_RESULT(temp)
725        word = word | (temp << 24);
726        *((int32*)(comp + 4)) = word;
727        comp += offset;
728
729        blk += B_SIZE;
730    }
731    /*----------------------------------------------------------------------------
732    ; Return nothing or data or data pointer
733    ----------------------------------------------------------------------------*/
734    return;
735}
736
737/*----------------------------------------------------------------------------
738; End Function: idctrow
739----------------------------------------------------------------------------*/
740
741
742/****************************************************************************/
743
744/*
745------------------------------------------------------------------------------
746 FUNCTION NAME: idctcol
747------------------------------------------------------------------------------
748 INPUT AND OUTPUT DEFINITIONS FOR idctcol
749
750 Inputs:
751    [input_variable_name] = [description of the input to module, its type
752                 definition, and length (when applicable)]
753
754 Local Stores/Buffers/Pointers Needed:
755    [local_store_name] = [description of the local store, its type
756                  definition, and length (when applicable)]
757    [local_buffer_name] = [description of the local buffer, its type
758                   definition, and length (when applicable)]
759    [local_ptr_name] = [description of the local pointer, its type
760                definition, and length (when applicable)]
761
762 Global Stores/Buffers/Pointers Needed:
763    [global_store_name] = [description of the global store, its type
764                   definition, and length (when applicable)]
765    [global_buffer_name] = [description of the global buffer, its type
766                definition, and length (when applicable)]
767    [global_ptr_name] = [description of the global pointer, its type
768                 definition, and length (when applicable)]
769
770 Outputs:
771    [return_variable_name] = [description of data/pointer returned
772                  by module, its type definition, and length
773                  (when applicable)]
774
775 Pointers and Buffers Modified:
776    [variable_bfr_ptr] points to the [describe where the
777      variable_bfr_ptr points to, its type definition, and length
778      (when applicable)]
779    [variable_bfr] contents are [describe the new contents of
780      variable_bfr]
781
782 Local Stores Modified:
783    [local_store_name] = [describe new contents, its type
784                  definition, and length (when applicable)]
785
786 Global Stores Modified:
787    [global_store_name] = [describe new contents, its type
788                   definition, and length (when applicable)]
789
790------------------------------------------------------------------------------
791 FUNCTION DESCRIPTION FOR idctcol
792
793------------------------------------------------------------------------------
794 REQUIREMENTS FOR idctcol
795
796------------------------------------------------------------------------------
797 REFERENCES FOR idctcol
798
799------------------------------------------------------------------------------
800 PSEUDO-CODE FOR idctcol
801
802------------------------------------------------------------------------------
803 RESOURCES USED FOR idctcol
   When the code is written for a specific target processor,
     the resources used should be documented below.
806
807 STACK USAGE: [stack count for this module] + [variable to represent
808          stack usage for each subroutine called]
809
810     where: [stack usage variable] = stack usage for [subroutine
811         name] (see [filename].ext)
812
813 DATA MEMORY USED: x words
814
815 PROGRAM MEMORY USED: x words
816
817 CLOCK CYCLES: [cycle count equation for this module] + [variable
818           used to represent cycle count for each subroutine
819           called]
820
821     where: [cycle count variable] = cycle count for [subroutine
822        name] (see [filename].ext)
823
824------------------------------------------------------------------------------
825*/
826
827/*----------------------------------------------------------------------------
828; Function Code FOR idctcol
829----------------------------------------------------------------------------*/
830void idctcol(
831    int16 *blk
832)
833{
834    /*----------------------------------------------------------------------------
835    ; Define all local variables
836    ----------------------------------------------------------------------------*/
837    int32 x0, x1, x2, x3, x4, x5, x6, x7, x8;
838
839    /*----------------------------------------------------------------------------
840    ; Function body here
841    ----------------------------------------------------------------------------*/
842    /* column (vertical) IDCT
843    *
844    * 7                         pi         1 dst[8*k] = sum c[l] * src[8*l] *
845    * cos( -- * ( k + - ) * l ) l=0                        8          2
846    *
847    * where: c[0]    = 1/1024 c[1..7] = (1/1024)*sqrt(2) */
848    x1 = (int32)blk[32] << 11;
849    x2 = blk[48];
850    x3 = blk[16];
851    x4 = blk[8];
852    x5 = blk[56];
853    x6 = blk[40];
854    x7 = blk[24];
855#ifndef FAST_IDCT
856    /* shortcut */        /* covered by idctcolumn1  01/9/2001 */
857    if (!(x1 | x2 | x3 | x4 | x5 | x6 | x7))
858    {
859        blk[0] = blk[8] = blk[16] = blk[24] = blk[32] = blk[40] = blk[48] = blk[56]
860                                              = blk[0] << 3;
861        return;
862    }
863#endif
864
865    x0 = ((int32)blk[0] << 11) + 128;
866
867    /* first stage */
868    x8 = W7 * (x4 + x5);
869    x4 = x8 + (W1 - W7) * x4;
870    x5 = x8 - (W1 + W7) * x5;
871    x8 = W3 * (x6 + x7);
872    x6 = x8 - (W3 - W5) * x6;
873    x7 = x8 - (W3 + W5) * x7;
874
875    /* second stage */
876    x8 = x0 + x1;
877    x0 -= x1;
878    x1 = W6 * (x3 + x2);
879    x2 = x1 - (W2 + W6) * x2;
880    x3 = x1 + (W2 - W6) * x3;
881    x1 = x4 + x6;
882    x4 -= x6;
883    x6 = x5 + x7;
884    x5 -= x7;
885
886    /* third stage */
887    x7 = x8 + x3;
888    x8 -= x3;
889    x3 = x0 + x2;
890    x0 -= x2;
891    x2 = (181 * (x4 + x5) + 128) >> 8;
892    x4 = (181 * (x4 - x5) + 128) >> 8;
893
894    /* fourth stage */
895    blk[0]    = (x7 + x1) >> 8;
896    blk[8] = (x3 + x2) >> 8;
897    blk[16] = (x0 + x4) >> 8;
898    blk[24] = (x8 + x6) >> 8;
899    blk[32] = (x8 - x6) >> 8;
900    blk[40] = (x0 - x4) >> 8;
901    blk[48] = (x3 - x2) >> 8;
902    blk[56] = (x7 - x1) >> 8;
903    /*----------------------------------------------------------------------------
904    ; Return nothing or data or data pointer
905    ----------------------------------------------------------------------------*/
906    return;
907}
908/*----------------------------------------------------------------------------
909;  End Function: idctcol
910----------------------------------------------------------------------------*/
911
912