1/* ------------------------------------------------------------------
2 * Copyright (C) 1998-2009 PacketVideo
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 * express or implied.
14 * See the License for the specific language governing permissions
15 * and limitations under the License.
16 * -------------------------------------------------------------------
17 */
18#include "avcenc_lib.h"
19
/**
 * Forward 4x4 integer transform of a prediction residue.
 *
 * Computes the residue cur - predBlock for a 4x4 block and applies the
 * separable integer butterfly transform, rows first and then columns.
 * The 16 resulting coefficients are written to dataBlock.
 *
 * @param cur        top-left sample of the 4x4 source block
 * @param pitch      two row strides packed into one int: the upper 16 bits
 *                   are the stride of cur, the lower 16 bits the stride of
 *                   predBlock
 * @param predBlock  top-left sample of the 4x4 prediction block
 * @param dataBlock  output buffer with a row stride of 16: the coefficient
 *                   for row r, column c is stored at dataBlock[16 * r + c],
 *                   so at least 52 elements must be addressable
 */
void trans(uint8 *cur, int pitch, uint8 *predBlock, int16 *dataBlock)
{
    int16 *ptr = dataBlock;
    int curpitch = (uint)pitch >> 16;   /* stride of the source rows */
    int predpitch = (pitch & 0xFFFF);   /* stride of the prediction rows */
    int d0, d1, d2, d3;                 /* the four inputs of one butterfly */
    int s03, t03, s12, t12;             /* outer/inner sums and differences */
    int j;

    /* horizontal: transform each of the four residue rows */
    for (j = 0; j < 4; j++)
    {
        d0 = cur[0] - predBlock[0];
        d1 = cur[1] - predBlock[1];
        d2 = cur[2] - predBlock[2];
        d3 = cur[3] - predBlock[3];

        s03 = d0 + d3;
        t03 = d0 - d3;
        s12 = d1 + d2;
        t12 = d1 - d2;

        /* Doubling is written as a multiplication: the differences can be
           negative and left-shifting a negative int is undefined in C. */
        ptr[0] = (int16)(s03 + s12);
        ptr[1] = (int16)(2 * t03 + t12);
        ptr[2] = (int16)(s03 - s12);
        ptr[3] = (int16)(t03 - 2 * t12);

        ptr += 16;
        predBlock += predpitch;
        cur += curpitch;
    }

    /* vertical: transform each of the four columns in place */
    ptr = dataBlock;
    for (j = 0; j < 4; j++)
    {
        d0 = ptr[0];
        d1 = ptr[16];
        d2 = ptr[32];
        d3 = ptr[48];

        s03 = d0 + d3;
        t03 = d0 - d3;
        s12 = d1 + d2;
        t12 = d1 - d2;

        ptr[0] = (int16)(s03 + s12);
        ptr[16] = (int16)(2 * t03 + t12);
        ptr[32] = (int16)(s03 - s12);
        ptr[48] = (int16)(t03 - 2 * t12);

        ptr++;
    }
}
74
75
76/* do residue transform quant invquant, invtrans and write output out */
77int dct_luma(AVCEncObject *encvid, int blkidx, uint8 *cur, uint8 *org, int *coef_cost)
78{
79    AVCCommonObj *video = encvid->common;
80    int org_pitch = encvid->currInput->pitch;
81    int pitch = video->currPic->pitch;
82    int16 *coef = video->block;
83    uint8 *pred = video->pred_block; // size 16 for a 4x4 block
84    int pred_pitch = video->pred_pitch;
85    int r0, r1, r2, r3, j, k, idx;
86    int *level, *run;
87    int Qq, Rq, q_bits, qp_const, quant;
88    int data, lev, zero_run;
89    int numcoeff;
90
91    coef += ((blkidx & 0x3) << 2) + ((blkidx >> 2) << 6); /* point to the 4x4 block */
92
93    /* first take a 4x4 transform */
94    /* horizontal */
95    j = 4;
96    while (j > 0)
97    {
98        /* calculate the residue first */
99        r0 = org[0] - pred[0];   /* OPTIMIZEABLE */
100        r1 = org[1] - pred[1];
101        r2 = org[2] - pred[2];
102        r3 = org[3] - pred[3];
103
104        r0 += r3;           //ptr[0] + ptr[3];
105        r3 = r0 - (r3 << 1);    //ptr[0] - ptr[3];
106        r1 += r2;           //ptr[1] + ptr[2];
107        r2 = r1 - (r2 << 1);    //ptr[1] - ptr[2];
108
109        coef[0] = r0 + r1;
110        coef[2] = r0 - r1;
111        coef[1] = (r3 << 1) + r2;
112        coef[3] = r3 - (r2 << 1);
113
114        coef += 16;
115        org += org_pitch;
116        pred += pred_pitch;
117        j--;
118    }
119    /* vertical */
120    coef -= 64;
121    pred -= (pred_pitch << 2);
122    j = 4;
123    while (j > 0)   /* OPTIMIZABLE */
124    {
125        r0 = coef[0] + coef[48];
126        r3 = coef[0] - coef[48];
127        r1 = coef[16] + coef[32];
128        r2 = coef[16] - coef[32];
129
130        coef[0] = r0 + r1;
131        coef[32] = r0 - r1;
132        coef[16] = (r3 << 1) + r2;
133        coef[48] = r3 - (r2 << 1);
134
135        coef++;
136        j--;
137    }
138
139    coef -= 4;
140
141    /* quant */
142    level = encvid->level[ras2dec[blkidx]];
143    run = encvid->run[ras2dec[blkidx]];
144
145    Rq = video->QPy_mod_6;
146    Qq = video->QPy_div_6;
147    qp_const = encvid->qp_const;
148    q_bits = 15 + Qq;
149
150    zero_run = 0;
151    numcoeff = 0;
152    for (k = 0; k < 16; k++)
153    {
154        idx = ZZ_SCAN_BLOCK[k]; /* map back to raster scan order */
155        data = coef[idx];
156        quant = quant_coef[Rq][k];
157        if (data > 0)
158        {
159            lev = data * quant + qp_const;
160        }
161        else
162        {
163            lev = -data * quant + qp_const;
164        }
165        lev >>= q_bits;
166        if (lev)
167        {
168            *coef_cost += ((lev > 1) ? MAX_VALUE : COEFF_COST[DISABLE_THRESHOLDING][zero_run]);
169
170            /* dequant */
171            quant = dequant_coefres[Rq][k];
172            if (data > 0)
173            {
174                level[numcoeff] = lev;
175                coef[idx] = (lev * quant) << Qq;
176            }
177            else
178            {
179                level[numcoeff] = -lev;
180                coef[idx] = (-lev * quant) << Qq;
181            }
182            run[numcoeff++] = zero_run;
183            zero_run = 0;
184        }
185        else
186        {
187            zero_run++;
188            coef[idx] = 0;
189        }
190    }
191
192    if (video->currMB->mb_intra) // only do inverse transform with intra block
193    {
194        if (numcoeff) /* then do inverse transform */
195        {
196            for (j = 4; j > 0; j--) /* horizontal */
197            {
198                r0 = coef[0] + coef[2];
199                r1 = coef[0] - coef[2];
200                r2 = (coef[1] >> 1) - coef[3];
201                r3 = coef[1] + (coef[3] >> 1);
202
203                coef[0] = r0 + r3;
204                coef[1] = r1 + r2;
205                coef[2] = r1 - r2;
206                coef[3] = r0 - r3;
207
208                coef += 16;
209            }
210
211            coef -= 64;
212            for (j = 4; j > 0; j--) /* vertical, has to be done after horizontal */
213            {
214                r0 = coef[0] + coef[32];
215                r1 = coef[0] - coef[32];
216                r2 = (coef[16] >> 1) - coef[48];
217                r3 = coef[16] + (coef[48] >> 1);
218                r0 += r3;
219                r3 = (r0 - (r3 << 1)); /* r0-r3 */
220                r1 += r2;
221                r2 = (r1 - (r2 << 1)); /* r1-r2 */
222                r0 += 32;
223                r1 += 32;
224                r2 += 32;
225                r3 += 32;
226
227                r0 = pred[0] + (r0 >> 6);
228                if ((uint)r0 > 0xFF)   r0 = 0xFF & (~(r0 >> 31));  /* clip */
229                r1 = *(pred += pred_pitch) + (r1 >> 6);
230                if ((uint)r1 > 0xFF)   r1 = 0xFF & (~(r1 >> 31));  /* clip */
231                r2 = *(pred += pred_pitch) + (r2 >> 6);
232                if ((uint)r2 > 0xFF)   r2 = 0xFF & (~(r2 >> 31));  /* clip */
233                r3 = pred[pred_pitch] + (r3 >> 6);
234                if ((uint)r3 > 0xFF)   r3 = 0xFF & (~(r3 >> 31));  /* clip */
235
236                *cur = r0;
237                *(cur += pitch) = r1;
238                *(cur += pitch) = r2;
239                cur[pitch] = r3;
240                cur -= (pitch << 1);
241                cur++;
242                pred -= (pred_pitch << 1);
243                pred++;
244                coef++;
245            }
246        }
247        else  // copy from pred to cur
248        {
249            *((uint32*)cur) = *((uint32*)pred);
250            *((uint32*)(cur += pitch)) = *((uint32*)(pred += pred_pitch));
251            *((uint32*)(cur += pitch)) = *((uint32*)(pred += pred_pitch));
252            *((uint32*)(cur += pitch)) = *((uint32*)(pred += pred_pitch));
253        }
254    }
255
256    return numcoeff;
257}
258
259
260void MBInterIdct(AVCCommonObj *video, uint8 *curL, AVCMacroblock *currMB, int picPitch)
261{
262    int16 *coef, *coef8 = video->block;
263    uint8 *cur;  // the same as curL
264    int b8, b4;
265    int r0, r1, r2, r3, j, blkidx;
266
267    for (b8 = 0; b8 < 4; b8++)
268    {
269        cur = curL;
270        coef = coef8;
271
272        if (currMB->CBP&(1 << b8))
273        {
274            for (b4 = 0; b4 < 4; b4++)
275            {
276                blkidx = blkIdx2blkXY[b8][b4];
277                /* do IDCT */
278                if (currMB->nz_coeff[blkidx])
279                {
280                    for (j = 4; j > 0; j--) /* horizontal */
281                    {
282                        r0 = coef[0] + coef[2];
283                        r1 = coef[0] - coef[2];
284                        r2 = (coef[1] >> 1) - coef[3];
285                        r3 = coef[1] + (coef[3] >> 1);
286
287                        coef[0] = r0 + r3;
288                        coef[1] = r1 + r2;
289                        coef[2] = r1 - r2;
290                        coef[3] = r0 - r3;
291
292                        coef += 16;
293                    }
294
295                    coef -= 64;
296                    for (j = 4; j > 0; j--) /* vertical, has to be done after horizontal */
297                    {
298                        r0 = coef[0] + coef[32];
299                        r1 = coef[0] - coef[32];
300                        r2 = (coef[16] >> 1) - coef[48];
301                        r3 = coef[16] + (coef[48] >> 1);
302                        r0 += r3;
303                        r3 = (r0 - (r3 << 1)); /* r0-r3 */
304                        r1 += r2;
305                        r2 = (r1 - (r2 << 1)); /* r1-r2 */
306                        r0 += 32;
307                        r1 += 32;
308                        r2 += 32;
309                        r3 += 32;
310
311                        r0 = cur[0] + (r0 >> 6);
312                        if ((uint)r0 > 0xFF)   r0 = 0xFF & (~(r0 >> 31));  /* clip */
313                        *cur = r0;
314                        r1 = *(cur += picPitch) + (r1 >> 6);
315                        if ((uint)r1 > 0xFF)   r1 = 0xFF & (~(r1 >> 31));  /* clip */
316                        *cur = r1;
317                        r2 = *(cur += picPitch) + (r2 >> 6);
318                        if ((uint)r2 > 0xFF)   r2 = 0xFF & (~(r2 >> 31));  /* clip */
319                        *cur = r2;
320                        r3 = cur[picPitch] + (r3 >> 6);
321                        if ((uint)r3 > 0xFF)   r3 = 0xFF & (~(r3 >> 31));  /* clip */
322                        cur[picPitch] = r3;
323
324                        cur -= (picPitch << 1);
325                        cur++;
326                        coef++;
327                    }
328                    cur -= 4;
329                    coef -= 4;
330                }
331                if (b4&1)
332                {
333                    cur += ((picPitch << 2) - 4);
334                    coef += 60;
335                }
336                else
337                {
338                    cur += 4;
339                    coef += 4;
340                }
341            }
342        }
343
344        if (b8&1)
345        {
346            curL += ((picPitch << 3) - 8);
347            coef8 += 120;
348        }
349        else
350        {
351            curL += 8;
352            coef8 += 8;
353        }
354    }
355
356    return ;
357}
358
/**
 * Perform DCT, quantization, inverse quantization and inverse DCT for an
 * entire 16x16 luma macroblock.
 *
 * Steps, in order:
 *   1. forward 4x4 transform of the residue orgL - pred for all 16 blocks,
 *      written to video->block (16 coefficients per row);
 *   2. 4x4 transform of the 16 DC terms, quantization of the result into
 *      encvid->leveldc / encvid->rundc, count stored in encvid->numcoefdc;
 *   3. if any DC level is non-zero, inverse DC transform and dequantization
 *      back into the DC positions of video->block;
 *   4. per 4x4 block: quantization of the 15 AC terms into encvid->level /
 *      encvid->run, update of currMB->nz_coeff and currMB->CBP, and
 *      reconstruction of the block into curL.
 *
 * The prediction is taken from encvid->pred_i16[currMB->i16Mode] and is
 * addressed with a fixed row stride of 16.
 *
 * @param encvid  encoder state (common object, input frame, output arrays)
 * @param curL    top-left luma sample of the macroblock in the reconstructed
 *                picture (row stride video->currPic->pitch); written
 * @param orgL    top-left luma sample of the macroblock in the input frame
 *                (row stride encvid->currInput->pitch); read only
 *
 * NOTE(review): several "<< 1" / "<< Qq" operands below can be negative;
 * left-shifting a negative int is undefined behaviour in ISO C.
 */
void dct_luma_16x16(AVCEncObject *encvid, uint8 *curL, uint8 *orgL)
{
    AVCCommonObj *video = encvid->common;
    int pitch = video->currPic->pitch;
    int org_pitch = encvid->currInput->pitch;
    AVCMacroblock *currMB = video->currMB;
    int16 *coef = video->block;
    uint8 *pred = encvid->pred_i16[currMB->i16Mode];
    int blk_x, blk_y, j, k, idx, b8, b4;
    int r0, r1, r2, r3, m0, m1, m2 , m3;
    int data, lev;
    int *level, *run, zero_run, ncoeff;
    int Rq, Qq, quant, q_bits, qp_const;
    int offset_cur[4], offset_pred[4], offset;

    /* horizontal pass: 16 rows, four groups of 4 samples per row.
       pred and coef advance contiguously (16 per row); only orgL needs a
       stride correction at the end of each row. */
    for (j = 16; j > 0; j--)
    {
        for (blk_x = 4; blk_x > 0; blk_x--)
        {
            /* calculate the residue first */
            r0 = *orgL++ - *pred++;
            r1 = *orgL++ - *pred++;
            r2 = *orgL++ - *pred++;
            r3 = *orgL++ - *pred++;

            r0 += r3;           /* d0 + d3 */
            r3 = r0 - (r3 << 1);    /* d0 - d3 */
            r1 += r2;           /* d1 + d2 */
            r2 = r1 - (r2 << 1);    /* d1 - d2 */

            *coef++ = r0 + r1;
            *coef++ = (r3 << 1) + r2;
            *coef++ = r0 - r1;
            *coef++ = r3 - (r2 << 1);
        }
        orgL += (org_pitch - 16);
    }
    /* rewind both contiguous cursors to the macroblock origin */
    pred -= 256;
    coef -= 256;
    /* vertical pass: for each band of four rows, transform all 16 columns */
    for (blk_y = 4; blk_y > 0; blk_y--)
    {
        for (j = 16; j > 0; j--)
        {
            r0 = coef[0] + coef[48];
            r3 = coef[0] - coef[48];
            r1 = coef[16] + coef[32];
            r2 = coef[16] - coef[32];

            coef[0] = r0 + r1;
            coef[32] = r0 - r1;
            coef[16] = (r3 << 1) + r2;
            coef[48] = r3 - (r2 << 1);

            coef++;
        }
        coef += 48; /* 16 columns done, skip the other 3 rows of this band */
    }

    /* then perform DC transform: the DC term of each 4x4 block is the
       block's top-left coefficient. First pass combines the four DC terms
       of one block row (offsets 0, 4, 8, 12); block rows are 64 apart. */
    coef -= 256;
    for (j = 4; j > 0; j--)
    {
        r0 = coef[0] + coef[12];
        r3 = coef[0] - coef[12];
        r1 = coef[4] + coef[8];
        r2 = coef[4] - coef[8];

        coef[0] = r0 + r1;
        coef[8] = r0 - r1;
        coef[4] = r3 + r2;
        coef[12] = r3 - r2;
        coef += 64;
    }
    coef -= 256;
    /* second pass combines the DC terms of one block column (offsets 0, 64,
       128, 192) and halves each result; block columns are 4 apart */
    for (j = 4; j > 0; j--)
    {
        r0 = coef[0] + coef[192];
        r3 = coef[0] - coef[192];
        r1 = coef[64] + coef[128];
        r2 = coef[64] - coef[128];

        coef[0] = (r0 + r1) >> 1;
        coef[128] = (r0 - r1) >> 1;
        coef[64] = (r3 + r2) >> 1;
        coef[192] = (r3 - r2) >> 1;
        coef += 4;
    }

    coef -= 16; /* back to the macroblock origin */
    // then quantize DC
    level = encvid->leveldc;
    run = encvid->rundc;

    Rq = video->QPy_mod_6;
    Qq = video->QPy_div_6;
    quant = quant_coef[Rq][0]; /* every DC term uses the position-0 scale */
    q_bits = 15 + Qq;
    qp_const = encvid->qp_const;

    zero_run = 0;
    ncoeff = 0;
    for (k = 0; k < 16; k++) /* in zigzag scan order */
    {
        idx = ZIGZAG2RASTERDC[k]; /* offset of the k-th DC term in coef */
        data = coef[idx];
        /* quantize the magnitude; compared with the AC path the rounding
           constant is doubled and the shift is one bit larger */
        if (data > 0)   // quant
        {
            lev = data * quant + (qp_const << 1);
        }
        else
        {
            lev = -data * quant + (qp_const << 1);
        }
        lev >>= (q_bits + 1);
        if (lev) // non-zero: record level/run and keep the signed level
        {
            /* coef keeps the quantized level here; scaling happens in the
               inverse DC transform below */
            if (data > 0)
            {
                level[ncoeff] = lev;
                coef[idx] = lev;
            }
            else
            {
                level[ncoeff] = -lev;
                coef[idx] = -lev;
            }
            run[ncoeff++] = zero_run;
            zero_run = 0;
        }
        else
        {
            zero_run++;
            coef[idx] = 0;
        }
    }

    /* inverse transform DC (skipped when every DC level is zero, in which
       case all DC positions already hold 0) */
    encvid->numcoefdc = ncoeff;
    if (ncoeff)
    {
        quant = dequant_coefres[Rq][0];

        /* first pass: across the four DC terms of each block row */
        for (j = 0; j < 4; j++)
        {
            m0 = coef[0] + coef[4];
            m1 = coef[0] - coef[4];
            m2 = coef[8] + coef[12];
            m3 = coef[8] - coef[12];


            coef[0] = m0 + m2;
            coef[4] = m0 - m2;
            coef[8] = m1 - m3;
            coef[12] = m1 + m3;
            coef += 64;
        }

        coef -= 256;

        /* second pass down each block column, fused with the dequantization
           scale: multiply by quant, then shift left by Qq - 2 when Qq >= 2,
           otherwise shift right by 2 - Qq with rounding */
        if (Qq >= 2)  /* this way should be faster than JM */
        {           /* they use (((m4*scale)<<(QPy/6))+2)>>2 for both cases. */
            Qq -= 2;
            for (j = 0; j < 4; j++)
            {
                m0 = coef[0] + coef[64];
                m1 = coef[0] - coef[64];
                m2 = coef[128] + coef[192];
                m3 = coef[128] - coef[192];

                coef[0] = ((m0 + m2) * quant) << Qq;
                coef[64] = ((m0 - m2) * quant) << Qq;
                coef[128] = ((m1 - m3) * quant) << Qq;
                coef[192] = ((m1 + m3) * quant) << Qq;
                coef += 4;
            }
            Qq += 2; /* restore the value */
        }
        else
        {
            Qq = 2 - Qq; /* Qq is now the right-shift amount (1 or 2) */
            offset = 1 << (Qq - 1); /* rounding term: half of 1 << Qq */

            for (j = 0; j < 4; j++)
            {
                m0 = coef[0] + coef[64];
                m1 = coef[0] - coef[64];
                m2 = coef[128] + coef[192];
                m3 = coef[128] - coef[192];

                coef[0] = (((m0 + m2) * quant + offset) >> Qq);
                coef[64] = (((m0 - m2) * quant + offset) >> Qq);
                coef[128] = (((m1 - m3) * quant + offset) >> Qq);
                coef[192] = (((m1 + m3) * quant + offset) >> Qq);
                coef += 4;
            }
            Qq = 2 - Qq; /* restore the value */
        }
        coef -= 16; /* back to the origin */
    }

    /* now zigzag scan ac coefs, quant, iquant and itrans */
    run = encvid->run[0];
    level = encvid->level[0];

    /* Cursor corrections applied after each block. At that point the
       cursors have already moved 4 columns to the right. */

    /* offset btw 4x4 block: [0] after an even block (stay, the right
       neighbour is next), [1] after an odd block (down 4 rows, left 8) */
    offset_cur[0] = 0;
    offset_cur[1] = (pitch << 2) - 8;

    /* offset btw 8x8 block: [2] after an even quadrant (up 8 rows, right
       8), [3] after an odd quadrant (left 8) */
    offset_cur[2] = 8 - (pitch << 3);
    offset_cur[3] = -8;

    /* similarly for pred, with a row stride of 16; the same table is also
       used to step coef */
    offset_pred[0] = 0;
    offset_pred[1] = 56;
    offset_pred[2] = -120;
    offset_pred[3] = -8;

    currMB->CBP = 0;

    for (b8 = 0; b8 < 4; b8++)
    {
        for (b4 = 0; b4 < 4; b4++)
        {

            zero_run = 0;
            ncoeff = 0;

            /* AC only: the scan starts at k = 1, so the DC term prepared
               above is not requantized here */
            for (k = 1; k < 16; k++)
            {
                idx = ZZ_SCAN_BLOCK[k]; /* map back to raster scan order */
                data = coef[idx];
                quant = quant_coef[Rq][k];
                if (data > 0)
                {
                    lev = data * quant + qp_const;
                }
                else
                {
                    lev = -data * quant + qp_const;
                }
                lev >>= q_bits;
                if (lev)
                {   /* dequant */
                    quant = dequant_coefres[Rq][k];
                    if (data > 0)
                    {
                        level[ncoeff] = lev;
                        coef[idx] = (lev * quant) << Qq;
                    }
                    else
                    {
                        level[ncoeff] = -lev;
                        coef[idx] = (-lev * quant) << Qq;
                    }
                    run[ncoeff++] = zero_run;
                    zero_run = 0;
                }
                else
                {
                    zero_run++;
                    coef[idx] = 0;
                }
            }

            currMB->nz_coeff[blkIdx2blkXY[b8][b4]] = ncoeff; /* in raster scan !!! */
            if (ncoeff)
            {
                /* any non-zero AC level marks the whole 8x8 quadrant */
                currMB->CBP |= (1 << b8);

                // do inverse transform here: horizontal pass, in place
                for (j = 4; j > 0; j--)
                {
                    r0 = coef[0] + coef[2];
                    r1 = coef[0] - coef[2];
                    r2 = (coef[1] >> 1) - coef[3];
                    r3 = coef[1] + (coef[3] >> 1);

                    coef[0] = r0 + r3;
                    coef[1] = r1 + r2;
                    coef[2] = r1 - r2;
                    coef[3] = r0 - r3;

                    coef += 16;
                }
                coef -= 64;
                /* vertical pass, one column per iteration: round, add the
                   prediction, clip and store into the picture */
                for (j = 4; j > 0; j--)
                {
                    r0 = coef[0] + coef[32];
                    r1 = coef[0] - coef[32];
                    r2 = (coef[16] >> 1) - coef[48];
                    r3 = coef[16] + (coef[48] >> 1);

                    r0 += r3;
                    r3 = (r0 - (r3 << 1)); /* r0-r3 */
                    r1 += r2;
                    r2 = (r1 - (r2 << 1)); /* r1-r2 */
                    r0 += 32;
                    r1 += 32;
                    r2 += 32;
                    r3 += 32;
                    /* clip: the unsigned compare catches <0 and >255; the
                       mask yields 0 for negative and 0xFF for large values
                       (assumes 32-bit int and arithmetic right shift) */
                    r0 = pred[0] + (r0 >> 6);
                    if ((uint)r0 > 0xFF)   r0 = 0xFF & (~(r0 >> 31));  /* clip */
                    r1 = pred[16] + (r1 >> 6);
                    if ((uint)r1 > 0xFF)   r1 = 0xFF & (~(r1 >> 31));  /* clip */
                    r2 = pred[32] + (r2 >> 6);
                    if ((uint)r2 > 0xFF)   r2 = 0xFF & (~(r2 >> 31));  /* clip */
                    r3 = pred[48] + (r3 >> 6);
                    if ((uint)r3 > 0xFF)   r3 = 0xFF & (~(r3 >> 31));  /* clip */
                    /* store the column; curL ends one column to the right
                       of where it started, on the block's top row */
                    *curL = r0;
                    *(curL += pitch) = r1;
                    *(curL += pitch) = r2;
                    curL[pitch] = r3;
                    curL -= (pitch << 1);
                    curL++;
                    pred++;
                    coef++;
                }
            }
            else  // do DC-only inverse: all AC terms are zero, so every
                  // sample of the block receives the same rounded offset
            {
                m0 = coef[0] + 32;

                for (j = 4; j > 0; j--)
                {
                    r0 = pred[0] + (m0 >> 6);
                    if ((uint)r0 > 0xFF)   r0 = 0xFF & (~(r0 >> 31));  /* clip */
                    r1 = pred[16] + (m0 >> 6);
                    if ((uint)r1 > 0xFF)   r1 = 0xFF & (~(r1 >> 31));  /* clip */
                    r2 = pred[32] + (m0 >> 6);
                    if ((uint)r2 > 0xFF)   r2 = 0xFF & (~(r2 >> 31));  /* clip */
                    r3 = pred[48] + (m0 >> 6);
                    if ((uint)r3 > 0xFF)   r3 = 0xFF & (~(r3 >> 31));  /* clip */
                    *curL = r0;
                    *(curL += pitch) = r1;
                    *(curL += pitch) = r2;
                    curL[pitch] = r3;
                    curL -= (pitch << 1);
                    curL++;
                    pred++;
                }
                coef += 4; /* keep coef in step with curL and pred */
            }

            /* each block owns 16 level/run slots */
            run += 16;  // follow coding order
            level += 16;
            curL += offset_cur[b4&1];
            pred += offset_pred[b4&1];
            coef += offset_pred[b4&1];
        }

        /* move all three cursors to the next 8x8 quadrant */
        curL += offset_cur[2 + (b8&1)];
        pred += offset_pred[2 + (b8&1)];
        coef += offset_pred[2 + (b8&1)];
    }

    return ;
}
720
721
722void dct_chroma(AVCEncObject *encvid, uint8 *curC, uint8 *orgC, int cr)
723{
724    AVCCommonObj *video = encvid->common;
725    AVCMacroblock *currMB = video->currMB;
726    int org_pitch = (encvid->currInput->pitch) >> 1;
727    int pitch = (video->currPic->pitch) >> 1;
728    int pred_pitch = 16;
729    int16 *coef = video->block + 256;
730    uint8 *pred = video->pred_block;
731    int j, blk_x, blk_y, k, idx, b4;
732    int r0, r1, r2, r3, m0;
733    int Qq, Rq, qp_const, q_bits, quant;
734    int *level, *run, zero_run, ncoeff;
735    int data, lev;
736    int offset_cur[2], offset_pred[2], offset_coef[2];
737    uint8 nz_temp[4];
738    int  coeff_cost;
739
740    if (cr)
741    {
742        coef += 8;
743        pred += 8;
744    }
745
746    if (currMB->mb_intra == 0) // inter mode
747    {
748        pred = curC;
749        pred_pitch = pitch;
750    }
751
752    /* do 4x4 transform */
753    /* horizontal */
754    for (j = 8; j > 0; j--)
755    {
756        for (blk_x = 2; blk_x > 0; blk_x--)
757        {
758            /* calculate the residue first */
759            r0 = *orgC++ - *pred++;
760            r1 = *orgC++ - *pred++;
761            r2 = *orgC++ - *pred++;
762            r3 = *orgC++ - *pred++;
763
764            r0 += r3;           //ptr[0] + ptr[3];
765            r3 = r0 - (r3 << 1);    //ptr[0] - ptr[3];
766            r1 += r2;           //ptr[1] + ptr[2];
767            r2 = r1 - (r2 << 1);    //ptr[1] - ptr[2];
768
769            *coef++ = r0 + r1;
770            *coef++ = (r3 << 1) + r2;
771            *coef++ = r0 - r1;
772            *coef++ = r3 - (r2 << 1);
773
774        }
775        coef += 8; // coef pitch is 16
776        pred += (pred_pitch - 8); // pred_pitch is 16
777        orgC += (org_pitch - 8);
778    }
779    pred -= (pred_pitch << 3);
780    coef -= 128;
781    /* vertical */
782    for (blk_y = 2; blk_y > 0; blk_y--)
783    {
784        for (j = 8; j > 0; j--)
785        {
786            r0 = coef[0] + coef[48];
787            r3 = coef[0] - coef[48];
788            r1 = coef[16] + coef[32];
789            r2 = coef[16] - coef[32];
790
791            coef[0] = r0 + r1;
792            coef[32] = r0 - r1;
793            coef[16] = (r3 << 1) + r2;
794            coef[48] = r3 - (r2 << 1);
795
796            coef++;
797        }
798        coef += 56;
799    }
800    /* then perform DC transform */
801    coef -= 128;
802
803    /* 2x2 transform of DC components*/
804    r0 = coef[0];
805    r1 = coef[4];
806    r2 = coef[64];
807    r3 = coef[68];
808
809    coef[0] = r0 + r1 + r2 + r3;
810    coef[4] = r0 - r1 + r2 - r3;
811    coef[64] = r0 + r1 - r2 - r3;
812    coef[68] = r0 - r1 - r2 + r3;
813
814    Qq    = video->QPc_div_6;
815    Rq    = video->QPc_mod_6;
816    quant = quant_coef[Rq][0];
817    q_bits    = 15 + Qq;
818    qp_const = encvid->qp_const_c;
819
820    zero_run = 0;
821    ncoeff = 0;
822    run = encvid->runcdc + (cr << 2);
823    level = encvid->levelcdc + (cr << 2);
824
825    /* in zigzag scan order */
826    for (k = 0; k < 4; k++)
827    {
828        idx = ((k >> 1) << 6) + ((k & 1) << 2);
829        data = coef[idx];
830        if (data > 0)
831        {
832            lev = data * quant + (qp_const << 1);
833        }
834        else
835        {
836            lev = -data * quant + (qp_const << 1);
837        }
838        lev >>= (q_bits + 1);
839        if (lev)
840        {
841            if (data > 0)
842            {
843                level[ncoeff] = lev;
844                coef[idx] = lev;
845            }
846            else
847            {
848                level[ncoeff] = -lev;
849                coef[idx] = -lev;
850            }
851            run[ncoeff++] = zero_run;
852            zero_run = 0;
853        }
854        else
855        {
856            zero_run++;
857            coef[idx] = 0;
858        }
859    }
860
861    encvid->numcoefcdc[cr] = ncoeff;
862
863    if (ncoeff)
864    {
865        currMB->CBP |= (1 << 4); // DC present
866        // do inverse transform
867        quant = dequant_coefres[Rq][0];
868
869        r0 = coef[0] + coef[4];
870        r1 = coef[0] - coef[4];
871        r2 = coef[64] + coef[68];
872        r3 = coef[64] - coef[68];
873
874        r0 += r2;
875        r2 = r0 - (r2 << 1);
876        r1 += r3;
877        r3 = r1 - (r3 << 1);
878
879        if (Qq >= 1)
880        {
881            Qq -= 1;
882            coef[0] = (r0 * quant) << Qq;
883            coef[4] = (r1 * quant) << Qq;
884            coef[64] = (r2 * quant) << Qq;
885            coef[68] = (r3 * quant) << Qq;
886            Qq++;
887        }
888        else
889        {
890            coef[0] = (r0 * quant) >> 1;
891            coef[4] = (r1 * quant) >> 1;
892            coef[64] = (r2 * quant) >> 1;
893            coef[68] = (r3 * quant) >> 1;
894        }
895    }
896
897    /* now do AC zigzag scan, quant, iquant and itrans */
898    if (cr)
899    {
900        run = encvid->run[20];
901        level = encvid->level[20];
902    }
903    else
904    {
905        run = encvid->run[16];
906        level = encvid->level[16];
907    }
908
909    /* offset btw 4x4 block */
910    offset_cur[0] = 0;
911    offset_cur[1] = (pitch << 2) - 8;
912    offset_pred[0] = 0;
913    offset_pred[1] = (pred_pitch << 2) - 8;
914    offset_coef[0] = 0;
915    offset_coef[1] = 56;
916
917    coeff_cost = 0;
918
919    for (b4 = 0; b4 < 4; b4++)
920    {
921        zero_run = 0;
922        ncoeff = 0;
923        for (k = 1; k < 16; k++) /* in zigzag scan order */
924        {
925            idx = ZZ_SCAN_BLOCK[k]; /* map back to raster scan order */
926            data = coef[idx];
927            quant = quant_coef[Rq][k];
928            if (data > 0)
929            {
930                lev = data * quant + qp_const;
931            }
932            else
933            {
934                lev = -data * quant + qp_const;
935            }
936            lev >>= q_bits;
937            if (lev)
938            {
939                /* for RD performance*/
940                if (lev > 1)
941                    coeff_cost += MAX_VALUE;                // set high cost, shall not be discarded
942                else
943                    coeff_cost += COEFF_COST[DISABLE_THRESHOLDING][zero_run];
944
945                /* dequant */
946                quant = dequant_coefres[Rq][k];
947                if (data > 0)
948                {
949                    level[ncoeff] = lev;
950                    coef[idx] = (lev * quant) << Qq;
951                }
952                else
953                {
954                    level[ncoeff] = -lev;
955                    coef[idx] = (-lev * quant) << Qq;
956                }
957                run[ncoeff++] = zero_run;
958                zero_run = 0;
959            }
960            else
961            {
962                zero_run++;
963                coef[idx] = 0;
964            }
965        }
966
967        nz_temp[b4] = ncoeff; // raster scan
968
969        // just advance the pointers for now, do IDCT later
970        coef += 4;
971        run += 16;
972        level += 16;
973        coef += offset_coef[b4&1];
974    }
975
976    /* rewind the pointers */
977    coef -= 128;
978
979    if (coeff_cost < _CHROMA_COEFF_COST_)
980    {
981        /* if it's not efficient to encode any blocks.
982        Just do DC only */
983        /* We can reset level and run also, but setting nz to zero should be enough. */
984        currMB->nz_coeff[16+(cr<<1)] = 0;
985        currMB->nz_coeff[17+(cr<<1)] = 0;
986        currMB->nz_coeff[20+(cr<<1)] = 0;
987        currMB->nz_coeff[21+(cr<<1)] = 0;
988
989        for (b4 = 0; b4 < 4; b4++)
990        {
991            // do DC-only inverse
992            m0 = coef[0] + 32;
993
994            for (j = 4; j > 0; j--)
995            {
996                r0 = pred[0] + (m0 >> 6);
997                if ((uint)r0 > 0xFF)   r0 = 0xFF & (~(r0 >> 31));  /* clip */
998                r1 = *(pred += pred_pitch) + (m0 >> 6);
999                if ((uint)r1 > 0xFF)   r1 = 0xFF & (~(r1 >> 31));  /* clip */
1000                r2 = pred[pred_pitch] + (m0 >> 6);
1001                if ((uint)r2 > 0xFF)   r2 = 0xFF & (~(r2 >> 31));  /* clip */
1002                r3 = pred[pred_pitch<<1] + (m0 >> 6);
1003                if ((uint)r3 > 0xFF)   r3 = 0xFF & (~(r3 >> 31));  /* clip */
1004                *curC = r0;
1005                *(curC += pitch) = r1;
1006                *(curC += pitch) = r2;
1007                curC[pitch] = r3;
1008                curC -= (pitch << 1);
1009                curC++;
1010                pred += (1 - pred_pitch);
1011            }
1012            coef += 4;
1013            curC += offset_cur[b4&1];
1014            pred += offset_pred[b4&1];
1015            coef += offset_coef[b4&1];
1016        }
1017    }
1018    else // not dropping anything, continue with the IDCT
1019    {
1020        for (b4 = 0; b4 < 4; b4++)
1021        {
1022            ncoeff = nz_temp[b4] ; // in raster scan
1023            currMB->nz_coeff[16+(b4&1)+(cr<<1)+((b4>>1)<<2)] = ncoeff; // in raster scan
1024
1025            if (ncoeff) // do a check on the nonzero-coeff
1026            {
1027                currMB->CBP |= (2 << 4);
1028
1029                // do inverse transform here
1030                for (j = 4; j > 0; j--)
1031                {
1032                    r0 = coef[0] + coef[2];
1033                    r1 = coef[0] - coef[2];
1034                    r2 = (coef[1] >> 1) - coef[3];
1035                    r3 = coef[1] + (coef[3] >> 1);
1036
1037                    coef[0] = r0 + r3;
1038                    coef[1] = r1 + r2;
1039                    coef[2] = r1 - r2;
1040                    coef[3] = r0 - r3;
1041
1042                    coef += 16;
1043                }
1044                coef -= 64;
1045                for (j = 4; j > 0; j--)
1046                {
1047                    r0 = coef[0] + coef[32];
1048                    r1 = coef[0] - coef[32];
1049                    r2 = (coef[16] >> 1) - coef[48];
1050                    r3 = coef[16] + (coef[48] >> 1);
1051
1052                    r0 += r3;
1053                    r3 = (r0 - (r3 << 1)); /* r0-r3 */
1054                    r1 += r2;
1055                    r2 = (r1 - (r2 << 1)); /* r1-r2 */
1056                    r0 += 32;
1057                    r1 += 32;
1058                    r2 += 32;
1059                    r3 += 32;
1060                    r0 = pred[0] + (r0 >> 6);
1061                    if ((uint)r0 > 0xFF)   r0 = 0xFF & (~(r0 >> 31));  /* clip */
1062                    r1 = *(pred += pred_pitch) + (r1 >> 6);
1063                    if ((uint)r1 > 0xFF)   r1 = 0xFF & (~(r1 >> 31));  /* clip */
1064                    r2 = pred[pred_pitch] + (r2 >> 6);
1065                    if ((uint)r2 > 0xFF)   r2 = 0xFF & (~(r2 >> 31));  /* clip */
1066                    r3 = pred[pred_pitch<<1] + (r3 >> 6);
1067                    if ((uint)r3 > 0xFF)   r3 = 0xFF & (~(r3 >> 31));  /* clip */
1068                    *curC = r0;
1069                    *(curC += pitch) = r1;
1070                    *(curC += pitch) = r2;
1071                    curC[pitch] = r3;
1072                    curC -= (pitch << 1);
1073                    curC++;
1074                    pred += (1 - pred_pitch);
1075                    coef++;
1076                }
1077            }
1078            else
1079            {
1080                // do DC-only inverse
1081                m0 = coef[0] + 32;
1082
1083                for (j = 4; j > 0; j--)
1084                {
1085                    r0 = pred[0] + (m0 >> 6);
1086                    if ((uint)r0 > 0xFF)   r0 = 0xFF & (~(r0 >> 31));  /* clip */
1087                    r1 = *(pred += pred_pitch) + (m0 >> 6);
1088                    if ((uint)r1 > 0xFF)   r1 = 0xFF & (~(r1 >> 31));  /* clip */
1089                    r2 = pred[pred_pitch] + (m0 >> 6);
1090                    if ((uint)r2 > 0xFF)   r2 = 0xFF & (~(r2 >> 31));  /* clip */
1091                    r3 = pred[pred_pitch<<1] + (m0 >> 6);
1092                    if ((uint)r3 > 0xFF)   r3 = 0xFF & (~(r3 >> 31));  /* clip */
1093                    *curC = r0;
1094                    *(curC += pitch) = r1;
1095                    *(curC += pitch) = r2;
1096                    curC[pitch] = r3;
1097                    curC -= (pitch << 1);
1098                    curC++;
1099                    pred += (1 - pred_pitch);
1100                }
1101                coef += 4;
1102            }
1103            curC += offset_cur[b4&1];
1104            pred += offset_pred[b4&1];
1105            coef += offset_coef[b4&1];
1106        }
1107    }
1108
1109    return ;
1110}
1111
1112
1113/* only DC transform */
1114int TransQuantIntra16DC(AVCEncObject *encvid)
1115{
1116    AVCCommonObj *video = encvid->common;
1117    int16 *block = video->block;
1118    int *level = encvid->leveldc;
1119    int *run = encvid->rundc;
1120    int16 *ptr = block;
1121    int r0, r1, r2, r3, j;
1122    int Qq = video->QPy_div_6;
1123    int Rq = video->QPy_mod_6;
1124    int q_bits, qp_const, quant;
1125    int data, lev, zero_run;
1126    int k, ncoeff, idx;
1127
1128    /* DC transform */
1129    /* horizontal */
1130    j = 4;
1131    while (j)
1132    {
1133        r0 = ptr[0] + ptr[12];
1134        r3 = ptr[0] - ptr[12];
1135        r1 = ptr[4] + ptr[8];
1136        r2 = ptr[4] - ptr[8];
1137
1138        ptr[0] = r0 + r1;
1139        ptr[8] = r0 - r1;
1140        ptr[4] = r3 + r2;
1141        ptr[12] = r3 - r2;
1142        ptr += 64;
1143        j--;
1144    }
1145    /* vertical */
1146    ptr = block;
1147    j = 4;
1148    while (j)
1149    {
1150        r0 = ptr[0] + ptr[192];
1151        r3 = ptr[0] - ptr[192];
1152        r1 = ptr[64] + ptr[128];
1153        r2 = ptr[64] - ptr[128];
1154
1155        ptr[0] = (r0 + r1) >> 1;
1156        ptr[128] = (r0 - r1) >> 1;
1157        ptr[64] = (r3 + r2) >> 1;
1158        ptr[192] = (r3 - r2) >> 1;
1159        ptr += 4;
1160        j--;
1161    }
1162
1163    quant = quant_coef[Rq][0];
1164    q_bits    = 15 + Qq;
1165    qp_const = (1 << q_bits) / 3;    // intra
1166
1167    zero_run = 0;
1168    ncoeff = 0;
1169
1170    for (k = 0; k < 16; k++) /* in zigzag scan order */
1171    {
1172        idx = ZIGZAG2RASTERDC[k];
1173        data = block[idx];
1174        if (data > 0)
1175        {
1176            lev = data * quant + (qp_const << 1);
1177        }
1178        else
1179        {
1180            lev = -data * quant + (qp_const << 1);
1181        }
1182        lev >>= (q_bits + 1);
1183        if (lev)
1184        {
1185            if (data > 0)
1186            {
1187                level[ncoeff] = lev;
1188                block[idx] = lev;
1189            }
1190            else
1191            {
1192                level[ncoeff] = -lev;
1193                block[idx] = -lev;
1194            }
1195            run[ncoeff++] = zero_run;
1196            zero_run = 0;
1197        }
1198        else
1199        {
1200            zero_run++;
1201            block[idx] = 0;
1202        }
1203    }
1204    return ncoeff;
1205}
1206
1207int TransQuantChromaDC(AVCEncObject *encvid, int16 *block, int slice_type, int cr)
1208{
1209    AVCCommonObj *video = encvid->common;
1210    int *level, *run;
1211    int r0, r1, r2, r3;
1212    int Qq, Rq, q_bits, qp_const, quant;
1213    int data, lev, zero_run;
1214    int k, ncoeff, idx;
1215
1216    level = encvid->levelcdc + (cr << 2); /* cb or cr */
1217    run = encvid->runcdc + (cr << 2);
1218
1219    /* 2x2 transform of DC components*/
1220    r0 = block[0];
1221    r1 = block[4];
1222    r2 = block[64];
1223    r3 = block[68];
1224
1225    block[0] = r0 + r1 + r2 + r3;
1226    block[4] = r0 - r1 + r2 - r3;
1227    block[64] = r0 + r1 - r2 - r3;
1228    block[68] = r0 - r1 - r2 + r3;
1229
1230    Qq    = video->QPc_div_6;
1231    Rq    = video->QPc_mod_6;
1232    quant = quant_coef[Rq][0];
1233    q_bits    = 15 + Qq;
1234    if (slice_type == AVC_I_SLICE)
1235    {
1236        qp_const = (1 << q_bits) / 3;
1237    }
1238    else
1239    {
1240        qp_const = (1 << q_bits) / 6;
1241    }
1242
1243    zero_run = 0;
1244    ncoeff = 0;
1245
1246    for (k = 0; k < 4; k++) /* in zigzag scan order */
1247    {
1248        idx = ((k >> 1) << 6) + ((k & 1) << 2);
1249        data = block[idx];
1250        if (data > 0)
1251        {
1252            lev = data * quant + (qp_const << 1);
1253        }
1254        else
1255        {
1256            lev = -data * quant + (qp_const << 1);
1257        }
1258        lev >>= (q_bits + 1);
1259        if (lev)
1260        {
1261            if (data > 0)
1262            {
1263                level[ncoeff] = lev;
1264                block[idx] = lev;
1265            }
1266            else
1267            {
1268                level[ncoeff] = -lev;
1269                block[idx] = -lev;
1270            }
1271            run[ncoeff++] = zero_run;
1272            zero_run = 0;
1273        }
1274        else
1275        {
1276            zero_run++;
1277            block[idx] = 0;
1278        }
1279    }
1280    return ncoeff;
1281}
1282
1283
1284